[AArch64][4/10] ARMv8.2-A FP16 three operands vector intrinsics
/* ARM NEON intrinsics include file.

   Copyright (C) 2011-2016 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _AARCH64_NEON_H_
#define _AARCH64_NEON_H_

#pragma GCC push_options
#pragma GCC target ("+nothing+simd")

#include <stdint.h>

#define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
#define __AARCH64_INT64_C(__C) ((int64_t) __C)

typedef __Int8x8_t int8x8_t;
typedef __Int16x4_t int16x4_t;
typedef __Int32x2_t int32x2_t;
typedef __Int64x1_t int64x1_t;
typedef __Float16x4_t float16x4_t;
typedef __Float32x2_t float32x2_t;
typedef __Poly8x8_t poly8x8_t;
typedef __Poly16x4_t poly16x4_t;
typedef __Uint8x8_t uint8x8_t;
typedef __Uint16x4_t uint16x4_t;
typedef __Uint32x2_t uint32x2_t;
typedef __Float64x1_t float64x1_t;
typedef __Uint64x1_t uint64x1_t;
typedef __Int8x16_t int8x16_t;
typedef __Int16x8_t int16x8_t;
typedef __Int32x4_t int32x4_t;
typedef __Int64x2_t int64x2_t;
typedef __Float16x8_t float16x8_t;
typedef __Float32x4_t float32x4_t;
typedef __Float64x2_t float64x2_t;
typedef __Poly8x16_t poly8x16_t;
typedef __Poly16x8_t poly16x8_t;
typedef __Poly64x2_t poly64x2_t;
typedef __Uint8x16_t uint8x16_t;
typedef __Uint16x8_t uint16x8_t;
typedef __Uint32x4_t uint32x4_t;
typedef __Uint64x2_t uint64x2_t;

typedef __Poly8_t poly8_t;
typedef __Poly16_t poly16_t;
typedef __Poly64_t poly64_t;
typedef __Poly128_t poly128_t;

typedef __fp16 float16_t;
typedef float float32_t;
typedef double float64_t;

typedef struct int8x8x2_t
{
  int8x8_t val[2];
} int8x8x2_t;

typedef struct int8x16x2_t
{
  int8x16_t val[2];
} int8x16x2_t;

typedef struct int16x4x2_t
{
  int16x4_t val[2];
} int16x4x2_t;

typedef struct int16x8x2_t
{
  int16x8_t val[2];
} int16x8x2_t;

typedef struct int32x2x2_t
{
  int32x2_t val[2];
} int32x2x2_t;

typedef struct int32x4x2_t
{
  int32x4_t val[2];
} int32x4x2_t;

typedef struct int64x1x2_t
{
  int64x1_t val[2];
} int64x1x2_t;

typedef struct int64x2x2_t
{
  int64x2_t val[2];
} int64x2x2_t;

typedef struct uint8x8x2_t
{
  uint8x8_t val[2];
} uint8x8x2_t;

typedef struct uint8x16x2_t
{
  uint8x16_t val[2];
} uint8x16x2_t;

typedef struct uint16x4x2_t
{
  uint16x4_t val[2];
} uint16x4x2_t;

typedef struct uint16x8x2_t
{
  uint16x8_t val[2];
} uint16x8x2_t;

typedef struct uint32x2x2_t
{
  uint32x2_t val[2];
} uint32x2x2_t;

typedef struct uint32x4x2_t
{
  uint32x4_t val[2];
} uint32x4x2_t;

typedef struct uint64x1x2_t
{
  uint64x1_t val[2];
} uint64x1x2_t;

typedef struct uint64x2x2_t
{
  uint64x2_t val[2];
} uint64x2x2_t;

typedef struct float16x4x2_t
{
  float16x4_t val[2];
} float16x4x2_t;

typedef struct float16x8x2_t
{
  float16x8_t val[2];
} float16x8x2_t;

typedef struct float32x2x2_t
{
  float32x2_t val[2];
} float32x2x2_t;

typedef struct float32x4x2_t
{
  float32x4_t val[2];
} float32x4x2_t;

typedef struct float64x2x2_t
{
  float64x2_t val[2];
} float64x2x2_t;

typedef struct float64x1x2_t
{
  float64x1_t val[2];
} float64x1x2_t;

typedef struct poly8x8x2_t
{
  poly8x8_t val[2];
} poly8x8x2_t;

typedef struct poly8x16x2_t
{
  poly8x16_t val[2];
} poly8x16x2_t;

typedef struct poly16x4x2_t
{
  poly16x4_t val[2];
} poly16x4x2_t;

typedef struct poly16x8x2_t
{
  poly16x8_t val[2];
} poly16x8x2_t;

typedef struct int8x8x3_t
{
  int8x8_t val[3];
} int8x8x3_t;

typedef struct int8x16x3_t
{
  int8x16_t val[3];
} int8x16x3_t;

typedef struct int16x4x3_t
{
  int16x4_t val[3];
} int16x4x3_t;

typedef struct int16x8x3_t
{
  int16x8_t val[3];
} int16x8x3_t;

typedef struct int32x2x3_t
{
  int32x2_t val[3];
} int32x2x3_t;

typedef struct int32x4x3_t
{
  int32x4_t val[3];
} int32x4x3_t;

typedef struct int64x1x3_t
{
  int64x1_t val[3];
} int64x1x3_t;

typedef struct int64x2x3_t
{
  int64x2_t val[3];
} int64x2x3_t;

typedef struct uint8x8x3_t
{
  uint8x8_t val[3];
} uint8x8x3_t;

typedef struct uint8x16x3_t
{
  uint8x16_t val[3];
} uint8x16x3_t;

typedef struct uint16x4x3_t
{
  uint16x4_t val[3];
} uint16x4x3_t;

typedef struct uint16x8x3_t
{
  uint16x8_t val[3];
} uint16x8x3_t;

typedef struct uint32x2x3_t
{
  uint32x2_t val[3];
} uint32x2x3_t;

typedef struct uint32x4x3_t
{
  uint32x4_t val[3];
} uint32x4x3_t;

typedef struct uint64x1x3_t
{
  uint64x1_t val[3];
} uint64x1x3_t;

typedef struct uint64x2x3_t
{
  uint64x2_t val[3];
} uint64x2x3_t;

typedef struct float16x4x3_t
{
  float16x4_t val[3];
} float16x4x3_t;

typedef struct float16x8x3_t
{
  float16x8_t val[3];
} float16x8x3_t;

typedef struct float32x2x3_t
{
  float32x2_t val[3];
} float32x2x3_t;

typedef struct float32x4x3_t
{
  float32x4_t val[3];
} float32x4x3_t;

typedef struct float64x2x3_t
{
  float64x2_t val[3];
} float64x2x3_t;

typedef struct float64x1x3_t
{
  float64x1_t val[3];
} float64x1x3_t;

typedef struct poly8x8x3_t
{
  poly8x8_t val[3];
} poly8x8x3_t;

typedef struct poly8x16x3_t
{
  poly8x16_t val[3];
} poly8x16x3_t;

typedef struct poly16x4x3_t
{
  poly16x4_t val[3];
} poly16x4x3_t;

typedef struct poly16x8x3_t
{
  poly16x8_t val[3];
} poly16x8x3_t;

typedef struct int8x8x4_t
{
  int8x8_t val[4];
} int8x8x4_t;

typedef struct int8x16x4_t
{
  int8x16_t val[4];
} int8x16x4_t;

typedef struct int16x4x4_t
{
  int16x4_t val[4];
} int16x4x4_t;

typedef struct int16x8x4_t
{
  int16x8_t val[4];
} int16x8x4_t;

typedef struct int32x2x4_t
{
  int32x2_t val[4];
} int32x2x4_t;

typedef struct int32x4x4_t
{
  int32x4_t val[4];
} int32x4x4_t;

typedef struct int64x1x4_t
{
  int64x1_t val[4];
} int64x1x4_t;

typedef struct int64x2x4_t
{
  int64x2_t val[4];
} int64x2x4_t;

typedef struct uint8x8x4_t
{
  uint8x8_t val[4];
} uint8x8x4_t;

typedef struct uint8x16x4_t
{
  uint8x16_t val[4];
} uint8x16x4_t;

typedef struct uint16x4x4_t
{
  uint16x4_t val[4];
} uint16x4x4_t;

typedef struct uint16x8x4_t
{
  uint16x8_t val[4];
} uint16x8x4_t;

typedef struct uint32x2x4_t
{
  uint32x2_t val[4];
} uint32x2x4_t;

typedef struct uint32x4x4_t
{
  uint32x4_t val[4];
} uint32x4x4_t;

typedef struct uint64x1x4_t
{
  uint64x1_t val[4];
} uint64x1x4_t;

typedef struct uint64x2x4_t
{
  uint64x2_t val[4];
} uint64x2x4_t;

typedef struct float16x4x4_t
{
  float16x4_t val[4];
} float16x4x4_t;

typedef struct float16x8x4_t
{
  float16x8_t val[4];
} float16x8x4_t;

typedef struct float32x2x4_t
{
  float32x2_t val[4];
} float32x2x4_t;

typedef struct float32x4x4_t
{
  float32x4_t val[4];
} float32x4x4_t;

typedef struct float64x2x4_t
{
  float64x2_t val[4];
} float64x2x4_t;

typedef struct float64x1x4_t
{
  float64x1_t val[4];
} float64x1x4_t;

typedef struct poly8x8x4_t
{
  poly8x8_t val[4];
} poly8x8x4_t;

typedef struct poly8x16x4_t
{
  poly8x16_t val[4];
} poly8x16x4_t;

typedef struct poly16x4x4_t
{
  poly16x4_t val[4];
} poly16x4x4_t;

typedef struct poly16x8x4_t
{
  poly16x8_t val[4];
} poly16x8x4_t;

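/* Note: these aggregate types hold groups of 2, 3 or 4 vectors; in the
   full header they are the operand and result types of the structure
   load/store intrinsics (vld2/vld3/vld4, vst2/vst3/vst4).  */
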
/* __aarch64_vdup_lane internal macros.  */
#define __aarch64_vdup_lane_any(__size, __q, __a, __b) \
  vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b))

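/* For example, __aarch64_vdup_lane_f32 (__a, __b) below expands to
   vdup_n_f32 (__aarch64_vget_lane_any (__a, __b)); the __q argument
   selects the vdupq_n_* forms for the 128-bit variants.  */
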
#define __aarch64_vdup_lane_f16(__a, __b) \
  __aarch64_vdup_lane_any (f16, , __a, __b)
#define __aarch64_vdup_lane_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, , __a, __b)
#define __aarch64_vdup_lane_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, , __a, __b)
#define __aarch64_vdup_lane_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, , __a, __b)
#define __aarch64_vdup_lane_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, , __a, __b)

/* __aarch64_vdup_laneq internal macros.  */
#define __aarch64_vdup_laneq_f16(__a, __b) \
  __aarch64_vdup_lane_any (f16, , __a, __b)
#define __aarch64_vdup_laneq_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, , __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, , __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, , __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, , __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, , __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, , __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, , __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, , __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, , __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, , __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, , __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, , __a, __b)

/* __aarch64_vdupq_lane internal macros.  */
#define __aarch64_vdupq_lane_f16(__a, __b) \
  __aarch64_vdup_lane_any (f16, q, __a, __b)
#define __aarch64_vdupq_lane_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, q, __a, __b)
#define __aarch64_vdupq_lane_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, q, __a, __b)
#define __aarch64_vdupq_lane_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, q, __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, q, __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, q, __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, q, __a, __b)
#define __aarch64_vdupq_lane_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, q, __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, q, __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, q, __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, q, __a, __b)

/* __aarch64_vdupq_laneq internal macros.  */
#define __aarch64_vdupq_laneq_f16(__a, __b) \
  __aarch64_vdup_lane_any (f16, q, __a, __b)
#define __aarch64_vdupq_laneq_f32(__a, __b) \
  __aarch64_vdup_lane_any (f32, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
  __aarch64_vdup_lane_any (f64, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
  __aarch64_vdup_lane_any (p8, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
  __aarch64_vdup_lane_any (p16, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
  __aarch64_vdup_lane_any (s8, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
  __aarch64_vdup_lane_any (s16, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
  __aarch64_vdup_lane_any (s32, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
  __aarch64_vdup_lane_any (s64, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
  __aarch64_vdup_lane_any (u8, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
  __aarch64_vdup_lane_any (u16, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
  __aarch64_vdup_lane_any (u32, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
  __aarch64_vdup_lane_any (u64, q, __a, __b)

/* Internal macros for lane indices.  */

#define __AARCH64_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0]))
#define __AARCH64_LANE_CHECK(__vec, __idx) \
  __builtin_aarch64_im_lane_boundsi (sizeof(__vec), sizeof(__vec[0]), __idx)

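/* The lane check expands to a GCC builtin that rejects out-of-range
   constant lane indexes at compile time.  */
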
/* For big-endian, GCC's vector indices are the opposite way around
   to the architectural lane indices used by Neon intrinsics.  */
#ifdef __AARCH64EB__
#define __aarch64_lane(__vec, __idx) (__AARCH64_NUM_LANES (__vec) - 1 - __idx)
#else
#define __aarch64_lane(__vec, __idx) __idx
#endif

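/* For example, on big-endian, architectural lane 0 of a uint32x4_t is
   GCC vector element 3, so __aarch64_lane (__vec, 0) yields 3.  */
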
/* vget_lane internal macro.  */
#define __aarch64_vget_lane_any(__vec, __index) \
  __extension__ \
  ({ \
     __AARCH64_LANE_CHECK (__vec, __index); \
     __vec[__aarch64_lane (__vec, __index)]; \
  })

/* vset_lane and vld1_lane internal macro.  */
#define __aarch64_vset_lane_any(__elem, __vec, __index) \
  __extension__ \
  ({ \
     __AARCH64_LANE_CHECK (__vec, __index); \
     __vec[__aarch64_lane (__vec, __index)] = __elem; \
     __vec; \
  })

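/* Illustrative use of the two macros above:
   __aarch64_vget_lane_any (__v, 2) reads architectural lane 2 of __v
   after the compile-time lane check, and
   __aarch64_vset_lane_any (__x, __v, 2) writes __x to that lane and
   yields the updated vector.  */
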
/* vadd */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vadd_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vadd_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vadd_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vadd_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a + __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a + __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vaddq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vaddq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a + __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a + __b;
}

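/* vaddl: widening ("long") add; each result element is the sum of the
   corresponding source elements at twice their width, so the addition
   cannot overflow.  The _high variants below use the upper halves of
   the 128-bit sources.  */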
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
                                                   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
                                                   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
                                                   (int32x2_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
                                                     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
                                                    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
                                                    (int32x4_t) __b);
}

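/* vaddw: wide add; each element of the narrow operand __b is widened
   and added to the corresponding element of the wide operand __a.
   The _high variants use the upper half of __b.  */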
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddw_s8 (int16x8_t __a, int8x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddw_s16 (int32x4_t __a, int16x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddw_s32 (int64x2_t __a, int32x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
                                                   (int8x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
                                                   (int16x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
                                                   (int32x2_t) __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
{
  return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
{
  return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
{
  return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
                                                     (int8x16_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
                                                    (int16x8_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
{
  return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
                                                    (int32x4_t) __b);
}

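/* vhadd: halving add; each result element is (a + b) >> 1, computed
   without intermediate overflow and truncated.  */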
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vhadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vhadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vhadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
                                                  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
                                                   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
                                                   (int32x2_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vhaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
                                                    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
                                                   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
                                                   (int32x4_t) __b);
}

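/* vrhadd: rounding halving add; each result element is
   (a + b + 1) >> 1, computed without intermediate overflow.  */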
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrhadd_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrhadd_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrhadd_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
                                                   (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
                                                    (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
                                                    (int32x2_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
                                                     (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
                                                    (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
                                                    (int32x4_t) __b);
}

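/* vaddhn: add and narrow; returns the most significant half of each
   sum, truncated.  */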
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vaddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vaddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vaddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
                                                  (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
                                                   (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
                                                   (int64x2_t) __b);
}

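/* vraddhn: like vaddhn, but the narrowing rounds instead of
   truncating.  */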
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vraddhn_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vraddhn_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vraddhn_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
                                                   (int16x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
                                                    (int32x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
                                                    (int64x2_t) __b);
}

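/* The *addhn_high forms place the narrowed sum in the upper half of
   the result and copy the narrow operand __a into the lower half.  */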
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
                                                    (int16x8_t) __b,
                                                    (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
                                                    (int32x4_t) __b,
                                                    (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
                                                    (int64x2_t) __b,
                                                    (int64x2_t) __c);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
{
  return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
{
  return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
{
  return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
  return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
                                                     (int16x8_t) __b,
                                                     (int16x8_t) __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
{
  return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
                                                     (int32x4_t) __b,
                                                     (int32x4_t) __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
{
  return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
                                                     (int64x2_t) __b,
                                                     (int64x2_t) __c);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdiv_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a / __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vdiv_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a / __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vdivq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a / __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vdivq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a / __b;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmul_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmul_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmul_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a * __b;
}

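/* vmul_p8: polynomial (carry-less) multiply over GF(2); each 8-bit
   product is truncated to its low 8 bits.  */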
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmul_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
                                                 (int8x8_t) __b);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmulq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a * __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a * __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a * __b;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
                                                   (int8x16_t) __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vand_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vand_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vand_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vand_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vand_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vand_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vand_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vand_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vandq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vandq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vandq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vandq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vandq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vandq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vandq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vandq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & __b;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vorr_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vorr_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vorr_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vorr_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vorr_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vorr_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vorr_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vorr_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vorrq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vorrq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vorrq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vorrq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | __b;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
veor_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
veor_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
veor_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
veor_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
veor_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
veor_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
veor_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
veor_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
veorq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
veorq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
veorq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
veorq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
veorq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
veorq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
veorq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a ^ __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
veorq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a ^ __b;
}

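/* vbic ("bit clear"): bitwise __a AND NOT __b.  */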
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vbic_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vbic_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vbic_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vbic_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vbic_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vbic_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vbic_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vbic_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vbicq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vbicq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vbicq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vbicq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a & ~__b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a & ~__b;
}

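/* vorn ("OR NOT"): bitwise __a OR NOT __b.  */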
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vorn_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vorn_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vorn_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vorn_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vorn_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vorn_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vorn_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vorn_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vornq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vornq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vornq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vornq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vornq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vornq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vornq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vornq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a | ~__b;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsub_s8 (int8x8_t __a, int8x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsub_s16 (int16x4_t __a, int16x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsub_s32 (int32x2_t __a, int32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vsub_f32 (float32x2_t __a, float32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vsub_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsub_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsub_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsub_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsub_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsub_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a - __b;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsubq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __a - __b;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsubq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsubq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsubq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vsubq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vsubq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return __a - __b;
}

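/* vsubl: widening ("long") subtract, the subtraction analogue of
   vaddl; the _high variants use the upper halves of the sources.  */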
1940 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1941 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1942 {
1943 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1944 }
1945
1946 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1947 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1948 {
1949 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1950 }
1951
1952 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1953 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1954 {
1955 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1956 }
1957
1958 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1959 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1960 {
1961 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1962 (int8x8_t) __b);
1963 }
1964
1965 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1966 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1967 {
1968 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1969 (int16x4_t) __b);
1970 }
1971
1972 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1973 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
1974 {
1975 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
1976 (int32x2_t) __b);
1977 }
1978
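/* Sketch: the vsubl_* forms (SSUBL/USUBL) widen both operands before
   subtracting, so the 8-bit difference below cannot wrap; the result
   lanes are 16 bits wide.  Helper name is illustrative only:

     int16x8_t
     wide_diff (int8x8_t a, int8x8_t b)
     {
       return vsubl_s8 (a, b);
     }  */
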
1979 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1980 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
1981 {
1982 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
1983 }
1984
1985 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1986 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
1987 {
1988 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
1989 }
1990
1991 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1992 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
1993 {
1994 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
1995 }
1996
1997 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1998 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
1999 {
2000 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
2001 (int8x16_t) __b);
2002 }
2003
2004 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2005 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
2006 {
2007 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
2008 (int16x8_t) __b);
2009 }
2010
2011 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2012 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
2013 {
2014 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
2015 (int32x4_t) __b);
2016 }
2017
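/* Sketch: pairing vsubl with vsubl_high widen-subtracts all sixteen
   lanes of a Q-register input, the low half (lanes 0-7) via
   vget_low_s8 (defined elsewhere in this header) and the high half
   (lanes 8-15) via the _high form.  Hypothetical helper:

     void
     wide_diff_16 (int8x16_t a, int8x16_t b, int16x8_t *lo, int16x8_t *hi)
     {
       *lo = vsubl_s8 (vget_low_s8 (a), vget_low_s8 (b));
       *hi = vsubl_high_s8 (a, b);
     }  */
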
2018 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2019 vsubw_s8 (int16x8_t __a, int8x8_t __b)
2020 {
2021 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
2022 }
2023
2024 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2025 vsubw_s16 (int32x4_t __a, int16x4_t __b)
2026 {
2027 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
2028 }
2029
2030 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2031 vsubw_s32 (int64x2_t __a, int32x2_t __b)
2032 {
2033 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
2034 }
2035
2036 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2037 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
2038 {
2039 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
2040 (int8x8_t) __b);
2041 }
2042
2043 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2044 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
2045 {
2046 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
2047 (int16x4_t) __b);
2048 }
2049
2050 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2051 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
2052 {
2053 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
2054 (int32x2_t) __b);
2055 }
2056
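/* Sketch: vsubw_* (SSUBW/USUBW) subtracts a narrow vector from an
   already-widened one, which avoids re-widening an accumulator on
   every step.  Illustrative helper:

     uint16x8_t
     sub_samples (uint16x8_t acc, uint8x8_t s)
     {
       return vsubw_u8 (acc, s);
     }  */
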
2057 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2058 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
2059 {
2060 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
2061 }
2062
2063 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2064 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
2065 {
2066 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
2067 }
2068
2069 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2070 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
2071 {
2072 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
2073 }
2074
2075 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2076 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
2077 {
2078 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
2079 (int8x16_t) __b);
2080 }
2081
2082 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2083 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
2084 {
2085 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
2086 (int16x8_t) __b);
2087 }
2088
2089 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2090 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
2091 {
2092 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
2093 (int32x4_t) __b);
2094 }
2095
2096 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2097 vqadd_s8 (int8x8_t __a, int8x8_t __b)
2098 {
2099 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
2100 }
2101
2102 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2103 vqadd_s16 (int16x4_t __a, int16x4_t __b)
2104 {
2105 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
2106 }
2107
2108 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2109 vqadd_s32 (int32x2_t __a, int32x2_t __b)
2110 {
2111 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
2112 }
2113
2114 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2115 vqadd_s64 (int64x1_t __a, int64x1_t __b)
2116 {
2117 return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])};
2118 }
2119
2120 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2121 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
2122 {
2123 return __builtin_aarch64_uqaddv8qi_uuu (__a, __b);
2124 }
2125
2126 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2127 vhsub_s8 (int8x8_t __a, int8x8_t __b)
2128 {
2129 return (int8x8_t) __builtin_aarch64_shsubv8qi (__a, __b);
2130 }
2131
2132 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2133 vhsub_s16 (int16x4_t __a, int16x4_t __b)
2134 {
2135 return (int16x4_t) __builtin_aarch64_shsubv4hi (__a, __b);
2136 }
2137
2138 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2139 vhsub_s32 (int32x2_t __a, int32x2_t __b)
2140 {
2141 return (int32x2_t) __builtin_aarch64_shsubv2si (__a, __b);
2142 }
2143
2144 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2145 vhsub_u8 (uint8x8_t __a, uint8x8_t __b)
2146 {
2147 return (uint8x8_t) __builtin_aarch64_uhsubv8qi ((int8x8_t) __a,
2148 (int8x8_t) __b);
2149 }
2150
2151 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2152 vhsub_u16 (uint16x4_t __a, uint16x4_t __b)
2153 {
2154 return (uint16x4_t) __builtin_aarch64_uhsubv4hi ((int16x4_t) __a,
2155 (int16x4_t) __b);
2156 }
2157
2158 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2159 vhsub_u32 (uint32x2_t __a, uint32x2_t __b)
2160 {
2161 return (uint32x2_t) __builtin_aarch64_uhsubv2si ((int32x2_t) __a,
2162 (int32x2_t) __b);
2163 }
2164
2165 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2166 vhsubq_s8 (int8x16_t __a, int8x16_t __b)
2167 {
2168 return (int8x16_t) __builtin_aarch64_shsubv16qi (__a, __b);
2169 }
2170
2171 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2172 vhsubq_s16 (int16x8_t __a, int16x8_t __b)
2173 {
2174 return (int16x8_t) __builtin_aarch64_shsubv8hi (__a, __b);
2175 }
2176
2177 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2178 vhsubq_s32 (int32x4_t __a, int32x4_t __b)
2179 {
2180 return (int32x4_t) __builtin_aarch64_shsubv4si (__a, __b);
2181 }
2182
2183 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2184 vhsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2185 {
2186 return (uint8x16_t) __builtin_aarch64_uhsubv16qi ((int8x16_t) __a,
2187 (int8x16_t) __b);
2188 }
2189
2190 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2191 vhsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2192 {
2193 return (uint16x8_t) __builtin_aarch64_uhsubv8hi ((int16x8_t) __a,
2194 (int16x8_t) __b);
2195 }
2196
2197 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2198 vhsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2199 {
2200 return (uint32x4_t) __builtin_aarch64_uhsubv4si ((int32x4_t) __a,
2201 (int32x4_t) __b);
2202 }
2203
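/* Sketch: the halving forms (SHSUB/UHSUB) compute (a - b) >> 1 per
   lane, with the subtraction performed at full precision first, so no
   intermediate overflow is possible.  Illustrative helper:

     uint8x16_t
     half_diff (uint8x16_t a, uint8x16_t b)
     {
       return vhsubq_u8 (a, b);
     }  */
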
2204 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2205 vsubhn_s16 (int16x8_t __a, int16x8_t __b)
2206 {
2207 return (int8x8_t) __builtin_aarch64_subhnv8hi (__a, __b);
2208 }
2209
2210 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2211 vsubhn_s32 (int32x4_t __a, int32x4_t __b)
2212 {
2213 return (int16x4_t) __builtin_aarch64_subhnv4si (__a, __b);
2214 }
2215
2216 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2217 vsubhn_s64 (int64x2_t __a, int64x2_t __b)
2218 {
2219 return (int32x2_t) __builtin_aarch64_subhnv2di (__a, __b);
2220 }
2221
2222 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2223 vsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
2224 {
2225 return (uint8x8_t) __builtin_aarch64_subhnv8hi ((int16x8_t) __a,
2226 (int16x8_t) __b);
2227 }
2228
2229 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2230 vsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
2231 {
2232 return (uint16x4_t) __builtin_aarch64_subhnv4si ((int32x4_t) __a,
2233 (int32x4_t) __b);
2234 }
2235
2236 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2237 vsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
2238 {
2239 return (uint32x2_t) __builtin_aarch64_subhnv2di ((int64x2_t) __a,
2240 (int64x2_t) __b);
2241 }
2242
2243 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2244 vrsubhn_s16 (int16x8_t __a, int16x8_t __b)
2245 {
2246 return (int8x8_t) __builtin_aarch64_rsubhnv8hi (__a, __b);
2247 }
2248
2249 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2250 vrsubhn_s32 (int32x4_t __a, int32x4_t __b)
2251 {
2252 return (int16x4_t) __builtin_aarch64_rsubhnv4si (__a, __b);
2253 }
2254
2255 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2256 vrsubhn_s64 (int64x2_t __a, int64x2_t __b)
2257 {
2258 return (int32x2_t) __builtin_aarch64_rsubhnv2di (__a, __b);
2259 }
2260
2261 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2262 vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
2263 {
2264 return (uint8x8_t) __builtin_aarch64_rsubhnv8hi ((int16x8_t) __a,
2265 (int16x8_t) __b);
2266 }
2267
2268 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2269 vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
2270 {
2271 return (uint16x4_t) __builtin_aarch64_rsubhnv4si ((int32x4_t) __a,
2272 (int32x4_t) __b);
2273 }
2274
2275 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2276 vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
2277 {
2278 return (uint32x2_t) __builtin_aarch64_rsubhnv2di ((int64x2_t) __a,
2279 (int64x2_t) __b);
2280 }
2281
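/* Sketch: vsubhn (SUBHN) keeps only the most-significant half of each
   difference, a cheap way to drop from 16-bit working precision back
   to 8 bits; vrsubhn (RSUBHN) does the same with round-to-nearest
   instead of truncation.  Illustrative helper:

     uint8x8_t
     narrow_diff (uint16x8_t a, uint16x8_t b)
     {
       return vsubhn_u16 (a, b);
     }  */
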
2282 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2283 vrsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
2284 {
2285 return (int8x16_t) __builtin_aarch64_rsubhn2v8hi (__a, __b, __c);
2286 }
2287
2288 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2289 vrsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
2290 {
2291 return (int16x8_t) __builtin_aarch64_rsubhn2v4si (__a, __b, __c);
2292 }
2293
2294 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2295 vrsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
2296 {
2297 return (int32x4_t) __builtin_aarch64_rsubhn2v2di (__a, __b, __c);
2298 }
2299
2300 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2301 vrsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
2302 {
2303 return (uint8x16_t) __builtin_aarch64_rsubhn2v8hi ((int8x8_t) __a,
2304 (int16x8_t) __b,
2305 (int16x8_t) __c);
2306 }
2307
2308 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2309 vrsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
2310 {
2311 return (uint16x8_t) __builtin_aarch64_rsubhn2v4si ((int16x4_t) __a,
2312 (int32x4_t) __b,
2313 (int32x4_t) __c);
2314 }
2315
2316 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2317 vrsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
2318 {
2319 return (uint32x4_t) __builtin_aarch64_rsubhn2v2di ((int32x2_t) __a,
2320 (int64x2_t) __b,
2321 (int64x2_t) __c);
2322 }
2323
2324 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2325 vsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
2326 {
2327 return (int8x16_t) __builtin_aarch64_subhn2v8hi (__a, __b, __c);
2328 }
2329
2330 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2331 vsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
2332 {
2333 return (int16x8_t) __builtin_aarch64_subhn2v4si (__a, __b, __c);
2334 }
2335
2336 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2337 vsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
2338 {
2339 return (int32x4_t) __builtin_aarch64_subhn2v2di (__a, __b, __c);
2340 }
2341
2342 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2343 vsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
2344 {
2345 return (uint8x16_t) __builtin_aarch64_subhn2v8hi ((int8x8_t) __a,
2346 (int16x8_t) __b,
2347 (int16x8_t) __c);
2348 }
2349
2350 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2351 vsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
2352 {
2353 return (uint16x8_t) __builtin_aarch64_subhn2v4si ((int16x4_t) __a,
2354 (int32x4_t) __b,
2355 (int32x4_t) __c);
2356 }
2357
2358 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2359 vsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
2360 {
2361 return (uint32x4_t) __builtin_aarch64_subhn2v2di ((int32x2_t) __a,
2362 (int64x2_t) __b,
2363 (int64x2_t) __c);
2364 }
2365
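/* Sketch: the _high narrowing forms write the upper half of a
   Q register, so a full 16-lane narrow result needs no separate
   combine step.  Illustrative helper:

     uint8x16_t
     narrow_diff_16 (uint16x8_t a0, uint16x8_t b0,
                     uint16x8_t a1, uint16x8_t b1)
     {
       return vsubhn_high_u16 (vsubhn_u16 (a0, b0), a1, b1);
     }  */
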
2366 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2367 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
2368 {
2369 return __builtin_aarch64_uqaddv4hi_uuu (__a, __b);
2370 }
2371
2372 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2373 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
2374 {
2375 return __builtin_aarch64_uqaddv2si_uuu (__a, __b);
2376 }
2377
2378 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2379 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
2380 {
2381 return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])};
2382 }
2383
2384 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2385 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
2386 {
2387 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
2388 }
2389
2390 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2391 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
2392 {
2393 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
2394 }
2395
2396 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2397 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
2398 {
2399 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
2400 }
2401
2402 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2403 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
2404 {
2405 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
2406 }
2407
2408 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2409 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
2410 {
2411 return __builtin_aarch64_uqaddv16qi_uuu (__a, __b);
2412 }
2413
2414 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2415 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
2416 {
2417 return __builtin_aarch64_uqaddv8hi_uuu (__a, __b);
2418 }
2419
2420 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2421 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2422 {
2423 return __builtin_aarch64_uqaddv4si_uuu (__a, __b);
2424 }
2425
2426 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2427 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2428 {
2429 return __builtin_aarch64_uqaddv2di_uuu (__a, __b);
2430 }
2431
2432 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2433 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2434 {
2435 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2436 }
2437
2438 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2439 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2440 {
2441 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2442 }
2443
2444 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2445 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2446 {
2447 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2448 }
2449
2450 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2451 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2452 {
2453 return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])};
2454 }
2455
2456 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2457 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2458 {
2459 return __builtin_aarch64_uqsubv8qi_uuu (__a, __b);
2460 }
2461
2462 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2463 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2464 {
2465 return __builtin_aarch64_uqsubv4hi_uuu (__a, __b);
2466 }
2467
2468 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2469 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2470 {
2471 return __builtin_aarch64_uqsubv2si_uuu (__a, __b);
2472 }
2473
2474 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2475 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2476 {
2477 return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])};
2478 }
2479
2480 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2481 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2482 {
2483 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2484 }
2485
2486 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2487 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2488 {
2489 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2490 }
2491
2492 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2493 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2494 {
2495 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2496 }
2497
2498 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2499 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2500 {
2501 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2502 }
2503
2504 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2505 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2506 {
2507 return __builtin_aarch64_uqsubv16qi_uuu (__a, __b);
2508 }
2509
2510 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2511 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2512 {
2513 return __builtin_aarch64_uqsubv8hi_uuu (__a, __b);
2514 }
2515
2516 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2517 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2518 {
2519 return __builtin_aarch64_uqsubv4si_uuu (__a, __b);
2520 }
2521
2522 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2523 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2524 {
2525 return __builtin_aarch64_uqsubv2di_uuu (__a, __b);
2526 }
2527
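/* Sketch: the saturating forms (SQADD/UQADD, SQSUB/UQSUB) clamp to
   the element type's limits instead of wrapping: in a uint8 lane,
   200 + 100 yields 255 and 3 - 7 yields 0.  Illustrative helper:

     uint8x8_t
     brighten (uint8x8_t pix, uint8x8_t delta)
     {
       return vqadd_u8 (pix, delta);
     }  */
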
2528 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2529 vqneg_s8 (int8x8_t __a)
2530 {
2531 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2532 }
2533
2534 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2535 vqneg_s16 (int16x4_t __a)
2536 {
2537 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2538 }
2539
2540 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2541 vqneg_s32 (int32x2_t __a)
2542 {
2543 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2544 }
2545
2546 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2547 vqneg_s64 (int64x1_t __a)
2548 {
2549 return (int64x1_t) {__builtin_aarch64_sqnegdi (__a[0])};
2550 }
2551
2552 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2553 vqnegq_s8 (int8x16_t __a)
2554 {
2555 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2556 }
2557
2558 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2559 vqnegq_s16 (int16x8_t __a)
2560 {
2561 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2562 }
2563
2564 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2565 vqnegq_s32 (int32x4_t __a)
2566 {
2567 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2568 }
2569
2570 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2571 vqabs_s8 (int8x8_t __a)
2572 {
2573 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2574 }
2575
2576 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2577 vqabs_s16 (int16x4_t __a)
2578 {
2579 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2580 }
2581
2582 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2583 vqabs_s32 (int32x2_t __a)
2584 {
2585 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2586 }
2587
2588 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2589 vqabs_s64 (int64x1_t __a)
2590 {
2591 return (int64x1_t) {__builtin_aarch64_sqabsdi (__a[0])};
2592 }
2593
2594 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2595 vqabsq_s8 (int8x16_t __a)
2596 {
2597 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2598 }
2599
2600 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2601 vqabsq_s16 (int16x8_t __a)
2602 {
2603 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2604 }
2605
2606 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2607 vqabsq_s32 (int32x4_t __a)
2608 {
2609 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2610 }
2611
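/* Sketch: vqneg/vqabs (SQNEG/SQABS) differ from plain negation only
   at the most negative value: -(-128) wraps back to -128 in an int8
   lane, whereas the saturating forms clamp it to 127.  Illustrative
   helper:

     int8x8_t
     safe_abs (int8x8_t v)
     {
       return vqabs_s8 (v);
     }  */
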
2612 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2613 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2614 {
2615 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2616 }
2617
2618 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2619 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2620 {
2621 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2622 }
2623
2624 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2625 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2626 {
2627 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2628 }
2629
2630 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2631 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2632 {
2633 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2634 }
2635
2636 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2637 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2638 {
2639 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2640 }
2641
2642 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2643 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2644 {
2645 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2646 }
2647
2648 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2649 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2650 {
2651 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2652 }
2653
2654 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2655 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2656 {
2657 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2658 }
2659
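/* Sketch: vqdmulh (SQDMULH) returns the saturated high half of
   2 * a * b, i.e. a Q15 (or Q31) fixed-point multiply; vqrdmulh
   (SQRDMULH) adds a rounding constant before the shift.  Illustrative
   helper for Q15 data:

     int16x8_t
     q15_mul (int16x8_t a, int16x8_t b)
     {
       return vqrdmulhq_s16 (a, b);
     }  */
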
2660 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2661 vcreate_s8 (uint64_t __a)
2662 {
2663 return (int8x8_t) __a;
2664 }
2665
2666 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2667 vcreate_s16 (uint64_t __a)
2668 {
2669 return (int16x4_t) __a;
2670 }
2671
2672 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2673 vcreate_s32 (uint64_t __a)
2674 {
2675 return (int32x2_t) __a;
2676 }
2677
2678 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2679 vcreate_s64 (uint64_t __a)
2680 {
2681 return (int64x1_t) {__a};
2682 }
2683
2684 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
2685 vcreate_f16 (uint64_t __a)
2686 {
2687 return (float16x4_t) __a;
2688 }
2689
2690 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2691 vcreate_f32 (uint64_t __a)
2692 {
2693 return (float32x2_t) __a;
2694 }
2695
2696 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2697 vcreate_u8 (uint64_t __a)
2698 {
2699 return (uint8x8_t) __a;
2700 }
2701
2702 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2703 vcreate_u16 (uint64_t __a)
2704 {
2705 return (uint16x4_t) __a;
2706 }
2707
2708 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2709 vcreate_u32 (uint64_t __a)
2710 {
2711 return (uint32x2_t) __a;
2712 }
2713
2714 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2715 vcreate_u64 (uint64_t __a)
2716 {
2717 return (uint64x1_t) {__a};
2718 }
2719
2720 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2721 vcreate_f64 (uint64_t __a)
2722 {
2723 return (float64x1_t) __a;
2724 }
2725
2726 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2727 vcreate_p8 (uint64_t __a)
2728 {
2729 return (poly8x8_t) __a;
2730 }
2731
2732 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2733 vcreate_p16 (uint64_t __a)
2734 {
2735 return (poly16x4_t) __a;
2736 }
2737
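/* Sketch: vcreate reinterprets a 64-bit scalar as a D-register
   vector, with lane 0 taken from the least-significant bits.  For
   example (illustrative), this builds the byte ramp 0, 1, ... 7:

     uint8x8_t ramp = vcreate_u8 (0x0706050403020100ULL);  */
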
2738 /* vget_lane */
2739
2740 __extension__ static __inline float16_t __attribute__ ((__always_inline__))
2741 vget_lane_f16 (float16x4_t __a, const int __b)
2742 {
2743 return __aarch64_vget_lane_any (__a, __b);
2744 }
2745
2746 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2747 vget_lane_f32 (float32x2_t __a, const int __b)
2748 {
2749 return __aarch64_vget_lane_any (__a, __b);
2750 }
2751
2752 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2753 vget_lane_f64 (float64x1_t __a, const int __b)
2754 {
2755 return __aarch64_vget_lane_any (__a, __b);
2756 }
2757
2758 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2759 vget_lane_p8 (poly8x8_t __a, const int __b)
2760 {
2761 return __aarch64_vget_lane_any (__a, __b);
2762 }
2763
2764 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2765 vget_lane_p16 (poly16x4_t __a, const int __b)
2766 {
2767 return __aarch64_vget_lane_any (__a, __b);
2768 }
2769
2770 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2771 vget_lane_s8 (int8x8_t __a, const int __b)
2772 {
2773 return __aarch64_vget_lane_any (__a, __b);
2774 }
2775
2776 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2777 vget_lane_s16 (int16x4_t __a, const int __b)
2778 {
2779 return __aarch64_vget_lane_any (__a, __b);
2780 }
2781
2782 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2783 vget_lane_s32 (int32x2_t __a, const int __b)
2784 {
2785 return __aarch64_vget_lane_any (__a, __b);
2786 }
2787
2788 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2789 vget_lane_s64 (int64x1_t __a, const int __b)
2790 {
2791 return __aarch64_vget_lane_any (__a, __b);
2792 }
2793
2794 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2795 vget_lane_u8 (uint8x8_t __a, const int __b)
2796 {
2797 return __aarch64_vget_lane_any (__a, __b);
2798 }
2799
2800 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2801 vget_lane_u16 (uint16x4_t __a, const int __b)
2802 {
2803 return __aarch64_vget_lane_any (__a, __b);
2804 }
2805
2806 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2807 vget_lane_u32 (uint32x2_t __a, const int __b)
2808 {
2809 return __aarch64_vget_lane_any (__a, __b);
2810 }
2811
2812 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2813 vget_lane_u64 (uint64x1_t __a, const int __b)
2814 {
2815 return __aarch64_vget_lane_any (__a, __b);
2816 }
2817
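/* Sketch: the lane index must be an integer constant expression in
   range for the vector type; GCC diagnoses out-of-range or
   non-constant indices at compile time.  Illustrative use, extracting
   the second lane:

     float
     second (float32x2_t v)
     {
       return vget_lane_f32 (v, 1);
     }  */
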
2818 /* vgetq_lane */
2819
2820 __extension__ static __inline float16_t __attribute__ ((__always_inline__))
2821 vgetq_lane_f16 (float16x8_t __a, const int __b)
2822 {
2823 return __aarch64_vget_lane_any (__a, __b);
2824 }
2825
2826 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2827 vgetq_lane_f32 (float32x4_t __a, const int __b)
2828 {
2829 return __aarch64_vget_lane_any (__a, __b);
2830 }
2831
2832 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2833 vgetq_lane_f64 (float64x2_t __a, const int __b)
2834 {
2835 return __aarch64_vget_lane_any (__a, __b);
2836 }
2837
2838 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2839 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2840 {
2841 return __aarch64_vget_lane_any (__a, __b);
2842 }
2843
2844 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2845 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2846 {
2847 return __aarch64_vget_lane_any (__a, __b);
2848 }
2849
2850 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2851 vgetq_lane_s8 (int8x16_t __a, const int __b)
2852 {
2853 return __aarch64_vget_lane_any (__a, __b);
2854 }
2855
2856 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2857 vgetq_lane_s16 (int16x8_t __a, const int __b)
2858 {
2859 return __aarch64_vget_lane_any (__a, __b);
2860 }
2861
2862 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2863 vgetq_lane_s32 (int32x4_t __a, const int __b)
2864 {
2865 return __aarch64_vget_lane_any (__a, __b);
2866 }
2867
2868 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2869 vgetq_lane_s64 (int64x2_t __a, const int __b)
2870 {
2871 return __aarch64_vget_lane_any (__a, __b);
2872 }
2873
2874 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2875 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2876 {
2877 return __aarch64_vget_lane_any (__a, __b);
2878 }
2879
2880 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2881 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2882 {
2883 return __aarch64_vget_lane_any (__a, __b);
2884 }
2885
2886 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2887 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2888 {
2889 return __aarch64_vget_lane_any (__a, __b);
2890 }
2891
2892 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2893 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2894 {
2895 return __aarch64_vget_lane_any (__a, __b);
2896 }
2897
2898 /* vreinterpret */
2899
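/* Sketch: the vreinterpret family only relabels the bits of a vector
   as another element type; no conversion instructions are emitted,
   and lane values are preserved as bit patterns, not as numbers.
   Illustrative use, viewing float lanes as their IEEE encodings
   (vreinterpret_u32_f32 is defined further down in this header):

     uint32x2_t
     float_bits (float32x2_t v)
     {
       return vreinterpret_u32_f32 (v);
     }  */
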
2900 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2901 vreinterpret_p8_f16 (float16x4_t __a)
2902 {
2903 return (poly8x8_t) __a;
2904 }
2905
2906 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2907 vreinterpret_p8_f64 (float64x1_t __a)
2908 {
2909 return (poly8x8_t) __a;
2910 }
2911
2912 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2913 vreinterpret_p8_s8 (int8x8_t __a)
2914 {
2915 return (poly8x8_t) __a;
2916 }
2917
2918 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2919 vreinterpret_p8_s16 (int16x4_t __a)
2920 {
2921 return (poly8x8_t) __a;
2922 }
2923
2924 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2925 vreinterpret_p8_s32 (int32x2_t __a)
2926 {
2927 return (poly8x8_t) __a;
2928 }
2929
2930 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2931 vreinterpret_p8_s64 (int64x1_t __a)
2932 {
2933 return (poly8x8_t) __a;
2934 }
2935
2936 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2937 vreinterpret_p8_f32 (float32x2_t __a)
2938 {
2939 return (poly8x8_t) __a;
2940 }
2941
2942 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2943 vreinterpret_p8_u8 (uint8x8_t __a)
2944 {
2945 return (poly8x8_t) __a;
2946 }
2947
2948 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2949 vreinterpret_p8_u16 (uint16x4_t __a)
2950 {
2951 return (poly8x8_t) __a;
2952 }
2953
2954 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2955 vreinterpret_p8_u32 (uint32x2_t __a)
2956 {
2957 return (poly8x8_t) __a;
2958 }
2959
2960 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2961 vreinterpret_p8_u64 (uint64x1_t __a)
2962 {
2963 return (poly8x8_t) __a;
2964 }
2965
2966 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2967 vreinterpret_p8_p16 (poly16x4_t __a)
2968 {
2969 return (poly8x8_t) __a;
2970 }
2971
2972 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2973 vreinterpretq_p8_f64 (float64x2_t __a)
2974 {
2975 return (poly8x16_t) __a;
2976 }
2977
2978 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2979 vreinterpretq_p8_s8 (int8x16_t __a)
2980 {
2981 return (poly8x16_t) __a;
2982 }
2983
2984 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2985 vreinterpretq_p8_s16 (int16x8_t __a)
2986 {
2987 return (poly8x16_t) __a;
2988 }
2989
2990 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2991 vreinterpretq_p8_s32 (int32x4_t __a)
2992 {
2993 return (poly8x16_t) __a;
2994 }
2995
2996 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2997 vreinterpretq_p8_s64 (int64x2_t __a)
2998 {
2999 return (poly8x16_t) __a;
3000 }
3001
3002 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3003 vreinterpretq_p8_f16 (float16x8_t __a)
3004 {
3005 return (poly8x16_t) __a;
3006 }
3007
3008 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3009 vreinterpretq_p8_f32 (float32x4_t __a)
3010 {
3011 return (poly8x16_t) __a;
3012 }
3013
3014 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3015 vreinterpretq_p8_u8 (uint8x16_t __a)
3016 {
3017 return (poly8x16_t) __a;
3018 }
3019
3020 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3021 vreinterpretq_p8_u16 (uint16x8_t __a)
3022 {
3023 return (poly8x16_t) __a;
3024 }
3025
3026 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3027 vreinterpretq_p8_u32 (uint32x4_t __a)
3028 {
3029 return (poly8x16_t) __a;
3030 }
3031
3032 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3033 vreinterpretq_p8_u64 (uint64x2_t __a)
3034 {
3035 return (poly8x16_t) __a;
3036 }
3037
3038 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3039 vreinterpretq_p8_p16 (poly16x8_t __a)
3040 {
3041 return (poly8x16_t) __a;
3042 }
3043
3044 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3045 vreinterpret_p16_f16 (float16x4_t __a)
3046 {
3047 return (poly16x4_t) __a;
3048 }
3049
3050 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3051 vreinterpret_p16_f64 (float64x1_t __a)
3052 {
3053 return (poly16x4_t) __a;
3054 }
3055
3056 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3057 vreinterpret_p16_s8 (int8x8_t __a)
3058 {
3059 return (poly16x4_t) __a;
3060 }
3061
3062 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3063 vreinterpret_p16_s16 (int16x4_t __a)
3064 {
3065 return (poly16x4_t) __a;
3066 }
3067
3068 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3069 vreinterpret_p16_s32 (int32x2_t __a)
3070 {
3071 return (poly16x4_t) __a;
3072 }
3073
3074 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3075 vreinterpret_p16_s64 (int64x1_t __a)
3076 {
3077 return (poly16x4_t) __a;
3078 }
3079
3080 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3081 vreinterpret_p16_f32 (float32x2_t __a)
3082 {
3083 return (poly16x4_t) __a;
3084 }
3085
3086 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3087 vreinterpret_p16_u8 (uint8x8_t __a)
3088 {
3089 return (poly16x4_t) __a;
3090 }
3091
3092 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3093 vreinterpret_p16_u16 (uint16x4_t __a)
3094 {
3095 return (poly16x4_t) __a;
3096 }
3097
3098 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3099 vreinterpret_p16_u32 (uint32x2_t __a)
3100 {
3101 return (poly16x4_t) __a;
3102 }
3103
3104 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3105 vreinterpret_p16_u64 (uint64x1_t __a)
3106 {
3107 return (poly16x4_t) __a;
3108 }
3109
3110 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
3111 vreinterpret_p16_p8 (poly8x8_t __a)
3112 {
3113 return (poly16x4_t) __a;
3114 }
3115
3116 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3117 vreinterpretq_p16_f64 (float64x2_t __a)
3118 {
3119 return (poly16x8_t) __a;
3120 }
3121
3122 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3123 vreinterpretq_p16_s8 (int8x16_t __a)
3124 {
3125 return (poly16x8_t) __a;
3126 }
3127
3128 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3129 vreinterpretq_p16_s16 (int16x8_t __a)
3130 {
3131 return (poly16x8_t) __a;
3132 }
3133
3134 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3135 vreinterpretq_p16_s32 (int32x4_t __a)
3136 {
3137 return (poly16x8_t) __a;
3138 }
3139
3140 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3141 vreinterpretq_p16_s64 (int64x2_t __a)
3142 {
3143 return (poly16x8_t) __a;
3144 }
3145
3146 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3147 vreinterpretq_p16_f16 (float16x8_t __a)
3148 {
3149 return (poly16x8_t) __a;
3150 }
3151
3152 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3153 vreinterpretq_p16_f32 (float32x4_t __a)
3154 {
3155 return (poly16x8_t) __a;
3156 }
3157
3158 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3159 vreinterpretq_p16_u8 (uint8x16_t __a)
3160 {
3161 return (poly16x8_t) __a;
3162 }
3163
3164 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3165 vreinterpretq_p16_u16 (uint16x8_t __a)
3166 {
3167 return (poly16x8_t) __a;
3168 }
3169
3170 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3171 vreinterpretq_p16_u32 (uint32x4_t __a)
3172 {
3173 return (poly16x8_t) __a;
3174 }
3175
3176 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3177 vreinterpretq_p16_u64 (uint64x2_t __a)
3178 {
3179 return (poly16x8_t) __a;
3180 }
3181
3182 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3183 vreinterpretq_p16_p8 (poly8x16_t __a)
3184 {
3185 return (poly16x8_t) __a;
3186 }
3187
3188 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3189 vreinterpret_f16_f64 (float64x1_t __a)
3190 {
3191 return (float16x4_t) __a;
3192 }
3193
3194 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3195 vreinterpret_f16_s8 (int8x8_t __a)
3196 {
3197 return (float16x4_t) __a;
3198 }
3199
3200 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3201 vreinterpret_f16_s16 (int16x4_t __a)
3202 {
3203 return (float16x4_t) __a;
3204 }
3205
3206 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3207 vreinterpret_f16_s32 (int32x2_t __a)
3208 {
3209 return (float16x4_t) __a;
3210 }
3211
3212 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3213 vreinterpret_f16_s64 (int64x1_t __a)
3214 {
3215 return (float16x4_t) __a;
3216 }
3217
3218 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3219 vreinterpret_f16_f32 (float32x2_t __a)
3220 {
3221 return (float16x4_t) __a;
3222 }
3223
3224 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3225 vreinterpret_f16_u8 (uint8x8_t __a)
3226 {
3227 return (float16x4_t) __a;
3228 }
3229
3230 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3231 vreinterpret_f16_u16 (uint16x4_t __a)
3232 {
3233 return (float16x4_t) __a;
3234 }
3235
3236 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3237 vreinterpret_f16_u32 (uint32x2_t __a)
3238 {
3239 return (float16x4_t) __a;
3240 }
3241
3242 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3243 vreinterpret_f16_u64 (uint64x1_t __a)
3244 {
3245 return (float16x4_t) __a;
3246 }
3247
3248 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3249 vreinterpret_f16_p8 (poly8x8_t __a)
3250 {
3251 return (float16x4_t) __a;
3252 }
3253
3254 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
3255 vreinterpret_f16_p16 (poly16x4_t __a)
3256 {
3257 return (float16x4_t) __a;
3258 }
3259
3260 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3261 vreinterpretq_f16_f64 (float64x2_t __a)
3262 {
3263 return (float16x8_t) __a;
3264 }
3265
3266 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3267 vreinterpretq_f16_s8 (int8x16_t __a)
3268 {
3269 return (float16x8_t) __a;
3270 }
3271
3272 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3273 vreinterpretq_f16_s16 (int16x8_t __a)
3274 {
3275 return (float16x8_t) __a;
3276 }
3277
3278 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3279 vreinterpretq_f16_s32 (int32x4_t __a)
3280 {
3281 return (float16x8_t) __a;
3282 }
3283
3284 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3285 vreinterpretq_f16_s64 (int64x2_t __a)
3286 {
3287 return (float16x8_t) __a;
3288 }
3289
3290 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3291 vreinterpretq_f16_f32 (float32x4_t __a)
3292 {
3293 return (float16x8_t) __a;
3294 }
3295
3296 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3297 vreinterpretq_f16_u8 (uint8x16_t __a)
3298 {
3299 return (float16x8_t) __a;
3300 }
3301
3302 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3303 vreinterpretq_f16_u16 (uint16x8_t __a)
3304 {
3305 return (float16x8_t) __a;
3306 }
3307
3308 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3309 vreinterpretq_f16_u32 (uint32x4_t __a)
3310 {
3311 return (float16x8_t) __a;
3312 }
3313
3314 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3315 vreinterpretq_f16_u64 (uint64x2_t __a)
3316 {
3317 return (float16x8_t) __a;
3318 }
3319
3320 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3321 vreinterpretq_f16_p8 (poly8x16_t __a)
3322 {
3323 return (float16x8_t) __a;
3324 }
3325
3326 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
3327 vreinterpretq_f16_p16 (poly16x8_t __a)
3328 {
3329 return (float16x8_t) __a;
3330 }
3331
3332 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3333 vreinterpret_f32_f16 (float16x4_t __a)
3334 {
3335 return (float32x2_t) __a;
3336 }
3337
3338 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3339 vreinterpret_f32_f64 (float64x1_t __a)
3340 {
3341 return (float32x2_t) __a;
3342 }
3343
3344 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3345 vreinterpret_f32_s8 (int8x8_t __a)
3346 {
3347 return (float32x2_t) __a;
3348 }
3349
3350 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3351 vreinterpret_f32_s16 (int16x4_t __a)
3352 {
3353 return (float32x2_t) __a;
3354 }
3355
3356 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3357 vreinterpret_f32_s32 (int32x2_t __a)
3358 {
3359 return (float32x2_t) __a;
3360 }
3361
3362 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3363 vreinterpret_f32_s64 (int64x1_t __a)
3364 {
3365 return (float32x2_t) __a;
3366 }
3367
3368 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3369 vreinterpret_f32_u8 (uint8x8_t __a)
3370 {
3371 return (float32x2_t) __a;
3372 }
3373
3374 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3375 vreinterpret_f32_u16 (uint16x4_t __a)
3376 {
3377 return (float32x2_t) __a;
3378 }
3379
3380 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3381 vreinterpret_f32_u32 (uint32x2_t __a)
3382 {
3383 return (float32x2_t) __a;
3384 }
3385
3386 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3387 vreinterpret_f32_u64 (uint64x1_t __a)
3388 {
3389 return (float32x2_t) __a;
3390 }
3391
3392 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3393 vreinterpret_f32_p8 (poly8x8_t __a)
3394 {
3395 return (float32x2_t) __a;
3396 }
3397
3398 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
3399 vreinterpret_f32_p16 (poly16x4_t __a)
3400 {
3401 return (float32x2_t) __a;
3402 }
3403
3404 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3405 vreinterpretq_f32_f16 (float16x8_t __a)
3406 {
3407 return (float32x4_t) __a;
3408 }
3409
3410 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3411 vreinterpretq_f32_f64 (float64x2_t __a)
3412 {
3413 return (float32x4_t) __a;
3414 }
3415
3416 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3417 vreinterpretq_f32_s8 (int8x16_t __a)
3418 {
3419 return (float32x4_t) __a;
3420 }
3421
3422 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3423 vreinterpretq_f32_s16 (int16x8_t __a)
3424 {
3425 return (float32x4_t) __a;
3426 }
3427
3428 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3429 vreinterpretq_f32_s32 (int32x4_t __a)
3430 {
3431 return (float32x4_t) __a;
3432 }
3433
3434 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3435 vreinterpretq_f32_s64 (int64x2_t __a)
3436 {
3437 return (float32x4_t) __a;
3438 }
3439
3440 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3441 vreinterpretq_f32_u8 (uint8x16_t __a)
3442 {
3443 return (float32x4_t) __a;
3444 }
3445
3446 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3447 vreinterpretq_f32_u16 (uint16x8_t __a)
3448 {
3449 return (float32x4_t) __a;
3450 }
3451
3452 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3453 vreinterpretq_f32_u32 (uint32x4_t __a)
3454 {
3455 return (float32x4_t) __a;
3456 }
3457
3458 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3459 vreinterpretq_f32_u64 (uint64x2_t __a)
3460 {
3461 return (float32x4_t) __a;
3462 }
3463
3464 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3465 vreinterpretq_f32_p8 (poly8x16_t __a)
3466 {
3467 return (float32x4_t) __a;
3468 }
3469
3470 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3471 vreinterpretq_f32_p16 (poly16x8_t __a)
3472 {
3473 return (float32x4_t) __a;
3474 }
3475
3476 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3477 vreinterpret_f64_f16 (float16x4_t __a)
3478 {
3479 return (float64x1_t) __a;
3480 }
3481
3482 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3483 vreinterpret_f64_f32 (float32x2_t __a)
3484 {
3485 return (float64x1_t) __a;
3486 }
3487
3488 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3489 vreinterpret_f64_p8 (poly8x8_t __a)
3490 {
3491 return (float64x1_t) __a;
3492 }
3493
3494 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3495 vreinterpret_f64_p16 (poly16x4_t __a)
3496 {
3497 return (float64x1_t) __a;
3498 }
3499
3500 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3501 vreinterpret_f64_s8 (int8x8_t __a)
3502 {
3503 return (float64x1_t) __a;
3504 }
3505
3506 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3507 vreinterpret_f64_s16 (int16x4_t __a)
3508 {
3509 return (float64x1_t) __a;
3510 }
3511
3512 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3513 vreinterpret_f64_s32 (int32x2_t __a)
3514 {
3515 return (float64x1_t) __a;
3516 }
3517
3518 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3519 vreinterpret_f64_s64 (int64x1_t __a)
3520 {
3521 return (float64x1_t) __a;
3522 }
3523
3524 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3525 vreinterpret_f64_u8 (uint8x8_t __a)
3526 {
3527 return (float64x1_t) __a;
3528 }
3529
3530 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3531 vreinterpret_f64_u16 (uint16x4_t __a)
3532 {
3533 return (float64x1_t) __a;
3534 }
3535
3536 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3537 vreinterpret_f64_u32 (uint32x2_t __a)
3538 {
3539 return (float64x1_t) __a;
3540 }
3541
3542 __extension__ static __inline float64x1_t __attribute__((__always_inline__))
3543 vreinterpret_f64_u64 (uint64x1_t __a)
3544 {
3545 return (float64x1_t) __a;
3546 }
3547
3548 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3549 vreinterpretq_f64_f16 (float16x8_t __a)
3550 {
3551 return (float64x2_t) __a;
3552 }
3553
3554 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3555 vreinterpretq_f64_f32 (float32x4_t __a)
3556 {
3557 return (float64x2_t) __a;
3558 }
3559
3560 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3561 vreinterpretq_f64_p8 (poly8x16_t __a)
3562 {
3563 return (float64x2_t) __a;
3564 }
3565
3566 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3567 vreinterpretq_f64_p16 (poly16x8_t __a)
3568 {
3569 return (float64x2_t) __a;
3570 }
3571
3572 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3573 vreinterpretq_f64_s8 (int8x16_t __a)
3574 {
3575 return (float64x2_t) __a;
3576 }
3577
3578 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3579 vreinterpretq_f64_s16 (int16x8_t __a)
3580 {
3581 return (float64x2_t) __a;
3582 }
3583
3584 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3585 vreinterpretq_f64_s32 (int32x4_t __a)
3586 {
3587 return (float64x2_t) __a;
3588 }
3589
3590 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3591 vreinterpretq_f64_s64 (int64x2_t __a)
3592 {
3593 return (float64x2_t) __a;
3594 }
3595
3596 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3597 vreinterpretq_f64_u8 (uint8x16_t __a)
3598 {
3599 return (float64x2_t) __a;
3600 }
3601
3602 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3603 vreinterpretq_f64_u16 (uint16x8_t __a)
3604 {
3605 return (float64x2_t) __a;
3606 }
3607
3608 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3609 vreinterpretq_f64_u32 (uint32x4_t __a)
3610 {
3611 return (float64x2_t) __a;
3612 }
3613
3614 __extension__ static __inline float64x2_t __attribute__((__always_inline__))
3615 vreinterpretq_f64_u64 (uint64x2_t __a)
3616 {
3617 return (float64x2_t) __a;
3618 }
3619
3620 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3621 vreinterpret_s64_f16 (float16x4_t __a)
3622 {
3623 return (int64x1_t) __a;
3624 }
3625
3626 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3627 vreinterpret_s64_f64 (float64x1_t __a)
3628 {
3629 return (int64x1_t) __a;
3630 }
3631
3632 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3633 vreinterpret_s64_s8 (int8x8_t __a)
3634 {
3635 return (int64x1_t) __a;
3636 }
3637
3638 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3639 vreinterpret_s64_s16 (int16x4_t __a)
3640 {
3641 return (int64x1_t) __a;
3642 }
3643
3644 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3645 vreinterpret_s64_s32 (int32x2_t __a)
3646 {
3647 return (int64x1_t) __a;
3648 }
3649
3650 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3651 vreinterpret_s64_f32 (float32x2_t __a)
3652 {
3653 return (int64x1_t) __a;
3654 }
3655
3656 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3657 vreinterpret_s64_u8 (uint8x8_t __a)
3658 {
3659 return (int64x1_t) __a;
3660 }
3661
3662 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3663 vreinterpret_s64_u16 (uint16x4_t __a)
3664 {
3665 return (int64x1_t) __a;
3666 }
3667
3668 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3669 vreinterpret_s64_u32 (uint32x2_t __a)
3670 {
3671 return (int64x1_t) __a;
3672 }
3673
3674 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3675 vreinterpret_s64_u64 (uint64x1_t __a)
3676 {
3677 return (int64x1_t) __a;
3678 }
3679
3680 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3681 vreinterpret_s64_p8 (poly8x8_t __a)
3682 {
3683 return (int64x1_t) __a;
3684 }
3685
3686 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
3687 vreinterpret_s64_p16 (poly16x4_t __a)
3688 {
3689 return (int64x1_t) __a;
3690 }
3691
3692 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3693 vreinterpretq_s64_f64 (float64x2_t __a)
3694 {
3695 return (int64x2_t) __a;
3696 }
3697
3698 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3699 vreinterpretq_s64_s8 (int8x16_t __a)
3700 {
3701 return (int64x2_t) __a;
3702 }
3703
3704 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3705 vreinterpretq_s64_s16 (int16x8_t __a)
3706 {
3707 return (int64x2_t) __a;
3708 }
3709
3710 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3711 vreinterpretq_s64_s32 (int32x4_t __a)
3712 {
3713 return (int64x2_t) __a;
3714 }
3715
3716 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3717 vreinterpretq_s64_f16 (float16x8_t __a)
3718 {
3719 return (int64x2_t) __a;
3720 }
3721
3722 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3723 vreinterpretq_s64_f32 (float32x4_t __a)
3724 {
3725 return (int64x2_t) __a;
3726 }
3727
3728 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3729 vreinterpretq_s64_u8 (uint8x16_t __a)
3730 {
3731 return (int64x2_t) __a;
3732 }
3733
3734 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3735 vreinterpretq_s64_u16 (uint16x8_t __a)
3736 {
3737 return (int64x2_t) __a;
3738 }
3739
3740 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3741 vreinterpretq_s64_u32 (uint32x4_t __a)
3742 {
3743 return (int64x2_t) __a;
3744 }
3745
3746 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3747 vreinterpretq_s64_u64 (uint64x2_t __a)
3748 {
3749 return (int64x2_t) __a;
3750 }
3751
3752 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3753 vreinterpretq_s64_p8 (poly8x16_t __a)
3754 {
3755 return (int64x2_t) __a;
3756 }
3757
3758 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3759 vreinterpretq_s64_p16 (poly16x8_t __a)
3760 {
3761 return (int64x2_t) __a;
3762 }
3763
3764 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3765 vreinterpret_u64_f16 (float16x4_t __a)
3766 {
3767 return (uint64x1_t) __a;
3768 }
3769
3770 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3771 vreinterpret_u64_f64 (float64x1_t __a)
3772 {
3773 return (uint64x1_t) __a;
3774 }
3775
3776 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3777 vreinterpret_u64_s8 (int8x8_t __a)
3778 {
3779 return (uint64x1_t) __a;
3780 }
3781
3782 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3783 vreinterpret_u64_s16 (int16x4_t __a)
3784 {
3785 return (uint64x1_t) __a;
3786 }
3787
3788 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3789 vreinterpret_u64_s32 (int32x2_t __a)
3790 {
3791 return (uint64x1_t) __a;
3792 }
3793
3794 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3795 vreinterpret_u64_s64 (int64x1_t __a)
3796 {
3797 return (uint64x1_t) __a;
3798 }
3799
3800 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3801 vreinterpret_u64_f32 (float32x2_t __a)
3802 {
3803 return (uint64x1_t) __a;
3804 }
3805
3806 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3807 vreinterpret_u64_u8 (uint8x8_t __a)
3808 {
3809 return (uint64x1_t) __a;
3810 }
3811
3812 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3813 vreinterpret_u64_u16 (uint16x4_t __a)
3814 {
3815 return (uint64x1_t) __a;
3816 }
3817
3818 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3819 vreinterpret_u64_u32 (uint32x2_t __a)
3820 {
3821 return (uint64x1_t) __a;
3822 }
3823
3824 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3825 vreinterpret_u64_p8 (poly8x8_t __a)
3826 {
3827 return (uint64x1_t) __a;
3828 }
3829
3830 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3831 vreinterpret_u64_p16 (poly16x4_t __a)
3832 {
3833 return (uint64x1_t) __a;
3834 }
3835
3836 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3837 vreinterpretq_u64_f64 (float64x2_t __a)
3838 {
3839 return (uint64x2_t) __a;
3840 }
3841
3842 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3843 vreinterpretq_u64_s8 (int8x16_t __a)
3844 {
3845 return (uint64x2_t) __a;
3846 }
3847
3848 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3849 vreinterpretq_u64_s16 (int16x8_t __a)
3850 {
3851 return (uint64x2_t) __a;
3852 }
3853
3854 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3855 vreinterpretq_u64_s32 (int32x4_t __a)
3856 {
3857 return (uint64x2_t) __a;
3858 }
3859
3860 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3861 vreinterpretq_u64_s64 (int64x2_t __a)
3862 {
3863 return (uint64x2_t) __a;
3864 }
3865
3866 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3867 vreinterpretq_u64_f16 (float16x8_t __a)
3868 {
3869 return (uint64x2_t) __a;
3870 }
3871
3872 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3873 vreinterpretq_u64_f32 (float32x4_t __a)
3874 {
3875 return (uint64x2_t) __a;
3876 }
3877
3878 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3879 vreinterpretq_u64_u8 (uint8x16_t __a)
3880 {
3881 return (uint64x2_t) __a;
3882 }
3883
3884 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3885 vreinterpretq_u64_u16 (uint16x8_t __a)
3886 {
3887 return (uint64x2_t) __a;
3888 }
3889
3890 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3891 vreinterpretq_u64_u32 (uint32x4_t __a)
3892 {
3893 return (uint64x2_t) __a;
3894 }
3895
3896 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3897 vreinterpretq_u64_p8 (poly8x16_t __a)
3898 {
3899 return (uint64x2_t) __a;
3900 }
3901
3902 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3903 vreinterpretq_u64_p16 (poly16x8_t __a)
3904 {
3905 return (uint64x2_t) __a;
3906 }
3907
3908 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3909 vreinterpret_s8_f16 (float16x4_t __a)
3910 {
3911 return (int8x8_t) __a;
3912 }
3913
3914 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3915 vreinterpret_s8_f64 (float64x1_t __a)
3916 {
3917 return (int8x8_t) __a;
3918 }
3919
3920 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3921 vreinterpret_s8_s16 (int16x4_t __a)
3922 {
3923 return (int8x8_t) __a;
3924 }
3925
3926 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3927 vreinterpret_s8_s32 (int32x2_t __a)
3928 {
3929 return (int8x8_t) __a;
3930 }
3931
3932 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3933 vreinterpret_s8_s64 (int64x1_t __a)
3934 {
3935 return (int8x8_t) __a;
3936 }
3937
3938 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3939 vreinterpret_s8_f32 (float32x2_t __a)
3940 {
3941 return (int8x8_t) __a;
3942 }
3943
3944 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3945 vreinterpret_s8_u8 (uint8x8_t __a)
3946 {
3947 return (int8x8_t) __a;
3948 }
3949
3950 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3951 vreinterpret_s8_u16 (uint16x4_t __a)
3952 {
3953 return (int8x8_t) __a;
3954 }
3955
3956 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3957 vreinterpret_s8_u32 (uint32x2_t __a)
3958 {
3959 return (int8x8_t) __a;
3960 }
3961
3962 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3963 vreinterpret_s8_u64 (uint64x1_t __a)
3964 {
3965 return (int8x8_t) __a;
3966 }
3967
3968 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3969 vreinterpret_s8_p8 (poly8x8_t __a)
3970 {
3971 return (int8x8_t) __a;
3972 }
3973
3974 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3975 vreinterpret_s8_p16 (poly16x4_t __a)
3976 {
3977 return (int8x8_t) __a;
3978 }
3979
3980 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3981 vreinterpretq_s8_f64 (float64x2_t __a)
3982 {
3983 return (int8x16_t) __a;
3984 }
3985
3986 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3987 vreinterpretq_s8_s16 (int16x8_t __a)
3988 {
3989 return (int8x16_t) __a;
3990 }
3991
3992 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3993 vreinterpretq_s8_s32 (int32x4_t __a)
3994 {
3995 return (int8x16_t) __a;
3996 }
3997
3998 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3999 vreinterpretq_s8_s64 (int64x2_t __a)
4000 {
4001 return (int8x16_t) __a;
4002 }
4003
4004 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4005 vreinterpretq_s8_f16 (float16x8_t __a)
4006 {
4007 return (int8x16_t) __a;
4008 }
4009
4010 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4011 vreinterpretq_s8_f32 (float32x4_t __a)
4012 {
4013 return (int8x16_t) __a;
4014 }
4015
4016 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4017 vreinterpretq_s8_u8 (uint8x16_t __a)
4018 {
4019 return (int8x16_t) __a;
4020 }
4021
4022 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4023 vreinterpretq_s8_u16 (uint16x8_t __a)
4024 {
4025 return (int8x16_t) __a;
4026 }
4027
4028 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4029 vreinterpretq_s8_u32 (uint32x4_t __a)
4030 {
4031 return (int8x16_t) __a;
4032 }
4033
4034 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4035 vreinterpretq_s8_u64 (uint64x2_t __a)
4036 {
4037 return (int8x16_t) __a;
4038 }
4039
4040 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4041 vreinterpretq_s8_p8 (poly8x16_t __a)
4042 {
4043 return (int8x16_t) __a;
4044 }
4045
4046 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4047 vreinterpretq_s8_p16 (poly16x8_t __a)
4048 {
4049 return (int8x16_t) __a;
4050 }
4051
4052 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4053 vreinterpret_s16_f16 (float16x4_t __a)
4054 {
4055 return (int16x4_t) __a;
4056 }
4057
4058 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4059 vreinterpret_s16_f64 (float64x1_t __a)
4060 {
4061 return (int16x4_t) __a;
4062 }
4063
4064 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4065 vreinterpret_s16_s8 (int8x8_t __a)
4066 {
4067 return (int16x4_t) __a;
4068 }
4069
4070 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4071 vreinterpret_s16_s32 (int32x2_t __a)
4072 {
4073 return (int16x4_t) __a;
4074 }
4075
4076 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4077 vreinterpret_s16_s64 (int64x1_t __a)
4078 {
4079 return (int16x4_t) __a;
4080 }
4081
4082 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4083 vreinterpret_s16_f32 (float32x2_t __a)
4084 {
4085 return (int16x4_t) __a;
4086 }
4087
4088 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4089 vreinterpret_s16_u8 (uint8x8_t __a)
4090 {
4091 return (int16x4_t) __a;
4092 }
4093
4094 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4095 vreinterpret_s16_u16 (uint16x4_t __a)
4096 {
4097 return (int16x4_t) __a;
4098 }
4099
4100 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4101 vreinterpret_s16_u32 (uint32x2_t __a)
4102 {
4103 return (int16x4_t) __a;
4104 }
4105
4106 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4107 vreinterpret_s16_u64 (uint64x1_t __a)
4108 {
4109 return (int16x4_t) __a;
4110 }
4111
4112 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4113 vreinterpret_s16_p8 (poly8x8_t __a)
4114 {
4115 return (int16x4_t) __a;
4116 }
4117
4118 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4119 vreinterpret_s16_p16 (poly16x4_t __a)
4120 {
4121 return (int16x4_t) __a;
4122 }
4123
4124 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4125 vreinterpretq_s16_f64 (float64x2_t __a)
4126 {
4127 return (int16x8_t) __a;
4128 }
4129
4130 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4131 vreinterpretq_s16_s8 (int8x16_t __a)
4132 {
4133 return (int16x8_t) __a;
4134 }
4135
4136 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4137 vreinterpretq_s16_s32 (int32x4_t __a)
4138 {
4139 return (int16x8_t) __a;
4140 }
4141
4142 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4143 vreinterpretq_s16_s64 (int64x2_t __a)
4144 {
4145 return (int16x8_t) __a;
4146 }
4147
4148 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4149 vreinterpretq_s16_f16 (float16x8_t __a)
4150 {
4151 return (int16x8_t) __a;
4152 }
4153
4154 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4155 vreinterpretq_s16_f32 (float32x4_t __a)
4156 {
4157 return (int16x8_t) __a;
4158 }
4159
4160 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4161 vreinterpretq_s16_u8 (uint8x16_t __a)
4162 {
4163 return (int16x8_t) __a;
4164 }
4165
4166 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4167 vreinterpretq_s16_u16 (uint16x8_t __a)
4168 {
4169 return (int16x8_t) __a;
4170 }
4171
4172 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4173 vreinterpretq_s16_u32 (uint32x4_t __a)
4174 {
4175 return (int16x8_t) __a;
4176 }
4177
4178 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4179 vreinterpretq_s16_u64 (uint64x2_t __a)
4180 {
4181 return (int16x8_t) __a;
4182 }
4183
4184 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4185 vreinterpretq_s16_p8 (poly8x16_t __a)
4186 {
4187 return (int16x8_t) __a;
4188 }
4189
4190 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4191 vreinterpretq_s16_p16 (poly16x8_t __a)
4192 {
4193 return (int16x8_t) __a;
4194 }
4195
4196 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4197 vreinterpret_s32_f16 (float16x4_t __a)
4198 {
4199 return (int32x2_t) __a;
4200 }
4201
4202 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4203 vreinterpret_s32_f64 (float64x1_t __a)
4204 {
4205 return (int32x2_t) __a;
4206 }
4207
4208 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4209 vreinterpret_s32_s8 (int8x8_t __a)
4210 {
4211 return (int32x2_t) __a;
4212 }
4213
4214 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4215 vreinterpret_s32_s16 (int16x4_t __a)
4216 {
4217 return (int32x2_t) __a;
4218 }
4219
4220 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4221 vreinterpret_s32_s64 (int64x1_t __a)
4222 {
4223 return (int32x2_t) __a;
4224 }
4225
4226 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4227 vreinterpret_s32_f32 (float32x2_t __a)
4228 {
4229 return (int32x2_t) __a;
4230 }
4231
4232 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4233 vreinterpret_s32_u8 (uint8x8_t __a)
4234 {
4235 return (int32x2_t) __a;
4236 }
4237
4238 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4239 vreinterpret_s32_u16 (uint16x4_t __a)
4240 {
4241 return (int32x2_t) __a;
4242 }
4243
4244 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4245 vreinterpret_s32_u32 (uint32x2_t __a)
4246 {
4247 return (int32x2_t) __a;
4248 }
4249
4250 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4251 vreinterpret_s32_u64 (uint64x1_t __a)
4252 {
4253 return (int32x2_t) __a;
4254 }
4255
4256 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4257 vreinterpret_s32_p8 (poly8x8_t __a)
4258 {
4259 return (int32x2_t) __a;
4260 }
4261
4262 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4263 vreinterpret_s32_p16 (poly16x4_t __a)
4264 {
4265 return (int32x2_t) __a;
4266 }
4267
4268 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4269 vreinterpretq_s32_f64 (float64x2_t __a)
4270 {
4271 return (int32x4_t) __a;
4272 }
4273
4274 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4275 vreinterpretq_s32_s8 (int8x16_t __a)
4276 {
4277 return (int32x4_t) __a;
4278 }
4279
4280 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4281 vreinterpretq_s32_s16 (int16x8_t __a)
4282 {
4283 return (int32x4_t) __a;
4284 }
4285
4286 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4287 vreinterpretq_s32_s64 (int64x2_t __a)
4288 {
4289 return (int32x4_t) __a;
4290 }
4291
4292 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4293 vreinterpretq_s32_f16 (float16x8_t __a)
4294 {
4295 return (int32x4_t) __a;
4296 }
4297
4298 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4299 vreinterpretq_s32_f32 (float32x4_t __a)
4300 {
4301 return (int32x4_t) __a;
4302 }
4303
4304 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4305 vreinterpretq_s32_u8 (uint8x16_t __a)
4306 {
4307 return (int32x4_t) __a;
4308 }
4309
4310 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4311 vreinterpretq_s32_u16 (uint16x8_t __a)
4312 {
4313 return (int32x4_t) __a;
4314 }
4315
4316 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4317 vreinterpretq_s32_u32 (uint32x4_t __a)
4318 {
4319 return (int32x4_t) __a;
4320 }
4321
4322 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4323 vreinterpretq_s32_u64 (uint64x2_t __a)
4324 {
4325 return (int32x4_t) __a;
4326 }
4327
4328 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4329 vreinterpretq_s32_p8 (poly8x16_t __a)
4330 {
4331 return (int32x4_t) __a;
4332 }
4333
4334 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4335 vreinterpretq_s32_p16 (poly16x8_t __a)
4336 {
4337 return (int32x4_t) __a;
4338 }
4339
4340 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4341 vreinterpret_u8_f16 (float16x4_t __a)
4342 {
4343 return (uint8x8_t) __a;
4344 }
4345
4346 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4347 vreinterpret_u8_f64 (float64x1_t __a)
4348 {
4349 return (uint8x8_t) __a;
4350 }
4351
4352 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4353 vreinterpret_u8_s8 (int8x8_t __a)
4354 {
4355 return (uint8x8_t) __a;
4356 }
4357
4358 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4359 vreinterpret_u8_s16 (int16x4_t __a)
4360 {
4361 return (uint8x8_t) __a;
4362 }
4363
4364 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4365 vreinterpret_u8_s32 (int32x2_t __a)
4366 {
4367 return (uint8x8_t) __a;
4368 }
4369
4370 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4371 vreinterpret_u8_s64 (int64x1_t __a)
4372 {
4373 return (uint8x8_t) __a;
4374 }
4375
4376 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4377 vreinterpret_u8_f32 (float32x2_t __a)
4378 {
4379 return (uint8x8_t) __a;
4380 }
4381
4382 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4383 vreinterpret_u8_u16 (uint16x4_t __a)
4384 {
4385 return (uint8x8_t) __a;
4386 }
4387
4388 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4389 vreinterpret_u8_u32 (uint32x2_t __a)
4390 {
4391 return (uint8x8_t) __a;
4392 }
4393
4394 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4395 vreinterpret_u8_u64 (uint64x1_t __a)
4396 {
4397 return (uint8x8_t) __a;
4398 }
4399
4400 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4401 vreinterpret_u8_p8 (poly8x8_t __a)
4402 {
4403 return (uint8x8_t) __a;
4404 }
4405
4406 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4407 vreinterpret_u8_p16 (poly16x4_t __a)
4408 {
4409 return (uint8x8_t) __a;
4410 }
4411
4412 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4413 vreinterpretq_u8_f64 (float64x2_t __a)
4414 {
4415 return (uint8x16_t) __a;
4416 }
4417
4418 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4419 vreinterpretq_u8_s8 (int8x16_t __a)
4420 {
4421 return (uint8x16_t) __a;
4422 }
4423
4424 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4425 vreinterpretq_u8_s16 (int16x8_t __a)
4426 {
4427 return (uint8x16_t) __a;
4428 }
4429
4430 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4431 vreinterpretq_u8_s32 (int32x4_t __a)
4432 {
4433 return (uint8x16_t) __a;
4434 }
4435
4436 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4437 vreinterpretq_u8_s64 (int64x2_t __a)
4438 {
4439 return (uint8x16_t) __a;
4440 }
4441
4442 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4443 vreinterpretq_u8_f16 (float16x8_t __a)
4444 {
4445 return (uint8x16_t) __a;
4446 }
4447
4448 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4449 vreinterpretq_u8_f32 (float32x4_t __a)
4450 {
4451 return (uint8x16_t) __a;
4452 }
4453
4454 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4455 vreinterpretq_u8_u16 (uint16x8_t __a)
4456 {
4457 return (uint8x16_t) __a;
4458 }
4459
4460 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4461 vreinterpretq_u8_u32 (uint32x4_t __a)
4462 {
4463 return (uint8x16_t) __a;
4464 }
4465
4466 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4467 vreinterpretq_u8_u64 (uint64x2_t __a)
4468 {
4469 return (uint8x16_t) __a;
4470 }
4471
4472 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4473 vreinterpretq_u8_p8 (poly8x16_t __a)
4474 {
4475 return (uint8x16_t) __a;
4476 }
4477
4478 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4479 vreinterpretq_u8_p16 (poly16x8_t __a)
4480 {
4481 return (uint8x16_t) __a;
4482 }
4483
4484 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4485 vreinterpret_u16_f16 (float16x4_t __a)
4486 {
4487 return (uint16x4_t) __a;
4488 }
4489
4490 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4491 vreinterpret_u16_f64 (float64x1_t __a)
4492 {
4493 return (uint16x4_t) __a;
4494 }
4495
4496 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4497 vreinterpret_u16_s8 (int8x8_t __a)
4498 {
4499 return (uint16x4_t) __a;
4500 }
4501
4502 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4503 vreinterpret_u16_s16 (int16x4_t __a)
4504 {
4505 return (uint16x4_t) __a;
4506 }
4507
4508 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4509 vreinterpret_u16_s32 (int32x2_t __a)
4510 {
4511 return (uint16x4_t) __a;
4512 }
4513
4514 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4515 vreinterpret_u16_s64 (int64x1_t __a)
4516 {
4517 return (uint16x4_t) __a;
4518 }
4519
4520 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4521 vreinterpret_u16_f32 (float32x2_t __a)
4522 {
4523 return (uint16x4_t) __a;
4524 }
4525
4526 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4527 vreinterpret_u16_u8 (uint8x8_t __a)
4528 {
4529 return (uint16x4_t) __a;
4530 }
4531
4532 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4533 vreinterpret_u16_u32 (uint32x2_t __a)
4534 {
4535 return (uint16x4_t) __a;
4536 }
4537
4538 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4539 vreinterpret_u16_u64 (uint64x1_t __a)
4540 {
4541 return (uint16x4_t) __a;
4542 }
4543
4544 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4545 vreinterpret_u16_p8 (poly8x8_t __a)
4546 {
4547 return (uint16x4_t) __a;
4548 }
4549
4550 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4551 vreinterpret_u16_p16 (poly16x4_t __a)
4552 {
4553 return (uint16x4_t) __a;
4554 }
4555
4556 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4557 vreinterpretq_u16_f64 (float64x2_t __a)
4558 {
4559 return (uint16x8_t) __a;
4560 }
4561
4562 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4563 vreinterpretq_u16_s8 (int8x16_t __a)
4564 {
4565 return (uint16x8_t) __a;
4566 }
4567
4568 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4569 vreinterpretq_u16_s16 (int16x8_t __a)
4570 {
4571 return (uint16x8_t) __a;
4572 }
4573
4574 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4575 vreinterpretq_u16_s32 (int32x4_t __a)
4576 {
4577 return (uint16x8_t) __a;
4578 }
4579
4580 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4581 vreinterpretq_u16_s64 (int64x2_t __a)
4582 {
4583 return (uint16x8_t) __a;
4584 }
4585
4586 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4587 vreinterpretq_u16_f16 (float16x8_t __a)
4588 {
4589 return (uint16x8_t) __a;
4590 }
4591
4592 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4593 vreinterpretq_u16_f32 (float32x4_t __a)
4594 {
4595 return (uint16x8_t) __a;
4596 }
4597
4598 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4599 vreinterpretq_u16_u8 (uint8x16_t __a)
4600 {
4601 return (uint16x8_t) __a;
4602 }
4603
4604 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4605 vreinterpretq_u16_u32 (uint32x4_t __a)
4606 {
4607 return (uint16x8_t) __a;
4608 }
4609
4610 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4611 vreinterpretq_u16_u64 (uint64x2_t __a)
4612 {
4613 return (uint16x8_t) __a;
4614 }
4615
4616 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4617 vreinterpretq_u16_p8 (poly8x16_t __a)
4618 {
4619 return (uint16x8_t) __a;
4620 }
4621
4622 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4623 vreinterpretq_u16_p16 (poly16x8_t __a)
4624 {
4625 return (uint16x8_t) __a;
4626 }
4627
4628 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4629 vreinterpret_u32_f16 (float16x4_t __a)
4630 {
4631 return (uint32x2_t) __a;
4632 }
4633
4634 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4635 vreinterpret_u32_f64 (float64x1_t __a)
4636 {
4637 return (uint32x2_t) __a;
4638 }
4639
4640 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4641 vreinterpret_u32_s8 (int8x8_t __a)
4642 {
4643 return (uint32x2_t) __a;
4644 }
4645
4646 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4647 vreinterpret_u32_s16 (int16x4_t __a)
4648 {
4649 return (uint32x2_t) __a;
4650 }
4651
4652 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4653 vreinterpret_u32_s32 (int32x2_t __a)
4654 {
4655 return (uint32x2_t) __a;
4656 }
4657
4658 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4659 vreinterpret_u32_s64 (int64x1_t __a)
4660 {
4661 return (uint32x2_t) __a;
4662 }
4663
4664 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4665 vreinterpret_u32_f32 (float32x2_t __a)
4666 {
4667 return (uint32x2_t) __a;
4668 }
4669
4670 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4671 vreinterpret_u32_u8 (uint8x8_t __a)
4672 {
4673 return (uint32x2_t) __a;
4674 }
4675
4676 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4677 vreinterpret_u32_u16 (uint16x4_t __a)
4678 {
4679 return (uint32x2_t) __a;
4680 }
4681
4682 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4683 vreinterpret_u32_u64 (uint64x1_t __a)
4684 {
4685 return (uint32x2_t) __a;
4686 }
4687
4688 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4689 vreinterpret_u32_p8 (poly8x8_t __a)
4690 {
4691 return (uint32x2_t) __a;
4692 }
4693
4694 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4695 vreinterpret_u32_p16 (poly16x4_t __a)
4696 {
4697 return (uint32x2_t) __a;
4698 }
4699
4700 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4701 vreinterpretq_u32_f64 (float64x2_t __a)
4702 {
4703 return (uint32x4_t) __a;
4704 }
4705
4706 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4707 vreinterpretq_u32_s8 (int8x16_t __a)
4708 {
4709 return (uint32x4_t) __a;
4710 }
4711
4712 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4713 vreinterpretq_u32_s16 (int16x8_t __a)
4714 {
4715 return (uint32x4_t) __a;
4716 }
4717
4718 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4719 vreinterpretq_u32_s32 (int32x4_t __a)
4720 {
4721 return (uint32x4_t) __a;
4722 }
4723
4724 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4725 vreinterpretq_u32_s64 (int64x2_t __a)
4726 {
4727 return (uint32x4_t) __a;
4728 }
4729
4730 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4731 vreinterpretq_u32_f16 (float16x8_t __a)
4732 {
4733 return (uint32x4_t) __a;
4734 }
4735
4736 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4737 vreinterpretq_u32_f32 (float32x4_t __a)
4738 {
4739 return (uint32x4_t) __a;
4740 }
4741
4742 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4743 vreinterpretq_u32_u8 (uint8x16_t __a)
4744 {
4745 return (uint32x4_t) __a;
4746 }
4747
4748 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4749 vreinterpretq_u32_u16 (uint16x8_t __a)
4750 {
4751 return (uint32x4_t) __a;
4752 }
4753
4754 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4755 vreinterpretq_u32_u64 (uint64x2_t __a)
4756 {
4757 return (uint32x4_t) __a;
4758 }
4759
4760 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4761 vreinterpretq_u32_p8 (poly8x16_t __a)
4762 {
4763 return (uint32x4_t) __a;
4764 }
4765
4766 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4767 vreinterpretq_u32_p16 (poly16x8_t __a)
4768 {
4769 return (uint32x4_t) __a;
4770 }
4771
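/* All of the vreinterpret/vreinterpretq forms above are pure bit casts:
   they change the element type of a 64-bit or 128-bit vector without
   generating any instructions or altering the bit pattern.  A minimal
   usage sketch (the helper name __example_float_bits is hypothetical,
   not part of the NEON API): view the raw IEEE-754 encodings of a
   float32x2_t as two 32-bit integers.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
__example_float_bits (float32x2_t __a)
{
  /* Same 64 bits, new element type; compiles to no data movement.  */
  return vreinterpret_u32_f32 (__a);
}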
4772 /* vset_lane */
4773
4774 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
4775 vset_lane_f16 (float16_t __elem, float16x4_t __vec, const int __index)
4776 {
4777 return __aarch64_vset_lane_any (__elem, __vec, __index);
4778 }
4779
4780 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4781 vset_lane_f32 (float32_t __elem, float32x2_t __vec, const int __index)
4782 {
4783 return __aarch64_vset_lane_any (__elem, __vec, __index);
4784 }
4785
4786 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4787 vset_lane_f64 (float64_t __elem, float64x1_t __vec, const int __index)
4788 {
4789 return __aarch64_vset_lane_any (__elem, __vec, __index);
4790 }
4791
4792 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4793 vset_lane_p8 (poly8_t __elem, poly8x8_t __vec, const int __index)
4794 {
4795 return __aarch64_vset_lane_any (__elem, __vec, __index);
4796 }
4797
4798 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4799 vset_lane_p16 (poly16_t __elem, poly16x4_t __vec, const int __index)
4800 {
4801 return __aarch64_vset_lane_any (__elem, __vec, __index);
4802 }
4803
4804 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4805 vset_lane_s8 (int8_t __elem, int8x8_t __vec, const int __index)
4806 {
4807 return __aarch64_vset_lane_any (__elem, __vec, __index);
4808 }
4809
4810 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4811 vset_lane_s16 (int16_t __elem, int16x4_t __vec, const int __index)
4812 {
4813 return __aarch64_vset_lane_any (__elem, __vec, __index);
4814 }
4815
4816 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4817 vset_lane_s32 (int32_t __elem, int32x2_t __vec, const int __index)
4818 {
4819 return __aarch64_vset_lane_any (__elem, __vec, __index);
4820 }
4821
4822 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4823 vset_lane_s64 (int64_t __elem, int64x1_t __vec, const int __index)
4824 {
4825 return __aarch64_vset_lane_any (__elem, __vec, __index);
4826 }
4827
4828 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4829 vset_lane_u8 (uint8_t __elem, uint8x8_t __vec, const int __index)
4830 {
4831 return __aarch64_vset_lane_any (__elem, __vec, __index);
4832 }
4833
4834 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4835 vset_lane_u16 (uint16_t __elem, uint16x4_t __vec, const int __index)
4836 {
4837 return __aarch64_vset_lane_any (__elem, __vec, __index);
4838 }
4839
4840 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4841 vset_lane_u32 (uint32_t __elem, uint32x2_t __vec, const int __index)
4842 {
4843 return __aarch64_vset_lane_any (__elem, __vec, __index);
4844 }
4845
4846 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4847 vset_lane_u64 (uint64_t __elem, uint64x1_t __vec, const int __index)
4848 {
4849 return __aarch64_vset_lane_any (__elem, __vec, __index);
4850 }
4851
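/* vset_lane_<type> returns a copy of __vec with lane __index replaced by
   __elem; __index must be a compile-time constant in range for the vector
   length.  A usage sketch (the helper name __example_set_lane is
   hypothetical): { v[0], v[1] } becomes { v[0], 42.0f }.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
__example_set_lane (float32x2_t __v)
{
  return vset_lane_f32 (42.0f, __v, 1);
}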
4852 /* vsetq_lane */
4853
4854 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
4855 vsetq_lane_f16 (float16_t __elem, float16x8_t __vec, const int __index)
4856 {
4857 return __aarch64_vset_lane_any (__elem, __vec, __index);
4858 }
4859
4860 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4861 vsetq_lane_f32 (float32_t __elem, float32x4_t __vec, const int __index)
4862 {
4863 return __aarch64_vset_lane_any (__elem, __vec, __index);
4864 }
4865
4866 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4867 vsetq_lane_f64 (float64_t __elem, float64x2_t __vec, const int __index)
4868 {
4869 return __aarch64_vset_lane_any (__elem, __vec, __index);
4870 }
4871
4872 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4873 vsetq_lane_p8 (poly8_t __elem, poly8x16_t __vec, const int __index)
4874 {
4875 return __aarch64_vset_lane_any (__elem, __vec, __index);
4876 }
4877
4878 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4879 vsetq_lane_p16 (poly16_t __elem, poly16x8_t __vec, const int __index)
4880 {
4881 return __aarch64_vset_lane_any (__elem, __vec, __index);
4882 }
4883
4884 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4885 vsetq_lane_s8 (int8_t __elem, int8x16_t __vec, const int __index)
4886 {
4887 return __aarch64_vset_lane_any (__elem, __vec, __index);
4888 }
4889
4890 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4891 vsetq_lane_s16 (int16_t __elem, int16x8_t __vec, const int __index)
4892 {
4893 return __aarch64_vset_lane_any (__elem, __vec, __index);
4894 }
4895
4896 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4897 vsetq_lane_s32 (int32_t __elem, int32x4_t __vec, const int __index)
4898 {
4899 return __aarch64_vset_lane_any (__elem, __vec, __index);
4900 }
4901
4902 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4903 vsetq_lane_s64 (int64_t __elem, int64x2_t __vec, const int __index)
4904 {
4905 return __aarch64_vset_lane_any (__elem, __vec, __index);
4906 }
4907
4908 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4909 vsetq_lane_u8 (uint8_t __elem, uint8x16_t __vec, const int __index)
4910 {
4911 return __aarch64_vset_lane_any (__elem, __vec, __index);
4912 }
4913
4914 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4915 vsetq_lane_u16 (uint16_t __elem, uint16x8_t __vec, const int __index)
4916 {
4917 return __aarch64_vset_lane_any (__elem, __vec, __index);
4918 }
4919
4920 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4921 vsetq_lane_u32 (uint32_t __elem, uint32x4_t __vec, const int __index)
4922 {
4923 return __aarch64_vset_lane_any (__elem, __vec, __index);
4924 }
4925
4926 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4927 vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index)
4928 {
4929 return __aarch64_vset_lane_any (__elem, __vec, __index);
4930 }
4931
4932 #define __GET_LOW(__TYPE) \
4933 uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \
4934 uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0)); \
4935 return vreinterpret_##__TYPE##_u64 (lo);
4936
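/* For example, __GET_LOW (f32) expands to three statements: reinterpret
   the 128-bit argument as uint64x2_t, extract element 0 into a
   uint64x1_t with vcreate_u64/vgetq_lane_u64, then reinterpret that back
   to the 64-bit form of the original element type.  Only the lane
   extraction costs an instruction; both reinterprets are free.  */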
4937 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
4938 vget_low_f16 (float16x8_t __a)
4939 {
4940 __GET_LOW (f16);
4941 }
4942
4943 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4944 vget_low_f32 (float32x4_t __a)
4945 {
4946 __GET_LOW (f32);
4947 }
4948
4949 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4950 vget_low_f64 (float64x2_t __a)
4951 {
4952 return (float64x1_t) {vgetq_lane_f64 (__a, 0)};
4953 }
4954
4955 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4956 vget_low_p8 (poly8x16_t __a)
4957 {
4958 __GET_LOW (p8);
4959 }
4960
4961 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4962 vget_low_p16 (poly16x8_t __a)
4963 {
4964 __GET_LOW (p16);
4965 }
4966
4967 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4968 vget_low_s8 (int8x16_t __a)
4969 {
4970 __GET_LOW (s8);
4971 }
4972
4973 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4974 vget_low_s16 (int16x8_t __a)
4975 {
4976 __GET_LOW (s16);
4977 }
4978
4979 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4980 vget_low_s32 (int32x4_t __a)
4981 {
4982 __GET_LOW (s32);
4983 }
4984
4985 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4986 vget_low_s64 (int64x2_t __a)
4987 {
4988 __GET_LOW (s64);
4989 }
4990
4991 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4992 vget_low_u8 (uint8x16_t __a)
4993 {
4994 __GET_LOW (u8);
4995 }
4996
4997 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4998 vget_low_u16 (uint16x8_t __a)
4999 {
5000 __GET_LOW (u16);
5001 }
5002
5003 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5004 vget_low_u32 (uint32x4_t __a)
5005 {
5006 __GET_LOW (u32);
5007 }
5008
5009 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5010 vget_low_u64 (uint64x2_t __a)
5011 {
5012 return vcreate_u64 (vgetq_lane_u64 (__a, 0));
5013 }
5014
5015 #undef __GET_LOW
5016
5017 #define __GET_HIGH(__TYPE) \
5018 uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \
5019 uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \
5020 return vreinterpret_##__TYPE##_u64 (hi);
5021
5022 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
5023 vget_high_f16 (float16x8_t __a)
5024 {
5025 __GET_HIGH (f16);
5026 }
5027
5028 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5029 vget_high_f32 (float32x4_t __a)
5030 {
5031 __GET_HIGH (f32);
5032 }
5033
5034 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
5035 vget_high_f64 (float64x2_t __a)
5036 {
5037 __GET_HIGH (f64);
5038 }
5039
5040 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5041 vget_high_p8 (poly8x16_t __a)
5042 {
5043 __GET_HIGH (p8);
5044 }
5045
5046 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
5047 vget_high_p16 (poly16x8_t __a)
5048 {
5049 __GET_HIGH (p16);
5050 }
5051
5052 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5053 vget_high_s8 (int8x16_t __a)
5054 {
5055 __GET_HIGH (s8);
5056 }
5057
5058 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5059 vget_high_s16 (int16x8_t __a)
5060 {
5061 __GET_HIGH (s16);
5062 }
5063
5064 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5065 vget_high_s32 (int32x4_t __a)
5066 {
5067 __GET_HIGH (s32);
5068 }
5069
5070 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
5071 vget_high_s64 (int64x2_t __a)
5072 {
5073 __GET_HIGH (s64);
5074 }
5075
5076 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5077 vget_high_u8 (uint8x16_t __a)
5078 {
5079 __GET_HIGH (u8);
5080 }
5081
5082 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5083 vget_high_u16 (uint16x8_t __a)
5084 {
5085 __GET_HIGH (u16);
5086 }
5087
5088 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5089 vget_high_u32 (uint32x4_t __a)
5090 {
5091 __GET_HIGH (u32);
5092 }
5093
5094 #undef __GET_HIGH
5095
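/* vget_high_u64 already produces a uint64x1_t, so it needs no
   reinterpret round trip and is written out directly below rather than
   via __GET_HIGH (which has been #undef'ed above).  */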
5096 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5097 vget_high_u64 (uint64x2_t __a)
5098 {
5099 return vcreate_u64 (vgetq_lane_u64 (__a, 1));
5100 }
5101
5102 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5103 vcombine_s8 (int8x8_t __a, int8x8_t __b)
5104 {
5105 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
5106 }
5107
5108 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5109 vcombine_s16 (int16x4_t __a, int16x4_t __b)
5110 {
5111 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
5112 }
5113
5114 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5115 vcombine_s32 (int32x2_t __a, int32x2_t __b)
5116 {
5117 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
5118 }
5119
5120 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5121 vcombine_s64 (int64x1_t __a, int64x1_t __b)
5122 {
5123 return __builtin_aarch64_combinedi (__a[0], __b[0]);
5124 }
5125
5126 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
5127 vcombine_f16 (float16x4_t __a, float16x4_t __b)
5128 {
5129 return __builtin_aarch64_combinev4hf (__a, __b);
5130 }
5131
5132 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5133 vcombine_f32 (float32x2_t __a, float32x2_t __b)
5134 {
5135 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
5136 }
5137
5138 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5139 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
5140 {
5141 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
5142 (int8x8_t) __b);
5143 }
5144
5145 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5146 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
5147 {
5148 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
5149 (int16x4_t) __b);
5150 }
5151
5152 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5153 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
5154 {
5155 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
5156 (int32x2_t) __b);
5157 }
5158
5159 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5160 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
5161 {
5162 return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]);
5163 }
5164
5165 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5166 vcombine_f64 (float64x1_t __a, float64x1_t __b)
5167 {
5168 return __builtin_aarch64_combinedf (__a[0], __b[0]);
5169 }
5170
5171 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5172 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
5173 {
5174 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
5175 (int8x8_t) __b);
5176 }
5177
5178 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
5179 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
5180 {
5181 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
5182 (int16x4_t) __b);
5183 }
5184
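/* vcombine_<type> concatenates two 64-bit vectors into one 128-bit
   vector, with __a in the low half and __b in the high half; it is the
   inverse of the vget_low/vget_high pair.  A sketch (the helper name
   __example_roundtrip is hypothetical):  */
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
__example_roundtrip (int8x16_t __a)
{
  /* Splitting and recombining reproduces the original vector.  */
  return vcombine_s8 (vget_low_s8 (__a), vget_high_s8 (__a));
}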
5185 /* Start of temporary inline asm implementations: stop-gap intrinsics
     written with inline assembly until equivalent builtins exist.  */
5186
5187 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5188 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
5189 {
5190 int8x8_t result;
5191 __asm__ ("saba %0.8b,%2.8b,%3.8b"
5192 : "=w"(result)
5193 : "0"(a), "w"(b), "w"(c)
5194 : /* No clobbers */);
5195 return result;
5196 }
5197
5198 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5199 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
5200 {
5201 int16x4_t result;
5202 __asm__ ("saba %0.4h,%2.4h,%3.4h"
5203 : "=w"(result)
5204 : "0"(a), "w"(b), "w"(c)
5205 : /* No clobbers */);
5206 return result;
5207 }
5208
5209 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5210 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
5211 {
5212 int32x2_t result;
5213 __asm__ ("saba %0.2s,%2.2s,%3.2s"
5214 : "=w"(result)
5215 : "0"(a), "w"(b), "w"(c)
5216 : /* No clobbers */);
5217 return result;
5218 }
5219
5220 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5221 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
5222 {
5223 uint8x8_t result;
5224 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
5225 : "=w"(result)
5226 : "0"(a), "w"(b), "w"(c)
5227 : /* No clobbers */);
5228 return result;
5229 }
5230
5231 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5232 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
5233 {
5234 uint16x4_t result;
5235 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
5236 : "=w"(result)
5237 : "0"(a), "w"(b), "w"(c)
5238 : /* No clobbers */);
5239 return result;
5240 }
5241
5242 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5243 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
5244 {
5245 uint32x2_t result;
5246 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
5247 : "=w"(result)
5248 : "0"(a), "w"(b), "w"(c)
5249 : /* No clobbers */);
5250 return result;
5251 }
5252
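/* The SABA/UABA forms above compute a signed/unsigned absolute
   difference and accumulate: result[i] = a[i] + |b[i] - c[i]| per
   element, wrapping modulo the element width on overflow.  For example,
   vaba_u8 with a[i] = 10, b[i] = 3 and c[i] = 7 produces
   10 + |3 - 7| = 14 in each lane.  */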
5253 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5254 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
5255 {
5256 int16x8_t result;
5257 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
5258 : "=w"(result)
5259 : "0"(a), "w"(b), "w"(c)
5260 : /* No clobbers */);
5261 return result;
5262 }
5263
5264 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5265 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
5266 {
5267 int32x4_t result;
5268 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
5269 : "=w"(result)
5270 : "0"(a), "w"(b), "w"(c)
5271 : /* No clobbers */);
5272 return result;
5273 }
5274
5275 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5276 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
5277 {
5278 int64x2_t result;
5279 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
5280 : "=w"(result)
5281 : "0"(a), "w"(b), "w"(c)
5282 : /* No clobbers */);
5283 return result;
5284 }
5285
5286 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5287 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
5288 {
5289 uint16x8_t result;
5290 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
5291 : "=w"(result)
5292 : "0"(a), "w"(b), "w"(c)
5293 : /* No clobbers */);
5294 return result;
5295 }
5296
5297 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5298 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
5299 {
5300 uint32x4_t result;
5301 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
5302 : "=w"(result)
5303 : "0"(a), "w"(b), "w"(c)
5304 : /* No clobbers */);
5305 return result;
5306 }
5307
5308 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5309 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
5310 {
5311 uint64x2_t result;
5312 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
5313 : "=w"(result)
5314 : "0"(a), "w"(b), "w"(c)
5315 : /* No clobbers */);
5316 return result;
5317 }
5318
5319 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5320 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
5321 {
5322 int16x8_t result;
5323 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
5324 : "=w"(result)
5325 : "0"(a), "w"(b), "w"(c)
5326 : /* No clobbers */);
5327 return result;
5328 }
5329
5330 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5331 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
5332 {
5333 int32x4_t result;
5334 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
5335 : "=w"(result)
5336 : "0"(a), "w"(b), "w"(c)
5337 : /* No clobbers */);
5338 return result;
5339 }
5340
5341 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5342 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
5343 {
5344 int64x2_t result;
5345 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
5346 : "=w"(result)
5347 : "0"(a), "w"(b), "w"(c)
5348 : /* No clobbers */);
5349 return result;
5350 }
5351
5352 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5353 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
5354 {
5355 uint16x8_t result;
5356 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
5357 : "=w"(result)
5358 : "0"(a), "w"(b), "w"(c)
5359 : /* No clobbers */);
5360 return result;
5361 }
5362
5363 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5364 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
5365 {
5366 uint32x4_t result;
5367 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
5368 : "=w"(result)
5369 : "0"(a), "w"(b), "w"(c)
5370 : /* No clobbers */);
5371 return result;
5372 }
5373
5374 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5375 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
5376 {
5377 uint64x2_t result;
5378 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
5379 : "=w"(result)
5380 : "0"(a), "w"(b), "w"(c)
5381 : /* No clobbers */);
5382 return result;
5383 }
5384
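/* The vabal_high_* forms above map to SABAL2/UABAL2: they read only the
   upper half of each 128-bit source, widen it, and accumulate, so
   vabal_high_s8 (a, b, c) computes the same result as
   vabal_s8 (a, vget_high_s8 (b), vget_high_s8 (c)).  */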
5385 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5386 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
5387 {
5388 int8x16_t result;
5389 __asm__ ("saba %0.16b,%2.16b,%3.16b"
5390 : "=w"(result)
5391 : "0"(a), "w"(b), "w"(c)
5392 : /* No clobbers */);
5393 return result;
5394 }
5395
5396 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5397 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
5398 {
5399 int16x8_t result;
5400 __asm__ ("saba %0.8h,%2.8h,%3.8h"
5401 : "=w"(result)
5402 : "0"(a), "w"(b), "w"(c)
5403 : /* No clobbers */);
5404 return result;
5405 }
5406
5407 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5408 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
5409 {
5410 int32x4_t result;
5411 __asm__ ("saba %0.4s,%2.4s,%3.4s"
5412 : "=w"(result)
5413 : "0"(a), "w"(b), "w"(c)
5414 : /* No clobbers */);
5415 return result;
5416 }
5417
5418 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5419 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
5420 {
5421 uint8x16_t result;
5422 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
5423 : "=w"(result)
5424 : "0"(a), "w"(b), "w"(c)
5425 : /* No clobbers */);
5426 return result;
5427 }
5428
5429 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5430 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
5431 {
5432 uint16x8_t result;
5433 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
5434 : "=w"(result)
5435 : "0"(a), "w"(b), "w"(c)
5436 : /* No clobbers */);
5437 return result;
5438 }
5439
5440 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5441 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
5442 {
5443 uint32x4_t result;
5444 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
5445 : "=w"(result)
5446 : "0"(a), "w"(b), "w"(c)
5447 : /* No clobbers */);
5448 return result;
5449 }
5450
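/* SABD/UABD: per-element absolute difference,
   result[i] = |a[i] - b[i]|, computed without intermediate overflow and
   truncated to the element width of the inputs.  */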
5451 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5452 vabd_s8 (int8x8_t a, int8x8_t b)
5453 {
5454 int8x8_t result;
5455 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
5456 : "=w"(result)
5457 : "w"(a), "w"(b)
5458 : /* No clobbers */);
5459 return result;
5460 }
5461
5462 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5463 vabd_s16 (int16x4_t a, int16x4_t b)
5464 {
5465 int16x4_t result;
5466 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
5467 : "=w"(result)
5468 : "w"(a), "w"(b)
5469 : /* No clobbers */);
5470 return result;
5471 }
5472
5473 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5474 vabd_s32 (int32x2_t a, int32x2_t b)
5475 {
5476 int32x2_t result;
5477 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
5478 : "=w"(result)
5479 : "w"(a), "w"(b)
5480 : /* No clobbers */);
5481 return result;
5482 }
5483
5484 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5485 vabd_u8 (uint8x8_t a, uint8x8_t b)
5486 {
5487 uint8x8_t result;
5488 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
5489 : "=w"(result)
5490 : "w"(a), "w"(b)
5491 : /* No clobbers */);
5492 return result;
5493 }
5494
5495 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5496 vabd_u16 (uint16x4_t a, uint16x4_t b)
5497 {
5498 uint16x4_t result;
5499 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
5500 : "=w"(result)
5501 : "w"(a), "w"(b)
5502 : /* No clobbers */);
5503 return result;
5504 }
5505
5506 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5507 vabd_u32 (uint32x2_t a, uint32x2_t b)
5508 {
5509 uint32x2_t result;
5510 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
5511 : "=w"(result)
5512 : "w"(a), "w"(b)
5513 : /* No clobbers */);
5514 return result;
5515 }
5516
5517 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5518 vabdl_high_s8 (int8x16_t a, int8x16_t b)
5519 {
5520 int16x8_t result;
5521 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
5522 : "=w"(result)
5523 : "w"(a), "w"(b)
5524 : /* No clobbers */);
5525 return result;
5526 }
5527
5528 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5529 vabdl_high_s16 (int16x8_t a, int16x8_t b)
5530 {
5531 int32x4_t result;
5532 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
5533 : "=w"(result)
5534 : "w"(a), "w"(b)
5535 : /* No clobbers */);
5536 return result;
5537 }
5538
5539 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5540 vabdl_high_s32 (int32x4_t a, int32x4_t b)
5541 {
5542 int64x2_t result;
5543 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
5544 : "=w"(result)
5545 : "w"(a), "w"(b)
5546 : /* No clobbers */);
5547 return result;
5548 }
5549
5550 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5551 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
5552 {
5553 uint16x8_t result;
5554 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
5555 : "=w"(result)
5556 : "w"(a), "w"(b)
5557 : /* No clobbers */);
5558 return result;
5559 }
5560
5561 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5562 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
5563 {
5564 uint32x4_t result;
5565 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
5566 : "=w"(result)
5567 : "w"(a), "w"(b)
5568 : /* No clobbers */);
5569 return result;
5570 }
5571
5572 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5573 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
5574 {
5575 uint64x2_t result;
5576 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
5577 : "=w"(result)
5578 : "w"(a), "w"(b)
5579 : /* No clobbers */);
5580 return result;
5581 }
5582
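/* The long (SABDL/UABDL) forms widen each absolute difference to twice
   the element width, so the result can never wrap: vabdl_u8 of lanes
   holding 0 and 255 yields 255 in a 16-bit lane.  */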
5583 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5584 vabdl_s8 (int8x8_t a, int8x8_t b)
5585 {
5586 int16x8_t result;
5587 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
5588 : "=w"(result)
5589 : "w"(a), "w"(b)
5590 : /* No clobbers */);
5591 return result;
5592 }
5593
5594 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5595 vabdl_s16 (int16x4_t a, int16x4_t b)
5596 {
5597 int32x4_t result;
5598 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
5599 : "=w"(result)
5600 : "w"(a), "w"(b)
5601 : /* No clobbers */);
5602 return result;
5603 }
5604
5605 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
5606 vabdl_s32 (int32x2_t a, int32x2_t b)
5607 {
5608 int64x2_t result;
5609 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
5610 : "=w"(result)
5611 : "w"(a), "w"(b)
5612 : /* No clobbers */);
5613 return result;
5614 }
5615
5616 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5617 vabdl_u8 (uint8x8_t a, uint8x8_t b)
5618 {
5619 uint16x8_t result;
5620 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
5621 : "=w"(result)
5622 : "w"(a), "w"(b)
5623 : /* No clobbers */);
5624 return result;
5625 }
5626
5627 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5628 vabdl_u16 (uint16x4_t a, uint16x4_t b)
5629 {
5630 uint32x4_t result;
5631 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
5632 : "=w"(result)
5633 : "w"(a), "w"(b)
5634 : /* No clobbers */);
5635 return result;
5636 }
5637
5638 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
5639 vabdl_u32 (uint32x2_t a, uint32x2_t b)
5640 {
5641 uint64x2_t result;
5642 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
5643 : "=w"(result)
5644 : "w"(a), "w"(b)
5645 : /* No clobbers */);
5646 return result;
5647 }
5648
5649 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5650 vabdq_s8 (int8x16_t a, int8x16_t b)
5651 {
5652 int8x16_t result;
5653 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
5654 : "=w"(result)
5655 : "w"(a), "w"(b)
5656 : /* No clobbers */);
5657 return result;
5658 }
5659
5660 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5661 vabdq_s16 (int16x8_t a, int16x8_t b)
5662 {
5663 int16x8_t result;
5664 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
5665 : "=w"(result)
5666 : "w"(a), "w"(b)
5667 : /* No clobbers */);
5668 return result;
5669 }
5670
5671 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5672 vabdq_s32 (int32x4_t a, int32x4_t b)
5673 {
5674 int32x4_t result;
5675 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
5676 : "=w"(result)
5677 : "w"(a), "w"(b)
5678 : /* No clobbers */);
5679 return result;
5680 }
5681
5682 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5683 vabdq_u8 (uint8x16_t a, uint8x16_t b)
5684 {
5685 uint8x16_t result;
5686 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
5687 : "=w"(result)
5688 : "w"(a), "w"(b)
5689 : /* No clobbers */);
5690 return result;
5691 }
5692
5693 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5694 vabdq_u16 (uint16x8_t a, uint16x8_t b)
5695 {
5696 uint16x8_t result;
5697 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
5698 : "=w"(result)
5699 : "w"(a), "w"(b)
5700 : /* No clobbers */);
5701 return result;
5702 }
5703
5704 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5705 vabdq_u32 (uint32x4_t a, uint32x4_t b)
5706 {
5707 uint32x4_t result;
5708 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
5709 : "=w"(result)
5710 : "w"(a), "w"(b)
5711 : /* No clobbers */);
5712 return result;
5713 }
5714
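/* vaddlv_* and vaddlvq_*: across-lanes add reductions that widen as they
   sum (saddlv/uaddlv), so e.g. eight uint8_t lanes reduce to a single
   uint16_t without intermediate overflow.  Usage sketch (illustrative
   only; sad8 is a hypothetical helper, not part of this header, and it
   assumes the vabd_u8 intrinsic declared earlier in this file):

     static inline uint16_t
     sad8 (uint8x8_t p, uint8x8_t q)
     {
       return vaddlv_u8 (vabd_u8 (p, q));  // sum of absolute differences
     }
*/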
5715 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5716 vaddlv_s8 (int8x8_t a)
5717 {
5718 int16_t result;
5719 __asm__ ("saddlv %h0,%1.8b"
5720 : "=w"(result)
5721 : "w"(a)
5722 : /* No clobbers */);
5723 return result;
5724 }
5725
5726 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5727 vaddlv_s16 (int16x4_t a)
5728 {
5729 int32_t result;
5730 __asm__ ("saddlv %s0,%1.4h"
5731 : "=w"(result)
5732 : "w"(a)
5733 : /* No clobbers */);
5734 return result;
5735 }
5736
5737 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5738 vaddlv_u8 (uint8x8_t a)
5739 {
5740 uint16_t result;
5741 __asm__ ("uaddlv %h0,%1.8b"
5742 : "=w"(result)
5743 : "w"(a)
5744 : /* No clobbers */);
5745 return result;
5746 }
5747
5748 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5749 vaddlv_u16 (uint16x4_t a)
5750 {
5751 uint32_t result;
5752 __asm__ ("uaddlv %s0,%1.4h"
5753 : "=w"(result)
5754 : "w"(a)
5755 : /* No clobbers */);
5756 return result;
5757 }
5758
5759 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5760 vaddlvq_s8 (int8x16_t a)
5761 {
5762 int16_t result;
5763 __asm__ ("saddlv %h0,%1.16b"
5764 : "=w"(result)
5765 : "w"(a)
5766 : /* No clobbers */);
5767 return result;
5768 }
5769
5770 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5771 vaddlvq_s16 (int16x8_t a)
5772 {
5773 int32_t result;
5774 __asm__ ("saddlv %s0,%1.8h"
5775 : "=w"(result)
5776 : "w"(a)
5777 : /* No clobbers */);
5778 return result;
5779 }
5780
5781 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
5782 vaddlvq_s32 (int32x4_t a)
5783 {
5784 int64_t result;
5785 __asm__ ("saddlv %d0,%1.4s"
5786 : "=w"(result)
5787 : "w"(a)
5788 : /* No clobbers */);
5789 return result;
5790 }
5791
5792 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5793 vaddlvq_u8 (uint8x16_t a)
5794 {
5795 uint16_t result;
5796 __asm__ ("uaddlv %h0,%1.16b"
5797 : "=w"(result)
5798 : "w"(a)
5799 : /* No clobbers */);
5800 return result;
5801 }
5802
5803 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5804 vaddlvq_u16 (uint16x8_t a)
5805 {
5806 uint32_t result;
5807 __asm__ ("uaddlv %s0,%1.8h"
5808 : "=w"(result)
5809 : "w"(a)
5810 : /* No clobbers */);
5811 return result;
5812 }
5813
5814 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
5815 vaddlvq_u32 (uint32x4_t a)
5816 {
5817 uint64_t result;
5818 __asm__ ("uaddlv %d0,%1.4s"
5819 : "=w"(result)
5820 : "w"(a)
5821 : /* No clobbers */);
5822 return result;
5823 }
5824
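/* vcvtx*_f32_f64: narrowing conversions using FCVTXN's round-to-odd
   mode.  Round-to-odd makes a two-step narrowing (e.g. float64 ->
   float32 -> float16) produce the same value as a single correctly
   rounded conversion, avoiding double-rounding error.  */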
5825 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5826 vcvtx_f32_f64 (float64x2_t a)
5827 {
5828 float32x2_t result;
5829 __asm__ ("fcvtxn %0.2s,%1.2d"
5830 : "=w"(result)
5831 : "w"(a)
5832 : /* No clobbers */);
5833 return result;
5834 }
5835
5836 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5837 vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
5838 {
5839 float32x4_t result;
5840 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5841 : "=w"(result)
5842 : "w" (b), "0"(a)
5843 : /* No clobbers */);
5844 return result;
5845 }
5846
5847 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5848 vcvtxd_f32_f64 (float64_t a)
5849 {
5850 float32_t result;
5851 __asm__ ("fcvtxn %s0,%d1"
5852 : "=w"(result)
5853 : "w"(a)
5854 : /* No clobbers */);
5855 return result;
5856 }
5857
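/* vmla_* / vmla_n_*: multiply-accumulate.  The float forms are emitted
   as a separate fmul into a scratch register (the second "=w" output,
   t1) followed by fadd: vmla is specified with two rounding steps, so a
   fused fmla here would change results.  The fused operation is exposed
   separately as the vfma_* intrinsics.  */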
5858 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5859 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
5860 {
5861 float32x2_t result;
5862 float32x2_t t1;
5863 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
5864 : "=w"(result), "=w"(t1)
5865 : "0"(a), "w"(b), "w"(c)
5866 : /* No clobbers */);
5867 return result;
5868 }
5869
5870 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5871 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
5872 {
5873 int16x4_t result;
5874 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
5875 : "=w"(result)
5876 : "0"(a), "w"(b), "x"(c)
5877 : /* No clobbers */);
5878 return result;
5879 }
5880
5881 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5882 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
5883 {
5884 int32x2_t result;
5885 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
5886 : "=w"(result)
5887 : "0"(a), "w"(b), "w"(c)
5888 : /* No clobbers */);
5889 return result;
5890 }
5891
5892 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5893 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
5894 {
5895 uint16x4_t result;
5896 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
5897 : "=w"(result)
5898 : "0"(a), "w"(b), "x"(c)
5899 : /* No clobbers */);
5900 return result;
5901 }
5902
5903 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5904 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
5905 {
5906 uint32x2_t result;
5907 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
5908 : "=w"(result)
5909 : "0"(a), "w"(b), "w"(c)
5910 : /* No clobbers */);
5911 return result;
5912 }
5913
5914 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5915 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
5916 {
5917 int8x8_t result;
5918 __asm__ ("mla %0.8b, %2.8b, %3.8b"
5919 : "=w"(result)
5920 : "0"(a), "w"(b), "w"(c)
5921 : /* No clobbers */);
5922 return result;
5923 }
5924
5925 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5926 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
5927 {
5928 int16x4_t result;
5929 __asm__ ("mla %0.4h, %2.4h, %3.4h"
5930 : "=w"(result)
5931 : "0"(a), "w"(b), "w"(c)
5932 : /* No clobbers */);
5933 return result;
5934 }
5935
5936 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5937 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
5938 {
5939 int32x2_t result;
5940 __asm__ ("mla %0.2s, %2.2s, %3.2s"
5941 : "=w"(result)
5942 : "0"(a), "w"(b), "w"(c)
5943 : /* No clobbers */);
5944 return result;
5945 }
5946
5947 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5948 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
5949 {
5950 uint8x8_t result;
5951 __asm__ ("mla %0.8b, %2.8b, %3.8b"
5952 : "=w"(result)
5953 : "0"(a), "w"(b), "w"(c)
5954 : /* No clobbers */);
5955 return result;
5956 }
5957
5958 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5959 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
5960 {
5961 uint16x4_t result;
5962 __asm__ ("mla %0.4h, %2.4h, %3.4h"
5963 : "=w"(result)
5964 : "0"(a), "w"(b), "w"(c)
5965 : /* No clobbers */);
5966 return result;
5967 }
5968
5969 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5970 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
5971 {
5972 uint32x2_t result;
5973 __asm__ ("mla %0.2s, %2.2s, %3.2s"
5974 : "=w"(result)
5975 : "0"(a), "w"(b), "w"(c)
5976 : /* No clobbers */);
5977 return result;
5978 }
5979
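/* The *_lane_* forms are macros built from GNU statement expressions
   rather than inline functions: the lane index is tied to an "i"
   (immediate) asm constraint, so it must be an integer constant
   expression visible at compile time.  Note also the "x" constraint on
   16-bit lane operands: indexed 16-bit multiplies can only address
   vector registers V0-V15, whereas "w" allows V0-V31.  For example,
   vmlal_high_lane_s16 (acc, v, coeffs, 3) accumulates the widened
   products of the upper half of v with lane 3 of coeffs; a non-constant
   lane index will fail to compile.  */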
5980 #define vmlal_high_lane_s16(a, b, c, d) \
5981 __extension__ \
5982 ({ \
5983 int16x4_t c_ = (c); \
5984 int16x8_t b_ = (b); \
5985 int32x4_t a_ = (a); \
5986 int32x4_t result; \
5987 __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
5988 : "=w"(result) \
5989 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
5990 : /* No clobbers */); \
5991 result; \
5992 })
5993
5994 #define vmlal_high_lane_s32(a, b, c, d) \
5995 __extension__ \
5996 ({ \
5997 int32x2_t c_ = (c); \
5998 int32x4_t b_ = (b); \
5999 int64x2_t a_ = (a); \
6000 int64x2_t result; \
6001 __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
6002 : "=w"(result) \
6003 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6004 : /* No clobbers */); \
6005 result; \
6006 })
6007
6008 #define vmlal_high_lane_u16(a, b, c, d) \
6009 __extension__ \
6010 ({ \
6011 uint16x4_t c_ = (c); \
6012 uint16x8_t b_ = (b); \
6013 uint32x4_t a_ = (a); \
6014 uint32x4_t result; \
6015 __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
6016 : "=w"(result) \
6017 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6018 : /* No clobbers */); \
6019 result; \
6020 })
6021
6022 #define vmlal_high_lane_u32(a, b, c, d) \
6023 __extension__ \
6024 ({ \
6025 uint32x2_t c_ = (c); \
6026 uint32x4_t b_ = (b); \
6027 uint64x2_t a_ = (a); \
6028 uint64x2_t result; \
6029 __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
6030 : "=w"(result) \
6031 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6032 : /* No clobbers */); \
6033 result; \
6034 })
6035
6036 #define vmlal_high_laneq_s16(a, b, c, d) \
6037 __extension__ \
6038 ({ \
6039 int16x8_t c_ = (c); \
6040 int16x8_t b_ = (b); \
6041 int32x4_t a_ = (a); \
6042 int32x4_t result; \
6043 __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
6044 : "=w"(result) \
6045 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6046 : /* No clobbers */); \
6047 result; \
6048 })
6049
6050 #define vmlal_high_laneq_s32(a, b, c, d) \
6051 __extension__ \
6052 ({ \
6053 int32x4_t c_ = (c); \
6054 int32x4_t b_ = (b); \
6055 int64x2_t a_ = (a); \
6056 int64x2_t result; \
6057 __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
6058 : "=w"(result) \
6059 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6060 : /* No clobbers */); \
6061 result; \
6062 })
6063
6064 #define vmlal_high_laneq_u16(a, b, c, d) \
6065 __extension__ \
6066 ({ \
6067 uint16x8_t c_ = (c); \
6068 uint16x8_t b_ = (b); \
6069 uint32x4_t a_ = (a); \
6070 uint32x4_t result; \
6071 __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
6072 : "=w"(result) \
6073 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6074 : /* No clobbers */); \
6075 result; \
6076 })
6077
6078 #define vmlal_high_laneq_u32(a, b, c, d) \
6079 __extension__ \
6080 ({ \
6081 uint32x4_t c_ = (c); \
6082 uint32x4_t b_ = (b); \
6083 uint64x2_t a_ = (a); \
6084 uint64x2_t result; \
6085 __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
6086 : "=w"(result) \
6087 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6088 : /* No clobbers */); \
6089 result; \
6090 })
6091
6092 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6093 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
6094 {
6095 int32x4_t result;
6096 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
6097 : "=w"(result)
6098 : "0"(a), "w"(b), "x"(c)
6099 : /* No clobbers */);
6100 return result;
6101 }
6102
6103 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6104 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
6105 {
6106 int64x2_t result;
6107 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
6108 : "=w"(result)
6109 : "0"(a), "w"(b), "w"(c)
6110 : /* No clobbers */);
6111 return result;
6112 }
6113
6114 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6115 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
6116 {
6117 uint32x4_t result;
6118 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
6119 : "=w"(result)
6120 : "0"(a), "w"(b), "x"(c)
6121 : /* No clobbers */);
6122 return result;
6123 }
6124
6125 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6126 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
6127 {
6128 uint64x2_t result;
6129 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
6130 : "=w"(result)
6131 : "0"(a), "w"(b), "w"(c)
6132 : /* No clobbers */);
6133 return result;
6134 }
6135
6136 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6137 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
6138 {
6139 int16x8_t result;
6140 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
6141 : "=w"(result)
6142 : "0"(a), "w"(b), "w"(c)
6143 : /* No clobbers */);
6144 return result;
6145 }
6146
6147 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6148 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
6149 {
6150 int32x4_t result;
6151 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
6152 : "=w"(result)
6153 : "0"(a), "w"(b), "w"(c)
6154 : /* No clobbers */);
6155 return result;
6156 }
6157
6158 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6159 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
6160 {
6161 int64x2_t result;
6162 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
6163 : "=w"(result)
6164 : "0"(a), "w"(b), "w"(c)
6165 : /* No clobbers */);
6166 return result;
6167 }
6168
6169 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6170 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
6171 {
6172 uint16x8_t result;
6173 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
6174 : "=w"(result)
6175 : "0"(a), "w"(b), "w"(c)
6176 : /* No clobbers */);
6177 return result;
6178 }
6179
6180 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6181 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
6182 {
6183 uint32x4_t result;
6184 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
6185 : "=w"(result)
6186 : "0"(a), "w"(b), "w"(c)
6187 : /* No clobbers */);
6188 return result;
6189 }
6190
6191 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6192 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
6193 {
6194 uint64x2_t result;
6195 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
6196 : "=w"(result)
6197 : "0"(a), "w"(b), "w"(c)
6198 : /* No clobbers */);
6199 return result;
6200 }
6201
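/* vmlal_lane_* / vmlal_laneq_*: low-half counterparts of the
   *_high_lane_* macros above; the laneq forms take the lane from a
   128-bit vector.  */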
6202 #define vmlal_lane_s16(a, b, c, d) \
6203 __extension__ \
6204 ({ \
6205 int16x4_t c_ = (c); \
6206 int16x4_t b_ = (b); \
6207 int32x4_t a_ = (a); \
6208 int32x4_t result; \
6209 __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \
6210 : "=w"(result) \
6211 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6212 : /* No clobbers */); \
6213 result; \
6214 })
6215
6216 #define vmlal_lane_s32(a, b, c, d) \
6217 __extension__ \
6218 ({ \
6219 int32x2_t c_ = (c); \
6220 int32x2_t b_ = (b); \
6221 int64x2_t a_ = (a); \
6222 int64x2_t result; \
6223 __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \
6224 : "=w"(result) \
6225 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6226 : /* No clobbers */); \
6227 result; \
6228 })
6229
6230 #define vmlal_lane_u16(a, b, c, d) \
6231 __extension__ \
6232 ({ \
6233 uint16x4_t c_ = (c); \
6234 uint16x4_t b_ = (b); \
6235 uint32x4_t a_ = (a); \
6236 uint32x4_t result; \
6237 __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \
6238 : "=w"(result) \
6239 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6240 : /* No clobbers */); \
6241 result; \
6242 })
6243
6244 #define vmlal_lane_u32(a, b, c, d) \
6245 __extension__ \
6246 ({ \
6247 uint32x2_t c_ = (c); \
6248 uint32x2_t b_ = (b); \
6249 uint64x2_t a_ = (a); \
6250 uint64x2_t result; \
6251 __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
6252 : "=w"(result) \
6253 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6254 : /* No clobbers */); \
6255 result; \
6256 })
6257
6258 #define vmlal_laneq_s16(a, b, c, d) \
6259 __extension__ \
6260 ({ \
6261 int16x8_t c_ = (c); \
6262 int16x4_t b_ = (b); \
6263 int32x4_t a_ = (a); \
6264 int32x4_t result; \
6265 __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \
6266 : "=w"(result) \
6267 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6268 : /* No clobbers */); \
6269 result; \
6270 })
6271
6272 #define vmlal_laneq_s32(a, b, c, d) \
6273 __extension__ \
6274 ({ \
6275 int32x4_t c_ = (c); \
6276 int32x2_t b_ = (b); \
6277 int64x2_t a_ = (a); \
6278 int64x2_t result; \
6279 __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \
6280 : "=w"(result) \
6281 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6282 : /* No clobbers */); \
6283 result; \
6284 })
6285
6286 #define vmlal_laneq_u16(a, b, c, d) \
6287 __extension__ \
6288 ({ \
6289 uint16x8_t c_ = (c); \
6290 uint16x4_t b_ = (b); \
6291 uint32x4_t a_ = (a); \
6292 uint32x4_t result; \
6293 __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \
6294 : "=w"(result) \
6295 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6296 : /* No clobbers */); \
6297 result; \
6298 })
6299
6300 #define vmlal_laneq_u32(a, b, c, d) \
6301 __extension__ \
6302 ({ \
6303 uint32x4_t c_ = (c); \
6304 uint32x2_t b_ = (b); \
6305 uint64x2_t a_ = (a); \
6306 uint64x2_t result; \
6307 __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
6308 : "=w"(result) \
6309 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6310 : /* No clobbers */); \
6311 result; \
6312 })
6313
6314 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6315 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
6316 {
6317 int32x4_t result;
6318 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
6319 : "=w"(result)
6320 : "0"(a), "w"(b), "x"(c)
6321 : /* No clobbers */);
6322 return result;
6323 }
6324
6325 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6326 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
6327 {
6328 int64x2_t result;
6329 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
6330 : "=w"(result)
6331 : "0"(a), "w"(b), "w"(c)
6332 : /* No clobbers */);
6333 return result;
6334 }
6335
6336 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6337 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
6338 {
6339 uint32x4_t result;
6340 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
6341 : "=w"(result)
6342 : "0"(a), "w"(b), "x"(c)
6343 : /* No clobbers */);
6344 return result;
6345 }
6346
6347 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6348 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
6349 {
6350 uint64x2_t result;
6351 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
6352 : "=w"(result)
6353 : "0"(a), "w"(b), "w"(c)
6354 : /* No clobbers */);
6355 return result;
6356 }
6357
6358 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6359 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
6360 {
6361 int16x8_t result;
6362 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
6363 : "=w"(result)
6364 : "0"(a), "w"(b), "w"(c)
6365 : /* No clobbers */);
6366 return result;
6367 }
6368
6369 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6370 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
6371 {
6372 int32x4_t result;
6373 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
6374 : "=w"(result)
6375 : "0"(a), "w"(b), "w"(c)
6376 : /* No clobbers */);
6377 return result;
6378 }
6379
6380 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6381 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
6382 {
6383 int64x2_t result;
6384 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
6385 : "=w"(result)
6386 : "0"(a), "w"(b), "w"(c)
6387 : /* No clobbers */);
6388 return result;
6389 }
6390
6391 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6392 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
6393 {
6394 uint16x8_t result;
6395 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
6396 : "=w"(result)
6397 : "0"(a), "w"(b), "w"(c)
6398 : /* No clobbers */);
6399 return result;
6400 }
6401
6402 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6403 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
6404 {
6405 uint32x4_t result;
6406 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
6407 : "=w"(result)
6408 : "0"(a), "w"(b), "w"(c)
6409 : /* No clobbers */);
6410 return result;
6411 }
6412
6413 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6414 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
6415 {
6416 uint64x2_t result;
6417 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
6418 : "=w"(result)
6419 : "0"(a), "w"(b), "w"(c)
6420 : /* No clobbers */);
6421 return result;
6422 }
6423
6424 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6425 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
6426 {
6427 float32x4_t result;
6428 float32x4_t t1;
6429 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
6430 : "=w"(result), "=w"(t1)
6431 : "0"(a), "w"(b), "w"(c)
6432 : /* No clobbers */);
6433 return result;
6434 }
6435
6436 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6437 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
6438 {
6439 int16x8_t result;
6440 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
6441 : "=w"(result)
6442 : "0"(a), "w"(b), "x"(c)
6443 : /* No clobbers */);
6444 return result;
6445 }
6446
6447 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6448 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
6449 {
6450 int32x4_t result;
6451 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
6452 : "=w"(result)
6453 : "0"(a), "w"(b), "w"(c)
6454 : /* No clobbers */);
6455 return result;
6456 }
6457
6458 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6459 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
6460 {
6461 uint16x8_t result;
6462 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
6463 : "=w"(result)
6464 : "0"(a), "w"(b), "x"(c)
6465 : /* No clobbers */);
6466 return result;
6467 }
6468
6469 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6470 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
6471 {
6472 uint32x4_t result;
6473 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
6474 : "=w"(result)
6475 : "0"(a), "w"(b), "w"(c)
6476 : /* No clobbers */);
6477 return result;
6478 }
6479
6480 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6481 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
6482 {
6483 int8x16_t result;
6484 __asm__ ("mla %0.16b, %2.16b, %3.16b"
6485 : "=w"(result)
6486 : "0"(a), "w"(b), "w"(c)
6487 : /* No clobbers */);
6488 return result;
6489 }
6490
6491 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6492 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
6493 {
6494 int16x8_t result;
6495 __asm__ ("mla %0.8h, %2.8h, %3.8h"
6496 : "=w"(result)
6497 : "0"(a), "w"(b), "w"(c)
6498 : /* No clobbers */);
6499 return result;
6500 }
6501
6502 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6503 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
6504 {
6505 int32x4_t result;
6506 __asm__ ("mla %0.4s, %2.4s, %3.4s"
6507 : "=w"(result)
6508 : "0"(a), "w"(b), "w"(c)
6509 : /* No clobbers */);
6510 return result;
6511 }
6512
6513 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6514 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
6515 {
6516 uint8x16_t result;
6517 __asm__ ("mla %0.16b, %2.16b, %3.16b"
6518 : "=w"(result)
6519 : "0"(a), "w"(b), "w"(c)
6520 : /* No clobbers */);
6521 return result;
6522 }
6523
6524 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6525 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
6526 {
6527 uint16x8_t result;
6528 __asm__ ("mla %0.8h, %2.8h, %3.8h"
6529 : "=w"(result)
6530 : "0"(a), "w"(b), "w"(c)
6531 : /* No clobbers */);
6532 return result;
6533 }
6534
6535 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6536 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
6537 {
6538 uint32x4_t result;
6539 __asm__ ("mla %0.4s, %2.4s, %3.4s"
6540 : "=w"(result)
6541 : "0"(a), "w"(b), "w"(c)
6542 : /* No clobbers */);
6543 return result;
6544 }
6545
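/* vmls_* / vmls_n_*: multiply-subtract (mls).  As with vmla_n_f32
   above, the float n-form is kept as an unfused fmul followed by fsub
   to preserve the intrinsic's two-rounding semantics.  */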
6546 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6547 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6548 {
6549 float32x2_t result;
6550 float32x2_t t1;
6551 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
6552 : "=w"(result), "=w"(t1)
6553 : "0"(a), "w"(b), "w"(c)
6554 : /* No clobbers */);
6555 return result;
6556 }
6557
6558 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6559 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
6560 {
6561 int16x4_t result;
6562 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
6563 : "=w"(result)
6564 : "0"(a), "w"(b), "x"(c)
6565 : /* No clobbers */);
6566 return result;
6567 }
6568
6569 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6570 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
6571 {
6572 int32x2_t result;
6573 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
6574 : "=w"(result)
6575 : "0"(a), "w"(b), "w"(c)
6576 : /* No clobbers */);
6577 return result;
6578 }
6579
6580 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6581 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
6582 {
6583 uint16x4_t result;
6584 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
6585 : "=w"(result)
6586 : "0"(a), "w"(b), "x"(c)
6587 : /* No clobbers */);
6588 return result;
6589 }
6590
6591 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6592 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
6593 {
6594 uint32x2_t result;
6595 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
6596 : "=w"(result)
6597 : "0"(a), "w"(b), "w"(c)
6598 : /* No clobbers */);
6599 return result;
6600 }
6601
6602 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6603 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
6604 {
6605 int8x8_t result;
6606 __asm__ ("mls %0.8b,%2.8b,%3.8b"
6607 : "=w"(result)
6608 : "0"(a), "w"(b), "w"(c)
6609 : /* No clobbers */);
6610 return result;
6611 }
6612
6613 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6614 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
6615 {
6616 int16x4_t result;
6617 __asm__ ("mls %0.4h,%2.4h,%3.4h"
6618 : "=w"(result)
6619 : "0"(a), "w"(b), "w"(c)
6620 : /* No clobbers */);
6621 return result;
6622 }
6623
6624 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6625 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
6626 {
6627 int32x2_t result;
6628 __asm__ ("mls %0.2s,%2.2s,%3.2s"
6629 : "=w"(result)
6630 : "0"(a), "w"(b), "w"(c)
6631 : /* No clobbers */);
6632 return result;
6633 }
6634
6635 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6636 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
6637 {
6638 uint8x8_t result;
6639 __asm__ ("mls %0.8b,%2.8b,%3.8b"
6640 : "=w"(result)
6641 : "0"(a), "w"(b), "w"(c)
6642 : /* No clobbers */);
6643 return result;
6644 }
6645
6646 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6647 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
6648 {
6649 uint16x4_t result;
6650 __asm__ ("mls %0.4h,%2.4h,%3.4h"
6651 : "=w"(result)
6652 : "0"(a), "w"(b), "w"(c)
6653 : /* No clobbers */);
6654 return result;
6655 }
6656
6657 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6658 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
6659 {
6660 uint32x2_t result;
6661 __asm__ ("mls %0.2s,%2.2s,%3.2s"
6662 : "=w"(result)
6663 : "0"(a), "w"(b), "w"(c)
6664 : /* No clobbers */);
6665 return result;
6666 }
6667
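/* vmlsl_*: widening multiply-subtract, structurally identical to the
   vmlal_* group above but using smlsl/umlsl (and smlsl2/umlsl2 for the
   *_high forms).  */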
6668 #define vmlsl_high_lane_s16(a, b, c, d) \
6669 __extension__ \
6670 ({ \
6671 int16x4_t c_ = (c); \
6672 int16x8_t b_ = (b); \
6673 int32x4_t a_ = (a); \
6674 int32x4_t result; \
6675 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
6676 : "=w"(result) \
6677 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6678 : /* No clobbers */); \
6679 result; \
6680 })
6681
6682 #define vmlsl_high_lane_s32(a, b, c, d) \
6683 __extension__ \
6684 ({ \
6685 int32x2_t c_ = (c); \
6686 int32x4_t b_ = (b); \
6687 int64x2_t a_ = (a); \
6688 int64x2_t result; \
6689 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
6690 : "=w"(result) \
6691 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6692 : /* No clobbers */); \
6693 result; \
6694 })
6695
6696 #define vmlsl_high_lane_u16(a, b, c, d) \
6697 __extension__ \
6698 ({ \
6699 uint16x4_t c_ = (c); \
6700 uint16x8_t b_ = (b); \
6701 uint32x4_t a_ = (a); \
6702 uint32x4_t result; \
6703 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
6704 : "=w"(result) \
6705 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6706 : /* No clobbers */); \
6707 result; \
6708 })
6709
6710 #define vmlsl_high_lane_u32(a, b, c, d) \
6711 __extension__ \
6712 ({ \
6713 uint32x2_t c_ = (c); \
6714 uint32x4_t b_ = (b); \
6715 uint64x2_t a_ = (a); \
6716 uint64x2_t result; \
6717 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
6718 : "=w"(result) \
6719 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6720 : /* No clobbers */); \
6721 result; \
6722 })
6723
6724 #define vmlsl_high_laneq_s16(a, b, c, d) \
6725 __extension__ \
6726 ({ \
6727 int16x8_t c_ = (c); \
6728 int16x8_t b_ = (b); \
6729 int32x4_t a_ = (a); \
6730 int32x4_t result; \
6731 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
6732 : "=w"(result) \
6733 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6734 : /* No clobbers */); \
6735 result; \
6736 })
6737
6738 #define vmlsl_high_laneq_s32(a, b, c, d) \
6739 __extension__ \
6740 ({ \
6741 int32x4_t c_ = (c); \
6742 int32x4_t b_ = (b); \
6743 int64x2_t a_ = (a); \
6744 int64x2_t result; \
6745 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
6746 : "=w"(result) \
6747 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6748 : /* No clobbers */); \
6749 result; \
6750 })
6751
6752 #define vmlsl_high_laneq_u16(a, b, c, d) \
6753 __extension__ \
6754 ({ \
6755 uint16x8_t c_ = (c); \
6756 uint16x8_t b_ = (b); \
6757 uint32x4_t a_ = (a); \
6758 uint32x4_t result; \
6759 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
6760 : "=w"(result) \
6761 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6762 : /* No clobbers */); \
6763 result; \
6764 })
6765
6766 #define vmlsl_high_laneq_u32(a, b, c, d) \
6767 __extension__ \
6768 ({ \
6769 uint32x4_t c_ = (c); \
6770 uint32x4_t b_ = (b); \
6771 uint64x2_t a_ = (a); \
6772 uint64x2_t result; \
6773 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
6774 : "=w"(result) \
6775 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6776 : /* No clobbers */); \
6777 result; \
6778 })
6779
6780 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6781 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
6782 {
6783 int32x4_t result;
6784 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
6785 : "=w"(result)
6786 : "0"(a), "w"(b), "x"(c)
6787 : /* No clobbers */);
6788 return result;
6789 }
6790
6791 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6792 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
6793 {
6794 int64x2_t result;
6795 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
6796 : "=w"(result)
6797 : "0"(a), "w"(b), "w"(c)
6798 : /* No clobbers */);
6799 return result;
6800 }
6801
6802 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6803 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
6804 {
6805 uint32x4_t result;
6806 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
6807 : "=w"(result)
6808 : "0"(a), "w"(b), "x"(c)
6809 : /* No clobbers */);
6810 return result;
6811 }
6812
6813 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6814 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
6815 {
6816 uint64x2_t result;
6817 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
6818 : "=w"(result)
6819 : "0"(a), "w"(b), "w"(c)
6820 : /* No clobbers */);
6821 return result;
6822 }
6823
6824 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6825 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
6826 {
6827 int16x8_t result;
6828 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
6829 : "=w"(result)
6830 : "0"(a), "w"(b), "w"(c)
6831 : /* No clobbers */);
6832 return result;
6833 }
6834
6835 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6836 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
6837 {
6838 int32x4_t result;
6839 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
6840 : "=w"(result)
6841 : "0"(a), "w"(b), "w"(c)
6842 : /* No clobbers */);
6843 return result;
6844 }
6845
6846 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6847 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
6848 {
6849 int64x2_t result;
6850 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
6851 : "=w"(result)
6852 : "0"(a), "w"(b), "w"(c)
6853 : /* No clobbers */);
6854 return result;
6855 }
6856
6857 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6858 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
6859 {
6860 uint16x8_t result;
6861 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
6862 : "=w"(result)
6863 : "0"(a), "w"(b), "w"(c)
6864 : /* No clobbers */);
6865 return result;
6866 }
6867
6868 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6869 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
6870 {
6871 uint32x4_t result;
6872 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
6873 : "=w"(result)
6874 : "0"(a), "w"(b), "w"(c)
6875 : /* No clobbers */);
6876 return result;
6877 }
6878
6879 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6880 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
6881 {
6882 uint64x2_t result;
6883 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
6884 : "=w"(result)
6885 : "0"(a), "w"(b), "w"(c)
6886 : /* No clobbers */);
6887 return result;
6888 }
6889
6890 #define vmlsl_lane_s16(a, b, c, d) \
6891 __extension__ \
6892 ({ \
6893 int16x4_t c_ = (c); \
6894 int16x4_t b_ = (b); \
6895 int32x4_t a_ = (a); \
6896 int32x4_t result; \
6897 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
6898 : "=w"(result) \
6899 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6900 : /* No clobbers */); \
6901 result; \
6902 })
6903
6904 #define vmlsl_lane_s32(a, b, c, d) \
6905 __extension__ \
6906 ({ \
6907 int32x2_t c_ = (c); \
6908 int32x2_t b_ = (b); \
6909 int64x2_t a_ = (a); \
6910 int64x2_t result; \
6911 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
6912 : "=w"(result) \
6913 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6914 : /* No clobbers */); \
6915 result; \
6916 })
6917
6918 #define vmlsl_lane_u16(a, b, c, d) \
6919 __extension__ \
6920 ({ \
6921 uint16x4_t c_ = (c); \
6922 uint16x4_t b_ = (b); \
6923 uint32x4_t a_ = (a); \
6924 uint32x4_t result; \
6925 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
6926 : "=w"(result) \
6927 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6928 : /* No clobbers */); \
6929 result; \
6930 })
6931
6932 #define vmlsl_lane_u32(a, b, c, d) \
6933 __extension__ \
6934 ({ \
6935 uint32x2_t c_ = (c); \
6936 uint32x2_t b_ = (b); \
6937 uint64x2_t a_ = (a); \
6938 uint64x2_t result; \
6939 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
6940 : "=w"(result) \
6941 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6942 : /* No clobbers */); \
6943 result; \
6944 })
6945
6946 #define vmlsl_laneq_s16(a, b, c, d) \
6947 __extension__ \
6948 ({ \
6949 int16x8_t c_ = (c); \
6950 int16x4_t b_ = (b); \
6951 int32x4_t a_ = (a); \
6952 int32x4_t result; \
6953 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
6954 : "=w"(result) \
6955 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6956 : /* No clobbers */); \
6957 result; \
6958 })
6959
6960 #define vmlsl_laneq_s32(a, b, c, d) \
6961 __extension__ \
6962 ({ \
6963 int32x4_t c_ = (c); \
6964 int32x2_t b_ = (b); \
6965 int64x2_t a_ = (a); \
6966 int64x2_t result; \
6967 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
6968 : "=w"(result) \
6969 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6970 : /* No clobbers */); \
6971 result; \
6972 })
6973
6974 #define vmlsl_laneq_u16(a, b, c, d) \
6975 __extension__ \
6976 ({ \
6977 uint16x8_t c_ = (c); \
6978 uint16x4_t b_ = (b); \
6979 uint32x4_t a_ = (a); \
6980 uint32x4_t result; \
6981 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
6982 : "=w"(result) \
6983 : "0"(a_), "w"(b_), "x"(c_), "i"(d) \
6984 : /* No clobbers */); \
6985 result; \
6986 })
6987
6988 #define vmlsl_laneq_u32(a, b, c, d) \
6989 __extension__ \
6990 ({ \
6991 uint32x4_t c_ = (c); \
6992 uint32x2_t b_ = (b); \
6993 uint64x2_t a_ = (a); \
6994 uint64x2_t result; \
6995 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
6996 : "=w"(result) \
6997 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
6998 : /* No clobbers */); \
6999 result; \
7000 })
7001
7002 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7003 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7004 {
7005 int32x4_t result;
7006 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
7007 : "=w"(result)
7008 : "0"(a), "w"(b), "x"(c)
7009 : /* No clobbers */);
7010 return result;
7011 }
7012
7013 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7014 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7015 {
7016 int64x2_t result;
7017 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
7018 : "=w"(result)
7019 : "0"(a), "w"(b), "w"(c)
7020 : /* No clobbers */);
7021 return result;
7022 }
7023
7024 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7025 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7026 {
7027 uint32x4_t result;
7028 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
7029 : "=w"(result)
7030 : "0"(a), "w"(b), "x"(c)
7031 : /* No clobbers */);
7032 return result;
7033 }
7034
7035 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7036 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7037 {
7038 uint64x2_t result;
7039 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
7040 : "=w"(result)
7041 : "0"(a), "w"(b), "w"(c)
7042 : /* No clobbers */);
7043 return result;
7044 }
7045
7046 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7047 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7048 {
7049 int16x8_t result;
7050 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
7051 : "=w"(result)
7052 : "0"(a), "w"(b), "w"(c)
7053 : /* No clobbers */);
7054 return result;
7055 }
7056
7057 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7058 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7059 {
7060 int32x4_t result;
7061 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
7062 : "=w"(result)
7063 : "0"(a), "w"(b), "w"(c)
7064 : /* No clobbers */);
7065 return result;
7066 }
7067
7068 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7069 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7070 {
7071 int64x2_t result;
7072 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
7073 : "=w"(result)
7074 : "0"(a), "w"(b), "w"(c)
7075 : /* No clobbers */);
7076 return result;
7077 }
7078
7079 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7080 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7081 {
7082 uint16x8_t result;
7083 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
7084 : "=w"(result)
7085 : "0"(a), "w"(b), "w"(c)
7086 : /* No clobbers */);
7087 return result;
7088 }
7089
7090 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7091 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7092 {
7093 uint32x4_t result;
7094 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
7095 : "=w"(result)
7096 : "0"(a), "w"(b), "w"(c)
7097 : /* No clobbers */);
7098 return result;
7099 }
7100
7101 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7102 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7103 {
7104 uint64x2_t result;
7105 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
7106 : "=w"(result)
7107 : "0"(a), "w"(b), "w"(c)
7108 : /* No clobbers */);
7109 return result;
7110 }
7111
7112 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7113 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7114 {
7115 float32x4_t result;
7116 float32x4_t t1;
7117 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
7118 : "=w"(result), "=w"(t1)
7119 : "0"(a), "w"(b), "w"(c)
7120 : /* No clobbers */);
7121 return result;
7122 }
7123
7124 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7125 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7126 {
7127 int16x8_t result;
7128 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7129 : "=w"(result)
7130 : "0"(a), "w"(b), "x"(c)
7131 : /* No clobbers */);
7132 return result;
7133 }
7134
7135 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7136 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7137 {
7138 int32x4_t result;
7139 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7140 : "=w"(result)
7141 : "0"(a), "w"(b), "w"(c)
7142 : /* No clobbers */);
7143 return result;
7144 }
7145
7146 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7147 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7148 {
7149 uint16x8_t result;
7150 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7151 : "=w"(result)
7152 : "0"(a), "w"(b), "x"(c)
7153 : /* No clobbers */);
7154 return result;
7155 }
7156
7157 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7158 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7159 {
7160 uint32x4_t result;
7161 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7162 : "=w"(result)
7163 : "0"(a), "w"(b), "w"(c)
7164 : /* No clobbers */);
7165 return result;
7166 }
7167
7168 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7169 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
7170 {
7171 int8x16_t result;
7172 __asm__ ("mls %0.16b,%2.16b,%3.16b"
7173 : "=w"(result)
7174 : "0"(a), "w"(b), "w"(c)
7175 : /* No clobbers */);
7176 return result;
7177 }
7178
7179 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7180 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
7181 {
7182 int16x8_t result;
7183 __asm__ ("mls %0.8h,%2.8h,%3.8h"
7184 : "=w"(result)
7185 : "0"(a), "w"(b), "w"(c)
7186 : /* No clobbers */);
7187 return result;
7188 }
7189
7190 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7191 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
7192 {
7193 int32x4_t result;
7194 __asm__ ("mls %0.4s,%2.4s,%3.4s"
7195 : "=w"(result)
7196 : "0"(a), "w"(b), "w"(c)
7197 : /* No clobbers */);
7198 return result;
7199 }
7200
7201 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7202 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
7203 {
7204 uint8x16_t result;
7205 __asm__ ("mls %0.16b,%2.16b,%3.16b"
7206 : "=w"(result)
7207 : "0"(a), "w"(b), "w"(c)
7208 : /* No clobbers */);
7209 return result;
7210 }
7211
7212 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7213 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
7214 {
7215 uint16x8_t result;
7216 __asm__ ("mls %0.8h,%2.8h,%3.8h"
7217 : "=w"(result)
7218 : "0"(a), "w"(b), "w"(c)
7219 : /* No clobbers */);
7220 return result;
7221 }
7222
7223 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7224 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
7225 {
7226 uint32x4_t result;
7227 __asm__ ("mls %0.4s,%2.4s,%3.4s"
7228 : "=w"(result)
7229 : "0"(a), "w"(b), "w"(c)
7230 : /* No clobbers */);
7231 return result;
7232 }
7233
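/* vmovl_high_* / vmovl_*: sign- or zero-extend each element to twice
   its width.  There is no dedicated vector extend instruction; a
   shift-left-long by #0 is the canonical idiom, and the assembler
   aliases sxtl/uxtl (and sxtl2/uxtl2 for the upper halves) expand to
   exactly these sshll/ushll #0 forms.  */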
7234 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7235 vmovl_high_s8 (int8x16_t a)
7236 {
7237 int16x8_t result;
7238 __asm__ ("sshll2 %0.8h,%1.16b,#0"
7239 : "=w"(result)
7240 : "w"(a)
7241 : /* No clobbers */);
7242 return result;
7243 }
7244
7245 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7246 vmovl_high_s16 (int16x8_t a)
7247 {
7248 int32x4_t result;
7249 __asm__ ("sshll2 %0.4s,%1.8h,#0"
7250 : "=w"(result)
7251 : "w"(a)
7252 : /* No clobbers */);
7253 return result;
7254 }
7255
7256 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7257 vmovl_high_s32 (int32x4_t a)
7258 {
7259 int64x2_t result;
7260 __asm__ ("sshll2 %0.2d,%1.4s,#0"
7261 : "=w"(result)
7262 : "w"(a)
7263 : /* No clobbers */);
7264 return result;
7265 }
7266
7267 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7268 vmovl_high_u8 (uint8x16_t a)
7269 {
7270 uint16x8_t result;
7271 __asm__ ("ushll2 %0.8h,%1.16b,#0"
7272 : "=w"(result)
7273 : "w"(a)
7274 : /* No clobbers */);
7275 return result;
7276 }
7277
7278 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7279 vmovl_high_u16 (uint16x8_t a)
7280 {
7281 uint32x4_t result;
7282 __asm__ ("ushll2 %0.4s,%1.8h,#0"
7283 : "=w"(result)
7284 : "w"(a)
7285 : /* No clobbers */);
7286 return result;
7287 }
7288
7289 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7290 vmovl_high_u32 (uint32x4_t a)
7291 {
7292 uint64x2_t result;
7293 __asm__ ("ushll2 %0.2d,%1.4s,#0"
7294 : "=w"(result)
7295 : "w"(a)
7296 : /* No clobbers */);
7297 return result;
7298 }
7299
7300 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7301 vmovl_s8 (int8x8_t a)
7302 {
7303 int16x8_t result;
7304 __asm__ ("sshll %0.8h,%1.8b,#0"
7305 : "=w"(result)
7306 : "w"(a)
7307 : /* No clobbers */);
7308 return result;
7309 }
7310
7311 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7312 vmovl_s16 (int16x4_t a)
7313 {
7314 int32x4_t result;
7315 __asm__ ("sshll %0.4s,%1.4h,#0"
7316 : "=w"(result)
7317 : "w"(a)
7318 : /* No clobbers */);
7319 return result;
7320 }
7321
7322 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7323 vmovl_s32 (int32x2_t a)
7324 {
7325 int64x2_t result;
7326 __asm__ ("sshll %0.2d,%1.2s,#0"
7327 : "=w"(result)
7328 : "w"(a)
7329 : /* No clobbers */);
7330 return result;
7331 }
7332
7333 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7334 vmovl_u8 (uint8x8_t a)
7335 {
7336 uint16x8_t result;
7337 __asm__ ("ushll %0.8h,%1.8b,#0"
7338 : "=w"(result)
7339 : "w"(a)
7340 : /* No clobbers */);
7341 return result;
7342 }
7343
7344 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7345 vmovl_u16 (uint16x4_t a)
7346 {
7347 uint32x4_t result;
7348 __asm__ ("ushll %0.4s,%1.4h,#0"
7349 : "=w"(result)
7350 : "w"(a)
7351 : /* No clobbers */);
7352 return result;
7353 }
7354
7355 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7356 vmovl_u32 (uint32x2_t a)
7357 {
7358 uint64x2_t result;
7359 __asm__ ("ushll %0.2d,%1.2s,#0"
7360 : "=w"(result)
7361 : "w"(a)
7362 : /* No clobbers */);
7363 return result;
7364 }
7365
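/* vmovn_high_*: narrow B into the upper half of the result while
   keeping A as the lower half.  xtn2 writes only the destination's
   upper half, so the result is pre-seeded with vcombine and passed as a
   read-write "+w" operand to keep the lower half live.  Usage sketch
   (illustrative only; narrow_u16x16 is a hypothetical helper, not part
   of this header):

     static inline uint8x16_t
     narrow_u16x16 (uint16x8_t lo, uint16x8_t hi)
     {
       return vmovn_high_u16 (vmovn_u16 (lo), hi);
     }
*/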
7366 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7367 vmovn_high_s16 (int8x8_t a, int16x8_t b)
7368 {
7369 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
7370 __asm__ ("xtn2 %0.16b,%1.8h"
7371 : "+w"(result)
7372 : "w"(b)
7373 : /* No clobbers */);
7374 return result;
7375 }
7376
7377 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7378 vmovn_high_s32 (int16x4_t a, int32x4_t b)
7379 {
7380 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
7381 __asm__ ("xtn2 %0.8h,%1.4s"
7382 : "+w"(result)
7383 : "w"(b)
7384 : /* No clobbers */);
7385 return result;
7386 }
7387
7388 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7389 vmovn_high_s64 (int32x2_t a, int64x2_t b)
7390 {
7391 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
7392 __asm__ ("xtn2 %0.4s,%1.2d"
7393 : "+w"(result)
7394 : "w"(b)
7395 : /* No clobbers */);
7396 return result;
7397 }
7398
7399 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7400 vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
7401 {
7402 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
7403 __asm__ ("xtn2 %0.16b,%1.8h"
7404 : "+w"(result)
7405 : "w"(b)
7406 : /* No clobbers */);
7407 return result;
7408 }
7409
7410 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7411 vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
7412 {
7413 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
7414 __asm__ ("xtn2 %0.8h,%1.4s"
7415 : "+w"(result)
7416 : "w"(b)
7417 : /* No clobbers */);
7418 return result;
7419 }
7420
7421 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7422 vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
7423 {
7424 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
7425 __asm__ ("xtn2 %0.4s,%1.2d"
7426 : "+w"(result)
7427 : "w"(b)
7428 : /* No clobbers */);
7429 return result;
7430 }
7431
7432 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7433 vmovn_s16 (int16x8_t a)
7434 {
7435 int8x8_t result;
7436 __asm__ ("xtn %0.8b,%1.8h"
7437 : "=w"(result)
7438 : "w"(a)
7439 : /* No clobbers */);
7440 return result;
7441 }
7442
7443 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7444 vmovn_s32 (int32x4_t a)
7445 {
7446 int16x4_t result;
7447 __asm__ ("xtn %0.4h,%1.4s"
7448 : "=w"(result)
7449 : "w"(a)
7450 : /* No clobbers */);
7451 return result;
7452 }
7453
7454 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7455 vmovn_s64 (int64x2_t a)
7456 {
7457 int32x2_t result;
7458 __asm__ ("xtn %0.2s,%1.2d"
7459 : "=w"(result)
7460 : "w"(a)
7461 : /* No clobbers */);
7462 return result;
7463 }
7464
7465 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7466 vmovn_u16 (uint16x8_t a)
7467 {
7468 uint8x8_t result;
7469 __asm__ ("xtn %0.8b,%1.8h"
7470 : "=w"(result)
7471 : "w"(a)
7472 : /* No clobbers */);
7473 return result;
7474 }
7475
7476 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7477 vmovn_u32 (uint32x4_t a)
7478 {
7479 uint16x4_t result;
7480 __asm__ ("xtn %0.4h,%1.4s"
7481 : "=w"(result)
7482 : "w"(a)
7483 : /* No clobbers */);
7484 return result;
7485 }
7486
7487 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7488 vmovn_u64 (uint64x2_t a)
7489 {
7490 uint32x2_t result;
7491 __asm__ ("xtn %0.2s,%1.2d"
7492 : "=w"(result)
7493 : "w"(a)
7494 : /* No clobbers */);
7495 return result;
7496 }
7497
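/* vmull_high_lane_* / vmull_high_laneq_*: widening multiply of the
   upper halves by a single lane; the same statement-expression and
   constant-lane-index rules as the vmlal lane macros above apply.  */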
7498 #define vmull_high_lane_s16(a, b, c) \
7499 __extension__ \
7500 ({ \
7501 int16x4_t b_ = (b); \
7502 int16x8_t a_ = (a); \
7503 int32x4_t result; \
7504 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
7505 : "=w"(result) \
7506 : "w"(a_), "x"(b_), "i"(c) \
7507 : /* No clobbers */); \
7508 result; \
7509 })
7510
7511 #define vmull_high_lane_s32(a, b, c) \
7512 __extension__ \
7513 ({ \
7514 int32x2_t b_ = (b); \
7515 int32x4_t a_ = (a); \
7516 int64x2_t result; \
7517 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
7518 : "=w"(result) \
7519 : "w"(a_), "w"(b_), "i"(c) \
7520 : /* No clobbers */); \
7521 result; \
7522 })
7523
7524 #define vmull_high_lane_u16(a, b, c) \
7525 __extension__ \
7526 ({ \
7527 uint16x4_t b_ = (b); \
7528 uint16x8_t a_ = (a); \
7529 uint32x4_t result; \
7530 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
7531 : "=w"(result) \
7532 : "w"(a_), "x"(b_), "i"(c) \
7533 : /* No clobbers */); \
7534 result; \
7535 })
7536
7537 #define vmull_high_lane_u32(a, b, c) \
7538 __extension__ \
7539 ({ \
7540 uint32x2_t b_ = (b); \
7541 uint32x4_t a_ = (a); \
7542 uint64x2_t result; \
7543 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
7544 : "=w"(result) \
7545 : "w"(a_), "w"(b_), "i"(c) \
7546 : /* No clobbers */); \
7547 result; \
7548 })
7549
7550 #define vmull_high_laneq_s16(a, b, c) \
7551 __extension__ \
7552 ({ \
7553 int16x8_t b_ = (b); \
7554 int16x8_t a_ = (a); \
7555 int32x4_t result; \
7556 __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
7557 : "=w"(result) \
7558 : "w"(a_), "x"(b_), "i"(c) \
7559 : /* No clobbers */); \
7560 result; \
7561 })
7562
7563 #define vmull_high_laneq_s32(a, b, c) \
7564 __extension__ \
7565 ({ \
7566 int32x4_t b_ = (b); \
7567 int32x4_t a_ = (a); \
7568 int64x2_t result; \
7569 __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
7570 : "=w"(result) \
7571 : "w"(a_), "w"(b_), "i"(c) \
7572 : /* No clobbers */); \
7573 result; \
7574 })
7575
7576 #define vmull_high_laneq_u16(a, b, c) \
7577 __extension__ \
7578 ({ \
7579 uint16x8_t b_ = (b); \
7580 uint16x8_t a_ = (a); \
7581 uint32x4_t result; \
7582 __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
7583 : "=w"(result) \
7584 : "w"(a_), "x"(b_), "i"(c) \
7585 : /* No clobbers */); \
7586 result; \
7587 })
7588
7589 #define vmull_high_laneq_u32(a, b, c) \
7590 __extension__ \
7591 ({ \
7592 uint32x4_t b_ = (b); \
7593 uint32x4_t a_ = (a); \
7594 uint64x2_t result; \
7595 __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
7596 : "=w"(result) \
7597 : "w"(a_), "w"(b_), "i"(c) \
7598 : /* No clobbers */); \
7599 result; \
7600 })
7601
7602 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7603 vmull_high_n_s16 (int16x8_t a, int16_t b)
7604 {
7605 int32x4_t result;
7606 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
7607 : "=w"(result)
7608 : "w"(a), "x"(b)
7609 : /* No clobbers */);
7610 return result;
7611 }
7612
7613 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7614 vmull_high_n_s32 (int32x4_t a, int32_t b)
7615 {
7616 int64x2_t result;
7617 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
7618 : "=w"(result)
7619 : "w"(a), "w"(b)
7620 : /* No clobbers */);
7621 return result;
7622 }
7623
7624 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7625 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
7626 {
7627 uint32x4_t result;
7628 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
7629 : "=w"(result)
7630 : "w"(a), "x"(b)
7631 : /* No clobbers */);
7632 return result;
7633 }
7634
7635 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7636 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
7637 {
7638 uint64x2_t result;
7639 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
7640 : "=w"(result)
7641 : "w"(a), "w"(b)
7642 : /* No clobbers */);
7643 return result;
7644 }
7645
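/* vmull_high_p8 (and vmull_p8 further below): polynomial, i.e.
   carry-less, multiplication over GF(2) of 8-bit elements, widening to
   16 bits (pmull/pmull2), the building block for CRC- and GHASH-style
   code.  */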
7646 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
7647 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
7648 {
7649 poly16x8_t result;
7650 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
7651 : "=w"(result)
7652 : "w"(a), "w"(b)
7653 : /* No clobbers */);
7654 return result;
7655 }
7656
7657 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7658 vmull_high_s8 (int8x16_t a, int8x16_t b)
7659 {
7660 int16x8_t result;
7661 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
7662 : "=w"(result)
7663 : "w"(a), "w"(b)
7664 : /* No clobbers */);
7665 return result;
7666 }
7667
7668 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7669 vmull_high_s16 (int16x8_t a, int16x8_t b)
7670 {
7671 int32x4_t result;
7672 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
7673 : "=w"(result)
7674 : "w"(a), "w"(b)
7675 : /* No clobbers */);
7676 return result;
7677 }
7678
7679 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7680 vmull_high_s32 (int32x4_t a, int32x4_t b)
7681 {
7682 int64x2_t result;
7683 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
7684 : "=w"(result)
7685 : "w"(a), "w"(b)
7686 : /* No clobbers */);
7687 return result;
7688 }
7689
7690 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7691 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
7692 {
7693 uint16x8_t result;
7694 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
7695 : "=w"(result)
7696 : "w"(a), "w"(b)
7697 : /* No clobbers */);
7698 return result;
7699 }
7700
7701 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7702 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
7703 {
7704 uint32x4_t result;
7705 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
7706 : "=w"(result)
7707 : "w"(a), "w"(b)
7708 : /* No clobbers */);
7709 return result;
7710 }
7711
7712 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7713 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
7714 {
7715 uint64x2_t result;
7716 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
7717 : "=w"(result)
7718 : "w"(a), "w"(b)
7719 : /* No clobbers */);
7720 return result;
7721 }
7722
7723 #define vmull_lane_s16(a, b, c) \
7724 __extension__ \
7725 ({ \
7726 int16x4_t b_ = (b); \
7727 int16x4_t a_ = (a); \
7728 int32x4_t result; \
7729 __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \
7730 : "=w"(result) \
7731 : "w"(a_), "x"(b_), "i"(c) \
7732 : /* No clobbers */); \
7733 result; \
7734 })
7735
7736 #define vmull_lane_s32(a, b, c) \
7737 __extension__ \
7738 ({ \
7739 int32x2_t b_ = (b); \
7740 int32x2_t a_ = (a); \
7741 int64x2_t result; \
7742 __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \
7743 : "=w"(result) \
7744 : "w"(a_), "w"(b_), "i"(c) \
7745 : /* No clobbers */); \
7746 result; \
7747 })
7748
7749 #define vmull_lane_u16(a, b, c) \
7750 __extension__ \
7751 ({ \
7752 uint16x4_t b_ = (b); \
7753 uint16x4_t a_ = (a); \
7754 uint32x4_t result; \
7755 __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \
7756 : "=w"(result) \
7757 : "w"(a_), "x"(b_), "i"(c) \
7758 : /* No clobbers */); \
7759 result; \
7760 })
7761
7762 #define vmull_lane_u32(a, b, c) \
7763 __extension__ \
7764 ({ \
7765 uint32x2_t b_ = (b); \
7766 uint32x2_t a_ = (a); \
7767 uint64x2_t result; \
7768 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
7769 : "=w"(result) \
7770 : "w"(a_), "w"(b_), "i"(c) \
7771 : /* No clobbers */); \
7772 result; \
7773 })
7774
7775 #define vmull_laneq_s16(a, b, c) \
7776 __extension__ \
7777 ({ \
7778 int16x8_t b_ = (b); \
7779 int16x4_t a_ = (a); \
7780 int32x4_t result; \
7781 __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
7782 : "=w"(result) \
7783 : "w"(a_), "x"(b_), "i"(c) \
7784 : /* No clobbers */); \
7785 result; \
7786 })
7787
7788 #define vmull_laneq_s32(a, b, c) \
7789 __extension__ \
7790 ({ \
7791 int32x4_t b_ = (b); \
7792 int32x2_t a_ = (a); \
7793 int64x2_t result; \
7794 __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \
7795 : "=w"(result) \
7796 : "w"(a_), "w"(b_), "i"(c) \
7797 : /* No clobbers */); \
7798 result; \
7799 })
7800
7801 #define vmull_laneq_u16(a, b, c) \
7802 __extension__ \
7803 ({ \
7804 uint16x8_t b_ = (b); \
7805 uint16x4_t a_ = (a); \
7806 uint32x4_t result; \
7807 __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
7808 : "=w"(result) \
7809 : "w"(a_), "x"(b_), "i"(c) \
7810 : /* No clobbers */); \
7811 result; \
7812 })
7813
7814 #define vmull_laneq_u32(a, b, c) \
7815 __extension__ \
7816 ({ \
7817 uint32x4_t b_ = (b); \
7818 uint32x2_t a_ = (a); \
7819 uint64x2_t result; \
7820 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
7821 : "=w"(result) \
7822 : "w"(a_), "w"(b_), "i"(c) \
7823 : /* No clobbers */); \
7824 result; \
7825 })
7826
7827 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7828 vmull_n_s16 (int16x4_t a, int16_t b)
7829 {
7830 int32x4_t result;
7831 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
7832 : "=w"(result)
7833 : "w"(a), "x"(b)
7834 : /* No clobbers */);
7835 return result;
7836 }
7837
7838 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7839 vmull_n_s32 (int32x2_t a, int32_t b)
7840 {
7841 int64x2_t result;
7842 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
7843 : "=w"(result)
7844 : "w"(a), "w"(b)
7845 : /* No clobbers */);
7846 return result;
7847 }
7848
7849 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7850 vmull_n_u16 (uint16x4_t a, uint16_t b)
7851 {
7852 uint32x4_t result;
7853 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
7854 : "=w"(result)
7855 : "w"(a), "x"(b)
7856 : /* No clobbers */);
7857 return result;
7858 }
7859
7860 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7861 vmull_n_u32 (uint32x2_t a, uint32_t b)
7862 {
7863 uint64x2_t result;
7864 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
7865 : "=w"(result)
7866 : "w"(a), "w"(b)
7867 : /* No clobbers */);
7868 return result;
7869 }
7870
7871 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
7872 vmull_p8 (poly8x8_t a, poly8x8_t b)
7873 {
7874 poly16x8_t result;
7875 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
7876 : "=w"(result)
7877 : "w"(a), "w"(b)
7878 : /* No clobbers */);
7879 return result;
7880 }
7881
7882 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7883 vmull_s8 (int8x8_t a, int8x8_t b)
7884 {
7885 int16x8_t result;
7886 __asm__ ("smull %0.8h, %1.8b, %2.8b"
7887 : "=w"(result)
7888 : "w"(a), "w"(b)
7889 : /* No clobbers */);
7890 return result;
7891 }
7892
7893 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7894 vmull_s16 (int16x4_t a, int16x4_t b)
7895 {
7896 int32x4_t result;
7897 __asm__ ("smull %0.4s, %1.4h, %2.4h"
7898 : "=w"(result)
7899 : "w"(a), "w"(b)
7900 : /* No clobbers */);
7901 return result;
7902 }
7903
7904 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7905 vmull_s32 (int32x2_t a, int32x2_t b)
7906 {
7907 int64x2_t result;
7908 __asm__ ("smull %0.2d, %1.2s, %2.2s"
7909 : "=w"(result)
7910 : "w"(a), "w"(b)
7911 : /* No clobbers */);
7912 return result;
7913 }
7914
7915 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7916 vmull_u8 (uint8x8_t a, uint8x8_t b)
7917 {
7918 uint16x8_t result;
7919 __asm__ ("umull %0.8h, %1.8b, %2.8b"
7920 : "=w"(result)
7921 : "w"(a), "w"(b)
7922 : /* No clobbers */);
7923 return result;
7924 }
7925
7926 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7927 vmull_u16 (uint16x4_t a, uint16x4_t b)
7928 {
7929 uint32x4_t result;
7930 __asm__ ("umull %0.4s, %1.4h, %2.4h"
7931 : "=w"(result)
7932 : "w"(a), "w"(b)
7933 : /* No clobbers */);
7934 return result;
7935 }
7936
7937 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7938 vmull_u32 (uint32x2_t a, uint32x2_t b)
7939 {
7940 uint64x2_t result;
7941 __asm__ ("umull %0.2d, %1.2s, %2.2s"
7942 : "=w"(result)
7943 : "w"(a), "w"(b)
7944 : /* No clobbers */);
7945 return result;
7946 }
7947
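/* vpadal_*: pairwise add-accumulate long ({S,U}ADALP).  Adjacent lane
   pairs of the second operand are summed at double width and added to
   the accumulator; the vpaddl_* variants further below are the
   non-accumulating forms.  Sketch (illustrative) of overflow-safe byte
   summation:

     uint16x4_t acc = vdup_n_u16 (0);
     acc = vpadal_u8 (acc, bytes);  // acc[i] += bytes[2i] + bytes[2i+1]
 */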
7948 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7949 vpadal_s8 (int16x4_t a, int8x8_t b)
7950 {
7951 int16x4_t result;
7952 __asm__ ("sadalp %0.4h,%2.8b"
7953 : "=w"(result)
7954 : "0"(a), "w"(b)
7955 : /* No clobbers */);
7956 return result;
7957 }
7958
7959 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7960 vpadal_s16 (int32x2_t a, int16x4_t b)
7961 {
7962 int32x2_t result;
7963 __asm__ ("sadalp %0.2s,%2.4h"
7964 : "=w"(result)
7965 : "0"(a), "w"(b)
7966 : /* No clobbers */);
7967 return result;
7968 }
7969
7970 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
7971 vpadal_s32 (int64x1_t a, int32x2_t b)
7972 {
7973 int64x1_t result;
7974 __asm__ ("sadalp %0.1d,%2.2s"
7975 : "=w"(result)
7976 : "0"(a), "w"(b)
7977 : /* No clobbers */);
7978 return result;
7979 }
7980
7981 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7982 vpadal_u8 (uint16x4_t a, uint8x8_t b)
7983 {
7984 uint16x4_t result;
7985 __asm__ ("uadalp %0.4h,%2.8b"
7986 : "=w"(result)
7987 : "0"(a), "w"(b)
7988 : /* No clobbers */);
7989 return result;
7990 }
7991
7992 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7993 vpadal_u16 (uint32x2_t a, uint16x4_t b)
7994 {
7995 uint32x2_t result;
7996 __asm__ ("uadalp %0.2s,%2.4h"
7997 : "=w"(result)
7998 : "0"(a), "w"(b)
7999 : /* No clobbers */);
8000 return result;
8001 }
8002
8003 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8004 vpadal_u32 (uint64x1_t a, uint32x2_t b)
8005 {
8006 uint64x1_t result;
8007 __asm__ ("uadalp %0.1d,%2.2s"
8008 : "=w"(result)
8009 : "0"(a), "w"(b)
8010 : /* No clobbers */);
8011 return result;
8012 }
8013
8014 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8015 vpadalq_s8 (int16x8_t a, int8x16_t b)
8016 {
8017 int16x8_t result;
8018 __asm__ ("sadalp %0.8h,%2.16b"
8019 : "=w"(result)
8020 : "0"(a), "w"(b)
8021 : /* No clobbers */);
8022 return result;
8023 }
8024
8025 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8026 vpadalq_s16 (int32x4_t a, int16x8_t b)
8027 {
8028 int32x4_t result;
8029 __asm__ ("sadalp %0.4s,%2.8h"
8030 : "=w"(result)
8031 : "0"(a), "w"(b)
8032 : /* No clobbers */);
8033 return result;
8034 }
8035
8036 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8037 vpadalq_s32 (int64x2_t a, int32x4_t b)
8038 {
8039 int64x2_t result;
8040 __asm__ ("sadalp %0.2d,%2.4s"
8041 : "=w"(result)
8042 : "0"(a), "w"(b)
8043 : /* No clobbers */);
8044 return result;
8045 }
8046
8047 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8048 vpadalq_u8 (uint16x8_t a, uint8x16_t b)
8049 {
8050 uint16x8_t result;
8051 __asm__ ("uadalp %0.8h,%2.16b"
8052 : "=w"(result)
8053 : "0"(a), "w"(b)
8054 : /* No clobbers */);
8055 return result;
8056 }
8057
8058 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8059 vpadalq_u16 (uint32x4_t a, uint16x8_t b)
8060 {
8061 uint32x4_t result;
8062 __asm__ ("uadalp %0.4s,%2.8h"
8063 : "=w"(result)
8064 : "0"(a), "w"(b)
8065 : /* No clobbers */);
8066 return result;
8067 }
8068
8069 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8070 vpadalq_u32 (uint64x2_t a, uint32x4_t b)
8071 {
8072 uint64x2_t result;
8073 __asm__ ("uadalp %0.2d,%2.4s"
8074 : "=w"(result)
8075 : "0"(a), "w"(b)
8076 : /* No clobbers */);
8077 return result;
8078 }
8079
8080 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8081 vpaddl_s8 (int8x8_t a)
8082 {
8083 int16x4_t result;
8084 __asm__ ("saddlp %0.4h,%1.8b"
8085 : "=w"(result)
8086 : "w"(a)
8087 : /* No clobbers */);
8088 return result;
8089 }
8090
8091 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8092 vpaddl_s16 (int16x4_t a)
8093 {
8094 int32x2_t result;
8095 __asm__ ("saddlp %0.2s,%1.4h"
8096 : "=w"(result)
8097 : "w"(a)
8098 : /* No clobbers */);
8099 return result;
8100 }
8101
8102 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
8103 vpaddl_s32 (int32x2_t a)
8104 {
8105 int64x1_t result;
8106 __asm__ ("saddlp %0.1d,%1.2s"
8107 : "=w"(result)
8108 : "w"(a)
8109 : /* No clobbers */);
8110 return result;
8111 }
8112
8113 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8114 vpaddl_u8 (uint8x8_t a)
8115 {
8116 uint16x4_t result;
8117 __asm__ ("uaddlp %0.4h,%1.8b"
8118 : "=w"(result)
8119 : "w"(a)
8120 : /* No clobbers */);
8121 return result;
8122 }
8123
8124 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8125 vpaddl_u16 (uint16x4_t a)
8126 {
8127 uint32x2_t result;
8128 __asm__ ("uaddlp %0.2s,%1.4h"
8129 : "=w"(result)
8130 : "w"(a)
8131 : /* No clobbers */);
8132 return result;
8133 }
8134
8135 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8136 vpaddl_u32 (uint32x2_t a)
8137 {
8138 uint64x1_t result;
8139 __asm__ ("uaddlp %0.1d,%1.2s"
8140 : "=w"(result)
8141 : "w"(a)
8142 : /* No clobbers */);
8143 return result;
8144 }
8145
8146 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8147 vpaddlq_s8 (int8x16_t a)
8148 {
8149 int16x8_t result;
8150 __asm__ ("saddlp %0.8h,%1.16b"
8151 : "=w"(result)
8152 : "w"(a)
8153 : /* No clobbers */);
8154 return result;
8155 }
8156
8157 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8158 vpaddlq_s16 (int16x8_t a)
8159 {
8160 int32x4_t result;
8161 __asm__ ("saddlp %0.4s,%1.8h"
8162 : "=w"(result)
8163 : "w"(a)
8164 : /* No clobbers */);
8165 return result;
8166 }
8167
8168 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8169 vpaddlq_s32 (int32x4_t a)
8170 {
8171 int64x2_t result;
8172 __asm__ ("saddlp %0.2d,%1.4s"
8173 : "=w"(result)
8174 : "w"(a)
8175 : /* No clobbers */);
8176 return result;
8177 }
8178
8179 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8180 vpaddlq_u8 (uint8x16_t a)
8181 {
8182 uint16x8_t result;
8183 __asm__ ("uaddlp %0.8h,%1.16b"
8184 : "=w"(result)
8185 : "w"(a)
8186 : /* No clobbers */);
8187 return result;
8188 }
8189
8190 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8191 vpaddlq_u16 (uint16x8_t a)
8192 {
8193 uint32x4_t result;
8194 __asm__ ("uaddlp %0.4s,%1.8h"
8195 : "=w"(result)
8196 : "w"(a)
8197 : /* No clobbers */);
8198 return result;
8199 }
8200
8201 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8202 vpaddlq_u32 (uint32x4_t a)
8203 {
8204 uint64x2_t result;
8205 __asm__ ("uaddlp %0.2d,%1.4s"
8206 : "=w"(result)
8207 : "w"(a)
8208 : /* No clobbers */);
8209 return result;
8210 }
8211
8212 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8213 vpaddq_s8 (int8x16_t a, int8x16_t b)
8214 {
8215 int8x16_t result;
8216 __asm__ ("addp %0.16b,%1.16b,%2.16b"
8217 : "=w"(result)
8218 : "w"(a), "w"(b)
8219 : /* No clobbers */);
8220 return result;
8221 }
8222
8223 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8224 vpaddq_s16 (int16x8_t a, int16x8_t b)
8225 {
8226 int16x8_t result;
8227 __asm__ ("addp %0.8h,%1.8h,%2.8h"
8228 : "=w"(result)
8229 : "w"(a), "w"(b)
8230 : /* No clobbers */);
8231 return result;
8232 }
8233
8234 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8235 vpaddq_s32 (int32x4_t a, int32x4_t b)
8236 {
8237 int32x4_t result;
8238 __asm__ ("addp %0.4s,%1.4s,%2.4s"
8239 : "=w"(result)
8240 : "w"(a), "w"(b)
8241 : /* No clobbers */);
8242 return result;
8243 }
8244
8245 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8246 vpaddq_s64 (int64x2_t a, int64x2_t b)
8247 {
8248 int64x2_t result;
8249 __asm__ ("addp %0.2d,%1.2d,%2.2d"
8250 : "=w"(result)
8251 : "w"(a), "w"(b)
8252 : /* No clobbers */);
8253 return result;
8254 }
8255
8256 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8257 vpaddq_u8 (uint8x16_t a, uint8x16_t b)
8258 {
8259 uint8x16_t result;
8260 __asm__ ("addp %0.16b,%1.16b,%2.16b"
8261 : "=w"(result)
8262 : "w"(a), "w"(b)
8263 : /* No clobbers */);
8264 return result;
8265 }
8266
8267 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8268 vpaddq_u16 (uint16x8_t a, uint16x8_t b)
8269 {
8270 uint16x8_t result;
8271 __asm__ ("addp %0.8h,%1.8h,%2.8h"
8272 : "=w"(result)
8273 : "w"(a), "w"(b)
8274 : /* No clobbers */);
8275 return result;
8276 }
8277
8278 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8279 vpaddq_u32 (uint32x4_t a, uint32x4_t b)
8280 {
8281 uint32x4_t result;
8282 __asm__ ("addp %0.4s,%1.4s,%2.4s"
8283 : "=w"(result)
8284 : "w"(a), "w"(b)
8285 : /* No clobbers */);
8286 return result;
8287 }
8288
8289 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8290 vpaddq_u64 (uint64x2_t a, uint64x2_t b)
8291 {
8292 uint64x2_t result;
8293 __asm__ ("addp %0.2d,%1.2d,%2.2d"
8294 : "=w"(result)
8295 : "w"(a), "w"(b)
8296 : /* No clobbers */);
8297 return result;
8298 }
8299
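/* vqdmulh_n_*: saturating doubling multiply returning the high half,
   the usual Q15/Q31 fixed-point multiply.  Sketch (gain_q15 is an
   illustrative Q15 scale factor):

     int16x4_t scaled = vqdmulh_n_s16 (samples, gain_q15);

   where each lane is saturate ((2 * samples[i] * gain_q15) >> 16).  */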
8300 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8301 vqdmulh_n_s16 (int16x4_t a, int16_t b)
8302 {
8303 int16x4_t result;
8304 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
8305 : "=w"(result)
8306 : "w"(a), "x"(b)
8307 : /* No clobbers */);
8308 return result;
8309 }
8310
8311 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8312 vqdmulh_n_s32 (int32x2_t a, int32_t b)
8313 {
8314 int32x2_t result;
8315 __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
8316 : "=w"(result)
8317 : "w"(a), "w"(b)
8318 : /* No clobbers */);
8319 return result;
8320 }
8321
8322 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8323 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
8324 {
8325 int16x8_t result;
8326 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
8327 : "=w"(result)
8328 : "w"(a), "x"(b)
8329 : /* No clobbers */);
8330 return result;
8331 }
8332
8333 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8334 vqdmulhq_n_s32 (int32x4_t a, int32_t b)
8335 {
8336 int32x4_t result;
8337 __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
8338 : "=w"(result)
8339 : "w"(a), "w"(b)
8340 : /* No clobbers */);
8341 return result;
8342 }
8343
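/* vqmovn_high_* / vqmovun_high_*: saturating narrow of the second
   operand into the upper half of the result ({S,U}QXTN2, SQXTUN2),
   keeping the first operand as the lower half.  The usual idiom packs
   two wide vectors into one (sketch, illustrative):

     int8x16_t packed = vqmovn_high_s16 (vqmovn_s16 (lo), hi);
 */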
8344 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8345 vqmovn_high_s16 (int8x8_t a, int16x8_t b)
8346 {
8347 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
8348 __asm__ ("sqxtn2 %0.16b, %1.8h"
8349 : "+w"(result)
8350 : "w"(b)
8351 : /* No clobbers */);
8352 return result;
8353 }
8354
8355 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8356 vqmovn_high_s32 (int16x4_t a, int32x4_t b)
8357 {
8358 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
8359 __asm__ ("sqxtn2 %0.8h, %1.4s"
8360 : "+w"(result)
8361 : "w"(b)
8362 : /* No clobbers */);
8363 return result;
8364 }
8365
8366 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8367 vqmovn_high_s64 (int32x2_t a, int64x2_t b)
8368 {
8369 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
8370 __asm__ ("sqxtn2 %0.4s, %1.2d"
8371 : "+w"(result)
8372 : "w"(b)
8373 : /* No clobbers */);
8374 return result;
8375 }
8376
8377 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8378 vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
8379 {
8380 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
8381 __asm__ ("uqxtn2 %0.16b, %1.8h"
8382 : "+w"(result)
8383 : "w"(b)
8384 : /* No clobbers */);
8385 return result;
8386 }
8387
8388 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8389 vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
8390 {
8391 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
8392 __asm__ ("uqxtn2 %0.8h, %1.4s"
8393 : "+w"(result)
8394 : "w"(b)
8395 : /* No clobbers */);
8396 return result;
8397 }
8398
8399 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8400 vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
8401 {
8402 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
8403 __asm__ ("uqxtn2 %0.4s, %1.2d"
8404 : "+w"(result)
8405 : "w"(b)
8406 : /* No clobbers */);
8407 return result;
8408 }
8409
8410 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8411 vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
8412 {
8413 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
8414 __asm__ ("sqxtun2 %0.16b, %1.8h"
8415 : "+w"(result)
8416 : "w"(b)
8417 : /* No clobbers */);
8418 return result;
8419 }
8420
8421 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8422 vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
8423 {
8424 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
8425 __asm__ ("sqxtun2 %0.8h, %1.4s"
8426 : "+w"(result)
8427 : "w"(b)
8428 : /* No clobbers */);
8429 return result;
8430 }
8431
8432 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8433 vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
8434 {
8435 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
8436 __asm__ ("sqxtun2 %0.4s, %1.2d"
8437 : "+w"(result)
8438 : "w"(b)
8439 : /* No clobbers */);
8440 return result;
8441 }
8442
8443 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8444 vqrdmulh_n_s16 (int16x4_t a, int16_t b)
8445 {
8446 int16x4_t result;
8447 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
8448 : "=w"(result)
8449 : "w"(a), "x"(b)
8450 : /* No clobbers */);
8451 return result;
8452 }
8453
8454 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8455 vqrdmulh_n_s32 (int32x2_t a, int32_t b)
8456 {
8457 int32x2_t result;
8458 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
8459 : "=w"(result)
8460 : "w"(a), "w"(b)
8461 : /* No clobbers */);
8462 return result;
8463 }
8464
8465 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8466 vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
8467 {
8468 int16x8_t result;
8469 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
8470 : "=w"(result)
8471 : "w"(a), "x"(b)
8472 : /* No clobbers */);
8473 return result;
8474 }
8475
8476 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8477 vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
8478 {
8479 int32x4_t result;
8480 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
8481 : "=w"(result)
8482 : "w"(a), "w"(b)
8483 : /* No clobbers */);
8484 return result;
8485 }
8486
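/* The *shrn_high_n macros below narrow with a constant right shift
   into the upper half of the result, mirroring the plain *shrn_n forms
   defined elsewhere in this file; the shift count must be a
   compile-time constant.  Sketch (illustrative) packing two rounded,
   shifted 16-bit vectors into one byte vector:

     int8x16_t r = vqrshrn_high_n_s16 (vqrshrn_n_s16 (lo, 4), hi, 4);
 */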
8487 #define vqrshrn_high_n_s16(a, b, c) \
8488 __extension__ \
8489 ({ \
8490 int16x8_t b_ = (b); \
8491 int8x8_t a_ = (a); \
8492 int8x16_t result = vcombine_s8 \
8493 (a_, vcreate_s8 \
8494 (__AARCH64_UINT64_C (0x0))); \
8495 __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \
8496 : "+w"(result) \
8497 : "w"(b_), "i"(c) \
8498 : /* No clobbers */); \
8499 result; \
8500 })
8501
8502 #define vqrshrn_high_n_s32(a, b, c) \
8503 __extension__ \
8504 ({ \
8505 int32x4_t b_ = (b); \
8506 int16x4_t a_ = (a); \
8507 int16x8_t result = vcombine_s16 \
8508 (a_, vcreate_s16 \
8509 (__AARCH64_UINT64_C (0x0))); \
8510 __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \
8511 : "+w"(result) \
8512 : "w"(b_), "i"(c) \
8513 : /* No clobbers */); \
8514 result; \
8515 })
8516
8517 #define vqrshrn_high_n_s64(a, b, c) \
8518 __extension__ \
8519 ({ \
8520 int64x2_t b_ = (b); \
8521 int32x2_t a_ = (a); \
8522 int32x4_t result = vcombine_s32 \
8523 (a_, vcreate_s32 \
8524 (__AARCH64_UINT64_C (0x0))); \
8525 __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \
8526 : "+w"(result) \
8527 : "w"(b_), "i"(c) \
8528 : /* No clobbers */); \
8529 result; \
8530 })
8531
8532 #define vqrshrn_high_n_u16(a, b, c) \
8533 __extension__ \
8534 ({ \
8535 uint16x8_t b_ = (b); \
8536 uint8x8_t a_ = (a); \
8537 uint8x16_t result = vcombine_u8 \
8538 (a_, vcreate_u8 \
8539 (__AARCH64_UINT64_C (0x0))); \
8540 __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \
8541 : "+w"(result) \
8542 : "w"(b_), "i"(c) \
8543 : /* No clobbers */); \
8544 result; \
8545 })
8546
8547 #define vqrshrn_high_n_u32(a, b, c) \
8548 __extension__ \
8549 ({ \
8550 uint32x4_t b_ = (b); \
8551 uint16x4_t a_ = (a); \
8552 uint16x8_t result = vcombine_u16 \
8553 (a_, vcreate_u16 \
8554 (__AARCH64_UINT64_C (0x0))); \
8555 __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \
8556 : "+w"(result) \
8557 : "w"(b_), "i"(c) \
8558 : /* No clobbers */); \
8559 result; \
8560 })
8561
8562 #define vqrshrn_high_n_u64(a, b, c) \
8563 __extension__ \
8564 ({ \
8565 uint64x2_t b_ = (b); \
8566 uint32x2_t a_ = (a); \
8567 uint32x4_t result = vcombine_u32 \
8568 (a_, vcreate_u32 \
8569 (__AARCH64_UINT64_C (0x0))); \
8570 __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \
8571 : "+w"(result) \
8572 : "w"(b_), "i"(c) \
8573 : /* No clobbers */); \
8574 result; \
8575 })
8576
8577 #define vqrshrun_high_n_s16(a, b, c) \
8578 __extension__ \
8579 ({ \
8580 int16x8_t b_ = (b); \
8581 uint8x8_t a_ = (a); \
8582 uint8x16_t result = vcombine_u8 \
8583 (a_, vcreate_u8 \
8584 (__AARCH64_UINT64_C (0x0))); \
8585 __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \
8586 : "+w"(result) \
8587 : "w"(b_), "i"(c) \
8588 : /* No clobbers */); \
8589 result; \
8590 })
8591
8592 #define vqrshrun_high_n_s32(a, b, c) \
8593 __extension__ \
8594 ({ \
8595 int32x4_t b_ = (b); \
8596 uint16x4_t a_ = (a); \
8597 uint16x8_t result = vcombine_u16 \
8598 (a_, vcreate_u16 \
8599 (__AARCH64_UINT64_C (0x0))); \
8600 __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \
8601 : "+w"(result) \
8602 : "w"(b_), "i"(c) \
8603 : /* No clobbers */); \
8604 result; \
8605 })
8606
8607 #define vqrshrun_high_n_s64(a, b, c) \
8608 __extension__ \
8609 ({ \
8610 int64x2_t b_ = (b); \
8611 uint32x2_t a_ = (a); \
8612 uint32x4_t result = vcombine_u32 \
8613 (a_, vcreate_u32 \
8614 (__AARCH64_UINT64_C (0x0))); \
8615 __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \
8616 : "+w"(result) \
8617 : "w"(b_), "i"(c) \
8618 : /* No clobbers */); \
8619 result; \
8620 })
8621
8622 #define vqshrn_high_n_s16(a, b, c) \
8623 __extension__ \
8624 ({ \
8625 int16x8_t b_ = (b); \
8626 int8x8_t a_ = (a); \
8627 int8x16_t result = vcombine_s8 \
8628 (a_, vcreate_s8 \
8629 (__AARCH64_UINT64_C (0x0))); \
8630 __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \
8631 : "+w"(result) \
8632 : "w"(b_), "i"(c) \
8633 : /* No clobbers */); \
8634 result; \
8635 })
8636
8637 #define vqshrn_high_n_s32(a, b, c) \
8638 __extension__ \
8639 ({ \
8640 int32x4_t b_ = (b); \
8641 int16x4_t a_ = (a); \
8642 int16x8_t result = vcombine_s16 \
8643 (a_, vcreate_s16 \
8644 (__AARCH64_UINT64_C (0x0))); \
8645 __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \
8646 : "+w"(result) \
8647 : "w"(b_), "i"(c) \
8648 : /* No clobbers */); \
8649 result; \
8650 })
8651
8652 #define vqshrn_high_n_s64(a, b, c) \
8653 __extension__ \
8654 ({ \
8655 int64x2_t b_ = (b); \
8656 int32x2_t a_ = (a); \
8657 int32x4_t result = vcombine_s32 \
8658 (a_, vcreate_s32 \
8659 (__AARCH64_UINT64_C (0x0))); \
8660 __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \
8661 : "+w"(result) \
8662 : "w"(b_), "i"(c) \
8663 : /* No clobbers */); \
8664 result; \
8665 })
8666
8667 #define vqshrn_high_n_u16(a, b, c) \
8668 __extension__ \
8669 ({ \
8670 uint16x8_t b_ = (b); \
8671 uint8x8_t a_ = (a); \
8672 uint8x16_t result = vcombine_u8 \
8673 (a_, vcreate_u8 \
8674 (__AARCH64_UINT64_C (0x0))); \
8675 __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \
8676 : "+w"(result) \
8677 : "w"(b_), "i"(c) \
8678 : /* No clobbers */); \
8679 result; \
8680 })
8681
8682 #define vqshrn_high_n_u32(a, b, c) \
8683 __extension__ \
8684 ({ \
8685 uint32x4_t b_ = (b); \
8686 uint16x4_t a_ = (a); \
8687 uint16x8_t result = vcombine_u16 \
8688 (a_, vcreate_u16 \
8689 (__AARCH64_UINT64_C (0x0))); \
8690 __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \
8691 : "+w"(result) \
8692 : "w"(b_), "i"(c) \
8693 : /* No clobbers */); \
8694 result; \
8695 })
8696
8697 #define vqshrn_high_n_u64(a, b, c) \
8698 __extension__ \
8699 ({ \
8700 uint64x2_t b_ = (b); \
8701 uint32x2_t a_ = (a); \
8702 uint32x4_t result = vcombine_u32 \
8703 (a_, vcreate_u32 \
8704 (__AARCH64_UINT64_C (0x0))); \
8705 __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \
8706 : "+w"(result) \
8707 : "w"(b_), "i"(c) \
8708 : /* No clobbers */); \
8709 result; \
8710 })
8711
8712 #define vqshrun_high_n_s16(a, b, c) \
8713 __extension__ \
8714 ({ \
8715 int16x8_t b_ = (b); \
8716 uint8x8_t a_ = (a); \
8717 uint8x16_t result = vcombine_u8 \
8718 (a_, vcreate_u8 \
8719 (__AARCH64_UINT64_C (0x0))); \
8720 __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \
8721 : "+w"(result) \
8722 : "w"(b_), "i"(c) \
8723 : /* No clobbers */); \
8724 result; \
8725 })
8726
8727 #define vqshrun_high_n_s32(a, b, c) \
8728 __extension__ \
8729 ({ \
8730 int32x4_t b_ = (b); \
8731 uint16x4_t a_ = (a); \
8732 uint16x8_t result = vcombine_u16 \
8733 (a_, vcreate_u16 \
8734 (__AARCH64_UINT64_C (0x0))); \
8735 __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \
8736 : "+w"(result) \
8737 : "w"(b_), "i"(c) \
8738 : /* No clobbers */); \
8739 result; \
8740 })
8741
8742 #define vqshrun_high_n_s64(a, b, c) \
8743 __extension__ \
8744 ({ \
8745 int64x2_t b_ = (b); \
8746 uint32x2_t a_ = (a); \
8747 uint32x4_t result = vcombine_u32 \
8748 (a_, vcreate_u32 \
8749 (__AARCH64_UINT64_C (0x0))); \
8750 __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \
8751 : "+w"(result) \
8752 : "w"(b_), "i"(c) \
8753 : /* No clobbers */); \
8754 result; \
8755 })
8756
8757 #define vrshrn_high_n_s16(a, b, c) \
8758 __extension__ \
8759 ({ \
8760 int16x8_t b_ = (b); \
8761 int8x8_t a_ = (a); \
8762 int8x16_t result = vcombine_s8 \
8763 (a_, vcreate_s8 \
8764 (__AARCH64_UINT64_C (0x0))); \
8765 __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
8766 : "+w"(result) \
8767 : "w"(b_), "i"(c) \
8768 : /* No clobbers */); \
8769 result; \
8770 })
8771
8772 #define vrshrn_high_n_s32(a, b, c) \
8773 __extension__ \
8774 ({ \
8775 int32x4_t b_ = (b); \
8776 int16x4_t a_ = (a); \
8777 int16x8_t result = vcombine_s16 \
8778 (a_, vcreate_s16 \
8779 (__AARCH64_UINT64_C (0x0))); \
8780 __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
8781 : "+w"(result) \
8782 : "w"(b_), "i"(c) \
8783 : /* No clobbers */); \
8784 result; \
8785 })
8786
8787 #define vrshrn_high_n_s64(a, b, c) \
8788 __extension__ \
8789 ({ \
8790 int64x2_t b_ = (b); \
8791 int32x2_t a_ = (a); \
8792 int32x4_t result = vcombine_s32 \
8793 (a_, vcreate_s32 \
8794 (__AARCH64_UINT64_C (0x0))); \
8795 __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
8796 : "+w"(result) \
8797 : "w"(b_), "i"(c) \
8798 : /* No clobbers */); \
8799 result; \
8800 })
8801
8802 #define vrshrn_high_n_u16(a, b, c) \
8803 __extension__ \
8804 ({ \
8805 uint16x8_t b_ = (b); \
8806 uint8x8_t a_ = (a); \
8807 uint8x16_t result = vcombine_u8 \
8808 (a_, vcreate_u8 \
8809 (__AARCH64_UINT64_C (0x0))); \
8810 __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
8811 : "+w"(result) \
8812 : "w"(b_), "i"(c) \
8813 : /* No clobbers */); \
8814 result; \
8815 })
8816
8817 #define vrshrn_high_n_u32(a, b, c) \
8818 __extension__ \
8819 ({ \
8820 uint32x4_t b_ = (b); \
8821 uint16x4_t a_ = (a); \
8822 uint16x8_t result = vcombine_u16 \
8823 (a_, vcreate_u16 \
8824 (__AARCH64_UINT64_C (0x0))); \
8825 __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
8826 : "+w"(result) \
8827 : "w"(b_), "i"(c) \
8828 : /* No clobbers */); \
8829 result; \
8830 })
8831
8832 #define vrshrn_high_n_u64(a, b, c) \
8833 __extension__ \
8834 ({ \
8835 uint64x2_t b_ = (b); \
8836 uint32x2_t a_ = (a); \
8837 uint32x4_t result = vcombine_u32 \
8838 (a_, vcreate_u32 \
8839 (__AARCH64_UINT64_C (0x0))); \
8840 __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
8841 : "+w"(result) \
8842 : "w"(b_), "i"(c) \
8843 : /* No clobbers */); \
8844 result; \
8845 })
8846
8847 #define vrshrn_n_s16(a, b) \
8848 __extension__ \
8849 ({ \
8850 int16x8_t a_ = (a); \
8851 int8x8_t result; \
8852 __asm__ ("rshrn %0.8b,%1.8h,%2" \
8853 : "=w"(result) \
8854 : "w"(a_), "i"(b) \
8855 : /* No clobbers */); \
8856 result; \
8857 })
8858
8859 #define vrshrn_n_s32(a, b) \
8860 __extension__ \
8861 ({ \
8862 int32x4_t a_ = (a); \
8863 int16x4_t result; \
8864 __asm__ ("rshrn %0.4h,%1.4s,%2" \
8865 : "=w"(result) \
8866 : "w"(a_), "i"(b) \
8867 : /* No clobbers */); \
8868 result; \
8869 })
8870
8871 #define vrshrn_n_s64(a, b) \
8872 __extension__ \
8873 ({ \
8874 int64x2_t a_ = (a); \
8875 int32x2_t result; \
8876 __asm__ ("rshrn %0.2s,%1.2d,%2" \
8877 : "=w"(result) \
8878 : "w"(a_), "i"(b) \
8879 : /* No clobbers */); \
8880 result; \
8881 })
8882
8883 #define vrshrn_n_u16(a, b) \
8884 __extension__ \
8885 ({ \
8886 uint16x8_t a_ = (a); \
8887 uint8x8_t result; \
8888 __asm__ ("rshrn %0.8b,%1.8h,%2" \
8889 : "=w"(result) \
8890 : "w"(a_), "i"(b) \
8891 : /* No clobbers */); \
8892 result; \
8893 })
8894
8895 #define vrshrn_n_u32(a, b) \
8896 __extension__ \
8897 ({ \
8898 uint32x4_t a_ = (a); \
8899 uint16x4_t result; \
8900 __asm__ ("rshrn %0.4h,%1.4s,%2" \
8901 : "=w"(result) \
8902 : "w"(a_), "i"(b) \
8903 : /* No clobbers */); \
8904 result; \
8905 })
8906
8907 #define vrshrn_n_u64(a, b) \
8908 __extension__ \
8909 ({ \
8910 uint64x2_t a_ = (a); \
8911 uint32x2_t result; \
8912 __asm__ ("rshrn %0.2s,%1.2d,%2" \
8913 : "=w"(result) \
8914 : "w"(a_), "i"(b) \
8915 : /* No clobbers */); \
8916 result; \
8917 })
8918
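/* vrsqrte_u32: URSQRTE, an unsigned reciprocal square-root estimate.
   The result is only an approximation; sketch (illustrative):

     uint32x2_t est = vrsqrte_u32 (x);
 */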
8919 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8920 vrsqrte_u32 (uint32x2_t a)
8921 {
8922 uint32x2_t result;
8923 __asm__ ("ursqrte %0.2s,%1.2s"
8924 : "=w"(result)
8925 : "w"(a)
8926 : /* No clobbers */);
8927 return result;
8928 }
8929
8930 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8931 vrsqrteq_u32 (uint32x4_t a)
8932 {
8933 uint32x4_t result;
8934 __asm__ ("ursqrte %0.4s,%1.4s"
8935 : "=w"(result)
8936 : "w"(a)
8937 : /* No clobbers */);
8938 return result;
8939 }
8940
8941 #define vshrn_high_n_s16(a, b, c) \
8942 __extension__ \
8943 ({ \
8944 int16x8_t b_ = (b); \
8945 int8x8_t a_ = (a); \
8946 int8x16_t result = vcombine_s8 \
8947 (a_, vcreate_s8 \
8948 (__AARCH64_UINT64_C (0x0))); \
8949 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
8950 : "+w"(result) \
8951 : "w"(b_), "i"(c) \
8952 : /* No clobbers */); \
8953 result; \
8954 })
8955
8956 #define vshrn_high_n_s32(a, b, c) \
8957 __extension__ \
8958 ({ \
8959 int32x4_t b_ = (b); \
8960 int16x4_t a_ = (a); \
8961 int16x8_t result = vcombine_s16 \
8962 (a_, vcreate_s16 \
8963 (__AARCH64_UINT64_C (0x0))); \
8964 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
8965 : "+w"(result) \
8966 : "w"(b_), "i"(c) \
8967 : /* No clobbers */); \
8968 result; \
8969 })
8970
8971 #define vshrn_high_n_s64(a, b, c) \
8972 __extension__ \
8973 ({ \
8974 int64x2_t b_ = (b); \
8975 int32x2_t a_ = (a); \
8976 int32x4_t result = vcombine_s32 \
8977 (a_, vcreate_s32 \
8978 (__AARCH64_UINT64_C (0x0))); \
8979 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
8980 : "+w"(result) \
8981 : "w"(b_), "i"(c) \
8982 : /* No clobbers */); \
8983 result; \
8984 })
8985
8986 #define vshrn_high_n_u16(a, b, c) \
8987 __extension__ \
8988 ({ \
8989 uint16x8_t b_ = (b); \
8990 uint8x8_t a_ = (a); \
8991 uint8x16_t result = vcombine_u8 \
8992 (a_, vcreate_u8 \
8993 (__AARCH64_UINT64_C (0x0))); \
8994 __asm__ ("shrn2 %0.16b,%1.8h,#%2" \
8995 : "+w"(result) \
8996 : "w"(b_), "i"(c) \
8997 : /* No clobbers */); \
8998 result; \
8999 })
9000
9001 #define vshrn_high_n_u32(a, b, c) \
9002 __extension__ \
9003 ({ \
9004 uint32x4_t b_ = (b); \
9005 uint16x4_t a_ = (a); \
9006 uint16x8_t result = vcombine_u16 \
9007 (a_, vcreate_u16 \
9008 (__AARCH64_UINT64_C (0x0))); \
9009 __asm__ ("shrn2 %0.8h,%1.4s,#%2" \
9010 : "+w"(result) \
9011 : "w"(b_), "i"(c) \
9012 : /* No clobbers */); \
9013 result; \
9014 })
9015
9016 #define vshrn_high_n_u64(a, b, c) \
9017 __extension__ \
9018 ({ \
9019 uint64x2_t b_ = (b); \
9020 uint32x2_t a_ = (a); \
9021 uint32x4_t result = vcombine_u32 \
9022 (a_, vcreate_u32 \
9023 (__AARCH64_UINT64_C (0x0))); \
9024 __asm__ ("shrn2 %0.4s,%1.2d,#%2" \
9025 : "+w"(result) \
9026 : "w"(b_), "i"(c) \
9027 : /* No clobbers */); \
9028 result; \
9029 })
9030
9031 #define vshrn_n_s16(a, b) \
9032 __extension__ \
9033 ({ \
9034 int16x8_t a_ = (a); \
9035 int8x8_t result; \
9036 __asm__ ("shrn %0.8b,%1.8h,%2" \
9037 : "=w"(result) \
9038 : "w"(a_), "i"(b) \
9039 : /* No clobbers */); \
9040 result; \
9041 })
9042
9043 #define vshrn_n_s32(a, b) \
9044 __extension__ \
9045 ({ \
9046 int32x4_t a_ = (a); \
9047 int16x4_t result; \
9048 __asm__ ("shrn %0.4h,%1.4s,%2" \
9049 : "=w"(result) \
9050 : "w"(a_), "i"(b) \
9051 : /* No clobbers */); \
9052 result; \
9053 })
9054
9055 #define vshrn_n_s64(a, b) \
9056 __extension__ \
9057 ({ \
9058 int64x2_t a_ = (a); \
9059 int32x2_t result; \
9060 __asm__ ("shrn %0.2s,%1.2d,%2" \
9061 : "=w"(result) \
9062 : "w"(a_), "i"(b) \
9063 : /* No clobbers */); \
9064 result; \
9065 })
9066
9067 #define vshrn_n_u16(a, b) \
9068 __extension__ \
9069 ({ \
9070 uint16x8_t a_ = (a); \
9071 uint8x8_t result; \
9072 __asm__ ("shrn %0.8b,%1.8h,%2" \
9073 : "=w"(result) \
9074 : "w"(a_), "i"(b) \
9075 : /* No clobbers */); \
9076 result; \
9077 })
9078
9079 #define vshrn_n_u32(a, b) \
9080 __extension__ \
9081 ({ \
9082 uint32x4_t a_ = (a); \
9083 uint16x4_t result; \
9084 __asm__ ("shrn %0.4h,%1.4s,%2" \
9085 : "=w"(result) \
9086 : "w"(a_), "i"(b) \
9087 : /* No clobbers */); \
9088 result; \
9089 })
9090
9091 #define vshrn_n_u64(a, b) \
9092 __extension__ \
9093 ({ \
9094 uint64x2_t a_ = (a); \
9095 uint32x2_t result; \
9096 __asm__ ("shrn %0.2s,%1.2d,%2" \
9097 : "=w"(result) \
9098 : "w"(a_), "i"(b) \
9099 : /* No clobbers */); \
9100 result; \
9101 })
9102
9103 #define vsli_n_p8(a, b, c) \
9104 __extension__ \
9105 ({ \
9106 poly8x8_t b_ = (b); \
9107 poly8x8_t a_ = (a); \
9108 poly8x8_t result; \
9109 __asm__ ("sli %0.8b,%2.8b,%3" \
9110 : "=w"(result) \
9111 : "0"(a_), "w"(b_), "i"(c) \
9112 : /* No clobbers */); \
9113 result; \
9114 })
9115
9116 #define vsli_n_p16(a, b, c) \
9117 __extension__ \
9118 ({ \
9119 poly16x4_t b_ = (b); \
9120 poly16x4_t a_ = (a); \
9121 poly16x4_t result; \
9122 __asm__ ("sli %0.4h,%2.4h,%3" \
9123 : "=w"(result) \
9124 : "0"(a_), "w"(b_), "i"(c) \
9125 : /* No clobbers */); \
9126 result; \
9127 })
9128
9129 #define vsliq_n_p8(a, b, c) \
9130 __extension__ \
9131 ({ \
9132 poly8x16_t b_ = (b); \
9133 poly8x16_t a_ = (a); \
9134 poly8x16_t result; \
9135 __asm__ ("sli %0.16b,%2.16b,%3" \
9136 : "=w"(result) \
9137 : "0"(a_), "w"(b_), "i"(c) \
9138 : /* No clobbers */); \
9139 result; \
9140 })
9141
9142 #define vsliq_n_p16(a, b, c) \
9143 __extension__ \
9144 ({ \
9145 poly16x8_t b_ = (b); \
9146 poly16x8_t a_ = (a); \
9147 poly16x8_t result; \
9148 __asm__ ("sli %0.8h,%2.8h,%3" \
9149 : "=w"(result) \
9150 : "0"(a_), "w"(b_), "i"(c) \
9151 : /* No clobbers */); \
9152 result; \
9153 })
9154
9155 #define vsri_n_p8(a, b, c) \
9156 __extension__ \
9157 ({ \
9158 poly8x8_t b_ = (b); \
9159 poly8x8_t a_ = (a); \
9160 poly8x8_t result; \
9161 __asm__ ("sri %0.8b,%2.8b,%3" \
9162 : "=w"(result) \
9163 : "0"(a_), "w"(b_), "i"(c) \
9164 : /* No clobbers */); \
9165 result; \
9166 })
9167
9168 #define vsri_n_p16(a, b, c) \
9169 __extension__ \
9170 ({ \
9171 poly16x4_t b_ = (b); \
9172 poly16x4_t a_ = (a); \
9173 poly16x4_t result; \
9174 __asm__ ("sri %0.4h,%2.4h,%3" \
9175 : "=w"(result) \
9176 : "0"(a_), "w"(b_), "i"(c) \
9177 : /* No clobbers */); \
9178 result; \
9179 })
9180
9181 #define vsriq_n_p8(a, b, c) \
9182 __extension__ \
9183 ({ \
9184 poly8x16_t b_ = (b); \
9185 poly8x16_t a_ = (a); \
9186 poly8x16_t result; \
9187 __asm__ ("sri %0.16b,%2.16b,%3" \
9188 : "=w"(result) \
9189 : "0"(a_), "w"(b_), "i"(c) \
9190 : /* No clobbers */); \
9191 result; \
9192 })
9193
9194 #define vsriq_n_p16(a, b, c) \
9195 __extension__ \
9196 ({ \
9197 poly16x8_t b_ = (b); \
9198 poly16x8_t a_ = (a); \
9199 poly16x8_t result; \
9200 __asm__ ("sri %0.8h,%2.8h,%3" \
9201 : "=w"(result) \
9202 : "0"(a_), "w"(b_), "i"(c) \
9203 : /* No clobbers */); \
9204 result; \
9205 })
9206
9207 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9208 vtst_p8 (poly8x8_t a, poly8x8_t b)
9209 {
9210 uint8x8_t result;
9211 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
9212 : "=w"(result)
9213 : "w"(a), "w"(b)
9214 : /* No clobbers */);
9215 return result;
9216 }
9217
9218 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9219 vtst_p16 (poly16x4_t a, poly16x4_t b)
9220 {
9221 uint16x4_t result;
9222 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
9223 : "=w"(result)
9224 : "w"(a), "w"(b)
9225 : /* No clobbers */);
9226 return result;
9227 }
9228
9229 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9230 vtstq_p8 (poly8x16_t a, poly8x16_t b)
9231 {
9232 uint8x16_t result;
9233 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
9234 : "=w"(result)
9235 : "w"(a), "w"(b)
9236 : /* No clobbers */);
9237 return result;
9238 }
9239
9240 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9241 vtstq_p16 (poly16x8_t a, poly16x8_t b)
9242 {
9243 uint16x8_t result;
9244 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
9245 : "=w"(result)
9246 : "w"(a), "w"(b)
9247 : /* No clobbers */);
9248 return result;
9249 }
9250
9251 /* End of temporary inline asm implementations. */
9252
9253 /* Start of temporary inline asm for vldn, vstn and friends. */
9254
9255 /* Create struct element types for duplicating loads.
9256
9257 Create 2-element structures of:
9258
9259 +------+----+----+----+----+
9260 | | 8 | 16 | 32 | 64 |
9261 +------+----+----+----+----+
9262 |int | Y | Y | N | N |
9263 +------+----+----+----+----+
9264 |uint | Y | Y | N | N |
9265 +------+----+----+----+----+
9266 |float | - | Y | N | N |
9267 +------+----+----+----+----+
9268 |poly | Y | Y | - | - |
9269 +------+----+----+----+----+
9270
9271 Create 3-element structures of:
9272
9273 +------+----+----+----+----+
9274 | | 8 | 16 | 32 | 64 |
9275 +------+----+----+----+----+
9276 |int | Y | Y | Y | Y |
9277 +------+----+----+----+----+
9278 |uint | Y | Y | Y | Y |
9279 +------+----+----+----+----+
9280 |float | - | Y | Y | Y |
9281 +------+----+----+----+----+
9282 |poly | Y | Y | - | - |
9283 +------+----+----+----+----+
9284
9285 Create 4-element structures of:
9286
9287 +------+----+----+----+----+
9288 | | 8 | 16 | 32 | 64 |
9289 +------+----+----+----+----+
9290 |int | Y | N | N | Y |
9291 +------+----+----+----+----+
9292 |uint | Y | N | N | Y |
9293 +------+----+----+----+----+
9294 |float | - | N | N | Y |
9295 +------+----+----+----+----+
9296 |poly | Y | N | - | - |
9297 +------+----+----+----+----+
9298
9299 These struct types are required for casting memory references. */
9300 #define __STRUCTN(t, sz, nelem) \
9301 typedef struct t ## sz ## x ## nelem ## _t { \
9302 t ## sz ## _t val[nelem]; \
9303 } t ## sz ## x ## nelem ## _t;
9304
9305 /* 2-element structs. */
9306 __STRUCTN (int, 8, 2)
9307 __STRUCTN (int, 16, 2)
9308 __STRUCTN (uint, 8, 2)
9309 __STRUCTN (uint, 16, 2)
9310 __STRUCTN (float, 16, 2)
9311 __STRUCTN (poly, 8, 2)
9312 __STRUCTN (poly, 16, 2)
9313 /* 3-element structs. */
9314 __STRUCTN (int, 8, 3)
9315 __STRUCTN (int, 16, 3)
9316 __STRUCTN (int, 32, 3)
9317 __STRUCTN (int, 64, 3)
9318 __STRUCTN (uint, 8, 3)
9319 __STRUCTN (uint, 16, 3)
9320 __STRUCTN (uint, 32, 3)
9321 __STRUCTN (uint, 64, 3)
9322 __STRUCTN (float, 16, 3)
9323 __STRUCTN (float, 32, 3)
9324 __STRUCTN (float, 64, 3)
9325 __STRUCTN (poly, 8, 3)
9326 __STRUCTN (poly, 16, 3)
9327 /* 4-element structs. */
9328 __STRUCTN (int, 8, 4)
9329 __STRUCTN (int, 64, 4)
9330 __STRUCTN (uint, 8, 4)
9331 __STRUCTN (uint, 64, 4)
9332 __STRUCTN (poly, 8, 4)
9333 __STRUCTN (float, 64, 4)
9334 #undef __STRUCTN
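
/* For reference, __STRUCTN (int, 8, 2) above expands to

     typedef struct int8x2_t {
       int8_t val[2];
     } int8x2_t;

   producing a sized element type that the vldN/vstN lane and dup forms
   can cast memory references through.  */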
9335
9336
9337 #define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode, \
9338 qmode, ptr_mode, funcsuffix, signedtype) \
9339 __extension__ static __inline void \
9340 __attribute__ ((__always_inline__)) \
9341 vst2_lane_ ## funcsuffix (ptrtype *__ptr, \
9342 intype __b, const int __c) \
9343 { \
9344 __builtin_aarch64_simd_oi __o; \
9345 largetype __temp; \
9346 __temp.val[0] \
9347 = vcombine_##funcsuffix (__b.val[0], \
9348 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
9349 __temp.val[1] \
9350 = vcombine_##funcsuffix (__b.val[1], \
9351 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
9352 __o = __builtin_aarch64_set_qregoi##qmode (__o, \
9353 (signedtype) __temp.val[0], 0); \
9354 __o = __builtin_aarch64_set_qregoi##qmode (__o, \
9355 (signedtype) __temp.val[1], 1); \
9356 __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
9357 __ptr, __o, __c); \
9358 }
9359
9360 __ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16,
9361 float16x8_t)
9362 __ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32,
9363 float32x4_t)
9364 __ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64,
9365 float64x2_t)
9366 __ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8,
9367 int8x16_t)
9368 __ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16,
9369 int16x8_t)
9370 __ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8,
9371 int8x16_t)
9372 __ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16,
9373 int16x8_t)
9374 __ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32,
9375 int32x4_t)
9376 __ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, di, v2di, di, s64,
9377 int64x2_t)
9378 __ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8,
9379 int8x16_t)
9380 __ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, u16,
9381 int16x8_t)
9382 __ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, u32,
9383 int32x4_t)
9384 __ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, di, v2di, di, u64,
9385 int64x2_t)
9386
9387 #undef __ST2_LANE_FUNC
9388 #define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
9389 __extension__ static __inline void \
9390 __attribute__ ((__always_inline__)) \
9391 vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \
9392 intype __b, const int __c) \
9393 { \
9394 union { intype __i; \
9395 __builtin_aarch64_simd_oi __o; } __temp = { __b }; \
9396 __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
9397 __ptr, __temp.__o, __c); \
9398 }
9399
9400 __ST2_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16)
9401 __ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
9402 __ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
9403 __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
9404 __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
9405 __ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
9406 __ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
9407 __ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
9408 __ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
9409 __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
9410 __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
9411 __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
9412 __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
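
/* Usage sketch (illustrative): vst2q_lane_f32, generated just above,
   stores one lane from each register of a float32x4x2_t as two
   consecutive elements, the access pattern of ST2:

     float buf[2];
     vst2q_lane_f32 (buf, pair, 3);  // buf = { pair.val[0][3], pair.val[1][3] }
 */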
9413
9414 #define __ST3_LANE_FUNC(intype, largetype, ptrtype, mode, \
9415 qmode, ptr_mode, funcsuffix, signedtype) \
9416 __extension__ static __inline void \
9417 __attribute__ ((__always_inline__)) \
9418 vst3_lane_ ## funcsuffix (ptrtype *__ptr, \
9419 intype __b, const int __c) \
9420 { \
9421 __builtin_aarch64_simd_ci __o; \
9422 largetype __temp; \
9423 __temp.val[0] \
9424 = vcombine_##funcsuffix (__b.val[0], \
9425 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
9426 __temp.val[1] \
9427 = vcombine_##funcsuffix (__b.val[1], \
9428 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
9429 __temp.val[2] \
9430 = vcombine_##funcsuffix (__b.val[2], \
9431 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
9432 __o = __builtin_aarch64_set_qregci##qmode (__o, \
9433 (signedtype) __temp.val[0], 0); \
9434 __o = __builtin_aarch64_set_qregci##qmode (__o, \
9435 (signedtype) __temp.val[1], 1); \
9436 __o = __builtin_aarch64_set_qregci##qmode (__o, \
9437 (signedtype) __temp.val[2], 2); \
9438 __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
9439 __ptr, __o, __c); \
9440 }
9441
9442 __ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v4hf, v8hf, hf, f16,
9443 float16x8_t)
9444 __ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32,
9445 float32x4_t)
9446 __ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64,
9447 float64x2_t)
9448 __ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8,
9449 int8x16_t)
9450 __ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16,
9451 int16x8_t)
9452 __ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8,
9453 int8x16_t)
9454 __ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16,
9455 int16x8_t)
9456 __ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v2si, v4si, si, s32,
9457 int32x4_t)
9458 __ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, di, v2di, di, s64,
9459 int64x2_t)
9460 __ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8,
9461 int8x16_t)
9462 __ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, u16,
9463 int16x8_t)
9464 __ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v2si, v4si, si, u32,
9465 int32x4_t)
9466 __ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, di, v2di, di, u64,
9467 int64x2_t)
9468
9469 #undef __ST3_LANE_FUNC
9470 #define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
9471 __extension__ static __inline void \
9472 __attribute__ ((__always_inline__)) \
9473 vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \
9474 intype __b, const int __c) \
9475 { \
9476 union { intype __i; \
9477 __builtin_aarch64_simd_ci __o; } __temp = { __b }; \
9478 __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
9479 __ptr, __temp.__o, __c); \
9480 }
9481
9482 __ST3_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16)
9483 __ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
9484 __ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
9485 __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
9486 __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
9487 __ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
9488 __ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
9489 __ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
9490 __ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
9491 __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
9492 __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
9493 __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
9494 __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
9495
9496 #define __ST4_LANE_FUNC(intype, largetype, ptrtype, mode, \
9497 qmode, ptr_mode, funcsuffix, signedtype) \
9498 __extension__ static __inline void \
9499 __attribute__ ((__always_inline__)) \
9500 vst4_lane_ ## funcsuffix (ptrtype *__ptr, \
9501 intype __b, const int __c) \
9502 { \
9503 __builtin_aarch64_simd_xi __o; \
9504 largetype __temp; \
9505 __temp.val[0] \
9506 = vcombine_##funcsuffix (__b.val[0], \
9507 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
9508 __temp.val[1] \
9509 = vcombine_##funcsuffix (__b.val[1], \
9510 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
9511 __temp.val[2] \
9512 = vcombine_##funcsuffix (__b.val[2], \
9513 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
9514 __temp.val[3] \
9515 = vcombine_##funcsuffix (__b.val[3], \
9516 vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
9517 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
9518 (signedtype) __temp.val[0], 0); \
9519 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
9520 (signedtype) __temp.val[1], 1); \
9521 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
9522 (signedtype) __temp.val[2], 2); \
9523 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
9524 (signedtype) __temp.val[3], 3); \
9525 __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
9526 __ptr, __o, __c); \
9527 }
9528
9529 __ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v4hf, v8hf, hf, f16,
9530 float16x8_t)
9531 __ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32,
9532 float32x4_t)
9533 __ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64,
9534 float64x2_t)
9535 __ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8,
9536 int8x16_t)
9537 __ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16,
9538 int16x8_t)
9539 __ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8,
9540 int8x16_t)
9541 __ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16,
9542 int16x8_t)
9543 __ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v2si, v4si, si, s32,
9544 int32x4_t)
9545 __ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, di, v2di, di, s64,
9546 int64x2_t)
9547 __ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8,
9548 int8x16_t)
9549 __ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, u16,
9550 int16x8_t)
9551 __ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v2si, v4si, si, u32,
9552 int32x4_t)
9553 __ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, di, v2di, di, u64,
9554 int64x2_t)
9555
9556 #undef __ST4_LANE_FUNC
9557 #define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
9558 __extension__ static __inline void \
9559 __attribute__ ((__always_inline__)) \
9560 vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \
9561 intype __b, const int __c) \
9562 { \
9563 union { intype __i; \
9564 __builtin_aarch64_simd_xi __o; } __temp = { __b }; \
9565 __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
9566 __ptr, __temp.__o, __c); \
9567 }
9568
9569 __ST4_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16)
9570 __ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
9571 __ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
9572 __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
9573 __ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
9574 __ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
9575 __ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
9576 __ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
9577 __ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
9578 __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
9579 __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
9580 __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
9581 __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)
9582
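/* SADDLV/UADDLV have no two-lane 32-bit arrangement, so the
   across-vector long add of a .2s vector is implemented with the
   pairwise {S,U}ADDLP, which sums the two lanes into one 64-bit
   result.  */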
9583 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
9584 vaddlv_s32 (int32x2_t a)
9585 {
9586 int64_t result;
9587 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
9588 return result;
9589 }
9590
9591 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
9592 vaddlv_u32 (uint32x2_t a)
9593 {
9594 uint64_t result;
9595 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
9596 return result;
9597 }
9598
9599 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9600 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
9601 {
9602 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
9603 }
9604
9605 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9606 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
9607 {
9608 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
9609 }
9610
9611 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9612 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
9613 {
9614 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
9615 }
9616
9617 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9618 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
9619 {
9620 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
9621 }
9622
9623 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9624 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
9625 {
9626 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
9627 }
9628
9629 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9630 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
9631 {
9632 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
9633 }
9634
9635 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9636 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
9637 {
9638 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
9639 }
9640
9641 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9642 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
9643 {
9644 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
9645 }
9646
9647 /* Table intrinsics. */
9648
9649 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9650 vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
9651 {
9652 poly8x8_t result;
9653 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
9654 : "=w"(result)
9655 : "w"(a), "w"(b)
9656 : /* No clobbers */);
9657 return result;
9658 }
9659
9660 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9661 vqtbl1_s8 (int8x16_t a, uint8x8_t b)
9662 {
9663 int8x8_t result;
9664 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
9665 : "=w"(result)
9666 : "w"(a), "w"(b)
9667 : /* No clobbers */);
9668 return result;
9669 }
9670
9671 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9672 vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
9673 {
9674 uint8x8_t result;
9675 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
9676 : "=w"(result)
9677 : "w"(a), "w"(b)
9678 : /* No clobbers */);
9679 return result;
9680 }
9681
9682 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
9683 vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
9684 {
9685 poly8x16_t result;
9686 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
9687 : "=w"(result)
9688 : "w"(a), "w"(b)
9689 : /* No clobbers */);
9690 return result;
9691 }
9692
9693 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9694 vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
9695 {
9696 int8x16_t result;
9697 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
9698 : "=w"(result)
9699 : "w"(a), "w"(b)
9700 : /* No clobbers */);
9701 return result;
9702 }
9703
9704 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9705 vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
9706 {
9707 uint8x16_t result;
9708 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
9709 : "=w"(result)
9710 : "w"(a), "w"(b)
9711 : /* No clobbers */);
9712 return result;
9713 }
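
/* vqtbl1*: single-register TBL lookup; each index byte selects a byte
   of the table, and out-of-range indices produce zero.  Sketch
   (illustrative) reversing the bytes of a quadword:

     const uint8x16_t rev_idx = { 15, 14, 13, 12, 11, 10, 9, 8,
                                  7, 6, 5, 4, 3, 2, 1, 0 };
     uint8x16_t reversed = vqtbl1q_u8 (v, rev_idx);
 */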
9714
9715 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9716 vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
9717 {
9718 int8x8_t result = r;
9719 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
9720 : "+w"(result)
9721 : "w"(tab), "w"(idx)
9722 : /* No clobbers */);
9723 return result;
9724 }
9725
9726 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9727 vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
9728 {
9729 uint8x8_t result = r;
9730 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
9731 : "+w"(result)
9732 : "w"(tab), "w"(idx)
9733 : /* No clobbers */);
9734 return result;
9735 }
9736
9737 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9738 vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
9739 {
9740 poly8x8_t result = r;
9741 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
9742 : "+w"(result)
9743 : "w"(tab), "w"(idx)
9744 : /* No clobbers */);
9745 return result;
9746 }
9747
9748 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9749 vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
9750 {
9751 int8x16_t result = r;
9752 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
9753 : "+w"(result)
9754 : "w"(tab), "w"(idx)
9755 : /* No clobbers */);
9756 return result;
9757 }
9758
9759 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9760 vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
9761 {
9762 uint8x16_t result = r;
9763 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
9764 : "+w"(result)
9765 : "w"(tab), "w"(idx)
9766 : /* No clobbers */);
9767 return result;
9768 }
9769
9770 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
9771 vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
9772 {
9773 poly8x16_t result = r;
9774   __asm__ ("tbx %0.16b, {%1.16b}, %2.16b"
9775 : "+w"(result)
9776 : "w"(tab), "w"(idx)
9777 : /* No clobbers */);
9778 return result;
9779 }
9780
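/* Usage sketch (illustrative): the vqtbx1 forms wrap TBX, which differs
   from TBL only in its out-of-range behaviour: lanes whose index is 16
   or more keep the corresponding byte of the first (destination)
   operand instead of being zeroed, so a default value can be threaded
   through the lookup.

     static uint8x8_t
     example_lookup_or_default (uint8x8_t dflt, uint8x16_t table,
                                uint8x8_t idx)
     {
       return vqtbx1_u8 (dflt, table, idx);
     }
*/
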
9781 /* V7 legacy table intrinsics. */
9782
9783 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9784 vtbl1_s8 (int8x8_t tab, int8x8_t idx)
9785 {
9786 int8x8_t result;
9787 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
9788 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
9789 : "=w"(result)
9790 : "w"(temp), "w"(idx)
9791 : /* No clobbers */);
9792 return result;
9793 }
9794
9795 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9796 vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
9797 {
9798 uint8x8_t result;
9799 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
9800 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
9801 : "=w"(result)
9802 : "w"(temp), "w"(idx)
9803 : /* No clobbers */);
9804 return result;
9805 }
9806
9807 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9808 vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
9809 {
9810 poly8x8_t result;
9811 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
9812 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
9813 : "=w"(result)
9814 : "w"(temp), "w"(idx)
9815 : /* No clobbers */);
9816 return result;
9817 }
9818
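/* Note (explanatory): vtbl1 takes a 64-bit table, but the AArch64 TBL
   instruction reads a full 128-bit table register, so the table is
   zero-padded with vcombine above.  Indices of 8 or more land in the
   zero half and return 0, which preserves the v7 semantics.  A usage
   sketch (illustrative):

     static uint8x8_t
     example_popcount_0_to_7 (uint8x8_t v)
     {
       const uint8x8_t lut = { 0, 1, 1, 2, 1, 2, 2, 3 };
       return vtbl1_u8 (lut, v);
     }
*/
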
9819 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9820 vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
9821 {
9822 int8x8_t result;
9823 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
9824 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
9825 : "=w"(result)
9826 : "w"(temp), "w"(idx)
9827 : /* No clobbers */);
9828 return result;
9829 }
9830
9831 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9832 vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
9833 {
9834 uint8x8_t result;
9835 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
9836 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
9837 : "=w"(result)
9838 : "w"(temp), "w"(idx)
9839 : /* No clobbers */);
9840 return result;
9841 }
9842
9843 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9844 vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
9845 {
9846 poly8x8_t result;
9847 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
9848 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
9849 : "=w"(result)
9850 : "w"(temp), "w"(idx)
9851 : /* No clobbers */);
9852 return result;
9853 }
9854
9855 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9856 vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
9857 {
9858 int8x8_t result;
9859 int8x16x2_t temp;
9860 __builtin_aarch64_simd_oi __o;
9861 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
9862 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
9863 __o = __builtin_aarch64_set_qregoiv16qi (__o,
9864 (int8x16_t) temp.val[0], 0);
9865 __o = __builtin_aarch64_set_qregoiv16qi (__o,
9866 (int8x16_t) temp.val[1], 1);
9867 result = __builtin_aarch64_tbl3v8qi (__o, idx);
9868 return result;
9869 }
9870
9871 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9872 vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
9873 {
9874 uint8x8_t result;
9875 uint8x16x2_t temp;
9876 __builtin_aarch64_simd_oi __o;
9877 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
9878 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
9879 __o = __builtin_aarch64_set_qregoiv16qi (__o,
9880 (int8x16_t) temp.val[0], 0);
9881 __o = __builtin_aarch64_set_qregoiv16qi (__o,
9882 (int8x16_t) temp.val[1], 1);
9883 result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
9884 return result;
9885 }
9886
9887 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9888 vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
9889 {
9890 poly8x8_t result;
9891 poly8x16x2_t temp;
9892 __builtin_aarch64_simd_oi __o;
9893 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
9894 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
9895 __o = __builtin_aarch64_set_qregoiv16qi (__o,
9896 (int8x16_t) temp.val[0], 0);
9897 __o = __builtin_aarch64_set_qregoiv16qi (__o,
9898 (int8x16_t) temp.val[1], 1);
9899 result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
9900 return result;
9901 }
9902
9903 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9904 vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
9905 {
9906 int8x8_t result;
9907 int8x16x2_t temp;
9908 __builtin_aarch64_simd_oi __o;
9909 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
9910 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
9911 __o = __builtin_aarch64_set_qregoiv16qi (__o,
9912 (int8x16_t) temp.val[0], 0);
9913 __o = __builtin_aarch64_set_qregoiv16qi (__o,
9914 (int8x16_t) temp.val[1], 1);
9915 result = __builtin_aarch64_tbl3v8qi (__o, idx);
9916 return result;
9917 }
9918
9919 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9920 vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
9921 {
9922 uint8x8_t result;
9923 uint8x16x2_t temp;
9924 __builtin_aarch64_simd_oi __o;
9925 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
9926 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
9927 __o = __builtin_aarch64_set_qregoiv16qi (__o,
9928 (int8x16_t) temp.val[0], 0);
9929 __o = __builtin_aarch64_set_qregoiv16qi (__o,
9930 (int8x16_t) temp.val[1], 1);
9931 result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
9932 return result;
9933 }
9934
9935 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9936 vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
9937 {
9938 poly8x8_t result;
9939 poly8x16x2_t temp;
9940 __builtin_aarch64_simd_oi __o;
9941 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
9942 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
9943 __o = __builtin_aarch64_set_qregoiv16qi (__o,
9944 (int8x16_t) temp.val[0], 0);
9945 __o = __builtin_aarch64_set_qregoiv16qi (__o,
9946 (int8x16_t) temp.val[1], 1);
9947 result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
9948 return result;
9949 }
9950
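/* Note (explanatory): vtbl3 and vtbl4 pack their 64-bit halves into a
   pair of 128-bit registers and use the two-register TBL builtin;
   vtbl3 zero-pads the missing fourth half so out-of-range indices
   still read 0.  A usage sketch (illustrative):

     static uint8x8_t
     example_lookup32 (const uint8_t lut[32], uint8x8_t idx)
     {
       uint8x8x4_t tab;
       tab.val[0] = vld1_u8 (lut);
       tab.val[1] = vld1_u8 (lut + 8);
       tab.val[2] = vld1_u8 (lut + 16);
       tab.val[3] = vld1_u8 (lut + 24);
       return vtbl4_u8 (tab, idx);
     }
*/
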
9951 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9952 vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
9953 {
9954 int8x8_t result = r;
9955 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
9956 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
9957 : "+w"(result)
9958 : "w"(temp), "w"(idx)
9959 : /* No clobbers */);
9960 return result;
9961 }
9962
9963 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9964 vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
9965 {
9966 uint8x8_t result = r;
9967 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
9968 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
9969 : "+w"(result)
9970 : "w"(temp), "w"(idx)
9971 : /* No clobbers */);
9972 return result;
9973 }
9974
9975 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9976 vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
9977 {
9978 poly8x8_t result = r;
9979 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
9980 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
9981 : "+w"(result)
9982 : "w"(temp), "w"(idx)
9983 : /* No clobbers */);
9984 return result;
9985 }
9986
9987 /* End of temporary inline asm. */
9988
9989 /* Start of optimal implementations in approved order. */
9990
9991 /* vabd. */
9992
9993 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9994 vabds_f32 (float32_t __a, float32_t __b)
9995 {
9996 return __builtin_aarch64_fabdsf (__a, __b);
9997 }
9998
9999 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10000 vabdd_f64 (float64_t __a, float64_t __b)
10001 {
10002 return __builtin_aarch64_fabddf (__a, __b);
10003 }
10004
10005 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10006 vabd_f32 (float32x2_t __a, float32x2_t __b)
10007 {
10008 return __builtin_aarch64_fabdv2sf (__a, __b);
10009 }
10010
10011 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
10012 vabd_f64 (float64x1_t __a, float64x1_t __b)
10013 {
10014 return (float64x1_t) {vabdd_f64 (vget_lane_f64 (__a, 0),
10015 vget_lane_f64 (__b, 0))};
10016 }
10017
10018 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10019 vabdq_f32 (float32x4_t __a, float32x4_t __b)
10020 {
10021 return __builtin_aarch64_fabdv4sf (__a, __b);
10022 }
10023
10024 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10025 vabdq_f64 (float64x2_t __a, float64x2_t __b)
10026 {
10027 return __builtin_aarch64_fabdv2df (__a, __b);
10028 }
10029
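/* Usage sketch (illustrative): vabd computes the element-wise absolute
   difference in a single FABD instruction; combined with an
   across-lanes maximum it gives, e.g., the largest error between two
   vectors:

     static float32_t
     example_max_abs_error (float32x4_t a, float32x4_t b)
     {
       return vmaxvq_f32 (vabdq_f32 (a, b));
     }

   (vmaxvq_f32 is defined later in this header.)
*/
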
10030 /* vabs */
10031
10032 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10033 vabs_f32 (float32x2_t __a)
10034 {
10035 return __builtin_aarch64_absv2sf (__a);
10036 }
10037
10038 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
10039 vabs_f64 (float64x1_t __a)
10040 {
10041 return (float64x1_t) {__builtin_fabs (__a[0])};
10042 }
10043
10044 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10045 vabs_s8 (int8x8_t __a)
10046 {
10047 return __builtin_aarch64_absv8qi (__a);
10048 }
10049
10050 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10051 vabs_s16 (int16x4_t __a)
10052 {
10053 return __builtin_aarch64_absv4hi (__a);
10054 }
10055
10056 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10057 vabs_s32 (int32x2_t __a)
10058 {
10059 return __builtin_aarch64_absv2si (__a);
10060 }
10061
10062 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
10063 vabs_s64 (int64x1_t __a)
10064 {
10065 return (int64x1_t) {__builtin_aarch64_absdi (__a[0])};
10066 }
10067
10068 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10069 vabsq_f32 (float32x4_t __a)
10070 {
10071 return __builtin_aarch64_absv4sf (__a);
10072 }
10073
10074 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10075 vabsq_f64 (float64x2_t __a)
10076 {
10077 return __builtin_aarch64_absv2df (__a);
10078 }
10079
10080 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10081 vabsq_s8 (int8x16_t __a)
10082 {
10083 return __builtin_aarch64_absv16qi (__a);
10084 }
10085
10086 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10087 vabsq_s16 (int16x8_t __a)
10088 {
10089 return __builtin_aarch64_absv8hi (__a);
10090 }
10091
10092 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10093 vabsq_s32 (int32x4_t __a)
10094 {
10095 return __builtin_aarch64_absv4si (__a);
10096 }
10097
10098 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10099 vabsq_s64 (int64x2_t __a)
10100 {
10101 return __builtin_aarch64_absv2di (__a);
10102 }
10103
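/* Note (explanatory): the integer vabs forms are not saturating; the
   most negative value maps to itself (vabs_s8 of -128 yields -128).
   When saturation is needed, the vqabs family (defined elsewhere in
   this header) clamps to the maximum positive value instead:

     static int8x8_t
     example_magnitude (int8x8_t v)
     {
       return vqabs_s8 (v);
     }
*/
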
10104 /* vadd */
10105
10106 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
10107 vaddd_s64 (int64_t __a, int64_t __b)
10108 {
10109 return __a + __b;
10110 }
10111
10112 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10113 vaddd_u64 (uint64_t __a, uint64_t __b)
10114 {
10115 return __a + __b;
10116 }
10117
10118 /* vaddv */
10119
10120 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
10121 vaddv_s8 (int8x8_t __a)
10122 {
10123 return __builtin_aarch64_reduc_plus_scal_v8qi (__a);
10124 }
10125
10126 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
10127 vaddv_s16 (int16x4_t __a)
10128 {
10129 return __builtin_aarch64_reduc_plus_scal_v4hi (__a);
10130 }
10131
10132 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
10133 vaddv_s32 (int32x2_t __a)
10134 {
10135 return __builtin_aarch64_reduc_plus_scal_v2si (__a);
10136 }
10137
10138 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
10139 vaddv_u8 (uint8x8_t __a)
10140 {
10141 return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a);
10142 }
10143
10144 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
10145 vaddv_u16 (uint16x4_t __a)
10146 {
10147 return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a);
10148 }
10149
10150 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10151 vaddv_u32 (uint32x2_t __a)
10152 {
10153   return (uint32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a);
10154 }
10155
10156 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
10157 vaddvq_s8 (int8x16_t __a)
10158 {
10159 return __builtin_aarch64_reduc_plus_scal_v16qi (__a);
10160 }
10161
10162 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
10163 vaddvq_s16 (int16x8_t __a)
10164 {
10165 return __builtin_aarch64_reduc_plus_scal_v8hi (__a);
10166 }
10167
10168 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
10169 vaddvq_s32 (int32x4_t __a)
10170 {
10171 return __builtin_aarch64_reduc_plus_scal_v4si (__a);
10172 }
10173
10174 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
10175 vaddvq_s64 (int64x2_t __a)
10176 {
10177 return __builtin_aarch64_reduc_plus_scal_v2di (__a);
10178 }
10179
10180 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
10181 vaddvq_u8 (uint8x16_t __a)
10182 {
10183 return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a);
10184 }
10185
10186 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
10187 vaddvq_u16 (uint16x8_t __a)
10188 {
10189 return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a);
10190 }
10191
10192 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10193 vaddvq_u32 (uint32x4_t __a)
10194 {
10195 return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a);
10196 }
10197
10198 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10199 vaddvq_u64 (uint64x2_t __a)
10200 {
10201 return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a);
10202 }
10203
10204 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10205 vaddv_f32 (float32x2_t __a)
10206 {
10207 return __builtin_aarch64_reduc_plus_scal_v2sf (__a);
10208 }
10209
10210 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10211 vaddvq_f32 (float32x4_t __a)
10212 {
10213 return __builtin_aarch64_reduc_plus_scal_v4sf (__a);
10214 }
10215
10216 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10217 vaddvq_f64 (float64x2_t __a)
10218 {
10219 return __builtin_aarch64_reduc_plus_scal_v2df (__a);
10220 }
10221
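/* Usage sketch (illustrative): vaddv reduces across lanes, e.g. the
   horizontal sum that finishes a vectorised dot product:

     static float32_t
     example_dot4 (float32x4_t a, float32x4_t b)
     {
       return vaddvq_f32 (vmulq_f32 (a, b));
     }

   (vmulq_f32 is defined later in this header.)
*/
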
10222 /* vbsl */
10223
10224 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
10225 vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c)
10226 {
10227 return __builtin_aarch64_simd_bslv4hf_suss (__a, __b, __c);
10228 }
10229
10230 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10231 vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
10232 {
10233 return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
10234 }
10235
10236 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
10237 vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c)
10238 {
10239 return (float64x1_t)
10240 { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) };
10241 }
10242
10243 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10244 vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
10245 {
10246 return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
10247 }
10248
10249 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
10250 vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
10251 {
10252 return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
10253 }
10254
10255 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10256 vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
10257 {
10258 return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
10259 }
10260
10261 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10262 vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
10263 {
10264 return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
10265 }
10266
10267 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10268 vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
10269 {
10270 return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
10271 }
10272
10273 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
10274 vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
10275 {
10276 return (int64x1_t)
10277 {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])};
10278 }
10279
10280 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10281 vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
10282 {
10283 return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
10284 }
10285
10286 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10287 vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
10288 {
10289 return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
10290 }
10291
10292 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10293 vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
10294 {
10295 return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
10296 }
10297
10298 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10299 vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
10300 {
10301 return (uint64x1_t)
10302 {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])};
10303 }
10304
10305 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
10306 vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c)
10307 {
10308 return __builtin_aarch64_simd_bslv8hf_suss (__a, __b, __c);
10309 }
10310
10311 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10312 vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
10313 {
10314 return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
10315 }
10316
10317 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10318 vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
10319 {
10320 return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
10321 }
10322
10323 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
10324 vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
10325 {
10326 return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
10327 }
10328
10329 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
10330 vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
10331 {
10332 return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
10333 }
10334
10335 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10336 vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
10337 {
10338 return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
10339 }
10340
10341 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10342 vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
10343 {
10344 return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
10345 }
10346
10347 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10348 vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
10349 {
10350 return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
10351 }
10352
10353 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10354 vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
10355 {
10356 return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
10357 }
10358
10359 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10360 vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
10361 {
10362 return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
10363 }
10364
10365 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10366 vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
10367 {
10368 return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
10369 }
10370
10371 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10372 vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
10373 {
10374 return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
10375 }
10376
10377 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10378 vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
10379 {
10380 return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
10381 }
10382
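/* Usage sketch (illustrative): BSL selects per bit, not per lane, so
   the mask need not come from a comparison.  For example, copying the
   sign bits of B onto the magnitudes of A:

     static float32x4_t
     example_copysign (float32x4_t a, float32x4_t b)
     {
       const uint32x4_t sign = vdupq_n_u32 (0x80000000u);
       return vbslq_f32 (sign, b, a);
     }

   (vdupq_n_u32 is defined later in this header.)
*/
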
10383 /* ARMv8.1-A intrinsics.  */
10384 #pragma GCC push_options
10385 #pragma GCC target ("arch=armv8.1-a")
10386
10387 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10388 vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
10389 {
10390 return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c);
10391 }
10392
10393 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10394 vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
10395 {
10396 return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c);
10397 }
10398
10399 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10400 vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
10401 {
10402 return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c);
10403 }
10404
10405 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10406 vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
10407 {
10408 return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c);
10409 }
10410
10411 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10412 vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
10413 {
10414 return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c);
10415 }
10416
10417 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10418 vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
10419 {
10420 return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c);
10421 }
10422
10423 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10424 vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
10425 {
10426 return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c);
10427 }
10428
10429 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10430 vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
10431 {
10432 return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c);
10433 }
10434
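/* Usage sketch (illustrative): SQRDMLAH/SQRDMLSH perform a rounding,
   doubling, saturating multiply-accumulate on Q15/Q31 fixed point in
   one step, applying a single rounding to the full sum rather than the
   two roundings of a vqrdmulh followed by vqadd:

     static int16x8_t
     example_q15_mla (int16x8_t acc, int16x8_t b, int16x8_t c)
     {
       return vqrdmlahq_s16 (acc, b, c);
     }
*/
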
10435 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10436 vqrdmlah_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d)
10437 {
10438 return __builtin_aarch64_sqrdmlah_laneqv4hi (__a, __b, __c, __d);
10439 }
10440
10441 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10442 vqrdmlah_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d)
10443 {
10444 return __builtin_aarch64_sqrdmlah_laneqv2si (__a, __b, __c, __d);
10445 }
10446
10447 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10448 vqrdmlahq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d)
10449 {
10450 return __builtin_aarch64_sqrdmlah_laneqv8hi (__a, __b, __c, __d);
10451 }
10452
10453 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10454 vqrdmlahq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d)
10455 {
10456 return __builtin_aarch64_sqrdmlah_laneqv4si (__a, __b, __c, __d);
10457 }
10458
10459 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10460 vqrdmlsh_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d)
10461 {
10462 return __builtin_aarch64_sqrdmlsh_laneqv4hi (__a, __b, __c, __d);
10463 }
10464
10465 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10466 vqrdmlsh_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d)
10467 {
10468 return __builtin_aarch64_sqrdmlsh_laneqv2si (__a, __b, __c, __d);
10469 }
10470
10471 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10472 vqrdmlshq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d)
10473 {
10474 return __builtin_aarch64_sqrdmlsh_laneqv8hi (__a, __b, __c, __d);
10475 }
10476
10477 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10478 vqrdmlshq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d)
10479 {
10480 return __builtin_aarch64_sqrdmlsh_laneqv4si (__a, __b, __c, __d);
10481 }
10482
10483 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10484 vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
10485 {
10486 return __builtin_aarch64_sqrdmlah_lanev4hi (__a, __b, __c, __d);
10487 }
10488
10489 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10490 vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
10491 {
10492 return __builtin_aarch64_sqrdmlah_lanev2si (__a, __b, __c, __d);
10493 }
10494
10495 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10496 vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
10497 {
10498 return __builtin_aarch64_sqrdmlah_lanev8hi (__a, __b, __c, __d);
10499 }
10500
10501 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10502 vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
10503 {
10504 return __builtin_aarch64_sqrdmlah_lanev4si (__a, __b, __c, __d);
10505 }
10506
10507 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
10508 vqrdmlahh_s16 (int16_t __a, int16_t __b, int16_t __c)
10509 {
10510 return (int16_t) __builtin_aarch64_sqrdmlahhi (__a, __b, __c);
10511 }
10512
10513 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
10514 vqrdmlahh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d)
10515 {
10516 return __builtin_aarch64_sqrdmlah_lanehi (__a, __b, __c, __d);
10517 }
10518
10519 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
10520 vqrdmlahh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d)
10521 {
10522 return __builtin_aarch64_sqrdmlah_laneqhi (__a, __b, __c, __d);
10523 }
10524
10525 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
10526 vqrdmlahs_s32 (int32_t __a, int32_t __b, int32_t __c)
10527 {
10528 return (int32_t) __builtin_aarch64_sqrdmlahsi (__a, __b, __c);
10529 }
10530
10531 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
10532 vqrdmlahs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d)
10533 {
10534 return __builtin_aarch64_sqrdmlah_lanesi (__a, __b, __c, __d);
10535 }
10536
10537 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
10538 vqrdmlahs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d)
10539 {
10540 return __builtin_aarch64_sqrdmlah_laneqsi (__a, __b, __c, __d);
10541 }
10542
10543 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10544 vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
10545 {
10546 return __builtin_aarch64_sqrdmlsh_lanev4hi (__a, __b, __c, __d);
10547 }
10548
10549 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10550 vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
10551 {
10552 return __builtin_aarch64_sqrdmlsh_lanev2si (__a, __b, __c, __d);
10553 }
10554
10555 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10556 vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
10557 {
10558 return __builtin_aarch64_sqrdmlsh_lanev8hi (__a, __b, __c, __d);
10559 }
10560
10561 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10562 vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
10563 {
10564 return __builtin_aarch64_sqrdmlsh_lanev4si (__a, __b, __c, __d);
10565 }
10566
10567 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
10568 vqrdmlshh_s16 (int16_t __a, int16_t __b, int16_t __c)
10569 {
10570 return (int16_t) __builtin_aarch64_sqrdmlshhi (__a, __b, __c);
10571 }
10572
10573 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
10574 vqrdmlshh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d)
10575 {
10576 return __builtin_aarch64_sqrdmlsh_lanehi (__a, __b, __c, __d);
10577 }
10578
10579 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
10580 vqrdmlshh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d)
10581 {
10582 return __builtin_aarch64_sqrdmlsh_laneqhi (__a, __b, __c, __d);
10583 }
10584
10585 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
10586 vqrdmlshs_s32 (int32_t __a, int32_t __b, int32_t __c)
10587 {
10588 return (int32_t) __builtin_aarch64_sqrdmlshsi (__a, __b, __c);
10589 }
10590
10591 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
10592 vqrdmlshs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d)
10593 {
10594 return __builtin_aarch64_sqrdmlsh_lanesi (__a, __b, __c, __d);
10595 }
10596
10597 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
10598 vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d)
10599 {
10600 return __builtin_aarch64_sqrdmlsh_laneqsi (__a, __b, __c, __d);
10601 }
10602 #pragma GCC pop_options
10603
10604 #pragma GCC push_options
10605 #pragma GCC target ("+nothing+crypto")
10606 /* vaes */
10607
10608 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10609 vaeseq_u8 (uint8x16_t data, uint8x16_t key)
10610 {
10611 return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
10612 }
10613
10614 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10615 vaesdq_u8 (uint8x16_t data, uint8x16_t key)
10616 {
10617 return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
10618 }
10619
10620 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10621 vaesmcq_u8 (uint8x16_t data)
10622 {
10623 return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
10624 }
10625
10626 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10627 vaesimcq_u8 (uint8x16_t data)
10628 {
10629 return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
10630 }
10631 #pragma GCC pop_options
10632
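/* Usage sketch (illustrative): one full AES encryption round is AESE
   (AddRoundKey, SubBytes, ShiftRows) followed by AESMC (MixColumns);
   the final round omits MixColumns.  Callers must themselves be
   compiled with the crypto extension enabled:

     static uint8x16_t
     example_aes_round (uint8x16_t state, uint8x16_t roundkey)
     {
       return vaesmcq_u8 (vaeseq_u8 (state, roundkey));
     }
*/
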
10633 /* vcage */
10634
10635 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10636 vcage_f64 (float64x1_t __a, float64x1_t __b)
10637 {
10638 return vabs_f64 (__a) >= vabs_f64 (__b);
10639 }
10640
10641 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10642 vcages_f32 (float32_t __a, float32_t __b)
10643 {
10644 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
10645 }
10646
10647 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10648 vcage_f32 (float32x2_t __a, float32x2_t __b)
10649 {
10650 return vabs_f32 (__a) >= vabs_f32 (__b);
10651 }
10652
10653 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10654 vcageq_f32 (float32x4_t __a, float32x4_t __b)
10655 {
10656 return vabsq_f32 (__a) >= vabsq_f32 (__b);
10657 }
10658
10659 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10660 vcaged_f64 (float64_t __a, float64_t __b)
10661 {
10662 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
10663 }
10664
10665 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10666 vcageq_f64 (float64x2_t __a, float64x2_t __b)
10667 {
10668 return vabsq_f64 (__a) >= vabsq_f64 (__b);
10669 }
10670
10671 /* vcagt */
10672
10673 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10674 vcagts_f32 (float32_t __a, float32_t __b)
10675 {
10676 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
10677 }
10678
10679 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10680 vcagt_f32 (float32x2_t __a, float32x2_t __b)
10681 {
10682 return vabs_f32 (__a) > vabs_f32 (__b);
10683 }
10684
10685 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10686 vcagt_f64 (float64x1_t __a, float64x1_t __b)
10687 {
10688 return vabs_f64 (__a) > vabs_f64 (__b);
10689 }
10690
10691 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10692 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
10693 {
10694 return vabsq_f32 (__a) > vabsq_f32 (__b);
10695 }
10696
10697 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10698 vcagtd_f64 (float64_t __a, float64_t __b)
10699 {
10700 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
10701 }
10702
10703 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10704 vcagtq_f64 (float64x2_t __a, float64x2_t __b)
10705 {
10706 return vabsq_f64 (__a) > vabsq_f64 (__b);
10707 }
10708
10709 /* vcale */
10710
10711 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10712 vcale_f32 (float32x2_t __a, float32x2_t __b)
10713 {
10714 return vabs_f32 (__a) <= vabs_f32 (__b);
10715 }
10716
10717 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10718 vcale_f64 (float64x1_t __a, float64x1_t __b)
10719 {
10720 return vabs_f64 (__a) <= vabs_f64 (__b);
10721 }
10722
10723 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10724 vcaled_f64 (float64_t __a, float64_t __b)
10725 {
10726 return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0;
10727 }
10728
10729 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10730 vcales_f32 (float32_t __a, float32_t __b)
10731 {
10732 return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? -1 : 0;
10733 }
10734
10735 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10736 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
10737 {
10738 return vabsq_f32 (__a) <= vabsq_f32 (__b);
10739 }
10740
10741 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10742 vcaleq_f64 (float64x2_t __a, float64x2_t __b)
10743 {
10744 return vabsq_f64 (__a) <= vabsq_f64 (__b);
10745 }
10746
10747 /* vcalt */
10748
10749 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10750 vcalt_f32 (float32x2_t __a, float32x2_t __b)
10751 {
10752 return vabs_f32 (__a) < vabs_f32 (__b);
10753 }
10754
10755 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10756 vcalt_f64 (float64x1_t __a, float64x1_t __b)
10757 {
10758 return vabs_f64 (__a) < vabs_f64 (__b);
10759 }
10760
10761 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10762 vcaltd_f64 (float64_t __a, float64_t __b)
10763 {
10764 return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0;
10765 }
10766
10767 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10768 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
10769 {
10770 return vabsq_f32 (__a) < vabsq_f32 (__b);
10771 }
10772
10773 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10774 vcaltq_f64 (float64x2_t __a, float64x2_t __b)
10775 {
10776 return vabsq_f64 (__a) < vabsq_f64 (__b);
10777 }
10778
10779 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10780 vcalts_f32 (float32_t __a, float32_t __b)
10781 {
10782 return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0;
10783 }
10784
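/* Note (explanatory): the vcage/vcagt/vcale/vcalt families compare
   magnitudes, i.e. |a| OP |b|, so range tests need no explicit vabs.
   A usage sketch (illustrative):

     static uint32x4_t
     example_within_unit (float32x4_t x)
     {
       return vcaleq_f32 (x, vdupq_n_f32 (1.0f));
     }

   (vdupq_n_f32 is defined later in this header.)
*/
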
10785 /* vceq - vector. */
10786
10787 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10788 vceq_f32 (float32x2_t __a, float32x2_t __b)
10789 {
10790 return (uint32x2_t) (__a == __b);
10791 }
10792
10793 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10794 vceq_f64 (float64x1_t __a, float64x1_t __b)
10795 {
10796 return (uint64x1_t) (__a == __b);
10797 }
10798
10799 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10800 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
10801 {
10802 return (uint8x8_t) (__a == __b);
10803 }
10804
10805 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10806 vceq_s8 (int8x8_t __a, int8x8_t __b)
10807 {
10808 return (uint8x8_t) (__a == __b);
10809 }
10810
10811 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10812 vceq_s16 (int16x4_t __a, int16x4_t __b)
10813 {
10814 return (uint16x4_t) (__a == __b);
10815 }
10816
10817 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10818 vceq_s32 (int32x2_t __a, int32x2_t __b)
10819 {
10820 return (uint32x2_t) (__a == __b);
10821 }
10822
10823 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10824 vceq_s64 (int64x1_t __a, int64x1_t __b)
10825 {
10826 return (uint64x1_t) (__a == __b);
10827 }
10828
10829 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10830 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
10831 {
10832 return (__a == __b);
10833 }
10834
10835 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10836 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
10837 {
10838 return (__a == __b);
10839 }
10840
10841 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10842 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
10843 {
10844 return (__a == __b);
10845 }
10846
10847 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10848 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
10849 {
10850 return (__a == __b);
10851 }
10852
10853 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10854 vceqq_f32 (float32x4_t __a, float32x4_t __b)
10855 {
10856 return (uint32x4_t) (__a == __b);
10857 }
10858
10859 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10860 vceqq_f64 (float64x2_t __a, float64x2_t __b)
10861 {
10862 return (uint64x2_t) (__a == __b);
10863 }
10864
10865 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10866 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
10867 {
10868 return (uint8x16_t) (__a == __b);
10869 }
10870
10871 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10872 vceqq_s8 (int8x16_t __a, int8x16_t __b)
10873 {
10874 return (uint8x16_t) (__a == __b);
10875 }
10876
10877 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10878 vceqq_s16 (int16x8_t __a, int16x8_t __b)
10879 {
10880 return (uint16x8_t) (__a == __b);
10881 }
10882
10883 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10884 vceqq_s32 (int32x4_t __a, int32x4_t __b)
10885 {
10886 return (uint32x4_t) (__a == __b);
10887 }
10888
10889 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10890 vceqq_s64 (int64x2_t __a, int64x2_t __b)
10891 {
10892 return (uint64x2_t) (__a == __b);
10893 }
10894
10895 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10896 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
10897 {
10898 return (__a == __b);
10899 }
10900
10901 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10902 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
10903 {
10904 return (__a == __b);
10905 }
10906
10907 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10908 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
10909 {
10910 return (__a == __b);
10911 }
10912
10913 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10914 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
10915 {
10916 return (__a == __b);
10917 }
10918
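/* Usage sketch (illustrative): the comparison intrinsics return a mask
   of all-ones or all-zeros per lane, which combines directly with the
   bitwise-select intrinsics for branchless code:

     static float32x4_t
     example_select_eq (float32x4_t x, float32x4_t y,
                        float32x4_t a, float32x4_t b)
     {
       return vbslq_f32 (vceqq_f32 (x, y), a, b);
     }
*/
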
10919 /* vceq - scalar. */
10920
10921 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
10922 vceqs_f32 (float32_t __a, float32_t __b)
10923 {
10924 return __a == __b ? -1 : 0;
10925 }
10926
10927 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10928 vceqd_s64 (int64_t __a, int64_t __b)
10929 {
10930 return __a == __b ? -1ll : 0ll;
10931 }
10932
10933 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10934 vceqd_u64 (uint64_t __a, uint64_t __b)
10935 {
10936 return __a == __b ? -1ll : 0ll;
10937 }
10938
10939 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
10940 vceqd_f64 (float64_t __a, float64_t __b)
10941 {
10942 return __a == __b ? -1ll : 0ll;
10943 }
10944
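/* Note (explanatory): the scalar comparison forms return -1 converted
   to the unsigned result type, i.e. an all-ones mask, so they compose
   with plain bitwise operators the same way the vector forms compose
   with vbsl:

     static uint64_t
     example_select_d (uint64_t x, uint64_t y, uint64_t a, uint64_t b)
     {
       uint64_t mask = vceqd_u64 (x, y);
       return (a & mask) | (b & ~mask);
     }
*/
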
10945 /* vceqz - vector. */
10946
10947 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10948 vceqz_f32 (float32x2_t __a)
10949 {
10950 return (uint32x2_t) (__a == 0.0f);
10951 }
10952
10953 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10954 vceqz_f64 (float64x1_t __a)
10955 {
10956 return (uint64x1_t) (__a == (float64x1_t) {0.0});
10957 }
10958
10959 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10960 vceqz_p8 (poly8x8_t __a)
10961 {
10962 return (uint8x8_t) (__a == 0);
10963 }
10964
10965 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10966 vceqz_s8 (int8x8_t __a)
10967 {
10968 return (uint8x8_t) (__a == 0);
10969 }
10970
10971 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10972 vceqz_s16 (int16x4_t __a)
10973 {
10974 return (uint16x4_t) (__a == 0);
10975 }
10976
10977 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10978 vceqz_s32 (int32x2_t __a)
10979 {
10980 return (uint32x2_t) (__a == 0);
10981 }
10982
10983 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
10984 vceqz_s64 (int64x1_t __a)
10985 {
10986 return (uint64x1_t) (__a == __AARCH64_INT64_C (0));
10987 }
10988
10989 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10990 vceqz_u8 (uint8x8_t __a)
10991 {
10992 return (__a == 0);
10993 }
10994
10995 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10996 vceqz_u16 (uint16x4_t __a)
10997 {
10998 return (__a == 0);
10999 }
11000
11001 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11002 vceqz_u32 (uint32x2_t __a)
11003 {
11004 return (__a == 0);
11005 }
11006
11007 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11008 vceqz_u64 (uint64x1_t __a)
11009 {
11010 return (__a == __AARCH64_UINT64_C (0));
11011 }
11012
11013 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11014 vceqzq_f32 (float32x4_t __a)
11015 {
11016 return (uint32x4_t) (__a == 0.0f);
11017 }
11018
11019 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11020 vceqzq_f64 (float64x2_t __a)
11021 {
11022   return (uint64x2_t) (__a == 0.0);
11023 }
11024
11025 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11026 vceqzq_p8 (poly8x16_t __a)
11027 {
11028 return (uint8x16_t) (__a == 0);
11029 }
11030
11031 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11032 vceqzq_s8 (int8x16_t __a)
11033 {
11034 return (uint8x16_t) (__a == 0);
11035 }
11036
11037 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11038 vceqzq_s16 (int16x8_t __a)
11039 {
11040 return (uint16x8_t) (__a == 0);
11041 }
11042
11043 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11044 vceqzq_s32 (int32x4_t __a)
11045 {
11046 return (uint32x4_t) (__a == 0);
11047 }
11048
11049 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11050 vceqzq_s64 (int64x2_t __a)
11051 {
11052 return (uint64x2_t) (__a == __AARCH64_INT64_C (0));
11053 }
11054
11055 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11056 vceqzq_u8 (uint8x16_t __a)
11057 {
11058 return (__a == 0);
11059 }
11060
11061 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11062 vceqzq_u16 (uint16x8_t __a)
11063 {
11064 return (__a == 0);
11065 }
11066
11067 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11068 vceqzq_u32 (uint32x4_t __a)
11069 {
11070 return (__a == 0);
11071 }
11072
11073 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11074 vceqzq_u64 (uint64x2_t __a)
11075 {
11076 return (__a == __AARCH64_UINT64_C (0));
11077 }
11078
11079 /* vceqz - scalar. */
11080
11081 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11082 vceqzs_f32 (float32_t __a)
11083 {
11084 return __a == 0.0f ? -1 : 0;
11085 }
11086
11087 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11088 vceqzd_s64 (int64_t __a)
11089 {
11090 return __a == 0 ? -1ll : 0ll;
11091 }
11092
11093 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11094 vceqzd_u64 (uint64_t __a)
11095 {
11096 return __a == 0 ? -1ll : 0ll;
11097 }
11098
11099 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11100 vceqzd_f64 (float64_t __a)
11101 {
11102 return __a == 0.0 ? -1ll : 0ll;
11103 }
11104
11105 /* vcge - vector. */
11106
11107 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11108 vcge_f32 (float32x2_t __a, float32x2_t __b)
11109 {
11110 return (uint32x2_t) (__a >= __b);
11111 }
11112
11113 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11114 vcge_f64 (float64x1_t __a, float64x1_t __b)
11115 {
11116 return (uint64x1_t) (__a >= __b);
11117 }
11118
11119 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11120 vcge_s8 (int8x8_t __a, int8x8_t __b)
11121 {
11122 return (uint8x8_t) (__a >= __b);
11123 }
11124
11125 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11126 vcge_s16 (int16x4_t __a, int16x4_t __b)
11127 {
11128 return (uint16x4_t) (__a >= __b);
11129 }
11130
11131 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11132 vcge_s32 (int32x2_t __a, int32x2_t __b)
11133 {
11134 return (uint32x2_t) (__a >= __b);
11135 }
11136
11137 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11138 vcge_s64 (int64x1_t __a, int64x1_t __b)
11139 {
11140 return (uint64x1_t) (__a >= __b);
11141 }
11142
11143 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11144 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
11145 {
11146 return (__a >= __b);
11147 }
11148
11149 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11150 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
11151 {
11152 return (__a >= __b);
11153 }
11154
11155 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11156 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
11157 {
11158 return (__a >= __b);
11159 }
11160
11161 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11162 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
11163 {
11164 return (__a >= __b);
11165 }
11166
11167 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11168 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
11169 {
11170 return (uint32x4_t) (__a >= __b);
11171 }
11172
11173 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11174 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
11175 {
11176 return (uint64x2_t) (__a >= __b);
11177 }
11178
11179 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11180 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
11181 {
11182 return (uint8x16_t) (__a >= __b);
11183 }
11184
11185 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11186 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
11187 {
11188 return (uint16x8_t) (__a >= __b);
11189 }
11190
11191 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11192 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
11193 {
11194 return (uint32x4_t) (__a >= __b);
11195 }
11196
11197 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11198 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
11199 {
11200 return (uint64x2_t) (__a >= __b);
11201 }
11202
11203 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11204 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
11205 {
11206 return (__a >= __b);
11207 }
11208
11209 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11210 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
11211 {
11212 return (__a >= __b);
11213 }
11214
11215 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11216 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
11217 {
11218 return (__a >= __b);
11219 }
11220
11221 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11222 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
11223 {
11224 return (__a >= __b);
11225 }
11226
11227 /* vcge - scalar. */
11228
11229 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11230 vcges_f32 (float32_t __a, float32_t __b)
11231 {
11232 return __a >= __b ? -1 : 0;
11233 }
11234
11235 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11236 vcged_s64 (int64_t __a, int64_t __b)
11237 {
11238 return __a >= __b ? -1ll : 0ll;
11239 }
11240
11241 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11242 vcged_u64 (uint64_t __a, uint64_t __b)
11243 {
11244 return __a >= __b ? -1ll : 0ll;
11245 }
11246
11247 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11248 vcged_f64 (float64_t __a, float64_t __b)
11249 {
11250 return __a >= __b ? -1ll : 0ll;
11251 }
11252
11253 /* vcgez - vector. */
11254
11255 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11256 vcgez_f32 (float32x2_t __a)
11257 {
11258 return (uint32x2_t) (__a >= 0.0f);
11259 }
11260
11261 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11262 vcgez_f64 (float64x1_t __a)
11263 {
11264   return (uint64x1_t) (__a >= (float64x1_t) {0.0});
11265 }
11266
11267 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11268 vcgez_s8 (int8x8_t __a)
11269 {
11270 return (uint8x8_t) (__a >= 0);
11271 }
11272
11273 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11274 vcgez_s16 (int16x4_t __a)
11275 {
11276 return (uint16x4_t) (__a >= 0);
11277 }
11278
11279 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11280 vcgez_s32 (int32x2_t __a)
11281 {
11282 return (uint32x2_t) (__a >= 0);
11283 }
11284
11285 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11286 vcgez_s64 (int64x1_t __a)
11287 {
11288 return (uint64x1_t) (__a >= __AARCH64_INT64_C (0));
11289 }
11290
11291 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11292 vcgezq_f32 (float32x4_t __a)
11293 {
11294 return (uint32x4_t) (__a >= 0.0f);
11295 }
11296
11297 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11298 vcgezq_f64 (float64x2_t __a)
11299 {
11300 return (uint64x2_t) (__a >= 0.0);
11301 }
11302
11303 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11304 vcgezq_s8 (int8x16_t __a)
11305 {
11306 return (uint8x16_t) (__a >= 0);
11307 }
11308
11309 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11310 vcgezq_s16 (int16x8_t __a)
11311 {
11312 return (uint16x8_t) (__a >= 0);
11313 }
11314
11315 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11316 vcgezq_s32 (int32x4_t __a)
11317 {
11318 return (uint32x4_t) (__a >= 0);
11319 }
11320
11321 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11322 vcgezq_s64 (int64x2_t __a)
11323 {
11324 return (uint64x2_t) (__a >= __AARCH64_INT64_C (0));
11325 }
11326
11327 /* vcgez - scalar. */
11328
11329 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11330 vcgezs_f32 (float32_t __a)
11331 {
11332 return __a >= 0.0f ? -1 : 0;
11333 }
11334
11335 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11336 vcgezd_s64 (int64_t __a)
11337 {
11338 return __a >= 0 ? -1ll : 0ll;
11339 }
11340
11341 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11342 vcgezd_f64 (float64_t __a)
11343 {
11344 return __a >= 0.0 ? -1ll : 0ll;
11345 }
11346
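/* Note (explanatory): the compare-against-zero forms map to the
   immediate-zero variants of the compare instructions (e.g. CMGE
   Vd.16B, Vn.16B, #0 for vcgezq_s8), avoiding a register for the zero
   operand:

     static uint8x16_t
     example_nonnegative_mask (int8x16_t v)
     {
       return vcgezq_s8 (v);
     }
*/
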
11347 /* vcgt - vector. */
11348
11349 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11350 vcgt_f32 (float32x2_t __a, float32x2_t __b)
11351 {
11352 return (uint32x2_t) (__a > __b);
11353 }
11354
11355 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11356 vcgt_f64 (float64x1_t __a, float64x1_t __b)
11357 {
11358 return (uint64x1_t) (__a > __b);
11359 }
11360
11361 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11362 vcgt_s8 (int8x8_t __a, int8x8_t __b)
11363 {
11364 return (uint8x8_t) (__a > __b);
11365 }
11366
11367 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11368 vcgt_s16 (int16x4_t __a, int16x4_t __b)
11369 {
11370 return (uint16x4_t) (__a > __b);
11371 }
11372
11373 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11374 vcgt_s32 (int32x2_t __a, int32x2_t __b)
11375 {
11376 return (uint32x2_t) (__a > __b);
11377 }
11378
11379 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11380 vcgt_s64 (int64x1_t __a, int64x1_t __b)
11381 {
11382 return (uint64x1_t) (__a > __b);
11383 }
11384
11385 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11386 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
11387 {
11388 return (__a > __b);
11389 }
11390
11391 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11392 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
11393 {
11394 return (__a > __b);
11395 }
11396
11397 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11398 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
11399 {
11400 return (__a > __b);
11401 }
11402
11403 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11404 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
11405 {
11406 return (__a > __b);
11407 }
11408
11409 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11410 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
11411 {
11412 return (uint32x4_t) (__a > __b);
11413 }
11414
11415 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11416 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
11417 {
11418 return (uint64x2_t) (__a > __b);
11419 }
11420
11421 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11422 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
11423 {
11424 return (uint8x16_t) (__a > __b);
11425 }
11426
11427 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11428 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
11429 {
11430 return (uint16x8_t) (__a > __b);
11431 }
11432
11433 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11434 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
11435 {
11436 return (uint32x4_t) (__a > __b);
11437 }
11438
11439 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11440 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
11441 {
11442 return (uint64x2_t) (__a > __b);
11443 }
11444
11445 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11446 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
11447 {
11448 return (__a > __b);
11449 }
11450
11451 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11452 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
11453 {
11454 return (__a > __b);
11455 }
11456
11457 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11458 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
11459 {
11460 return (__a > __b);
11461 }
11462
11463 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11464 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
11465 {
11466 return (__a > __b);
11467 }
11468
11469 /* vcgt - scalar. */
11470
11471 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11472 vcgts_f32 (float32_t __a, float32_t __b)
11473 {
11474 return __a > __b ? -1 : 0;
11475 }
11476
11477 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11478 vcgtd_s64 (int64_t __a, int64_t __b)
11479 {
11480 return __a > __b ? -1ll : 0ll;
11481 }
11482
11483 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11484 vcgtd_u64 (uint64_t __a, uint64_t __b)
11485 {
11486 return __a > __b ? -1ll : 0ll;
11487 }
11488
11489 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11490 vcgtd_f64 (float64_t __a, float64_t __b)
11491 {
11492 return __a > __b ? -1ll : 0ll;
11493 }
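
/* Editorial usage sketch (illustrative, not part of the original
   header): each lane of a vcgt result is all-ones or all-zeros, and
   the scalar forms return -1 (all bits set) or 0 for the same reason,
   so a comparison can feed a bitwise select directly.  A hypothetical
   lane-wise maximum built from vcgt_f32 and vbsl_f32 (a NaN in either
   lane makes the comparison false, so __b is chosen):

     static inline float32x2_t
     lanewise_max_f32 (float32x2_t __a, float32x2_t __b)
     {
       uint32x2_t __gt = vcgt_f32 (__a, __b);
       return vbsl_f32 (__gt, __a, __b);
     }
*/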
11494
11495 /* vcgtz - vector. */
11496
11497 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11498 vcgtz_f32 (float32x2_t __a)
11499 {
11500 return (uint32x2_t) (__a > 0.0f);
11501 }
11502
11503 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11504 vcgtz_f64 (float64x1_t __a)
11505 {
11506 return (uint64x1_t) (__a > (float64x1_t) {0.0});
11507 }
11508
11509 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11510 vcgtz_s8 (int8x8_t __a)
11511 {
11512 return (uint8x8_t) (__a > 0);
11513 }
11514
11515 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11516 vcgtz_s16 (int16x4_t __a)
11517 {
11518 return (uint16x4_t) (__a > 0);
11519 }
11520
11521 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11522 vcgtz_s32 (int32x2_t __a)
11523 {
11524 return (uint32x2_t) (__a > 0);
11525 }
11526
11527 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11528 vcgtz_s64 (int64x1_t __a)
11529 {
11530 return (uint64x1_t) (__a > __AARCH64_INT64_C (0));
11531 }
11532
11533 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11534 vcgtzq_f32 (float32x4_t __a)
11535 {
11536 return (uint32x4_t) (__a > 0.0f);
11537 }
11538
11539 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11540 vcgtzq_f64 (float64x2_t __a)
11541 {
11542 return (uint64x2_t) (__a > 0.0);
11543 }
11544
11545 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11546 vcgtzq_s8 (int8x16_t __a)
11547 {
11548 return (uint8x16_t) (__a > 0);
11549 }
11550
11551 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11552 vcgtzq_s16 (int16x8_t __a)
11553 {
11554 return (uint16x8_t) (__a > 0);
11555 }
11556
11557 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11558 vcgtzq_s32 (int32x4_t __a)
11559 {
11560 return (uint32x4_t) (__a > 0);
11561 }
11562
11563 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11564 vcgtzq_s64 (int64x2_t __a)
11565 {
11566 return (uint64x2_t) (__a > __AARCH64_INT64_C (0));
11567 }
11568
11569 /* vcgtz - scalar. */
11570
11571 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11572 vcgtzs_f32 (float32_t __a)
11573 {
11574 return __a > 0.0f ? -1 : 0;
11575 }
11576
11577 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11578 vcgtzd_s64 (int64_t __a)
11579 {
11580 return __a > 0 ? -1ll : 0ll;
11581 }
11582
11583 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11584 vcgtzd_f64 (float64_t __a)
11585 {
11586 return __a > 0.0 ? -1ll : 0ll;
11587 }
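
/* Editorial sketch (illustrative): vcgtz is the compare-against-zero
   special case, which has its own compare-with-#0 instruction forms.
   A hypothetical lane-wise ReLU (NaN lanes fall through to zero),
   using vdup_n_f32 from later in this file:

     static inline float32x2_t
     relu_f32 (float32x2_t __x)
     {
       return vbsl_f32 (vcgtz_f32 (__x), __x, vdup_n_f32 (0.0f));
     }
*/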
11588
11589 /* vcle - vector. */
11590
11591 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11592 vcle_f32 (float32x2_t __a, float32x2_t __b)
11593 {
11594 return (uint32x2_t) (__a <= __b);
11595 }
11596
11597 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11598 vcle_f64 (float64x1_t __a, float64x1_t __b)
11599 {
11600 return (uint64x1_t) (__a <= __b);
11601 }
11602
11603 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11604 vcle_s8 (int8x8_t __a, int8x8_t __b)
11605 {
11606 return (uint8x8_t) (__a <= __b);
11607 }
11608
11609 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11610 vcle_s16 (int16x4_t __a, int16x4_t __b)
11611 {
11612 return (uint16x4_t) (__a <= __b);
11613 }
11614
11615 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11616 vcle_s32 (int32x2_t __a, int32x2_t __b)
11617 {
11618 return (uint32x2_t) (__a <= __b);
11619 }
11620
11621 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11622 vcle_s64 (int64x1_t __a, int64x1_t __b)
11623 {
11624 return (uint64x1_t) (__a <= __b);
11625 }
11626
11627 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11628 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
11629 {
11630 return (__a <= __b);
11631 }
11632
11633 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11634 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
11635 {
11636 return (__a <= __b);
11637 }
11638
11639 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11640 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
11641 {
11642 return (__a <= __b);
11643 }
11644
11645 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11646 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
11647 {
11648 return (__a <= __b);
11649 }
11650
11651 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11652 vcleq_f32 (float32x4_t __a, float32x4_t __b)
11653 {
11654 return (uint32x4_t) (__a <= __b);
11655 }
11656
11657 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11658 vcleq_f64 (float64x2_t __a, float64x2_t __b)
11659 {
11660 return (uint64x2_t) (__a <= __b);
11661 }
11662
11663 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11664 vcleq_s8 (int8x16_t __a, int8x16_t __b)
11665 {
11666 return (uint8x16_t) (__a <= __b);
11667 }
11668
11669 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11670 vcleq_s16 (int16x8_t __a, int16x8_t __b)
11671 {
11672 return (uint16x8_t) (__a <= __b);
11673 }
11674
11675 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11676 vcleq_s32 (int32x4_t __a, int32x4_t __b)
11677 {
11678 return (uint32x4_t) (__a <= __b);
11679 }
11680
11681 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11682 vcleq_s64 (int64x2_t __a, int64x2_t __b)
11683 {
11684 return (uint64x2_t) (__a <= __b);
11685 }
11686
11687 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11688 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
11689 {
11690 return (__a <= __b);
11691 }
11692
11693 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11694 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
11695 {
11696 return (__a <= __b);
11697 }
11698
11699 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11700 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
11701 {
11702 return (__a <= __b);
11703 }
11704
11705 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11706 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
11707 {
11708 return (__a <= __b);
11709 }
11710
11711 /* vcle - scalar. */
11712
11713 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11714 vcles_f32 (float32_t __a, float32_t __b)
11715 {
11716 return __a <= __b ? -1 : 0;
11717 }
11718
11719 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11720 vcled_s64 (int64_t __a, int64_t __b)
11721 {
11722 return __a <= __b ? -1ll : 0ll;
11723 }
11724
11725 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11726 vcled_u64 (uint64_t __a, uint64_t __b)
11727 {
11728 return __a <= __b ? -1ll : 0ll;
11729 }
11730
11731 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11732 vcled_f64 (float64_t __a, float64_t __b)
11733 {
11734 return __a <= __b ? -1ll : 0ll;
11735 }
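
/* Editorial note (illustrative): AArch64 SIMD has no register-register
   "less than" or "less than or equal" compares, so vcle and vclt below
   are emitted as CMGE/CMHS and CMGT/CMHI (or the FCM floating-point
   equivalents) with the operands swapped; only the zero-comparing
   vclez/vcltz variants have dedicated #0 encodings.  */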
11736
11737 /* vclez - vector. */
11738
11739 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11740 vclez_f32 (float32x2_t __a)
11741 {
11742 return (uint32x2_t) (__a <= 0.0f);
11743 }
11744
11745 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11746 vclez_f64 (float64x1_t __a)
11747 {
11748 return (uint64x1_t) (__a <= (float64x1_t) {0.0});
11749 }
11750
11751 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11752 vclez_s8 (int8x8_t __a)
11753 {
11754 return (uint8x8_t) (__a <= 0);
11755 }
11756
11757 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11758 vclez_s16 (int16x4_t __a)
11759 {
11760 return (uint16x4_t) (__a <= 0);
11761 }
11762
11763 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11764 vclez_s32 (int32x2_t __a)
11765 {
11766 return (uint32x2_t) (__a <= 0);
11767 }
11768
11769 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11770 vclez_s64 (int64x1_t __a)
11771 {
11772 return (uint64x1_t) (__a <= __AARCH64_INT64_C (0));
11773 }
11774
11775 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11776 vclezq_f32 (float32x4_t __a)
11777 {
11778 return (uint32x4_t) (__a <= 0.0f);
11779 }
11780
11781 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11782 vclezq_f64 (float64x2_t __a)
11783 {
11784 return (uint64x2_t) (__a <= 0.0);
11785 }
11786
11787 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11788 vclezq_s8 (int8x16_t __a)
11789 {
11790 return (uint8x16_t) (__a <= 0);
11791 }
11792
11793 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11794 vclezq_s16 (int16x8_t __a)
11795 {
11796 return (uint16x8_t) (__a <= 0);
11797 }
11798
11799 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11800 vclezq_s32 (int32x4_t __a)
11801 {
11802 return (uint32x4_t) (__a <= 0);
11803 }
11804
11805 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11806 vclezq_s64 (int64x2_t __a)
11807 {
11808 return (uint64x2_t) (__a <= __AARCH64_INT64_C (0));
11809 }
11810
11811 /* vclez - scalar. */
11812
11813 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11814 vclezs_f32 (float32_t __a)
11815 {
11816 return __a <= 0.0f ? -1 : 0;
11817 }
11818
11819 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11820 vclezd_s64 (int64_t __a)
11821 {
11822 return __a <= 0 ? -1ll : 0ll;
11823 }
11824
11825 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11826 vclezd_f64 (float64_t __a)
11827 {
11828 return __a <= 0.0 ? -1ll : 0ll;
11829 }
11830
11831 /* vclt - vector. */
11832
11833 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11834 vclt_f32 (float32x2_t __a, float32x2_t __b)
11835 {
11836 return (uint32x2_t) (__a < __b);
11837 }
11838
11839 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11840 vclt_f64 (float64x1_t __a, float64x1_t __b)
11841 {
11842 return (uint64x1_t) (__a < __b);
11843 }
11844
11845 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11846 vclt_s8 (int8x8_t __a, int8x8_t __b)
11847 {
11848 return (uint8x8_t) (__a < __b);
11849 }
11850
11851 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11852 vclt_s16 (int16x4_t __a, int16x4_t __b)
11853 {
11854 return (uint16x4_t) (__a < __b);
11855 }
11856
11857 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11858 vclt_s32 (int32x2_t __a, int32x2_t __b)
11859 {
11860 return (uint32x2_t) (__a < __b);
11861 }
11862
11863 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11864 vclt_s64 (int64x1_t __a, int64x1_t __b)
11865 {
11866 return (uint64x1_t) (__a < __b);
11867 }
11868
11869 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11870 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
11871 {
11872 return (__a < __b);
11873 }
11874
11875 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11876 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
11877 {
11878 return (__a < __b);
11879 }
11880
11881 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11882 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
11883 {
11884 return (__a < __b);
11885 }
11886
11887 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11888 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
11889 {
11890 return (__a < __b);
11891 }
11892
11893 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11894 vcltq_f32 (float32x4_t __a, float32x4_t __b)
11895 {
11896 return (uint32x4_t) (__a < __b);
11897 }
11898
11899 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11900 vcltq_f64 (float64x2_t __a, float64x2_t __b)
11901 {
11902 return (uint64x2_t) (__a < __b);
11903 }
11904
11905 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11906 vcltq_s8 (int8x16_t __a, int8x16_t __b)
11907 {
11908 return (uint8x16_t) (__a < __b);
11909 }
11910
11911 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11912 vcltq_s16 (int16x8_t __a, int16x8_t __b)
11913 {
11914 return (uint16x8_t) (__a < __b);
11915 }
11916
11917 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11918 vcltq_s32 (int32x4_t __a, int32x4_t __b)
11919 {
11920 return (uint32x4_t) (__a < __b);
11921 }
11922
11923 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11924 vcltq_s64 (int64x2_t __a, int64x2_t __b)
11925 {
11926 return (uint64x2_t) (__a < __b);
11927 }
11928
11929 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11930 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
11931 {
11932 return (__a < __b);
11933 }
11934
11935 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11936 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
11937 {
11938 return (__a < __b);
11939 }
11940
11941 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11942 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
11943 {
11944 return (__a < __b);
11945 }
11946
11947 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11948 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
11949 {
11950 return (__a < __b);
11951 }
11952
11953 /* vclt - scalar. */
11954
11955 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
11956 vclts_f32 (float32_t __a, float32_t __b)
11957 {
11958 return __a < __b ? -1 : 0;
11959 }
11960
11961 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11962 vcltd_s64 (int64_t __a, int64_t __b)
11963 {
11964 return __a < __b ? -1ll : 0ll;
11965 }
11966
11967 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11968 vcltd_u64 (uint64_t __a, uint64_t __b)
11969 {
11970 return __a < __b ? -1ll : 0ll;
11971 }
11972
11973 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
11974 vcltd_f64 (float64_t __a, float64_t __b)
11975 {
11976 return __a < __b ? -1ll : 0ll;
11977 }
11978
11979 /* vcltz - vector. */
11980
11981 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11982 vcltz_f32 (float32x2_t __a)
11983 {
11984 return (uint32x2_t) (__a < 0.0f);
11985 }
11986
11987 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11988 vcltz_f64 (float64x1_t __a)
11989 {
11990 return (uint64x1_t) (__a < (float64x1_t) {0.0});
11991 }
11992
11993 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11994 vcltz_s8 (int8x8_t __a)
11995 {
11996 return (uint8x8_t) (__a < 0);
11997 }
11998
11999 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12000 vcltz_s16 (int16x4_t __a)
12001 {
12002 return (uint16x4_t) (__a < 0);
12003 }
12004
12005 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12006 vcltz_s32 (int32x2_t __a)
12007 {
12008 return (uint32x2_t) (__a < 0);
12009 }
12010
12011 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12012 vcltz_s64 (int64x1_t __a)
12013 {
12014 return (uint64x1_t) (__a < __AARCH64_INT64_C (0));
12015 }
12016
12017 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12018 vcltzq_f32 (float32x4_t __a)
12019 {
12020 return (uint32x4_t) (__a < 0.0f);
12021 }
12022
12023 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12024 vcltzq_f64 (float64x2_t __a)
12025 {
12026 return (uint64x2_t) (__a < 0.0);
12027 }
12028
12029 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12030 vcltzq_s8 (int8x16_t __a)
12031 {
12032 return (uint8x16_t) (__a < 0);
12033 }
12034
12035 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12036 vcltzq_s16 (int16x8_t __a)
12037 {
12038 return (uint16x8_t) (__a < 0);
12039 }
12040
12041 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12042 vcltzq_s32 (int32x4_t __a)
12043 {
12044 return (uint32x4_t) (__a < 0);
12045 }
12046
12047 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12048 vcltzq_s64 (int64x2_t __a)
12049 {
12050 return (uint64x2_t) (__a < __AARCH64_INT64_C (0));
12051 }
12052
12053 /* vcltz - scalar. */
12054
12055 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12056 vcltzs_f32 (float32_t __a)
12057 {
12058 return __a < 0.0f ? -1 : 0;
12059 }
12060
12061 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12062 vcltzd_s64 (int64_t __a)
12063 {
12064 return __a < 0 ? -1ll : 0ll;
12065 }
12066
12067 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12068 vcltzd_f64 (float64_t __a)
12069 {
12070 return __a < 0.0 ? -1ll : 0ll;
12071 }
12072
12073 /* vcls (count leading sign bits). */
12074
12075 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12076 vcls_s8 (int8x8_t __a)
12077 {
12078 return __builtin_aarch64_clrsbv8qi (__a);
12079 }
12080
12081 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12082 vcls_s16 (int16x4_t __a)
12083 {
12084 return __builtin_aarch64_clrsbv4hi (__a);
12085 }
12086
12087 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12088 vcls_s32 (int32x2_t __a)
12089 {
12090 return __builtin_aarch64_clrsbv2si (__a);
12091 }
12092
12093 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12094 vclsq_s8 (int8x16_t __a)
12095 {
12096 return __builtin_aarch64_clrsbv16qi (__a);
12097 }
12098
12099 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12100 vclsq_s16 (int16x8_t __a)
12101 {
12102 return __builtin_aarch64_clrsbv8hi (__a);
12103 }
12104
12105 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12106 vclsq_s32 (int32x4_t __a)
12107 {
12108 return __builtin_aarch64_clrsbv4si (__a);
12109 }
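
/* Editorial note (illustrative): CLS counts the bits immediately below
   the sign bit that are equal to it, i.e. the number of redundant sign
   bits, which is how far a value can be shifted left without changing
   its sign.  Per int8_t lane, for example:

     vcls_s8 (vdup_n_s8 (0))  yields 7 in every lane
     vcls_s8 (vdup_n_s8 (1))  yields 6 in every lane
     vcls_s8 (vdup_n_s8 (-1)) yields 7 in every lane
*/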
12110
12111 /* vclz (count leading zero bits). */
12112
12113 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12114 vclz_s8 (int8x8_t __a)
12115 {
12116 return __builtin_aarch64_clzv8qi (__a);
12117 }
12118
12119 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12120 vclz_s16 (int16x4_t __a)
12121 {
12122 return __builtin_aarch64_clzv4hi (__a);
12123 }
12124
12125 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12126 vclz_s32 (int32x2_t __a)
12127 {
12128 return __builtin_aarch64_clzv2si (__a);
12129 }
12130
12131 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12132 vclz_u8 (uint8x8_t __a)
12133 {
12134 return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
12135 }
12136
12137 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12138 vclz_u16 (uint16x4_t __a)
12139 {
12140 return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
12141 }
12142
12143 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12144 vclz_u32 (uint32x2_t __a)
12145 {
12146 return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
12147 }
12148
12149 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12150 vclzq_s8 (int8x16_t __a)
12151 {
12152 return __builtin_aarch64_clzv16qi (__a);
12153 }
12154
12155 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12156 vclzq_s16 (int16x8_t __a)
12157 {
12158 return __builtin_aarch64_clzv8hi (__a);
12159 }
12160
12161 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12162 vclzq_s32 (int32x4_t __a)
12163 {
12164 return __builtin_aarch64_clzv4si (__a);
12165 }
12166
12167 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12168 vclzq_u8 (uint8x16_t __a)
12169 {
12170 return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
12171 }
12172
12173 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12174 vclzq_u16 (uint16x8_t __a)
12175 {
12176 return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
12177 }
12178
12179 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12180 vclzq_u32 (uint32x4_t __a)
12181 {
12182 return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
12183 }
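
/* Editorial sketch (illustrative): one common use of vclz is a
   lane-wise integer log2.  A hypothetical helper, assuming the caller
   guarantees every lane is nonzero:

     static inline int32x2_t
     ilog2_s32 (int32x2_t __x)
     {
       return vsub_s32 (vdup_n_s32 (31), vclz_s32 (__x));
     }
*/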
12184
12185 /* vcnt (per-byte population count). */
12186
12187 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12188 vcnt_p8 (poly8x8_t __a)
12189 {
12190 return (poly8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
12191 }
12192
12193 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12194 vcnt_s8 (int8x8_t __a)
12195 {
12196 return __builtin_aarch64_popcountv8qi (__a);
12197 }
12198
12199 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12200 vcnt_u8 (uint8x8_t __a)
12201 {
12202 return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
12203 }
12204
12205 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12206 vcntq_p8 (poly8x16_t __a)
12207 {
12208 return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
12209 }
12210
12211 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12212 vcntq_s8 (int8x16_t __a)
12213 {
12214 return __builtin_aarch64_popcountv16qi (__a);
12215 }
12216
12217 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12218 vcntq_u8 (uint8x16_t __a)
12219 {
12220 return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
12221 }
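
/* Editorial sketch (illustrative): vcnt only exists at byte
   granularity; wider per-lane population counts are conventionally
   assembled with the pairwise widening adds.  A hypothetical 16-bit
   per-lane popcount:

     static inline uint16x8_t
     popcountq_u16 (uint16x8_t __x)
     {
       return vpaddlq_u8 (vcntq_u8 (vreinterpretq_u8_u16 (__x)));
     }
*/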
12222
12223 /* vcopy_lane (insert one lane into another vector). */
12224
12225 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12226 vcopy_lane_f32 (float32x2_t __a, const int __lane1,
12227 float32x2_t __b, const int __lane2)
12228 {
12229 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12230 __a, __lane1);
12231 }
12232
12233 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
12234 vcopy_lane_f64 (float64x1_t __a, const int __lane1,
12235 float64x1_t __b, const int __lane2)
12236 {
12237 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12238 __a, __lane1);
12239 }
12240
12241 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12242 vcopy_lane_p8 (poly8x8_t __a, const int __lane1,
12243 poly8x8_t __b, const int __lane2)
12244 {
12245 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12246 __a, __lane1);
12247 }
12248
12249 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12250 vcopy_lane_p16 (poly16x4_t __a, const int __lane1,
12251 poly16x4_t __b, const int __lane2)
12252 {
12253 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12254 __a, __lane1);
12255 }
12256
12257 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12258 vcopy_lane_s8 (int8x8_t __a, const int __lane1,
12259 int8x8_t __b, const int __lane2)
12260 {
12261 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12262 __a, __lane1);
12263 }
12264
12265 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12266 vcopy_lane_s16 (int16x4_t __a, const int __lane1,
12267 int16x4_t __b, const int __lane2)
12268 {
12269 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12270 __a, __lane1);
12271 }
12272
12273 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12274 vcopy_lane_s32 (int32x2_t __a, const int __lane1,
12275 int32x2_t __b, const int __lane2)
12276 {
12277 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12278 __a, __lane1);
12279 }
12280
12281 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12282 vcopy_lane_s64 (int64x1_t __a, const int __lane1,
12283 int64x1_t __b, const int __lane2)
12284 {
12285 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12286 __a, __lane1);
12287 }
12288
12289 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12290 vcopy_lane_u8 (uint8x8_t __a, const int __lane1,
12291 uint8x8_t __b, const int __lane2)
12292 {
12293 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12294 __a, __lane1);
12295 }
12296
12297 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12298 vcopy_lane_u16 (uint16x4_t __a, const int __lane1,
12299 uint16x4_t __b, const int __lane2)
12300 {
12301 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12302 __a, __lane1);
12303 }
12304
12305 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12306 vcopy_lane_u32 (uint32x2_t __a, const int __lane1,
12307 uint32x2_t __b, const int __lane2)
12308 {
12309 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12310 __a, __lane1);
12311 }
12312
12313 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12314 vcopy_lane_u64 (uint64x1_t __a, const int __lane1,
12315 uint64x1_t __b, const int __lane2)
12316 {
12317 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12318 __a, __lane1);
12319 }
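
/* Editorial usage note (illustrative): vcopy_lane inserts one lane of
   the second vector into one lane of the first; both lane arguments
   must be integer constant expressions.  For example, copying lane 0
   of __b into lane 1 of __a:

     float32x2_t __r = vcopy_lane_f32 (__a, 1, __b, 0);
*/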
12320
12321 /* vcopy_laneq. */
12322
12323 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12324 vcopy_laneq_f32 (float32x2_t __a, const int __lane1,
12325 float32x4_t __b, const int __lane2)
12326 {
12327 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12328 __a, __lane1);
12329 }
12330
12331 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
12332 vcopy_laneq_f64 (float64x1_t __a, const int __lane1,
12333 float64x2_t __b, const int __lane2)
12334 {
12335 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12336 __a, __lane1);
12337 }
12338
12339 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12340 vcopy_laneq_p8 (poly8x8_t __a, const int __lane1,
12341 poly8x16_t __b, const int __lane2)
12342 {
12343 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12344 __a, __lane1);
12345 }
12346
12347 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12348 vcopy_laneq_p16 (poly16x4_t __a, const int __lane1,
12349 poly16x8_t __b, const int __lane2)
12350 {
12351 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12352 __a, __lane1);
12353 }
12354
12355 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12356 vcopy_laneq_s8 (int8x8_t __a, const int __lane1,
12357 int8x16_t __b, const int __lane2)
12358 {
12359 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12360 __a, __lane1);
12361 }
12362
12363 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12364 vcopy_laneq_s16 (int16x4_t __a, const int __lane1,
12365 int16x8_t __b, const int __lane2)
12366 {
12367 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12368 __a, __lane1);
12369 }
12370
12371 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12372 vcopy_laneq_s32 (int32x2_t __a, const int __lane1,
12373 int32x4_t __b, const int __lane2)
12374 {
12375 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12376 __a, __lane1);
12377 }
12378
12379 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12380 vcopy_laneq_s64 (int64x1_t __a, const int __lane1,
12381 int64x2_t __b, const int __lane2)
12382 {
12383 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12384 __a, __lane1);
12385 }
12386
12387 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12388 vcopy_laneq_u8 (uint8x8_t __a, const int __lane1,
12389 uint8x16_t __b, const int __lane2)
12390 {
12391 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12392 __a, __lane1);
12393 }
12394
12395 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12396 vcopy_laneq_u16 (uint16x4_t __a, const int __lane1,
12397 uint16x8_t __b, const int __lane2)
12398 {
12399 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12400 __a, __lane1);
12401 }
12402
12403 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12404 vcopy_laneq_u32 (uint32x2_t __a, const int __lane1,
12405 uint32x4_t __b, const int __lane2)
12406 {
12407 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12408 __a, __lane1);
12409 }
12410
12411 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12412 vcopy_laneq_u64 (uint64x1_t __a, const int __lane1,
12413 uint64x2_t __b, const int __lane2)
12414 {
12415 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12416 __a, __lane1);
12417 }
12418
12419 /* vcopyq_lane. */
12420
12421 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12422 vcopyq_lane_f32 (float32x4_t __a, const int __lane1,
12423 float32x2_t __b, const int __lane2)
12424 {
12425 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12426 __a, __lane1);
12427 }
12428
12429 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12430 vcopyq_lane_f64 (float64x2_t __a, const int __lane1,
12431 float64x1_t __b, const int __lane2)
12432 {
12433 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12434 __a, __lane1);
12435 }
12436
12437 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12438 vcopyq_lane_p8 (poly8x16_t __a, const int __lane1,
12439 poly8x8_t __b, const int __lane2)
12440 {
12441 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12442 __a, __lane1);
12443 }
12444
12445 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12446 vcopyq_lane_p16 (poly16x8_t __a, const int __lane1,
12447 poly16x4_t __b, const int __lane2)
12448 {
12449 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12450 __a, __lane1);
12451 }
12452
12453 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12454 vcopyq_lane_s8 (int8x16_t __a, const int __lane1,
12455 int8x8_t __b, const int __lane2)
12456 {
12457 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12458 __a, __lane1);
12459 }
12460
12461 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12462 vcopyq_lane_s16 (int16x8_t __a, const int __lane1,
12463 int16x4_t __b, const int __lane2)
12464 {
12465 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12466 __a, __lane1);
12467 }
12468
12469 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12470 vcopyq_lane_s32 (int32x4_t __a, const int __lane1,
12471 int32x2_t __b, const int __lane2)
12472 {
12473 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12474 __a, __lane1);
12475 }
12476
12477 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12478 vcopyq_lane_s64 (int64x2_t __a, const int __lane1,
12479 int64x1_t __b, const int __lane2)
12480 {
12481 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12482 __a, __lane1);
12483 }
12484
12485 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12486 vcopyq_lane_u8 (uint8x16_t __a, const int __lane1,
12487 uint8x8_t __b, const int __lane2)
12488 {
12489 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12490 __a, __lane1);
12491 }
12492
12493 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12494 vcopyq_lane_u16 (uint16x8_t __a, const int __lane1,
12495 uint16x4_t __b, const int __lane2)
12496 {
12497 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12498 __a, __lane1);
12499 }
12500
12501 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12502 vcopyq_lane_u32 (uint32x4_t __a, const int __lane1,
12503 uint32x2_t __b, const int __lane2)
12504 {
12505 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12506 __a, __lane1);
12507 }
12508
12509 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12510 vcopyq_lane_u64 (uint64x2_t __a, const int __lane1,
12511 uint64x1_t __b, const int __lane2)
12512 {
12513 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12514 __a, __lane1);
12515 }
12516
12517 /* vcopyq_laneq. */
12518
12519 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12520 vcopyq_laneq_f32 (float32x4_t __a, const int __lane1,
12521 float32x4_t __b, const int __lane2)
12522 {
12523 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12524 __a, __lane1);
12525 }
12526
12527 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12528 vcopyq_laneq_f64 (float64x2_t __a, const int __lane1,
12529 float64x2_t __b, const int __lane2)
12530 {
12531 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12532 __a, __lane1);
12533 }
12534
12535 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12536 vcopyq_laneq_p8 (poly8x16_t __a, const int __lane1,
12537 poly8x16_t __b, const int __lane2)
12538 {
12539 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12540 __a, __lane1);
12541 }
12542
12543 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12544 vcopyq_laneq_p16 (poly16x8_t __a, const int __lane1,
12545 poly16x8_t __b, const int __lane2)
12546 {
12547 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12548 __a, __lane1);
12549 }
12550
12551 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12552 vcopyq_laneq_s8 (int8x16_t __a, const int __lane1,
12553 int8x16_t __b, const int __lane2)
12554 {
12555 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12556 __a, __lane1);
12557 }
12558
12559 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12560 vcopyq_laneq_s16 (int16x8_t __a, const int __lane1,
12561 int16x8_t __b, const int __lane2)
12562 {
12563 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12564 __a, __lane1);
12565 }
12566
12567 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12568 vcopyq_laneq_s32 (int32x4_t __a, const int __lane1,
12569 int32x4_t __b, const int __lane2)
12570 {
12571 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12572 __a, __lane1);
12573 }
12574
12575 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12576 vcopyq_laneq_s64 (int64x2_t __a, const int __lane1,
12577 int64x2_t __b, const int __lane2)
12578 {
12579 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12580 __a, __lane1);
12581 }
12582
12583 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12584 vcopyq_laneq_u8 (uint8x16_t __a, const int __lane1,
12585 uint8x16_t __b, const int __lane2)
12586 {
12587 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12588 __a, __lane1);
12589 }
12590
12591 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12592 vcopyq_laneq_u16 (uint16x8_t __a, const int __lane1,
12593 uint16x8_t __b, const int __lane2)
12594 {
12595 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12596 __a, __lane1);
12597 }
12598
12599 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12600 vcopyq_laneq_u32 (uint32x4_t __a, const int __lane1,
12601 uint32x4_t __b, const int __lane2)
12602 {
12603 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12604 __a, __lane1);
12605 }
12606
12607 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12608 vcopyq_laneq_u64 (uint64x2_t __a, const int __lane1,
12609 uint64x2_t __b, const int __lane2)
12610 {
12611 return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
12612 __a, __lane1);
12613 }
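
/* Editorial note (illustrative): in the four vcopy families above, a
   "q" directly after "vcopy" marks a 128-bit destination and a "laneq"
   suffix marks a 128-bit source, so vcopyq_lane_* inserts a 64-bit
   source lane into a 128-bit vector and vcopy_laneq_* the reverse.  */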
12614
12615 /* vcvt (narrowing: float -> half, double -> float). */
12616
12617 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
12618 vcvt_f16_f32 (float32x4_t __a)
12619 {
12620 return __builtin_aarch64_float_truncate_lo_v4hf (__a);
12621 }
12622
12623 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
12624 vcvt_high_f16_f32 (float16x4_t __a, float32x4_t __b)
12625 {
12626 return __builtin_aarch64_float_truncate_hi_v8hf (__a, __b);
12627 }
12628
12629 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12630 vcvt_f32_f64 (float64x2_t __a)
12631 {
12632 return __builtin_aarch64_float_truncate_lo_v2sf (__a);
12633 }
12634
12635 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12636 vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
12637 {
12638 return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
12639 }
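
/* Editorial sketch (illustrative): the lo/hi narrowing pairs are meant
   to be combined.  A hypothetical helper packing eight float32 values
   into one float16x8_t:

     static inline float16x8_t
     pack_f16x8 (float32x4_t __lo, float32x4_t __hi)
     {
       return vcvt_high_f16_f32 (vcvt_f16_f32 (__lo), __hi);
     }
*/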
12640
12641 /* vcvt (widening: half -> float, float -> double). */
12642
12643 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12644 vcvt_f32_f16 (float16x4_t __a)
12645 {
12646 return __builtin_aarch64_float_extend_lo_v4sf (__a);
12647 }
12648
12649 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12650 vcvt_f64_f32 (float32x2_t __a)
12651 {
12653 return __builtin_aarch64_float_extend_lo_v2df (__a);
12654 }
12655
12656 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12657 vcvt_high_f32_f16 (float16x8_t __a)
12658 {
12659 return __builtin_aarch64_vec_unpacks_hi_v8hf (__a);
12660 }
12661
12662 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12663 vcvt_high_f64_f32 (float32x4_t __a)
12664 {
12665 return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
12666 }
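
/* Editorial note (illustrative): the "high" widening forms convert the
   upper half of their 128-bit input, so a float16x8_t widens to two
   float32x4_t halves via vcvt_f32_f16 on its low 64 bits and
   vcvt_high_f32_f16 on the full vector.  */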
12667
12668 /* vcvt (<u>fixed-point -> float). */
12669
12670 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
12671 vcvtd_n_f64_s64 (int64_t __a, const int __b)
12672 {
12673 return __builtin_aarch64_scvtfdi (__a, __b);
12674 }
12675
12676 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
12677 vcvtd_n_f64_u64 (uint64_t __a, const int __b)
12678 {
12679 return __builtin_aarch64_ucvtfdi_sus (__a, __b);
12680 }
12681
12682 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
12683 vcvts_n_f32_s32 (int32_t __a, const int __b)
12684 {
12685 return __builtin_aarch64_scvtfsi (__a, __b);
12686 }
12687
12688 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
12689 vcvts_n_f32_u32 (uint32_t __a, const int __b)
12690 {
12691 return __builtin_aarch64_ucvtfsi_sus (__a, __b);
12692 }
12693
12694 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12695 vcvt_n_f32_s32 (int32x2_t __a, const int __b)
12696 {
12697 return __builtin_aarch64_scvtfv2si (__a, __b);
12698 }
12699
12700 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12701 vcvt_n_f32_u32 (uint32x2_t __a, const int __b)
12702 {
12703 return __builtin_aarch64_ucvtfv2si_sus (__a, __b);
12704 }
12705
12706 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
12707 vcvt_n_f64_s64 (int64x1_t __a, const int __b)
12708 {
12709 return (float64x1_t)
12710 { __builtin_aarch64_scvtfdi (vget_lane_s64 (__a, 0), __b) };
12711 }
12712
12713 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
12714 vcvt_n_f64_u64 (uint64x1_t __a, const int __b)
12715 {
12716 return (float64x1_t)
12717 { __builtin_aarch64_ucvtfdi_sus (vget_lane_u64 (__a, 0), __b) };
12718 }
12719
12720 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12721 vcvtq_n_f32_s32 (int32x4_t __a, const int __b)
12722 {
12723 return __builtin_aarch64_scvtfv4si (__a, __b);
12724 }
12725
12726 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12727 vcvtq_n_f32_u32 (uint32x4_t __a, const int __b)
12728 {
12729 return __builtin_aarch64_ucvtfv4si_sus (__a, __b);
12730 }
12731
12732 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12733 vcvtq_n_f64_s64 (int64x2_t __a, const int __b)
12734 {
12735 return __builtin_aarch64_scvtfv2di (__a, __b);
12736 }
12737
12738 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12739 vcvtq_n_f64_u64 (uint64x2_t __a, const int __b)
12740 {
12741 return __builtin_aarch64_ucvtfv2di_sus (__a, __b);
12742 }
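
/* Editorial note (illustrative): the _n_ forms treat the integer input
   as fixed-point with __b fractional bits, i.e. the result is __a
   divided by 2**__b; __b must be a constant expression, in [1, 32] for
   the 32-bit forms and [1, 64] for the 64-bit forms.  */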
12743
12744 /* vcvt (float -> <u>fixed-point). */
12745
12746 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
12747 vcvtd_n_s64_f64 (float64_t __a, const int __b)
12748 {
12749 return __builtin_aarch64_fcvtzsdf (__a, __b);
12750 }
12751
12752 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12753 vcvtd_n_u64_f64 (float64_t __a, const int __b)
12754 {
12755 return __builtin_aarch64_fcvtzudf_uss (__a, __b);
12756 }
12757
12758 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
12759 vcvts_n_s32_f32 (float32_t __a, const int __b)
12760 {
12761 return __builtin_aarch64_fcvtzssf (__a, __b);
12762 }
12763
12764 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12765 vcvts_n_u32_f32 (float32_t __a, const int __b)
12766 {
12767 return __builtin_aarch64_fcvtzusf_uss (__a, __b);
12768 }
12769
12770 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12771 vcvt_n_s32_f32 (float32x2_t __a, const int __b)
12772 {
12773 return __builtin_aarch64_fcvtzsv2sf (__a, __b);
12774 }
12775
12776 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12777 vcvt_n_u32_f32 (float32x2_t __a, const int __b)
12778 {
12779 return __builtin_aarch64_fcvtzuv2sf_uss (__a, __b);
12780 }
12781
12782 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12783 vcvt_n_s64_f64 (float64x1_t __a, const int __b)
12784 {
12785 return (int64x1_t)
12786 { __builtin_aarch64_fcvtzsdf (vget_lane_f64 (__a, 0), __b) };
12787 }
12788
12789 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12790 vcvt_n_u64_f64 (float64x1_t __a, const int __b)
12791 {
12792 return (uint64x1_t)
12793 { __builtin_aarch64_fcvtzudf_uss (vget_lane_f64 (__a, 0), __b) };
12794 }
12795
12796 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12797 vcvtq_n_s32_f32 (float32x4_t __a, const int __b)
12798 {
12799 return __builtin_aarch64_fcvtzsv4sf (__a, __b);
12800 }
12801
12802 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12803 vcvtq_n_u32_f32 (float32x4_t __a, const int __b)
12804 {
12805 return __builtin_aarch64_fcvtzuv4sf_uss (__a, __b);
12806 }
12807
12808 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12809 vcvtq_n_s64_f64 (float64x2_t __a, const int __b)
12810 {
12811 return __builtin_aarch64_fcvtzsv2df (__a, __b);
12812 }
12813
12814 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12815 vcvtq_n_u64_f64 (float64x2_t __a, const int __b)
12816 {
12817 return __builtin_aarch64_fcvtzuv2df_uss (__a, __b);
12818 }
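
/* Editorial note (illustrative): this direction multiplies by 2**__b
   before a saturating round-toward-zero conversion, so for example
   vcvts_n_s32_f32 (1.5f, 16) yields 1.5 * 65536 = 98304.  */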
12819
12820 /* vcvt (<u>int -> float). */
12821
12822 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
12823 vcvtd_f64_s64 (int64_t __a)
12824 {
12825 return (float64_t) __a;
12826 }
12827
12828 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
12829 vcvtd_f64_u64 (uint64_t __a)
12830 {
12831 return (float64_t) __a;
12832 }
12833
12834 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
12835 vcvts_f32_s32 (int32_t __a)
12836 {
12837 return (float32_t) __a;
12838 }
12839
12840 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
12841 vcvts_f32_u32 (uint32_t __a)
12842 {
12843 return (float32_t) __a;
12844 }
12845
12846 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12847 vcvt_f32_s32 (int32x2_t __a)
12848 {
12849 return __builtin_aarch64_floatv2siv2sf (__a);
12850 }
12851
12852 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12853 vcvt_f32_u32 (uint32x2_t __a)
12854 {
12855 return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
12856 }
12857
12858 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
12859 vcvt_f64_s64 (int64x1_t __a)
12860 {
12861 return (float64x1_t) { vget_lane_s64 (__a, 0) };
12862 }
12863
12864 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
12865 vcvt_f64_u64 (uint64x1_t __a)
12866 {
12867 return (float64x1_t) { vget_lane_u64 (__a, 0) };
12868 }
12869
12870 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12871 vcvtq_f32_s32 (int32x4_t __a)
12872 {
12873 return __builtin_aarch64_floatv4siv4sf (__a);
12874 }
12875
12876 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12877 vcvtq_f32_u32 (uint32x4_t __a)
12878 {
12879 return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
12880 }
12881
12882 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12883 vcvtq_f64_s64 (int64x2_t __a)
12884 {
12885 return __builtin_aarch64_floatv2div2df (__a);
12886 }
12887
12888 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12889 vcvtq_f64_u64 (uint64x2_t __a)
12890 {
12891 return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
12892 }
12893
12894 /* vcvt (float -> <u>int, round toward zero). */
12895
12896 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
12897 vcvtd_s64_f64 (float64_t __a)
12898 {
12899 return (int64_t) __a;
12900 }
12901
12902 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12903 vcvtd_u64_f64 (float64_t __a)
12904 {
12905 return (uint64_t) __a;
12906 }
12907
12908 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
12909 vcvts_s32_f32 (float32_t __a)
12910 {
12911 return (int32_t) __a;
12912 }
12913
12914 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12915 vcvts_u32_f32 (float32_t __a)
12916 {
12917 return (uint32_t) __a;
12918 }
12919
12920 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12921 vcvt_s32_f32 (float32x2_t __a)
12922 {
12923 return __builtin_aarch64_lbtruncv2sfv2si (__a);
12924 }
12925
12926 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12927 vcvt_u32_f32 (float32x2_t __a)
12928 {
12929 return __builtin_aarch64_lbtruncuv2sfv2si_us (__a);
12930 }
12931
12932 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12933 vcvtq_s32_f32 (float32x4_t __a)
12934 {
12935 return __builtin_aarch64_lbtruncv4sfv4si (__a);
12936 }
12937
12938 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12939 vcvtq_u32_f32 (float32x4_t __a)
12940 {
12941 return __builtin_aarch64_lbtruncuv4sfv4si_us (__a);
12942 }
12943
12944 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
12945 vcvt_s64_f64 (float64x1_t __a)
12946 {
12947 return (int64x1_t) {vcvtd_s64_f64 (__a[0])};
12948 }
12949
12950 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
12951 vcvt_u64_f64 (float64x1_t __a)
12952 {
12953 return (uint64x1_t) {vcvtd_u64_f64 (__a[0])};
12954 }
12955
12956 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12957 vcvtq_s64_f64 (float64x2_t __a)
12958 {
12959 return __builtin_aarch64_lbtruncv2dfv2di (__a);
12960 }
12961
12962 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12963 vcvtq_u64_f64 (float64x2_t __a)
12964 {
12965 return __builtin_aarch64_lbtruncuv2dfv2di_us (__a);
12966 }
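
/* Editorial note (illustrative): plain vcvt truncates toward zero; the
   vcvta/vcvtm/vcvtn/vcvtp families below select a rounding mode
   instead.  For the input 2.5f the scalar results are:

     vcvts_s32_f32  (2.5f) -> 2   (toward zero)
     vcvtas_s32_f32 (2.5f) -> 3   (nearest, ties away from zero)
     vcvtms_s32_f32 (2.5f) -> 2   (toward minus infinity)
     vcvtns_s32_f32 (2.5f) -> 2   (nearest, ties to even)
     vcvtps_s32_f32 (2.5f) -> 3   (toward plus infinity)
*/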
12967
12968 /* vcvta (round to nearest, ties away from zero). */
12969
12970 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
12971 vcvtad_s64_f64 (float64_t __a)
12972 {
12973 return __builtin_aarch64_lrounddfdi (__a);
12974 }
12975
12976 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12977 vcvtad_u64_f64 (float64_t __a)
12978 {
12979 return __builtin_aarch64_lroundudfdi_us (__a);
12980 }
12981
12982 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
12983 vcvtas_s32_f32 (float32_t __a)
12984 {
12985 return __builtin_aarch64_lroundsfsi (__a);
12986 }
12987
12988 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
12989 vcvtas_u32_f32 (float32_t __a)
12990 {
12991 return __builtin_aarch64_lroundusfsi_us (__a);
12992 }
12993
12994 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12995 vcvta_s32_f32 (float32x2_t __a)
12996 {
12997 return __builtin_aarch64_lroundv2sfv2si (__a);
12998 }
12999
13000 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13001 vcvta_u32_f32 (float32x2_t __a)
13002 {
13003 return __builtin_aarch64_lrounduv2sfv2si_us (__a);
13004 }
13005
13006 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13007 vcvtaq_s32_f32 (float32x4_t __a)
13008 {
13009 return __builtin_aarch64_lroundv4sfv4si (__a);
13010 }
13011
13012 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13013 vcvtaq_u32_f32 (float32x4_t __a)
13014 {
13015 return __builtin_aarch64_lrounduv4sfv4si_us (__a);
13016 }
13017
13018 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13019 vcvta_s64_f64 (float64x1_t __a)
13020 {
13021 return (int64x1_t) {vcvtad_s64_f64 (__a[0])};
13022 }
13023
13024 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13025 vcvta_u64_f64 (float64x1_t __a)
13026 {
13027 return (uint64x1_t) {vcvtad_u64_f64 (__a[0])};
13028 }
13029
13030 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13031 vcvtaq_s64_f64 (float64x2_t __a)
13032 {
13033 return __builtin_aarch64_lroundv2dfv2di (__a);
13034 }
13035
13036 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13037 vcvtaq_u64_f64 (float64x2_t __a)
13038 {
13039 return __builtin_aarch64_lrounduv2dfv2di_us (__a);
13040 }
13041
13042 /* vcvtm (round toward minus infinity). */
13043
13044 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13045 vcvtmd_s64_f64 (float64_t __a)
13046 {
13047 return __builtin_llfloor (__a);
13048 }
13049
13050 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13051 vcvtmd_u64_f64 (float64_t __a)
13052 {
13053 return __builtin_aarch64_lfloorudfdi_us (__a);
13054 }
13055
13056 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13057 vcvtms_s32_f32 (float32_t __a)
13058 {
13059 return __builtin_ifloorf (__a);
13060 }
13061
13062 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13063 vcvtms_u32_f32 (float32_t __a)
13064 {
13065 return __builtin_aarch64_lfloorusfsi_us (__a);
13066 }
13067
13068 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13069 vcvtm_s32_f32 (float32x2_t __a)
13070 {
13071 return __builtin_aarch64_lfloorv2sfv2si (__a);
13072 }
13073
13074 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13075 vcvtm_u32_f32 (float32x2_t __a)
13076 {
13077 return __builtin_aarch64_lflooruv2sfv2si_us (__a);
13078 }
13079
13080 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13081 vcvtmq_s32_f32 (float32x4_t __a)
13082 {
13083 return __builtin_aarch64_lfloorv4sfv4si (__a);
13084 }
13085
13086 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13087 vcvtmq_u32_f32 (float32x4_t __a)
13088 {
13089 return __builtin_aarch64_lflooruv4sfv4si_us (__a);
13090 }
13091
13092 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13093 vcvtm_s64_f64 (float64x1_t __a)
13094 {
13095 return (int64x1_t) {vcvtmd_s64_f64 (__a[0])};
13096 }
13097
13098 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13099 vcvtm_u64_f64 (float64x1_t __a)
13100 {
13101 return (uint64x1_t) {vcvtmd_u64_f64 (__a[0])};
13102 }
13103
13104 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13105 vcvtmq_s64_f64 (float64x2_t __a)
13106 {
13107 return __builtin_aarch64_lfloorv2dfv2di (__a);
13108 }
13109
13110 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13111 vcvtmq_u64_f64 (float64x2_t __a)
13112 {
13113 return __builtin_aarch64_lflooruv2dfv2di_us (__a);
13114 }
13115
13116 /* vcvtn (round to nearest, ties to even). */
13117
13118 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13119 vcvtnd_s64_f64 (float64_t __a)
13120 {
13121 return __builtin_aarch64_lfrintndfdi (__a);
13122 }
13123
13124 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13125 vcvtnd_u64_f64 (float64_t __a)
13126 {
13127 return __builtin_aarch64_lfrintnudfdi_us (__a);
13128 }
13129
13130 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13131 vcvtns_s32_f32 (float32_t __a)
13132 {
13133 return __builtin_aarch64_lfrintnsfsi (__a);
13134 }
13135
13136 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13137 vcvtns_u32_f32 (float32_t __a)
13138 {
13139 return __builtin_aarch64_lfrintnusfsi_us (__a);
13140 }
13141
13142 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13143 vcvtn_s32_f32 (float32x2_t __a)
13144 {
13145 return __builtin_aarch64_lfrintnv2sfv2si (__a);
13146 }
13147
13148 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13149 vcvtn_u32_f32 (float32x2_t __a)
13150 {
13151 return __builtin_aarch64_lfrintnuv2sfv2si_us (__a);
13152 }
13153
13154 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13155 vcvtnq_s32_f32 (float32x4_t __a)
13156 {
13157 return __builtin_aarch64_lfrintnv4sfv4si (__a);
13158 }
13159
13160 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13161 vcvtnq_u32_f32 (float32x4_t __a)
13162 {
13163 return __builtin_aarch64_lfrintnuv4sfv4si_us (__a);
13164 }
13165
13166 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13167 vcvtn_s64_f64 (float64x1_t __a)
13168 {
13169 return (int64x1_t) {vcvtnd_s64_f64 (__a[0])};
13170 }
13171
13172 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13173 vcvtn_u64_f64 (float64x1_t __a)
13174 {
13175 return (uint64x1_t) {vcvtnd_u64_f64 (__a[0])};
13176 }
13177
13178 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13179 vcvtnq_s64_f64 (float64x2_t __a)
13180 {
13181 return __builtin_aarch64_lfrintnv2dfv2di (__a);
13182 }
13183
13184 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13185 vcvtnq_u64_f64 (float64x2_t __a)
13186 {
13187 return __builtin_aarch64_lfrintnuv2dfv2di_us (__a);
13188 }
13189
13190 /* vcvtp (round toward plus infinity). */
13191
13192 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13193 vcvtpd_s64_f64 (float64_t __a)
13194 {
13195 return __builtin_llceil (__a);
13196 }
13197
13198 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13199 vcvtpd_u64_f64 (float64_t __a)
13200 {
13201 return __builtin_aarch64_lceiludfdi_us (__a);
13202 }
13203
13204 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13205 vcvtps_s32_f32 (float32_t __a)
13206 {
13207 return __builtin_iceilf (__a);
13208 }
13209
13210 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13211 vcvtps_u32_f32 (float32_t __a)
13212 {
13213 return __builtin_aarch64_lceilusfsi_us (__a);
13214 }
13215
13216 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13217 vcvtp_s32_f32 (float32x2_t __a)
13218 {
13219 return __builtin_aarch64_lceilv2sfv2si (__a);
13220 }
13221
13222 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13223 vcvtp_u32_f32 (float32x2_t __a)
13224 {
13225 return __builtin_aarch64_lceiluv2sfv2si_us (__a);
13226 }
13227
13228 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13229 vcvtpq_s32_f32 (float32x4_t __a)
13230 {
13231 return __builtin_aarch64_lceilv4sfv4si (__a);
13232 }
13233
13234 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13235 vcvtpq_u32_f32 (float32x4_t __a)
13236 {
13237 return __builtin_aarch64_lceiluv4sfv4si_us (__a);
13238 }
13239
13240 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13241 vcvtp_s64_f64 (float64x1_t __a)
13242 {
13243 return (int64x1_t) {vcvtpd_s64_f64 (__a[0])};
13244 }
13245
13246 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13247 vcvtp_u64_f64 (float64x1_t __a)
13248 {
13249 return (uint64x1_t) {vcvtpd_u64_f64 (__a[0])};
13250 }
13251
13252 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13253 vcvtpq_s64_f64 (float64x2_t __a)
13254 {
13255 return __builtin_aarch64_lceilv2dfv2di (__a);
13256 }
13257
13258 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13259 vcvtpq_u64_f64 (float64x2_t __a)
13260 {
13261 return __builtin_aarch64_lceiluv2dfv2di_us (__a);
13262 }
13263
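/* Editor's note: a minimal usage sketch for the vcvtm/vcvtn/vcvtp
   float-to-integer conversions above (illustrative only, not part of the
   GCC header).  vcvtm rounds toward minus infinity, vcvtn to nearest with
   ties to even, and vcvtp toward plus infinity, as the lfloor/lfrintn/lceil
   builtins they expand to suggest.  Assuming a translation unit that
   includes <arm_neon.h> and targets AArch64:

     #include <arm_neon.h>

     void
     round_three_ways (float32x2_t v, int32x2_t *m, int32x2_t *n, int32x2_t *p)
     {
       *m = vcvtm_s32_f32 (v);   // e.g. 2.5f -> 2, -2.5f -> -3
       *n = vcvtn_s32_f32 (v);   // e.g. 2.5f -> 2, -2.5f -> -2 (ties to even)
       *p = vcvtp_s32_f32 (v);   // e.g. 2.5f -> 3, -2.5f -> -2
     }
*/
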
13264 /* vdup_n */
13265
13266 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
13267 vdup_n_f16 (float16_t __a)
13268 {
13269 return (float16x4_t) {__a, __a, __a, __a};
13270 }
13271
13272 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13273 vdup_n_f32 (float32_t __a)
13274 {
13275 return (float32x2_t) {__a, __a};
13276 }
13277
13278 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13279 vdup_n_f64 (float64_t __a)
13280 {
13281 return (float64x1_t) {__a};
13282 }
13283
13284 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13285 vdup_n_p8 (poly8_t __a)
13286 {
13287 return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
13288 }
13289
13290 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13291 vdup_n_p16 (poly16_t __a)
13292 {
13293 return (poly16x4_t) {__a, __a, __a, __a};
13294 }
13295
13296 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13297 vdup_n_s8 (int8_t __a)
13298 {
13299 return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
13300 }
13301
13302 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13303 vdup_n_s16 (int16_t __a)
13304 {
13305 return (int16x4_t) {__a, __a, __a, __a};
13306 }
13307
13308 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13309 vdup_n_s32 (int32_t __a)
13310 {
13311 return (int32x2_t) {__a, __a};
13312 }
13313
13314 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13315 vdup_n_s64 (int64_t __a)
13316 {
13317 return (int64x1_t) {__a};
13318 }
13319
13320 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13321 vdup_n_u8 (uint8_t __a)
13322 {
13323 return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
13324 }
13325
13326 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13327 vdup_n_u16 (uint16_t __a)
13328 {
13329 return (uint16x4_t) {__a, __a, __a, __a};
13330 }
13331
13332 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13333 vdup_n_u32 (uint32_t __a)
13334 {
13335 return (uint32x2_t) {__a, __a};
13336 }
13337
13338 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13339 vdup_n_u64 (uint64_t __a)
13340 {
13341 return (uint64x1_t) {__a};
13342 }
13343
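/* Editor's note: a usage sketch for the vdup_n family above (illustrative
   only, not part of the GCC header).  Each vdup_n_* builds a 64-bit vector
   with every lane set to the scalar argument:

     #include <arm_neon.h>

     float32x2_t
     both_halves (float32_t x)
     {
       return vdup_n_f32 (x);   // {x, x}
     }
*/
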
13344 /* vdupq_n */
13345
13346 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
13347 vdupq_n_f16 (float16_t __a)
13348 {
13349 return (float16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
13350 }
13351
13352 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13353 vdupq_n_f32 (float32_t __a)
13354 {
13355 return (float32x4_t) {__a, __a, __a, __a};
13356 }
13357
13358 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13359 vdupq_n_f64 (float64_t __a)
13360 {
13361 return (float64x2_t) {__a, __a};
13362 }
13363
13364 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13365 vdupq_n_p8 (uint32_t __a)
13366 {
13367 return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
13368 __a, __a, __a, __a, __a, __a, __a, __a};
13369 }
13370
13371 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13372 vdupq_n_p16 (uint32_t __a)
13373 {
13374 return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
13375 }
13376
13377 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13378 vdupq_n_s8 (int32_t __a)
13379 {
13380 return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
13381 __a, __a, __a, __a, __a, __a, __a, __a};
13382 }
13383
13384 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13385 vdupq_n_s16 (int32_t __a)
13386 {
13387 return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
13388 }
13389
13390 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13391 vdupq_n_s32 (int32_t __a)
13392 {
13393 return (int32x4_t) {__a, __a, __a, __a};
13394 }
13395
13396 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13397 vdupq_n_s64 (int64_t __a)
13398 {
13399 return (int64x2_t) {__a, __a};
13400 }
13401
13402 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13403 vdupq_n_u8 (uint32_t __a)
13404 {
13405 return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
13406 __a, __a, __a, __a, __a, __a, __a, __a};
13407 }
13408
13409 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13410 vdupq_n_u16 (uint32_t __a)
13411 {
13412 return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
13413 }
13414
13415 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13416 vdupq_n_u32 (uint32_t __a)
13417 {
13418 return (uint32x4_t) {__a, __a, __a, __a};
13419 }
13420
13421 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13422 vdupq_n_u64 (uint64_t __a)
13423 {
13424 return (uint64x2_t) {__a, __a};
13425 }
13426
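/* Editor's note (observation on the source above, not a change): several
   128-bit vdupq_n_* variants declare their scalar parameter with a widened
   type -- vdupq_n_s8 takes int32_t and vdupq_n_p8 takes uint32_t -- so the
   argument is converted back down to the element type lane by lane.  A
   hedged sketch:

     #include <arm_neon.h>

     int8x16_t
     splat_bytes (int8_t x)
     {
       return vdupq_n_s8 (x);   // x promotes to int32_t, truncates per lane
     }
*/
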
13427 /* vdup_lane */
13428
13429 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
13430 vdup_lane_f16 (float16x4_t __a, const int __b)
13431 {
13432 return __aarch64_vdup_lane_f16 (__a, __b);
13433 }
13434
13435 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13436 vdup_lane_f32 (float32x2_t __a, const int __b)
13437 {
13438 return __aarch64_vdup_lane_f32 (__a, __b);
13439 }
13440
13441 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13442 vdup_lane_f64 (float64x1_t __a, const int __b)
13443 {
13444 return __aarch64_vdup_lane_f64 (__a, __b);
13445 }
13446
13447 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13448 vdup_lane_p8 (poly8x8_t __a, const int __b)
13449 {
13450 return __aarch64_vdup_lane_p8 (__a, __b);
13451 }
13452
13453 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13454 vdup_lane_p16 (poly16x4_t __a, const int __b)
13455 {
13456 return __aarch64_vdup_lane_p16 (__a, __b);
13457 }
13458
13459 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13460 vdup_lane_s8 (int8x8_t __a, const int __b)
13461 {
13462 return __aarch64_vdup_lane_s8 (__a, __b);
13463 }
13464
13465 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13466 vdup_lane_s16 (int16x4_t __a, const int __b)
13467 {
13468 return __aarch64_vdup_lane_s16 (__a, __b);
13469 }
13470
13471 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13472 vdup_lane_s32 (int32x2_t __a, const int __b)
13473 {
13474 return __aarch64_vdup_lane_s32 (__a, __b);
13475 }
13476
13477 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13478 vdup_lane_s64 (int64x1_t __a, const int __b)
13479 {
13480 return __aarch64_vdup_lane_s64 (__a, __b);
13481 }
13482
13483 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13484 vdup_lane_u8 (uint8x8_t __a, const int __b)
13485 {
13486 return __aarch64_vdup_lane_u8 (__a, __b);
13487 }
13488
13489 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13490 vdup_lane_u16 (uint16x4_t __a, const int __b)
13491 {
13492 return __aarch64_vdup_lane_u16 (__a, __b);
13493 }
13494
13495 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13496 vdup_lane_u32 (uint32x2_t __a, const int __b)
13497 {
13498 return __aarch64_vdup_lane_u32 (__a, __b);
13499 }
13500
13501 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13502 vdup_lane_u64 (uint64x1_t __a, const int __b)
13503 {
13504 return __aarch64_vdup_lane_u64 (__a, __b);
13505 }
13506
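/* Editor's note: a usage sketch for vdup_lane (illustrative only).  The lane
   argument must be an integer constant expression in range for the source
   vector; it selects one element and broadcasts it to every lane:

     #include <arm_neon.h>

     int16x4_t
     spread_lane2 (int16x4_t v)
     {
       return vdup_lane_s16 (v, 2);   // {v[2], v[2], v[2], v[2]}
     }
*/
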
13507 /* vdup_laneq */
13508
13509 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
13510 vdup_laneq_f16 (float16x8_t __a, const int __b)
13511 {
13512 return __aarch64_vdup_laneq_f16 (__a, __b);
13513 }
13514
13515 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13516 vdup_laneq_f32 (float32x4_t __a, const int __b)
13517 {
13518 return __aarch64_vdup_laneq_f32 (__a, __b);
13519 }
13520
13521 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13522 vdup_laneq_f64 (float64x2_t __a, const int __b)
13523 {
13524 return __aarch64_vdup_laneq_f64 (__a, __b);
13525 }
13526
13527 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13528 vdup_laneq_p8 (poly8x16_t __a, const int __b)
13529 {
13530 return __aarch64_vdup_laneq_p8 (__a, __b);
13531 }
13532
13533 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13534 vdup_laneq_p16 (poly16x8_t __a, const int __b)
13535 {
13536 return __aarch64_vdup_laneq_p16 (__a, __b);
13537 }
13538
13539 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13540 vdup_laneq_s8 (int8x16_t __a, const int __b)
13541 {
13542 return __aarch64_vdup_laneq_s8 (__a, __b);
13543 }
13544
13545 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13546 vdup_laneq_s16 (int16x8_t __a, const int __b)
13547 {
13548 return __aarch64_vdup_laneq_s16 (__a, __b);
13549 }
13550
13551 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13552 vdup_laneq_s32 (int32x4_t __a, const int __b)
13553 {
13554 return __aarch64_vdup_laneq_s32 (__a, __b);
13555 }
13556
13557 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13558 vdup_laneq_s64 (int64x2_t __a, const int __b)
13559 {
13560 return __aarch64_vdup_laneq_s64 (__a, __b);
13561 }
13562
13563 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13564 vdup_laneq_u8 (uint8x16_t __a, const int __b)
13565 {
13566 return __aarch64_vdup_laneq_u8 (__a, __b);
13567 }
13568
13569 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13570 vdup_laneq_u16 (uint16x8_t __a, const int __b)
13571 {
13572 return __aarch64_vdup_laneq_u16 (__a, __b);
13573 }
13574
13575 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13576 vdup_laneq_u32 (uint32x4_t __a, const int __b)
13577 {
13578 return __aarch64_vdup_laneq_u32 (__a, __b);
13579 }
13580
13581 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13582 vdup_laneq_u64 (uint64x2_t __a, const int __b)
13583 {
13584 return __aarch64_vdup_laneq_u64 (__a, __b);
13585 }
13586
13587 /* vdupq_lane */
13588
13589 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
13590 vdupq_lane_f16 (float16x4_t __a, const int __b)
13591 {
13592 return __aarch64_vdupq_lane_f16 (__a, __b);
13593 }
13594
13595 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13596 vdupq_lane_f32 (float32x2_t __a, const int __b)
13597 {
13598 return __aarch64_vdupq_lane_f32 (__a, __b);
13599 }
13600
13601 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13602 vdupq_lane_f64 (float64x1_t __a, const int __b)
13603 {
13604 return __aarch64_vdupq_lane_f64 (__a, __b);
13605 }
13606
13607 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13608 vdupq_lane_p8 (poly8x8_t __a, const int __b)
13609 {
13610 return __aarch64_vdupq_lane_p8 (__a, __b);
13611 }
13612
13613 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13614 vdupq_lane_p16 (poly16x4_t __a, const int __b)
13615 {
13616 return __aarch64_vdupq_lane_p16 (__a, __b);
13617 }
13618
13619 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13620 vdupq_lane_s8 (int8x8_t __a, const int __b)
13621 {
13622 return __aarch64_vdupq_lane_s8 (__a, __b);
13623 }
13624
13625 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13626 vdupq_lane_s16 (int16x4_t __a, const int __b)
13627 {
13628 return __aarch64_vdupq_lane_s16 (__a, __b);
13629 }
13630
13631 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13632 vdupq_lane_s32 (int32x2_t __a, const int __b)
13633 {
13634 return __aarch64_vdupq_lane_s32 (__a, __b);
13635 }
13636
13637 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13638 vdupq_lane_s64 (int64x1_t __a, const int __b)
13639 {
13640 return __aarch64_vdupq_lane_s64 (__a, __b);
13641 }
13642
13643 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13644 vdupq_lane_u8 (uint8x8_t __a, const int __b)
13645 {
13646 return __aarch64_vdupq_lane_u8 (__a, __b);
13647 }
13648
13649 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13650 vdupq_lane_u16 (uint16x4_t __a, const int __b)
13651 {
13652 return __aarch64_vdupq_lane_u16 (__a, __b);
13653 }
13654
13655 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13656 vdupq_lane_u32 (uint32x2_t __a, const int __b)
13657 {
13658 return __aarch64_vdupq_lane_u32 (__a, __b);
13659 }
13660
13661 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13662 vdupq_lane_u64 (uint64x1_t __a, const int __b)
13663 {
13664 return __aarch64_vdupq_lane_u64 (__a, __b);
13665 }
13666
13667 /* vdupq_laneq */
13668
13669 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
13670 vdupq_laneq_f16 (float16x8_t __a, const int __b)
13671 {
13672 return __aarch64_vdupq_laneq_f16 (__a, __b);
13673 }
13674
13675 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13676 vdupq_laneq_f32 (float32x4_t __a, const int __b)
13677 {
13678 return __aarch64_vdupq_laneq_f32 (__a, __b);
13679 }
13680
13681 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13682 vdupq_laneq_f64 (float64x2_t __a, const int __b)
13683 {
13684 return __aarch64_vdupq_laneq_f64 (__a, __b);
13685 }
13686
13687 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13688 vdupq_laneq_p8 (poly8x16_t __a, const int __b)
13689 {
13690 return __aarch64_vdupq_laneq_p8 (__a, __b);
13691 }
13692
13693 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13694 vdupq_laneq_p16 (poly16x8_t __a, const int __b)
13695 {
13696 return __aarch64_vdupq_laneq_p16 (__a, __b);
13697 }
13698
13699 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13700 vdupq_laneq_s8 (int8x16_t __a, const int __b)
13701 {
13702 return __aarch64_vdupq_laneq_s8 (__a, __b);
13703 }
13704
13705 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13706 vdupq_laneq_s16 (int16x8_t __a, const int __b)
13707 {
13708 return __aarch64_vdupq_laneq_s16 (__a, __b);
13709 }
13710
13711 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13712 vdupq_laneq_s32 (int32x4_t __a, const int __b)
13713 {
13714 return __aarch64_vdupq_laneq_s32 (__a, __b);
13715 }
13716
13717 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13718 vdupq_laneq_s64 (int64x2_t __a, const int __b)
13719 {
13720 return __aarch64_vdupq_laneq_s64 (__a, __b);
13721 }
13722
13723 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13724 vdupq_laneq_u8 (uint8x16_t __a, const int __b)
13725 {
13726 return __aarch64_vdupq_laneq_u8 (__a, __b);
13727 }
13728
13729 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13730 vdupq_laneq_u16 (uint16x8_t __a, const int __b)
13731 {
13732 return __aarch64_vdupq_laneq_u16 (__a, __b);
13733 }
13734
13735 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13736 vdupq_laneq_u32 (uint32x4_t __a, const int __b)
13737 {
13738 return __aarch64_vdupq_laneq_u32 (__a, __b);
13739 }
13740
13741 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13742 vdupq_laneq_u64 (uint64x2_t __a, const int __b)
13743 {
13744 return __aarch64_vdupq_laneq_u64 (__a, __b);
13745 }
13746
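/* Editor's note: the four dup-by-lane families above differ only in operand
   widths: vdup_lane (64-bit in, 64-bit out), vdup_laneq (128 in, 64 out),
   vdupq_lane (64 in, 128 out) and vdupq_laneq (128 in, 128 out).  A hedged
   sketch mixing two of them:

     #include <arm_neon.h>

     float32x4_t
     widen_lane (float32x2_t lo, float32x4_t hi)
     {
       float32x4_t a = vdupq_lane_f32 (lo, 1);    // broadcast lo[1] to 4 lanes
       float32x4_t b = vdupq_laneq_f32 (hi, 3);   // broadcast hi[3] to 4 lanes
       return vaddq_f32 (a, b);
     }
*/
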
13747 /* vdupb_lane */
13748 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
13749 vdupb_lane_p8 (poly8x8_t __a, const int __b)
13750 {
13751 return __aarch64_vget_lane_any (__a, __b);
13752 }
13753
13754 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
13755 vdupb_lane_s8 (int8x8_t __a, const int __b)
13756 {
13757 return __aarch64_vget_lane_any (__a, __b);
13758 }
13759
13760 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13761 vdupb_lane_u8 (uint8x8_t __a, const int __b)
13762 {
13763 return __aarch64_vget_lane_any (__a, __b);
13764 }
13765
13766 /* vduph_lane */
13767
13768 __extension__ static __inline float16_t __attribute__ ((__always_inline__))
13769 vduph_lane_f16 (float16x4_t __a, const int __b)
13770 {
13771 return __aarch64_vget_lane_any (__a, __b);
13772 }
13773
13774 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
13775 vduph_lane_p16 (poly16x4_t __a, const int __b)
13776 {
13777 return __aarch64_vget_lane_any (__a, __b);
13778 }
13779
13780 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
13781 vduph_lane_s16 (int16x4_t __a, const int __b)
13782 {
13783 return __aarch64_vget_lane_any (__a, __b);
13784 }
13785
13786 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13787 vduph_lane_u16 (uint16x4_t __a, const int __b)
13788 {
13789 return __aarch64_vget_lane_any (__a, __b);
13790 }
13791
13792 /* vdups_lane */
13793
13794 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13795 vdups_lane_f32 (float32x2_t __a, const int __b)
13796 {
13797 return __aarch64_vget_lane_any (__a, __b);
13798 }
13799
13800 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13801 vdups_lane_s32 (int32x2_t __a, const int __b)
13802 {
13803 return __aarch64_vget_lane_any (__a, __b);
13804 }
13805
13806 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13807 vdups_lane_u32 (uint32x2_t __a, const int __b)
13808 {
13809 return __aarch64_vget_lane_any (__a, __b);
13810 }
13811
13812 /* vdupd_lane */
13813 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13814 vdupd_lane_f64 (float64x1_t __a, const int __b)
13815 {
13816 __AARCH64_LANE_CHECK (__a, __b);
13817 return __a[0];
13818 }
13819
13820 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13821 vdupd_lane_s64 (int64x1_t __a, const int __b)
13822 {
13823 __AARCH64_LANE_CHECK (__a, __b);
13824 return __a[0];
13825 }
13826
13827 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13828 vdupd_lane_u64 (uint64x1_t __a, const int __b)
13829 {
13830 __AARCH64_LANE_CHECK (__a, __b);
13831 return __a[0];
13832 }
13833
13834 /* vdupb_laneq */
13835 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
13836 vdupb_laneq_p8 (poly8x16_t __a, const int __b)
13837 {
13838 return __aarch64_vget_lane_any (__a, __b);
13839 }
13840
13841 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
13842 vdupb_laneq_s8 (int8x16_t __a, const int __b)
13843 {
13844 return __aarch64_vget_lane_any (__a, __b);
13845 }
13846
13847 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13848 vdupb_laneq_u8 (uint8x16_t __a, const int __b)
13849 {
13850 return __aarch64_vget_lane_any (__a, __b);
13851 }
13852
13853 /* vduph_laneq */
13854
13855 __extension__ static __inline float16_t __attribute__ ((__always_inline__))
13856 vduph_laneq_f16 (float16x8_t __a, const int __b)
13857 {
13858 return __aarch64_vget_lane_any (__a, __b);
13859 }
13860
13861 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
13862 vduph_laneq_p16 (poly16x8_t __a, const int __b)
13863 {
13864 return __aarch64_vget_lane_any (__a, __b);
13865 }
13866
13867 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
13868 vduph_laneq_s16 (int16x8_t __a, const int __b)
13869 {
13870 return __aarch64_vget_lane_any (__a, __b);
13871 }
13872
13873 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13874 vduph_laneq_u16 (uint16x8_t __a, const int __b)
13875 {
13876 return __aarch64_vget_lane_any (__a, __b);
13877 }
13878
13879 /* vdups_laneq */
13880
13881 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13882 vdups_laneq_f32 (float32x4_t __a, const int __b)
13883 {
13884 return __aarch64_vget_lane_any (__a, __b);
13885 }
13886
13887 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13888 vdups_laneq_s32 (int32x4_t __a, const int __b)
13889 {
13890 return __aarch64_vget_lane_any (__a, __b);
13891 }
13892
13893 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13894 vdups_laneq_u32 (uint32x4_t __a, const int __b)
13895 {
13896 return __aarch64_vget_lane_any (__a, __b);
13897 }
13898
13899 /* vdupd_laneq */
13900 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13901 vdupd_laneq_f64 (float64x2_t __a, const int __b)
13902 {
13903 return __aarch64_vget_lane_any (__a, __b);
13904 }
13905
13906 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13907 vdupd_laneq_s64 (int64x2_t __a, const int __b)
13908 {
13909 return __aarch64_vget_lane_any (__a, __b);
13910 }
13911
13912 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13913 vdupd_laneq_u64 (uint64x2_t __a, const int __b)
13914 {
13915 return __aarch64_vget_lane_any (__a, __b);
13916 }
13917
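/* Editor's note: the scalar vdupb/vduph/vdups/vdupd forms above are lane
   extractions (they expand to __aarch64_vget_lane_any), so they return one
   element of the vector as a scalar.  A hedged sketch:

     #include <arm_neon.h>

     float32_t
     third_element (float32x4_t v)
     {
       return vdups_laneq_f32 (v, 2);   // scalar copy of v[2]
     }
*/
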
13918 /* vext */
13919
13920 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
13921 vext_f16 (float16x4_t __a, float16x4_t __b, __const int __c)
13922 {
13923 __AARCH64_LANE_CHECK (__a, __c);
13924 #ifdef __AARCH64EB__
13925 return __builtin_shuffle (__b, __a,
13926 (uint16x4_t) {4 - __c, 5 - __c, 6 - __c, 7 - __c});
13927 #else
13928 return __builtin_shuffle (__a, __b,
13929 (uint16x4_t) {__c, __c + 1, __c + 2, __c + 3});
13930 #endif
13931 }
13932
13933 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13934 vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
13935 {
13936 __AARCH64_LANE_CHECK (__a, __c);
13937 #ifdef __AARCH64EB__
13938 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
13939 #else
13940 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
13941 #endif
13942 }
13943
13944 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13945 vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
13946 {
13947 __AARCH64_LANE_CHECK (__a, __c);
13948 /* The only valid index for a one-element vector is 0, so the result is simply __a. */
13949 return __a;
13950 }
13951 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13952 vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
13953 {
13954 __AARCH64_LANE_CHECK (__a, __c);
13955 #ifdef __AARCH64EB__
13956 return __builtin_shuffle (__b, __a, (uint8x8_t)
13957 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
13958 #else
13959 return __builtin_shuffle (__a, __b,
13960 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
13961 #endif
13962 }
13963
13964 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13965 vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
13966 {
13967 __AARCH64_LANE_CHECK (__a, __c);
13968 #ifdef __AARCH64EB__
13969 return __builtin_shuffle (__b, __a,
13970 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
13971 #else
13972 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
13973 #endif
13974 }
13975
13976 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13977 vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
13978 {
13979 __AARCH64_LANE_CHECK (__a, __c);
13980 #ifdef __AARCH64EB__
13981 return __builtin_shuffle (__b, __a, (uint8x8_t)
13982 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
13983 #else
13984 return __builtin_shuffle (__a, __b,
13985 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
13986 #endif
13987 }
13988
13989 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13990 vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
13991 {
13992 __AARCH64_LANE_CHECK (__a, __c);
13993 #ifdef __AARCH64EB__
13994 return __builtin_shuffle (__b, __a,
13995 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
13996 #else
13997 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
13998 #endif
13999 }
14000
14001 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14002 vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
14003 {
14004 __AARCH64_LANE_CHECK (__a, __c);
14005 #ifdef __AARCH64EB__
14006 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
14007 #else
14008 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
14009 #endif
14010 }
14011
14012 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14013 vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
14014 {
14015 __AARCH64_LANE_CHECK (__a, __c);
14016 /* The only valid index for a one-element vector is 0, so the result is simply __a. */
14017 return __a;
14018 }
14019
14020 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14021 vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
14022 {
14023 __AARCH64_LANE_CHECK (__a, __c);
14024 #ifdef __AARCH64EB__
14025 return __builtin_shuffle (__b, __a, (uint8x8_t)
14026 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14027 #else
14028 return __builtin_shuffle (__a, __b,
14029 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14030 #endif
14031 }
14032
14033 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14034 vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
14035 {
14036 __AARCH64_LANE_CHECK (__a, __c);
14037 #ifdef __AARCH64EB__
14038 return __builtin_shuffle (__b, __a,
14039 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14040 #else
14041 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
14042 #endif
14043 }
14044
14045 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14046 vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
14047 {
14048 __AARCH64_LANE_CHECK (__a, __c);
14049 #ifdef __AARCH64EB__
14050 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
14051 #else
14052 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
14053 #endif
14054 }
14055
14056 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14057 vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
14058 {
14059 __AARCH64_LANE_CHECK (__a, __c);
14060 /* The only valid index for a one-element vector is 0, so the result is simply __a. */
14061 return __a;
14062 }
14063
14064 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
14065 vextq_f16 (float16x8_t __a, float16x8_t __b, __const int __c)
14066 {
14067 __AARCH64_LANE_CHECK (__a, __c);
14068 #ifdef __AARCH64EB__
14069 return __builtin_shuffle (__b, __a,
14070 (uint16x8_t) {8 - __c, 9 - __c, 10 - __c, 11 - __c,
14071 12 - __c, 13 - __c, 14 - __c,
14072 15 - __c});
14073 #else
14074 return __builtin_shuffle (__a, __b,
14075 (uint16x8_t) {__c, __c + 1, __c + 2, __c + 3,
14076 __c + 4, __c + 5, __c + 6, __c + 7});
14077 #endif
14078 }
14079
14080 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14081 vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
14082 {
14083 __AARCH64_LANE_CHECK (__a, __c);
14084 #ifdef __AARCH64EB__
14085 return __builtin_shuffle (__b, __a,
14086 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14087 #else
14088 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
14089 #endif
14090 }
14091
14092 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14093 vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
14094 {
14095 __AARCH64_LANE_CHECK (__a, __c);
14096 #ifdef __AARCH64EB__
14097 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
14098 #else
14099 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
14100 #endif
14101 }
14102
14103 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14104 vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
14105 {
14106 __AARCH64_LANE_CHECK (__a, __c);
14107 #ifdef __AARCH64EB__
14108 return __builtin_shuffle (__b, __a, (uint8x16_t)
14109 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
14110 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
14111 #else
14112 return __builtin_shuffle (__a, __b, (uint8x16_t)
14113 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
14114 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
14115 #endif
14116 }
14117
14118 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14119 vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
14120 {
14121 __AARCH64_LANE_CHECK (__a, __c);
14122 #ifdef __AARCH64EB__
14123 return __builtin_shuffle (__b, __a, (uint16x8_t)
14124 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14125 #else
14126 return __builtin_shuffle (__a, __b,
14127 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14128 #endif
14129 }
14130
14131 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14132 vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
14133 {
14134 __AARCH64_LANE_CHECK (__a, __c);
14135 #ifdef __AARCH64EB__
14136 return __builtin_shuffle (__b, __a, (uint8x16_t)
14137 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
14138 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
14139 #else
14140 return __builtin_shuffle (__a, __b, (uint8x16_t)
14141 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
14142 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
14143 #endif
14144 }
14145
14146 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14147 vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
14148 {
14149 __AARCH64_LANE_CHECK (__a, __c);
14150 #ifdef __AARCH64EB__
14151 return __builtin_shuffle (__b, __a, (uint16x8_t)
14152 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14153 #else
14154 return __builtin_shuffle (__a, __b,
14155 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14156 #endif
14157 }
14158
14159 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14160 vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
14161 {
14162 __AARCH64_LANE_CHECK (__a, __c);
14163 #ifdef __AARCH64EB__
14164 return __builtin_shuffle (__b, __a,
14165 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14166 #else
14167 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
14168 #endif
14169 }
14170
14171 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14172 vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
14173 {
14174 __AARCH64_LANE_CHECK (__a, __c);
14175 #ifdef __AARCH64EB__
14176 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
14177 #else
14178 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
14179 #endif
14180 }
14181
14182 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14183 vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
14184 {
14185 __AARCH64_LANE_CHECK (__a, __c);
14186 #ifdef __AARCH64EB__
14187 return __builtin_shuffle (__b, __a, (uint8x16_t)
14188 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
14189 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
14190 #else
14191 return __builtin_shuffle (__a, __b, (uint8x16_t)
14192 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
14193 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
14194 #endif
14195 }
14196
14197 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14198 vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
14199 {
14200 __AARCH64_LANE_CHECK (__a, __c);
14201 #ifdef __AARCH64EB__
14202 return __builtin_shuffle (__b, __a, (uint16x8_t)
14203 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
14204 #else
14205 return __builtin_shuffle (__a, __b,
14206 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
14207 #endif
14208 }
14209
14210 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14211 vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
14212 {
14213 __AARCH64_LANE_CHECK (__a, __c);
14214 #ifdef __AARCH64EB__
14215 return __builtin_shuffle (__b, __a,
14216 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
14217 #else
14218 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
14219 #endif
14220 }
14221
14222 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14223 vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
14224 {
14225 __AARCH64_LANE_CHECK (__a, __c);
14226 #ifdef __AARCH64EB__
14227 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
14228 #else
14229 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
14230 #endif
14231 }
14232
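/* Editor's note: a usage sketch for vext (illustrative only).  vext (a, b, n)
   behaves as if A and B were concatenated and extraction started at element
   N: the result is {a[n], ..., a[len-1], b[0], ..., b[n-1]}, which the
   endian-specific shuffle masks above implement:

     #include <arm_neon.h>

     uint8x8_t
     rotate_pair (uint8x8_t a, uint8x8_t b)
     {
       return vext_u8 (a, b, 3);   // {a[3]..a[7], b[0], b[1], b[2]}
     }
*/
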
14233 /* vfma */
14234
14235 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14236 vfma_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
14237 {
14238 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
14239 }
14240
14241 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14242 vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
14243 {
14244 return __builtin_aarch64_fmav2sf (__b, __c, __a);
14245 }
14246
14247 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14248 vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
14249 {
14250 return __builtin_aarch64_fmav4sf (__b, __c, __a);
14251 }
14252
14253 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14254 vfmaq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
14255 {
14256 return __builtin_aarch64_fmav2df (__b, __c, __a);
14257 }
14258
14259 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14260 vfma_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
14261 {
14262 return __builtin_aarch64_fmav2sf (__b, vdup_n_f32 (__c), __a);
14263 }
14264
14265 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14266 vfma_n_f64 (float64x1_t __a, float64x1_t __b, float64_t __c)
14267 {
14268 return (float64x1_t) {__b[0] * __c + __a[0]};
14269 }
14270
14271 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14272 vfmaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
14273 {
14274 return __builtin_aarch64_fmav4sf (__b, vdupq_n_f32 (__c), __a);
14275 }
14276
14277 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14278 vfmaq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c)
14279 {
14280 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a);
14281 }
14282
14283 /* vfma_lane */
14284
14285 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14286 vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
14287 float32x2_t __c, const int __lane)
14288 {
14289 return __builtin_aarch64_fmav2sf (__b,
14290 __aarch64_vdup_lane_f32 (__c, __lane),
14291 __a);
14292 }
14293
14294 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14295 vfma_lane_f64 (float64x1_t __a, float64x1_t __b,
14296 float64x1_t __c, const int __lane)
14297 {
14298 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
14299 }
14300
14301 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14302 vfmad_lane_f64 (float64_t __a, float64_t __b,
14303 float64x1_t __c, const int __lane)
14304 {
14305 return __builtin_fma (__b, __c[0], __a);
14306 }
14307
14308 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14309 vfmas_lane_f32 (float32_t __a, float32_t __b,
14310 float32x2_t __c, const int __lane)
14311 {
14312 return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a);
14313 }
14314
14315 /* vfma_laneq */
14316
14317 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14318 vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
14319 float32x4_t __c, const int __lane)
14320 {
14321 return __builtin_aarch64_fmav2sf (__b,
14322 __aarch64_vdup_laneq_f32 (__c, __lane),
14323 __a);
14324 }
14325
14326 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14327 vfma_laneq_f64 (float64x1_t __a, float64x1_t __b,
14328 float64x2_t __c, const int __lane)
14329 {
14330 float64_t __c0 = __aarch64_vget_lane_any (__c, __lane);
14331 return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])};
14332 }
14333
14334 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14335 vfmad_laneq_f64 (float64_t __a, float64_t __b,
14336 float64x2_t __c, const int __lane)
14337 {
14338 return __builtin_fma (__b, __aarch64_vget_lane_any (__c, __lane), __a);
14339 }
14340
14341 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14342 vfmas_laneq_f32 (float32_t __a, float32_t __b,
14343 float32x4_t __c, const int __lane)
14344 {
14345 return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a);
14346 }
14347
14348 /* vfmaq_lane */
14349
14350 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14351 vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
14352 float32x2_t __c, const int __lane)
14353 {
14354 return __builtin_aarch64_fmav4sf (__b,
14355 __aarch64_vdupq_lane_f32 (__c, __lane),
14356 __a);
14357 }
14358
14359 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14360 vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
14361 float64x1_t __c, const int __lane)
14362 {
14363 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a);
14364 }
14365
14366 /* vfmaq_laneq */
14367
14368 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14369 vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
14370 float32x4_t __c, const int __lane)
14371 {
14372 return __builtin_aarch64_fmav4sf (__b,
14373 __aarch64_vdupq_laneq_f32 (__c, __lane),
14374 __a);
14375 }
14376
14377 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14378 vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
14379 float64x2_t __c, const int __lane)
14380 {
14381 return __builtin_aarch64_fmav2df (__b,
14382 __aarch64_vdupq_laneq_f64 (__c, __lane),
14383 __a);
14384 }
14385
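/* Editor's note: a usage sketch for vfma (illustrative only).  Note the
   operand order: vfma (a, b, c) computes a + b * c per lane with a single
   rounding; the builtins above take __b and __c as the multiplicands and
   __a as the addend:

     #include <arm_neon.h>

     float32x4_t
     axpy4 (float32x4_t acc, float32x4_t x, float32_t k)
     {
       return vfmaq_n_f32 (acc, x, k);   // acc + x * k, fused
     }
*/
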
14386 /* vfms */
14387
14388 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14389 vfms_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
14390 {
14391 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
14392 }
14393
14394 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14395 vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
14396 {
14397 return __builtin_aarch64_fmav2sf (-__b, __c, __a);
14398 }
14399
14400 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14401 vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
14402 {
14403 return __builtin_aarch64_fmav4sf (-__b, __c, __a);
14404 }
14405
14406 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14407 vfmsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c)
14408 {
14409 return __builtin_aarch64_fmav2df (-__b, __c, __a);
14410 }
14411
14412 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14413 vfms_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
14414 {
14415 return __builtin_aarch64_fmav2sf (-__b, vdup_n_f32 (__c), __a);
14416 }
14417
14418 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14419 vfms_n_f64 (float64x1_t __a, float64x1_t __b, float64_t __c)
14420 {
14421 return (float64x1_t) {-__b[0] * __c + __a[0]};
14422 }
14423
14424 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14425 vfmsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
14426 {
14427 return __builtin_aarch64_fmav4sf (-__b, vdupq_n_f32 (__c), __a);
14428 }
14429
14430 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14431 vfmsq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c)
14432 {
14433 return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a);
14434 }
14435
14436 /* vfms_lane */
14437
14438 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14439 vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
14440 float32x2_t __c, const int __lane)
14441 {
14442 return __builtin_aarch64_fmav2sf (-__b,
14443 __aarch64_vdup_lane_f32 (__c, __lane),
14444 __a);
14445 }
14446
14447 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14448 vfms_lane_f64 (float64x1_t __a, float64x1_t __b,
14449 float64x1_t __c, const int __lane)
14450 {
14451 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
14452 }
14453
14454 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14455 vfmsd_lane_f64 (float64_t __a, float64_t __b,
14456 float64x1_t __c, const int __lane)
14457 {
14458 return __builtin_fma (-__b, __c[0], __a);
14459 }
14460
14461 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14462 vfmss_lane_f32 (float32_t __a, float32_t __b,
14463 float32x2_t __c, const int __lane)
14464 {
14465 return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
14466 }
14467
14468 /* vfms_laneq */
14469
14470 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14471 vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
14472 float32x4_t __c, const int __lane)
14473 {
14474 return __builtin_aarch64_fmav2sf (-__b,
14475 __aarch64_vdup_laneq_f32 (__c, __lane),
14476 __a);
14477 }
14478
14479 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14480 vfms_laneq_f64 (float64x1_t __a, float64x1_t __b,
14481 float64x2_t __c, const int __lane)
14482 {
14483 float64_t __c0 = __aarch64_vget_lane_any (__c, __lane);
14484 return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])};
14485 }
14486
14487 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14488 vfmsd_laneq_f64 (float64_t __a, float64_t __b,
14489 float64x2_t __c, const int __lane)
14490 {
14491 return __builtin_fma (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
14492 }
14493
14494 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14495 vfmss_laneq_f32 (float32_t __a, float32_t __b,
14496 float32x4_t __c, const int __lane)
14497 {
14498 return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a);
14499 }
14500
14501 /* vfmsq_lane */
14502
14503 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14504 vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
14505 float32x2_t __c, const int __lane)
14506 {
14507 return __builtin_aarch64_fmav4sf (-__b,
14508 __aarch64_vdupq_lane_f32 (__c, __lane),
14509 __a);
14510 }
14511
14512 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14513 vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
14514 float64x1_t __c, const int __lane)
14515 {
14516 return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a);
14517 }
14518
14519 /* vfmsq_laneq */
14520
14521 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14522 vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
14523 float32x4_t __c, const int __lane)
14524 {
14525 return __builtin_aarch64_fmav4sf (-__b,
14526 __aarch64_vdupq_laneq_f32 (__c, __lane),
14527 __a);
14528 }
14529
14530 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14531 vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
14532 float64x2_t __c, const int __lane)
14533 {
14534 return __builtin_aarch64_fmav2df (-__b,
14535 __aarch64_vdupq_laneq_f64 (__c, __lane),
14536 __a);
14537 }
14538
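/* Editor's note: a usage sketch for vfms (illustrative only).  vfms (a, b, c)
   computes a - b * c per lane; the implementations above negate the first
   multiplicand and reuse the fused multiply-add builtins, so only one
   rounding occurs:

     #include <arm_neon.h>

     float64x2_t
     residual (float64x2_t a, float64x2_t b, float64x2_t c)
     {
       return vfmsq_f64 (a, b, c);   // a - b * c, fused
     }
*/
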
14539 /* vld1 */
14540
14541 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
14542 vld1_f16 (const float16_t *__a)
14543 {
14544 return __builtin_aarch64_ld1v4hf (__a);
14545 }
14546
14547 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14548 vld1_f32 (const float32_t *a)
14549 {
14550 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
14551 }
14552
14553 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14554 vld1_f64 (const float64_t *a)
14555 {
14556 return (float64x1_t) {*a};
14557 }
14558
14559 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14560 vld1_p8 (const poly8_t *a)
14561 {
14562 return (poly8x8_t)
14563 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
14564 }
14565
14566 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14567 vld1_p16 (const poly16_t *a)
14568 {
14569 return (poly16x4_t)
14570 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
14571 }
14572
14573 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14574 vld1_s8 (const int8_t *a)
14575 {
14576 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
14577 }
14578
14579 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14580 vld1_s16 (const int16_t *a)
14581 {
14582 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
14583 }
14584
14585 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14586 vld1_s32 (const int32_t *a)
14587 {
14588 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
14589 }
14590
14591 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14592 vld1_s64 (const int64_t *a)
14593 {
14594 return (int64x1_t) {*a};
14595 }
14596
14597 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14598 vld1_u8 (const uint8_t *a)
14599 {
14600 return (uint8x8_t)
14601 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
14602 }
14603
14604 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14605 vld1_u16 (const uint16_t *a)
14606 {
14607 return (uint16x4_t)
14608 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
14609 }
14610
14611 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14612 vld1_u32 (const uint32_t *a)
14613 {
14614 return (uint32x2_t)
14615 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
14616 }
14617
14618 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14619 vld1_u64 (const uint64_t *a)
14620 {
14621 return (uint64x1_t) {*a};
14622 }
14623
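/* Editor's note: a usage sketch for vld1 (illustrative only).  vld1_* loads
   a full 64-bit vector from memory; per ACLE the pointer only needs element
   alignment, not vector alignment:

     #include <arm_neon.h>

     int32x2_t
     load_pair (const int32_t *p)
     {
       return vld1_s32 (p);   // {p[0], p[1]}
     }
*/
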
14624 /* vld1q */
14625
14626 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
14627 vld1q_f16 (const float16_t *__a)
14628 {
14629 return __builtin_aarch64_ld1v8hf (__a);
14630 }
14631
14632 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14633 vld1q_f32 (const float32_t *a)
14634 {
14635 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
14636 }
14637
14638 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14639 vld1q_f64 (const float64_t *a)
14640 {
14641 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
14642 }
14643
14644 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14645 vld1q_p8 (const poly8_t *a)
14646 {
14647 return (poly8x16_t)
14648 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
14649 }
14650
14651 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14652 vld1q_p16 (const poly16_t *a)
14653 {
14654 return (poly16x8_t)
14655 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
14656 }
14657
14658 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14659 vld1q_s8 (const int8_t *a)
14660 {
14661 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
14662 }
14663
14664 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14665 vld1q_s16 (const int16_t *a)
14666 {
14667 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
14668 }
14669
14670 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14671 vld1q_s32 (const int32_t *a)
14672 {
14673 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
14674 }
14675
14676 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14677 vld1q_s64 (const int64_t *a)
14678 {
14679 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
14680 }
14681
14682 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14683 vld1q_u8 (const uint8_t *a)
14684 {
14685 return (uint8x16_t)
14686 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
14687 }
14688
14689 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14690 vld1q_u16 (const uint16_t *a)
14691 {
14692 return (uint16x8_t)
14693 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
14694 }
14695
14696 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14697 vld1q_u32 (const uint32_t *a)
14698 {
14699 return (uint32x4_t)
14700 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
14701 }
14702
14703 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14704 vld1q_u64 (const uint64_t *a)
14705 {
14706 return (uint64x2_t)
14707 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
14708 }
14709
14710 /* vld1_dup */
14711
14712 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
14713 vld1_dup_f16 (const float16_t* __a)
14714 {
14715 return vdup_n_f16 (*__a);
14716 }
14717
14718 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14719 vld1_dup_f32 (const float32_t* __a)
14720 {
14721 return vdup_n_f32 (*__a);
14722 }
14723
14724 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14725 vld1_dup_f64 (const float64_t* __a)
14726 {
14727 return vdup_n_f64 (*__a);
14728 }
14729
14730 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14731 vld1_dup_p8 (const poly8_t* __a)
14732 {
14733 return vdup_n_p8 (*__a);
14734 }
14735
14736 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14737 vld1_dup_p16 (const poly16_t* __a)
14738 {
14739 return vdup_n_p16 (*__a);
14740 }
14741
14742 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14743 vld1_dup_s8 (const int8_t* __a)
14744 {
14745 return vdup_n_s8 (*__a);
14746 }
14747
14748 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14749 vld1_dup_s16 (const int16_t* __a)
14750 {
14751 return vdup_n_s16 (*__a);
14752 }
14753
14754 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14755 vld1_dup_s32 (const int32_t* __a)
14756 {
14757 return vdup_n_s32 (*__a);
14758 }
14759
14760 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14761 vld1_dup_s64 (const int64_t* __a)
14762 {
14763 return vdup_n_s64 (*__a);
14764 }
14765
14766 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14767 vld1_dup_u8 (const uint8_t* __a)
14768 {
14769 return vdup_n_u8 (*__a);
14770 }
14771
14772 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14773 vld1_dup_u16 (const uint16_t* __a)
14774 {
14775 return vdup_n_u16 (*__a);
14776 }
14777
14778 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14779 vld1_dup_u32 (const uint32_t* __a)
14780 {
14781 return vdup_n_u32 (*__a);
14782 }
14783
14784 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14785 vld1_dup_u64 (const uint64_t* __a)
14786 {
14787 return vdup_n_u64 (*__a);
14788 }
14789
14790 /* vld1q_dup */
14791
14792 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
14793 vld1q_dup_f16 (const float16_t* __a)
14794 {
14795 return vdupq_n_f16 (*__a);
14796 }
14797
14798 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14799 vld1q_dup_f32 (const float32_t* __a)
14800 {
14801 return vdupq_n_f32 (*__a);
14802 }
14803
14804 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14805 vld1q_dup_f64 (const float64_t* __a)
14806 {
14807 return vdupq_n_f64 (*__a);
14808 }
14809
14810 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14811 vld1q_dup_p8 (const poly8_t* __a)
14812 {
14813 return vdupq_n_p8 (*__a);
14814 }
14815
14816 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14817 vld1q_dup_p16 (const poly16_t* __a)
14818 {
14819 return vdupq_n_p16 (*__a);
14820 }
14821
14822 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14823 vld1q_dup_s8 (const int8_t* __a)
14824 {
14825 return vdupq_n_s8 (*__a);
14826 }
14827
14828 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14829 vld1q_dup_s16 (const int16_t* __a)
14830 {
14831 return vdupq_n_s16 (*__a);
14832 }
14833
14834 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14835 vld1q_dup_s32 (const int32_t* __a)
14836 {
14837 return vdupq_n_s32 (*__a);
14838 }
14839
14840 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14841 vld1q_dup_s64 (const int64_t* __a)
14842 {
14843 return vdupq_n_s64 (*__a);
14844 }
14845
14846 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14847 vld1q_dup_u8 (const uint8_t* __a)
14848 {
14849 return vdupq_n_u8 (*__a);
14850 }
14851
14852 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14853 vld1q_dup_u16 (const uint16_t* __a)
14854 {
14855 return vdupq_n_u16 (*__a);
14856 }
14857
14858 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14859 vld1q_dup_u32 (const uint32_t* __a)
14860 {
14861 return vdupq_n_u32 (*__a);
14862 }
14863
14864 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14865 vld1q_dup_u64 (const uint64_t* __a)
14866 {
14867 return vdupq_n_u64 (*__a);
14868 }
14869
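/* Editor's note: the vld1_dup/vld1q_dup forms above are simple compositions:
   they dereference the pointer once and forward to the matching
   vdup_n/vdupq_n, broadcasting the loaded element to every lane.  A hedged
   sketch:

     #include <arm_neon.h>

     float32x4_t
     scale_all (const float32_t *k, float32x4_t v)
     {
       return vmulq_f32 (v, vld1q_dup_f32 (k));   // v * {*k, *k, *k, *k}
     }
*/
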
14870 /* vld1_lane */
14871
14872 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
14873 vld1_lane_f16 (const float16_t *__src, float16x4_t __vec, const int __lane)
14874 {
14875 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14876 }
14877
14878 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14879 vld1_lane_f32 (const float32_t *__src, float32x2_t __vec, const int __lane)
14880 {
14881 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14882 }
14883
14884 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
14885 vld1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane)
14886 {
14887 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14888 }
14889
14890 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14891 vld1_lane_p8 (const poly8_t *__src, poly8x8_t __vec, const int __lane)
14892 {
14893 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14894 }
14895
14896 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14897 vld1_lane_p16 (const poly16_t *__src, poly16x4_t __vec, const int __lane)
14898 {
14899 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14900 }
14901
14902 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14903 vld1_lane_s8 (const int8_t *__src, int8x8_t __vec, const int __lane)
14904 {
14905 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14906 }
14907
14908 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14909 vld1_lane_s16 (const int16_t *__src, int16x4_t __vec, const int __lane)
14910 {
14911 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14912 }
14913
14914 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14915 vld1_lane_s32 (const int32_t *__src, int32x2_t __vec, const int __lane)
14916 {
14917 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14918 }
14919
14920 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
14921 vld1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane)
14922 {
14923 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14924 }
14925
14926 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14927 vld1_lane_u8 (const uint8_t *__src, uint8x8_t __vec, const int __lane)
14928 {
14929 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14930 }
14931
14932 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14933 vld1_lane_u16 (const uint16_t *__src, uint16x4_t __vec, const int __lane)
14934 {
14935 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14936 }
14937
14938 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14939 vld1_lane_u32 (const uint32_t *__src, uint32x2_t __vec, const int __lane)
14940 {
14941 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14942 }
14943
14944 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14945 vld1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
14946 {
14947 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14948 }
14949
14950 /* vld1q_lane */
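/* Same operation as vld1_lane above, applied to 128-bit vectors: one element
   is loaded into lane __lane of __vec and the remaining lanes are returned
   unchanged.  */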
14951
14952 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
14953 vld1q_lane_f16 (const float16_t *__src, float16x8_t __vec, const int __lane)
14954 {
14955 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14956 }
14957
14958 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14959 vld1q_lane_f32 (const float32_t *__src, float32x4_t __vec, const int __lane)
14960 {
14961 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14962 }
14963
14964 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14965 vld1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane)
14966 {
14967 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14968 }
14969
14970 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14971 vld1q_lane_p8 (const poly8_t *__src, poly8x16_t __vec, const int __lane)
14972 {
14973 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14974 }
14975
14976 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14977 vld1q_lane_p16 (const poly16_t *__src, poly16x8_t __vec, const int __lane)
14978 {
14979 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14980 }
14981
14982 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14983 vld1q_lane_s8 (const int8_t *__src, int8x16_t __vec, const int __lane)
14984 {
14985 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14986 }
14987
14988 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14989 vld1q_lane_s16 (const int16_t *__src, int16x8_t __vec, const int __lane)
14990 {
14991 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14992 }
14993
14994 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14995 vld1q_lane_s32 (const int32_t *__src, int32x4_t __vec, const int __lane)
14996 {
14997 return __aarch64_vset_lane_any (*__src, __vec, __lane);
14998 }
14999
15000 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15001 vld1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane)
15002 {
15003 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15004 }
15005
15006 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15007 vld1q_lane_u8 (const uint8_t *__src, uint8x16_t __vec, const int __lane)
15008 {
15009 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15010 }
15011
15012 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15013 vld1q_lane_u16 (const uint16_t *__src, uint16x8_t __vec, const int __lane)
15014 {
15015 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15016 }
15017
15018 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15019 vld1q_lane_u32 (const uint32_t *__src, uint32x4_t __vec, const int __lane)
15020 {
15021 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15022 }
15023
15024 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15025 vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
15026 {
15027 return __aarch64_vset_lane_any (*__src, __vec, __lane);
15028 }
15029
15030 /* vldn */
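/* The vldN intrinsics (vld2/vld3/vld4 and their q forms) load N interleaved
   structures from memory and de-interleave them into N vectors, returned as
   the val[] members of the corresponding array struct; on AArch64 they are
   expected to map to the LD2/LD3/LD4 instructions.  Sketch (hypothetical
   xy-pair buffer, not part of this header):

     float32_t buf[4] = {x0, y0, x1, y1};
     float32x2x2_t p = vld2_f32 (buf);
     // p.val[0] = {x0, x1}, p.val[1] = {y0, y1}
*/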
15031
15032 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
15033 vld2_s64 (const int64_t * __a)
15034 {
15035 int64x1x2_t ret;
15036 __builtin_aarch64_simd_oi __o;
15037 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
15038 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
15039 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
15040 return ret;
15041 }
15042
15043 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
15044 vld2_u64 (const uint64_t * __a)
15045 {
15046 uint64x1x2_t ret;
15047 __builtin_aarch64_simd_oi __o;
15048 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
15049 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
15050 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
15051 return ret;
15052 }
15053
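/* float64x1_t results are built with a one-element initializer because the
   underlying dregdf builtins return a scalar double rather than a vector.  */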
15054 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
15055 vld2_f64 (const float64_t * __a)
15056 {
15057 float64x1x2_t ret;
15058 __builtin_aarch64_simd_oi __o;
15059 __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
15060 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
15061 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
15062 return ret;
15063 }
15064
15065 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
15066 vld2_s8 (const int8_t * __a)
15067 {
15068 int8x8x2_t ret;
15069 __builtin_aarch64_simd_oi __o;
15070 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
15071 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
15072 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
15073 return ret;
15074 }
15075
15076 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
15077 vld2_p8 (const poly8_t * __a)
15078 {
15079 poly8x8x2_t ret;
15080 __builtin_aarch64_simd_oi __o;
15081 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
15082 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
15083 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
15084 return ret;
15085 }
15086
15087 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
15088 vld2_s16 (const int16_t * __a)
15089 {
15090 int16x4x2_t ret;
15091 __builtin_aarch64_simd_oi __o;
15092 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
15093 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15094 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15095 return ret;
15096 }
15097
15098 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
15099 vld2_p16 (const poly16_t * __a)
15100 {
15101 poly16x4x2_t ret;
15102 __builtin_aarch64_simd_oi __o;
15103 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
15104 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15105 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15106 return ret;
15107 }
15108
15109 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
15110 vld2_s32 (const int32_t * __a)
15111 {
15112 int32x2x2_t ret;
15113 __builtin_aarch64_simd_oi __o;
15114 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
15115 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
15116 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
15117 return ret;
15118 }
15119
15120 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
15121 vld2_u8 (const uint8_t * __a)
15122 {
15123 uint8x8x2_t ret;
15124 __builtin_aarch64_simd_oi __o;
15125 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
15126 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
15127 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
15128 return ret;
15129 }
15130
15131 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
15132 vld2_u16 (const uint16_t * __a)
15133 {
15134 uint16x4x2_t ret;
15135 __builtin_aarch64_simd_oi __o;
15136 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
15137 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15138 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15139 return ret;
15140 }
15141
15142 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
15143 vld2_u32 (const uint32_t * __a)
15144 {
15145 uint32x2x2_t ret;
15146 __builtin_aarch64_simd_oi __o;
15147 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
15148 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
15149 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
15150 return ret;
15151 }
15152
15153 __extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
15154 vld2_f16 (const float16_t * __a)
15155 {
15156 float16x4x2_t ret;
15157 __builtin_aarch64_simd_oi __o;
15158 __o = __builtin_aarch64_ld2v4hf ((const __builtin_aarch64_simd_hf *) __a);
15159 ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
15160 ret.val[1] = __builtin_aarch64_get_dregoiv4hf (__o, 1);
15161 return ret;
15162 }
15163
15164 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
15165 vld2_f32 (const float32_t * __a)
15166 {
15167 float32x2x2_t ret;
15168 __builtin_aarch64_simd_oi __o;
15169 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
15170 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
15171 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
15172 return ret;
15173 }
15174
15175 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
15176 vld2q_s8 (const int8_t * __a)
15177 {
15178 int8x16x2_t ret;
15179 __builtin_aarch64_simd_oi __o;
15180 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
15181 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
15182 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
15183 return ret;
15184 }
15185
15186 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
15187 vld2q_p8 (const poly8_t * __a)
15188 {
15189 poly8x16x2_t ret;
15190 __builtin_aarch64_simd_oi __o;
15191 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
15192 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
15193 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
15194 return ret;
15195 }
15196
15197 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
15198 vld2q_s16 (const int16_t * __a)
15199 {
15200 int16x8x2_t ret;
15201 __builtin_aarch64_simd_oi __o;
15202 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
15203 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
15204 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
15205 return ret;
15206 }
15207
15208 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
15209 vld2q_p16 (const poly16_t * __a)
15210 {
15211 poly16x8x2_t ret;
15212 __builtin_aarch64_simd_oi __o;
15213 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
15214 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
15215 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
15216 return ret;
15217 }
15218
15219 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
15220 vld2q_s32 (const int32_t * __a)
15221 {
15222 int32x4x2_t ret;
15223 __builtin_aarch64_simd_oi __o;
15224 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
15225 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
15226 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
15227 return ret;
15228 }
15229
15230 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
15231 vld2q_s64 (const int64_t * __a)
15232 {
15233 int64x2x2_t ret;
15234 __builtin_aarch64_simd_oi __o;
15235 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
15236 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
15237 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
15238 return ret;
15239 }
15240
15241 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
15242 vld2q_u8 (const uint8_t * __a)
15243 {
15244 uint8x16x2_t ret;
15245 __builtin_aarch64_simd_oi __o;
15246 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
15247 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
15248 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
15249 return ret;
15250 }
15251
15252 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
15253 vld2q_u16 (const uint16_t * __a)
15254 {
15255 uint16x8x2_t ret;
15256 __builtin_aarch64_simd_oi __o;
15257 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
15258 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
15259 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
15260 return ret;
15261 }
15262
15263 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
15264 vld2q_u32 (const uint32_t * __a)
15265 {
15266 uint32x4x2_t ret;
15267 __builtin_aarch64_simd_oi __o;
15268 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
15269 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
15270 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
15271 return ret;
15272 }
15273
15274 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
15275 vld2q_u64 (const uint64_t * __a)
15276 {
15277 uint64x2x2_t ret;
15278 __builtin_aarch64_simd_oi __o;
15279 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
15280 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
15281 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
15282 return ret;
15283 }
15284
15285 __extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
15286 vld2q_f16 (const float16_t * __a)
15287 {
15288 float16x8x2_t ret;
15289 __builtin_aarch64_simd_oi __o;
15290 __o = __builtin_aarch64_ld2v8hf ((const __builtin_aarch64_simd_hf *) __a);
15291 ret.val[0] = __builtin_aarch64_get_qregoiv8hf (__o, 0);
15292 ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1);
15293 return ret;
15294 }
15295
15296 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
15297 vld2q_f32 (const float32_t * __a)
15298 {
15299 float32x4x2_t ret;
15300 __builtin_aarch64_simd_oi __o;
15301 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
15302 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
15303 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
15304 return ret;
15305 }
15306
15307 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
15308 vld2q_f64 (const float64_t * __a)
15309 {
15310 float64x2x2_t ret;
15311 __builtin_aarch64_simd_oi __o;
15312 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
15313 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
15314 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
15315 return ret;
15316 }
15317
15318 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
15319 vld3_s64 (const int64_t * __a)
15320 {
15321 int64x1x3_t ret;
15322 __builtin_aarch64_simd_ci __o;
15323 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
15324 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
15325 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
15326 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
15327 return ret;
15328 }
15329
15330 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
15331 vld3_u64 (const uint64_t * __a)
15332 {
15333 uint64x1x3_t ret;
15334 __builtin_aarch64_simd_ci __o;
15335 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
15336 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
15337 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
15338 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
15339 return ret;
15340 }
15341
15342 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
15343 vld3_f64 (const float64_t * __a)
15344 {
15345 float64x1x3_t ret;
15346 __builtin_aarch64_simd_ci __o;
15347 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
15348 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
15349 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
15350 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
15351 return ret;
15352 }
15353
15354 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
15355 vld3_s8 (const int8_t * __a)
15356 {
15357 int8x8x3_t ret;
15358 __builtin_aarch64_simd_ci __o;
15359 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
15360 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
15361 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
15362 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
15363 return ret;
15364 }
15365
15366 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
15367 vld3_p8 (const poly8_t * __a)
15368 {
15369 poly8x8x3_t ret;
15370 __builtin_aarch64_simd_ci __o;
15371 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
15372 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
15373 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
15374 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
15375 return ret;
15376 }
15377
15378 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
15379 vld3_s16 (const int16_t * __a)
15380 {
15381 int16x4x3_t ret;
15382 __builtin_aarch64_simd_ci __o;
15383 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
15384 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
15385 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
15386 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
15387 return ret;
15388 }
15389
15390 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
15391 vld3_p16 (const poly16_t * __a)
15392 {
15393 poly16x4x3_t ret;
15394 __builtin_aarch64_simd_ci __o;
15395 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
15396 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
15397 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
15398 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
15399 return ret;
15400 }
15401
15402 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
15403 vld3_s32 (const int32_t * __a)
15404 {
15405 int32x2x3_t ret;
15406 __builtin_aarch64_simd_ci __o;
15407 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
15408 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
15409 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
15410 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
15411 return ret;
15412 }
15413
15414 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
15415 vld3_u8 (const uint8_t * __a)
15416 {
15417 uint8x8x3_t ret;
15418 __builtin_aarch64_simd_ci __o;
15419 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
15420 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
15421 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
15422 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
15423 return ret;
15424 }
15425
15426 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
15427 vld3_u16 (const uint16_t * __a)
15428 {
15429 uint16x4x3_t ret;
15430 __builtin_aarch64_simd_ci __o;
15431 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
15432 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
15433 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
15434 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
15435 return ret;
15436 }
15437
15438 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
15439 vld3_u32 (const uint32_t * __a)
15440 {
15441 uint32x2x3_t ret;
15442 __builtin_aarch64_simd_ci __o;
15443 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
15444 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
15445 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
15446 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
15447 return ret;
15448 }
15449
15450 __extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
15451 vld3_f16 (const float16_t * __a)
15452 {
15453 float16x4x3_t ret;
15454 __builtin_aarch64_simd_ci __o;
15455 __o = __builtin_aarch64_ld3v4hf ((const __builtin_aarch64_simd_hf *) __a);
15456 ret.val[0] = __builtin_aarch64_get_dregciv4hf (__o, 0);
15457 ret.val[1] = __builtin_aarch64_get_dregciv4hf (__o, 1);
15458 ret.val[2] = __builtin_aarch64_get_dregciv4hf (__o, 2);
15459 return ret;
15460 }
15461
15462 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
15463 vld3_f32 (const float32_t * __a)
15464 {
15465 float32x2x3_t ret;
15466 __builtin_aarch64_simd_ci __o;
15467 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
15468 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
15469 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
15470 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
15471 return ret;
15472 }
15473
15474 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
15475 vld3q_s8 (const int8_t * __a)
15476 {
15477 int8x16x3_t ret;
15478 __builtin_aarch64_simd_ci __o;
15479 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
15480 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
15481 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
15482 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
15483 return ret;
15484 }
15485
15486 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
15487 vld3q_p8 (const poly8_t * __a)
15488 {
15489 poly8x16x3_t ret;
15490 __builtin_aarch64_simd_ci __o;
15491 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
15492 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
15493 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
15494 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
15495 return ret;
15496 }
15497
15498 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
15499 vld3q_s16 (const int16_t * __a)
15500 {
15501 int16x8x3_t ret;
15502 __builtin_aarch64_simd_ci __o;
15503 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
15504 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
15505 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
15506 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
15507 return ret;
15508 }
15509
15510 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
15511 vld3q_p16 (const poly16_t * __a)
15512 {
15513 poly16x8x3_t ret;
15514 __builtin_aarch64_simd_ci __o;
15515 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
15516 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
15517 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
15518 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
15519 return ret;
15520 }
15521
15522 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
15523 vld3q_s32 (const int32_t * __a)
15524 {
15525 int32x4x3_t ret;
15526 __builtin_aarch64_simd_ci __o;
15527 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
15528 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
15529 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
15530 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
15531 return ret;
15532 }
15533
15534 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
15535 vld3q_s64 (const int64_t * __a)
15536 {
15537 int64x2x3_t ret;
15538 __builtin_aarch64_simd_ci __o;
15539 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
15540 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
15541 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
15542 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
15543 return ret;
15544 }
15545
15546 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
15547 vld3q_u8 (const uint8_t * __a)
15548 {
15549 uint8x16x3_t ret;
15550 __builtin_aarch64_simd_ci __o;
15551 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
15552 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
15553 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
15554 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
15555 return ret;
15556 }
15557
15558 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
15559 vld3q_u16 (const uint16_t * __a)
15560 {
15561 uint16x8x3_t ret;
15562 __builtin_aarch64_simd_ci __o;
15563 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
15564 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
15565 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
15566 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
15567 return ret;
15568 }
15569
15570 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
15571 vld3q_u32 (const uint32_t * __a)
15572 {
15573 uint32x4x3_t ret;
15574 __builtin_aarch64_simd_ci __o;
15575 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
15576 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
15577 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
15578 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
15579 return ret;
15580 }
15581
15582 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
15583 vld3q_u64 (const uint64_t * __a)
15584 {
15585 uint64x2x3_t ret;
15586 __builtin_aarch64_simd_ci __o;
15587 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
15588 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
15589 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
15590 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
15591 return ret;
15592 }
15593
15594 __extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
15595 vld3q_f16 (const float16_t * __a)
15596 {
15597 float16x8x3_t ret;
15598 __builtin_aarch64_simd_ci __o;
15599 __o = __builtin_aarch64_ld3v8hf ((const __builtin_aarch64_simd_hf *) __a);
15600 ret.val[0] = __builtin_aarch64_get_qregciv8hf (__o, 0);
15601 ret.val[1] = __builtin_aarch64_get_qregciv8hf (__o, 1);
15602 ret.val[2] = __builtin_aarch64_get_qregciv8hf (__o, 2);
15603 return ret;
15604 }
15605
15606 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
15607 vld3q_f32 (const float32_t * __a)
15608 {
15609 float32x4x3_t ret;
15610 __builtin_aarch64_simd_ci __o;
15611 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
15612 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
15613 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
15614 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
15615 return ret;
15616 }
15617
15618 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
15619 vld3q_f64 (const float64_t * __a)
15620 {
15621 float64x2x3_t ret;
15622 __builtin_aarch64_simd_ci __o;
15623 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
15624 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
15625 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
15626 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
15627 return ret;
15628 }
15629
15630 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
15631 vld4_s64 (const int64_t * __a)
15632 {
15633 int64x1x4_t ret;
15634 __builtin_aarch64_simd_xi __o;
15635 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
15636 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
15637 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
15638 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
15639 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
15640 return ret;
15641 }
15642
15643 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
15644 vld4_u64 (const uint64_t * __a)
15645 {
15646 uint64x1x4_t ret;
15647 __builtin_aarch64_simd_xi __o;
15648 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
15649 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
15650 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
15651 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
15652 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
15653 return ret;
15654 }
15655
15656 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
15657 vld4_f64 (const float64_t * __a)
15658 {
15659 float64x1x4_t ret;
15660 __builtin_aarch64_simd_xi __o;
15661 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
15662 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
15663 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
15664 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
15665 ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
15666 return ret;
15667 }
15668
15669 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
15670 vld4_s8 (const int8_t * __a)
15671 {
15672 int8x8x4_t ret;
15673 __builtin_aarch64_simd_xi __o;
15674 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
15675 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
15676 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
15677 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
15678 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
15679 return ret;
15680 }
15681
15682 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
15683 vld4_p8 (const poly8_t * __a)
15684 {
15685 poly8x8x4_t ret;
15686 __builtin_aarch64_simd_xi __o;
15687 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
15688 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
15689 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
15690 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
15691 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
15692 return ret;
15693 }
15694
15695 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
15696 vld4_s16 (const int16_t * __a)
15697 {
15698 int16x4x4_t ret;
15699 __builtin_aarch64_simd_xi __o;
15700 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
15701 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
15702 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
15703 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
15704 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
15705 return ret;
15706 }
15707
15708 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
15709 vld4_p16 (const poly16_t * __a)
15710 {
15711 poly16x4x4_t ret;
15712 __builtin_aarch64_simd_xi __o;
15713 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
15714 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
15715 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
15716 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
15717 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
15718 return ret;
15719 }
15720
15721 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
15722 vld4_s32 (const int32_t * __a)
15723 {
15724 int32x2x4_t ret;
15725 __builtin_aarch64_simd_xi __o;
15726 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
15727 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
15728 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
15729 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
15730 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
15731 return ret;
15732 }
15733
15734 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
15735 vld4_u8 (const uint8_t * __a)
15736 {
15737 uint8x8x4_t ret;
15738 __builtin_aarch64_simd_xi __o;
15739 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
15740 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
15741 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
15742 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
15743 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
15744 return ret;
15745 }
15746
15747 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
15748 vld4_u16 (const uint16_t * __a)
15749 {
15750 uint16x4x4_t ret;
15751 __builtin_aarch64_simd_xi __o;
15752 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
15753 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
15754 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
15755 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
15756 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
15757 return ret;
15758 }
15759
15760 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
15761 vld4_u32 (const uint32_t * __a)
15762 {
15763 uint32x2x4_t ret;
15764 __builtin_aarch64_simd_xi __o;
15765 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
15766 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
15767 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
15768 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
15769 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
15770 return ret;
15771 }
15772
15773 __extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
15774 vld4_f16 (const float16_t * __a)
15775 {
15776 float16x4x4_t ret;
15777 __builtin_aarch64_simd_xi __o;
15778 __o = __builtin_aarch64_ld4v4hf ((const __builtin_aarch64_simd_hf *) __a);
15779 ret.val[0] = __builtin_aarch64_get_dregxiv4hf (__o, 0);
15780 ret.val[1] = __builtin_aarch64_get_dregxiv4hf (__o, 1);
15781 ret.val[2] = __builtin_aarch64_get_dregxiv4hf (__o, 2);
15782 ret.val[3] = __builtin_aarch64_get_dregxiv4hf (__o, 3);
15783 return ret;
15784 }
15785
15786 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
15787 vld4_f32 (const float32_t * __a)
15788 {
15789 float32x2x4_t ret;
15790 __builtin_aarch64_simd_xi __o;
15791 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
15792 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
15793 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
15794 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
15795 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
15796 return ret;
15797 }
15798
15799 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
15800 vld4q_s8 (const int8_t * __a)
15801 {
15802 int8x16x4_t ret;
15803 __builtin_aarch64_simd_xi __o;
15804 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
15805 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
15806 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
15807 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
15808 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
15809 return ret;
15810 }
15811
15812 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
15813 vld4q_p8 (const poly8_t * __a)
15814 {
15815 poly8x16x4_t ret;
15816 __builtin_aarch64_simd_xi __o;
15817 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
15818 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
15819 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
15820 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
15821 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
15822 return ret;
15823 }
15824
15825 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
15826 vld4q_s16 (const int16_t * __a)
15827 {
15828 int16x8x4_t ret;
15829 __builtin_aarch64_simd_xi __o;
15830 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
15831 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
15832 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
15833 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
15834 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
15835 return ret;
15836 }
15837
15838 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
15839 vld4q_p16 (const poly16_t * __a)
15840 {
15841 poly16x8x4_t ret;
15842 __builtin_aarch64_simd_xi __o;
15843 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
15844 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
15845 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
15846 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
15847 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
15848 return ret;
15849 }
15850
15851 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
15852 vld4q_s32 (const int32_t * __a)
15853 {
15854 int32x4x4_t ret;
15855 __builtin_aarch64_simd_xi __o;
15856 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
15857 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
15858 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
15859 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
15860 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
15861 return ret;
15862 }
15863
15864 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
15865 vld4q_s64 (const int64_t * __a)
15866 {
15867 int64x2x4_t ret;
15868 __builtin_aarch64_simd_xi __o;
15869 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
15870 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
15871 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
15872 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
15873 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
15874 return ret;
15875 }
15876
15877 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
15878 vld4q_u8 (const uint8_t * __a)
15879 {
15880 uint8x16x4_t ret;
15881 __builtin_aarch64_simd_xi __o;
15882 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
15883 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
15884 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
15885 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
15886 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
15887 return ret;
15888 }
15889
15890 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
15891 vld4q_u16 (const uint16_t * __a)
15892 {
15893 uint16x8x4_t ret;
15894 __builtin_aarch64_simd_xi __o;
15895 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
15896 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
15897 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
15898 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
15899 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
15900 return ret;
15901 }
15902
15903 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
15904 vld4q_u32 (const uint32_t * __a)
15905 {
15906 uint32x4x4_t ret;
15907 __builtin_aarch64_simd_xi __o;
15908 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
15909 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
15910 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
15911 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
15912 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
15913 return ret;
15914 }
15915
15916 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
15917 vld4q_u64 (const uint64_t * __a)
15918 {
15919 uint64x2x4_t ret;
15920 __builtin_aarch64_simd_xi __o;
15921 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
15922 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
15923 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
15924 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
15925 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
15926 return ret;
15927 }
15928
15929 __extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
15930 vld4q_f16 (const float16_t * __a)
15931 {
15932 float16x8x4_t ret;
15933 __builtin_aarch64_simd_xi __o;
15934 __o = __builtin_aarch64_ld4v8hf ((const __builtin_aarch64_simd_hf *) __a);
15935 ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0);
15936 ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1);
15937 ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2);
15938 ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3);
15939 return ret;
15940 }
15941
15942 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
15943 vld4q_f32 (const float32_t * __a)
15944 {
15945 float32x4x4_t ret;
15946 __builtin_aarch64_simd_xi __o;
15947 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
15948 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
15949 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
15950 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
15951 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
15952 return ret;
15953 }
15954
15955 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
15956 vld4q_f64 (const float64_t * __a)
15957 {
15958 float64x2x4_t ret;
15959 __builtin_aarch64_simd_xi __o;
15960 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
15961 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
15962 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
15963 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
15964 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
15965 return ret;
15966 }
15967
15968 /* vldn_dup */
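/* The vldN_dup intrinsics load a single N-element structure and replicate
   each of its elements across all lanes of the corresponding result vector;
   on AArch64 they are expected to map to LD2R/LD3R/LD4R.  Sketch
   (hypothetical buffer, not part of this header):

     int32_t buf[2] = {3, 4};
     int32x2x2_t d = vld2_dup_s32 (buf);
     // d.val[0] = {3, 3}, d.val[1] = {4, 4}
*/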
15969
15970 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
15971 vld2_dup_s8 (const int8_t * __a)
15972 {
15973 int8x8x2_t ret;
15974 __builtin_aarch64_simd_oi __o;
15975 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
15976 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
15977 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
15978 return ret;
15979 }
15980
15981 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
15982 vld2_dup_s16 (const int16_t * __a)
15983 {
15984 int16x4x2_t ret;
15985 __builtin_aarch64_simd_oi __o;
15986 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
15987 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
15988 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
15989 return ret;
15990 }
15991
15992 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
15993 vld2_dup_s32 (const int32_t * __a)
15994 {
15995 int32x2x2_t ret;
15996 __builtin_aarch64_simd_oi __o;
15997 __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
15998 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
15999 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
16000 return ret;
16001 }
16002
16003 __extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
16004 vld2_dup_f16 (const float16_t * __a)
16005 {
16006 float16x4x2_t ret;
16007 __builtin_aarch64_simd_oi __o;
16008 __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a);
16009 ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
16010 ret.val[1] = __builtin_aarch64_get_dregoiv4hf (__o, 1);
16011 return ret;
16012 }
16013
16014 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
16015 vld2_dup_f32 (const float32_t * __a)
16016 {
16017 float32x2x2_t ret;
16018 __builtin_aarch64_simd_oi __o;
16019 __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a);
16020 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
16021 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
16022 return ret;
16023 }
16024
16025 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
16026 vld2_dup_f64 (const float64_t * __a)
16027 {
16028 float64x1x2_t ret;
16029 __builtin_aarch64_simd_oi __o;
16030 __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a);
16031 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
16032 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
16033 return ret;
16034 }
16035
16036 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
16037 vld2_dup_u8 (const uint8_t * __a)
16038 {
16039 uint8x8x2_t ret;
16040 __builtin_aarch64_simd_oi __o;
16041 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16042 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
16043 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
16044 return ret;
16045 }
16046
16047 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
16048 vld2_dup_u16 (const uint16_t * __a)
16049 {
16050 uint16x4x2_t ret;
16051 __builtin_aarch64_simd_oi __o;
16052 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16053 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
16054 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
16055 return ret;
16056 }
16057
16058 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
16059 vld2_dup_u32 (const uint32_t * __a)
16060 {
16061 uint32x2x2_t ret;
16062 __builtin_aarch64_simd_oi __o;
16063 __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
16064 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
16065 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
16066 return ret;
16067 }
16068
16069 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
16070 vld2_dup_p8 (const poly8_t * __a)
16071 {
16072 poly8x8x2_t ret;
16073 __builtin_aarch64_simd_oi __o;
16074 __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16075 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
16076 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
16077 return ret;
16078 }
16079
16080 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
16081 vld2_dup_p16 (const poly16_t * __a)
16082 {
16083 poly16x4x2_t ret;
16084 __builtin_aarch64_simd_oi __o;
16085 __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16086 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
16087 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
16088 return ret;
16089 }
16090
16091 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
16092 vld2_dup_s64 (const int64_t * __a)
16093 {
16094 int64x1x2_t ret;
16095 __builtin_aarch64_simd_oi __o;
16096 __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
16097 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
16098 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
16099 return ret;
16100 }
16101
16102 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
16103 vld2_dup_u64 (const uint64_t * __a)
16104 {
16105 uint64x1x2_t ret;
16106 __builtin_aarch64_simd_oi __o;
16107 __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
16108 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
16109 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
16110 return ret;
16111 }
16112
16113 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
16114 vld2q_dup_s8 (const int8_t * __a)
16115 {
16116 int8x16x2_t ret;
16117 __builtin_aarch64_simd_oi __o;
16118 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16119 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16120 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16121 return ret;
16122 }
16123
16124 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
16125 vld2q_dup_p8 (const poly8_t * __a)
16126 {
16127 poly8x16x2_t ret;
16128 __builtin_aarch64_simd_oi __o;
16129 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16130 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16131 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16132 return ret;
16133 }
16134
16135 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
16136 vld2q_dup_s16 (const int16_t * __a)
16137 {
16138 int16x8x2_t ret;
16139 __builtin_aarch64_simd_oi __o;
16140 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16141 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16142 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16143 return ret;
16144 }
16145
16146 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
16147 vld2q_dup_p16 (const poly16_t * __a)
16148 {
16149 poly16x8x2_t ret;
16150 __builtin_aarch64_simd_oi __o;
16151 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16152 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16153 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16154 return ret;
16155 }
16156
16157 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
16158 vld2q_dup_s32 (const int32_t * __a)
16159 {
16160 int32x4x2_t ret;
16161 __builtin_aarch64_simd_oi __o;
16162 __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
16163 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
16164 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
16165 return ret;
16166 }
16167
16168 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
16169 vld2q_dup_s64 (const int64_t * __a)
16170 {
16171 int64x2x2_t ret;
16172 __builtin_aarch64_simd_oi __o;
16173 __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
16174 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
16175 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
16176 return ret;
16177 }
16178
16179 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
16180 vld2q_dup_u8 (const uint8_t * __a)
16181 {
16182 uint8x16x2_t ret;
16183 __builtin_aarch64_simd_oi __o;
16184 __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16185 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
16186 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
16187 return ret;
16188 }
16189
16190 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
16191 vld2q_dup_u16 (const uint16_t * __a)
16192 {
16193 uint16x8x2_t ret;
16194 __builtin_aarch64_simd_oi __o;
16195 __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16196 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
16197 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
16198 return ret;
16199 }
16200
16201 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
16202 vld2q_dup_u32 (const uint32_t * __a)
16203 {
16204 uint32x4x2_t ret;
16205 __builtin_aarch64_simd_oi __o;
16206 __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
16207 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
16208 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
16209 return ret;
16210 }
16211
16212 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
16213 vld2q_dup_u64 (const uint64_t * __a)
16214 {
16215 uint64x2x2_t ret;
16216 __builtin_aarch64_simd_oi __o;
16217 __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
16218 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
16219 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
16220 return ret;
16221 }
16222
16223 __extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
16224 vld2q_dup_f16 (const float16_t * __a)
16225 {
16226 float16x8x2_t ret;
16227 __builtin_aarch64_simd_oi __o;
16228 __o = __builtin_aarch64_ld2rv8hf ((const __builtin_aarch64_simd_hf *) __a);
16229 ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0);
16230 ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 1);
16231 return ret;
16232 }
16233
16234 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
16235 vld2q_dup_f32 (const float32_t * __a)
16236 {
16237 float32x4x2_t ret;
16238 __builtin_aarch64_simd_oi __o;
16239 __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a);
16240 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
16241 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
16242 return ret;
16243 }
16244
16245 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
16246 vld2q_dup_f64 (const float64_t * __a)
16247 {
16248 float64x2x2_t ret;
16249 __builtin_aarch64_simd_oi __o;
16250 __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a);
16251 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
16252 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
16253 return ret;
16254 }
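
/* Usage sketch (illustrative, not part of the header API): each
   vld2q_dup_* above expands to a single LD2R, which loads two consecutive
   elements and broadcasts each of them across one whole vector:

     float __buf[2] = {1.0f, 2.0f};            // hypothetical input
     float32x4x2_t __r = vld2q_dup_f32 (__buf);
     // __r.val[0] == {1, 1, 1, 1}, __r.val[1] == {2, 2, 2, 2}  */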
16255
16256 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
16257 vld3_dup_s64 (const int64_t * __a)
16258 {
16259 int64x1x3_t ret;
16260 __builtin_aarch64_simd_ci __o;
16261 __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
16262 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
16263 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
16264 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
16265 return ret;
16266 }
16267
16268 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
16269 vld3_dup_u64 (const uint64_t * __a)
16270 {
16271 uint64x1x3_t ret;
16272 __builtin_aarch64_simd_ci __o;
16273 __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
16274 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
16275 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
16276 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
16277 return ret;
16278 }
16279
16280 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
16281 vld3_dup_f64 (const float64_t * __a)
16282 {
16283 float64x1x3_t ret;
16284 __builtin_aarch64_simd_ci __o;
16285 __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a);
16286 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
16287 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
16288 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
16289 return ret;
16290 }
16291
16292 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
16293 vld3_dup_s8 (const int8_t * __a)
16294 {
16295 int8x8x3_t ret;
16296 __builtin_aarch64_simd_ci __o;
16297 __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16298 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16299 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16300 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16301 return ret;
16302 }
16303
16304 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
16305 vld3_dup_p8 (const poly8_t * __a)
16306 {
16307 poly8x8x3_t ret;
16308 __builtin_aarch64_simd_ci __o;
16309 __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16310 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16311 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16312 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16313 return ret;
16314 }
16315
16316 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
16317 vld3_dup_s16 (const int16_t * __a)
16318 {
16319 int16x4x3_t ret;
16320 __builtin_aarch64_simd_ci __o;
16321 __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16322 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16323 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16324 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16325 return ret;
16326 }
16327
16328 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
16329 vld3_dup_p16 (const poly16_t * __a)
16330 {
16331 poly16x4x3_t ret;
16332 __builtin_aarch64_simd_ci __o;
16333 __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16334 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16335 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16336 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16337 return ret;
16338 }
16339
16340 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
16341 vld3_dup_s32 (const int32_t * __a)
16342 {
16343 int32x2x3_t ret;
16344 __builtin_aarch64_simd_ci __o;
16345 __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
16346 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
16347 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
16348 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
16349 return ret;
16350 }
16351
16352 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
16353 vld3_dup_u8 (const uint8_t * __a)
16354 {
16355 uint8x8x3_t ret;
16356 __builtin_aarch64_simd_ci __o;
16357 __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16358 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
16359 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
16360 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
16361 return ret;
16362 }
16363
16364 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
16365 vld3_dup_u16 (const uint16_t * __a)
16366 {
16367 uint16x4x3_t ret;
16368 __builtin_aarch64_simd_ci __o;
16369 __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16370 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
16371 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
16372 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
16373 return ret;
16374 }
16375
16376 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
16377 vld3_dup_u32 (const uint32_t * __a)
16378 {
16379 uint32x2x3_t ret;
16380 __builtin_aarch64_simd_ci __o;
16381 __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
16382 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
16383 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
16384 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
16385 return ret;
16386 }
16387
16388 __extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
16389 vld3_dup_f16 (const float16_t * __a)
16390 {
16391 float16x4x3_t ret;
16392 __builtin_aarch64_simd_ci __o;
16393 __o = __builtin_aarch64_ld3rv4hf ((const __builtin_aarch64_simd_hf *) __a);
16394 ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0);
16395 ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1);
16396 ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2);
16397 return ret;
16398 }
16399
16400 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
16401 vld3_dup_f32 (const float32_t * __a)
16402 {
16403 float32x2x3_t ret;
16404 __builtin_aarch64_simd_ci __o;
16405 __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a);
16406 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
16407 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
16408 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
16409 return ret;
16410 }
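
/* Usage sketch (illustrative): the vld3_dup_* forms broadcast a
   three-element record in the same way, e.g. replicating one RGB triple:

     uint8_t __rgb[3] = {0xff, 0x80, 0x00};    // hypothetical pixel
     uint8x8x3_t __c = vld3_dup_u8 (__rgb);
     // __c.val[0] holds eight copies of __rgb[0], and so on.  */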
16411
16412 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
16413 vld3q_dup_s8 (const int8_t * __a)
16414 {
16415 int8x16x3_t ret;
16416 __builtin_aarch64_simd_ci __o;
16417 __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16418 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16419 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16420 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16421 return ret;
16422 }
16423
16424 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
16425 vld3q_dup_p8 (const poly8_t * __a)
16426 {
16427 poly8x16x3_t ret;
16428 __builtin_aarch64_simd_ci __o;
16429 __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16430 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16431 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16432 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16433 return ret;
16434 }
16435
16436 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
16437 vld3q_dup_s16 (const int16_t * __a)
16438 {
16439 int16x8x3_t ret;
16440 __builtin_aarch64_simd_ci __o;
16441 __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16442 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16443 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16444 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16445 return ret;
16446 }
16447
16448 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
16449 vld3q_dup_p16 (const poly16_t * __a)
16450 {
16451 poly16x8x3_t ret;
16452 __builtin_aarch64_simd_ci __o;
16453 __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16454 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16455 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16456 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16457 return ret;
16458 }
16459
16460 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
16461 vld3q_dup_s32 (const int32_t * __a)
16462 {
16463 int32x4x3_t ret;
16464 __builtin_aarch64_simd_ci __o;
16465 __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
16466 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
16467 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
16468 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
16469 return ret;
16470 }
16471
16472 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
16473 vld3q_dup_s64 (const int64_t * __a)
16474 {
16475 int64x2x3_t ret;
16476 __builtin_aarch64_simd_ci __o;
16477 __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
16478 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
16479 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
16480 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
16481 return ret;
16482 }
16483
16484 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
16485 vld3q_dup_u8 (const uint8_t * __a)
16486 {
16487 uint8x16x3_t ret;
16488 __builtin_aarch64_simd_ci __o;
16489 __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16490 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
16491 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
16492 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
16493 return ret;
16494 }
16495
16496 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
16497 vld3q_dup_u16 (const uint16_t * __a)
16498 {
16499 uint16x8x3_t ret;
16500 __builtin_aarch64_simd_ci __o;
16501 __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16502 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
16503 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
16504 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
16505 return ret;
16506 }
16507
16508 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
16509 vld3q_dup_u32 (const uint32_t * __a)
16510 {
16511 uint32x4x3_t ret;
16512 __builtin_aarch64_simd_ci __o;
16513 __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
16514 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
16515 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
16516 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
16517 return ret;
16518 }
16519
16520 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
16521 vld3q_dup_u64 (const uint64_t * __a)
16522 {
16523 uint64x2x3_t ret;
16524 __builtin_aarch64_simd_ci __o;
16525 __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
16526 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
16527 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
16528 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
16529 return ret;
16530 }
16531
16532 __extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
16533 vld3q_dup_f16 (const float16_t * __a)
16534 {
16535 float16x8x3_t ret;
16536 __builtin_aarch64_simd_ci __o;
16537 __o = __builtin_aarch64_ld3rv8hf ((const __builtin_aarch64_simd_hf *) __a);
16538 ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0);
16539 ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1);
16540 ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2);
16541 return ret;
16542 }
16543
16544 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
16545 vld3q_dup_f32 (const float32_t * __a)
16546 {
16547 float32x4x3_t ret;
16548 __builtin_aarch64_simd_ci __o;
16549 __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a);
16550 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
16551 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
16552 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
16553 return ret;
16554 }
16555
16556 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
16557 vld3q_dup_f64 (const float64_t * __a)
16558 {
16559 float64x2x3_t ret;
16560 __builtin_aarch64_simd_ci __o;
16561 __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a);
16562 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
16563 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
16564 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
16565 return ret;
16566 }
16567
16568 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
16569 vld4_dup_s64 (const int64_t * __a)
16570 {
16571 int64x1x4_t ret;
16572 __builtin_aarch64_simd_xi __o;
16573 __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
16574 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
16575 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
16576 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
16577 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
16578 return ret;
16579 }
16580
16581 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
16582 vld4_dup_u64 (const uint64_t * __a)
16583 {
16584 uint64x1x4_t ret;
16585 __builtin_aarch64_simd_xi __o;
16586 __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
16587 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
16588 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
16589 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
16590 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
16591 return ret;
16592 }
16593
16594 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
16595 vld4_dup_f64 (const float64_t * __a)
16596 {
16597 float64x1x4_t ret;
16598 __builtin_aarch64_simd_xi __o;
16599 __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a);
16600 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
16601 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
16602 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
16603 ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
16604 return ret;
16605 }
16606
16607 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
16608 vld4_dup_s8 (const int8_t * __a)
16609 {
16610 int8x8x4_t ret;
16611 __builtin_aarch64_simd_xi __o;
16612 __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16613 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16614 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16615 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16616 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16617 return ret;
16618 }
16619
16620 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
16621 vld4_dup_p8 (const poly8_t * __a)
16622 {
16623 poly8x8x4_t ret;
16624 __builtin_aarch64_simd_xi __o;
16625 __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16626 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16627 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16628 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16629 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16630 return ret;
16631 }
16632
16633 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
16634 vld4_dup_s16 (const int16_t * __a)
16635 {
16636 int16x4x4_t ret;
16637 __builtin_aarch64_simd_xi __o;
16638 __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16639 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16640 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16641 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16642 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16643 return ret;
16644 }
16645
16646 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
16647 vld4_dup_p16 (const poly16_t * __a)
16648 {
16649 poly16x4x4_t ret;
16650 __builtin_aarch64_simd_xi __o;
16651 __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16652 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16653 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16654 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16655 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16656 return ret;
16657 }
16658
16659 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
16660 vld4_dup_s32 (const int32_t * __a)
16661 {
16662 int32x2x4_t ret;
16663 __builtin_aarch64_simd_xi __o;
16664 __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
16665 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
16666 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
16667 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
16668 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
16669 return ret;
16670 }
16671
16672 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
16673 vld4_dup_u8 (const uint8_t * __a)
16674 {
16675 uint8x8x4_t ret;
16676 __builtin_aarch64_simd_xi __o;
16677 __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
16678 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
16679 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
16680 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
16681 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
16682 return ret;
16683 }
16684
16685 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
16686 vld4_dup_u16 (const uint16_t * __a)
16687 {
16688 uint16x4x4_t ret;
16689 __builtin_aarch64_simd_xi __o;
16690 __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
16691 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
16692 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
16693 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
16694 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
16695 return ret;
16696 }
16697
16698 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
16699 vld4_dup_u32 (const uint32_t * __a)
16700 {
16701 uint32x2x4_t ret;
16702 __builtin_aarch64_simd_xi __o;
16703 __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
16704 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
16705 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
16706 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
16707 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
16708 return ret;
16709 }
16710
16711 __extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
16712 vld4_dup_f16 (const float16_t * __a)
16713 {
16714 float16x4x4_t ret;
16715 __builtin_aarch64_simd_xi __o;
16716 __o = __builtin_aarch64_ld4rv4hf ((const __builtin_aarch64_simd_hf *) __a);
16717 ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 0);
16718 ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 1);
16719 ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 2);
16720 ret.val[3] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 3);
16721 return ret;
16722 }
16723
16724 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
16725 vld4_dup_f32 (const float32_t * __a)
16726 {
16727 float32x2x4_t ret;
16728 __builtin_aarch64_simd_xi __o;
16729 __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a);
16730 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
16731 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
16732 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
16733 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
16734 return ret;
16735 }
16736
16737 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
16738 vld4q_dup_s8 (const int8_t * __a)
16739 {
16740 int8x16x4_t ret;
16741 __builtin_aarch64_simd_xi __o;
16742 __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16743 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16744 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16745 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16746 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16747 return ret;
16748 }
16749
16750 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
16751 vld4q_dup_p8 (const poly8_t * __a)
16752 {
16753 poly8x16x4_t ret;
16754 __builtin_aarch64_simd_xi __o;
16755 __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16756 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16757 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16758 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16759 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16760 return ret;
16761 }
16762
16763 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
16764 vld4q_dup_s16 (const int16_t * __a)
16765 {
16766 int16x8x4_t ret;
16767 __builtin_aarch64_simd_xi __o;
16768 __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16769 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16770 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16771 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16772 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16773 return ret;
16774 }
16775
16776 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
16777 vld4q_dup_p16 (const poly16_t * __a)
16778 {
16779 poly16x8x4_t ret;
16780 __builtin_aarch64_simd_xi __o;
16781 __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16782 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16783 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16784 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16785 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16786 return ret;
16787 }
16788
16789 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
16790 vld4q_dup_s32 (const int32_t * __a)
16791 {
16792 int32x4x4_t ret;
16793 __builtin_aarch64_simd_xi __o;
16794 __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
16795 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
16796 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
16797 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
16798 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
16799 return ret;
16800 }
16801
16802 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
16803 vld4q_dup_s64 (const int64_t * __a)
16804 {
16805 int64x2x4_t ret;
16806 __builtin_aarch64_simd_xi __o;
16807 __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
16808 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
16809 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
16810 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
16811 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
16812 return ret;
16813 }
16814
16815 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
16816 vld4q_dup_u8 (const uint8_t * __a)
16817 {
16818 uint8x16x4_t ret;
16819 __builtin_aarch64_simd_xi __o;
16820 __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
16821 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
16822 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
16823 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
16824 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
16825 return ret;
16826 }
16827
16828 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
16829 vld4q_dup_u16 (const uint16_t * __a)
16830 {
16831 uint16x8x4_t ret;
16832 __builtin_aarch64_simd_xi __o;
16833 __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
16834 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
16835 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
16836 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
16837 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
16838 return ret;
16839 }
16840
16841 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
16842 vld4q_dup_u32 (const uint32_t * __a)
16843 {
16844 uint32x4x4_t ret;
16845 __builtin_aarch64_simd_xi __o;
16846 __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
16847 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
16848 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
16849 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
16850 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
16851 return ret;
16852 }
16853
16854 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
16855 vld4q_dup_u64 (const uint64_t * __a)
16856 {
16857 uint64x2x4_t ret;
16858 __builtin_aarch64_simd_xi __o;
16859 __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
16860 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
16861 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
16862 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
16863 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
16864 return ret;
16865 }
16866
16867 __extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
16868 vld4q_dup_f16 (const float16_t * __a)
16869 {
16870 float16x8x4_t ret;
16871 __builtin_aarch64_simd_xi __o;
16872 __o = __builtin_aarch64_ld4rv8hf ((const __builtin_aarch64_simd_hf *) __a);
16873 ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 0);
16874 ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 1);
16875 ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 2);
16876 ret.val[3] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 3);
16877 return ret;
16878 }
16879
16880 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
16881 vld4q_dup_f32 (const float32_t * __a)
16882 {
16883 float32x4x4_t ret;
16884 __builtin_aarch64_simd_xi __o;
16885 __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a);
16886 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
16887 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
16888 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
16889 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
16890 return ret;
16891 }
16892
16893 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
16894 vld4q_dup_f64 (const float64_t * __a)
16895 {
16896 float64x2x4_t ret;
16897 __builtin_aarch64_simd_xi __o;
16898 __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a);
16899 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
16900 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
16901 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
16902 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
16903 return ret;
16904 }
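
/* Usage sketch (illustrative): the four-element forms map to LD4R in the
   same way:

     uint8_t __rgba[4] = {1, 2, 3, 4};         // hypothetical pixel
     uint8x16x4_t __p = vld4q_dup_u8 (__rgba);
     // __p.val[3] holds sixteen copies of 4.  */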
16905
16906 /* vld2_lane */
16907
16908 #define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \
16909 qmode, ptrmode, funcsuffix, signedtype) \
16910 __extension__ static __inline intype __attribute__ ((__always_inline__)) \
16911 vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
16912 { \
16913 __builtin_aarch64_simd_oi __o; \
16914 largetype __temp; \
16915 __temp.val[0] = \
16916 vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \
16917 __temp.val[1] = \
16918 vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \
16919 __o = __builtin_aarch64_set_qregoi##qmode (__o, \
16920 (signedtype) __temp.val[0], \
16921 0); \
16922 __o = __builtin_aarch64_set_qregoi##qmode (__o, \
16923 (signedtype) __temp.val[1], \
16924 1); \
16925 __o = __builtin_aarch64_ld2_lane##mode ( \
16926 (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
16927 __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0); \
16928 __b.val[1] = (vectype) __builtin_aarch64_get_dregoidi (__o, 1); \
16929 return __b; \
16930 }
16931
16932 __LD2_LANE_FUNC (float16x4x2_t, float16x4_t, float16x8x2_t, float16_t, v4hf,
16933 v8hf, hf, f16, float16x8_t)
16934 __LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v2sf, v4sf,
16935 sf, f32, float32x4_t)
16936 __LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, df, v2df,
16937 df, f64, float64x2_t)
16938 __LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8,
16939 int8x16_t)
16940 __LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi,
16941 p16, int16x8_t)
16942 __LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8,
16943 int8x16_t)
16944 __LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16,
16945 int16x8_t)
16946 __LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32,
16947 int32x4_t)
16948 __LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, di, v2di, di, s64,
16949 int64x2_t)
16950 __LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8,
16951 int8x16_t)
16952 __LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi,
16953 u16, int16x8_t)
16954 __LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si,
16955 u32, int32x4_t)
16956 __LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di,
16957 u64, int64x2_t)
16958
16959 #undef __LD2_LANE_FUNC
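
/* Usage sketch (illustrative): vld2_lane_* loads one two-element record
   into lane __c of an existing pair of vectors; the lane index must be a
   compile-time constant that is in range for the vector type:

     int16_t __buf[8] = {0};                   // hypothetical source data
     int16x4x2_t __v = vld2_s16 (__buf);
     int16_t __rec[2] = {7, 8};
     __v = vld2_lane_s16 (__rec, __v, 3);      // overwrite lane 3 only  */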
16960
16961 /* vld2q_lane */
16962
16963 #define __LD2_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
16964 __extension__ static __inline intype __attribute__ ((__always_inline__)) \
16965 vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
16966 { \
16967 __builtin_aarch64_simd_oi __o; \
16968 intype ret; \
16969 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[0], 0); \
16970 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[1], 1); \
16971 __o = __builtin_aarch64_ld2_lane##mode ( \
16972 (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
16973 ret.val[0] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 0); \
16974 ret.val[1] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 1); \
16975 return ret; \
16976 }
16977
16978 __LD2_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16)
16979 __LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
16980 __LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64)
16981 __LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
16982 __LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
16983 __LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
16984 __LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
16985 __LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
16986 __LD2_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64)
16987 __LD2_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8)
16988 __LD2_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16)
16989 __LD2_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32)
16990 __LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64)
16991
16992 #undef __LD2_LANE_FUNC
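
/* Note: the q-register lane loads above funnel every element type through
   int32x4_t and the v4si set/get builtins; since vector casts on AArch64
   are bit-preserving reinterpretations, a single mode suffices for all of
   the instantiated types and the casts generate no code.  For example:

     uint16_t __rec[2] = {9, 9};               // hypothetical record
     uint16x8x2_t __w = vld2q_dup_u16 (__rec);
     __w = vld2q_lane_u16 (__rec, __w, 5);  */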
16993
16994 /* vld3_lane */
16995
16996 #define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \
16997 qmode, ptrmode, funcsuffix, signedtype) \
16998 __extension__ static __inline intype __attribute__ ((__always_inline__)) \
16999 vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17000 { \
17001 __builtin_aarch64_simd_ci __o; \
17002 largetype __temp; \
17003 __temp.val[0] = \
17004 vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \
17005 __temp.val[1] = \
17006 vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \
17007 __temp.val[2] = \
17008 vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \
17009 __o = __builtin_aarch64_set_qregci##qmode (__o, \
17010 (signedtype) __temp.val[0], \
17011 0); \
17012 __o = __builtin_aarch64_set_qregci##qmode (__o, \
17013 (signedtype) __temp.val[1], \
17014 1); \
17015 __o = __builtin_aarch64_set_qregci##qmode (__o, \
17016 (signedtype) __temp.val[2], \
17017 2); \
17018 __o = __builtin_aarch64_ld3_lane##mode ( \
17019 (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
17020 __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0); \
17021 __b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1); \
17022 __b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2); \
17023 return __b; \
17024 }
17025
17026 __LD3_LANE_FUNC (float16x4x3_t, float16x4_t, float16x8x3_t, float16_t, v4hf,
17027 v8hf, hf, f16, float16x8_t)
17028 __LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v2sf, v4sf,
17029 sf, f32, float32x4_t)
17030 __LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, df, v2df,
17031 df, f64, float64x2_t)
17032 __LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8,
17033 int8x16_t)
17034 __LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi,
17035 p16, int16x8_t)
17036 __LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8,
17037 int8x16_t)
17038 __LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16,
17039 int16x8_t)
17040 __LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v2si, v4si, si, s32,
17041 int32x4_t)
17042 __LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, di, v2di, di, s64,
17043 int64x2_t)
17044 __LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8,
17045 int8x16_t)
17046 __LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi,
17047 u16, int16x8_t)
17048 __LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, si,
17049 u32, int32x4_t)
17050 __LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di,
17051 u64, int64x2_t)
17052
17053 #undef __LD3_LANE_FUNC
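
/* Note: the 64-bit (D-register) lane loads above first widen each input
   vector with vcombine_* (zero-filling the upper half) so that the lane
   builtin can operate on full q-register tuples, then read back only the
   low d register of each tuple entry.  Usage sketch (illustrative):

     float __t[3] = {1.0f, 2.0f, 3.0f};        // hypothetical triple
     float32x2x3_t __v = vld3_dup_f32 (__t);
     __v = vld3_lane_f32 (__t, __v, 1);  */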
17054
17055 /* vld3q_lane */
17056
17057 #define __LD3_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
17058 __extension__ static __inline intype __attribute__ ((__always_inline__)) \
17059 vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17060 { \
17061 __builtin_aarch64_simd_ci __o; \
17062 intype ret; \
17063 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0); \
17064 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1); \
17065 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2); \
17066 __o = __builtin_aarch64_ld3_lane##mode ( \
17067 (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
17068 ret.val[0] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 0); \
17069 ret.val[1] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 1); \
17070 ret.val[2] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 2); \
17071 return ret; \
17072 }
17073
17074 __LD3_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16)
17075 __LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
17076 __LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64)
17077 __LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
17078 __LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
17079 __LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
17080 __LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
17081 __LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
17082 __LD3_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
17083 __LD3_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
17084 __LD3_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
17085 __LD3_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
17086 __LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)
17087
17088 #undef __LD3_LANE_FUNC
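
/* Usage sketch (illustrative): the q-register form is the same apart from
   the wider lane range, e.g. lanes 0..7 for 16-bit elements:

     uint16_t __u[3] = {1, 2, 3};              // hypothetical record
     uint16x8x3_t __q = vld3q_dup_u16 (__u);
     __q = vld3q_lane_u16 (__u, __q, 7);       // highest lane  */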
17089
17090 /* vld4_lane */
17091
17092 #define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \
17093 qmode, ptrmode, funcsuffix, signedtype) \
17094 __extension__ static __inline intype __attribute__ ((__always_inline__)) \
17095 vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17096 { \
17097 __builtin_aarch64_simd_xi __o; \
17098 largetype __temp; \
17099 __temp.val[0] = \
17100 vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \
17101 __temp.val[1] = \
17102 vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \
17103 __temp.val[2] = \
17104 vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \
17105 __temp.val[3] = \
17106 vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0)); \
17107 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
17108 (signedtype) __temp.val[0], \
17109 0); \
17110 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
17111 (signedtype) __temp.val[1], \
17112 1); \
17113 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
17114 (signedtype) __temp.val[2], \
17115 2); \
17116 __o = __builtin_aarch64_set_qregxi##qmode (__o, \
17117 (signedtype) __temp.val[3], \
17118 3); \
17119 __o = __builtin_aarch64_ld4_lane##mode ( \
17120 (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
17121 __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0); \
17122 __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1); \
17123 __b.val[2] = (vectype) __builtin_aarch64_get_dregxidi (__o, 2); \
17124 __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3); \
17125 return __b; \
17126 }
17127
17130 __LD4_LANE_FUNC (float16x4x4_t, float16x4_t, float16x8x4_t, float16_t, v4hf,
17131 v8hf, hf, f16, float16x8_t)
17132 __LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf,
17133 sf, f32, float32x4_t)
17134 __LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df,
17135 df, f64, float64x2_t)
17136 __LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8,
17137 int8x16_t)
17138 __LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi,
17139 p16, int16x8_t)
17140 __LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8,
17141 int8x16_t)
17142 __LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16,
17143 int16x8_t)
17144 __LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v2si, v4si, si, s32,
17145 int32x4_t)
17146 __LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, di, v2di, di, s64,
17147 int64x2_t)
17148 __LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8,
17149 int8x16_t)
17150 __LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi,
17151 u16, int16x8_t)
17152 __LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, si,
17153 u32, int32x4_t)
17154 __LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di,
17155 u64, int64x2_t)
17156
17157 #undef __LD4_LANE_FUNC
17158
17159 /* vld4q_lane */
17160
17161 #define __LD4_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
17162 __extension__ static __inline intype __attribute__ ((__always_inline__)) \
17163 vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
17164 { \
17165 __builtin_aarch64_simd_xi __o; \
17166 intype ret; \
17167 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); \
17168 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); \
17169 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); \
17170 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); \
17171 __o = __builtin_aarch64_ld4_lane##mode ( \
17172 (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \
17173 ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0); \
17174 ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1); \
17175 ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2); \
17176 ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3); \
17177 return ret; \
17178 }
17179
17180 __LD4_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16)
17181 __LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
17182 __LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64)
17183 __LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
17184 __LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
17185 __LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
17186 __LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
17187 __LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
17188 __LD4_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64)
17189 __LD4_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8)
17190 __LD4_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16)
17191 __LD4_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32)
17192 __LD4_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64)
17193
17194 #undef __LD4_LANE_FUNC
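
/* Usage sketch (illustrative): vld4q_lane_* inserts one four-element
   record, e.g. a single RGBA pixel, into lane __c of a de-interleaved
   group of four vectors:

     uint8_t __px[4] = {1, 2, 3, 4};           // hypothetical pixel
     uint8x16x4_t __img = vld4q_dup_u8 (__px);
     __img = vld4q_lane_u8 (__px, __img, 15);  */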
17195
17196 /* vmax */
17197
17198 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17199 vmax_f32 (float32x2_t __a, float32x2_t __b)
17200 {
17201 return __builtin_aarch64_smax_nanv2sf (__a, __b);
17202 }
17203
17204 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17205 vmax_s8 (int8x8_t __a, int8x8_t __b)
17206 {
17207 return __builtin_aarch64_smaxv8qi (__a, __b);
17208 }
17209
17210 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17211 vmax_s16 (int16x4_t __a, int16x4_t __b)
17212 {
17213 return __builtin_aarch64_smaxv4hi (__a, __b);
17214 }
17215
17216 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17217 vmax_s32 (int32x2_t __a, int32x2_t __b)
17218 {
17219 return __builtin_aarch64_smaxv2si (__a, __b);
17220 }
17221
17222 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17223 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
17224 {
17225 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
17226 (int8x8_t) __b);
17227 }
17228
17229 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17230 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
17231 {
17232 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
17233 (int16x4_t) __b);
17234 }
17235
17236 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17237 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
17238 {
17239 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
17240 (int32x2_t) __b);
17241 }
17242
17243 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17244 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
17245 {
17246 return __builtin_aarch64_smax_nanv4sf (__a, __b);
17247 }
17248
17249 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17250 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
17251 {
17252 return __builtin_aarch64_smax_nanv2df (__a, __b);
17253 }
17254
17255 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17256 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
17257 {
17258 return __builtin_aarch64_smaxv16qi (__a, __b);
17259 }
17260
17261 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17262 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
17263 {
17264 return __builtin_aarch64_smaxv8hi (__a, __b);
17265 }
17266
17267 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17268 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
17269 {
17270 return __builtin_aarch64_smaxv4si (__a, __b);
17271 }
17272
17273 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17274 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
17275 {
17276 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
17277 (int8x16_t) __b);
17278 }
17279
17280 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17281 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
17282 {
17283 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
17284 (int16x8_t) __b);
17285 }
17286
17287 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17288 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
17289 {
17290 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
17291 (int32x4_t) __b);
17292 }
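
/* Note: the floating-point vmax* intrinsics use the smax_nan builtins,
   i.e. the FMAX instruction, whose result is NaN whenever either input
   is NaN (vmaxnm* is the number-preferring variant).  Illustration:

     float32x2_t __x = {1.0f, __builtin_nanf ("")};    // hypothetical
     float32x2_t __m = vmax_f32 (__x, vdup_n_f32 (0.0f));
     // __m == {1.0f, NaN}  */
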
17293 /* vmulx */
17294
17295 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17296 vmulx_f32 (float32x2_t __a, float32x2_t __b)
17297 {
17298 return __builtin_aarch64_fmulxv2sf (__a, __b);
17299 }
17300
17301 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17302 vmulxq_f32 (float32x4_t __a, float32x4_t __b)
17303 {
17304 return __builtin_aarch64_fmulxv4sf (__a, __b);
17305 }
17306
17307 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17308 vmulx_f64 (float64x1_t __a, float64x1_t __b)
17309 {
17310 return (float64x1_t) {__builtin_aarch64_fmulxdf (__a[0], __b[0])};
17311 }
17312
17313 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17314 vmulxq_f64 (float64x2_t __a, float64x2_t __b)
17315 {
17316 return __builtin_aarch64_fmulxv2df (__a, __b);
17317 }
17318
17319 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17320 vmulxs_f32 (float32_t __a, float32_t __b)
17321 {
17322 return __builtin_aarch64_fmulxsf (__a, __b);
17323 }
17324
17325 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17326 vmulxd_f64 (float64_t __a, float64_t __b)
17327 {
17328 return __builtin_aarch64_fmulxdf (__a, __b);
17329 }
17330
17331 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17332 vmulx_lane_f32 (float32x2_t __a, float32x2_t __v, const int __lane)
17333 {
17334 return vmulx_f32 (__a, __aarch64_vdup_lane_f32 (__v, __lane));
17335 }
17336
17337 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17338 vmulx_lane_f64 (float64x1_t __a, float64x1_t __v, const int __lane)
17339 {
17340 return vmulx_f64 (__a, __aarch64_vdup_lane_f64 (__v, __lane));
17341 }
17342
17343 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17344 vmulxq_lane_f32 (float32x4_t __a, float32x2_t __v, const int __lane)
17345 {
17346 return vmulxq_f32 (__a, __aarch64_vdupq_lane_f32 (__v, __lane));
17347 }
17348
17349 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17350 vmulxq_lane_f64 (float64x2_t __a, float64x1_t __v, const int __lane)
17351 {
17352 return vmulxq_f64 (__a, __aarch64_vdupq_lane_f64 (__v, __lane));
17353 }
17354
17355 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17356 vmulx_laneq_f32 (float32x2_t __a, float32x4_t __v, const int __lane)
17357 {
17358 return vmulx_f32 (__a, __aarch64_vdup_laneq_f32 (__v, __lane));
17359 }
17360
17361 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
17362 vmulx_laneq_f64 (float64x1_t __a, float64x2_t __v, const int __lane)
17363 {
17364 return vmulx_f64 (__a, __aarch64_vdup_laneq_f64 (__v, __lane));
17365 }
17366
17367 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17368 vmulxq_laneq_f32 (float32x4_t __a, float32x4_t __v, const int __lane)
17369 {
17370 return vmulxq_f32 (__a, __aarch64_vdupq_laneq_f32 (__v, __lane));
17371 }
17372
17373 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17374 vmulxq_laneq_f64 (float64x2_t __a, float64x2_t __v, const int __lane)
17375 {
17376 return vmulxq_f64 (__a, __aarch64_vdupq_laneq_f64 (__v, __lane));
17377 }
17378
17379 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17380 vmulxs_lane_f32 (float32_t __a, float32x2_t __v, const int __lane)
17381 {
17382 return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane));
17383 }
17384
17385 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17386 vmulxs_laneq_f32 (float32_t __a, float32x4_t __v, const int __lane)
17387 {
17388 return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane));
17389 }
17390
17391 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17392 vmulxd_lane_f64 (float64_t __a, float64x1_t __v, const int __lane)
17393 {
17394 return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane));
17395 }
17396
17397 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17398 vmulxd_laneq_f64 (float64_t __a, float64x2_t __v, const int __lane)
17399 {
17400 return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane));
17401 }
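
/* Note: vmulx* maps to FMULX, which behaves like an ordinary multiply
   except that 0 * infinity (in either order) returns 2.0 with the sign
   of the exact product instead of NaN; this is the scaling step used by
   reciprocal Newton-Raphson iterations.  Illustration:

     float __s = vmulxs_f32 (0.0f, __builtin_inff ());  // __s == 2.0f  */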
17402
17403 /* vpmax */
17404
17405 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17406 vpmax_s8 (int8x8_t a, int8x8_t b)
17407 {
17408 return __builtin_aarch64_smaxpv8qi (a, b);
17409 }
17410
17411 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17412 vpmax_s16 (int16x4_t a, int16x4_t b)
17413 {
17414 return __builtin_aarch64_smaxpv4hi (a, b);
17415 }
17416
17417 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17418 vpmax_s32 (int32x2_t a, int32x2_t b)
17419 {
17420 return __builtin_aarch64_smaxpv2si (a, b);
17421 }
17422
17423 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17424 vpmax_u8 (uint8x8_t a, uint8x8_t b)
17425 {
17426 return (uint8x8_t) __builtin_aarch64_umaxpv8qi ((int8x8_t) a,
17427 (int8x8_t) b);
17428 }
17429
17430 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17431 vpmax_u16 (uint16x4_t a, uint16x4_t b)
17432 {
17433 return (uint16x4_t) __builtin_aarch64_umaxpv4hi ((int16x4_t) a,
17434 (int16x4_t) b);
17435 }
17436
17437 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17438 vpmax_u32 (uint32x2_t a, uint32x2_t b)
17439 {
17440 return (uint32x2_t) __builtin_aarch64_umaxpv2si ((int32x2_t) a,
17441 (int32x2_t) b);
17442 }
17443
17444 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17445 vpmaxq_s8 (int8x16_t a, int8x16_t b)
17446 {
17447 return __builtin_aarch64_smaxpv16qi (a, b);
17448 }
17449
17450 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17451 vpmaxq_s16 (int16x8_t a, int16x8_t b)
17452 {
17453 return __builtin_aarch64_smaxpv8hi (a, b);
17454 }
17455
17456 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17457 vpmaxq_s32 (int32x4_t a, int32x4_t b)
17458 {
17459 return __builtin_aarch64_smaxpv4si (a, b);
17460 }
17461
17462 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17463 vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
17464 {
17465 return (uint8x16_t) __builtin_aarch64_umaxpv16qi ((int8x16_t) a,
17466 (int8x16_t) b);
17467 }
17468
17469 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17470 vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
17471 {
17472 return (uint16x8_t) __builtin_aarch64_umaxpv8hi ((int16x8_t) a,
17473 (int16x8_t) b);
17474 }
17475
17476 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17477 vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
17478 {
17479 return (uint32x4_t) __builtin_aarch64_umaxpv4si ((int32x4_t) a,
17480 (int32x4_t) b);
17481 }
17482
17483 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17484 vpmax_f32 (float32x2_t a, float32x2_t b)
17485 {
17486 return __builtin_aarch64_smax_nanpv2sf (a, b);
17487 }
17488
17489 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17490 vpmaxq_f32 (float32x4_t a, float32x4_t b)
17491 {
17492 return __builtin_aarch64_smax_nanpv4sf (a, b);
17493 }
17494
17495 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17496 vpmaxq_f64 (float64x2_t a, float64x2_t b)
17497 {
17498 return __builtin_aarch64_smax_nanpv2df (a, b);
17499 }
17500
17501 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17502 vpmaxqd_f64 (float64x2_t a)
17503 {
17504 return __builtin_aarch64_reduc_smax_nan_scal_v2df (a);
17505 }
17506
17507 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17508 vpmaxs_f32 (float32x2_t a)
17509 {
17510 return __builtin_aarch64_reduc_smax_nan_scal_v2sf (a);
17511 }
17512
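/* Editorial note, illustrative only and not part of the original header:
   the pairwise-max intrinsics above reduce adjacent lane pairs, with the
   low half of the result taken from pairs of the first operand and the
   high half from pairs of the second.  A minimal sketch (the demo
   function name is hypothetical):

     #include <arm_neon.h>

     int8x8_t
     pairwise_max_demo (void)
     {
       int8x8_t a = {1, 9, 2, 8, 3, 7, 4, 6};
       int8x8_t b = {0, 5, 0, 5, 0, 5, 0, 5};
       return vpmax_s8 (a, b);
     }

   The returned vector is {9, 8, 7, 6, 5, 5, 5, 5}.  */
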
17513 /* vpmaxnm */
17514
17515 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17516 vpmaxnm_f32 (float32x2_t a, float32x2_t b)
17517 {
17518 return __builtin_aarch64_smaxpv2sf (a, b);
17519 }
17520
17521 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17522 vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
17523 {
17524 return __builtin_aarch64_smaxpv4sf (a, b);
17525 }
17526
17527 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17528 vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
17529 {
17530 return __builtin_aarch64_smaxpv2df (a, b);
17531 }
17532
17533 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17534 vpmaxnmqd_f64 (float64x2_t a)
17535 {
17536 return __builtin_aarch64_reduc_smax_scal_v2df (a);
17537 }
17538
17539 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17540 vpmaxnms_f32 (float32x2_t a)
17541 {
17542 return __builtin_aarch64_reduc_smax_scal_v2sf (a);
17543 }
17544
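/* Editorial note (not part of the original header): the float vpmax
   variants above are built on the NaN-propagating pairwise maximum
   builtins (the smax_nanp family, i.e. FMAXP), while the vpmaxnm
   variants map to the pairwise maxNum builtins (FMAXNMP).  In IEEE
   754-2008 terms: where exactly one element of a pair is a quiet NaN,
   vpmaxnm returns the numeric element, whereas vpmax returns the NaN.  */
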
17545 /* vpmin */
17546
17547 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17548 vpmin_s8 (int8x8_t a, int8x8_t b)
17549 {
17550 return __builtin_aarch64_sminpv8qi (a, b);
17551 }
17552
17553 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17554 vpmin_s16 (int16x4_t a, int16x4_t b)
17555 {
17556 return __builtin_aarch64_sminpv4hi (a, b);
17557 }
17558
17559 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17560 vpmin_s32 (int32x2_t a, int32x2_t b)
17561 {
17562 return __builtin_aarch64_sminpv2si (a, b);
17563 }
17564
17565 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17566 vpmin_u8 (uint8x8_t a, uint8x8_t b)
17567 {
17568 return (uint8x8_t) __builtin_aarch64_uminpv8qi ((int8x8_t) a,
17569 (int8x8_t) b);
17570 }
17571
17572 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17573 vpmin_u16 (uint16x4_t a, uint16x4_t b)
17574 {
17575 return (uint16x4_t) __builtin_aarch64_uminpv4hi ((int16x4_t) a,
17576 (int16x4_t) b);
17577 }
17578
17579 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17580 vpmin_u32 (uint32x2_t a, uint32x2_t b)
17581 {
17582 return (uint32x2_t) __builtin_aarch64_uminpv2si ((int32x2_t) a,
17583 (int32x2_t) b);
17584 }
17585
17586 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17587 vpminq_s8 (int8x16_t a, int8x16_t b)
17588 {
17589 return __builtin_aarch64_sminpv16qi (a, b);
17590 }
17591
17592 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17593 vpminq_s16 (int16x8_t a, int16x8_t b)
17594 {
17595 return __builtin_aarch64_sminpv8hi (a, b);
17596 }
17597
17598 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17599 vpminq_s32 (int32x4_t a, int32x4_t b)
17600 {
17601 return __builtin_aarch64_sminpv4si (a, b);
17602 }
17603
17604 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17605 vpminq_u8 (uint8x16_t a, uint8x16_t b)
17606 {
17607 return (uint8x16_t) __builtin_aarch64_uminpv16qi ((int8x16_t) a,
17608 (int8x16_t) b);
17609 }
17610
17611 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17612 vpminq_u16 (uint16x8_t a, uint16x8_t b)
17613 {
17614 return (uint16x8_t) __builtin_aarch64_uminpv8hi ((int16x8_t) a,
17615 (int16x8_t) b);
17616 }
17617
17618 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17619 vpminq_u32 (uint32x4_t a, uint32x4_t b)
17620 {
17621 return (uint32x4_t) __builtin_aarch64_uminpv4si ((int32x4_t) a,
17622 (int32x4_t) b);
17623 }
17624
17625 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17626 vpmin_f32 (float32x2_t a, float32x2_t b)
17627 {
17628 return __builtin_aarch64_smin_nanpv2sf (a, b);
17629 }
17630
17631 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17632 vpminq_f32 (float32x4_t a, float32x4_t b)
17633 {
17634 return __builtin_aarch64_smin_nanpv4sf (a, b);
17635 }
17636
17637 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17638 vpminq_f64 (float64x2_t a, float64x2_t b)
17639 {
17640 return __builtin_aarch64_smin_nanpv2df (a, b);
17641 }
17642
17643 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17644 vpminqd_f64 (float64x2_t a)
17645 {
17646 return __builtin_aarch64_reduc_smin_nan_scal_v2df (a);
17647 }
17648
17649 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17650 vpmins_f32 (float32x2_t a)
17651 {
17652 return __builtin_aarch64_reduc_smin_nan_scal_v2sf (a);
17653 }
17654
17655 /* vpminnm */
17656
17657 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17658 vpminnm_f32 (float32x2_t a, float32x2_t b)
17659 {
17660 return __builtin_aarch64_sminpv2sf (a, b);
17661 }
17662
17663 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17664 vpminnmq_f32 (float32x4_t a, float32x4_t b)
17665 {
17666 return __builtin_aarch64_sminpv4sf (a, b);
17667 }
17668
17669 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17670 vpminnmq_f64 (float64x2_t a, float64x2_t b)
17671 {
17672 return __builtin_aarch64_sminpv2df (a, b);
17673 }
17674
17675 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17676 vpminnmqd_f64 (float64x2_t a)
17677 {
17678 return __builtin_aarch64_reduc_smin_scal_v2df (a);
17679 }
17680
17681 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17682 vpminnms_f32 (float32x2_t a)
17683 {
17684 return __builtin_aarch64_reduc_smin_scal_v2sf (a);
17685 }
17686
17687 /* vmaxnm */
17688
17689 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17690 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
17691 {
17692 return __builtin_aarch64_fmaxv2sf (__a, __b);
17693 }
17694
17695 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17696 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
17697 {
17698 return __builtin_aarch64_fmaxv4sf (__a, __b);
17699 }
17700
17701 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17702 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
17703 {
17704 return __builtin_aarch64_fmaxv2df (__a, __b);
17705 }
17706
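/* Editorial sketch, illustrative only and not part of the original
   header: vmaxnm implements IEEE 754-2008 maxNum, so a quiet NaN in one
   operand is treated as missing data and the numeric operand is
   returned.  The demo function name is hypothetical:

     #include <arm_neon.h>
     #include <math.h>

     float32x2_t
     maxnm_demo (void)
     {
       float32x2_t a = {1.0f, NAN};
       float32x2_t b = {2.0f, 3.0f};
       return vmaxnm_f32 (a, b);
     }

   The result is {2.0f, 3.0f}: lane 1 ignores the NaN in favour of 3.0f.  */
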
17707 /* vmaxv */
17708
17709 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17710 vmaxv_f32 (float32x2_t __a)
17711 {
17712 return __builtin_aarch64_reduc_smax_nan_scal_v2sf (__a);
17713 }
17714
17715 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
17716 vmaxv_s8 (int8x8_t __a)
17717 {
17718 return __builtin_aarch64_reduc_smax_scal_v8qi (__a);
17719 }
17720
17721 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
17722 vmaxv_s16 (int16x4_t __a)
17723 {
17724 return __builtin_aarch64_reduc_smax_scal_v4hi (__a);
17725 }
17726
17727 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17728 vmaxv_s32 (int32x2_t __a)
17729 {
17730 return __builtin_aarch64_reduc_smax_scal_v2si (__a);
17731 }
17732
17733 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
17734 vmaxv_u8 (uint8x8_t __a)
17735 {
17736 return __builtin_aarch64_reduc_umax_scal_v8qi_uu (__a);
17737 }
17738
17739 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
17740 vmaxv_u16 (uint16x4_t __a)
17741 {
17742 return __builtin_aarch64_reduc_umax_scal_v4hi_uu (__a);
17743 }
17744
17745 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17746 vmaxv_u32 (uint32x2_t __a)
17747 {
17748 return __builtin_aarch64_reduc_umax_scal_v2si_uu (__a);
17749 }
17750
17751 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17752 vmaxvq_f32 (float32x4_t __a)
17753 {
17754 return __builtin_aarch64_reduc_smax_nan_scal_v4sf (__a);
17755 }
17756
17757 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17758 vmaxvq_f64 (float64x2_t __a)
17759 {
17760 return __builtin_aarch64_reduc_smax_nan_scal_v2df (__a);
17761 }
17762
17763 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
17764 vmaxvq_s8 (int8x16_t __a)
17765 {
17766 return __builtin_aarch64_reduc_smax_scal_v16qi (__a);
17767 }
17768
17769 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
17770 vmaxvq_s16 (int16x8_t __a)
17771 {
17772 return __builtin_aarch64_reduc_smax_scal_v8hi (__a);
17773 }
17774
17775 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17776 vmaxvq_s32 (int32x4_t __a)
17777 {
17778 return __builtin_aarch64_reduc_smax_scal_v4si (__a);
17779 }
17780
17781 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
17782 vmaxvq_u8 (uint8x16_t __a)
17783 {
17784 return __builtin_aarch64_reduc_umax_scal_v16qi_uu (__a);
17785 }
17786
17787 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
17788 vmaxvq_u16 (uint16x8_t __a)
17789 {
17790 return __builtin_aarch64_reduc_umax_scal_v8hi_uu (__a);
17791 }
17792
17793 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17794 vmaxvq_u32 (uint32x4_t __a)
17795 {
17796 return __builtin_aarch64_reduc_umax_scal_v4si_uu (__a);
17797 }
17798
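/* Editorial sketch (not part of the original header): the across-lanes
   vmaxv forms reduce a whole vector to a single scalar.  The float
   variants reduce through the NaN-propagating builtins, while the
   vmaxnmv family below reduces with maxNum semantics.  A minimal sketch
   with a hypothetical demo function:

     #include <arm_neon.h>

     int8_t
     maxv_demo (void)
     {
       int8x8_t v = {3, -1, 7, 0, 5, 2, -4, 6};
       return vmaxv_s8 (v);
     }

   The call returns 7, the largest of the eight lanes.  */
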
17799 /* vmaxnmv */
17800
17801 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17802 vmaxnmv_f32 (float32x2_t __a)
17803 {
17804 return __builtin_aarch64_reduc_smax_scal_v2sf (__a);
17805 }
17806
17807 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17808 vmaxnmvq_f32 (float32x4_t __a)
17809 {
17810 return __builtin_aarch64_reduc_smax_scal_v4sf (__a);
17811 }
17812
17813 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17814 vmaxnmvq_f64 (float64x2_t __a)
17815 {
17816 return __builtin_aarch64_reduc_smax_scal_v2df (__a);
17817 }
17818
17819 /* vmin */
17820
17821 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17822 vmin_f32 (float32x2_t __a, float32x2_t __b)
17823 {
17824 return __builtin_aarch64_smin_nanv2sf (__a, __b);
17825 }
17826
17827 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17828 vmin_s8 (int8x8_t __a, int8x8_t __b)
17829 {
17830 return __builtin_aarch64_sminv8qi (__a, __b);
17831 }
17832
17833 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17834 vmin_s16 (int16x4_t __a, int16x4_t __b)
17835 {
17836 return __builtin_aarch64_sminv4hi (__a, __b);
17837 }
17838
17839 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17840 vmin_s32 (int32x2_t __a, int32x2_t __b)
17841 {
17842 return __builtin_aarch64_sminv2si (__a, __b);
17843 }
17844
17845 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17846 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
17847 {
17848 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
17849 (int8x8_t) __b);
17850 }
17851
17852 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17853 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
17854 {
17855 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
17856 (int16x4_t) __b);
17857 }
17858
17859 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17860 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
17861 {
17862 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
17863 (int32x2_t) __b);
17864 }
17865
17866 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17867 vminq_f32 (float32x4_t __a, float32x4_t __b)
17868 {
17869 return __builtin_aarch64_smin_nanv4sf (__a, __b);
17870 }
17871
17872 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17873 vminq_f64 (float64x2_t __a, float64x2_t __b)
17874 {
17875 return __builtin_aarch64_smin_nanv2df (__a, __b);
17876 }
17877
17878 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17879 vminq_s8 (int8x16_t __a, int8x16_t __b)
17880 {
17881 return __builtin_aarch64_sminv16qi (__a, __b);
17882 }
17883
17884 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17885 vminq_s16 (int16x8_t __a, int16x8_t __b)
17886 {
17887 return __builtin_aarch64_sminv8hi (__a, __b);
17888 }
17889
17890 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17891 vminq_s32 (int32x4_t __a, int32x4_t __b)
17892 {
17893 return __builtin_aarch64_sminv4si (__a, __b);
17894 }
17895
17896 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17897 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
17898 {
17899 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
17900 (int8x16_t) __b);
17901 }
17902
17903 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17904 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
17905 {
17906 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
17907 (int16x8_t) __b);
17908 }
17909
17910 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17911 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
17912 {
17913 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
17914 (int32x4_t) __b);
17915 }
17916
17917 /* vminnm */
17918
17919 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17920 vminnm_f32 (float32x2_t __a, float32x2_t __b)
17921 {
17922 return __builtin_aarch64_fminv2sf (__a, __b);
17923 }
17924
17925 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17926 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
17927 {
17928 return __builtin_aarch64_fminv4sf (__a, __b);
17929 }
17930
17931 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17932 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
17933 {
17934 return __builtin_aarch64_fminv2df (__a, __b);
17935 }
17936
17937 /* vminv */
17938
17939 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17940 vminv_f32 (float32x2_t __a)
17941 {
17942 return __builtin_aarch64_reduc_smin_nan_scal_v2sf (__a);
17943 }
17944
17945 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
17946 vminv_s8 (int8x8_t __a)
17947 {
17948 return __builtin_aarch64_reduc_smin_scal_v8qi (__a);
17949 }
17950
17951 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
17952 vminv_s16 (int16x4_t __a)
17953 {
17954 return __builtin_aarch64_reduc_smin_scal_v4hi (__a);
17955 }
17956
17957 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17958 vminv_s32 (int32x2_t __a)
17959 {
17960 return __builtin_aarch64_reduc_smin_scal_v2si (__a);
17961 }
17962
17963 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
17964 vminv_u8 (uint8x8_t __a)
17965 {
17966 return __builtin_aarch64_reduc_umin_scal_v8qi_uu (__a);
17967 }
17968
17969 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
17970 vminv_u16 (uint16x4_t __a)
17971 {
17972 return __builtin_aarch64_reduc_umin_scal_v4hi_uu (__a);
17973 }
17974
17975 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17976 vminv_u32 (uint32x2_t __a)
17977 {
17978 return __builtin_aarch64_reduc_umin_scal_v2si_uu (__a);
17979 }
17980
17981 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17982 vminvq_f32 (float32x4_t __a)
17983 {
17984 return __builtin_aarch64_reduc_smin_nan_scal_v4sf (__a);
17985 }
17986
17987 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17988 vminvq_f64 (float64x2_t __a)
17989 {
17990 return __builtin_aarch64_reduc_smin_nan_scal_v2df (__a);
17991 }
17992
17993 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
17994 vminvq_s8 (int8x16_t __a)
17995 {
17996 return __builtin_aarch64_reduc_smin_scal_v16qi (__a);
17997 }
17998
17999 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18000 vminvq_s16 (int16x8_t __a)
18001 {
18002 return __builtin_aarch64_reduc_smin_scal_v8hi (__a);
18003 }
18004
18005 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18006 vminvq_s32 (int32x4_t __a)
18007 {
18008 return __builtin_aarch64_reduc_smin_scal_v4si (__a);
18009 }
18010
18011 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18012 vminvq_u8 (uint8x16_t __a)
18013 {
18014 return __builtin_aarch64_reduc_umin_scal_v16qi_uu (__a);
18015 }
18016
18017 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18018 vminvq_u16 (uint16x8_t __a)
18019 {
18020 return __builtin_aarch64_reduc_umin_scal_v8hi_uu (__a);
18021 }
18022
18023 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18024 vminvq_u32 (uint32x4_t __a)
18025 {
18026 return __builtin_aarch64_reduc_umin_scal_v4si_uu (__a);
18027 }
18028
18029 /* vminnmv */
18030
18031 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18032 vminnmv_f32 (float32x2_t __a)
18033 {
18034 return __builtin_aarch64_reduc_smin_scal_v2sf (__a);
18035 }
18036
18037 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18038 vminnmvq_f32 (float32x4_t __a)
18039 {
18040 return __builtin_aarch64_reduc_smin_scal_v4sf (__a);
18041 }
18042
18043 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18044 vminnmvq_f64 (float64x2_t __a)
18045 {
18046 return __builtin_aarch64_reduc_smin_scal_v2df (__a);
18047 }
18048
18049 /* vmla */
18050
18051 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18052 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18053 {
18054 return a + b * c;
18055 }
18056
18057 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18058 vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18059 {
18060 return __a + __b * __c;
18061 }
18062
18063 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18064 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18065 {
18066 return a + b * c;
18067 }
18068
18069 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18070 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
18071 {
18072 return a + b * c;
18073 }
18074
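/* Editorial note (not part of the original header): vmla and vmls are
   expressed as plain arithmetic rather than as fused builtins, so for
   the float variants whether GCC contracts the multiply-add into a
   single FMLA depends on the -ffp-contract setting; the vfma/vfms
   intrinsics request fusion explicitly.  Lane-wise, the sketch below
   (hypothetical function name) computes acc + x * y:

     #include <arm_neon.h>

     float32x2_t
     mla_demo (float32x2_t acc, float32x2_t x, float32x2_t y)
     {
       return vmla_f32 (acc, x, y);
     }
*/
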
18075 /* vmla_lane */
18076
18077 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18078 vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
18079 float32x2_t __c, const int __lane)
18080 {
18081 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18082 }
18083
18084 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18085 vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
18086 int16x4_t __c, const int __lane)
18087 {
18088 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18089 }
18090
18091 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18092 vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
18093 int32x2_t __c, const int __lane)
18094 {
18095 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18096 }
18097
18098 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18099 vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
18100 uint16x4_t __c, const int __lane)
18101 {
18102 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18103 }
18104
18105 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18106 vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
18107 uint32x2_t __c, const int __lane)
18108 {
18109 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18110 }
18111
18112 /* vmla_laneq */
18113
18114 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18115 vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
18116 float32x4_t __c, const int __lane)
18117 {
18118 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18119 }
18120
18121 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18122 vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
18123 int16x8_t __c, const int __lane)
18124 {
18125 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18126 }
18127
18128 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18129 vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
18130 int32x4_t __c, const int __lane)
18131 {
18132 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18133 }
18134
18135 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18136 vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
18137 uint16x8_t __c, const int __lane)
18138 {
18139 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18140 }
18141
18142 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18143 vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
18144 uint32x4_t __c, const int __lane)
18145 {
18146 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18147 }
18148
18149 /* vmlaq_lane */
18150
18151 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18152 vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
18153 float32x2_t __c, const int __lane)
18154 {
18155 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18156 }
18157
18158 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18159 vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
18160 int16x4_t __c, const int __lane)
18161 {
18162 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18163 }
18164
18165 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18166 vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
18167 int32x2_t __c, const int __lane)
18168 {
18169 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18170 }
18171
18172 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18173 vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
18174 uint16x4_t __c, const int __lane)
18175 {
18176 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18177 }
18178
18179 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18180 vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
18181 uint32x2_t __c, const int __lane)
18182 {
18183 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18184 }
18185
18186 /* vmlaq_laneq */
18187
18188 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18189 vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18190 float32x4_t __c, const int __lane)
18191 {
18192 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18193 }
18194
18195 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18196 vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
18197 int16x8_t __c, const int __lane)
18198 {
18199 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18200 }
18201
18202 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18203 vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
18204 int32x4_t __c, const int __lane)
18205 {
18206 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18207 }
18208
18209 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18210 vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
18211 uint16x8_t __c, const int __lane)
18212 {
18213 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18214 }
18215
18216 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18217 vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
18218 uint32x4_t __c, const int __lane)
18219 {
18220 return (__a + (__b * __aarch64_vget_lane_any (__c, __lane)));
18221 }
18222
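/* Editorial note (not part of the original header): the _lane and _laneq
   multiply-accumulate forms broadcast one lane of the final vector
   operand; __aarch64_vget_lane_any extracts that lane and applies a
   lane-bounds check, so the index must be an in-range constant
   expression.  A sketch with a hypothetical function name:

     #include <arm_neon.h>

     int16x8_t
     mlaq_lane_demo (int16x8_t acc, int16x8_t x, int16x4_t t)
     {
       return vmlaq_lane_s16 (acc, x, t, 1);
     }

   Each lane of the result is acc plus x times t[1].  */
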
18223 /* vmls */
18224
18225 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18226 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18227 {
18228 return a - b * c;
18229 }
18230
18231 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18232 vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18233 {
18234 return __a - __b * __c;
18235 }
18236
18237 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18238 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18239 {
18240 return a - b * c;
18241 }
18242
18243 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18244 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
18245 {
18246 return a - b * c;
18247 }
18248
18249 /* vmls_lane */
18250
18251 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18252 vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
18253 float32x2_t __c, const int __lane)
18254 {
18255 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18256 }
18257
18258 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18259 vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
18260 int16x4_t __c, const int __lane)
18261 {
18262 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18263 }
18264
18265 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18266 vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
18267 int32x2_t __c, const int __lane)
18268 {
18269 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18270 }
18271
18272 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18273 vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
18274 uint16x4_t __c, const int __lane)
18275 {
18276 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18277 }
18278
18279 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18280 vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
18281 uint32x2_t __c, const int __lane)
18282 {
18283 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18284 }
18285
18286 /* vmls_laneq */
18287
18288 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18289 vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
18290 float32x4_t __c, const int __lane)
18291 {
18292 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18293 }
18294
18295 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18296 vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
18297 int16x8_t __c, const int __lane)
18298 {
18299 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18300 }
18301
18302 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18303 vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
18304 int32x4_t __c, const int __lane)
18305 {
18306 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18307 }
18308
18309 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18310 vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
18311 uint16x8_t __c, const int __lane)
18312 {
18313 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18314 }
18315
18316 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18317 vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
18318 uint32x4_t __c, const int __lane)
18319 {
18320 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18321 }
18322
18323 /* vmlsq_lane */
18324
18325 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18326 vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
18327 float32x2_t __c, const int __lane)
18328 {
18329 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18330 }
18331
18332 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18333 vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
18334 int16x4_t __c, const int __lane)
18335 {
18336 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18337 }
18338
18339 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18340 vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
18341 int32x2_t __c, const int __lane)
18342 {
18343 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18344 }
18345
18346 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18347 vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
18348 uint16x4_t __c, const int __lane)
18349 {
18350 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18351 }
18352
18353 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18354 vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
18355 uint32x2_t __c, const int __lane)
18356 {
18357 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18358 }
18359
18360 /* vmlsq_laneq */
18361
18362 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18363 vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18364 float32x4_t __c, const int __lane)
18365 {
18366 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18367 }
18368
18369 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18370 vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
18371 int16x8_t __c, const int __lane)
18372 {
18373 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18374 }
18375
18376 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18377 vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
18378 int32x4_t __c, const int __lane)
18379 {
18380 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18381 }

18382 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18383 vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
18384 uint16x8_t __c, const int __lane)
18385 {
18386 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18387 }
18388
18389 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18390 vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
18391 uint32x4_t __c, const int __lane)
18392 {
18393 return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
18394 }
18395
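/* Editorial note (not part of the original header): vmls mirrors vmla
   with a subtraction, so the following hypothetical sketch computes
   acc - x * t[3] in every lane:

     #include <arm_neon.h>

     uint32x4_t
     mls_demo (uint32x4_t acc, uint32x4_t x, uint32x4_t t)
     {
       return vmlsq_laneq_u32 (acc, x, t, 3);
     }
*/
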
18396 /* vmov_n_ */
18397
18398 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
18399 vmov_n_f16 (float16_t __a)
18400 {
18401 return vdup_n_f16 (__a);
18402 }
18403
18404 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18405 vmov_n_f32 (float32_t __a)
18406 {
18407 return vdup_n_f32 (__a);
18408 }
18409
18410 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18411 vmov_n_f64 (float64_t __a)
18412 {
18413 return (float64x1_t) {__a};
18414 }
18415
18416 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18417 vmov_n_p8 (poly8_t __a)
18418 {
18419 return vdup_n_p8 (__a);
18420 }
18421
18422 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18423 vmov_n_p16 (poly16_t __a)
18424 {
18425 return vdup_n_p16 (__a);
18426 }
18427
18428 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18429 vmov_n_s8 (int8_t __a)
18430 {
18431 return vdup_n_s8 (__a);
18432 }
18433
18434 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18435 vmov_n_s16 (int16_t __a)
18436 {
18437 return vdup_n_s16 (__a);
18438 }
18439
18440 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18441 vmov_n_s32 (int32_t __a)
18442 {
18443 return vdup_n_s32 (__a);
18444 }
18445
18446 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18447 vmov_n_s64 (int64_t __a)
18448 {
18449 return (int64x1_t) {__a};
18450 }
18451
18452 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18453 vmov_n_u8 (uint8_t __a)
18454 {
18455 return vdup_n_u8 (__a);
18456 }
18457
18458 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18459 vmov_n_u16 (uint16_t __a)
18460 {
18461 return vdup_n_u16 (__a);
18462 }
18463
18464 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18465 vmov_n_u32 (uint32_t __a)
18466 {
18467 return vdup_n_u32 (__a);
18468 }
18469
18470 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18471 vmov_n_u64 (uint64_t __a)
18472 {
18473 return (uint64x1_t) {__a};
18474 }
18475
18476 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
18477 vmovq_n_f16 (float16_t __a)
18478 {
18479 return vdupq_n_f16 (__a);
18480 }
18481
18482 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18483 vmovq_n_f32 (float32_t __a)
18484 {
18485 return vdupq_n_f32 (__a);
18486 }
18487
18488 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18489 vmovq_n_f64 (float64_t __a)
18490 {
18491 return vdupq_n_f64 (__a);
18492 }
18493
18494 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18495 vmovq_n_p8 (poly8_t __a)
18496 {
18497 return vdupq_n_p8 (__a);
18498 }
18499
18500 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18501 vmovq_n_p16 (poly16_t __a)
18502 {
18503 return vdupq_n_p16 (__a);
18504 }
18505
18506 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18507 vmovq_n_s8 (int8_t __a)
18508 {
18509 return vdupq_n_s8 (__a);
18510 }
18511
18512 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18513 vmovq_n_s16 (int16_t __a)
18514 {
18515 return vdupq_n_s16 (__a);
18516 }
18517
18518 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18519 vmovq_n_s32 (int32_t __a)
18520 {
18521 return vdupq_n_s32 (__a);
18522 }
18523
18524 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18525 vmovq_n_s64 (int64_t __a)
18526 {
18527 return vdupq_n_s64 (__a);
18528 }
18529
18530 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18531 vmovq_n_u8 (uint8_t __a)
18532 {
18533 return vdupq_n_u8 (__a);
18534 }
18535
18536 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18537 vmovq_n_u16 (uint16_t __a)
18538 {
18539 return vdupq_n_u16 (__a);
18540 }
18541
18542 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18543 vmovq_n_u32 (uint32_t __a)
18544 {
18545 return vdupq_n_u32 (__a);
18546 }
18547
18548 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18549 vmovq_n_u64 (uint64_t __a)
18550 {
18551 return vdupq_n_u64 (__a);
18552 }
18553
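/* Editorial note (not part of the original header): the vmov_n_<t> and
   vmovq_n_<t> forms above are thin aliases that forward to the matching
   vdup_n_<t> and vdupq_n_<t> intrinsics; only the single-lane 64-bit
   cases build the vector directly.  A hypothetical sketch:

     #include <arm_neon.h>

     uint32x4_t
     splat_demo (void)
     {
       return vmovq_n_u32 (42u);
     }

   The result is {42, 42, 42, 42}, exactly as with vdupq_n_u32.  */
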
18554 /* vmul_lane */
18555
18556 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18557 vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
18558 {
18559 return __a * __aarch64_vget_lane_any (__b, __lane);
18560 }
18561
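/* A float64x1_t has only lane 0, so vmul_lane_f64 below ignores the lane
   index and multiplies the one-lane vectors directly.  */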
18562 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18563 vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
18564 {
18565 return __a * __b;
18566 }
18567
18568 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18569 vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
18570 {
18571 return __a * __aarch64_vget_lane_any (__b, __lane);
18572 }
18573
18574 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18575 vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
18576 {
18577 return __a * __aarch64_vget_lane_any (__b, __lane);
18578 }
18579
18580 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18581 vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
18582 {
18583 return __a * __aarch64_vget_lane_any (__b, __lane);
18584 }
18585
18586 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18587 vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
18588 {
18589 return __a * __aarch64_vget_lane_any (__b, __lane);
18590 }
18591
18592 /* vmuld_lane */
18593
18594 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18595 vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane)
18596 {
18597 return __a * __aarch64_vget_lane_any (__b, __lane);
18598 }
18599
18600 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18601 vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane)
18602 {
18603 return __a * __aarch64_vget_lane_any (__b, __lane);
18604 }
18605
18606 /* vmuls_lane */
18607
18608 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18609 vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane)
18610 {
18611 return __a * __aarch64_vget_lane_any (__b, __lane);
18612 }
18613
18614 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18615 vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane)
18616 {
18617 return __a * __aarch64_vget_lane_any (__b, __lane);
18618 }
18619
18620 /* vmul_laneq */
18621
18622 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18623 vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
18624 {
18625 return __a * __aarch64_vget_lane_any (__b, __lane);
18626 }
18627
18628 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18629 vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
18630 {
18631 return __a * __aarch64_vget_lane_any (__b, __lane);
18632 }
18633
18634 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18635 vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
18636 {
18637 return __a * __aarch64_vget_lane_any (__b, __lane);
18638 }
18639
18640 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18641 vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
18642 {
18643 return __a * __aarch64_vget_lane_any (__b, __lane);
18644 }
18645
18646 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18647 vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
18648 {
18649 return __a * __aarch64_vget_lane_any (__b, __lane);
18650 }
18651
18652 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18653 vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
18654 {
18655 return __a * __aarch64_vget_lane_any (__b, __lane);
18656 }
18657
18658 /* vmul_n */
18659
18660 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18661 vmul_n_f64 (float64x1_t __a, float64_t __b)
18662 {
18663 return (float64x1_t) { vget_lane_f64 (__a, 0) * __b };
18664 }
18665
18666 /* vmulq_lane */
18667
18668 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18669 vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
18670 {
18671 return __a * __aarch64_vget_lane_any (__b, __lane);
18672 }
18673
18674 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18675 vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
18676 {
18677 __AARCH64_LANE_CHECK (__a, __lane);
18678 return __a * __b[0];
18679 }
18680
18681 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18682 vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
18683 {
18684 return __a * __aarch64_vget_lane_any (__b, __lane);
18685 }
18686
18687 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18688 vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
18689 {
18690 return __a * __aarch64_vget_lane_any (__b, __lane);
18691 }
18692
18693 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18694 vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
18695 {
18696 return __a * __aarch64_vget_lane_any (__b, __lane);
18697 }
18698
18699 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18700 vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
18701 {
18702 return __a * __aarch64_vget_lane_any (__b, __lane);
18703 }
18704
18705 /* vmulq_laneq */
18706
18707 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18708 vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
18709 {
18710 return __a * __aarch64_vget_lane_any (__b, __lane);
18711 }
18712
18713 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18714 vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
18715 {
18716 return __a * __aarch64_vget_lane_any (__b, __lane);
18717 }
18718
18719 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18720 vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
18721 {
18722 return __a * __aarch64_vget_lane_any (__b, __lane);
18723 }
18724
18725 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18726 vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
18727 {
18728 return __a * __aarch64_vget_lane_any (__b, __lane);
18729 }
18730
18731 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18732 vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
18733 {
18734 return __a * __aarch64_vget_lane_any (__b, __lane);
18735 }
18736
18737 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18738 vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
18739 {
18740 return __a * __aarch64_vget_lane_any (__b, __lane);
18741 }
18742
18743 /* vmul_n */
18744
18745 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18746 vmul_n_f32 (float32x2_t __a, float32_t __b)
18747 {
18748 return __a * __b;
18749 }
18750
18751 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18752 vmulq_n_f32 (float32x4_t __a, float32_t __b)
18753 {
18754 return __a * __b;
18755 }
18756
18757 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18758 vmulq_n_f64 (float64x2_t __a, float64_t __b)
18759 {
18760 return __a * __b;
18761 }
18762
18763 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18764 vmul_n_s16 (int16x4_t __a, int16_t __b)
18765 {
18766 return __a * __b;
18767 }
18768
18769 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18770 vmulq_n_s16 (int16x8_t __a, int16_t __b)
18771 {
18772 return __a * __b;
18773 }
18774
18775 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18776 vmul_n_s32 (int32x2_t __a, int32_t __b)
18777 {
18778 return __a * __b;
18779 }
18780
18781 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18782 vmulq_n_s32 (int32x4_t __a, int32_t __b)
18783 {
18784 return __a * __b;
18785 }
18786
18787 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18788 vmul_n_u16 (uint16x4_t __a, uint16_t __b)
18789 {
18790 return __a * __b;
18791 }
18792
18793 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18794 vmulq_n_u16 (uint16x8_t __a, uint16_t __b)
18795 {
18796 return __a * __b;
18797 }
18798
18799 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18800 vmul_n_u32 (uint32x2_t __a, uint32_t __b)
18801 {
18802 return __a * __b;
18803 }
18804
18805 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18806 vmulq_n_u32 (uint32x4_t __a, uint32_t __b)
18807 {
18808 return __a * __b;
18809 }
18810
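/* Editorial sketch (not part of the original header): vmul_n scales
   every lane by one scalar, and vmul_lane does the same with the scalar
   taken from a vector lane.  A hypothetical demo:

     #include <arm_neon.h>

     int16x4_t
     mul_by_lane_demo (int16x4_t a, int16x4_t b)
     {
       int16x4_t s = vmul_n_s16 (a, 3);
       return vmul_lane_s16 (s, b, 2);
     }

   The first call multiplies each lane of a by 3; the second multiplies
   each lane of the intermediate result by b[2].  */
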
18811 /* vmvn */
18812
18813 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18814 vmvn_p8 (poly8x8_t __a)
18815 {
18816 return (poly8x8_t) ~((int8x8_t) __a);
18817 }
18818
18819 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18820 vmvn_s8 (int8x8_t __a)
18821 {
18822 return ~__a;
18823 }
18824
18825 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18826 vmvn_s16 (int16x4_t __a)
18827 {
18828 return ~__a;
18829 }
18830
18831 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18832 vmvn_s32 (int32x2_t __a)
18833 {
18834 return ~__a;
18835 }
18836
18837 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18838 vmvn_u8 (uint8x8_t __a)
18839 {
18840 return ~__a;
18841 }
18842
18843 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18844 vmvn_u16 (uint16x4_t __a)
18845 {
18846 return ~__a;
18847 }
18848
18849 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18850 vmvn_u32 (uint32x2_t __a)
18851 {
18852 return ~__a;
18853 }
18854
18855 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18856 vmvnq_p8 (poly8x16_t __a)
18857 {
18858 return (poly8x16_t) ~((int8x16_t) __a);
18859 }
18860
18861 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18862 vmvnq_s8 (int8x16_t __a)
18863 {
18864 return ~__a;
18865 }
18866
18867 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18868 vmvnq_s16 (int16x8_t __a)
18869 {
18870 return ~__a;
18871 }
18872
18873 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18874 vmvnq_s32 (int32x4_t __a)
18875 {
18876 return ~__a;
18877 }
18878
18879 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18880 vmvnq_u8 (uint8x16_t __a)
18881 {
18882 return ~__a;
18883 }
18884
18885 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18886 vmvnq_u16 (uint16x8_t __a)
18887 {
18888 return ~__a;
18889 }
18890
18891 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18892 vmvnq_u32 (uint32x4_t __a)
18893 {
18894 return ~__a;
18895 }
18896
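/* Editorial sketch (not part of the original header): vmvn is a
   lane-wise bitwise NOT; the poly8 variants above cast through a signed
   vector type because the ~ operator is not applied directly to the
   polynomial vector types.  A hypothetical demo:

     #include <arm_neon.h>

     uint8x8_t
     mvn_demo (void)
     {
       return vmvn_u8 (vdup_n_u8 (0x0f));
     }

   Every lane of the result is 0xf0.  */
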
18897 /* vneg */
18898
18899 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18900 vneg_f32 (float32x2_t __a)
18901 {
18902 return -__a;
18903 }
18904
18905 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18906 vneg_f64 (float64x1_t __a)
18907 {
18908 return -__a;
18909 }
18910
18911 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18912 vneg_s8 (int8x8_t __a)
18913 {
18914 return -__a;
18915 }
18916
18917 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18918 vneg_s16 (int16x4_t __a)
18919 {
18920 return -__a;
18921 }
18922
18923 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18924 vneg_s32 (int32x2_t __a)
18925 {
18926 return -__a;
18927 }
18928
18929 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18930 vneg_s64 (int64x1_t __a)
18931 {
18932 return -__a;
18933 }
18934
18935 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18936 vnegq_f32 (float32x4_t __a)
18937 {
18938 return -__a;
18939 }
18940
18941 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18942 vnegq_f64 (float64x2_t __a)
18943 {
18944 return -__a;
18945 }
18946
18947 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18948 vnegq_s8 (int8x16_t __a)
18949 {
18950 return -__a;
18951 }
18952
18953 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18954 vnegq_s16 (int16x8_t __a)
18955 {
18956 return -__a;
18957 }
18958
18959 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18960 vnegq_s32 (int32x4_t __a)
18961 {
18962 return -__a;
18963 }
18964
18965 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18966 vnegq_s64 (int64x2_t __a)
18967 {
18968 return -__a;
18969 }
18970
18971 /* vpadd */
18972
18973 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18974 vpadd_f32 (float32x2_t __a, float32x2_t __b)
18975 {
18976 return __builtin_aarch64_faddpv2sf (__a, __b);
18977 }
18978
18979 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18980 vpaddq_f32 (float32x4_t __a, float32x4_t __b)
18981 {
18982 return __builtin_aarch64_faddpv4sf (__a, __b);
18983 }
18984
18985 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18986 vpaddq_f64 (float64x2_t __a, float64x2_t __b)
18987 {
18988 return __builtin_aarch64_faddpv2df (__a, __b);
18989 }
18990
18991 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18992 vpadd_s8 (int8x8_t __a, int8x8_t __b)
18993 {
18994 return __builtin_aarch64_addpv8qi (__a, __b);
18995 }
18996
18997 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18998 vpadd_s16 (int16x4_t __a, int16x4_t __b)
18999 {
19000 return __builtin_aarch64_addpv4hi (__a, __b);
19001 }
19002
19003 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19004 vpadd_s32 (int32x2_t __a, int32x2_t __b)
19005 {
19006 return __builtin_aarch64_addpv2si (__a, __b);
19007 }
19008
19009 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19010 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
19011 {
19012 return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
19013 (int8x8_t) __b);
19014 }
19015
19016 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19017 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
19018 {
19019 return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
19020 (int16x4_t) __b);
19021 }
19022
19023 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19024 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
19025 {
19026 return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
19027 (int32x2_t) __b);
19028 }
19029
19030 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19031 vpadds_f32 (float32x2_t __a)
19032 {
19033 return __builtin_aarch64_reduc_plus_scal_v2sf (__a);
19034 }
19035
19036 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19037 vpaddd_f64 (float64x2_t __a)
19038 {
19039 return __builtin_aarch64_reduc_plus_scal_v2df (__a);
19040 }
19041
19042 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19043 vpaddd_s64 (int64x2_t __a)
19044 {
19045 return __builtin_aarch64_addpdi (__a);
19046 }
19047
19048 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19049 vpaddd_u64 (uint64x2_t __a)
19050 {
19051 return __builtin_aarch64_addpdi ((int64x2_t) __a);
19052 }
19053
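/* Editorial sketch (not part of the original header): like the pairwise
   min/max family, vpadd sums adjacent lane pairs, low half from the
   first operand and high half from the second, while the scalar vpaddd
   forms add the two halves of a single 128-bit register.  A hypothetical
   demo:

     #include <arm_neon.h>

     int64_t
     padd_demo (void)
     {
       int64x2_t v = {40, 2};
       return vpaddd_s64 (v);
     }

   The call returns 42.  */
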
19054 /* vqabs */
19055
19056 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19057 vqabsq_s64 (int64x2_t __a)
19058 {
19059 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
19060 }
19061
19062 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19063 vqabsb_s8 (int8_t __a)
19064 {
19065 return (int8_t) __builtin_aarch64_sqabsqi (__a);
19066 }
19067
19068 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19069 vqabsh_s16 (int16_t __a)
19070 {
19071 return (int16_t) __builtin_aarch64_sqabshi (__a);
19072 }
19073
19074 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19075 vqabss_s32 (int32_t __a)
19076 {
19077 return (int32_t) __builtin_aarch64_sqabssi (__a);
19078 }
19079
19080 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19081 vqabsd_s64 (int64_t __a)
19082 {
19083 return __builtin_aarch64_sqabsdi (__a);
19084 }
19085
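/* Editorial sketch (not part of the original header): the saturating
   absolute value clamps instead of wrapping, which only matters at the
   most negative input.  A hypothetical demo:

     #include <arm_neon.h>

     int8_t
     qabs_demo (void)
     {
       return vqabsb_s8 (-128);
     }

   The result saturates to 127 rather than wrapping back to -128.  */
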
19086 /* vqadd */
19087
19088 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
19089 vqaddb_s8 (int8_t __a, int8_t __b)
19090 {
19091 return (int8_t) __builtin_aarch64_sqaddqi (__a, __b);
19092 }
19093
19094 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19095 vqaddh_s16 (int16_t __a, int16_t __b)
19096 {
19097 return (int16_t) __builtin_aarch64_sqaddhi (__a, __b);
19098 }
19099
19100 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19101 vqadds_s32 (int32_t __a, int32_t __b)
19102 {
19103 return (int32_t) __builtin_aarch64_sqaddsi (__a, __b);
19104 }
19105
19106 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19107 vqaddd_s64 (int64_t __a, int64_t __b)
19108 {
19109 return __builtin_aarch64_sqadddi (__a, __b);
19110 }
19111
19112 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
19113 vqaddb_u8 (uint8_t __a, uint8_t __b)
19114 {
19115 return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b);
19116 }
19117
19118 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
19119 vqaddh_u16 (uint16_t __a, uint16_t __b)
19120 {
19121 return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b);
19122 }
19123
19124 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19125 vqadds_u32 (uint32_t __a, uint32_t __b)
19126 {
19127 return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b);
19128 }
19129
19130 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19131 vqaddd_u64 (uint64_t __a, uint64_t __b)
19132 {
19133 return __builtin_aarch64_uqadddi_uuu (__a, __b);
19134 }
19135
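/* Editorial sketch (not part of the original header): the saturating
   adds clamp results to the range of the element type instead of
   wrapping.  A hypothetical demo:

     #include <arm_neon.h>

     uint8_t
     qadd_demo (void)
     {
       return vqaddb_u8 (200, 100);
     }

   The mathematical sum 300 exceeds UINT8_MAX, so the result is 255.  */
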
19136 /* vqdmlal */
19137
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
                       const int __d)
{
  return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
                        const int __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
                       const int __d)
{
  return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
                        const int __d)
{
  return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_s32 (int64_t __a, int32_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d);
}

/* vqdmlsl */

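/* Editorial note: vqdmlsl mirrors vqdmlal with a subtraction: each
   lane is sat (__a - 2 * __b * __c).  Continuing the sketch above,
   vqdmlsl_s16 (acc, vdup_n_s16 (3), vdup_n_s16 (4)) would return
   the lanes to 1.  */
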
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
{
  return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
{
  return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
                       const int __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
                        const int __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
{
  return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
{
  return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
                       const int __d)
{
  return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
                        const int __d)
{
  return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c)
{
  return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_s32 (int64_t __a, int32_t __b, int32_t __c)
{
  return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
{
  return __builtin_aarch64_sqdmlsl_laneqsi (__a, __b, __c, __d);
}

/* vqdmulh */

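/* Editorial note: vqdmulh keeps only the high half of the
   saturating doubled product, i.e. sat (2 * __a * __b) >> 16 for
   16-bit lanes (>> 32 for 32-bit), the usual Q15/Q31 fixed-point
   multiply.  Sketch (hypothetical Q15 values): with
   x = vdup_n_s16 (0x4000), representing 0.5,
   vqdmulh_lane_s16 (x, x, 0) yields 0x2000, representing 0.25.  */
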
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c);
}

/* vqdmull */

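/* Editorial note: vqdmull is the widening multiply without an
   accumulator: each lane is sat (2 * __a * __b) at twice the source
   width.  The doubling saturates only when both operands are the
   type minimum, e.g. INT16_MIN * INT16_MIN doubled overflows
   int32_t and clamps to INT32_MAX.  */
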
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqdmullv4hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqdmull2v8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_n_s16 (int16x4_t __a, int16_t __b)
{
  return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqdmullv2si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqdmull2v4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_n_s32 (int32x2_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmull_nv2si (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmullh_s16 (int16_t __a, int16_t __b)
{
  return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmullh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_laneqhi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmulls_s32 (int32_t __a, int32_t __b)
{
  return __builtin_aarch64_sqdmullsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmulls_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqdmull_laneqsi (__a, __b, __c);
}

/* vqmovn */

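/* Editorial note: vqmovn narrows each lane to half its width with
   saturation, signed to signed and unsigned to unsigned; e.g.
   vqmovn_s16 maps 300 to 127 and -300 to -128.  The h/s/d-suffixed
   forms are the scalar single-lane equivalents.  */
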
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqmovn_s16 (int16x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqmovn_s32 (int32x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqmovn_s64 (int64x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovn_u16 (uint16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovn_u32 (uint32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovn_u64 (uint64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqmovnh_s16 (int16_t __a)
{
  return (int8_t) __builtin_aarch64_sqmovnhi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqmovns_s32 (int32_t __a)
{
  return (int16_t) __builtin_aarch64_sqmovnsi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqmovnd_s64 (int64_t __a)
{
  return (int32_t) __builtin_aarch64_sqmovndi (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqmovnh_u16 (uint16_t __a)
{
  return (uint8_t) __builtin_aarch64_uqmovnhi (__a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqmovns_u32 (uint32_t __a)
{
  return (uint16_t) __builtin_aarch64_uqmovnsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqmovnd_u64 (uint64_t __a)
{
  return (uint32_t) __builtin_aarch64_uqmovndi (__a);
}

/* vqmovun */

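/* Editorial note: vqmovun narrows signed input to unsigned output
   with saturation, so negative lanes clamp to 0; e.g. vqmovun_s16
   maps -5 to 0 and 300 to 255.  */
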
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqmovun_s16 (int16x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqmovun_s32 (int32x4_t __a)
{
  return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqmovun_s64 (int64x2_t __a)
{
  return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqmovunh_s16 (int16_t __a)
{
  return (uint8_t) __builtin_aarch64_sqmovunhi (__a);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqmovuns_s32 (int32_t __a)
{
  return (uint16_t) __builtin_aarch64_sqmovunsi (__a);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqmovund_s64 (int64_t __a)
{
  return (uint32_t) __builtin_aarch64_sqmovundi (__a);
}

/* vqneg */

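/* Editorial note: vqneg is saturating negation.  It differs from
   plain negation only at the type minimum, where the result clamps
   to the maximum, e.g. vqnegb_s8 (-128) == 127 rather than wrapping
   back to -128.  */
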
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqnegq_s64 (int64x2_t __a)
{
  return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqnegb_s8 (int8_t __a)
{
  return (int8_t) __builtin_aarch64_sqnegqi (__a);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqnegh_s16 (int16_t __a)
{
  return (int16_t) __builtin_aarch64_sqneghi (__a);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqnegs_s32 (int32_t __a)
{
  return (int32_t) __builtin_aarch64_sqnegsi (__a);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqnegd_s64 (int64_t __a)
{
  return __builtin_aarch64_sqnegdi (__a);
}

/* vqrdmulh */

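/* Editorial note: vqrdmulh is vqdmulh with rounding: 2^15 (or 2^31
   for 32-bit lanes) is added to the doubled product before the high
   half is taken, halving the downward truncation bias of
   vqdmulh.  */
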
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c);
}

/* vqrshl */

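/* Editorial note: vqrshl shifts each lane by a signed, per-lane
   count taken from the second operand; negative counts shift right
   with rounding, and the result saturates.  Sketch (hypothetical
   values):

     uint32x2_t r = vqrshl_u32 (vdup_n_u32 (7), vdup_n_s32 (-1));

   yields 4 in each lane (7 shifted right by 1, rounded).  */
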
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqrshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqrshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqrshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sqrshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_uqrshlv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_uqrshlv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_uqrshlv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqrshldi_uus (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqrshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqrshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqrshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqrshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_uqrshlv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_uqrshlv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_uqrshlv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_uqrshlv2di_uus (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqrshlb_s8 (int8_t __a, int8_t __b)
{
  return __builtin_aarch64_sqrshlqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrshlh_s16 (int16_t __a, int16_t __b)
{
  return __builtin_aarch64_sqrshlhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshls_s32 (int32_t __a, int32_t __b)
{
  return __builtin_aarch64_sqrshlsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqrshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqrshlb_u8 (uint8_t __a, uint8_t __b)
{
  return __builtin_aarch64_uqrshlqi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqrshlh_u16 (uint16_t __a, uint16_t __b)
{
  return __builtin_aarch64_uqrshlhi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqrshls_u32 (uint32_t __a, uint32_t __b)
{
  return __builtin_aarch64_uqrshlsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqrshld_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqrshldi_uus (__a, __b);
}

/* vqrshrn */

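/* Editorial note: vqrshrn_n rounds, shifts each lane right by the
   immediate __b (a constant expression from 1 up to half the source
   lane width), then narrows with saturation; e.g.
   vqrshrns_n_s32 (1000, 4) == 63, since (1000 + 8) >> 4 = 63 fits
   int16_t.  */
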
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nv8hi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nv4si_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nv2di_uus (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqrshrnh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrshrns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshrnd_n_s64 (int64_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqrshrnh_n_u16 (uint16_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqrshrns_n_u32 (uint32_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqrshrnd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b);
}

/* vqrshrun */

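/* Editorial note: vqrshrun_n is the signed-to-unsigned variant of
   vqrshrn_n: round, shift right by the immediate, then saturate to
   the unsigned half-width range, clamping negative results to 0.  */
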
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqrshrunh_n_s16 (int16_t __a, const int __b)
{
  return (uint8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqrshruns_n_s32 (int32_t __a, const int __b)
{
  return (uint16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqrshrund_n_s64 (int64_t __a, const int __b)
{
  return (uint32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
}

/* vqshl */

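/* Editorial note: two groups follow.  The register forms take a
   signed per-lane count that may shift left or, when negative,
   right (without rounding); the _n forms shift left by an
   immediate.  Both saturate on overflow.  Sketch (hypothetical
   values): vqshl_n_s8 (vdup_n_s8 (100), 1) yields 127 in every
   lane, since 200 exceeds the int8_t maximum.  */
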
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sqshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_uqshlv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_uqshlv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_uqshlv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqshldi_uus (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_uqshlv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_uqshlv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_uqshlv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_uqshlv2di_uus (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshlb_s8 (int8_t __a, int8_t __b)
{
  return __builtin_aarch64_sqshlqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshlh_s16 (int16_t __a, int16_t __b)
{
  return __builtin_aarch64_sqshlhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshls_s32 (int32_t __a, int32_t __b)
{
  return __builtin_aarch64_sqshlsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqshldi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshlb_u8 (uint8_t __a, uint8_t __b)
{
  return __builtin_aarch64_uqshlqi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshlh_u16 (uint16_t __a, uint16_t __b)
{
  return __builtin_aarch64_uqshlhi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshls_u32 (uint32_t __a, uint32_t __b)
{
  return __builtin_aarch64_uqshlsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqshld_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqshldi_uus (__a, __b);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_sqshl_ndi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_n_u8 (uint8x8_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_n_u16 (uint16x4_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_n_u32 (uint32x2_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqshl_ndi_uus (__a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_n_u8 (uint8x16_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv2di_uus (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshlb_n_s8 (int8_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshlh_n_s16 (int16_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshls_n_s32 (int32_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqshld_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_sqshl_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshlb_n_u8 (uint8_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nqi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshlh_n_u16 (uint16_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nhi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshls_n_u32 (uint32_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqshld_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
}

/* vqshlu */

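/* Editorial note: vqshlu_n shifts signed input left by an immediate
   and saturates to the unsigned range of the same lane width;
   negative inputs clamp to 0.  E.g. vqshlub_n_s8 (100, 1) == 200
   and vqshlub_n_s8 (-3, 1) == 0.  */
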
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshlu_n_s8 (int8x8_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshlu_n_s16 (int16x4_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshlu_n_s32 (int32x2_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshlu_n_s64 (int64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_sqshlu_ndi_uss (__a[0], __b)};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshluq_n_s8 (int8x16_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshluq_n_s16 (int16x8_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshluq_n_s32 (int32x4_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshluq_n_s64 (int64x2_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshlub_n_s8 (int8_t __a, const int __b)
{
  return (uint8_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshluh_n_s16 (int16_t __a, const int __b)
{
  return (uint16_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshlus_n_s32 (int32_t __a, const int __b)
{
  return (uint32_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqshlud_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
}

/* vqshrn */

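/* Editorial note: vqshrn_n is the truncating counterpart of
   vqrshrn_n above: shift right by the immediate with no rounding,
   then narrow with saturation.  */
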
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nv8hi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nv4si_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nv2di_uus (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqshrnh_n_s16 (int16_t __a, const int __b)
{
  return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqshrns_n_s32 (int32_t __a, const int __b)
{
  return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshrnd_n_s64 (int64_t __a, const int __b)
{
  return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshrnh_n_u16 (uint16_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nhi_uus (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshrns_n_u32 (uint32_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nsi_uus (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshrnd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_ndi_uus (__a, __b);
}

/* vqshrun */

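/* Editorial note: vqshrun_n truncates (no rounding) while narrowing
   signed input to unsigned output, clamping negative results
   to 0.  */
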
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqshrunh_n_s16 (int16_t __a, const int __b)
{
  return (uint8_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqshruns_n_s32 (int32_t __a, const int __b)
{
  return (uint16_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshrund_n_s64 (int64_t __a, const int __b)
{
  return (uint32_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
}

/* vqsub */

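/* Editorial note: scalar saturating subtraction; e.g.
   vqsubb_u8 (3, 5) == 0 rather than wrapping to 254, and
   vqsubb_s8 (-100, 100) == -128.  The vector vqsub forms are
   defined elsewhere in this file.  */
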
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vqsubb_s8 (int8_t __a, int8_t __b)
{
  return (int8_t) __builtin_aarch64_sqsubqi (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqsubh_s16 (int16_t __a, int16_t __b)
{
  return (int16_t) __builtin_aarch64_sqsubhi (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqsubs_s32 (int32_t __a, int32_t __b)
{
  return (int32_t) __builtin_aarch64_sqsubsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqsubd_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqsubdi (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vqsubb_u8 (uint8_t __a, uint8_t __b)
{
  return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vqsubh_u16 (uint16_t __a, uint16_t __b)
{
  return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqsubs_u32 (uint32_t __a, uint32_t __b)
{
  return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqsubd_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqsubdi_uuu (__a, __b);
}

/* vqtbl2 */

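/* Editorial note: vqtbl2 looks bytes up in a 32-byte table held in
   two 128-bit registers; index bytes select table bytes 0-31 and
   out-of-range indices produce 0.  The builtins below marshal the
   pair through a __builtin_aarch64_simd_oi register tuple.  Sketch
   (hypothetical values):

     int8x16x2_t tab = { { vdupq_n_s8 (1), vdupq_n_s8 (2) } };
     int8x8_t r = vqtbl2_s8 (tab, vdup_n_u8 (20));

   yields 2 in each lane, since index 20 falls in the second table
   register.  */
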
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1);
  return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_oi __o;
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
  return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx);
}

/* vqtbl3 */

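/* Editorial note: as vqtbl2, but with a 48-byte table in three
   registers (valid indices 0-47), marshalled through a
   __builtin_aarch64_simd_ci tuple.  */
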
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
{
  __builtin_aarch64_simd_ci __o;
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
  __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
  return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx);
}

/* vqtbl4 */

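/* Editorial note: as vqtbl2, but with a 64-byte table in four
   registers (valid indices 0-63), marshalled through a
   __builtin_aarch64_simd_xi tuple.  */
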
20718 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20719 vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
20720 {
20721 __builtin_aarch64_simd_xi __o;
20722 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
20723 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
20724 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
20725 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
20726 return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx);
20727 }
20728
20729 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20730 vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
20731 {
20732 __builtin_aarch64_simd_xi __o;
20733 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
20734 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
20735 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
20736 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
20737 return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx);
20738 }
20739
20740 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20741 vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
20742 {
20743 __builtin_aarch64_simd_xi __o;
20744 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
20745 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
20746 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
20747 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
20748 return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx);
20749 }
20750
20751 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20752 vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
20753 {
20754 __builtin_aarch64_simd_xi __o;
20755 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
20756 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
20757 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
20758 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
20759 return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx);
20760 }
20761
20762 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20763 vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
20764 {
20765 __builtin_aarch64_simd_xi __o;
20766 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
20767 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
20768 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
20769 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
20770 return (uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx);
20771 }
20772
20773 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20774 vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
20775 {
20776 __builtin_aarch64_simd_xi __o;
20777 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
20778 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
20779 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
20780 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
20781 return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx);
20782 }
20783
20784
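/* A minimal usage sketch, not part of the NEON API: vqtbl4q_u8 implements a
   full 64-byte table lookup, e.g. an S-box substitution.  Lanes whose index
   is out of range (>= 64) are set to 0.  The helper name __example_sbox64
   is hypothetical.  */

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
__example_sbox64 (uint8x16x4_t __sbox, uint8x16_t __in)
{
  /* Each input byte selects one of the 64 table bytes held in the four
     Q-registers of __sbox.  */
  return vqtbl4q_u8 (__sbox, __in);
}
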
20785 /* vqtbx2 */
20786 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20787 vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
20788 {
20789 __builtin_aarch64_simd_oi __o;
20790 __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0);
20791 __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1);
20792 return __builtin_aarch64_tbx4v8qi (r, __o, (int8x8_t)idx);
20793 }
20794
20795 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20796 vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
20797 {
20798 __builtin_aarch64_simd_oi __o;
20799 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
20800 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
20801 return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o,
20802 (int8x8_t)idx);
20803 }
20804
20805 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20806 vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
20807 {
20808 __builtin_aarch64_simd_oi __o;
20809 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
20810 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
20811 return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o,
20812 (int8x8_t)idx);
20813 }
20814
20815 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20816 vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
20817 {
20818 __builtin_aarch64_simd_oi __o;
20819 __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0);
20820 __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1);
20821 return __builtin_aarch64_tbx4v16qi (r, __o, (int8x16_t)idx);
20822 }
20823
20824 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20825 vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
20826 {
20827 __builtin_aarch64_simd_oi __o;
20828 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
20829 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
20830 return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o,
20831 (int8x16_t)idx);
20832 }
20833
20834 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20835 vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
20836 {
20837 __builtin_aarch64_simd_oi __o;
20838 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0);
20839 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1);
20840 return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o,
20841 (int8x16_t)idx);
20842 }
20843
20844 /* vqtbx3 */
20845 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20846 vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
20847 {
20848 __builtin_aarch64_simd_ci __o;
20849 __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0);
20850 __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1);
20851 __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2);
20852 return __builtin_aarch64_qtbx3v8qi (r, __o, (int8x8_t)idx);
20853 }
20854
20855 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20856 vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
20857 {
20858 __builtin_aarch64_simd_ci __o;
20859 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
20860 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
20861 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
20862 return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o,
20863 (int8x8_t)idx);
20864 }
20865
20866 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20867 vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
20868 {
20869 __builtin_aarch64_simd_ci __o;
20870 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
20871 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
20872 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
20873 return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o,
20874 (int8x8_t)idx);
20875 }
20876
20877 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20878 vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
20879 {
20880 __builtin_aarch64_simd_ci __o;
20881 __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0);
20882 __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1);
20883 __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2);
20884 return __builtin_aarch64_qtbx3v16qi (r, __o, (int8x16_t)idx);
20885 }
20886
20887 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20888 vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
20889 {
20890 __builtin_aarch64_simd_ci __o;
20891 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
20892 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
20893 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
20894 return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o,
20895 (int8x16_t)idx);
20896 }
20897
20898 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20899 vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
20900 {
20901 __builtin_aarch64_simd_ci __o;
20902 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0);
20903 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1);
20904 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2);
20905 return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o,
20906 (int8x16_t)idx);
20907 }
20908
20909 /* vqtbx4 */
20910
20911 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20912 vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
20913 {
20914 __builtin_aarch64_simd_xi __o;
20915 __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0);
20916 __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1);
20917 __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2);
20918 __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3);
20919 return __builtin_aarch64_qtbx4v8qi (r, __o, (int8x8_t)idx);
20920 }
20921
20922 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20923 vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
20924 {
20925 __builtin_aarch64_simd_xi __o;
20926 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
20927 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
20928 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
20929 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
20930 return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o,
20931 (int8x8_t)idx);
20932 }
20933
20934 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20935 vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
20936 {
20937 __builtin_aarch64_simd_xi __o;
20938 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
20939 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
20940 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
20941 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
20942 return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o,
20943 (int8x8_t)idx);
20944 }
20945
20946 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20947 vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
20948 {
20949 __builtin_aarch64_simd_xi __o;
20950 __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0);
20951 __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1);
20952 __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2);
20953 __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3);
20954 return __builtin_aarch64_qtbx4v16qi (r, __o, (int8x16_t)idx);
20955 }
20956
20957 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20958 vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
20959 {
20960 __builtin_aarch64_simd_xi __o;
20961 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
20962 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
20963 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
20964 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
20965 return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o,
20966 (int8x16_t)idx);
20967 }
20968
20969 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20970 vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
20971 {
20972 __builtin_aarch64_simd_xi __o;
20973 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0);
20974 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1);
20975 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2);
20976 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3);
20977 return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o,
20978 (int8x16_t)idx);
20979 }
20980
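/* A minimal usage sketch (hypothetical helper, not part of the API): unlike
   the TBL forms above, the TBX forms leave a destination lane unchanged when
   its index is out of range, so a default value can be threaded through the
   lookup.  */

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
__example_lookup_or_default (uint8x16_t __dflt, uint8x16x4_t __tab,
			     uint8x16_t __idx)
{
  /* Lanes with __idx >= 64 keep the corresponding byte of __dflt.  */
  return vqtbx4q_u8 (__dflt, __tab, __idx);
}
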
20981 /* vrbit */
20982
20983 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20984 vrbit_p8 (poly8x8_t __a)
20985 {
20986 return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
20987 }
20988
20989 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20990 vrbit_s8 (int8x8_t __a)
20991 {
20992 return __builtin_aarch64_rbitv8qi (__a);
20993 }
20994
20995 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20996 vrbit_u8 (uint8x8_t __a)
20997 {
20998 return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
20999 }
21000
21001 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
21002 vrbitq_p8 (poly8x16_t __a)
21003 {
21004 return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a);
21005 }
21006
21007 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21008 vrbitq_s8 (int8x16_t __a)
21009 {
21010 return __builtin_aarch64_rbitv16qi (__a);
21011 }
21012
21013 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21014 vrbitq_u8 (uint8x16_t __a)
21015 {
21016 return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a);
21017 }
21018
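/* A minimal usage sketch (hypothetical helper): RBIT reverses the bits
   within each byte, so a per-byte count-trailing-zeros can be built from
   vrbit followed by vclz.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
__example_ctz_u8 (uint8x8_t __a)
{
  /* ctz (x) == clz (bit-reverse (x)) for each byte lane.  */
  return vclz_u8 (vrbit_u8 (__a));
}
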
21019 /* vrecpe */
21020
21021 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21022 vrecpe_u32 (uint32x2_t __a)
21023 {
21024 return (uint32x2_t) __builtin_aarch64_urecpev2si ((int32x2_t) __a);
21025 }
21026
21027 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21028 vrecpeq_u32 (uint32x4_t __a)
21029 {
21030 return (uint32x4_t) __builtin_aarch64_urecpev4si ((int32x4_t) __a);
21031 }
21032
21033 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21034 vrecpes_f32 (float32_t __a)
21035 {
21036 return __builtin_aarch64_frecpesf (__a);
21037 }
21038
21039 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21040 vrecped_f64 (float64_t __a)
21041 {
21042 return __builtin_aarch64_frecpedf (__a);
21043 }
21044
21045 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21046 vrecpe_f32 (float32x2_t __a)
21047 {
21048 return __builtin_aarch64_frecpev2sf (__a);
21049 }
21050
21051 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21052 vrecpe_f64 (float64x1_t __a)
21053 {
21054 return (float64x1_t) { vrecped_f64 (vget_lane_f64 (__a, 0)) };
21055 }
21056
21057 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21058 vrecpeq_f32 (float32x4_t __a)
21059 {
21060 return __builtin_aarch64_frecpev4sf (__a);
21061 }
21062
21063 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21064 vrecpeq_f64 (float64x2_t __a)
21065 {
21066 return __builtin_aarch64_frecpev2df (__a);
21067 }
21068
21069 /* vrecps */
21070
21071 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21072 vrecpss_f32 (float32_t __a, float32_t __b)
21073 {
21074 return __builtin_aarch64_frecpssf (__a, __b);
21075 }
21076
21077 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21078 vrecpsd_f64 (float64_t __a, float64_t __b)
21079 {
21080 return __builtin_aarch64_frecpsdf (__a, __b);
21081 }
21082
21083 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21084 vrecps_f32 (float32x2_t __a, float32x2_t __b)
21085 {
21086 return __builtin_aarch64_frecpsv2sf (__a, __b);
21087 }
21088
21089 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21090 vrecps_f64 (float64x1_t __a, float64x1_t __b)
21091 {
21092 return (float64x1_t) { vrecpsd_f64 (vget_lane_f64 (__a, 0),
21093 vget_lane_f64 (__b, 0)) };
21094 }
21095
21096 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21097 vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
21098 {
21099 return __builtin_aarch64_frecpsv4sf (__a, __b);
21100 }
21101
21102 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21103 vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
21104 {
21105 return __builtin_aarch64_frecpsv2df (__a, __b);
21106 }
21107
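/* A minimal usage sketch (hypothetical helper): FRECPE gives a low-precision
   reciprocal estimate, and each FRECPS step x * (2 - b * x) is one
   Newton-Raphson refinement, so division can be approximated without an
   FDIV.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
__example_divide (float32x4_t __a, float32x4_t __b)
{
  float32x4_t __x = vrecpeq_f32 (__b);			/* x ~= 1/b.  */
  __x = vmulq_f32 (__x, vrecpsq_f32 (__b, __x));	/* Refine once.  */
  __x = vmulq_f32 (__x, vrecpsq_f32 (__b, __x));	/* Refine again.  */
  return vmulq_f32 (__a, __x);				/* a * (1/b).  */
}
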
21108 /* vrecpx */
21109
21110 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21111 vrecpxs_f32 (float32_t __a)
21112 {
21113 return __builtin_aarch64_frecpxsf (__a);
21114 }
21115
21116 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21117 vrecpxd_f64 (float64_t __a)
21118 {
21119 return __builtin_aarch64_frecpxdf (__a);
21120 }
21121
21122
21123 /* vrev */
21124
21125 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
21126 vrev16_p8 (poly8x8_t a)
21127 {
21128 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21129 }
21130
21131 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21132 vrev16_s8 (int8x8_t a)
21133 {
21134 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21135 }
21136
21137 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21138 vrev16_u8 (uint8x8_t a)
21139 {
21140 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21141 }
21142
21143 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
21144 vrev16q_p8 (poly8x16_t a)
21145 {
21146 return __builtin_shuffle (a,
21147 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
21148 }
21149
21150 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21151 vrev16q_s8 (int8x16_t a)
21152 {
21153 return __builtin_shuffle (a,
21154 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
21155 }
21156
21157 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21158 vrev16q_u8 (uint8x16_t a)
21159 {
21160 return __builtin_shuffle (a,
21161 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
21162 }
21163
21164 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
21165 vrev32_p8 (poly8x8_t a)
21166 {
21167 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21168 }
21169
21170 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
21171 vrev32_p16 (poly16x4_t a)
21172 {
21173 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
21174 }
21175
21176 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21177 vrev32_s8 (int8x8_t a)
21178 {
21179 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21180 }
21181
21182 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21183 vrev32_s16 (int16x4_t a)
21184 {
21185 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
21186 }
21187
21188 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21189 vrev32_u8 (uint8x8_t a)
21190 {
21191 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21192 }
21193
21194 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21195 vrev32_u16 (uint16x4_t a)
21196 {
21197 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
21198 }
21199
21200 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
21201 vrev32q_p8 (poly8x16_t a)
21202 {
21203 return __builtin_shuffle (a,
21204 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
21205 }
21206
21207 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
21208 vrev32q_p16 (poly16x8_t a)
21209 {
21210 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21211 }
21212
21213 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21214 vrev32q_s8 (int8x16_t a)
21215 {
21216 return __builtin_shuffle (a,
21217 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
21218 }
21219
21220 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21221 vrev32q_s16 (int16x8_t a)
21222 {
21223 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21224 }
21225
21226 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21227 vrev32q_u8 (uint8x16_t a)
21228 {
21229 return __builtin_shuffle (a,
21230 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
21231 }
21232
21233 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21234 vrev32q_u16 (uint16x8_t a)
21235 {
21236 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
21237 }
21238
21239 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
21240 vrev64_f16 (float16x4_t __a)
21241 {
21242 return __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 });
21243 }
21244
21245 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21246 vrev64_f32 (float32x2_t a)
21247 {
21248 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
21249 }
21250
21251 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
21252 vrev64_p8 (poly8x8_t a)
21253 {
21254 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
21255 }
21256
21257 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
21258 vrev64_p16 (poly16x4_t a)
21259 {
21260 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
21261 }
21262
21263 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21264 vrev64_s8 (int8x8_t a)
21265 {
21266 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
21267 }
21268
21269 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21270 vrev64_s16 (int16x4_t a)
21271 {
21272 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
21273 }
21274
21275 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21276 vrev64_s32 (int32x2_t a)
21277 {
21278 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
21279 }
21280
21281 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21282 vrev64_u8 (uint8x8_t a)
21283 {
21284 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
21285 }
21286
21287 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21288 vrev64_u16 (uint16x4_t a)
21289 {
21290 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
21291 }
21292
21293 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21294 vrev64_u32 (uint32x2_t a)
21295 {
21296 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
21297 }
21298
21299 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
21300 vrev64q_f16 (float16x8_t __a)
21301 {
21302 return __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21303 }
21304
21305 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21306 vrev64q_f32 (float32x4_t a)
21307 {
21308 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21309 }
21310
21311 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
21312 vrev64q_p8 (poly8x16_t a)
21313 {
21314 return __builtin_shuffle (a,
21315 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21316 }
21317
21318 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
21319 vrev64q_p16 (poly16x8_t a)
21320 {
21321 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21322 }
21323
21324 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21325 vrev64q_s8 (int8x16_t a)
21326 {
21327 return __builtin_shuffle (a,
21328 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21329 }
21330
21331 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21332 vrev64q_s16 (int16x8_t a)
21333 {
21334 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21335 }
21336
21337 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21338 vrev64q_s32 (int32x4_t a)
21339 {
21340 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21341 }
21342
21343 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21344 vrev64q_u8 (uint8x16_t a)
21345 {
21346 return __builtin_shuffle (a,
21347 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21348 }
21349
21350 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21351 vrev64q_u16 (uint16x8_t a)
21352 {
21353 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21354 }
21355
21356 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21357 vrev64q_u32 (uint32x4_t a)
21358 {
21359 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21360 }
21361
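/* A minimal usage sketch (hypothetical helper): vrev32q_u8 reverses the
   bytes within each 32-bit word, which is the usual endianness swap for a
   vector of 32-bit values.  */

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
__example_bswap32 (uint32x4_t __a)
{
  return vreinterpretq_u32_u8 (vrev32q_u8 (vreinterpretq_u8_u32 (__a)));
}
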
21362 /* vrnd */
21363
21364 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21365 vrnd_f32 (float32x2_t __a)
21366 {
21367 return __builtin_aarch64_btruncv2sf (__a);
21368 }
21369
21370 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21371 vrnd_f64 (float64x1_t __a)
21372 {
21373 return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
21374 }
21375
21376 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21377 vrndq_f32 (float32x4_t __a)
21378 {
21379 return __builtin_aarch64_btruncv4sf (__a);
21380 }
21381
21382 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21383 vrndq_f64 (float64x2_t __a)
21384 {
21385 return __builtin_aarch64_btruncv2df (__a);
21386 }
21387
21388 /* vrnda */
21389
21390 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21391 vrnda_f32 (float32x2_t __a)
21392 {
21393 return __builtin_aarch64_roundv2sf (__a);
21394 }
21395
21396 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21397 vrnda_f64 (float64x1_t __a)
21398 {
21399 return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
21400 }
21401
21402 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21403 vrndaq_f32 (float32x4_t __a)
21404 {
21405 return __builtin_aarch64_roundv4sf (__a);
21406 }
21407
21408 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21409 vrndaq_f64 (float64x2_t __a)
21410 {
21411 return __builtin_aarch64_roundv2df (__a);
21412 }
21413
21414 /* vrndi */
21415
21416 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21417 vrndi_f32 (float32x2_t __a)
21418 {
21419 return __builtin_aarch64_nearbyintv2sf (__a);
21420 }
21421
21422 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21423 vrndi_f64 (float64x1_t __a)
21424 {
21425 return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
21426 }
21427
21428 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21429 vrndiq_f32 (float32x4_t __a)
21430 {
21431 return __builtin_aarch64_nearbyintv4sf (__a);
21432 }
21433
21434 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21435 vrndiq_f64 (float64x2_t __a)
21436 {
21437 return __builtin_aarch64_nearbyintv2df (__a);
21438 }
21439
21440 /* vrndm */
21441
21442 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21443 vrndm_f32 (float32x2_t __a)
21444 {
21445 return __builtin_aarch64_floorv2sf (__a);
21446 }
21447
21448 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21449 vrndm_f64 (float64x1_t __a)
21450 {
21451 return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0);
21452 }
21453
21454 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21455 vrndmq_f32 (float32x4_t __a)
21456 {
21457 return __builtin_aarch64_floorv4sf (__a);
21458 }
21459
21460 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21461 vrndmq_f64 (float64x2_t __a)
21462 {
21463 return __builtin_aarch64_floorv2df (__a);
21464 }
21465
21466 /* vrndn */
21467
21468 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21469 vrndn_f32 (float32x2_t __a)
21470 {
21471 return __builtin_aarch64_frintnv2sf (__a);
21472 }
21473
21474 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21475 vrndn_f64 (float64x1_t __a)
21476 {
21477 return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])};
21478 }
21479
21480 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21481 vrndnq_f32 (float32x4_t __a)
21482 {
21483 return __builtin_aarch64_frintnv4sf (__a);
21484 }
21485
21486 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21487 vrndnq_f64 (float64x2_t __a)
21488 {
21489 return __builtin_aarch64_frintnv2df (__a);
21490 }
21491
21492 /* vrndp */
21493
21494 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21495 vrndp_f32 (float32x2_t __a)
21496 {
21497 return __builtin_aarch64_ceilv2sf (__a);
21498 }
21499
21500 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21501 vrndp_f64 (float64x1_t __a)
21502 {
21503 return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
21504 }
21505
21506 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21507 vrndpq_f32 (float32x4_t __a)
21508 {
21509 return __builtin_aarch64_ceilv4sf (__a);
21510 }
21511
21512 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21513 vrndpq_f64 (float64x2_t __a)
21514 {
21515 return __builtin_aarch64_ceilv2df (__a);
21516 }
21517
21518 /* vrndx */
21519
21520 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21521 vrndx_f32 (float32x2_t __a)
21522 {
21523 return __builtin_aarch64_rintv2sf (__a);
21524 }
21525
21526 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21527 vrndx_f64 (float64x1_t __a)
21528 {
21529 return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
21530 }
21531
21532 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21533 vrndxq_f32 (float32x4_t __a)
21534 {
21535 return __builtin_aarch64_rintv4sf (__a);
21536 }
21537
21538 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21539 vrndxq_f64 (float64x2_t __a)
21540 {
21541 return __builtin_aarch64_rintv2df (__a);
21542 }
21543
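/* The vrnd* families map onto the C rounding primitives: vrnd = trunc,
   vrnda = round (ties away from zero), vrndi = nearbyint, vrndm = floor,
   vrndn = round to nearest even, vrndp = ceil, vrndx = rint.  A minimal
   usage sketch (hypothetical helper) quantising with round-to-nearest-even:  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
__example_quantize (float32x4_t __a, float32x4_t __scale)
{
  return vrndnq_f32 (vmulq_f32 (__a, __scale));
}
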
21544 /* vrshl */
21545
21546 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21547 vrshl_s8 (int8x8_t __a, int8x8_t __b)
21548 {
21549 return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
21550 }
21551
21552 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21553 vrshl_s16 (int16x4_t __a, int16x4_t __b)
21554 {
21555 return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
21556 }
21557
21558 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21559 vrshl_s32 (int32x2_t __a, int32x2_t __b)
21560 {
21561 return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
21562 }
21563
21564 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21565 vrshl_s64 (int64x1_t __a, int64x1_t __b)
21566 {
21567 return (int64x1_t) {__builtin_aarch64_srshldi (__a[0], __b[0])};
21568 }
21569
21570 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21571 vrshl_u8 (uint8x8_t __a, int8x8_t __b)
21572 {
21573 return __builtin_aarch64_urshlv8qi_uus (__a, __b);
21574 }
21575
21576 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21577 vrshl_u16 (uint16x4_t __a, int16x4_t __b)
21578 {
21579 return __builtin_aarch64_urshlv4hi_uus (__a, __b);
21580 }
21581
21582 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21583 vrshl_u32 (uint32x2_t __a, int32x2_t __b)
21584 {
21585 return __builtin_aarch64_urshlv2si_uus (__a, __b);
21586 }
21587
21588 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21589 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
21590 {
21591 return (uint64x1_t) {__builtin_aarch64_urshldi_uus (__a[0], __b[0])};
21592 }
21593
21594 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21595 vrshlq_s8 (int8x16_t __a, int8x16_t __b)
21596 {
21597 return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
21598 }
21599
21600 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21601 vrshlq_s16 (int16x8_t __a, int16x8_t __b)
21602 {
21603 return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
21604 }
21605
21606 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21607 vrshlq_s32 (int32x4_t __a, int32x4_t __b)
21608 {
21609 return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
21610 }
21611
21612 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21613 vrshlq_s64 (int64x2_t __a, int64x2_t __b)
21614 {
21615 return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
21616 }
21617
21618 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21619 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
21620 {
21621 return __builtin_aarch64_urshlv16qi_uus (__a, __b);
21622 }
21623
21624 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21625 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
21626 {
21627 return __builtin_aarch64_urshlv8hi_uus (__a, __b);
21628 }
21629
21630 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21631 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
21632 {
21633 return __builtin_aarch64_urshlv4si_uus (__a, __b);
21634 }
21635
21636 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21637 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
21638 {
21639 return __builtin_aarch64_urshlv2di_uus (__a, __b);
21640 }
21641
21642 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21643 vrshld_s64 (int64_t __a, int64_t __b)
21644 {
21645 return __builtin_aarch64_srshldi (__a, __b);
21646 }
21647
21648 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21649 vrshld_u64 (uint64_t __a, int64_t __b)
21650 {
21651 return __builtin_aarch64_urshldi_uus (__a, __b);
21652 }
21653
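/* A minimal usage sketch (hypothetical helper): vrshl shifts left for
   positive counts and performs a rounding right shift for negative counts,
   so negating the count vector yields a per-lane rounding right shift.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
__example_rounding_shr (int16x8_t __a, int16x8_t __count)
{
  /* Per lane: (a + (1 << (count - 1))) >> count.  */
  return vrshlq_s16 (__a, vnegq_s16 (__count));
}
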
21654 /* vrshr */
21655
21656 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21657 vrshr_n_s8 (int8x8_t __a, const int __b)
21658 {
21659 return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
21660 }
21661
21662 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21663 vrshr_n_s16 (int16x4_t __a, const int __b)
21664 {
21665 return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
21666 }
21667
21668 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21669 vrshr_n_s32 (int32x2_t __a, const int __b)
21670 {
21671 return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
21672 }
21673
21674 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21675 vrshr_n_s64 (int64x1_t __a, const int __b)
21676 {
21677 return (int64x1_t) {__builtin_aarch64_srshr_ndi (__a[0], __b)};
21678 }
21679
21680 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21681 vrshr_n_u8 (uint8x8_t __a, const int __b)
21682 {
21683 return __builtin_aarch64_urshr_nv8qi_uus (__a, __b);
21684 }
21685
21686 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21687 vrshr_n_u16 (uint16x4_t __a, const int __b)
21688 {
21689 return __builtin_aarch64_urshr_nv4hi_uus (__a, __b);
21690 }
21691
21692 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21693 vrshr_n_u32 (uint32x2_t __a, const int __b)
21694 {
21695 return __builtin_aarch64_urshr_nv2si_uus (__a, __b);
21696 }
21697
21698 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21699 vrshr_n_u64 (uint64x1_t __a, const int __b)
21700 {
21701 return (uint64x1_t) {__builtin_aarch64_urshr_ndi_uus (__a[0], __b)};
21702 }
21703
21704 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21705 vrshrq_n_s8 (int8x16_t __a, const int __b)
21706 {
21707 return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
21708 }
21709
21710 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21711 vrshrq_n_s16 (int16x8_t __a, const int __b)
21712 {
21713 return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
21714 }
21715
21716 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21717 vrshrq_n_s32 (int32x4_t __a, const int __b)
21718 {
21719 return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
21720 }
21721
21722 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21723 vrshrq_n_s64 (int64x2_t __a, const int __b)
21724 {
21725 return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
21726 }
21727
21728 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21729 vrshrq_n_u8 (uint8x16_t __a, const int __b)
21730 {
21731 return __builtin_aarch64_urshr_nv16qi_uus (__a, __b);
21732 }
21733
21734 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21735 vrshrq_n_u16 (uint16x8_t __a, const int __b)
21736 {
21737 return __builtin_aarch64_urshr_nv8hi_uus (__a, __b);
21738 }
21739
21740 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21741 vrshrq_n_u32 (uint32x4_t __a, const int __b)
21742 {
21743 return __builtin_aarch64_urshr_nv4si_uus (__a, __b);
21744 }
21745
21746 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21747 vrshrq_n_u64 (uint64x2_t __a, const int __b)
21748 {
21749 return __builtin_aarch64_urshr_nv2di_uus (__a, __b);
21750 }
21751
21752 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21753 vrshrd_n_s64 (int64_t __a, const int __b)
21754 {
21755 return __builtin_aarch64_srshr_ndi (__a, __b);
21756 }
21757
21758 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21759 vrshrd_n_u64 (uint64_t __a, const int __b)
21760 {
21761 return __builtin_aarch64_urshr_ndi_uus (__a, __b);
21762 }
21763
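/* A minimal usage sketch (hypothetical helper): a rounding shift right by an
   immediate is the usual way to drop fixed-point fraction bits, here
   converting a Q8.8 value to an integer with round-to-nearest.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
__example_q8_to_int (int16x8_t __q8)
{
  /* Per lane: (q8 + 128) >> 8.  */
  return vrshrq_n_s16 (__q8, 8);
}
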
21764 /* vrsqrte */
21765
21766 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21767 vrsqrtes_f32 (float32_t __a)
21768 {
21769 return __builtin_aarch64_rsqrtesf (__a);
21770 }
21771
21772 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21773 vrsqrted_f64 (float64_t __a)
21774 {
21775 return __builtin_aarch64_rsqrtedf (__a);
21776 }
21777
21778 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21779 vrsqrte_f32 (float32x2_t __a)
21780 {
21781 return __builtin_aarch64_rsqrtev2sf (__a);
21782 }
21783
21784 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21785 vrsqrte_f64 (float64x1_t __a)
21786 {
21787 return (float64x1_t) {vrsqrted_f64 (vget_lane_f64 (__a, 0))};
21788 }
21789
21790 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21791 vrsqrteq_f32 (float32x4_t __a)
21792 {
21793 return __builtin_aarch64_rsqrtev4sf (__a);
21794 }
21795
21796 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21797 vrsqrteq_f64 (float64x2_t __a)
21798 {
21799 return __builtin_aarch64_rsqrtev2df (__a);
21800 }
21801
21802 /* vrsqrts */
21803
21804 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21805 vrsqrtss_f32 (float32_t __a, float32_t __b)
21806 {
21807 return __builtin_aarch64_rsqrtssf (__a, __b);
21808 }
21809
21810 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21811 vrsqrtsd_f64 (float64_t __a, float64_t __b)
21812 {
21813 return __builtin_aarch64_rsqrtsdf (__a, __b);
21814 }
21815
21816 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21817 vrsqrts_f32 (float32x2_t __a, float32x2_t __b)
21818 {
21819 return __builtin_aarch64_rsqrtsv2sf (__a, __b);
21820 }
21821
21822 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
21823 vrsqrts_f64 (float64x1_t __a, float64x1_t __b)
21824 {
21825 return (float64x1_t) {vrsqrtsd_f64 (vget_lane_f64 (__a, 0),
21826 vget_lane_f64 (__b, 0))};
21827 }
21828
21829 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21830 vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b)
21831 {
21832 return __builtin_aarch64_rsqrtsv4sf (__a, __b);
21833 }
21834
21835 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21836 vrsqrtsq_f64 (float64x2_t __a, float64x2_t __b)
21837 {
21838 return __builtin_aarch64_rsqrtsv2df (__a, __b);
21839 }
21840
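/* A minimal usage sketch (hypothetical helper): FRSQRTE estimates
   1/sqrt (a), and each FRSQRTS step x * (3 - a * x * x) / 2 is one
   Newton-Raphson refinement.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
__example_rsqrt (float32x4_t __a)
{
  float32x4_t __x = vrsqrteq_f32 (__a);
  __x = vmulq_f32 (__x, vrsqrtsq_f32 (vmulq_f32 (__a, __x), __x));
  __x = vmulq_f32 (__x, vrsqrtsq_f32 (vmulq_f32 (__a, __x), __x));
  return __x;
}
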
21841 /* vrsra */
21842
21843 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21844 vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
21845 {
21846 return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
21847 }
21848
21849 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21850 vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21851 {
21852 return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
21853 }
21854
21855 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21856 vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21857 {
21858 return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
21859 }
21860
21861 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21862 vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
21863 {
21864 return (int64x1_t) {__builtin_aarch64_srsra_ndi (__a[0], __b[0], __c)};
21865 }
21866
21867 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21868 vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
21869 {
21870 return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c);
21871 }
21872
21873 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21874 vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
21875 {
21876 return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c);
21877 }
21878
21879 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21880 vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
21881 {
21882 return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c);
21883 }
21884
21885 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21886 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
21887 {
21888 return (uint64x1_t) {__builtin_aarch64_ursra_ndi_uuus (__a[0], __b[0], __c)};
21889 }
21890
21891 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21892 vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
21893 {
21894 return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
21895 }
21896
21897 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21898 vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
21899 {
21900 return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
21901 }
21902
21903 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21904 vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
21905 {
21906 return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
21907 }
21908
21909 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21910 vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
21911 {
21912 return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
21913 }
21914
21915 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21916 vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
21917 {
21918 return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c);
21919 }
21920
21921 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21922 vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
21923 {
21924 return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c);
21925 }
21926
21927 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21928 vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
21929 {
21930 return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c);
21931 }
21932
21933 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
21934 vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
21935 {
21936 return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c);
21937 }
21938
21939 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
21940 vrsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
21941 {
21942 return __builtin_aarch64_srsra_ndi (__a, __b, __c);
21943 }
21944
21945 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
21946 vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
21947 {
21948 return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
21949 }
21950
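/* A minimal usage sketch (hypothetical helper): URSRA fuses a rounding
   right shift with an accumulation, e.g. adding a scaled correction term
   into a running sum.  */

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
__example_accumulate_scaled (uint16x8_t __acc, uint16x8_t __sum)
{
  /* Per lane: acc + ((sum + 8) >> 4).  */
  return vrsraq_n_u16 (__acc, __sum, 4);
}
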
21951 #pragma GCC push_options
21952 #pragma GCC target ("+nothing+crypto")
21953
21954 /* vsha1 */
21955
21956 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21957 vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
21958 {
21959 return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
21960 }
21961
21962 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21963 vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
21964 {
21965 return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
21966 }
21967
21968 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21969 vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
21970 {
21971 return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
21972 }
21973
21974 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
21975 vsha1h_u32 (uint32_t hash_e)
21976 {
21977 return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
21978 }
21979
21980 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21981 vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
21982 {
21983 return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
21984 }
21985
21986 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21987 vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
21988 {
21989 return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
21990 }
21991
21992 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21993 vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
21994 {
21995 return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
21996 }
21997
21998 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21999 vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
22000 {
22001 return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
22002 }
22003
22004 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22005 vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
22006 {
22007 return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
22008 }
22009
22010 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22011 vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
22012 {
22013 return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
22014 }
22015
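/* A minimal usage sketch (hypothetical helper): one quad-round of the
   SHA-256 compression function.  __abcd and __efgh hold the two state
   halves and __wk holds four schedule words already summed with the round
   constants.  */

__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
__example_sha256_rounds4 (uint32x4_t __abcd, uint32x4_t __efgh,
			  uint32x4_t __wk)
{
  uint32x4x2_t __state;
  __state.val[0] = vsha256hq_u32 (__abcd, __efgh, __wk);
  __state.val[1] = vsha256h2q_u32 (__efgh, __abcd, __wk);
  return __state;
}
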
22016 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
22017 vmull_p64 (poly64_t a, poly64_t b)
22018 {
22019 return __builtin_aarch64_crypto_pmulldi_ppp (a, b);
22021 }
22022
22023 __extension__ static __inline poly128_t __attribute__ ((__always_inline__))
22024 vmull_high_p64 (poly64x2_t a, poly64x2_t b)
22025 {
22026 return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
22027 }
22028
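/* A minimal usage sketch (hypothetical helper): the 64x64->128-bit
   carry-less multiplies are the core of GHASH and of CRC folding; the high
   form consumes the upper halves without an explicit lane extraction.  */

__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
__example_clmul_hi (poly64x2_t __a, poly64x2_t __b)
{
  return vmull_high_p64 (__a, __b);
}
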
22029 #pragma GCC pop_options
22030
22031 /* vshl */
22032
22033 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22034 vshl_n_s8 (int8x8_t __a, const int __b)
22035 {
22036 return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
22037 }
22038
22039 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22040 vshl_n_s16 (int16x4_t __a, const int __b)
22041 {
22042 return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
22043 }
22044
22045 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22046 vshl_n_s32 (int32x2_t __a, const int __b)
22047 {
22048 return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
22049 }
22050
22051 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22052 vshl_n_s64 (int64x1_t __a, const int __b)
22053 {
22054 return (int64x1_t) {__builtin_aarch64_ashldi (__a[0], __b)};
22055 }
22056
22057 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22058 vshl_n_u8 (uint8x8_t __a, const int __b)
22059 {
22060 return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
22061 }
22062
22063 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22064 vshl_n_u16 (uint16x4_t __a, const int __b)
22065 {
22066 return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
22067 }
22068
22069 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22070 vshl_n_u32 (uint32x2_t __a, const int __b)
22071 {
22072 return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
22073 }
22074
22075 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22076 vshl_n_u64 (uint64x1_t __a, const int __b)
22077 {
22078 return (uint64x1_t) {__builtin_aarch64_ashldi ((int64_t) __a[0], __b)};
22079 }
22080
22081 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22082 vshlq_n_s8 (int8x16_t __a, const int __b)
22083 {
22084 return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
22085 }
22086
22087 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22088 vshlq_n_s16 (int16x8_t __a, const int __b)
22089 {
22090 return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
22091 }
22092
22093 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22094 vshlq_n_s32 (int32x4_t __a, const int __b)
22095 {
22096 return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
22097 }
22098
22099 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22100 vshlq_n_s64 (int64x2_t __a, const int __b)
22101 {
22102 return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
22103 }
22104
22105 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22106 vshlq_n_u8 (uint8x16_t __a, const int __b)
22107 {
22108 return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
22109 }
22110
22111 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22112 vshlq_n_u16 (uint16x8_t __a, const int __b)
22113 {
22114 return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
22115 }
22116
22117 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22118 vshlq_n_u32 (uint32x4_t __a, const int __b)
22119 {
22120 return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
22121 }
22122
22123 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22124 vshlq_n_u64 (uint64x2_t __a, const int __b)
22125 {
22126 return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
22127 }
22128
22129 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22130 vshld_n_s64 (int64_t __a, const int __b)
22131 {
22132 return __builtin_aarch64_ashldi (__a, __b);
22133 }
22134
22135 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22136 vshld_n_u64 (uint64_t __a, const int __b)
22137 {
22138 return (uint64_t) __builtin_aarch64_ashldi (__a, __b);
22139 }
22140
22141 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22142 vshl_s8 (int8x8_t __a, int8x8_t __b)
22143 {
22144 return __builtin_aarch64_sshlv8qi (__a, __b);
22145 }
22146
22147 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22148 vshl_s16 (int16x4_t __a, int16x4_t __b)
22149 {
22150 return __builtin_aarch64_sshlv4hi (__a, __b);
22151 }
22152
22153 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22154 vshl_s32 (int32x2_t __a, int32x2_t __b)
22155 {
22156 return __builtin_aarch64_sshlv2si (__a, __b);
22157 }
22158
22159 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22160 vshl_s64 (int64x1_t __a, int64x1_t __b)
22161 {
22162 return (int64x1_t) {__builtin_aarch64_sshldi (__a[0], __b[0])};
22163 }
22164
22165 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22166 vshl_u8 (uint8x8_t __a, int8x8_t __b)
22167 {
22168 return __builtin_aarch64_ushlv8qi_uus (__a, __b);
22169 }
22170
22171 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22172 vshl_u16 (uint16x4_t __a, int16x4_t __b)
22173 {
22174 return __builtin_aarch64_ushlv4hi_uus (__a, __b);
22175 }
22176
22177 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22178 vshl_u32 (uint32x2_t __a, int32x2_t __b)
22179 {
22180 return __builtin_aarch64_ushlv2si_uus (__a, __b);
22181 }
22182
22183 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22184 vshl_u64 (uint64x1_t __a, int64x1_t __b)
22185 {
22186 return (uint64x1_t) {__builtin_aarch64_ushldi_uus (__a[0], __b[0])};
22187 }
22188
22189 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22190 vshlq_s8 (int8x16_t __a, int8x16_t __b)
22191 {
22192 return __builtin_aarch64_sshlv16qi (__a, __b);
22193 }
22194
22195 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22196 vshlq_s16 (int16x8_t __a, int16x8_t __b)
22197 {
22198 return __builtin_aarch64_sshlv8hi (__a, __b);
22199 }
22200
22201 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22202 vshlq_s32 (int32x4_t __a, int32x4_t __b)
22203 {
22204 return __builtin_aarch64_sshlv4si (__a, __b);
22205 }
22206
22207 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22208 vshlq_s64 (int64x2_t __a, int64x2_t __b)
22209 {
22210 return __builtin_aarch64_sshlv2di (__a, __b);
22211 }
22212
22213 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22214 vshlq_u8 (uint8x16_t __a, int8x16_t __b)
22215 {
22216 return __builtin_aarch64_ushlv16qi_uus (__a, __b);
22217 }
22218
22219 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22220 vshlq_u16 (uint16x8_t __a, int16x8_t __b)
22221 {
22222 return __builtin_aarch64_ushlv8hi_uus (__a, __b);
22223 }
22224
22225 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22226 vshlq_u32 (uint32x4_t __a, int32x4_t __b)
22227 {
22228 return __builtin_aarch64_ushlv4si_uus (__a, __b);
22229 }
22230
22231 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22232 vshlq_u64 (uint64x2_t __a, int64x2_t __b)
22233 {
22234 return __builtin_aarch64_ushlv2di_uus (__a, __b);
22235 }
22236
22237 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22238 vshld_s64 (int64_t __a, int64_t __b)
22239 {
22240 return __builtin_aarch64_sshldi (__a, __b);
22241 }
22242
22243 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22244 vshld_u64 (uint64_t __a, int64_t __b)
22245 {
22246 return __builtin_aarch64_ushldi_uus (__a, __b);
22247 }
22248
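/* A minimal usage sketch (hypothetical helper): vshl_n_* require a
   compile-time immediate, while the register forms shift each lane by a
   run-time, per-lane amount; negative counts shift right, truncating
   (compare vrshl, which rounds).  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
__example_variable_shift (int32x4_t __a, int32x4_t __count)
{
  return vshlq_s32 (__a, __count);
}
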
22249 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22250 vshll_high_n_s8 (int8x16_t __a, const int __b)
22251 {
22252 return __builtin_aarch64_sshll2_nv16qi (__a, __b);
22253 }
22254
22255 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22256 vshll_high_n_s16 (int16x8_t __a, const int __b)
22257 {
22258 return __builtin_aarch64_sshll2_nv8hi (__a, __b);
22259 }
22260
22261 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22262 vshll_high_n_s32 (int32x4_t __a, const int __b)
22263 {
22264 return __builtin_aarch64_sshll2_nv4si (__a, __b);
22265 }
22266
22267 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22268 vshll_high_n_u8 (uint8x16_t __a, const int __b)
22269 {
22270 return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
22271 }
22272
22273 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22274 vshll_high_n_u16 (uint16x8_t __a, const int __b)
22275 {
22276 return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
22277 }
22278
22279 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22280 vshll_high_n_u32 (uint32x4_t __a, const int __b)
22281 {
22282 return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
22283 }
22284
22285 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22286 vshll_n_s8 (int8x8_t __a, const int __b)
22287 {
22288 return __builtin_aarch64_sshll_nv8qi (__a, __b);
22289 }
22290
22291 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22292 vshll_n_s16 (int16x4_t __a, const int __b)
22293 {
22294 return __builtin_aarch64_sshll_nv4hi (__a, __b);
22295 }
22296
22297 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22298 vshll_n_s32 (int32x2_t __a, const int __b)
22299 {
22300 return __builtin_aarch64_sshll_nv2si (__a, __b);
22301 }
22302
22303 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22304 vshll_n_u8 (uint8x8_t __a, const int __b)
22305 {
22306 return __builtin_aarch64_ushll_nv8qi_uus (__a, __b);
22307 }
22308
22309 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22310 vshll_n_u16 (uint16x4_t __a, const int __b)
22311 {
22312 return __builtin_aarch64_ushll_nv4hi_uus (__a, __b);
22313 }
22314
22315 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22316 vshll_n_u32 (uint32x2_t __a, const int __b)
22317 {
22318 return __builtin_aarch64_ushll_nv2si_uus (__a, __b);
22319 }
22320
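/* Editorial aside, not part of the original header: vshll_n_* widen each
   lane to twice its width and then shift left by a constant, while the
   _high_ forms read the upper half of a 128-bit vector; the pair below
   widens all sixteen bytes.  A hedged sketch (a shift of 0 is assumed to
   map to the plain widening USHLL #0), compiled out:  */
#if 0
void
widen_u8 (uint16x8_t *__lo, uint16x8_t *__hi, uint8x16_t __v)
{
  *__lo = vshll_n_u8 (vget_low_u8 (__v), 0);	/* Lanes 0-7.  */
  *__hi = vshll_high_n_u8 (__v, 0);		/* Lanes 8-15.  */
}
#endif
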
22321 /* vshr */
22322
22323 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22324 vshr_n_s8 (int8x8_t __a, const int __b)
22325 {
22326 return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
22327 }
22328
22329 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22330 vshr_n_s16 (int16x4_t __a, const int __b)
22331 {
22332 return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
22333 }
22334
22335 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22336 vshr_n_s32 (int32x2_t __a, const int __b)
22337 {
22338 return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
22339 }
22340
22341 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22342 vshr_n_s64 (int64x1_t __a, const int __b)
22343 {
22344 return (int64x1_t) {__builtin_aarch64_ashr_simddi (__a[0], __b)};
22345 }
22346
22347 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22348 vshr_n_u8 (uint8x8_t __a, const int __b)
22349 {
22350 return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
22351 }
22352
22353 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22354 vshr_n_u16 (uint16x4_t __a, const int __b)
22355 {
22356 return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
22357 }
22358
22359 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22360 vshr_n_u32 (uint32x2_t __a, const int __b)
22361 {
22362 return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
22363 }
22364
22365 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22366 vshr_n_u64 (uint64x1_t __a, const int __b)
22367 {
22368 return (uint64x1_t) {__builtin_aarch64_lshr_simddi_uus (__a[0], __b)};
22369 }
22370
22371 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22372 vshrq_n_s8 (int8x16_t __a, const int __b)
22373 {
22374 return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
22375 }
22376
22377 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22378 vshrq_n_s16 (int16x8_t __a, const int __b)
22379 {
22380 return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
22381 }
22382
22383 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22384 vshrq_n_s32 (int32x4_t __a, const int __b)
22385 {
22386 return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
22387 }
22388
22389 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22390 vshrq_n_s64 (int64x2_t __a, const int __b)
22391 {
22392 return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
22393 }
22394
22395 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22396 vshrq_n_u8 (uint8x16_t __a, const int __b)
22397 {
22398 return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
22399 }
22400
22401 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22402 vshrq_n_u16 (uint16x8_t __a, const int __b)
22403 {
22404 return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
22405 }
22406
22407 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22408 vshrq_n_u32 (uint32x4_t __a, const int __b)
22409 {
22410 return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
22411 }
22412
22413 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22414 vshrq_n_u64 (uint64x2_t __a, const int __b)
22415 {
22416 return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
22417 }
22418
22419 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22420 vshrd_n_s64 (int64_t __a, const int __b)
22421 {
22422 return __builtin_aarch64_ashr_simddi (__a, __b);
22423 }
22424
22425 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22426 vshrd_n_u64 (uint64_t __a, const int __b)
22427 {
22428 return __builtin_aarch64_lshr_simddi_uus (__a, __b);
22429 }
22430
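/* Editorial aside, not part of the original header: vshr_n_* shift every
   lane right by a constant; the signed forms shift arithmetically (a
   flooring division by a power of two), the unsigned forms logically.
   A minimal sketch, compiled out:  */
#if 0
int16x8_t
div_by_16 (int16x8_t __v)
{
  return vshrq_n_s16 (__v, 4);	/* Arithmetic >> 4, i.e. floor (v / 16).  */
}
#endif
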
22431 /* vsli */
22432
22433 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22434 vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22435 {
22436 return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
22437 }
22438
22439 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22440 vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22441 {
22442 return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
22443 }
22444
22445 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22446 vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22447 {
22448 return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
22449 }
22450
22451 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22452 vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22453 {
22454 return (int64x1_t) {__builtin_aarch64_ssli_ndi (__a[0], __b[0], __c)};
22455 }
22456
22457 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22458 vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22459 {
22460 return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c);
22461 }
22462
22463 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22464 vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22465 {
22466 return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c);
22467 }
22468
22469 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22470 vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22471 {
22472 return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c);
22473 }
22474
22475 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22476 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22477 {
22478 return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)};
22479 }
22480
22481 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22482 vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22483 {
22484 return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
22485 }
22486
22487 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22488 vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22489 {
22490 return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
22491 }
22492
22493 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22494 vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22495 {
22496 return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
22497 }
22498
22499 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22500 vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22501 {
22502 return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
22503 }
22504
22505 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22506 vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22507 {
22508 return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c);
22509 }
22510
22511 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22512 vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22513 {
22514 return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c);
22515 }
22516
22517 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22518 vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22519 {
22520 return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c);
22521 }
22522
22523 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22524 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22525 {
22526 return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c);
22527 }
22528
22529 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22530 vslid_n_s64 (int64_t __a, int64_t __b, const int __c)
22531 {
22532 return __builtin_aarch64_ssli_ndi (__a, __b, __c);
22533 }
22534
22535 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22536 vslid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22537 {
22538 return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
22539 }
22540
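/* Editorial aside, not part of the original header: vsli_n_* compute
   (__b << __c) and insert it into __a, keeping only the low __c bits of
   __a, which makes them handy for packing bit-fields.  A minimal sketch,
   compiled out:  */
#if 0
uint8x8_t
pack_nibbles (uint8x8_t __lo, uint8x8_t __hi)
{
  return vsli_n_u8 (__lo, __hi, 4);	/* (__hi << 4) | (__lo & 0x0f).  */
}
#endif
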
22541 /* vsqadd */
22542
22543 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22544 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
22545 {
22546 return __builtin_aarch64_usqaddv8qi_uus (__a, __b);
22547 }
22548
22549 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22550 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
22551 {
22552 return __builtin_aarch64_usqaddv4hi_uus (__a, __b);
22553 }
22554
22555 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22556 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
22557 {
22558 return __builtin_aarch64_usqaddv2si_uus (__a, __b);
22559 }
22560
22561 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22562 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
22563 {
22564 return (uint64x1_t) {__builtin_aarch64_usqadddi_uus (__a[0], __b[0])};
22565 }
22566
22567 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22568 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
22569 {
22570 return __builtin_aarch64_usqaddv16qi_uus (__a, __b);
22571 }
22572
22573 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22574 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
22575 {
22576 return __builtin_aarch64_usqaddv8hi_uus (__a, __b);
22577 }
22578
22579 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22580 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
22581 {
22582 return __builtin_aarch64_usqaddv4si_uus (__a, __b);
22583 }
22584
22585 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22586 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
22587 {
22588 return __builtin_aarch64_usqaddv2di_uus (__a, __b);
22589 }
22590
22591 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
22592 vsqaddb_u8 (uint8_t __a, int8_t __b)
22593 {
22594 return __builtin_aarch64_usqaddqi_uus (__a, __b);
22595 }
22596
22597 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
22598 vsqaddh_u16 (uint16_t __a, int16_t __b)
22599 {
22600 return __builtin_aarch64_usqaddhi_uus (__a, __b);
22601 }
22602
22603 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
22604 vsqadds_u32 (uint32_t __a, int32_t __b)
22605 {
22606 return __builtin_aarch64_usqaddsi_uus (__a, __b);
22607 }
22608
22609 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22610 vsqaddd_u64 (uint64_t __a, int64_t __b)
22611 {
22612 return __builtin_aarch64_usqadddi_uus (__a, __b);
22613 }
22614
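/* Editorial aside, not part of the original header: vsqadd_* (USQADD) add
   a *signed* operand to an unsigned accumulator, saturating at both ends
   of the unsigned range.  A minimal sketch, compiled out:  */
#if 0
uint8x8_t
apply_delta (uint8x8_t __acc, int8x8_t __delta)
{
  return vsqadd_u8 (__acc, __delta);	/* Result clamps to [0, 255].  */
}
#endif
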
22615 /* vsqrt */
22616 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22617 vsqrt_f32 (float32x2_t a)
22618 {
22619 return __builtin_aarch64_sqrtv2sf (a);
22620 }
22621
22622 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22623 vsqrtq_f32 (float32x4_t a)
22624 {
22625 return __builtin_aarch64_sqrtv4sf (a);
22626 }
22627
22628 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
22629 vsqrt_f64 (float64x1_t a)
22630 {
22631 return (float64x1_t) { __builtin_aarch64_sqrtdf (a[0]) };
22632 }
22633
22634 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22635 vsqrtq_f64 (float64x2_t a)
22636 {
22637 return __builtin_aarch64_sqrtv2df (a);
22638 }
22639
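/* Editorial aside, not part of the original header: the vsqrt_* forms take
   a lane-wise square root.  A minimal sketch computing lengths from
   pre-summed squares, compiled out:  */
#if 0
float32x4_t
lengths (float32x4_t __sum_of_squares)
{
  return vsqrtq_f32 (__sum_of_squares);
}
#endif
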
22640 /* vsra */
22641
22642 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22643 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22644 {
22645 return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
22646 }
22647
22648 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22649 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22650 {
22651 return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
22652 }
22653
22654 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22655 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22656 {
22657 return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
22658 }
22659
22660 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22661 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22662 {
22663 return (int64x1_t) {__builtin_aarch64_ssra_ndi (__a[0], __b[0], __c)};
22664 }
22665
22666 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22667 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22668 {
22669 return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c);
22670 }
22671
22672 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22673 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22674 {
22675 return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c);
22676 }
22677
22678 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22679 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22680 {
22681 return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c);
22682 }
22683
22684 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22685 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22686 {
22687 return (uint64x1_t) {__builtin_aarch64_usra_ndi_uuus (__a[0], __b[0], __c)};
22688 }
22689
22690 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22691 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22692 {
22693 return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
22694 }
22695
22696 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22697 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22698 {
22699 return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
22700 }
22701
22702 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22703 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22704 {
22705 return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
22706 }
22707
22708 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22709 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22710 {
22711 return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
22712 }
22713
22714 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22715 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22716 {
22717 return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c);
22718 }
22719
22720 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22721 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22722 {
22723 return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c);
22724 }
22725
22726 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22727 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22728 {
22729 return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c);
22730 }
22731
22732 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22733 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22734 {
22735 return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c);
22736 }
22737
22738 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22739 vsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
22740 {
22741 return __builtin_aarch64_ssra_ndi (__a, __b, __c);
22742 }
22743
22744 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22745 vsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22746 {
22747 return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
22748 }
22749
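/* Editorial aside, not part of the original header: vsra_n_* add a
   right-shifted operand to an accumulator, i.e. __a + (__b >> __c), in a
   single instruction.  A minimal sketch, compiled out:  */
#if 0
uint16x8_t
accumulate_quarter (uint16x8_t __acc, uint16x8_t __v)
{
  return vsraq_n_u16 (__acc, __v, 2);	/* __acc + (__v >> 2).  */
}
#endif
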
22750 /* vsri */
22751
22752 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22753 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22754 {
22755 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
22756 }
22757
22758 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22759 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22760 {
22761 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
22762 }
22763
22764 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22765 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22766 {
22767 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
22768 }
22769
22770 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22771 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22772 {
22773 return (int64x1_t) {__builtin_aarch64_ssri_ndi (__a[0], __b[0], __c)};
22774 }
22775
22776 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22777 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22778 {
22779 return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c);
22780 }
22781
22782 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22783 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22784 {
22785 return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c);
22786 }
22787
22788 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22789 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22790 {
22791 return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c);
22792 }
22793
22794 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22795 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22796 {
22797 return (uint64x1_t) {__builtin_aarch64_usri_ndi_uuus (__a[0], __b[0], __c)};
22798 }
22799
22800 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22801 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22802 {
22803 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
22804 }
22805
22806 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22807 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22808 {
22809 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
22810 }
22811
22812 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22813 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22814 {
22815 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
22816 }
22817
22818 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22819 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22820 {
22821 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
22822 }
22823
22824 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22825 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22826 {
22827 return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c);
22828 }
22829
22830 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22831 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22832 {
22833 return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c);
22834 }
22835
22836 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22837 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22838 {
22839 return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c);
22840 }
22841
22842 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22843 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22844 {
22845 return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c);
22846 }
22847
22848 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22849 vsrid_n_s64 (int64_t __a, int64_t __b, const int __c)
22850 {
22851 return __builtin_aarch64_ssri_ndi (__a, __b, __c);
22852 }
22853
22854 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22855 vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22856 {
22857 return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
22858 }
22859
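/* Editorial aside, not part of the original header: vsri_n_* shift __b
   right by __c and insert the result into __a, keeping only the high __c
   bits of __a.  A classic use is replicating the top bits of a 5-bit
   colour channel when widening it to 8 bits.  Compiled out:  */
#if 0
uint8x8_t
expand_5bit (uint8x8_t __c5)		/* Channel value in bits 7:3.  */
{
  return vsri_n_u8 (__c5, __c5, 5);	/* Top 3 bits repeated into 2:0.  */
}
#endif
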
22860 /* vst1 */
22861
22862 __extension__ static __inline void __attribute__ ((__always_inline__))
22863 vst1_f16 (float16_t *__a, float16x4_t __b)
22864 {
22865 __builtin_aarch64_st1v4hf (__a, __b);
22866 }
22867
22868 __extension__ static __inline void __attribute__ ((__always_inline__))
22869 vst1_f32 (float32_t *a, float32x2_t b)
22870 {
22871 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
22872 }
22873
22874 __extension__ static __inline void __attribute__ ((__always_inline__))
22875 vst1_f64 (float64_t *a, float64x1_t b)
22876 {
22877 *a = b[0];
22878 }
22879
22880 __extension__ static __inline void __attribute__ ((__always_inline__))
22881 vst1_p8 (poly8_t *a, poly8x8_t b)
22882 {
22883 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
22884 (int8x8_t) b);
22885 }
22886
22887 __extension__ static __inline void __attribute__ ((__always_inline__))
22888 vst1_p16 (poly16_t *a, poly16x4_t b)
22889 {
22890 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
22891 (int16x4_t) b);
22892 }
22893
22894 __extension__ static __inline void __attribute__ ((__always_inline__))
22895 vst1_s8 (int8_t *a, int8x8_t b)
22896 {
22897 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
22898 }
22899
22900 __extension__ static __inline void __attribute__ ((__always_inline__))
22901 vst1_s16 (int16_t *a, int16x4_t b)
22902 {
22903 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
22904 }
22905
22906 __extension__ static __inline void __attribute__ ((__always_inline__))
22907 vst1_s32 (int32_t *a, int32x2_t b)
22908 {
22909 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
22910 }
22911
22912 __extension__ static __inline void __attribute__ ((__always_inline__))
22913 vst1_s64 (int64_t *a, int64x1_t b)
22914 {
22915 *a = b[0];
22916 }
22917
22918 __extension__ static __inline void __attribute__ ((__always_inline__))
22919 vst1_u8 (uint8_t *a, uint8x8_t b)
22920 {
22921 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
22922 (int8x8_t) b);
22923 }
22924
22925 __extension__ static __inline void __attribute__ ((__always_inline__))
22926 vst1_u16 (uint16_t *a, uint16x4_t b)
22927 {
22928 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
22929 (int16x4_t) b);
22930 }
22931
22932 __extension__ static __inline void __attribute__ ((__always_inline__))
22933 vst1_u32 (uint32_t *a, uint32x2_t b)
22934 {
22935 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
22936 (int32x2_t) b);
22937 }
22938
22939 __extension__ static __inline void __attribute__ ((__always_inline__))
22940 vst1_u64 (uint64_t *a, uint64x1_t b)
22941 {
22942 *a = b[0];
22943 }
22944
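/* Editorial aside, not part of the original header: vst1_* store a whole
   64-bit vector to memory, with no alignment requirement beyond that of
   the element type.  A minimal sketch, compiled out:  */
#if 0
void
store_two (float32_t *__dst, float32x2_t __v)
{
  vst1_f32 (__dst, __v);	/* Writes __dst[0] and __dst[1].  */
}
#endif
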
22945 /* vst1q */
22946
22947 __extension__ static __inline void __attribute__ ((__always_inline__))
22948 vst1q_f16 (float16_t *__a, float16x8_t __b)
22949 {
22950 __builtin_aarch64_st1v8hf (__a, __b);
22951 }
22952
22953 __extension__ static __inline void __attribute__ ((__always_inline__))
22954 vst1q_f32 (float32_t *a, float32x4_t b)
22955 {
22956 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
22957 }
22958
22959 __extension__ static __inline void __attribute__ ((__always_inline__))
22960 vst1q_f64 (float64_t *a, float64x2_t b)
22961 {
22962 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
22963 }
22964
22965 __extension__ static __inline void __attribute__ ((__always_inline__))
22966 vst1q_p8 (poly8_t *a, poly8x16_t b)
22967 {
22968 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
22969 (int8x16_t) b);
22970 }
22971
22972 __extension__ static __inline void __attribute__ ((__always_inline__))
22973 vst1q_p16 (poly16_t *a, poly16x8_t b)
22974 {
22975 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
22976 (int16x8_t) b);
22977 }
22978
22979 __extension__ static __inline void __attribute__ ((__always_inline__))
22980 vst1q_s8 (int8_t *a, int8x16_t b)
22981 {
22982 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
22983 }
22984
22985 __extension__ static __inline void __attribute__ ((__always_inline__))
22986 vst1q_s16 (int16_t *a, int16x8_t b)
22987 {
22988 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
22989 }
22990
22991 __extension__ static __inline void __attribute__ ((__always_inline__))
22992 vst1q_s32 (int32_t *a, int32x4_t b)
22993 {
22994 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
22995 }
22996
22997 __extension__ static __inline void __attribute__ ((__always_inline__))
22998 vst1q_s64 (int64_t *a, int64x2_t b)
22999 {
23000 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
23001 }
23002
23003 __extension__ static __inline void __attribute__ ((__always_inline__))
23004 vst1q_u8 (uint8_t *a, uint8x16_t b)
23005 {
23006 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
23007 (int8x16_t) b);
23008 }
23009
23010 __extension__ static __inline void __attribute__ ((__always_inline__))
23011 vst1q_u16 (uint16_t *a, uint16x8_t b)
23012 {
23013 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
23014 (int16x8_t) b);
23015 }
23016
23017 __extension__ static __inline void __attribute__ ((__always_inline__))
23018 vst1q_u32 (uint32_t *a, uint32x4_t b)
23019 {
23020 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
23021 (int32x4_t) b);
23022 }
23023
23024 __extension__ static __inline void __attribute__ ((__always_inline__))
23025 vst1q_u64 (uint64_t *a, uint64x2_t b)
23026 {
23027 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
23028 (int64x2_t) b);
23029 }
23030
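/* Editorial aside, not part of the original header: the vst1q_* forms
   store a full 128-bit vector, e.g. a 16-byte fill in one store.
   Compiled out:  */
#if 0
void
fill16 (uint8_t *__dst, uint8_t __byte)
{
  vst1q_u8 (__dst, vdupq_n_u8 (__byte));
}
#endif
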
23031 /* vst1_lane */
23032
23033 __extension__ static __inline void __attribute__ ((__always_inline__))
23034 vst1_lane_f16 (float16_t *__a, float16x4_t __b, const int __lane)
23035 {
23036 *__a = __aarch64_vget_lane_any (__b, __lane);
23037 }
23038
23039 __extension__ static __inline void __attribute__ ((__always_inline__))
23040 vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane)
23041 {
23042 *__a = __aarch64_vget_lane_any (__b, __lane);
23043 }
23044
23045 __extension__ static __inline void __attribute__ ((__always_inline__))
23046 vst1_lane_f64 (float64_t *__a, float64x1_t __b, const int __lane)
23047 {
23048 *__a = __aarch64_vget_lane_any (__b, __lane);
23049 }
23050
23051 __extension__ static __inline void __attribute__ ((__always_inline__))
23052 vst1_lane_p8 (poly8_t *__a, poly8x8_t __b, const int __lane)
23053 {
23054 *__a = __aarch64_vget_lane_any (__b, __lane);
23055 }
23056
23057 __extension__ static __inline void __attribute__ ((__always_inline__))
23058 vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane)
23059 {
23060 *__a = __aarch64_vget_lane_any (__b, __lane);
23061 }
23062
23063 __extension__ static __inline void __attribute__ ((__always_inline__))
23064 vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane)
23065 {
23066 *__a = __aarch64_vget_lane_any (__b, __lane);
23067 }
23068
23069 __extension__ static __inline void __attribute__ ((__always_inline__))
23070 vst1_lane_s16 (int16_t *__a, int16x4_t __b, const int __lane)
23071 {
23072 *__a = __aarch64_vget_lane_any (__b, __lane);
23073 }
23074
23075 __extension__ static __inline void __attribute__ ((__always_inline__))
23076 vst1_lane_s32 (int32_t *__a, int32x2_t __b, const int __lane)
23077 {
23078 *__a = __aarch64_vget_lane_any (__b, __lane);
23079 }
23080
23081 __extension__ static __inline void __attribute__ ((__always_inline__))
23082 vst1_lane_s64 (int64_t *__a, int64x1_t __b, const int __lane)
23083 {
23084 *__a = __aarch64_vget_lane_any (__b, __lane);
23085 }
23086
23087 __extension__ static __inline void __attribute__ ((__always_inline__))
23088 vst1_lane_u8 (uint8_t *__a, uint8x8_t __b, const int __lane)
23089 {
23090 *__a = __aarch64_vget_lane_any (__b, __lane);
23091 }
23092
23093 __extension__ static __inline void __attribute__ ((__always_inline__))
23094 vst1_lane_u16 (uint16_t *__a, uint16x4_t __b, const int __lane)
23095 {
23096 *__a = __aarch64_vget_lane_any (__b, __lane);
23097 }
23098
23099 __extension__ static __inline void __attribute__ ((__always_inline__))
23100 vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane)
23101 {
23102 *__a = __aarch64_vget_lane_any (__b, __lane);
23103 }
23104
23105 __extension__ static __inline void __attribute__ ((__always_inline__))
23106 vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane)
23107 {
23108 *__a = __aarch64_vget_lane_any (__b, __lane);
23109 }
23110
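/* Editorial aside, not part of the original header: vst1_lane_* store a
   single lane selected by a compile-time constant index.  A minimal
   sketch, compiled out:  */
#if 0
void
store_lane2 (int16_t *__dst, int16x4_t __v)
{
  vst1_lane_s16 (__dst, __v, 2);	/* Index must lie in 0..3.  */
}
#endif
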
23111 /* vst1q_lane */
23112
23113 __extension__ static __inline void __attribute__ ((__always_inline__))
23114 vst1q_lane_f16 (float16_t *__a, float16x8_t __b, const int __lane)
23115 {
23116 *__a = __aarch64_vget_lane_any (__b, __lane);
23117 }
23118
23119 __extension__ static __inline void __attribute__ ((__always_inline__))
23120 vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane)
23121 {
23122 *__a = __aarch64_vget_lane_any (__b, __lane);
23123 }
23124
23125 __extension__ static __inline void __attribute__ ((__always_inline__))
23126 vst1q_lane_f64 (float64_t *__a, float64x2_t __b, const int __lane)
23127 {
23128 *__a = __aarch64_vget_lane_any (__b, __lane);
23129 }
23130
23131 __extension__ static __inline void __attribute__ ((__always_inline__))
23132 vst1q_lane_p8 (poly8_t *__a, poly8x16_t __b, const int __lane)
23133 {
23134 *__a = __aarch64_vget_lane_any (__b, __lane);
23135 }
23136
23137 __extension__ static __inline void __attribute__ ((__always_inline__))
23138 vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane)
23139 {
23140 *__a = __aarch64_vget_lane_any (__b, __lane);
23141 }
23142
23143 __extension__ static __inline void __attribute__ ((__always_inline__))
23144 vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane)
23145 {
23146 *__a = __aarch64_vget_lane_any (__b, __lane);
23147 }
23148
23149 __extension__ static __inline void __attribute__ ((__always_inline__))
23150 vst1q_lane_s16 (int16_t *__a, int16x8_t __b, const int __lane)
23151 {
23152 *__a = __aarch64_vget_lane_any (__b, __lane);
23153 }
23154
23155 __extension__ static __inline void __attribute__ ((__always_inline__))
23156 vst1q_lane_s32 (int32_t *__a, int32x4_t __b, const int __lane)
23157 {
23158 *__a = __aarch64_vget_lane_any (__b, __lane);
23159 }
23160
23161 __extension__ static __inline void __attribute__ ((__always_inline__))
23162 vst1q_lane_s64 (int64_t *__a, int64x2_t __b, const int __lane)
23163 {
23164 *__a = __aarch64_vget_lane_any (__b, __lane);
23165 }
23166
23167 __extension__ static __inline void __attribute__ ((__always_inline__))
23168 vst1q_lane_u8 (uint8_t *__a, uint8x16_t __b, const int __lane)
23169 {
23170 *__a = __aarch64_vget_lane_any (__b, __lane);
23171 }
23172
23173 __extension__ static __inline void __attribute__ ((__always_inline__))
23174 vst1q_lane_u16 (uint16_t *__a, uint16x8_t __b, const int __lane)
23175 {
23176 *__a = __aarch64_vget_lane_any (__b, __lane);
23177 }
23178
23179 __extension__ static __inline void __attribute__ ((__always_inline__))
23180 vst1q_lane_u32 (uint32_t *__a, uint32x4_t __b, const int __lane)
23181 {
23182 *__a = __aarch64_vget_lane_any (__b, __lane);
23183 }
23184
23185 __extension__ static __inline void __attribute__ ((__always_inline__))
23186 vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane)
23187 {
23188 *__a = __aarch64_vget_lane_any (__b, __lane);
23189 }
23190
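/* Editorial aside, not part of the original header: the vst1q_lane_* forms
   do the same for 128-bit vectors, e.g. spilling only the upper 64-bit
   lane.  Compiled out:  */
#if 0
void
store_high (uint64_t *__dst, uint64x2_t __v)
{
  vst1q_lane_u64 (__dst, __v, 1);
}
#endif
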
23191 /* vstn */
23192
23193 __extension__ static __inline void __attribute__ ((__always_inline__))
23194 vst2_s64 (int64_t * __a, int64x1x2_t val)
23195 {
23196 __builtin_aarch64_simd_oi __o;
23197 int64x2x2_t temp;
23198 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23199 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23200 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
23201 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
23202 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
23203 }
23204
23205 __extension__ static __inline void __attribute__ ((__always_inline__))
23206 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
23207 {
23208 __builtin_aarch64_simd_oi __o;
23209 uint64x2x2_t temp;
23210 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23211 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23212 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
23213 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
23214 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
23215 }
23216
23217 __extension__ static __inline void __attribute__ ((__always_inline__))
23218 vst2_f64 (float64_t * __a, float64x1x2_t val)
23219 {
23220 __builtin_aarch64_simd_oi __o;
23221 float64x2x2_t temp;
23222 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
23223 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
23224 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
23225 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
23226 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
23227 }
23228
23229 __extension__ static __inline void __attribute__ ((__always_inline__))
23230 vst2_s8 (int8_t * __a, int8x8x2_t val)
23231 {
23232 __builtin_aarch64_simd_oi __o;
23233 int8x16x2_t temp;
23234 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
23235 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
23236 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
23237 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
23238 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23239 }
23240
23241 __extension__ static __inline void __attribute__ ((__always_inline__))
23242 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
23243 {
23244 __builtin_aarch64_simd_oi __o;
23245 poly8x16x2_t temp;
23246 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
23247 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
23248 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
23249 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
23250 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23251 }
23252
23253 __extension__ static __inline void __attribute__ ((__always_inline__))
23254 vst2_s16 (int16_t * __a, int16x4x2_t val)
23255 {
23256 __builtin_aarch64_simd_oi __o;
23257 int16x8x2_t temp;
23258 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
23259 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
23260 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
23261 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
23262 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23263 }
23264
23265 __extension__ static __inline void __attribute__ ((__always_inline__))
23266 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
23267 {
23268 __builtin_aarch64_simd_oi __o;
23269 poly16x8x2_t temp;
23270 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
23271 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23272 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
23273 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
23274 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23275 }
23276
23277 __extension__ static __inline void __attribute__ ((__always_inline__))
23278 vst2_s32 (int32_t * __a, int32x2x2_t val)
23279 {
23280 __builtin_aarch64_simd_oi __o;
23281 int32x4x2_t temp;
23282 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23283 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23284 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
23285 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
23286 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
23287 }
23288
23289 __extension__ static __inline void __attribute__ ((__always_inline__))
23290 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
23291 {
23292 __builtin_aarch64_simd_oi __o;
23293 uint8x16x2_t temp;
23294 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23295 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23296 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
23297 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
23298 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23299 }
23300
23301 __extension__ static __inline void __attribute__ ((__always_inline__))
23302 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
23303 {
23304 __builtin_aarch64_simd_oi __o;
23305 uint16x8x2_t temp;
23306 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
23307 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
23308 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
23309 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
23310 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23311 }
23312
23313 __extension__ static __inline void __attribute__ ((__always_inline__))
23314 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
23315 {
23316 __builtin_aarch64_simd_oi __o;
23317 uint32x4x2_t temp;
23318 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
23319 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
23320 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
23321 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
23322 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
23323 }
23324
23325 __extension__ static __inline void __attribute__ ((__always_inline__))
23326 vst2_f16 (float16_t * __a, float16x4x2_t val)
23327 {
23328 __builtin_aarch64_simd_oi __o;
23329 float16x8x2_t temp;
23330 temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
23331 temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
23332 __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[0], 0);
23333 __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[1], 1);
23334 __builtin_aarch64_st2v4hf (__a, __o);
23335 }
23336
23337 __extension__ static __inline void __attribute__ ((__always_inline__))
23338 vst2_f32 (float32_t * __a, float32x2x2_t val)
23339 {
23340 __builtin_aarch64_simd_oi __o;
23341 float32x4x2_t temp;
23342 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
23343 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23344 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
23345 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
23346 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
23347 }
23348
23349 __extension__ static __inline void __attribute__ ((__always_inline__))
23350 vst2q_s8 (int8_t * __a, int8x16x2_t val)
23351 {
23352 __builtin_aarch64_simd_oi __o;
23353 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
23354 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
23355 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23356 }
23357
23358 __extension__ static __inline void __attribute__ ((__always_inline__))
23359 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
23360 {
23361 __builtin_aarch64_simd_oi __o;
23362 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
23363 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
23364 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23365 }
23366
23367 __extension__ static __inline void __attribute__ ((__always_inline__))
23368 vst2q_s16 (int16_t * __a, int16x8x2_t val)
23369 {
23370 __builtin_aarch64_simd_oi __o;
23371 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
23372 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
23373 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23374 }
23375
23376 __extension__ static __inline void __attribute__ ((__always_inline__))
23377 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
23378 {
23379 __builtin_aarch64_simd_oi __o;
23380 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
23381 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
23382 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23383 }
23384
23385 __extension__ static __inline void __attribute__ ((__always_inline__))
23386 vst2q_s32 (int32_t * __a, int32x4x2_t val)
23387 {
23388 __builtin_aarch64_simd_oi __o;
23389 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
23390 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
23391 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
23392 }
23393
23394 __extension__ static __inline void __attribute__ ((__always_inline__))
23395 vst2q_s64 (int64_t * __a, int64x2x2_t val)
23396 {
23397 __builtin_aarch64_simd_oi __o;
23398 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
23399 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
23400 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
23401 }
23402
23403 __extension__ static __inline void __attribute__ ((__always_inline__))
23404 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
23405 {
23406 __builtin_aarch64_simd_oi __o;
23407 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
23408 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
23409 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23410 }
23411
23412 __extension__ static __inline void __attribute__ ((__always_inline__))
23413 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
23414 {
23415 __builtin_aarch64_simd_oi __o;
23416 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
23417 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
23418 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23419 }
23420
23421 __extension__ static __inline void __attribute__ ((__always_inline__))
23422 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
23423 {
23424 __builtin_aarch64_simd_oi __o;
23425 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
23426 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
23427 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
23428 }
23429
23430 __extension__ static __inline void __attribute__ ((__always_inline__))
23431 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
23432 {
23433 __builtin_aarch64_simd_oi __o;
23434 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
23435 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
23436 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
23437 }
23438
23439 __extension__ static __inline void __attribute__ ((__always_inline__))
23440 vst2q_f16 (float16_t * __a, float16x8x2_t val)
23441 {
23442 __builtin_aarch64_simd_oi __o;
23443 __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[0], 0);
23444 __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[1], 1);
23445 __builtin_aarch64_st2v8hf (__a, __o);
23446 }
23447
23448 __extension__ static __inline void __attribute__ ((__always_inline__))
23449 vst2q_f32 (float32_t * __a, float32x4x2_t val)
23450 {
23451 __builtin_aarch64_simd_oi __o;
23452 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
23453 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
23454 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
23455 }
23456
23457 __extension__ static __inline void __attribute__ ((__always_inline__))
23458 vst2q_f64 (float64_t * __a, float64x2x2_t val)
23459 {
23460 __builtin_aarch64_simd_oi __o;
23461 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
23462 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
23463 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
23464 }
23465
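/* Editorial aside, not part of the original header: the vst2* intrinsics
   perform an interleaving store (ST2), writing val.val[0][i] and
   val.val[1][i] in alternating order -- e.g. merging separate real and
   imaginary planes into an interleaved complex buffer.  A hedged sketch,
   compiled out:  */
#if 0
void
interleave_complex (float32_t *__dst, float32x4_t __re, float32x4_t __im)
{
  float32x4x2_t __pair;
  __pair.val[0] = __re;
  __pair.val[1] = __im;
  vst2q_f32 (__dst, __pair);	/* __dst = {re0, im0, re1, im1, ...}.  */
}
#endif
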
23466 __extension__ static __inline void __attribute__ ((__always_inline__))
23467 vst3_s64 (int64_t * __a, int64x1x3_t val)
23468 {
23469 __builtin_aarch64_simd_ci __o;
23470 int64x2x3_t temp;
23471 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23472 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23473 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
23474 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
23475 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
23476 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
23477 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
23478 }
23479
23480 __extension__ static __inline void __attribute__ ((__always_inline__))
23481 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
23482 {
23483 __builtin_aarch64_simd_ci __o;
23484 uint64x2x3_t temp;
23485 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23486 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23487 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
23488 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
23489 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
23490 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
23491 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
23492 }
23493
23494 __extension__ static __inline void __attribute__ ((__always_inline__))
23495 vst3_f64 (float64_t * __a, float64x1x3_t val)
23496 {
23497 __builtin_aarch64_simd_ci __o;
23498 float64x2x3_t temp;
23499 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
23500 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
23501 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
23502 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
23503 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
23504 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
23505 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
23506 }
23507
23508 __extension__ static __inline void __attribute__ ((__always_inline__))
23509 vst3_s8 (int8_t * __a, int8x8x3_t val)
23510 {
23511 __builtin_aarch64_simd_ci __o;
23512 int8x16x3_t temp;
23513 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
23514 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
23515 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
23516 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
23517 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
23518 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
23519 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23520 }
23521
23522 __extension__ static __inline void __attribute__ ((__always_inline__))
23523 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
23524 {
23525 __builtin_aarch64_simd_ci __o;
23526 poly8x16x3_t temp;
23527 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
23528 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
23529 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
23530 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
23531 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
23532 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
23533 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23534 }
23535
23536 __extension__ static __inline void __attribute__ ((__always_inline__))
23537 vst3_s16 (int16_t * __a, int16x4x3_t val)
23538 {
23539 __builtin_aarch64_simd_ci __o;
23540 int16x8x3_t temp;
23541 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
23542 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
23543 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
23544 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
23545 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
23546 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
23547 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23548 }
23549
23550 __extension__ static __inline void __attribute__ ((__always_inline__))
23551 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
23552 {
23553 __builtin_aarch64_simd_ci __o;
23554 poly16x8x3_t temp;
23555 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
23556 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23557 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
23558 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
23559 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
23560 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
23561 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23562 }
23563
23564 __extension__ static __inline void __attribute__ ((__always_inline__))
23565 vst3_s32 (int32_t * __a, int32x2x3_t val)
23566 {
23567 __builtin_aarch64_simd_ci __o;
23568 int32x4x3_t temp;
23569 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23570 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23571 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
23572 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
23573 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
23574 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
23575 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
23576 }
23577
23578 __extension__ static __inline void __attribute__ ((__always_inline__))
23579 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
23580 {
23581 __builtin_aarch64_simd_ci __o;
23582 uint8x16x3_t temp;
23583 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23584 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23585 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
23586 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
23587 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
23588 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
23589 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23590 }
23591
23592 __extension__ static __inline void __attribute__ ((__always_inline__))
23593 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
23594 {
23595 __builtin_aarch64_simd_ci __o;
23596 uint16x8x3_t temp;
23597 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
23598 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
23599 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
23600 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
23601 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
23602 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
23603 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23604 }
23605
23606 __extension__ static __inline void __attribute__ ((__always_inline__))
23607 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
23608 {
23609 __builtin_aarch64_simd_ci __o;
23610 uint32x4x3_t temp;
23611 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
23612 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
23613 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
23614 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
23615 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
23616 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
23617 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
23618 }
23619
23620 __extension__ static __inline void __attribute__ ((__always_inline__))
23621 vst3_f16 (float16_t * __a, float16x4x3_t val)
23622 {
23623 __builtin_aarch64_simd_ci __o;
23624 float16x8x3_t temp;
23625 temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
23626 temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
23627 temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
23628 __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[0], 0);
23629 __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[1], 1);
23630 __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[2], 2);
23631 __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
23632 }
23633
23634 __extension__ static __inline void __attribute__ ((__always_inline__))
23635 vst3_f32 (float32_t * __a, float32x2x3_t val)
23636 {
23637 __builtin_aarch64_simd_ci __o;
23638 float32x4x3_t temp;
23639 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
23640 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23641 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
23642 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
23643 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
23644 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
23645 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
23646 }
23647
23648 __extension__ static __inline void __attribute__ ((__always_inline__))
23649 vst3q_s8 (int8_t * __a, int8x16x3_t val)
23650 {
23651 __builtin_aarch64_simd_ci __o;
23652 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23653 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23654 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23655 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23656 }
23657
23658 __extension__ static __inline void __attribute__ ((__always_inline__))
23659 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
23660 {
23661 __builtin_aarch64_simd_ci __o;
23662 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23663 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23664 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23665 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23666 }
23667
23668 __extension__ static __inline void __attribute__ ((__always_inline__))
23669 vst3q_s16 (int16_t * __a, int16x8x3_t val)
23670 {
23671 __builtin_aarch64_simd_ci __o;
23672 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23673 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23674 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23675 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23676 }
23677
23678 __extension__ static __inline void __attribute__ ((__always_inline__))
23679 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
23680 {
23681 __builtin_aarch64_simd_ci __o;
23682 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23683 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23684 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23685 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23686 }
23687
23688 __extension__ static __inline void __attribute__ ((__always_inline__))
23689 vst3q_s32 (int32_t * __a, int32x4x3_t val)
23690 {
23691 __builtin_aarch64_simd_ci __o;
23692 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
23693 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
23694 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
23695 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
23696 }
23697
23698 __extension__ static __inline void __attribute__ ((__always_inline__))
23699 vst3q_s64 (int64_t * __a, int64x2x3_t val)
23700 {
23701 __builtin_aarch64_simd_ci __o;
23702 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
23703 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
23704 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
23705 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
23706 }
23707
23708 __extension__ static __inline void __attribute__ ((__always_inline__))
23709 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
23710 {
23711 __builtin_aarch64_simd_ci __o;
23712 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23713 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23714 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23715 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23716 }
23717
23718 __extension__ static __inline void __attribute__ ((__always_inline__))
23719 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
23720 {
23721 __builtin_aarch64_simd_ci __o;
23722 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23723 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23724 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23725 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
23726 }
23727
23728 __extension__ static __inline void __attribute__ ((__always_inline__))
23729 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
23730 {
23731 __builtin_aarch64_simd_ci __o;
23732 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
23733 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
23734 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
23735 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
23736 }
23737
23738 __extension__ static __inline void __attribute__ ((__always_inline__))
23739 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
23740 {
23741 __builtin_aarch64_simd_ci __o;
23742 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
23743 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
23744 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
23745 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
23746 }
23747
23748 __extension__ static __inline void __attribute__ ((__always_inline__))
23749 vst3q_f16 (float16_t * __a, float16x8x3_t val)
23750 {
23751 __builtin_aarch64_simd_ci __o;
23752 __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[0], 0);
23753 __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[1], 1);
23754 __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[2], 2);
23755 __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
23756 }
23757
23758 __extension__ static __inline void __attribute__ ((__always_inline__))
23759 vst3q_f32 (float32_t * __a, float32x4x3_t val)
23760 {
23761 __builtin_aarch64_simd_ci __o;
23762 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
23763 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
23764 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
23765 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
23766 }
23767
23768 __extension__ static __inline void __attribute__ ((__always_inline__))
23769 vst3q_f64 (float64_t * __a, float64x2x3_t val)
23770 {
23771 __builtin_aarch64_simd_ci __o;
23772 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
23773 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
23774 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
23775 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
23776 }
23777
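/* Usage sketch for the Q-register vst3 forms (hypothetical values):
   storing three 4-lane float vectors writes 12 floats with stride-3
   interleaving, e.g. repacking planar R/G/B channels:

     float32_t __rgb[12];
     float32x4x3_t __c = { { __r, __g, __b } };   each a float32x4_t
     vst3q_f32 (__rgb, __c);
     __rgb = { r0,g0,b0, r1,g1,b1, r2,g2,b2, r3,g3,b3 }  */
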
23778 __extension__ static __inline void __attribute__ ((__always_inline__))
23779 vst4_s64 (int64_t * __a, int64x1x4_t val)
23780 {
23781 __builtin_aarch64_simd_xi __o;
23782 int64x2x4_t temp;
23783 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23784 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23785 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
23786 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
23787 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23788 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23789 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23790 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23791 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
23792 }
23793
23794 __extension__ static __inline void __attribute__ ((__always_inline__))
23795 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
23796 {
23797 __builtin_aarch64_simd_xi __o;
23798 uint64x2x4_t temp;
23799 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23800 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23801 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
23802 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
23803 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23804 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23805 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23806 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23807 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
23808 }
23809
23810 __extension__ static __inline void __attribute__ ((__always_inline__))
23811 vst4_f64 (float64_t * __a, float64x1x4_t val)
23812 {
23813 __builtin_aarch64_simd_xi __o;
23814 float64x2x4_t temp;
23815 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
23816 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
23817 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
23818 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
23819 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
23820 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
23821 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
23822 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
23823 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
23824 }
23825
23826 __extension__ static __inline void __attribute__ ((__always_inline__))
23827 vst4_s8 (int8_t * __a, int8x8x4_t val)
23828 {
23829 __builtin_aarch64_simd_xi __o;
23830 int8x16x4_t temp;
23831 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
23832 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
23833 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
23834 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
23835 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23836 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23837 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23838 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23839 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23840 }
23841
23842 __extension__ static __inline void __attribute__ ((__always_inline__))
23843 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
23844 {
23845 __builtin_aarch64_simd_xi __o;
23846 poly8x16x4_t temp;
23847 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
23848 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
23849 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
23850 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
23851 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23852 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23853 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23854 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23855 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23856 }
23857
23858 __extension__ static __inline void __attribute__ ((__always_inline__))
23859 vst4_s16 (int16_t * __a, int16x4x4_t val)
23860 {
23861 __builtin_aarch64_simd_xi __o;
23862 int16x8x4_t temp;
23863 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
23864 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
23865 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
23866 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
23867 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23868 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23869 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23870 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23871 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23872 }
23873
23874 __extension__ static __inline void __attribute__ ((__always_inline__))
23875 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
23876 {
23877 __builtin_aarch64_simd_xi __o;
23878 poly16x8x4_t temp;
23879 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
23880 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23881 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
23882 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
23883 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23884 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23885 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23886 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23887 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23888 }
23889
23890 __extension__ static __inline void __attribute__ ((__always_inline__))
23891 vst4_s32 (int32_t * __a, int32x2x4_t val)
23892 {
23893 __builtin_aarch64_simd_xi __o;
23894 int32x4x4_t temp;
23895 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23896 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23897 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
23898 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
23899 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
23900 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
23901 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
23902 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
23903 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
23904 }
23905
23906 __extension__ static __inline void __attribute__ ((__always_inline__))
23907 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
23908 {
23909 __builtin_aarch64_simd_xi __o;
23910 uint8x16x4_t temp;
23911 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23912 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23913 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
23914 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
23915 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23916 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23917 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23918 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23919 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
23920 }
23921
23922 __extension__ static __inline void __attribute__ ((__always_inline__))
23923 vst4_u16 (uint16_t * __a, uint16x4x4_t val)
23924 {
23925 __builtin_aarch64_simd_xi __o;
23926 uint16x8x4_t temp;
23927 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
23928 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
23929 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
23930 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
23931 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23932 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23933 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23934 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23935 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
23936 }
23937
23938 __extension__ static __inline void __attribute__ ((__always_inline__))
23939 vst4_u32 (uint32_t * __a, uint32x2x4_t val)
23940 {
23941 __builtin_aarch64_simd_xi __o;
23942 uint32x4x4_t temp;
23943 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
23944 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
23945 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
23946 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
23947 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
23948 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
23949 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
23950 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
23951 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
23952 }
23953
23954 __extension__ static __inline void __attribute__ ((__always_inline__))
23955 vst4_f16 (float16_t * __a, float16x4x4_t val)
23956 {
23957 __builtin_aarch64_simd_xi __o;
23958 float16x8x4_t temp;
23959 temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
23960 temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
23961 temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
23962 temp.val[3] = vcombine_f16 (val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0)));
23963 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[0], 0);
23964 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[1], 1);
23965 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[2], 2);
23966 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[3], 3);
23967 __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
23968 }
23969
23970 __extension__ static __inline void __attribute__ ((__always_inline__))
23971 vst4_f32 (float32_t * __a, float32x2x4_t val)
23972 {
23973 __builtin_aarch64_simd_xi __o;
23974 float32x4x4_t temp;
23975 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
23976 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23977 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
23978 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
23979 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
23980 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
23981 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
23982 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
23983 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
23984 }
23985
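/* As with vst3, the D-register vst4 forms zero-extend each half vector
   into a Q register only to populate the builtin's register tuple; the
   st4v2sf/st4v4hi patterns write just the low halves.  A sketch with
   hypothetical values:

     float32_t __out[8];
     float32x2x4_t __v = { { __x, __y, __z, __w } };   each float32x2_t
     vst4_f32 (__out, __v);
     __out = { x0,y0,z0,w0, x1,y1,z1,w1 }  */
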
23986 __extension__ static __inline void __attribute__ ((__always_inline__))
23987 vst4q_s8 (int8_t * __a, int8x16x4_t val)
23988 {
23989 __builtin_aarch64_simd_xi __o;
23990 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
23991 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
23992 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
23993 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
23994 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
23995 }
23996
23997 __extension__ static __inline void __attribute__ ((__always_inline__))
23998 vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
23999 {
24000 __builtin_aarch64_simd_xi __o;
24001 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24002 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24003 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24004 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24005 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24006 }
24007
24008 __extension__ static __inline void __attribute__ ((__always_inline__))
24009 vst4q_s16 (int16_t * __a, int16x8x4_t val)
24010 {
24011 __builtin_aarch64_simd_xi __o;
24012 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24013 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24014 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24015 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24016 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24017 }
24018
24019 __extension__ static __inline void __attribute__ ((__always_inline__))
24020 vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
24021 {
24022 __builtin_aarch64_simd_xi __o;
24023 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24024 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24025 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24026 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24027 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24028 }
24029
24030 __extension__ static __inline void __attribute__ ((__always_inline__))
24031 vst4q_s32 (int32_t * __a, int32x4x4_t val)
24032 {
24033 __builtin_aarch64_simd_xi __o;
24034 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
24035 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
24036 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
24037 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
24038 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
24039 }
24040
24041 __extension__ static __inline void __attribute__ ((__always_inline__))
24042 vst4q_s64 (int64_t * __a, int64x2x4_t val)
24043 {
24044 __builtin_aarch64_simd_xi __o;
24045 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
24046 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
24047 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
24048 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
24049 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
24050 }
24051
24052 __extension__ static __inline void __attribute__ ((__always_inline__))
24053 vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
24054 {
24055 __builtin_aarch64_simd_xi __o;
24056 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
24057 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
24058 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
24059 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
24060 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24061 }
24062
24063 __extension__ static __inline void __attribute__ ((__always_inline__))
24064 vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
24065 {
24066 __builtin_aarch64_simd_xi __o;
24067 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
24068 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
24069 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
24070 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
24071 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24072 }
24073
24074 __extension__ static __inline void __attribute__ ((__always_inline__))
24075 vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
24076 {
24077 __builtin_aarch64_simd_xi __o;
24078 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
24079 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
24080 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
24081 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
24082 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
24083 }
24084
24085 __extension__ static __inline void __attribute__ ((__always_inline__))
24086 vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
24087 {
24088 __builtin_aarch64_simd_xi __o;
24089 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
24090 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
24091 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
24092 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
24093 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
24094 }
24095
24096 __extension__ static __inline void __attribute__ ((__always_inline__))
24097 vst4q_f16 (float16_t * __a, float16x8x4_t val)
24098 {
24099 __builtin_aarch64_simd_xi __o;
24100 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[0], 0);
24101 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[1], 1);
24102 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[2], 2);
24103 __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[3], 3);
24104 __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o);
24105 }
24106
24107 __extension__ static __inline void __attribute__ ((__always_inline__))
24108 vst4q_f32 (float32_t * __a, float32x4x4_t val)
24109 {
24110 __builtin_aarch64_simd_xi __o;
24111 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
24112 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
24113 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
24114 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
24115 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24116 }
24117
24118 __extension__ static __inline void __attribute__ ((__always_inline__))
24119 vst4q_f64 (float64_t * __a, float64x2x4_t val)
24120 {
24121 __builtin_aarch64_simd_xi __o;
24122 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
24123 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
24124 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
24125 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
24126 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
24127 }
24128
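/* Usage sketch for the vst4q forms (hypothetical values): the classic
   case is re-interleaving four separated byte planes, e.g. RGBA pixels:

     uint8_t __pixels[64];
     uint8x16x4_t __p = { { __r, __g, __b, __a } };   each uint8x16_t
     vst4q_u8 (__pixels, __p);
     memory layout: r0,g0,b0,a0, r1,g1,b1,a1, ..., r15,g15,b15,a15  */
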
24129 /* vsub */
24130
24131 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
24132 vsubd_s64 (int64_t __a, int64_t __b)
24133 {
24134 return __a - __b;
24135 }
24136
24137 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
24138 vsubd_u64 (uint64_t __a, uint64_t __b)
24139 {
24140 return __a - __b;
24141 }
24142
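/* vsubd_s64 and vsubd_u64 map onto plain scalar subtraction; like the
   vector forms they wrap modulo 2^64 rather than saturating (contrast
   the saturating scalar form vqsubd_s64).  For example
   vsubd_u64 (1, 2) yields UINT64_MAX.  */
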
24143 /* vtbx1 */
24144
24145 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24146 vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
24147 {
24148 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
24149 vmov_n_u8 (8));
24150 int8x8_t __tbl = vtbl1_s8 (__tab, __idx);
24151
24152 return vbsl_s8 (__mask, __tbl, __r);
24153 }
24154
24155 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24156 vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
24157 {
24158 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
24159 uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);
24160
24161 return vbsl_u8 (__mask, __tbl, __r);
24162 }
24163
24164 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24165 vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
24166 {
24167 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
24168 poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);
24169
24170 return vbsl_p8 (__mask, __tbl, __r);
24171 }
24172
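/* vtbx1, unlike vtbl1, leaves the destination lane untouched for
   out-of-range indices.  The AArch64 TBX instruction only gives that
   behaviour relative to its full 16-byte table, so for an 8-byte table
   the range check is done by hand: __mask selects __tbl where
   __idx < 8 and falls back to __r otherwise.  A sketch with
   hypothetical values:

     int8x8_t __tab = { 10, 11, 12, 13, 14, 15, 16, 17 };
     int8x8_t __idx = {  0,  9,  3,  8,  1,  2,  4,  5 };
     vtbx1_s8 (__r, __tab, __idx)
       = { 10, __r[1], 13, __r[3], 11, 12, 14, 15 }  */
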
24173 /* vtbx3 */
24174
24175 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24176 vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
24177 {
24178 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
24179 vmov_n_u8 (24));
24180 int8x8_t __tbl = vtbl3_s8 (__tab, __idx);
24181
24182 return vbsl_s8 (__mask, __tbl, __r);
24183 }
24184
24185 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24186 vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
24187 {
24188 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
24189 uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);
24190
24191 return vbsl_u8 (__mask, __tbl, __r);
24192 }
24193
24194 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24195 vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
24196 {
24197 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
24198 poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);
24199
24200 return vbsl_p8 (__mask, __tbl, __r);
24201 }
24202
24203 /* vtbx4 */
24204
24205 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24206 vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx)
24207 {
24208 int8x8_t result;
24209 int8x16x2_t temp;
24210 __builtin_aarch64_simd_oi __o;
24211 temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]);
24212 temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]);
24213 __o = __builtin_aarch64_set_qregoiv16qi (__o,
24214 (int8x16_t) temp.val[0], 0);
24215 __o = __builtin_aarch64_set_qregoiv16qi (__o,
24216 (int8x16_t) temp.val[1], 1);
24217 result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx);
24218 return result;
24219 }
24220
24221 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24222 vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx)
24223 {
24224 uint8x8_t result;
24225 uint8x16x2_t temp;
24226 __builtin_aarch64_simd_oi __o;
24227 temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]);
24228 temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]);
24229 __o = __builtin_aarch64_set_qregoiv16qi (__o,
24230 (int8x16_t) temp.val[0], 0);
24231 __o = __builtin_aarch64_set_qregoiv16qi (__o,
24232 (int8x16_t) temp.val[1], 1);
24233 result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
24234 (int8x8_t)__idx);
24235 return result;
24236 }
24237
24238 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24239 vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx)
24240 {
24241 poly8x8_t result;
24242 poly8x16x2_t temp;
24243 __builtin_aarch64_simd_oi __o;
24244 temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]);
24245 temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]);
24246 __o = __builtin_aarch64_set_qregoiv16qi (__o,
24247 (int8x16_t) temp.val[0], 0);
24248 __o = __builtin_aarch64_set_qregoiv16qi (__o,
24249 (int8x16_t) temp.val[1], 1);
24250 result = (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
24251 (int8x8_t)__idx);
24252 return result;
24253 }
24254
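/* vtbx4 needs no hand-written mask: the four 8-byte tables are
   concatenated into two Q registers, so the two-register TBX
   instruction sees exactly the 32 valid table bytes and its native
   out-of-range handling (index >= 32 keeps the destination lane)
   matches the intrinsic's contract.  A sketch with hypothetical
   values:

     int8x8x4_t __tab;   32 table bytes in __tab.val[0..3]
     int8x8_t __res = vtbx4_s8 (__r, __tab, __idx);
     __res[i] == table byte __idx[i] when __idx[i] < 32, else __r[i]  */
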
24255 /* vtrn */
24256
24257 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
24258 vtrn1_f16 (float16x4_t __a, float16x4_t __b)
24259 {
24260 #ifdef __AARCH64EB__
24261 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
24262 #else
24263 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
24264 #endif
24265 }
24266
24267 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
24268 vtrn1_f32 (float32x2_t __a, float32x2_t __b)
24269 {
24270 #ifdef __AARCH64EB__
24271 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24272 #else
24273 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24274 #endif
24275 }
24276
24277 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24278 vtrn1_p8 (poly8x8_t __a, poly8x8_t __b)
24279 {
24280 #ifdef __AARCH64EB__
24281 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
24282 #else
24283 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
24284 #endif
24285 }
24286
24287 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
24288 vtrn1_p16 (poly16x4_t __a, poly16x4_t __b)
24289 {
24290 #ifdef __AARCH64EB__
24291 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
24292 #else
24293 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
24294 #endif
24295 }
24296
24297 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24298 vtrn1_s8 (int8x8_t __a, int8x8_t __b)
24299 {
24300 #ifdef __AARCH64EB__
24301 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
24302 #else
24303 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
24304 #endif
24305 }
24306
24307 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24308 vtrn1_s16 (int16x4_t __a, int16x4_t __b)
24309 {
24310 #ifdef __AARCH64EB__
24311 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
24312 #else
24313 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
24314 #endif
24315 }
24316
24317 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24318 vtrn1_s32 (int32x2_t __a, int32x2_t __b)
24319 {
24320 #ifdef __AARCH64EB__
24321 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24322 #else
24323 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24324 #endif
24325 }
24326
24327 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24328 vtrn1_u8 (uint8x8_t __a, uint8x8_t __b)
24329 {
24330 #ifdef __AARCH64EB__
24331 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
24332 #else
24333 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
24334 #endif
24335 }
24336
24337 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24338 vtrn1_u16 (uint16x4_t __a, uint16x4_t __b)
24339 {
24340 #ifdef __AARCH64EB__
24341 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
24342 #else
24343 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
24344 #endif
24345 }
24346
24347 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24348 vtrn1_u32 (uint32x2_t __a, uint32x2_t __b)
24349 {
24350 #ifdef __AARCH64EB__
24351 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24352 #else
24353 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24354 #endif
24355 }
24356
24357 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
24358 vtrn1q_f16 (float16x8_t __a, float16x8_t __b)
24359 {
24360 #ifdef __AARCH64EB__
24361 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
24362 #else
24363 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
24364 #endif
24365 }
24366
24367 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24368 vtrn1q_f32 (float32x4_t __a, float32x4_t __b)
24369 {
24370 #ifdef __AARCH64EB__
24371 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
24372 #else
24373 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
24374 #endif
24375 }
24376
24377 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24378 vtrn1q_f64 (float64x2_t __a, float64x2_t __b)
24379 {
24380 #ifdef __AARCH64EB__
24381 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24382 #else
24383 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24384 #endif
24385 }
24386
24387 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
24388 vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b)
24389 {
24390 #ifdef __AARCH64EB__
24391 return __builtin_shuffle (__a, __b,
24392 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
24393 #else
24394 return __builtin_shuffle (__a, __b,
24395 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
24396 #endif
24397 }
24398
24399 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
24400 vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b)
24401 {
24402 #ifdef __AARCH64EB__
24403 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
24404 #else
24405 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
24406 #endif
24407 }
24408
24409 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24410 vtrn1q_s8 (int8x16_t __a, int8x16_t __b)
24411 {
24412 #ifdef __AARCH64EB__
24413 return __builtin_shuffle (__a, __b,
24414 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
24415 #else
24416 return __builtin_shuffle (__a, __b,
24417 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
24418 #endif
24419 }
24420
24421 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24422 vtrn1q_s16 (int16x8_t __a, int16x8_t __b)
24423 {
24424 #ifdef __AARCH64EB__
24425 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
24426 #else
24427 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
24428 #endif
24429 }
24430
24431 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24432 vtrn1q_s32 (int32x4_t __a, int32x4_t __b)
24433 {
24434 #ifdef __AARCH64EB__
24435 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
24436 #else
24437 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
24438 #endif
24439 }
24440
24441 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24442 vtrn1q_s64 (int64x2_t __a, int64x2_t __b)
24443 {
24444 #ifdef __AARCH64EB__
24445 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24446 #else
24447 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24448 #endif
24449 }
24450
24451 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24452 vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b)
24453 {
24454 #ifdef __AARCH64EB__
24455 return __builtin_shuffle (__a, __b,
24456 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
24457 #else
24458 return __builtin_shuffle (__a, __b,
24459 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
24460 #endif
24461 }
24462
24463 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24464 vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b)
24465 {
24466 #ifdef __AARCH64EB__
24467 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
24468 #else
24469 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
24470 #endif
24471 }
24472
24473 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24474 vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
24475 {
24476 #ifdef __AARCH64EB__
24477 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
24478 #else
24479 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
24480 #endif
24481 }
24482
24483 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24484 vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
24485 {
24486 #ifdef __AARCH64EB__
24487 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24488 #else
24489 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24490 #endif
24491 }
24492
24493 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
24494 vtrn2_f16 (float16x4_t __a, float16x4_t __b)
24495 {
24496 #ifdef __AARCH64EB__
24497 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
24498 #else
24499 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
24500 #endif
24501 }
24502
24503 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
24504 vtrn2_f32 (float32x2_t __a, float32x2_t __b)
24505 {
24506 #ifdef __AARCH64EB__
24507 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24508 #else
24509 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24510 #endif
24511 }
24512
24513 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24514 vtrn2_p8 (poly8x8_t __a, poly8x8_t __b)
24515 {
24516 #ifdef __AARCH64EB__
24517 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24518 #else
24519 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24520 #endif
24521 }
24522
24523 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
24524 vtrn2_p16 (poly16x4_t __a, poly16x4_t __b)
24525 {
24526 #ifdef __AARCH64EB__
24527 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
24528 #else
24529 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
24530 #endif
24531 }
24532
24533 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24534 vtrn2_s8 (int8x8_t __a, int8x8_t __b)
24535 {
24536 #ifdef __AARCH64EB__
24537 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24538 #else
24539 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24540 #endif
24541 }
24542
24543 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24544 vtrn2_s16 (int16x4_t __a, int16x4_t __b)
24545 {
24546 #ifdef __AARCH64EB__
24547 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
24548 #else
24549 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
24550 #endif
24551 }
24552
24553 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24554 vtrn2_s32 (int32x2_t __a, int32x2_t __b)
24555 {
24556 #ifdef __AARCH64EB__
24557 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24558 #else
24559 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24560 #endif
24561 }
24562
24563 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24564 vtrn2_u8 (uint8x8_t __a, uint8x8_t __b)
24565 {
24566 #ifdef __AARCH64EB__
24567 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24568 #else
24569 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24570 #endif
24571 }
24572
24573 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24574 vtrn2_u16 (uint16x4_t __a, uint16x4_t __b)
24575 {
24576 #ifdef __AARCH64EB__
24577 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
24578 #else
24579 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
24580 #endif
24581 }
24582
24583 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24584 vtrn2_u32 (uint32x2_t __a, uint32x2_t __b)
24585 {
24586 #ifdef __AARCH64EB__
24587 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24588 #else
24589 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24590 #endif
24591 }
24592
24593 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
24594 vtrn2q_f16 (float16x8_t __a, float16x8_t __b)
24595 {
24596 #ifdef __AARCH64EB__
24597 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24598 #else
24599 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24600 #endif
24601 }
24602
24603 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24604 vtrn2q_f32 (float32x4_t __a, float32x4_t __b)
24605 {
24606 #ifdef __AARCH64EB__
24607 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
24608 #else
24609 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
24610 #endif
24611 }
24612
24613 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24614 vtrn2q_f64 (float64x2_t __a, float64x2_t __b)
24615 {
24616 #ifdef __AARCH64EB__
24617 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24618 #else
24619 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24620 #endif
24621 }
24622
24623 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
24624 vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b)
24625 {
24626 #ifdef __AARCH64EB__
24627 return __builtin_shuffle (__a, __b,
24628 (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
24629 #else
24630 return __builtin_shuffle (__a, __b,
24631 (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
24632 #endif
24633 }
24634
24635 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
24636 vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b)
24637 {
24638 #ifdef __AARCH64EB__
24639 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24640 #else
24641 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24642 #endif
24643 }
24644
24645 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24646 vtrn2q_s8 (int8x16_t __a, int8x16_t __b)
24647 {
24648 #ifdef __AARCH64EB__
24649 return __builtin_shuffle (__a, __b,
24650 (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
24651 #else
24652 return __builtin_shuffle (__a, __b,
24653 (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
24654 #endif
24655 }
24656
24657 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24658 vtrn2q_s16 (int16x8_t __a, int16x8_t __b)
24659 {
24660 #ifdef __AARCH64EB__
24661 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24662 #else
24663 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24664 #endif
24665 }
24666
24667 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24668 vtrn2q_s32 (int32x4_t __a, int32x4_t __b)
24669 {
24670 #ifdef __AARCH64EB__
24671 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
24672 #else
24673 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
24674 #endif
24675 }
24676
24677 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24678 vtrn2q_s64 (int64x2_t __a, int64x2_t __b)
24679 {
24680 #ifdef __AARCH64EB__
24681 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24682 #else
24683 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24684 #endif
24685 }
24686
24687 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24688 vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b)
24689 {
24690 #ifdef __AARCH64EB__
24691 return __builtin_shuffle (__a, __b,
24692 (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
24693 #else
24694 return __builtin_shuffle (__a, __b,
24695 (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
24696 #endif
24697 }
24698
24699 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24700 vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b)
24701 {
24702 #ifdef __AARCH64EB__
24703 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
24704 #else
24705 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
24706 #endif
24707 }
24708
24709 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24710 vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b)
24711 {
24712 #ifdef __AARCH64EB__
24713 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
24714 #else
24715 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
24716 #endif
24717 }
24718
24719 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24720 vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
24721 {
24722 #ifdef __AARCH64EB__
24723 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24724 #else
24725 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24726 #endif
24727 }
24728
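/* vtrn1 interleaves the even-indexed lanes of __a and __b, and vtrn2
   the odd-indexed ones, so the pair performs a 2x2 transpose of
   adjacent lanes.  For example, with hypothetical values,
   vtrn1_s32 (__a, __b) = { __a[0], __b[0] } and
   vtrn2_s32 (__a, __b) = { __a[1], __b[1] }.  The __AARCH64EB__
   variants exist because GCC numbers vector lanes back to front on
   big-endian, so the shuffle masks are mirrored to produce the same
   architectural result on either endianness.  */
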
24729 __extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
24730 vtrn_f16 (float16x4_t __a, float16x4_t __b)
24731 {
24732 return (float16x4x2_t) {vtrn1_f16 (__a, __b), vtrn2_f16 (__a, __b)};
24733 }
24734
24735 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
24736 vtrn_f32 (float32x2_t __a, float32x2_t __b)
24737 {
24738 return (float32x2x2_t) {vtrn1_f32 (__a, __b), vtrn2_f32 (__a, __b)};
24739 }
24740
24741 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
24742 vtrn_p8 (poly8x8_t __a, poly8x8_t __b)
24743 {
24744 return (poly8x8x2_t) {vtrn1_p8 (__a, __b), vtrn2_p8 (__a, __b)};
24745 }
24746
24747 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
24748 vtrn_p16 (poly16x4_t __a, poly16x4_t __b)
24749 {
24750 return (poly16x4x2_t) {vtrn1_p16 (__a, __b), vtrn2_p16 (__a, __b)};
24751 }
24752
24753 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
24754 vtrn_s8 (int8x8_t __a, int8x8_t __b)
24755 {
24756 return (int8x8x2_t) {vtrn1_s8 (__a, __b), vtrn2_s8 (__a, __b)};
24757 }
24758
24759 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
24760 vtrn_s16 (int16x4_t __a, int16x4_t __b)
24761 {
24762 return (int16x4x2_t) {vtrn1_s16 (__a, __b), vtrn2_s16 (__a, __b)};
24763 }
24764
24765 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
24766 vtrn_s32 (int32x2_t __a, int32x2_t __b)
24767 {
24768 return (int32x2x2_t) {vtrn1_s32 (__a, __b), vtrn2_s32 (__a, __b)};
24769 }
24770
24771 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
24772 vtrn_u8 (uint8x8_t __a, uint8x8_t __b)
24773 {
24774 return (uint8x8x2_t) {vtrn1_u8 (__a, __b), vtrn2_u8 (__a, __b)};
24775 }
24776
24777 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
24778 vtrn_u16 (uint16x4_t __a, uint16x4_t __b)
24779 {
24780 return (uint16x4x2_t) {vtrn1_u16 (__a, __b), vtrn2_u16 (__a, __b)};
24781 }
24782
24783 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
24784 vtrn_u32 (uint32x2_t __a, uint32x2_t __b)
24785 {
24786 return (uint32x2x2_t) {vtrn1_u32 (__a, __b), vtrn2_u32 (__a, __b)};
24787 }
24788
24789 __extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
24790 vtrnq_f16 (float16x8_t __a, float16x8_t __b)
24791 {
24792 return (float16x8x2_t) {vtrn1q_f16 (__a, __b), vtrn2q_f16 (__a, __b)};
24793 }
24794
24795 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
24796 vtrnq_f32 (float32x4_t __a, float32x4_t __b)
24797 {
24798 return (float32x4x2_t) {vtrn1q_f32 (__a, __b), vtrn2q_f32 (__a, __b)};
24799 }
24800
24801 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
24802 vtrnq_p8 (poly8x16_t __a, poly8x16_t __b)
24803 {
24804 return (poly8x16x2_t) {vtrn1q_p8 (__a, __b), vtrn2q_p8 (__a, __b)};
24805 }
24806
24807 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
24808 vtrnq_p16 (poly16x8_t __a, poly16x8_t __b)
24809 {
24810 return (poly16x8x2_t) {vtrn1q_p16 (__a, __b), vtrn2q_p16 (__a, __b)};
24811 }
24812
24813 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
24814 vtrnq_s8 (int8x16_t __a, int8x16_t __b)
24815 {
24816 return (int8x16x2_t) {vtrn1q_s8 (__a, __b), vtrn2q_s8 (__a, __b)};
24817 }
24818
24819 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
24820 vtrnq_s16 (int16x8_t __a, int16x8_t __b)
24821 {
24822 return (int16x8x2_t) {vtrn1q_s16 (__a, __b), vtrn2q_s16 (__a, __b)};
24823 }
24824
24825 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
24826 vtrnq_s32 (int32x4_t __a, int32x4_t __b)
24827 {
24828 return (int32x4x2_t) {vtrn1q_s32 (__a, __b), vtrn2q_s32 (__a, __b)};
24829 }
24830
24831 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
24832 vtrnq_u8 (uint8x16_t __a, uint8x16_t __b)
24833 {
24834 return (uint8x16x2_t) {vtrn1q_u8 (__a, __b), vtrn2q_u8 (__a, __b)};
24835 }
24836
24837 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
24838 vtrnq_u16 (uint16x8_t __a, uint16x8_t __b)
24839 {
24840 return (uint16x8x2_t) {vtrn1q_u16 (__a, __b), vtrn2q_u16 (__a, __b)};
24841 }
24842
24843 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
24844 vtrnq_u32 (uint32x4_t __a, uint32x4_t __b)
24845 {
24846 return (uint32x4x2_t) {vtrn1q_u32 (__a, __b), vtrn2q_u32 (__a, __b)};
24847 }
24848
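/* The combined forms return both halves at once; vtrn_s32 is in effect
   a 2x2 matrix transpose.  A sketch with hypothetical values:

     int32x2_t __row0 = { 1, 2 };
     int32x2_t __row1 = { 3, 4 };
     int32x2x2_t __t = vtrn_s32 (__row0, __row1);
     __t.val[0] = { 1, 3 },  __t.val[1] = { 2, 4 }  */
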
24849 /* vtst */
24850
24851 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24852 vtst_s8 (int8x8_t __a, int8x8_t __b)
24853 {
24854 return (uint8x8_t) ((__a & __b) != 0);
24855 }
24856
24857 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24858 vtst_s16 (int16x4_t __a, int16x4_t __b)
24859 {
24860 return (uint16x4_t) ((__a & __b) != 0);
24861 }
24862
24863 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24864 vtst_s32 (int32x2_t __a, int32x2_t __b)
24865 {
24866 return (uint32x2_t) ((__a & __b) != 0);
24867 }
24868
24869 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24870 vtst_s64 (int64x1_t __a, int64x1_t __b)
24871 {
24872 return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0));
24873 }
24874
24875 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24876 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
24877 {
24878 return ((__a & __b) != 0);
24879 }
24880
24881 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24882 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
24883 {
24884 return ((__a & __b) != 0);
24885 }
24886
24887 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24888 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
24889 {
24890 return ((__a & __b) != 0);
24891 }
24892
24893 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24894 vtst_u64 (uint64x1_t __a, uint64x1_t __b)
24895 {
24896 return ((__a & __b) != __AARCH64_UINT64_C (0));
24897 }
24898
24899 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24900 vtstq_s8 (int8x16_t __a, int8x16_t __b)
24901 {
24902 return (uint8x16_t) ((__a & __b) != 0);
24903 }
24904
24905 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24906 vtstq_s16 (int16x8_t __a, int16x8_t __b)
24907 {
24908 return (uint16x8_t) ((__a & __b) != 0);
24909 }
24910
24911 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24912 vtstq_s32 (int32x4_t __a, int32x4_t __b)
24913 {
24914 return (uint32x4_t) ((__a & __b) != 0);
24915 }
24916
24917 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24918 vtstq_s64 (int64x2_t __a, int64x2_t __b)
24919 {
24920 return (uint64x2_t) ((__a & __b) != __AARCH64_INT64_C (0));
24921 }
24922
24923 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24924 vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
24925 {
24926 return ((__a & __b) != 0);
24927 }
24928
24929 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24930 vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
24931 {
24932 return ((__a & __b) != 0);
24933 }
24934
24935 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24936 vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
24937 {
24938 return ((__a & __b) != 0);
24939 }
24940
24941 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24942 vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
24943 {
24944 return ((__a & __b) != __AARCH64_UINT64_C (0));
24945 }
24946
24947 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
24948 vtstd_s64 (int64_t __a, int64_t __b)
24949 {
24950 return (__a & __b) ? -1ll : 0ll;
24951 }
24952
24953 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
24954 vtstd_u64 (uint64_t __a, uint64_t __b)
24955 {
24956 return (__a & __b) ? -1ll : 0ll;
24957 }
24958
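/* vtst is a lanewise "test bits": each result lane is all ones when
   (__a & __b) has any bit set, else all zeros, which makes it a handy
   mask generator for vbsl.  A sketch with hypothetical values:

     uint8x8_t __flags = { 1, 2, 0, 4, 0, 8, 3, 0 };
     uint8x8_t __bit0  = vdup_n_u8 (1);
     vtst_u8 (__flags, __bit0) = { 0xff, 0, 0, 0, 0, 0, 0xff, 0 }  */
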
24959 /* vuqadd */
24960
24961 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24962 vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
24963 {
24964 return __builtin_aarch64_suqaddv8qi_ssu (__a, __b);
24965 }
24966
24967 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24968 vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
24969 {
24970 return __builtin_aarch64_suqaddv4hi_ssu (__a, __b);
24971 }
24972
24973 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24974 vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
24975 {
24976 return __builtin_aarch64_suqaddv2si_ssu (__a, __b);
24977 }
24978
24979 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24980 vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
24981 {
24982 return (int64x1_t) {__builtin_aarch64_suqadddi_ssu (__a[0], __b[0])};
24983 }
24984
24985 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24986 vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
24987 {
24988 return __builtin_aarch64_suqaddv16qi_ssu (__a, __b);
24989 }
24990
24991 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24992 vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
24993 {
24994 return __builtin_aarch64_suqaddv8hi_ssu (__a, __b);
24995 }
24996
24997 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24998 vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
24999 {
25000 return __builtin_aarch64_suqaddv4si_ssu (__a, __b);
25001 }
25002
25003 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25004 vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
25005 {
25006 return __builtin_aarch64_suqaddv2di_ssu (__a, __b);
25007 }
25008
25009 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
25010 vuqaddb_s8 (int8_t __a, uint8_t __b)
25011 {
25012 return __builtin_aarch64_suqaddqi_ssu (__a, __b);
25013 }
25014
25015 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
25016 vuqaddh_s16 (int16_t __a, uint16_t __b)
25017 {
25018 return __builtin_aarch64_suqaddhi_ssu (__a, __b);
25019 }
25020
25021 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
25022 vuqadds_s32 (int32_t __a, uint32_t __b)
25023 {
25024 return __builtin_aarch64_suqaddsi_ssu (__a, __b);
25025 }
25026
25027 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
25028 vuqaddd_s64 (int64_t __a, uint64_t __b)
25029 {
25030 return __builtin_aarch64_suqadddi_ssu (__a, __b);
25031 }
25032
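/* vuqadd performs a signed saturating add of an unsigned operand (the
   SUQADD family): the unsigned value is added to the signed
   accumulator and the result saturates to the signed range.  For
   example vuqaddb_s8 (100, 200) = 127, while vuqaddb_s8 (-100, 50)
   = -50 exactly.  */
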
25033 #define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \
25034 __extension__ static __inline rettype \
25035 __attribute__ ((__always_inline__)) \
25036 v ## op ## Q ## _ ## funcsuffix (intype __a, intype __b) \
25037 { \
25038 return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (__a, __b), \
25039 v ## op ## 2 ## Q ## _ ## funcsuffix (__a, __b)}; \
25040 }
25041
25042 #define __INTERLEAVE_LIST(op) \
25043 __DEFINTERLEAVE (op, float16x4x2_t, float16x4_t, f16,) \
25044 __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \
25045 __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \
25046 __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \
25047 __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \
25048 __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \
25049 __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \
25050 __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \
25051 __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \
25052 __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \
25053 __DEFINTERLEAVE (op, float16x8x2_t, float16x8_t, f16, q) \
25054 __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \
25055 __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \
25056 __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \
25057 __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \
25058 __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \
25059 __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \
25060 __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \
25061 __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \
25062 __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
25063
25064 /* vuzp */
25065
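/* UZP1 concatenates the even-indexed elements of its two inputs and UZP2
   the odd-indexed ones.  With a = {a0, a1, a2, a3} and b = {b0, b1, b2, b3}:

     vuzp1_f16 (a, b)  ==>  {a0, a2, b0, b2}
     vuzp2_f16 (a, b)  ==>  {a1, a3, b1, b3}

   The __AARCH64EB__ masks select the same architectural lanes under GCC's
   reversed element numbering on big-endian targets.  */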
25066 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
25067 vuzp1_f16 (float16x4_t __a, float16x4_t __b)
25068 {
25069 #ifdef __AARCH64EB__
25070 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
25071 #else
25072 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
25073 #endif
25074 }
25075
25076 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
25077 vuzp1_f32 (float32x2_t __a, float32x2_t __b)
25078 {
25079 #ifdef __AARCH64EB__
25080 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25081 #else
25082 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25083 #endif
25084 }
25085
25086 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
25087 vuzp1_p8 (poly8x8_t __a, poly8x8_t __b)
25088 {
25089 #ifdef __AARCH64EB__
25090 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25091 #else
25092 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25093 #endif
25094 }
25095
25096 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
25097 vuzp1_p16 (poly16x4_t __a, poly16x4_t __b)
25098 {
25099 #ifdef __AARCH64EB__
25100 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
25101 #else
25102 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
25103 #endif
25104 }
25105
25106 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25107 vuzp1_s8 (int8x8_t __a, int8x8_t __b)
25108 {
25109 #ifdef __AARCH64EB__
25110 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25111 #else
25112 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25113 #endif
25114 }
25115
25116 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25117 vuzp1_s16 (int16x4_t __a, int16x4_t __b)
25118 {
25119 #ifdef __AARCH64EB__
25120 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
25121 #else
25122 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
25123 #endif
25124 }
25125
25126 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25127 vuzp1_s32 (int32x2_t __a, int32x2_t __b)
25128 {
25129 #ifdef __AARCH64EB__
25130 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25131 #else
25132 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25133 #endif
25134 }
25135
25136 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25137 vuzp1_u8 (uint8x8_t __a, uint8x8_t __b)
25138 {
25139 #ifdef __AARCH64EB__
25140 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25141 #else
25142 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25143 #endif
25144 }
25145
25146 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25147 vuzp1_u16 (uint16x4_t __a, uint16x4_t __b)
25148 {
25149 #ifdef __AARCH64EB__
25150 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
25151 #else
25152 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
25153 #endif
25154 }
25155
25156 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25157 vuzp1_u32 (uint32x2_t __a, uint32x2_t __b)
25158 {
25159 #ifdef __AARCH64EB__
25160 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25161 #else
25162 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25163 #endif
25164 }
25165
25166 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
25167 vuzp1q_f16 (float16x8_t __a, float16x8_t __b)
25168 {
25169 #ifdef __AARCH64EB__
25170 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25171 #else
25172 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25173 #endif
25174 }
25175
25176 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25177 vuzp1q_f32 (float32x4_t __a, float32x4_t __b)
25178 {
25179 #ifdef __AARCH64EB__
25180 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
25181 #else
25182 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
25183 #endif
25184 }
25185
25186 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25187 vuzp1q_f64 (float64x2_t __a, float64x2_t __b)
25188 {
25189 #ifdef __AARCH64EB__
25190 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25191 #else
25192 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25193 #endif
25194 }
25195
25196 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25197 vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b)
25198 {
25199 #ifdef __AARCH64EB__
25200 return __builtin_shuffle (__a, __b, (uint8x16_t)
25201 {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
25202 #else
25203 return __builtin_shuffle (__a, __b, (uint8x16_t)
25204 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
25205 #endif
25206 }
25207
25208 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25209 vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b)
25210 {
25211 #ifdef __AARCH64EB__
25212 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25213 #else
25214 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25215 #endif
25216 }
25217
25218 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25219 vuzp1q_s8 (int8x16_t __a, int8x16_t __b)
25220 {
25221 #ifdef __AARCH64EB__
25222 return __builtin_shuffle (__a, __b,
25223 (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
25224 #else
25225 return __builtin_shuffle (__a, __b,
25226 (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
25227 #endif
25228 }
25229
25230 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25231 vuzp1q_s16 (int16x8_t __a, int16x8_t __b)
25232 {
25233 #ifdef __AARCH64EB__
25234 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25235 #else
25236 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25237 #endif
25238 }
25239
25240 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25241 vuzp1q_s32 (int32x4_t __a, int32x4_t __b)
25242 {
25243 #ifdef __AARCH64EB__
25244 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
25245 #else
25246 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
25247 #endif
25248 }
25249
25250 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25251 vuzp1q_s64 (int64x2_t __a, int64x2_t __b)
25252 {
25253 #ifdef __AARCH64EB__
25254 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25255 #else
25256 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25257 #endif
25258 }
25259
25260 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25261 vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b)
25262 {
25263 #ifdef __AARCH64EB__
25264 return __builtin_shuffle (__a, __b,
25265 (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
25266 #else
25267 return __builtin_shuffle (__a, __b,
25268 (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
25269 #endif
25270 }
25271
25272 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25273 vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b)
25274 {
25275 #ifdef __AARCH64EB__
25276 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
25277 #else
25278 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
25279 #endif
25280 }
25281
25282 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25283 vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b)
25284 {
25285 #ifdef __AARCH64EB__
25286 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
25287 #else
25288 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
25289 #endif
25290 }
25291
25292 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25293 vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
25294 {
25295 #ifdef __AARCH64EB__
25296 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25297 #else
25298 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25299 #endif
25300 }
25301
25302 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
25303 vuzp2_f16 (float16x4_t __a, float16x4_t __b)
25304 {
25305 #ifdef __AARCH64EB__
25306 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
25307 #else
25308 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
25309 #endif
25310 }
25311
25312 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
25313 vuzp2_f32 (float32x2_t __a, float32x2_t __b)
25314 {
25315 #ifdef __AARCH64EB__
25316 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25317 #else
25318 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25319 #endif
25320 }
25321
25322 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
25323 vuzp2_p8 (poly8x8_t __a, poly8x8_t __b)
25324 {
25325 #ifdef __AARCH64EB__
25326 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25327 #else
25328 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25329 #endif
25330 }
25331
25332 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
25333 vuzp2_p16 (poly16x4_t __a, poly16x4_t __b)
25334 {
25335 #ifdef __AARCH64EB__
25336 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
25337 #else
25338 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
25339 #endif
25340 }
25341
25342 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25343 vuzp2_s8 (int8x8_t __a, int8x8_t __b)
25344 {
25345 #ifdef __AARCH64EB__
25346 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25347 #else
25348 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25349 #endif
25350 }
25351
25352 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25353 vuzp2_s16 (int16x4_t __a, int16x4_t __b)
25354 {
25355 #ifdef __AARCH64EB__
25356 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
25357 #else
25358 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
25359 #endif
25360 }
25361
25362 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25363 vuzp2_s32 (int32x2_t __a, int32x2_t __b)
25364 {
25365 #ifdef __AARCH64EB__
25366 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25367 #else
25368 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25369 #endif
25370 }
25371
25372 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25373 vuzp2_u8 (uint8x8_t __a, uint8x8_t __b)
25374 {
25375 #ifdef __AARCH64EB__
25376 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25377 #else
25378 return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25379 #endif
25380 }
25381
25382 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25383 vuzp2_u16 (uint16x4_t __a, uint16x4_t __b)
25384 {
25385 #ifdef __AARCH64EB__
25386 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
25387 #else
25388 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
25389 #endif
25390 }
25391
25392 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25393 vuzp2_u32 (uint32x2_t __a, uint32x2_t __b)
25394 {
25395 #ifdef __AARCH64EB__
25396 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25397 #else
25398 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25399 #endif
25400 }
25401
25402 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
25403 vuzp2q_f16 (float16x8_t __a, float16x8_t __b)
25404 {
25405 #ifdef __AARCH64EB__
25406 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25407 #else
25408 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25409 #endif
25410 }
25411
25412 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25413 vuzp2q_f32 (float32x4_t __a, float32x4_t __b)
25414 {
25415 #ifdef __AARCH64EB__
25416 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
25417 #else
25418 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
25419 #endif
25420 }
25421
25422 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25423 vuzp2q_f64 (float64x2_t __a, float64x2_t __b)
25424 {
25425 #ifdef __AARCH64EB__
25426 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25427 #else
25428 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25429 #endif
25430 }
25431
25432 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25433 vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b)
25434 {
25435 #ifdef __AARCH64EB__
25436 return __builtin_shuffle (__a, __b,
25437 (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
25438 #else
25439 return __builtin_shuffle (__a, __b,
25440 (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
25441 #endif
25442 }
25443
25444 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25445 vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b)
25446 {
25447 #ifdef __AARCH64EB__
25448 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25449 #else
25450 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25451 #endif
25452 }
25453
25454 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25455 vuzp2q_s8 (int8x16_t __a, int8x16_t __b)
25456 {
25457 #ifdef __AARCH64EB__
25458 return __builtin_shuffle (__a, __b,
25459 (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
25460 #else
25461 return __builtin_shuffle (__a, __b,
25462 (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
25463 #endif
25464 }
25465
25466 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25467 vuzp2q_s16 (int16x8_t __a, int16x8_t __b)
25468 {
25469 #ifdef __AARCH64EB__
25470 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25471 #else
25472 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25473 #endif
25474 }
25475
25476 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25477 vuzp2q_s32 (int32x4_t __a, int32x4_t __b)
25478 {
25479 #ifdef __AARCH64EB__
25480 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
25481 #else
25482 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
25483 #endif
25484 }
25485
25486 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25487 vuzp2q_s64 (int64x2_t __a, int64x2_t __b)
25488 {
25489 #ifdef __AARCH64EB__
25490 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25491 #else
25492 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25493 #endif
25494 }
25495
25496 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25497 vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b)
25498 {
25499 #ifdef __AARCH64EB__
25500 return __builtin_shuffle (__a, __b, (uint8x16_t)
25501 {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
25502 #else
25503 return __builtin_shuffle (__a, __b, (uint8x16_t)
25504 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
25505 #endif
25506 }
25507
25508 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25509 vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b)
25510 {
25511 #ifdef __AARCH64EB__
25512 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
25513 #else
25514 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
25515 #endif
25516 }
25517
25518 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25519 vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b)
25520 {
25521 #ifdef __AARCH64EB__
25522 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
25523 #else
25524 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
25525 #endif
25526 }
25527
25528 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25529 vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
25530 {
25531 #ifdef __AARCH64EB__
25532 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25533 #else
25534 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25535 #endif
25536 }
25537
25538 __INTERLEAVE_LIST (uzp)
25539
25540 /* vzip */
25541
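/* ZIP1 interleaves the low halves of its two inputs and ZIP2 the high
   halves.  With a = {a0, a1, a2, a3} and b = {b0, b1, b2, b3}:

     vzip1_f16 (a, b)  ==>  {a0, b0, a1, b1}
     vzip2_f16 (a, b)  ==>  {a2, b2, a3, b3}  */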
25542 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
25543 vzip1_f16 (float16x4_t __a, float16x4_t __b)
25544 {
25545 #ifdef __AARCH64EB__
25546 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
25547 #else
25548 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
25549 #endif
25550 }
25551
25552 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
25553 vzip1_f32 (float32x2_t __a, float32x2_t __b)
25554 {
25555 #ifdef __AARCH64EB__
25556 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25557 #else
25558 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25559 #endif
25560 }
25561
25562 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
25563 vzip1_p8 (poly8x8_t __a, poly8x8_t __b)
25564 {
25565 #ifdef __AARCH64EB__
25566 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
25567 #else
25568 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25569 #endif
25570 }
25571
25572 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
25573 vzip1_p16 (poly16x4_t __a, poly16x4_t __b)
25574 {
25575 #ifdef __AARCH64EB__
25576 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
25577 #else
25578 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
25579 #endif
25580 }
25581
25582 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25583 vzip1_s8 (int8x8_t __a, int8x8_t __b)
25584 {
25585 #ifdef __AARCH64EB__
25586 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
25587 #else
25588 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25589 #endif
25590 }
25591
25592 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25593 vzip1_s16 (int16x4_t __a, int16x4_t __b)
25594 {
25595 #ifdef __AARCH64EB__
25596 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
25597 #else
25598 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
25599 #endif
25600 }
25601
25602 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25603 vzip1_s32 (int32x2_t __a, int32x2_t __b)
25604 {
25605 #ifdef __AARCH64EB__
25606 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25607 #else
25608 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25609 #endif
25610 }
25611
25612 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25613 vzip1_u8 (uint8x8_t __a, uint8x8_t __b)
25614 {
25615 #ifdef __AARCH64EB__
25616 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
25617 #else
25618 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25619 #endif
25620 }
25621
25622 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25623 vzip1_u16 (uint16x4_t __a, uint16x4_t __b)
25624 {
25625 #ifdef __AARCH64EB__
25626 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
25627 #else
25628 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
25629 #endif
25630 }
25631
25632 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25633 vzip1_u32 (uint32x2_t __a, uint32x2_t __b)
25634 {
25635 #ifdef __AARCH64EB__
25636 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
25637 #else
25638 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
25639 #endif
25640 }
25641
25642 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
25643 vzip1q_f16 (float16x8_t __a, float16x8_t __b)
25644 {
25645 #ifdef __AARCH64EB__
25646 return __builtin_shuffle (__a, __b,
25647 (uint16x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
25648 #else
25649 return __builtin_shuffle (__a, __b,
25650 (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25651 #endif
25652 }
25653
25654 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25655 vzip1q_f32 (float32x4_t __a, float32x4_t __b)
25656 {
25657 #ifdef __AARCH64EB__
25658 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
25659 #else
25660 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
25661 #endif
25662 }
25663
25664 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25665 vzip1q_f64 (float64x2_t __a, float64x2_t __b)
25666 {
25667 #ifdef __AARCH64EB__
25668 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25669 #else
25670 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25671 #endif
25672 }
25673
25674 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25675 vzip1q_p8 (poly8x16_t __a, poly8x16_t __b)
25676 {
25677 #ifdef __AARCH64EB__
25678 return __builtin_shuffle (__a, __b, (uint8x16_t)
25679 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
25680 #else
25681 return __builtin_shuffle (__a, __b, (uint8x16_t)
25682 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
25683 #endif
25684 }
25685
25686 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25687 vzip1q_p16 (poly16x8_t __a, poly16x8_t __b)
25688 {
25689 #ifdef __AARCH64EB__
25690 return __builtin_shuffle (__a, __b, (uint16x8_t)
25691 {12, 4, 13, 5, 14, 6, 15, 7});
25692 #else
25693 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25694 #endif
25695 }
25696
25697 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25698 vzip1q_s8 (int8x16_t __a, int8x16_t __b)
25699 {
25700 #ifdef __AARCH64EB__
25701 return __builtin_shuffle (__a, __b, (uint8x16_t)
25702 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
25703 #else
25704 return __builtin_shuffle (__a, __b, (uint8x16_t)
25705 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
25706 #endif
25707 }
25708
25709 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25710 vzip1q_s16 (int16x8_t __a, int16x8_t __b)
25711 {
25712 #ifdef __AARCH64EB__
25713 return __builtin_shuffle (__a, __b, (uint16x8_t)
25714 {12, 4, 13, 5, 14, 6, 15, 7});
25715 #else
25716 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25717 #endif
25718 }
25719
25720 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25721 vzip1q_s32 (int32x4_t __a, int32x4_t __b)
25722 {
25723 #ifdef __AARCH64EB__
25724 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
25725 #else
25726 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
25727 #endif
25728 }
25729
25730 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25731 vzip1q_s64 (int64x2_t __a, int64x2_t __b)
25732 {
25733 #ifdef __AARCH64EB__
25734 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25735 #else
25736 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25737 #endif
25738 }
25739
25740 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25741 vzip1q_u8 (uint8x16_t __a, uint8x16_t __b)
25742 {
25743 #ifdef __AARCH64EB__
25744 return __builtin_shuffle (__a, __b, (uint8x16_t)
25745 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
25746 #else
25747 return __builtin_shuffle (__a, __b, (uint8x16_t)
25748 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
25749 #endif
25750 }
25751
25752 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25753 vzip1q_u16 (uint16x8_t __a, uint16x8_t __b)
25754 {
25755 #ifdef __AARCH64EB__
25756 return __builtin_shuffle (__a, __b, (uint16x8_t)
25757 {12, 4, 13, 5, 14, 6, 15, 7});
25758 #else
25759 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
25760 #endif
25761 }
25762
25763 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25764 vzip1q_u32 (uint32x4_t __a, uint32x4_t __b)
25765 {
25766 #ifdef __AARCH64EB__
25767 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
25768 #else
25769 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
25770 #endif
25771 }
25772
25773 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25774 vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
25775 {
25776 #ifdef __AARCH64EB__
25777 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25778 #else
25779 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25780 #endif
25781 }
25782
25783 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
25784 vzip2_f16 (float16x4_t __a, float16x4_t __b)
25785 {
25786 #ifdef __AARCH64EB__
25787 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25788 #else
25789 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25790 #endif
25791 }
25792
25793 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
25794 vzip2_f32 (float32x2_t __a, float32x2_t __b)
25795 {
25796 #ifdef __AARCH64EB__
25797 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25798 #else
25799 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25800 #endif
25801 }
25802
25803 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
25804 vzip2_p8 (poly8x8_t __a, poly8x8_t __b)
25805 {
25806 #ifdef __AARCH64EB__
25807 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25808 #else
25809 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25810 #endif
25811 }
25812
25813 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
25814 vzip2_p16 (poly16x4_t __a, poly16x4_t __b)
25815 {
25816 #ifdef __AARCH64EB__
25817 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25818 #else
25819 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25820 #endif
25821 }
25822
25823 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25824 vzip2_s8 (int8x8_t __a, int8x8_t __b)
25825 {
25826 #ifdef __AARCH64EB__
25827 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25828 #else
25829 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25830 #endif
25831 }
25832
25833 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25834 vzip2_s16 (int16x4_t __a, int16x4_t __b)
25835 {
25836 #ifdef __AARCH64EB__
25837 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25838 #else
25839 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25840 #endif
25841 }
25842
25843 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25844 vzip2_s32 (int32x2_t __a, int32x2_t __b)
25845 {
25846 #ifdef __AARCH64EB__
25847 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25848 #else
25849 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25850 #endif
25851 }
25852
25853 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25854 vzip2_u8 (uint8x8_t __a, uint8x8_t __b)
25855 {
25856 #ifdef __AARCH64EB__
25857 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25858 #else
25859 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25860 #endif
25861 }
25862
25863 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25864 vzip2_u16 (uint16x4_t __a, uint16x4_t __b)
25865 {
25866 #ifdef __AARCH64EB__
25867 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25868 #else
25869 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25870 #endif
25871 }
25872
25873 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25874 vzip2_u32 (uint32x2_t __a, uint32x2_t __b)
25875 {
25876 #ifdef __AARCH64EB__
25877 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25878 #else
25879 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25880 #endif
25881 }
25882
25883 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
25884 vzip2q_f16 (float16x8_t __a, float16x8_t __b)
25885 {
25886 #ifdef __AARCH64EB__
25887 return __builtin_shuffle (__a, __b,
25888 (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25889 #else
25890 return __builtin_shuffle (__a, __b,
25891 (uint16x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25892 #endif
25893 }
25894
25895 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25896 vzip2q_f32 (float32x4_t __a, float32x4_t __b)
25897 {
25898 #ifdef __AARCH64EB__
25899 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25900 #else
25901 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25902 #endif
25903 }
25904
25905 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25906 vzip2q_f64 (float64x2_t __a, float64x2_t __b)
25907 {
25908 #ifdef __AARCH64EB__
25909 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25910 #else
25911 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25912 #endif
25913 }
25914
25915 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25916 vzip2q_p8 (poly8x16_t __a, poly8x16_t __b)
25917 {
25918 #ifdef __AARCH64EB__
25919 return __builtin_shuffle (__a, __b, (uint8x16_t)
25920 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25921 #else
25922 return __builtin_shuffle (__a, __b, (uint8x16_t)
25923 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25924 #endif
25925 }
25926
25927 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25928 vzip2q_p16 (poly16x8_t __a, poly16x8_t __b)
25929 {
25930 #ifdef __AARCH64EB__
25931 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25932 #else
25933 return __builtin_shuffle (__a, __b, (uint16x8_t)
25934 {4, 12, 5, 13, 6, 14, 7, 15});
25935 #endif
25936 }
25937
25938 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25939 vzip2q_s8 (int8x16_t __a, int8x16_t __b)
25940 {
25941 #ifdef __AARCH64EB__
25942 return __builtin_shuffle (__a, __b, (uint8x16_t)
25943 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25944 #else
25945 return __builtin_shuffle (__a, __b, (uint8x16_t)
25946 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25947 #endif
25948 }
25949
25950 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25951 vzip2q_s16 (int16x8_t __a, int16x8_t __b)
25952 {
25953 #ifdef __AARCH64EB__
25954 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25955 #else
25956 return __builtin_shuffle (__a, __b, (uint16x8_t)
25957 {4, 12, 5, 13, 6, 14, 7, 15});
25958 #endif
25959 }
25960
25961 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25962 vzip2q_s32 (int32x4_t __a, int32x4_t __b)
25963 {
25964 #ifdef __AARCH64EB__
25965 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25966 #else
25967 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25968 #endif
25969 }
25970
25971 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25972 vzip2q_s64 (int64x2_t __a, int64x2_t __b)
25973 {
25974 #ifdef __AARCH64EB__
25975 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25976 #else
25977 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25978 #endif
25979 }
25980
25981 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25982 vzip2q_u8 (uint8x16_t __a, uint8x16_t __b)
25983 {
25984 #ifdef __AARCH64EB__
25985 return __builtin_shuffle (__a, __b, (uint8x16_t)
25986 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25987 #else
25988 return __builtin_shuffle (__a, __b, (uint8x16_t)
25989 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25990 #endif
25991 }
25992
25993 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25994 vzip2q_u16 (uint16x8_t __a, uint16x8_t __b)
25995 {
25996 #ifdef __AARCH64EB__
25997 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25998 #else
25999 return __builtin_shuffle (__a, __b, (uint16x8_t)
26000 {4, 12, 5, 13, 6, 14, 7, 15});
26001 #endif
26002 }
26003
26004 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
26005 vzip2q_u32 (uint32x4_t __a, uint32x4_t __b)
26006 {
26007 #ifdef __AARCH64EB__
26008 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
26009 #else
26010 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
26011 #endif
26012 }
26013
26014 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
26015 vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
26016 {
26017 #ifdef __AARCH64EB__
26018 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
26019 #else
26020 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
26021 #endif
26022 }
26023
26024 __INTERLEAVE_LIST (zip)
26025
26026 #undef __INTERLEAVE_LIST
26027 #undef __DEFINTERLEAVE
26028
26029 /* End of optimal implementations in approved order. */
26030
26031 #pragma GCC pop_options
26032
26033 /* ARMv8.2-A FP16 intrinsics. */
26034
26035 #pragma GCC push_options
26036 #pragma GCC target ("arch=armv8.2-a+fp16")
26037
26038 /* ARMv8.2-A FP16 one-operand vector intrinsics. */
26039
26040 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26041 vabs_f16 (float16x4_t __a)
26042 {
26043 return __builtin_aarch64_absv4hf (__a);
26044 }
26045
26046 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26047 vabsq_f16 (float16x8_t __a)
26048 {
26049 return __builtin_aarch64_absv8hf (__a);
26050 }
26051
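/* The comparisons against zero below set each result lane to all ones
   (0xffff) when the condition holds and to zero otherwise, so the results
   can be used directly as lane masks.  */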
26052 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26053 vceqz_f16 (float16x4_t __a)
26054 {
26055 return __builtin_aarch64_cmeqv4hf_uss (__a, vdup_n_f16 (0.0f));
26056 }
26057
26058 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26059 vceqzq_f16 (float16x8_t __a)
26060 {
26061 return __builtin_aarch64_cmeqv8hf_uss (__a, vdupq_n_f16 (0.0f));
26062 }
26063
26064 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26065 vcgez_f16 (float16x4_t __a)
26066 {
26067 return __builtin_aarch64_cmgev4hf_uss (__a, vdup_n_f16 (0.0f));
26068 }
26069
26070 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26071 vcgezq_f16 (float16x8_t __a)
26072 {
26073 return __builtin_aarch64_cmgev8hf_uss (__a, vdupq_n_f16 (0.0f));
26074 }
26075
26076 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26077 vcgtz_f16 (float16x4_t __a)
26078 {
26079 return __builtin_aarch64_cmgtv4hf_uss (__a, vdup_n_f16 (0.0f));
26080 }
26081
26082 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26083 vcgtzq_f16 (float16x8_t __a)
26084 {
26085 return __builtin_aarch64_cmgtv8hf_uss (__a, vdupq_n_f16 (0.0f));
26086 }
26087
26088 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26089 vclez_f16 (float16x4_t __a)
26090 {
26091 return __builtin_aarch64_cmlev4hf_uss (__a, vdup_n_f16 (0.0f));
26092 }
26093
26094 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26095 vclezq_f16 (float16x8_t __a)
26096 {
26097 return __builtin_aarch64_cmlev8hf_uss (__a, vdupq_n_f16 (0.0f));
26098 }
26099
26100 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26101 vcltz_f16 (float16x4_t __a)
26102 {
26103 return __builtin_aarch64_cmltv4hf_uss (__a, vdup_n_f16 (0.0f));
26104 }
26105
26106 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26107 vcltzq_f16 (float16x8_t __a)
26108 {
26109 return __builtin_aarch64_cmltv8hf_uss (__a, vdupq_n_f16 (0.0f));
26110 }
26111
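/* Float-to-integer conversions come in one flavour per rounding mode,
   visible in the builtin names: vcvt truncates toward zero, vcvta rounds to
   nearest with ties away from zero, vcvtm rounds toward minus infinity,
   vcvtn rounds to nearest with ties to even, and vcvtp rounds toward plus
   infinity.  */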
26112 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26113 vcvt_f16_s16 (int16x4_t __a)
26114 {
26115 return __builtin_aarch64_floatv4hiv4hf (__a);
26116 }
26117
26118 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26119 vcvtq_f16_s16 (int16x8_t __a)
26120 {
26121 return __builtin_aarch64_floatv8hiv8hf (__a);
26122 }
26123
26124 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26125 vcvt_f16_u16 (uint16x4_t __a)
26126 {
26127 return __builtin_aarch64_floatunsv4hiv4hf ((int16x4_t) __a);
26128 }
26129
26130 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26131 vcvtq_f16_u16 (uint16x8_t __a)
26132 {
26133 return __builtin_aarch64_floatunsv8hiv8hf ((int16x8_t) __a);
26134 }
26135
26136 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
26137 vcvt_s16_f16 (float16x4_t __a)
26138 {
26139 return __builtin_aarch64_lbtruncv4hfv4hi (__a);
26140 }
26141
26142 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
26143 vcvtq_s16_f16 (float16x8_t __a)
26144 {
26145 return __builtin_aarch64_lbtruncv8hfv8hi (__a);
26146 }
26147
26148 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26149 vcvt_u16_f16 (float16x4_t __a)
26150 {
26151 return __builtin_aarch64_lbtruncuv4hfv4hi_us (__a);
26152 }
26153
26154 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26155 vcvtq_u16_f16 (float16x8_t __a)
26156 {
26157 return __builtin_aarch64_lbtruncuv8hfv8hi_us (__a);
26158 }
26159
26160 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
26161 vcvta_s16_f16 (float16x4_t __a)
26162 {
26163 return __builtin_aarch64_lroundv4hfv4hi (__a);
26164 }
26165
26166 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
26167 vcvtaq_s16_f16 (float16x8_t __a)
26168 {
26169 return __builtin_aarch64_lroundv8hfv8hi (__a);
26170 }
26171
26172 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26173 vcvta_u16_f16 (float16x4_t __a)
26174 {
26175 return __builtin_aarch64_lrounduv4hfv4hi_us (__a);
26176 }
26177
26178 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26179 vcvtaq_u16_f16 (float16x8_t __a)
26180 {
26181 return __builtin_aarch64_lrounduv8hfv8hi_us (__a);
26182 }
26183
26184 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
26185 vcvtm_s16_f16 (float16x4_t __a)
26186 {
26187 return __builtin_aarch64_lfloorv4hfv4hi (__a);
26188 }
26189
26190 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
26191 vcvtmq_s16_f16 (float16x8_t __a)
26192 {
26193 return __builtin_aarch64_lfloorv8hfv8hi (__a);
26194 }
26195
26196 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26197 vcvtm_u16_f16 (float16x4_t __a)
26198 {
26199 return __builtin_aarch64_lflooruv4hfv4hi_us (__a);
26200 }
26201
26202 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26203 vcvtmq_u16_f16 (float16x8_t __a)
26204 {
26205 return __builtin_aarch64_lflooruv8hfv8hi_us (__a);
26206 }
26207
26208 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
26209 vcvtn_s16_f16 (float16x4_t __a)
26210 {
26211 return __builtin_aarch64_lfrintnv4hfv4hi (__a);
26212 }
26213
26214 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
26215 vcvtnq_s16_f16 (float16x8_t __a)
26216 {
26217 return __builtin_aarch64_lfrintnv8hfv8hi (__a);
26218 }
26219
26220 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26221 vcvtn_u16_f16 (float16x4_t __a)
26222 {
26223 return __builtin_aarch64_lfrintnuv4hfv4hi_us (__a);
26224 }
26225
26226 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26227 vcvtnq_u16_f16 (float16x8_t __a)
26228 {
26229 return __builtin_aarch64_lfrintnuv8hfv8hi_us (__a);
26230 }
26231
26232 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
26233 vcvtp_s16_f16 (float16x4_t __a)
26234 {
26235 return __builtin_aarch64_lceilv4hfv4hi (__a);
26236 }
26237
26238 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
26239 vcvtpq_s16_f16 (float16x8_t __a)
26240 {
26241 return __builtin_aarch64_lceilv8hfv8hi (__a);
26242 }
26243
26244 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26245 vcvtp_u16_f16 (float16x4_t __a)
26246 {
26247 return __builtin_aarch64_lceiluv4hfv4hi_us (__a);
26248 }
26249
26250 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26251 vcvtpq_u16_f16 (float16x8_t __a)
26252 {
26253 return __builtin_aarch64_lceiluv8hfv8hi_us (__a);
26254 }
26255
26256 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26257 vneg_f16 (float16x4_t __a)
26258 {
26259 return -__a;
26260 }
26261
26262 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26263 vnegq_f16 (float16x8_t __a)
26264 {
26265 return -__a;
26266 }
26267
26268 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26269 vrecpe_f16 (float16x4_t __a)
26270 {
26271 return __builtin_aarch64_frecpev4hf (__a);
26272 }
26273
26274 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26275 vrecpeq_f16 (float16x8_t __a)
26276 {
26277 return __builtin_aarch64_frecpev8hf (__a);
26278 }
26279
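/* The vrnd* intrinsics round to an integral value in floating-point format,
   one per rounding mode: vrnd truncates, vrnda rounds ties away from zero,
   vrndm rounds toward minus infinity, vrndn rounds ties to even, vrndp
   rounds toward plus infinity, and vrndi/vrndx use the current rounding
   mode (without/with raising the inexact exception).  */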
26280 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26281 vrnd_f16 (float16x4_t __a)
26282 {
26283 return __builtin_aarch64_btruncv4hf (__a);
26284 }
26285
26286 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26287 vrndq_f16 (float16x8_t __a)
26288 {
26289 return __builtin_aarch64_btruncv8hf (__a);
26290 }
26291
26292 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26293 vrnda_f16 (float16x4_t __a)
26294 {
26295 return __builtin_aarch64_roundv4hf (__a);
26296 }
26297
26298 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26299 vrndaq_f16 (float16x8_t __a)
26300 {
26301 return __builtin_aarch64_roundv8hf (__a);
26302 }
26303
26304 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26305 vrndi_f16 (float16x4_t __a)
26306 {
26307 return __builtin_aarch64_nearbyintv4hf (__a);
26308 }
26309
26310 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26311 vrndiq_f16 (float16x8_t __a)
26312 {
26313 return __builtin_aarch64_nearbyintv8hf (__a);
26314 }
26315
26316 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26317 vrndm_f16 (float16x4_t __a)
26318 {
26319 return __builtin_aarch64_floorv4hf (__a);
26320 }
26321
26322 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26323 vrndmq_f16 (float16x8_t __a)
26324 {
26325 return __builtin_aarch64_floorv8hf (__a);
26326 }
26327
26328 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26329 vrndn_f16 (float16x4_t __a)
26330 {
26331 return __builtin_aarch64_frintnv4hf (__a);
26332 }
26333
26334 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26335 vrndnq_f16 (float16x8_t __a)
26336 {
26337 return __builtin_aarch64_frintnv8hf (__a);
26338 }
26339
26340 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26341 vrndp_f16 (float16x4_t __a)
26342 {
26343 return __builtin_aarch64_ceilv4hf (__a);
26344 }
26345
26346 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26347 vrndpq_f16 (float16x8_t __a)
26348 {
26349 return __builtin_aarch64_ceilv8hf (__a);
26350 }
26351
26352 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26353 vrndx_f16 (float16x4_t __a)
26354 {
26355 return __builtin_aarch64_rintv4hf (__a);
26356 }
26357
26358 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26359 vrndxq_f16 (float16x8_t __a)
26360 {
26361 return __builtin_aarch64_rintv8hf (__a);
26362 }
26363
26364 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26365 vrsqrte_f16 (float16x4_t __a)
26366 {
26367 return __builtin_aarch64_rsqrtev4hf (__a);
26368 }
26369
26370 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26371 vrsqrteq_f16 (float16x8_t __a)
26372 {
26373 return __builtin_aarch64_rsqrtev8hf (__a);
26374 }
26375
26376 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26377 vsqrt_f16 (float16x4_t __a)
26378 {
26379 return __builtin_aarch64_sqrtv4hf (__a);
26380 }
26381
26382 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26383 vsqrtq_f16 (float16x8_t __a)
26384 {
26385 return __builtin_aarch64_sqrtv8hf (__a);
26386 }
26387
26388 /* ARMv8.2-A FP16 two-operand vector intrinsics. */
26389
26390 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26391 vadd_f16 (float16x4_t __a, float16x4_t __b)
26392 {
26393 return __a + __b;
26394 }
26395
26396 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26397 vaddq_f16 (float16x8_t __a, float16x8_t __b)
26398 {
26399 return __a + __b;
26400 }
26401
26402 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26403 vabd_f16 (float16x4_t __a, float16x4_t __b)
26404 {
26405 return __builtin_aarch64_fabdv4hf (__a, __b);
26406 }
26407
26408 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26409 vabdq_f16 (float16x8_t __a, float16x8_t __b)
26410 {
26411 return __builtin_aarch64_fabdv8hf (__a, __b);
26412 }
26413
26414 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26415 vcage_f16 (float16x4_t __a, float16x4_t __b)
26416 {
26417 return __builtin_aarch64_facgev4hf_uss (__a, __b);
26418 }
26419
26420 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26421 vcageq_f16 (float16x8_t __a, float16x8_t __b)
26422 {
26423 return __builtin_aarch64_facgev8hf_uss (__a, __b);
26424 }
26425
26426 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26427 vcagt_f16 (float16x4_t __a, float16x4_t __b)
26428 {
26429 return __builtin_aarch64_facgtv4hf_uss (__a, __b);
26430 }
26431
26432 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26433 vcagtq_f16 (float16x8_t __a, float16x8_t __b)
26434 {
26435 return __builtin_aarch64_facgtv8hf_uss (__a, __b);
26436 }
26437
26438 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26439 vcale_f16 (float16x4_t __a, float16x4_t __b)
26440 {
26441 return __builtin_aarch64_faclev4hf_uss (__a, __b);
26442 }
26443
26444 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26445 vcaleq_f16 (float16x8_t __a, float16x8_t __b)
26446 {
26447 return __builtin_aarch64_faclev8hf_uss (__a, __b);
26448 }
26449
26450 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26451 vcalt_f16 (float16x4_t __a, float16x4_t __b)
26452 {
26453 return __builtin_aarch64_facltv4hf_uss (__a, __b);
26454 }
26455
26456 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26457 vcaltq_f16 (float16x8_t __a, float16x8_t __b)
26458 {
26459 return __builtin_aarch64_facltv8hf_uss (__a, __b);
26460 }
26461
26462 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26463 vceq_f16 (float16x4_t __a, float16x4_t __b)
26464 {
26465 return __builtin_aarch64_cmeqv4hf_uss (__a, __b);
26466 }
26467
26468 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26469 vceqq_f16 (float16x8_t __a, float16x8_t __b)
26470 {
26471 return __builtin_aarch64_cmeqv8hf_uss (__a, __b);
26472 }
26473
26474 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26475 vcge_f16 (float16x4_t __a, float16x4_t __b)
26476 {
26477 return __builtin_aarch64_cmgev4hf_uss (__a, __b);
26478 }
26479
26480 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26481 vcgeq_f16 (float16x8_t __a, float16x8_t __b)
26482 {
26483 return __builtin_aarch64_cmgev8hf_uss (__a, __b);
26484 }
26485
26486 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26487 vcgt_f16 (float16x4_t __a, float16x4_t __b)
26488 {
26489 return __builtin_aarch64_cmgtv4hf_uss (__a, __b);
26490 }
26491
26492 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26493 vcgtq_f16 (float16x8_t __a, float16x8_t __b)
26494 {
26495 return __builtin_aarch64_cmgtv8hf_uss (__a, __b);
26496 }
26497
26498 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26499 vcle_f16 (float16x4_t __a, float16x4_t __b)
26500 {
26501 return __builtin_aarch64_cmlev4hf_uss (__a, __b);
26502 }
26503
26504 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26505 vcleq_f16 (float16x8_t __a, float16x8_t __b)
26506 {
26507 return __builtin_aarch64_cmlev8hf_uss (__a, __b);
26508 }
26509
26510 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26511 vclt_f16 (float16x4_t __a, float16x4_t __b)
26512 {
26513 return __builtin_aarch64_cmltv4hf_uss (__a, __b);
26514 }
26515
26516 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26517 vcltq_f16 (float16x8_t __a, float16x8_t __b)
26518 {
26519 return __builtin_aarch64_cmltv8hf_uss (__a, __b);
26520 }
26521
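/* The vcvt_n* variants convert to and from 16-bit fixed-point values; the
   immediate __b is the number of fraction bits (1 to 16 here).  A sketch:

     vcvt_n_f16_s16 (v, 7)   -- treats v as Q7, i.e. scales by 2**-7
     vcvt_n_s16_f16 (v, 7)   -- scales by 2**7, then truncates toward zero

   where v stands for any suitably-typed vector.  */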
26522 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26523 vcvt_n_f16_s16 (int16x4_t __a, const int __b)
26524 {
26525 return __builtin_aarch64_scvtfv4hi (__a, __b);
26526 }
26527
26528 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26529 vcvtq_n_f16_s16 (int16x8_t __a, const int __b)
26530 {
26531 return __builtin_aarch64_scvtfv8hi (__a, __b);
26532 }
26533
26534 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26535 vcvt_n_f16_u16 (uint16x4_t __a, const int __b)
26536 {
26537 return __builtin_aarch64_ucvtfv4hi_sus (__a, __b);
26538 }
26539
26540 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26541 vcvtq_n_f16_u16 (uint16x8_t __a, const int __b)
26542 {
26543 return __builtin_aarch64_ucvtfv8hi_sus (__a, __b);
26544 }
26545
26546 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
26547 vcvt_n_s16_f16 (float16x4_t __a, const int __b)
26548 {
26549 return __builtin_aarch64_fcvtzsv4hf (__a, __b);
26550 }
26551
26552 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
26553 vcvtq_n_s16_f16 (float16x8_t __a, const int __b)
26554 {
26555 return __builtin_aarch64_fcvtzsv8hf (__a, __b);
26556 }
26557
26558 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
26559 vcvt_n_u16_f16 (float16x4_t __a, const int __b)
26560 {
26561 return __builtin_aarch64_fcvtzuv4hf_uss (__a, __b);
26562 }
26563
26564 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
26565 vcvtq_n_u16_f16 (float16x8_t __a, const int __b)
26566 {
26567 return __builtin_aarch64_fcvtzuv8hf_uss (__a, __b);
26568 }
26569
26570 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26571 vdiv_f16 (float16x4_t __a, float16x4_t __b)
26572 {
26573 return __a / __b;
26574 }
26575
26576 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26577 vdivq_f16 (float16x8_t __a, float16x8_t __b)
26578 {
26579 return __a / __b;
26580 }
26581
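/* vmax/vmin map to FMAX/FMIN, which return a NaN if either input is a NaN;
   vmaxnm/vminnm map to FMAXNM/FMINNM, the IEEE 754-2008 maxNum/minNum
   operations, which prefer the numeric operand when exactly one input is a
   quiet NaN.  The smax_nan versus fmax builtin names reflect this split.  */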
26582 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26583 vmax_f16 (float16x4_t __a, float16x4_t __b)
26584 {
26585 return __builtin_aarch64_smax_nanv4hf (__a, __b);
26586 }
26587
26588 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26589 vmaxq_f16 (float16x8_t __a, float16x8_t __b)
26590 {
26591 return __builtin_aarch64_smax_nanv8hf (__a, __b);
26592 }
26593
26594 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26595 vmaxnm_f16 (float16x4_t __a, float16x4_t __b)
26596 {
26597 return __builtin_aarch64_fmaxv4hf (__a, __b);
26598 }
26599
26600 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26601 vmaxnmq_f16 (float16x8_t __a, float16x8_t __b)
26602 {
26603 return __builtin_aarch64_fmaxv8hf (__a, __b);
26604 }
26605
26606 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26607 vmin_f16 (float16x4_t __a, float16x4_t __b)
26608 {
26609 return __builtin_aarch64_smin_nanv4hf (__a, __b);
26610 }
26611
26612 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26613 vminq_f16 (float16x8_t __a, float16x8_t __b)
26614 {
26615 return __builtin_aarch64_smin_nanv8hf (__a, __b);
26616 }
26617
26618 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26619 vminnm_f16 (float16x4_t __a, float16x4_t __b)
26620 {
26621 return __builtin_aarch64_fminv4hf (__a, __b);
26622 }
26623
26624 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26625 vminnmq_f16 (float16x8_t __a, float16x8_t __b)
26626 {
26627 return __builtin_aarch64_fminv8hf (__a, __b);
26628 }
26629
26630 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26631 vmul_f16 (float16x4_t __a, float16x4_t __b)
26632 {
26633 return __a * __b;
26634 }
26635
26636 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26637 vmulq_f16 (float16x8_t __a, float16x8_t __b)
26638 {
26639 return __a * __b;
26640 }
26641
26642 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26643 vmulx_f16 (float16x4_t __a, float16x4_t __b)
26644 {
26645 return __builtin_aarch64_fmulxv4hf (__a, __b);
26646 }
26647
26648 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26649 vmulxq_f16 (float16x8_t __a, float16x8_t __b)
26650 {
26651 return __builtin_aarch64_fmulxv8hf (__a, __b);
26652 }
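/* vmulx maps to FMULX, which multiplies like vmul except that 0.0 times
   infinity (in either order) yields 2.0 with the appropriate sign rather
   than the default NaN.  */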
26653
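/* The pairwise intrinsics operate on adjacent pairs of the concatenated
   inputs.  With a = {a0, a1, a2, a3} and b = {b0, b1, b2, b3}:

     vpadd_f16 (a, b)  ==>  {a0 + a1, a2 + a3, b0 + b1, b2 + b3}

   vpmax/vpmin keep the larger/smaller element of each pair, with the same
   NaN handling split between the base and the nm forms as above.  */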
26654 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26655 vpadd_f16 (float16x4_t __a, float16x4_t __b)
26656 {
26657 return __builtin_aarch64_faddpv4hf (__a, __b);
26658 }
26659
26660 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26661 vpaddq_f16 (float16x8_t __a, float16x8_t __b)
26662 {
26663 return __builtin_aarch64_faddpv8hf (__a, __b);
26664 }
26665
26666 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26667 vpmax_f16 (float16x4_t __a, float16x4_t __b)
26668 {
26669 return __builtin_aarch64_smax_nanpv4hf (__a, __b);
26670 }
26671
26672 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26673 vpmaxq_f16 (float16x8_t __a, float16x8_t __b)
26674 {
26675 return __builtin_aarch64_smax_nanpv8hf (__a, __b);
26676 }
26677
26678 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26679 vpmaxnm_f16 (float16x4_t __a, float16x4_t __b)
26680 {
26681 return __builtin_aarch64_smaxpv4hf (__a, __b);
26682 }
26683
26684 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26685 vpmaxnmq_f16 (float16x8_t __a, float16x8_t __b)
26686 {
26687 return __builtin_aarch64_smaxpv8hf (__a, __b);
26688 }
26689
26690 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26691 vpmin_f16 (float16x4_t __a, float16x4_t __b)
26692 {
26693 return __builtin_aarch64_smin_nanpv4hf (__a, __b);
26694 }
26695
26696 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26697 vpminq_f16 (float16x8_t __a, float16x8_t __b)
26698 {
26699 return __builtin_aarch64_smin_nanpv8hf (__a, __b);
26700 }
26701
26702 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
26703 vpminnm_f16 (float16x4_t __a, float16x4_t __b)
26704 {
26705 return __builtin_aarch64_sminpv4hf (__a, __b);
26706 }
26707
26708 __extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
26709 vpminnmq_f16 (float16x8_t __a, float16x8_t __b)
26710 {
26711 return __builtin_aarch64_sminpv8hf (__a, __b);
26712 }
26713
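/* The vp* intrinsics reduce adjacent pairs across the concatenation
   of their two operands, so repeated application performs a full
   horizontal reduction.  A minimal sketch of a horizontal sum, not
   part of the ACLE API, assuming the vget_lane_f16 intrinsic defined
   earlier in this file:

     __extension__ static __inline float16_t
     __example_hsum_f16 (float16x4_t __x)
     {
       float16x4_t __t = vpadd_f16 (__x, __x);   x0+x1, x2+x3, ...
       __t = vpadd_f16 (__t, __t);               total in every lane
       return vget_lane_f16 (__t, 0);
     }  */
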
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vrecps_f16 (float16x4_t __a, float16x4_t __b)
{
  return __builtin_aarch64_frecpsv4hf (__a, __b);
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vrecpsq_f16 (float16x8_t __a, float16x8_t __b)
{
  return __builtin_aarch64_frecpsv8hf (__a, __b);
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vrsqrts_f16 (float16x4_t __a, float16x4_t __b)
{
  return __builtin_aarch64_rsqrtsv4hf (__a, __b);
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vrsqrtsq_f16 (float16x8_t __a, float16x8_t __b)
{
  return __builtin_aarch64_rsqrtsv8hf (__a, __b);
}

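/* vrecps_f16 computes (2.0 - __a * __b) and vrsqrts_f16 computes
   (3.0 - __a * __b) / 2.0: the correction terms of one
   Newton-Raphson step for 1/x and 1/sqrt(x) respectively.  A sketch
   of one reciprocal refinement, assuming the vrecpe_f16 estimate
   intrinsic defined earlier in this file (illustrative only):

     float16x4_t __est = vrecpe_f16 (__a);                coarse 1/a
     __est = vmul_f16 (__est, vrecps_f16 (__a, __est));   one NR step
     (repeat the step to tighten the result further)  */
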
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vsub_f16 (float16x4_t __a, float16x4_t __b)
{
  return __a - __b;
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vsubq_f16 (float16x8_t __a, float16x8_t __b)
{
  return __a - __b;
}

/* ARMv8.2-A FP16 three operands vector intrinsics.  */

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vfma_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c)
{
  return __builtin_aarch64_fmav4hf (__b, __c, __a);
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vfmaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
{
  return __builtin_aarch64_fmav8hf (__b, __c, __a);
}

__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vfms_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c)
{
  return __builtin_aarch64_fnmav4hf (__b, __c, __a);
}

__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
{
  return __builtin_aarch64_fnmav8hf (__b, __c, __a);
}

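/* vfma_f16 (__a, __b, __c) computes __a + __b * __c and
   vfms_f16 (__a, __b, __c) computes __a - __b * __c, each with a
   single rounding (FMLA/FMLS).  The builtin arguments are permuted
   because __builtin_aarch64_fmav4hf follows the fma (x, y, z)
   = x * y + z convention, taking the addend last.  Illustrative
   Horner step for polynomial evaluation:

     __acc = vfma_f16 (__c0, __x, __acc);   __c0 + __x * __acc  */
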
#pragma GCC pop_options

#undef __aarch64_vget_lane_any

#undef __aarch64_vdup_lane_any
#undef __aarch64_vdup_lane_f16
#undef __aarch64_vdup_lane_f32
#undef __aarch64_vdup_lane_f64
#undef __aarch64_vdup_lane_p8
#undef __aarch64_vdup_lane_p16
#undef __aarch64_vdup_lane_s8
#undef __aarch64_vdup_lane_s16
#undef __aarch64_vdup_lane_s32
#undef __aarch64_vdup_lane_s64
#undef __aarch64_vdup_lane_u8
#undef __aarch64_vdup_lane_u16
#undef __aarch64_vdup_lane_u32
#undef __aarch64_vdup_lane_u64
#undef __aarch64_vdup_laneq_f16
#undef __aarch64_vdup_laneq_f32
#undef __aarch64_vdup_laneq_f64
#undef __aarch64_vdup_laneq_p8
#undef __aarch64_vdup_laneq_p16
#undef __aarch64_vdup_laneq_s8
#undef __aarch64_vdup_laneq_s16
#undef __aarch64_vdup_laneq_s32
#undef __aarch64_vdup_laneq_s64
#undef __aarch64_vdup_laneq_u8
#undef __aarch64_vdup_laneq_u16
#undef __aarch64_vdup_laneq_u32
#undef __aarch64_vdup_laneq_u64
#undef __aarch64_vdupq_lane_f16
#undef __aarch64_vdupq_lane_f32
#undef __aarch64_vdupq_lane_f64
#undef __aarch64_vdupq_lane_p8
#undef __aarch64_vdupq_lane_p16
#undef __aarch64_vdupq_lane_s8
#undef __aarch64_vdupq_lane_s16
#undef __aarch64_vdupq_lane_s32
#undef __aarch64_vdupq_lane_s64
#undef __aarch64_vdupq_lane_u8
#undef __aarch64_vdupq_lane_u16
#undef __aarch64_vdupq_lane_u32
#undef __aarch64_vdupq_lane_u64
#undef __aarch64_vdupq_laneq_f16
#undef __aarch64_vdupq_laneq_f32
#undef __aarch64_vdupq_laneq_f64
#undef __aarch64_vdupq_laneq_p8
#undef __aarch64_vdupq_laneq_p16
#undef __aarch64_vdupq_laneq_s8
#undef __aarch64_vdupq_laneq_s16
#undef __aarch64_vdupq_laneq_s32
#undef __aarch64_vdupq_laneq_s64
#undef __aarch64_vdupq_laneq_u8
#undef __aarch64_vdupq_laneq_u16
#undef __aarch64_vdupq_laneq_u32
#undef __aarch64_vdupq_laneq_u64

#endif /* _AARCH64_NEON_H_ */