re PR rtl-optimization/63620 (RELOAD lost SET_GOT dependency on Darwin)
[gcc.git] / gcc / config / i386 / avx512fintrin.h
1 /* Copyright (C) 2013-2014 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
30
31 #ifndef __AVX512F__
32 #pragma GCC push_options
33 #pragma GCC target("avx512f")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
36
/* Internal 64-byte vector types used to implement the intrinsics, listed
   from the narrowest element type to the widest.  */
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef double __v8df __attribute__ ((__vector_size__ (64)));

/* Public 64-byte vector types.  The Intel API is flexible enough that these
   must be allowed to alias other vector types and their scalar components,
   hence __may_alias__.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));

/* 8-bit and 16-bit mask types.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
53
54 extern __inline __m512i
55 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
56 _mm512_set_epi64 (long long __A, long long __B, long long __C,
57 long long __D, long long __E, long long __F,
58 long long __G, long long __H)
59 {
60 return __extension__ (__m512i) (__v8di)
61 { __H, __G, __F, __E, __D, __C, __B, __A };
62 }
63
64 /* Create the vector [A B C D E F G H I J K L M N O P]. */
65 extern __inline __m512i
66 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
67 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
68 int __E, int __F, int __G, int __H,
69 int __I, int __J, int __K, int __L,
70 int __M, int __N, int __O, int __P)
71 {
72 return __extension__ (__m512i)(__v16si)
73 { __P, __O, __N, __M, __L, __K, __J, __I,
74 __H, __G, __F, __E, __D, __C, __B, __A };
75 }
76
77 extern __inline __m512d
78 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79 _mm512_set_pd (double __A, double __B, double __C, double __D,
80 double __E, double __F, double __G, double __H)
81 {
82 return __extension__ (__m512d)
83 { __H, __G, __F, __E, __D, __C, __B, __A };
84 }
85
86 extern __inline __m512
87 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
88 _mm512_set_ps (float __A, float __B, float __C, float __D,
89 float __E, float __F, float __G, float __H,
90 float __I, float __J, float __K, float __L,
91 float __M, float __N, float __O, float __P)
92 {
93 return __extension__ (__m512)
94 { __P, __O, __N, __M, __L, __K, __J, __I,
95 __H, __G, __F, __E, __D, __C, __B, __A };
96 }
97
/* Reversed-argument forms of the _mm512_set_* constructors: each macro
   passes its arguments to the matching _mm512_set_* function in the
   opposite order, so the first macro argument is passed last.  */
#define _mm512_setr_epi64(__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7) \
  _mm512_set_epi64 (__e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)

#define _mm512_setr_epi32(__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, \
                          __e8, __e9, __e10, __e11, __e12, __e13, __e14, \
                          __e15) \
  _mm512_set_epi32 (__e15, __e14, __e13, __e12, __e11, __e10, __e9, __e8, \
                    __e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)

#define _mm512_setr_pd(__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7) \
  _mm512_set_pd (__e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)

#define _mm512_setr_ps(__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, \
                       __e8, __e9, __e10, __e11, __e12, __e13, __e14, \
                       __e15) \
  _mm512_set_ps (__e15, __e14, __e13, __e12, __e11, __e10, __e9, __e8, \
                 __e7, __e6, __e5, __e4, __e3, __e2, __e1, __e0)
110
111 extern __inline __m512
112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
113 _mm512_undefined_ps (void)
114 {
115 __m512 __Y = __Y;
116 return __Y;
117 }
118
119 extern __inline __m512d
120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
121 _mm512_undefined_pd (void)
122 {
123 __m512d __Y = __Y;
124 return __Y;
125 }
126
127 extern __inline __m512i
128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
129 _mm512_undefined_si512 (void)
130 {
131 __m512i __Y = __Y;
132 return __Y;
133 }
134
135 extern __inline __m512i
136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
137 _mm512_set1_epi8 (char __A)
138 {
139 return __extension__ (__m512i)(__v64qi)
140 { __A, __A, __A, __A, __A, __A, __A, __A,
141 __A, __A, __A, __A, __A, __A, __A, __A,
142 __A, __A, __A, __A, __A, __A, __A, __A,
143 __A, __A, __A, __A, __A, __A, __A, __A,
144 __A, __A, __A, __A, __A, __A, __A, __A,
145 __A, __A, __A, __A, __A, __A, __A, __A,
146 __A, __A, __A, __A, __A, __A, __A, __A,
147 __A, __A, __A, __A, __A, __A, __A, __A };
148 }
149
150 extern __inline __m512i
151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
152 _mm512_set1_epi16 (short __A)
153 {
154 return __extension__ (__m512i)(__v32hi)
155 { __A, __A, __A, __A, __A, __A, __A, __A,
156 __A, __A, __A, __A, __A, __A, __A, __A,
157 __A, __A, __A, __A, __A, __A, __A, __A,
158 __A, __A, __A, __A, __A, __A, __A, __A };
159 }
160
161 extern __inline __m512d
162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
163 _mm512_set1_pd (double __A)
164 {
165 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
166 (__v2df) { __A, },
167 (__v8df)
168 _mm512_undefined_pd (),
169 (__mmask8) -1);
170 }
171
172 extern __inline __m512
173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174 _mm512_set1_ps (float __A)
175 {
176 return (__m512) __builtin_ia32_broadcastss512 (__extension__
177 (__v4sf) { __A, },
178 (__v16sf)
179 _mm512_undefined_ps (),
180 (__mmask16) -1);
181 }
182
183 /* Create the vector [A B C D A B C D A B C D A B C D]. */
184 extern __inline __m512i
185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
186 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
187 {
188 return __extension__ (__m512i)(__v16si)
189 { __D, __C, __B, __A, __D, __C, __B, __A,
190 __D, __C, __B, __A, __D, __C, __B, __A };
191 }
192
193 extern __inline __m512i
194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
195 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
196 long long __D)
197 {
198 return __extension__ (__m512i) (__v8di)
199 { __D, __C, __B, __A, __D, __C, __B, __A };
200 }
201
202 extern __inline __m512d
203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
204 _mm512_set4_pd (double __A, double __B, double __C, double __D)
205 {
206 return __extension__ (__m512d)
207 { __D, __C, __B, __A, __D, __C, __B, __A };
208 }
209
210 extern __inline __m512
211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
212 _mm512_set4_ps (float __A, float __B, float __C, float __D)
213 {
214 return __extension__ (__m512)
215 { __D, __C, __B, __A, __D, __C, __B, __A,
216 __D, __C, __B, __A, __D, __C, __B, __A };
217 }
218
/* Reversed-argument forms of the _mm512_set4_* constructors: each macro
   passes its four arguments to the matching _mm512_set4_* function in the
   opposite order.  */
#define _mm512_setr4_epi64(__e0, __e1, __e2, __e3) \
  _mm512_set4_epi64 (__e3, __e2, __e1, __e0)

#define _mm512_setr4_epi32(__e0, __e1, __e2, __e3) \
  _mm512_set4_epi32 (__e3, __e2, __e1, __e0)

#define _mm512_setr4_pd(__e0, __e1, __e2, __e3) \
  _mm512_set4_pd (__e3, __e2, __e1, __e0)

#define _mm512_setr4_ps(__e0, __e1, __e2, __e3) \
  _mm512_set4_ps (__e3, __e2, __e1, __e0)
230
231 extern __inline __m512
232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
233 _mm512_setzero_ps (void)
234 {
235 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
236 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
237 }
238
239 extern __inline __m512d
240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
241 _mm512_setzero_pd (void)
242 {
243 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
244 }
245
246 extern __inline __m512i
247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
248 _mm512_setzero_epi32 (void)
249 {
250 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
251 }
252
253 extern __inline __m512i
254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
255 _mm512_setzero_si512 (void)
256 {
257 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
258 }
259
260 extern __inline __m512d
261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
262 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
263 {
264 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
265 (__v8df) __W,
266 (__mmask8) __U);
267 }
268
269 extern __inline __m512d
270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
271 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
272 {
273 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
274 (__v8df)
275 _mm512_setzero_pd (),
276 (__mmask8) __U);
277 }
278
279 extern __inline __m512
280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
281 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
282 {
283 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
284 (__v16sf) __W,
285 (__mmask16) __U);
286 }
287
288 extern __inline __m512
289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
290 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
291 {
292 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
293 (__v16sf)
294 _mm512_setzero_ps (),
295 (__mmask16) __U);
296 }
297
298 extern __inline __m512d
299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
300 _mm512_load_pd (void const *__P)
301 {
302 return *(__m512d *) __P;
303 }
304
305 extern __inline __m512d
306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
307 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
308 {
309 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
310 (__v8df) __W,
311 (__mmask8) __U);
312 }
313
314 extern __inline __m512d
315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
316 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
317 {
318 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
319 (__v8df)
320 _mm512_setzero_pd (),
321 (__mmask8) __U);
322 }
323
324 extern __inline void
325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
326 _mm512_store_pd (void *__P, __m512d __A)
327 {
328 *(__m512d *) __P = __A;
329 }
330
331 extern __inline void
332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
333 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
334 {
335 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
336 (__mmask8) __U);
337 }
338
339 extern __inline __m512
340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
341 _mm512_load_ps (void const *__P)
342 {
343 return *(__m512 *) __P;
344 }
345
346 extern __inline __m512
347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
348 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
349 {
350 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
351 (__v16sf) __W,
352 (__mmask16) __U);
353 }
354
355 extern __inline __m512
356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
357 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
358 {
359 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
360 (__v16sf)
361 _mm512_setzero_ps (),
362 (__mmask16) __U);
363 }
364
365 extern __inline void
366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
367 _mm512_store_ps (void *__P, __m512 __A)
368 {
369 *(__m512 *) __P = __A;
370 }
371
372 extern __inline void
373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
374 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
375 {
376 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
377 (__mmask16) __U);
378 }
379
380 extern __inline __m512i
381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
382 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
383 {
384 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
385 (__v8di) __W,
386 (__mmask8) __U);
387 }
388
389 extern __inline __m512i
390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
391 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
392 {
393 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
394 (__v8di)
395 _mm512_setzero_si512 (),
396 (__mmask8) __U);
397 }
398
399 extern __inline __m512i
400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
401 _mm512_load_epi64 (void const *__P)
402 {
403 return *(__m512i *) __P;
404 }
405
406 extern __inline __m512i
407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
408 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
409 {
410 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
411 (__v8di) __W,
412 (__mmask8) __U);
413 }
414
415 extern __inline __m512i
416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
417 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
418 {
419 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
420 (__v8di)
421 _mm512_setzero_si512 (),
422 (__mmask8) __U);
423 }
424
425 extern __inline void
426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
427 _mm512_store_epi64 (void *__P, __m512i __A)
428 {
429 *(__m512i *) __P = __A;
430 }
431
432 extern __inline void
433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
434 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
435 {
436 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
437 (__mmask8) __U);
438 }
439
440 extern __inline __m512i
441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
442 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
443 {
444 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
445 (__v16si) __W,
446 (__mmask16) __U);
447 }
448
449 extern __inline __m512i
450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
451 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
452 {
453 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
454 (__v16si)
455 _mm512_setzero_si512 (),
456 (__mmask16) __U);
457 }
458
459 extern __inline __m512i
460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
461 _mm512_load_si512 (void const *__P)
462 {
463 return *(__m512i *) __P;
464 }
465
466 extern __inline __m512i
467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
468 _mm512_load_epi32 (void const *__P)
469 {
470 return *(__m512i *) __P;
471 }
472
473 extern __inline __m512i
474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
475 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
476 {
477 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
478 (__v16si) __W,
479 (__mmask16) __U);
480 }
481
482 extern __inline __m512i
483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
484 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
485 {
486 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
487 (__v16si)
488 _mm512_setzero_si512 (),
489 (__mmask16) __U);
490 }
491
492 extern __inline void
493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
494 _mm512_store_si512 (void *__P, __m512i __A)
495 {
496 *(__m512i *) __P = __A;
497 }
498
499 extern __inline void
500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
501 _mm512_store_epi32 (void *__P, __m512i __A)
502 {
503 *(__m512i *) __P = __A;
504 }
505
506 extern __inline void
507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
508 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
509 {
510 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
511 (__mmask16) __U);
512 }
513
514 extern __inline __m512i
515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
516 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
517 {
518 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
519 (__v16si) __B,
520 (__v16si)
521 _mm512_undefined_si512 (),
522 (__mmask16) -1);
523 }
524
525 extern __inline __m512i
526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
527 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
528 {
529 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
530 (__v16si) __B,
531 (__v16si)
532 _mm512_setzero_si512 (),
533 __M);
534 }
535
536 extern __inline __m512i
537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
538 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
539 {
540 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
541 (__v16si) __B,
542 (__v16si) __W, __M);
543 }
544
545 extern __inline __m512i
546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
547 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
548 {
549 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
550 (__v16si) __Y,
551 (__v16si)
552 _mm512_undefined_si512 (),
553 (__mmask16) -1);
554 }
555
556 extern __inline __m512i
557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
558 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
559 {
560 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
561 (__v16si) __Y,
562 (__v16si) __W,
563 (__mmask16) __U);
564 }
565
566 extern __inline __m512i
567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
568 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
569 {
570 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
571 (__v16si) __Y,
572 (__v16si)
573 _mm512_setzero_si512 (),
574 (__mmask16) __U);
575 }
576
577 extern __inline __m512i
578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
579 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
580 {
581 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
582 (__v16si) __Y,
583 (__v16si)
584 _mm512_undefined_si512 (),
585 (__mmask16) -1);
586 }
587
588 extern __inline __m512i
589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
590 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
591 {
592 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
593 (__v16si) __Y,
594 (__v16si) __W,
595 (__mmask16) __U);
596 }
597
598 extern __inline __m512i
599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
600 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
601 {
602 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
603 (__v16si) __Y,
604 (__v16si)
605 _mm512_setzero_si512 (),
606 (__mmask16) __U);
607 }
608
609 extern __inline __m512i
610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
611 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
612 {
613 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
614 (__v16si) __Y,
615 (__v16si)
616 _mm512_undefined_si512 (),
617 (__mmask16) -1);
618 }
619
620 extern __inline __m512i
621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
622 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
623 {
624 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
625 (__v16si) __Y,
626 (__v16si) __W,
627 (__mmask16) __U);
628 }
629
630 extern __inline __m512i
631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
632 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
633 {
634 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
635 (__v16si) __Y,
636 (__v16si)
637 _mm512_setzero_si512 (),
638 (__mmask16) __U);
639 }
640
641 extern __inline __m512i
642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
643 _mm512_add_epi64 (__m512i __A, __m512i __B)
644 {
645 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
646 (__v8di) __B,
647 (__v8di)
648 _mm512_undefined_si512 (),
649 (__mmask8) -1);
650 }
651
652 extern __inline __m512i
653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
654 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
655 {
656 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
657 (__v8di) __B,
658 (__v8di) __W,
659 (__mmask8) __U);
660 }
661
662 extern __inline __m512i
663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
664 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
665 {
666 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
667 (__v8di) __B,
668 (__v8di)
669 _mm512_setzero_si512 (),
670 (__mmask8) __U);
671 }
672
673 extern __inline __m512i
674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
675 _mm512_sub_epi64 (__m512i __A, __m512i __B)
676 {
677 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
678 (__v8di) __B,
679 (__v8di)
680 _mm512_undefined_pd (),
681 (__mmask8) -1);
682 }
683
684 extern __inline __m512i
685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
686 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
687 {
688 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
689 (__v8di) __B,
690 (__v8di) __W,
691 (__mmask8) __U);
692 }
693
694 extern __inline __m512i
695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
696 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
697 {
698 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
699 (__v8di) __B,
700 (__v8di)
701 _mm512_setzero_si512 (),
702 (__mmask8) __U);
703 }
704
705 extern __inline __m512i
706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
707 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
708 {
709 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
710 (__v8di) __Y,
711 (__v8di)
712 _mm512_undefined_pd (),
713 (__mmask8) -1);
714 }
715
716 extern __inline __m512i
717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
718 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
719 {
720 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
721 (__v8di) __Y,
722 (__v8di) __W,
723 (__mmask8) __U);
724 }
725
726 extern __inline __m512i
727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
728 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
729 {
730 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
731 (__v8di) __Y,
732 (__v8di)
733 _mm512_setzero_si512 (),
734 (__mmask8) __U);
735 }
736
737 extern __inline __m512i
738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
739 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
740 {
741 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
742 (__v8di) __Y,
743 (__v8di)
744 _mm512_undefined_si512 (),
745 (__mmask8) -1);
746 }
747
748 extern __inline __m512i
749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
750 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
751 {
752 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
753 (__v8di) __Y,
754 (__v8di) __W,
755 (__mmask8) __U);
756 }
757
758 extern __inline __m512i
759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
760 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
761 {
762 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
763 (__v8di) __Y,
764 (__v8di)
765 _mm512_setzero_si512 (),
766 (__mmask8) __U);
767 }
768
769 extern __inline __m512i
770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
771 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
772 {
773 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
774 (__v8di) __Y,
775 (__v8di)
776 _mm512_undefined_si512 (),
777 (__mmask8) -1);
778 }
779
780 extern __inline __m512i
781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
782 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
783 {
784 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
785 (__v8di) __Y,
786 (__v8di) __W,
787 (__mmask8) __U);
788 }
789
790 extern __inline __m512i
791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
792 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
793 {
794 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
795 (__v8di) __Y,
796 (__v8di)
797 _mm512_setzero_si512 (),
798 (__mmask8) __U);
799 }
800
801 extern __inline __m512i
802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
803 _mm512_add_epi32 (__m512i __A, __m512i __B)
804 {
805 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
806 (__v16si) __B,
807 (__v16si)
808 _mm512_undefined_si512 (),
809 (__mmask16) -1);
810 }
811
812 extern __inline __m512i
813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
814 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
815 {
816 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
817 (__v16si) __B,
818 (__v16si) __W,
819 (__mmask16) __U);
820 }
821
822 extern __inline __m512i
823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
824 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
825 {
826 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
827 (__v16si) __B,
828 (__v16si)
829 _mm512_setzero_si512 (),
830 (__mmask16) __U);
831 }
832
833 extern __inline __m512i
834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
835 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
836 {
837 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
838 (__v16si) __Y,
839 (__v8di)
840 _mm512_undefined_si512 (),
841 (__mmask8) -1);
842 }
843
844 extern __inline __m512i
845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
847 {
848 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
849 (__v16si) __Y,
850 (__v8di) __W, __M);
851 }
852
853 extern __inline __m512i
854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
855 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
856 {
857 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
858 (__v16si) __Y,
859 (__v8di)
860 _mm512_setzero_si512 (),
861 __M);
862 }
863
864 extern __inline __m512i
865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
866 _mm512_sub_epi32 (__m512i __A, __m512i __B)
867 {
868 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
869 (__v16si) __B,
870 (__v16si)
871 _mm512_undefined_si512 (),
872 (__mmask16) -1);
873 }
874
875 extern __inline __m512i
876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
877 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
878 {
879 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
880 (__v16si) __B,
881 (__v16si) __W,
882 (__mmask16) __U);
883 }
884
885 extern __inline __m512i
886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
887 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
888 {
889 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
890 (__v16si) __B,
891 (__v16si)
892 _mm512_setzero_si512 (),
893 (__mmask16) __U);
894 }
895
896 extern __inline __m512i
897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
898 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
899 {
900 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
901 (__v16si) __Y,
902 (__v8di)
903 _mm512_undefined_si512 (),
904 (__mmask8) -1);
905 }
906
907 extern __inline __m512i
908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
909 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
910 {
911 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
912 (__v16si) __Y,
913 (__v8di) __W, __M);
914 }
915
916 extern __inline __m512i
917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
918 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
919 {
920 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
921 (__v16si) __Y,
922 (__v8di)
923 _mm512_setzero_si512 (),
924 __M);
925 }
926
/* _mm512_slli_epi64 and its mask/maskz variants.  All forms call the
   psllqi512 mask builtin with the value, the count, a third vector operand
   and a mask.  Inline functions are used when __OPTIMIZE__ is defined and
   macros otherwise.
   NOTE(review): presumably the macro forms exist so the count reaches the
   builtin as a constant expression without inlining -- confirm.  */
#ifdef __OPTIMIZE__
/* Unmasked form: undefined third operand, all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  __v8di __undef = (__v8di) _mm512_undefined_si512 ();
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  __undef, (__mmask8) -1);
}

/* Masked form: __W is the third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  __v8di __vw = (__v8di) __W;
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  __vw, (__mmask8) __U);
}

/* Zero-masked form: all-zero third operand, __U the mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  __v8di __zero = (__v8di) _mm512_setzero_si512 ();
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  __zero, (__mmask8) __U);
}
#else
/* Macro forms; the count argument is cast to int.  */
#define _mm512_slli_epi64(V, N) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(V), \
                                            (int)(N), \
                                            (__v8di)(__m512i) \
                                            _mm512_undefined_si512 (), \
                                            (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, V, N) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(V), \
                                            (int)(N), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, V, N) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(V), \
                                            (int)(N), \
                                            (__v8di)(__m512i) \
                                            _mm512_setzero_si512 (), \
                                            (__mmask8)(U)))
#endif
973
974 extern __inline __m512i
975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
976 _mm512_sll_epi64 (__m512i __A, __m128i __B)
977 {
978 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
979 (__v2di) __B,
980 (__v8di)
981 _mm512_undefined_si512 (),
982 (__mmask8) -1);
983 }
984
985 extern __inline __m512i
986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
987 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
988 {
989 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
990 (__v2di) __B,
991 (__v8di) __W,
992 (__mmask8) __U);
993 }
994
995 extern __inline __m512i
996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
997 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
998 {
999 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
1000 (__v2di) __B,
1001 (__v8di)
1002 _mm512_setzero_si512 (),
1003 (__mmask8) __U);
1004 }
1005
1006 #ifdef __OPTIMIZE__
1007 extern __inline __m512i
1008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1009 _mm512_srli_epi64 (__m512i __A, unsigned int __B)
1010 {
1011 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1012 (__v8di)
1013 _mm512_undefined_si512 (),
1014 (__mmask8) -1);
1015 }
1016
1017 extern __inline __m512i
1018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1019 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1020 __m512i __A, unsigned int __B)
1021 {
1022 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1023 (__v8di) __W,
1024 (__mmask8) __U);
1025 }
1026
1027 extern __inline __m512i
1028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1029 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1030 {
1031 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1032 (__v8di)
1033 _mm512_setzero_si512 (),
1034 (__mmask8) __U);
1035 }
1036 #else
/* Macro forms of the srli_epi64 intrinsics, used when __OPTIMIZE__ is not
   defined.  Each expands to one __builtin_ia32_psrlqi512_mask call.  */
#define _mm512_srli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
    (__v8di)(__m512i)(X), (int)(C), \
    (__v8di)(__m512i)_mm512_undefined_si512 (), (__mmask8)-1))

#define _mm512_mask_srli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
    (__v8di)(__m512i)(X), (int)(C), \
    (__v8di)(__m512i)(W), (__mmask8)(U)))

#define _mm512_maskz_srli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ( \
    (__v8di)(__m512i)(X), (int)(C), \
    (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(U)))
1051 #endif
1052
1053 extern __inline __m512i
1054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1055 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1056 {
1057 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1058 (__v2di) __B,
1059 (__v8di)
1060 _mm512_undefined_si512 (),
1061 (__mmask8) -1);
1062 }
1063
1064 extern __inline __m512i
1065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1066 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1067 {
1068 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1069 (__v2di) __B,
1070 (__v8di) __W,
1071 (__mmask8) __U);
1072 }
1073
1074 extern __inline __m512i
1075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1076 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1077 {
1078 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1079 (__v2di) __B,
1080 (__v8di)
1081 _mm512_setzero_si512 (),
1082 (__mmask8) __U);
1083 }
1084
1085 #ifdef __OPTIMIZE__
1086 extern __inline __m512i
1087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1088 _mm512_srai_epi64 (__m512i __A, unsigned int __B)
1089 {
1090 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1091 (__v8di)
1092 _mm512_undefined_si512 (),
1093 (__mmask8) -1);
1094 }
1095
1096 extern __inline __m512i
1097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1098 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1099 unsigned int __B)
1100 {
1101 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1102 (__v8di) __W,
1103 (__mmask8) __U);
1104 }
1105
1106 extern __inline __m512i
1107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1108 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1109 {
1110 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1111 (__v8di)
1112 _mm512_setzero_si512 (),
1113 (__mmask8) __U);
1114 }
1115 #else
/* Macro forms of the srai_epi64 intrinsics, used when __OPTIMIZE__ is not
   defined.  Each expands to one __builtin_ia32_psraqi512_mask call.  */
#define _mm512_srai_epi64(X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
    (__v8di)(__m512i)(X), (int)(C), \
    (__v8di)(__m512i)_mm512_undefined_si512 (), (__mmask8)-1))

#define _mm512_mask_srai_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
    (__v8di)(__m512i)(X), (int)(C), \
    (__v8di)(__m512i)(W), (__mmask8)(U)))

#define _mm512_maskz_srai_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ( \
    (__v8di)(__m512i)(X), (int)(C), \
    (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(U)))
1130 #endif
1131
1132 extern __inline __m512i
1133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1134 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1135 {
1136 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1137 (__v2di) __B,
1138 (__v8di)
1139 _mm512_undefined_si512 (),
1140 (__mmask8) -1);
1141 }
1142
1143 extern __inline __m512i
1144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1145 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1146 {
1147 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1148 (__v2di) __B,
1149 (__v8di) __W,
1150 (__mmask8) __U);
1151 }
1152
1153 extern __inline __m512i
1154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1155 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1156 {
1157 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1158 (__v2di) __B,
1159 (__v8di)
1160 _mm512_setzero_si512 (),
1161 (__mmask8) __U);
1162 }
1163
1164 #ifdef __OPTIMIZE__
1165 extern __inline __m512i
1166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1167 _mm512_slli_epi32 (__m512i __A, unsigned int __B)
1168 {
1169 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1170 (__v16si)
1171 _mm512_undefined_si512 (),
1172 (__mmask16) -1);
1173 }
1174
1175 extern __inline __m512i
1176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1177 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1178 unsigned int __B)
1179 {
1180 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1181 (__v16si) __W,
1182 (__mmask16) __U);
1183 }
1184
1185 extern __inline __m512i
1186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1187 _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1188 {
1189 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1190 (__v16si)
1191 _mm512_setzero_si512 (),
1192 (__mmask16) __U);
1193 }
1194 #else
/* Macro forms of the slli_epi32 intrinsics, used when __OPTIMIZE__ is not
   defined.  Each expands to one __builtin_ia32_pslldi512_mask call.  */
#define _mm512_slli_epi32(X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
    (__v16si)(__m512i)(X), (int)(C), \
    (__v16si)(__m512i)_mm512_undefined_si512 (), (__mmask16)-1))

#define _mm512_mask_slli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
    (__v16si)(__m512i)(X), (int)(C), \
    (__v16si)(__m512i)(W), (__mmask16)(U)))

#define _mm512_maskz_slli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ( \
    (__v16si)(__m512i)(X), (int)(C), \
    (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(U)))
1209 #endif
1210
1211 extern __inline __m512i
1212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1213 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1214 {
1215 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1216 (__v4si) __B,
1217 (__v16si)
1218 _mm512_undefined_si512 (),
1219 (__mmask16) -1);
1220 }
1221
1222 extern __inline __m512i
1223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1224 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1225 {
1226 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1227 (__v4si) __B,
1228 (__v16si) __W,
1229 (__mmask16) __U);
1230 }
1231
1232 extern __inline __m512i
1233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1234 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1235 {
1236 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1237 (__v4si) __B,
1238 (__v16si)
1239 _mm512_setzero_si512 (),
1240 (__mmask16) __U);
1241 }
1242
1243 #ifdef __OPTIMIZE__
1244 extern __inline __m512i
1245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1246 _mm512_srli_epi32 (__m512i __A, unsigned int __B)
1247 {
1248 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1249 (__v16si)
1250 _mm512_undefined_si512 (),
1251 (__mmask16) -1);
1252 }
1253
1254 extern __inline __m512i
1255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1256 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1257 __m512i __A, unsigned int __B)
1258 {
1259 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1260 (__v16si) __W,
1261 (__mmask16) __U);
1262 }
1263
1264 extern __inline __m512i
1265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1266 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1267 {
1268 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1269 (__v16si)
1270 _mm512_setzero_si512 (),
1271 (__mmask16) __U);
1272 }
1273 #else
/* Macro forms of the srli_epi32 intrinsics, used when __OPTIMIZE__ is not
   defined.  Each expands to one __builtin_ia32_psrldi512_mask call.  */
#define _mm512_srli_epi32(X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
    (__v16si)(__m512i)(X), (int)(C), \
    (__v16si)(__m512i)_mm512_undefined_si512 (), (__mmask16)-1))

#define _mm512_mask_srli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
    (__v16si)(__m512i)(X), (int)(C), \
    (__v16si)(__m512i)(W), (__mmask16)(U)))

#define _mm512_maskz_srli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ( \
    (__v16si)(__m512i)(X), (int)(C), \
    (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(U)))
1288 #endif
1289
1290 extern __inline __m512i
1291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1292 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1293 {
1294 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1295 (__v4si) __B,
1296 (__v16si)
1297 _mm512_undefined_si512 (),
1298 (__mmask16) -1);
1299 }
1300
1301 extern __inline __m512i
1302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1303 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1304 {
1305 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1306 (__v4si) __B,
1307 (__v16si) __W,
1308 (__mmask16) __U);
1309 }
1310
1311 extern __inline __m512i
1312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1313 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1314 {
1315 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1316 (__v4si) __B,
1317 (__v16si)
1318 _mm512_setzero_si512 (),
1319 (__mmask16) __U);
1320 }
1321
1322 #ifdef __OPTIMIZE__
1323 extern __inline __m512i
1324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1325 _mm512_srai_epi32 (__m512i __A, unsigned int __B)
1326 {
1327 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1328 (__v16si)
1329 _mm512_undefined_si512 (),
1330 (__mmask16) -1);
1331 }
1332
1333 extern __inline __m512i
1334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1335 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1336 unsigned int __B)
1337 {
1338 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1339 (__v16si) __W,
1340 (__mmask16) __U);
1341 }
1342
1343 extern __inline __m512i
1344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1345 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1346 {
1347 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1348 (__v16si)
1349 _mm512_setzero_si512 (),
1350 (__mmask16) __U);
1351 }
1352 #else
/* Macro forms of the srai_epi32 intrinsics, used when __OPTIMIZE__ is not
   defined.  Each expands to one __builtin_ia32_psradi512_mask call.  */
#define _mm512_srai_epi32(X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
    (__v16si)(__m512i)(X), (int)(C), \
    (__v16si)(__m512i)_mm512_undefined_si512 (), (__mmask16)-1))

#define _mm512_mask_srai_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
    (__v16si)(__m512i)(X), (int)(C), \
    (__v16si)(__m512i)(W), (__mmask16)(U)))

#define _mm512_maskz_srai_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ( \
    (__v16si)(__m512i)(X), (int)(C), \
    (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(U)))
1367 #endif
1368
1369 extern __inline __m512i
1370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1371 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1372 {
1373 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1374 (__v4si) __B,
1375 (__v16si)
1376 _mm512_undefined_si512 (),
1377 (__mmask16) -1);
1378 }
1379
1380 extern __inline __m512i
1381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1382 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1383 {
1384 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1385 (__v4si) __B,
1386 (__v16si) __W,
1387 (__mmask16) __U);
1388 }
1389
1390 extern __inline __m512i
1391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1392 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1393 {
1394 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1395 (__v4si) __B,
1396 (__v16si)
1397 _mm512_setzero_si512 (),
1398 (__mmask16) __U);
1399 }
1400
1401 #ifdef __OPTIMIZE__
1402 extern __inline __m128d
1403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1404 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1405 {
1406 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1407 (__v2df) __B,
1408 __R);
1409 }
1410
1411 extern __inline __m128
1412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1413 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1414 {
1415 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1416 (__v4sf) __B,
1417 __R);
1418 }
1419
1420 extern __inline __m128d
1421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1422 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1423 {
1424 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1425 (__v2df) __B,
1426 __R);
1427 }
1428
1429 extern __inline __m128
1430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1431 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1432 {
1433 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1434 (__v4sf) __B,
1435 __R);
1436 }
1437
1438 #else
/* Macro forms of the scalar add/sub *_round intrinsics, used when
   __OPTIMIZE__ is not defined.  Every argument is parenthesized and cast
   the same way the inline definitions cast theirs, and each expansion is
   wrapped in parentheses so it stays one primary expression wherever it
   is used (for example when followed by a postfix operator).  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round ((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), (int)(C)))

#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round ((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), (int)(C)))

#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round ((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), (int)(C)))

#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round ((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), (int)(C)))
1450 #endif
1451
1452 #ifdef __OPTIMIZE__
1453 extern __inline __m512i
1454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1455 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1456 {
1457 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1458 (__v8di) __B,
1459 (__v8di) __C, imm,
1460 (__mmask8) -1);
1461 }
1462
1463 extern __inline __m512i
1464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1466 __m512i __C, const int imm)
1467 {
1468 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1469 (__v8di) __B,
1470 (__v8di) __C, imm,
1471 (__mmask8) __U);
1472 }
1473
1474 extern __inline __m512i
1475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1476 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1477 __m512i __C, const int imm)
1478 {
1479 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1480 (__v8di) __B,
1481 (__v8di) __C,
1482 imm, (__mmask8) __U);
1483 }
1484
1485 extern __inline __m512i
1486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1487 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1488 {
1489 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1490 (__v16si) __B,
1491 (__v16si) __C,
1492 imm, (__mmask16) -1);
1493 }
1494
1495 extern __inline __m512i
1496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1497 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1498 __m512i __C, const int imm)
1499 {
1500 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1501 (__v16si) __B,
1502 (__v16si) __C,
1503 imm, (__mmask16) __U);
1504 }
1505
1506 extern __inline __m512i
1507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1508 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1509 __m512i __C, const int imm)
1510 {
1511 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1512 (__v16si) __B,
1513 (__v16si) __C,
1514 imm, (__mmask16) __U);
1515 }
1516 #else
/* Macro forms of the ternarylogic intrinsics, used when __OPTIMIZE__ is
   not defined.  The plain and mask forms expand to the *_mask builtin, the
   maskz forms to the *_maskz builtin.  */
#define _mm512_ternarylogic_epi64(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
    (__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), \
    (__v8di)(__m512i)(C), \
    (int)(I), (__mmask8)-1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ( \
    (__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), \
    (__v8di)(__m512i)(C), \
    (int)(I), (__mmask8)(U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ( \
    (__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), \
    (__v8di)(__m512i)(C), \
    (int)(I), (__mmask8)(U)))
#define _mm512_ternarylogic_epi32(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
    (__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), \
    (__v16si)(__m512i)(C), \
    (int)(I), (__mmask16)-1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ( \
    (__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), \
    (__v16si)(__m512i)(C), \
    (int)(I), (__mmask16)(U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ( \
    (__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), \
    (__v16si)(__m512i)(C), \
    (int)(I), (__mmask16)(U)))
1538 #endif
1539
1540 extern __inline __m512d
1541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1542 _mm512_rcp14_pd (__m512d __A)
1543 {
1544 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1545 (__v8df)
1546 _mm512_undefined_pd (),
1547 (__mmask8) -1);
1548 }
1549
1550 extern __inline __m512d
1551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1552 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1553 {
1554 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1555 (__v8df) __W,
1556 (__mmask8) __U);
1557 }
1558
1559 extern __inline __m512d
1560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1561 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1562 {
1563 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1564 (__v8df)
1565 _mm512_setzero_pd (),
1566 (__mmask8) __U);
1567 }
1568
1569 extern __inline __m512
1570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1571 _mm512_rcp14_ps (__m512 __A)
1572 {
1573 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1574 (__v16sf)
1575 _mm512_undefined_ps (),
1576 (__mmask16) -1);
1577 }
1578
1579 extern __inline __m512
1580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1581 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1582 {
1583 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1584 (__v16sf) __W,
1585 (__mmask16) __U);
1586 }
1587
1588 extern __inline __m512
1589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1590 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1591 {
1592 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1593 (__v16sf)
1594 _mm512_setzero_ps (),
1595 (__mmask16) __U);
1596 }
1597
1598 extern __inline __m128d
1599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1600 _mm_rcp14_sd (__m128d __A, __m128d __B)
1601 {
1602 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1603 (__v2df) __A);
1604 }
1605
1606 extern __inline __m128
1607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1608 _mm_rcp14_ss (__m128 __A, __m128 __B)
1609 {
1610 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1611 (__v4sf) __A);
1612 }
1613
1614 extern __inline __m512d
1615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1616 _mm512_rsqrt14_pd (__m512d __A)
1617 {
1618 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1619 (__v8df)
1620 _mm512_undefined_pd (),
1621 (__mmask8) -1);
1622 }
1623
1624 extern __inline __m512d
1625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1627 {
1628 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1629 (__v8df) __W,
1630 (__mmask8) __U);
1631 }
1632
1633 extern __inline __m512d
1634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1635 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1636 {
1637 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1638 (__v8df)
1639 _mm512_setzero_pd (),
1640 (__mmask8) __U);
1641 }
1642
1643 extern __inline __m512
1644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1645 _mm512_rsqrt14_ps (__m512 __A)
1646 {
1647 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1648 (__v16sf)
1649 _mm512_undefined_ps (),
1650 (__mmask16) -1);
1651 }
1652
1653 extern __inline __m512
1654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1655 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1656 {
1657 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1658 (__v16sf) __W,
1659 (__mmask16) __U);
1660 }
1661
1662 extern __inline __m512
1663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1664 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1665 {
1666 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1667 (__v16sf)
1668 _mm512_setzero_ps (),
1669 (__mmask16) __U);
1670 }
1671
1672 extern __inline __m128d
1673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1674 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1675 {
1676 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1677 (__v2df) __A);
1678 }
1679
1680 extern __inline __m128
1681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1682 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1683 {
1684 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1685 (__v4sf) __A);
1686 }
1687
1688 #ifdef __OPTIMIZE__
1689 extern __inline __m512d
1690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1691 _mm512_sqrt_round_pd (__m512d __A, const int __R)
1692 {
1693 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1694 (__v8df)
1695 _mm512_undefined_pd (),
1696 (__mmask8) -1, __R);
1697 }
1698
1699 extern __inline __m512d
1700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1701 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1702 const int __R)
1703 {
1704 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1705 (__v8df) __W,
1706 (__mmask8) __U, __R);
1707 }
1708
1709 extern __inline __m512d
1710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1711 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1712 {
1713 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1714 (__v8df)
1715 _mm512_setzero_pd (),
1716 (__mmask8) __U, __R);
1717 }
1718
1719 extern __inline __m512
1720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1721 _mm512_sqrt_round_ps (__m512 __A, const int __R)
1722 {
1723 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1724 (__v16sf)
1725 _mm512_undefined_ps (),
1726 (__mmask16) -1, __R);
1727 }
1728
1729 extern __inline __m512
1730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1731 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1732 {
1733 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1734 (__v16sf) __W,
1735 (__mmask16) __U, __R);
1736 }
1737
1738 extern __inline __m512
1739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1741 {
1742 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1743 (__v16sf)
1744 _mm512_setzero_ps (),
1745 (__mmask16) __U, __R);
1746 }
1747
1748 extern __inline __m128d
1749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1750 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1751 {
1752 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1753 (__v2df) __A,
1754 __R);
1755 }
1756
1757 extern __inline __m128
1758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1759 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1760 {
1761 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1762 (__v4sf) __A,
1763 __R);
1764 }
1765 #else
/* Macro forms of the sqrt_round intrinsics, used when __OPTIMIZE__ is not
   defined.  Arguments are parenthesized and cast as in the inline
   definitions, and every expansion is wrapped in parentheses.

   The scalar macros hand B to the builtin first and A second, exactly as
   the inline _mm_sqrt_round_sd / _mm_sqrt_round_ss do; passing (A, B)
   would make unoptimized builds compute a different result from optimized
   ones.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)_mm512_undefined_pd (), (__mmask8)-1, (int)(C)))

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(W), (__mmask8)(U), (int)(C)))

#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)_mm512_setzero_pd (), (__mmask8)(U), (int)(C)))

#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)_mm512_undefined_ps (), (__mmask16)-1, (int)(C)))

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(W), (__mmask16)(U), (int)(C)))

#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)_mm512_setzero_ps (), (__mmask16)(U), (int)(C)))

#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_round ((__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(A), (int)(C)))

#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_round ((__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(A), (int)(C)))
1789 #endif
1790
1791 extern __inline __m512i
1792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1793 _mm512_cvtepi8_epi32 (__m128i __A)
1794 {
1795 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1796 (__v16si)
1797 _mm512_undefined_si512 (),
1798 (__mmask16) -1);
1799 }
1800
1801 extern __inline __m512i
1802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1803 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1804 {
1805 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1806 (__v16si) __W,
1807 (__mmask16) __U);
1808 }
1809
1810 extern __inline __m512i
1811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1812 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1813 {
1814 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1815 (__v16si)
1816 _mm512_setzero_si512 (),
1817 (__mmask16) __U);
1818 }
1819
1820 extern __inline __m512i
1821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1822 _mm512_cvtepi8_epi64 (__m128i __A)
1823 {
1824 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1825 (__v8di)
1826 _mm512_undefined_si512 (),
1827 (__mmask8) -1);
1828 }
1829
1830 extern __inline __m512i
1831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1832 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1833 {
1834 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1835 (__v8di) __W,
1836 (__mmask8) __U);
1837 }
1838
1839 extern __inline __m512i
1840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1841 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1842 {
1843 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1844 (__v8di)
1845 _mm512_setzero_si512 (),
1846 (__mmask8) __U);
1847 }
1848
1849 extern __inline __m512i
1850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1851 _mm512_cvtepi16_epi32 (__m256i __A)
1852 {
1853 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1854 (__v16si)
1855 _mm512_undefined_si512 (),
1856 (__mmask16) -1);
1857 }
1858
1859 extern __inline __m512i
1860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1861 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1862 {
1863 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1864 (__v16si) __W,
1865 (__mmask16) __U);
1866 }
1867
1868 extern __inline __m512i
1869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1870 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1871 {
1872 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1873 (__v16si)
1874 _mm512_setzero_si512 (),
1875 (__mmask16) __U);
1876 }
1877
1878 extern __inline __m512i
1879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1880 _mm512_cvtepi16_epi64 (__m128i __A)
1881 {
1882 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1883 (__v8di)
1884 _mm512_undefined_si512 (),
1885 (__mmask8) -1);
1886 }
1887
1888 extern __inline __m512i
1889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1890 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1891 {
1892 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1893 (__v8di) __W,
1894 (__mmask8) __U);
1895 }
1896
1897 extern __inline __m512i
1898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1899 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1900 {
1901 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1902 (__v8di)
1903 _mm512_setzero_si512 (),
1904 (__mmask8) __U);
1905 }
1906
1907 extern __inline __m512i
1908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1909 _mm512_cvtepi32_epi64 (__m256i __X)
1910 {
1911 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1912 (__v8di)
1913 _mm512_undefined_si512 (),
1914 (__mmask8) -1);
1915 }
1916
1917 extern __inline __m512i
1918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1919 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1920 {
1921 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1922 (__v8di) __W,
1923 (__mmask8) __U);
1924 }
1925
1926 extern __inline __m512i
1927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1928 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1929 {
1930 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1931 (__v8di)
1932 _mm512_setzero_si512 (),
1933 (__mmask8) __U);
1934 }
1935
1936 extern __inline __m512i
1937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1938 _mm512_cvtepu8_epi32 (__m128i __A)
1939 {
1940 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1941 (__v16si)
1942 _mm512_undefined_si512 (),
1943 (__mmask16) -1);
1944 }
1945
1946 extern __inline __m512i
1947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1948 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1949 {
1950 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1951 (__v16si) __W,
1952 (__mmask16) __U);
1953 }
1954
1955 extern __inline __m512i
1956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1957 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1958 {
1959 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1960 (__v16si)
1961 _mm512_setzero_si512 (),
1962 (__mmask16) __U);
1963 }
1964
1965 extern __inline __m512i
1966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1967 _mm512_cvtepu8_epi64 (__m128i __A)
1968 {
1969 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1970 (__v8di)
1971 _mm512_undefined_si512 (),
1972 (__mmask8) -1);
1973 }
1974
1975 extern __inline __m512i
1976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1977 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1978 {
1979 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1980 (__v8di) __W,
1981 (__mmask8) __U);
1982 }
1983
1984 extern __inline __m512i
1985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1986 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1987 {
1988 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1989 (__v8di)
1990 _mm512_setzero_si512 (),
1991 (__mmask8) __U);
1992 }
1993
1994 extern __inline __m512i
1995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1996 _mm512_cvtepu16_epi32 (__m256i __A)
1997 {
1998 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1999 (__v16si)
2000 _mm512_undefined_si512 (),
2001 (__mmask16) -1);
2002 }
2003
2004 extern __inline __m512i
2005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2006 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
2007 {
2008 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2009 (__v16si) __W,
2010 (__mmask16) __U);
2011 }
2012
2013 extern __inline __m512i
2014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2015 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2016 {
2017 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2018 (__v16si)
2019 _mm512_setzero_si512 (),
2020 (__mmask16) __U);
2021 }
2022
2023 extern __inline __m512i
2024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2025 _mm512_cvtepu16_epi64 (__m128i __A)
2026 {
2027 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2028 (__v8di)
2029 _mm512_undefined_si512 (),
2030 (__mmask8) -1);
2031 }
2032
2033 extern __inline __m512i
2034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2035 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2036 {
2037 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2038 (__v8di) __W,
2039 (__mmask8) __U);
2040 }
2041
2042 extern __inline __m512i
2043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2044 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2045 {
2046 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2047 (__v8di)
2048 _mm512_setzero_si512 (),
2049 (__mmask8) __U);
2050 }
2051
2052 extern __inline __m512i
2053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2054 _mm512_cvtepu32_epi64 (__m256i __X)
2055 {
2056 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2057 (__v8di)
2058 _mm512_undefined_si512 (),
2059 (__mmask8) -1);
2060 }
2061
2062 extern __inline __m512i
2063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2065 {
2066 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2067 (__v8di) __W,
2068 (__mmask8) __U);
2069 }
2070
2071 extern __inline __m512i
2072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2073 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2074 {
2075 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2076 (__v8di)
2077 _mm512_setzero_si512 (),
2078 (__mmask8) __U);
2079 }
2080
2081 #ifdef __OPTIMIZE__
2082 extern __inline __m512d
2083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2084 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2085 {
2086 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2087 (__v8df) __B,
2088 (__v8df)
2089 _mm512_undefined_pd (),
2090 (__mmask8) -1, __R);
2091 }
2092
2093 extern __inline __m512d
2094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2095 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2096 __m512d __B, const int __R)
2097 {
2098 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2099 (__v8df) __B,
2100 (__v8df) __W,
2101 (__mmask8) __U, __R);
2102 }
2103
2104 extern __inline __m512d
2105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2106 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2107 const int __R)
2108 {
2109 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2110 (__v8df) __B,
2111 (__v8df)
2112 _mm512_setzero_pd (),
2113 (__mmask8) __U, __R);
2114 }
2115
2116 extern __inline __m512
2117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2118 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2119 {
2120 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2121 (__v16sf) __B,
2122 (__v16sf)
2123 _mm512_undefined_ps (),
2124 (__mmask16) -1, __R);
2125 }
2126
2127 extern __inline __m512
2128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2129 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2130 __m512 __B, const int __R)
2131 {
2132 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2133 (__v16sf) __B,
2134 (__v16sf) __W,
2135 (__mmask16) __U, __R);
2136 }
2137
2138 extern __inline __m512
2139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2140 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2141 {
2142 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2143 (__v16sf) __B,
2144 (__v16sf)
2145 _mm512_setzero_ps (),
2146 (__mmask16) __U, __R);
2147 }
2148
2149 extern __inline __m512d
2150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2151 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2152 {
2153 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2154 (__v8df) __B,
2155 (__v8df)
2156 _mm512_undefined_pd (),
2157 (__mmask8) -1, __R);
2158 }
2159
2160 extern __inline __m512d
2161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2162 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2163 __m512d __B, const int __R)
2164 {
2165 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2166 (__v8df) __B,
2167 (__v8df) __W,
2168 (__mmask8) __U, __R);
2169 }
2170
2171 extern __inline __m512d
2172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2173 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2174 const int __R)
2175 {
2176 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2177 (__v8df) __B,
2178 (__v8df)
2179 _mm512_setzero_pd (),
2180 (__mmask8) __U, __R);
2181 }
2182
2183 extern __inline __m512
2184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2185 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2186 {
2187 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2188 (__v16sf) __B,
2189 (__v16sf)
2190 _mm512_undefined_ps (),
2191 (__mmask16) -1, __R);
2192 }
2193
2194 extern __inline __m512
2195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2196 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2197 __m512 __B, const int __R)
2198 {
2199 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2200 (__v16sf) __B,
2201 (__v16sf) __W,
2202 (__mmask16) __U, __R);
2203 }
2204
2205 extern __inline __m512
2206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2207 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2208 {
2209 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2210 (__v16sf) __B,
2211 (__v16sf)
2212 _mm512_setzero_ps (),
2213 (__mmask16) __U, __R);
2214 }
2215 #else
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_add_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2218
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mask_add_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C))
2221
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_maskz_add_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2224
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_add_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2227
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mask_add_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_addps512_mask(A, B, W, U, C))
2230
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_maskz_add_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2233
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_sub_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2236
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C))
2239
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_maskz_sub_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2242
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_sub_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2245
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_subps512_mask(A, B, W, U, C))
2248
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_maskz_sub_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2251 #endif
2252
2253 #ifdef __OPTIMIZE__
2254 extern __inline __m512d
2255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2256 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2257 {
2258 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2259 (__v8df) __B,
2260 (__v8df)
2261 _mm512_undefined_pd (),
2262 (__mmask8) -1, __R);
2263 }
2264
2265 extern __inline __m512d
2266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2267 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2268 __m512d __B, const int __R)
2269 {
2270 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2271 (__v8df) __B,
2272 (__v8df) __W,
2273 (__mmask8) __U, __R);
2274 }
2275
2276 extern __inline __m512d
2277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2278 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2279 const int __R)
2280 {
2281 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2282 (__v8df) __B,
2283 (__v8df)
2284 _mm512_setzero_pd (),
2285 (__mmask8) __U, __R);
2286 }
2287
2288 extern __inline __m512
2289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2290 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2291 {
2292 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2293 (__v16sf) __B,
2294 (__v16sf)
2295 _mm512_undefined_ps (),
2296 (__mmask16) -1, __R);
2297 }
2298
2299 extern __inline __m512
2300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2301 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2302 __m512 __B, const int __R)
2303 {
2304 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2305 (__v16sf) __B,
2306 (__v16sf) __W,
2307 (__mmask16) __U, __R);
2308 }
2309
2310 extern __inline __m512
2311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2312 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2313 {
2314 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2315 (__v16sf) __B,
2316 (__v16sf)
2317 _mm512_setzero_ps (),
2318 (__mmask16) __U, __R);
2319 }
2320
2321 extern __inline __m512d
2322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2323 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2324 {
2325 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2326 (__v8df) __V,
2327 (__v8df)
2328 _mm512_undefined_pd (),
2329 (__mmask8) -1, __R);
2330 }
2331
2332 extern __inline __m512d
2333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2334 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2335 __m512d __V, const int __R)
2336 {
2337 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2338 (__v8df) __V,
2339 (__v8df) __W,
2340 (__mmask8) __U, __R);
2341 }
2342
2343 extern __inline __m512d
2344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2345 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2346 const int __R)
2347 {
2348 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2349 (__v8df) __V,
2350 (__v8df)
2351 _mm512_setzero_pd (),
2352 (__mmask8) __U, __R);
2353 }
2354
2355 extern __inline __m512
2356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2357 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2358 {
2359 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2360 (__v16sf) __B,
2361 (__v16sf)
2362 _mm512_undefined_ps (),
2363 (__mmask16) -1, __R);
2364 }
2365
2366 extern __inline __m512
2367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2368 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2369 __m512 __B, const int __R)
2370 {
2371 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2372 (__v16sf) __B,
2373 (__v16sf) __W,
2374 (__mmask16) __U, __R);
2375 }
2376
2377 extern __inline __m512
2378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2379 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2380 {
2381 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2382 (__v16sf) __B,
2383 (__v16sf)
2384 _mm512_setzero_ps (),
2385 (__mmask16) __U, __R);
2386 }
2387
2388 extern __inline __m128d
2389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2390 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2391 {
2392 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2393 (__v2df) __B,
2394 __R);
2395 }
2396
2397 extern __inline __m128
2398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2399 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2400 {
2401 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2402 (__v4sf) __B,
2403 __R);
2404 }
2405
2406 extern __inline __m128d
2407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2408 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2409 {
2410 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2411 (__v2df) __B,
2412 __R);
2413 }
2414
2415 extern __inline __m128
2416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2417 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2418 {
2419 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2420 (__v4sf) __B,
2421 __R);
2422 }
2423
2424 #else
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mul_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2427
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C))
2430
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_maskz_mul_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2433
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mul_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2436
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C))
2439
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_maskz_mul_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2442
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_div_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2445
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mask_div_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C))
2448
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_maskz_div_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2451
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_div_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2454
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mask_div_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask(A, B, W, U, C))
2457
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_maskz_div_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2460
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm_mul_round_sd(A, B, C) \
    ((__m128d)__builtin_ia32_mulsd_round(A, B, C))
2463
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm_mul_round_ss(A, B, C) \
    ((__m128)__builtin_ia32_mulss_round(A, B, C))
2466
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm_div_round_sd(A, B, C) \
    ((__m128d)__builtin_ia32_divsd_round(A, B, C))
2469
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm_div_round_ss(A, B, C) \
    ((__m128)__builtin_ia32_divss_round(A, B, C))
2472 #endif
2473
2474 #ifdef __OPTIMIZE__
2475 extern __inline __m512d
2476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2477 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2478 {
2479 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2480 (__v8df) __B,
2481 (__v8df)
2482 _mm512_undefined_pd (),
2483 (__mmask8) -1, __R);
2484 }
2485
2486 extern __inline __m512d
2487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2488 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2489 __m512d __B, const int __R)
2490 {
2491 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2492 (__v8df) __B,
2493 (__v8df) __W,
2494 (__mmask8) __U, __R);
2495 }
2496
2497 extern __inline __m512d
2498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2499 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2500 const int __R)
2501 {
2502 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2503 (__v8df) __B,
2504 (__v8df)
2505 _mm512_setzero_pd (),
2506 (__mmask8) __U, __R);
2507 }
2508
2509 extern __inline __m512
2510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2511 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2512 {
2513 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2514 (__v16sf) __B,
2515 (__v16sf)
2516 _mm512_undefined_ps (),
2517 (__mmask16) -1, __R);
2518 }
2519
2520 extern __inline __m512
2521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2522 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2523 __m512 __B, const int __R)
2524 {
2525 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2526 (__v16sf) __B,
2527 (__v16sf) __W,
2528 (__mmask16) __U, __R);
2529 }
2530
2531 extern __inline __m512
2532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2533 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2534 {
2535 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2536 (__v16sf) __B,
2537 (__v16sf)
2538 _mm512_setzero_ps (),
2539 (__mmask16) __U, __R);
2540 }
2541
2542 extern __inline __m512d
2543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2544 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2545 {
2546 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2547 (__v8df) __B,
2548 (__v8df)
2549 _mm512_undefined_pd (),
2550 (__mmask8) -1, __R);
2551 }
2552
2553 extern __inline __m512d
2554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2555 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2556 __m512d __B, const int __R)
2557 {
2558 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2559 (__v8df) __B,
2560 (__v8df) __W,
2561 (__mmask8) __U, __R);
2562 }
2563
2564 extern __inline __m512d
2565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2566 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2567 const int __R)
2568 {
2569 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2570 (__v8df) __B,
2571 (__v8df)
2572 _mm512_setzero_pd (),
2573 (__mmask8) __U, __R);
2574 }
2575
2576 extern __inline __m512
2577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2578 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2579 {
2580 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2581 (__v16sf) __B,
2582 (__v16sf)
2583 _mm512_undefined_ps (),
2584 (__mmask16) -1, __R);
2585 }
2586
2587 extern __inline __m512
2588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2589 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2590 __m512 __B, const int __R)
2591 {
2592 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2593 (__v16sf) __B,
2594 (__v16sf) __W,
2595 (__mmask16) __U, __R);
2596 }
2597
2598 extern __inline __m512
2599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2600 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2601 {
2602 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2603 (__v16sf) __B,
2604 (__v16sf)
2605 _mm512_setzero_ps (),
2606 (__mmask16) __U, __R);
2607 }
2608 #else
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_max_round_pd(A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))
2611
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R))
2614
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
    ((__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2617
/* Macro form used when __OPTIMIZE__ is not defined.  The third operand
   comes from _mm512_undefined_ps (), matching the inline-function form
   of this intrinsic (the double-precision helper was used here by
   mistake).  The whole expansion is parenthesized so the cast result
   acts as a single operand.  */
#define _mm512_max_round_ps(A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))
2620
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R))
2623
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
    ((__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2626
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_min_round_pd(A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R))
2629
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R))
2632
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
    ((__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R))
2635
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_min_round_ps(A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R))
2638
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, W, U, R))
2641
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
    ((__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R))
2644 #endif
2645
2646 #ifdef __OPTIMIZE__
2647 extern __inline __m512d
2648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2649 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2650 {
2651 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2652 (__v8df) __B,
2653 (__v8df)
2654 _mm512_undefined_pd (),
2655 (__mmask8) -1, __R);
2656 }
2657
2658 extern __inline __m512d
2659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2660 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2661 __m512d __B, const int __R)
2662 {
2663 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2664 (__v8df) __B,
2665 (__v8df) __W,
2666 (__mmask8) __U, __R);
2667 }
2668
2669 extern __inline __m512d
2670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2671 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2672 const int __R)
2673 {
2674 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2675 (__v8df) __B,
2676 (__v8df)
2677 _mm512_setzero_pd (),
2678 (__mmask8) __U, __R);
2679 }
2680
2681 extern __inline __m512
2682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2683 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2684 {
2685 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2686 (__v16sf) __B,
2687 (__v16sf)
2688 _mm512_undefined_ps (),
2689 (__mmask16) -1, __R);
2690 }
2691
2692 extern __inline __m512
2693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2694 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2695 __m512 __B, const int __R)
2696 {
2697 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2698 (__v16sf) __B,
2699 (__v16sf) __W,
2700 (__mmask16) __U, __R);
2701 }
2702
2703 extern __inline __m512
2704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2705 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2706 const int __R)
2707 {
2708 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2709 (__v16sf) __B,
2710 (__v16sf)
2711 _mm512_setzero_ps (),
2712 (__mmask16) __U, __R);
2713 }
2714
2715 extern __inline __m128d
2716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2717 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2718 {
2719 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2720 (__v2df) __B,
2721 __R);
2722 }
2723
2724 extern __inline __m128
2725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2726 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2727 {
2728 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2729 (__v4sf) __B,
2730 __R);
2731 }
2732 #else
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_scalef_round_pd(A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C))
2735
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C))
2738
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
    ((__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C))
2741
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_scalef_round_ps(A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C))
2744
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C))
2747
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
    ((__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C))
2750
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm_scalef_round_sd(A, B, C) \
    ((__m128d)__builtin_ia32_scalefsd_round(A, B, C))
2753
/* Macro form used when __OPTIMIZE__ is not defined.  The whole expansion
   is parenthesized so the cast result acts as a single operand.  */
#define _mm_scalef_round_ss(A, B, C) \
    ((__m128)__builtin_ia32_scalefss_round(A, B, C))
2756 #endif
2757
2758 #ifdef __OPTIMIZE__
2759 extern __inline __m512d
2760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2761 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2762 {
2763 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2764 (__v8df) __B,
2765 (__v8df) __C,
2766 (__mmask8) -1, __R);
2767 }
2768
2769 extern __inline __m512d
2770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2771 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2772 __m512d __C, const int __R)
2773 {
2774 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2775 (__v8df) __B,
2776 (__v8df) __C,
2777 (__mmask8) __U, __R);
2778 }
2779
2780 extern __inline __m512d
2781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2782 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2783 __mmask8 __U, const int __R)
2784 {
2785 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2786 (__v8df) __B,
2787 (__v8df) __C,
2788 (__mmask8) __U, __R);
2789 }
2790
2791 extern __inline __m512d
2792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2793 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2794 __m512d __C, const int __R)
2795 {
2796 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2797 (__v8df) __B,
2798 (__v8df) __C,
2799 (__mmask8) __U, __R);
2800 }
2801
2802 extern __inline __m512
2803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2804 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2805 {
2806 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2807 (__v16sf) __B,
2808 (__v16sf) __C,
2809 (__mmask16) -1, __R);
2810 }
2811
2812 extern __inline __m512
2813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2814 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2815 __m512 __C, const int __R)
2816 {
2817 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2818 (__v16sf) __B,
2819 (__v16sf) __C,
2820 (__mmask16) __U, __R);
2821 }
2822
2823 extern __inline __m512
2824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2825 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2826 __mmask16 __U, const int __R)
2827 {
2828 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2829 (__v16sf) __B,
2830 (__v16sf) __C,
2831 (__mmask16) __U, __R);
2832 }
2833
2834 extern __inline __m512
2835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2836 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2837 __m512 __C, const int __R)
2838 {
2839 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2840 (__v16sf) __B,
2841 (__v16sf) __C,
2842 (__mmask16) __U, __R);
2843 }
2844
2845 extern __inline __m512d
2846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2847 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2848 {
2849 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2850 (__v8df) __B,
2851 -(__v8df) __C,
2852 (__mmask8) -1, __R);
2853 }
2854
2855 extern __inline __m512d
2856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2857 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2858 __m512d __C, const int __R)
2859 {
2860 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2861 (__v8df) __B,
2862 -(__v8df) __C,
2863 (__mmask8) __U, __R);
2864 }
2865
2866 extern __inline __m512d
2867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2868 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2869 __mmask8 __U, const int __R)
2870 {
2871 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2872 (__v8df) __B,
2873 (__v8df) __C,
2874 (__mmask8) __U, __R);
2875 }
2876
2877 extern __inline __m512d
2878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2879 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2880 __m512d __C, const int __R)
2881 {
2882 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2883 (__v8df) __B,
2884 -(__v8df) __C,
2885 (__mmask8) __U, __R);
2886 }
2887
2888 extern __inline __m512
2889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2890 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2891 {
2892 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2893 (__v16sf) __B,
2894 -(__v16sf) __C,
2895 (__mmask16) -1, __R);
2896 }
2897
2898 extern __inline __m512
2899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2900 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2901 __m512 __C, const int __R)
2902 {
2903 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2904 (__v16sf) __B,
2905 -(__v16sf) __C,
2906 (__mmask16) __U, __R);
2907 }
2908
2909 extern __inline __m512
2910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2911 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2912 __mmask16 __U, const int __R)
2913 {
2914 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2915 (__v16sf) __B,
2916 (__v16sf) __C,
2917 (__mmask16) __U, __R);
2918 }
2919
2920 extern __inline __m512
2921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2922 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2923 __m512 __C, const int __R)
2924 {
2925 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2926 (__v16sf) __B,
2927 -(__v16sf) __C,
2928 (__mmask16) __U, __R);
2929 }
2930
2931 extern __inline __m512d
2932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2933 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2934 {
2935 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2936 (__v8df) __B,
2937 (__v8df) __C,
2938 (__mmask8) -1, __R);
2939 }
2940
2941 extern __inline __m512d
2942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2943 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2944 __m512d __C, const int __R)
2945 {
2946 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2947 (__v8df) __B,
2948 (__v8df) __C,
2949 (__mmask8) __U, __R);
2950 }
2951
2952 extern __inline __m512d
2953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2954 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2955 __mmask8 __U, const int __R)
2956 {
2957 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2958 (__v8df) __B,
2959 (__v8df) __C,
2960 (__mmask8) __U, __R);
2961 }
2962
2963 extern __inline __m512d
2964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2965 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2966 __m512d __C, const int __R)
2967 {
2968 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2969 (__v8df) __B,
2970 (__v8df) __C,
2971 (__mmask8) __U, __R);
2972 }
2973
2974 extern __inline __m512
2975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2976 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2977 {
2978 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2979 (__v16sf) __B,
2980 (__v16sf) __C,
2981 (__mmask16) -1, __R);
2982 }
2983
2984 extern __inline __m512
2985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2986 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2987 __m512 __C, const int __R)
2988 {
2989 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2990 (__v16sf) __B,
2991 (__v16sf) __C,
2992 (__mmask16) __U, __R);
2993 }
2994
2995 extern __inline __m512
2996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2997 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2998 __mmask16 __U, const int __R)
2999 {
3000 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3001 (__v16sf) __B,
3002 (__v16sf) __C,
3003 (__mmask16) __U, __R);
3004 }
3005
3006 extern __inline __m512
3007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3008 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3009 __m512 __C, const int __R)
3010 {
3011 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3012 (__v16sf) __B,
3013 (__v16sf) __C,
3014 (__mmask16) __U, __R);
3015 }
3016
3017 extern __inline __m512d
3018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3019 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3020 {
3021 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3022 (__v8df) __B,
3023 -(__v8df) __C,
3024 (__mmask8) -1, __R);
3025 }
3026
3027 extern __inline __m512d
3028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3029 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3030 __m512d __C, const int __R)
3031 {
3032 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3033 (__v8df) __B,
3034 -(__v8df) __C,
3035 (__mmask8) __U, __R);
3036 }
3037
3038 extern __inline __m512d
3039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3040 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3041 __mmask8 __U, const int __R)
3042 {
3043 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3044 (__v8df) __B,
3045 (__v8df) __C,
3046 (__mmask8) __U, __R);
3047 }
3048
3049 extern __inline __m512d
3050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3051 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3052 __m512d __C, const int __R)
3053 {
3054 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3055 (__v8df) __B,
3056 -(__v8df) __C,
3057 (__mmask8) __U, __R);
3058 }
3059
3060 extern __inline __m512
3061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3062 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3063 {
3064 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3065 (__v16sf) __B,
3066 -(__v16sf) __C,
3067 (__mmask16) -1, __R);
3068 }
3069
3070 extern __inline __m512
3071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3072 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3073 __m512 __C, const int __R)
3074 {
3075 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3076 (__v16sf) __B,
3077 -(__v16sf) __C,
3078 (__mmask16) __U, __R);
3079 }
3080
3081 extern __inline __m512
3082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3083 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3084 __mmask16 __U, const int __R)
3085 {
3086 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3087 (__v16sf) __B,
3088 (__v16sf) __C,
3089 (__mmask16) __U, __R);
3090 }
3091
3092 extern __inline __m512
3093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3094 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3095 __m512 __C, const int __R)
3096 {
3097 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3098 (__v16sf) __B,
3099 -(__v16sf) __C,
3100 (__mmask16) __U, __R);
3101 }
3102
3103 extern __inline __m512d
3104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3105 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3106 {
3107 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3108 (__v8df) __B,
3109 (__v8df) __C,
3110 (__mmask8) -1, __R);
3111 }
3112
3113 extern __inline __m512d
3114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3115 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3116 __m512d __C, const int __R)
3117 {
3118 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3119 (__v8df) __B,
3120 (__v8df) __C,
3121 (__mmask8) __U, __R);
3122 }
3123
3124 extern __inline __m512d
3125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3126 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3127 __mmask8 __U, const int __R)
3128 {
3129 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3130 (__v8df) __B,
3131 (__v8df) __C,
3132 (__mmask8) __U, __R);
3133 }
3134
3135 extern __inline __m512d
3136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3137 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3138 __m512d __C, const int __R)
3139 {
3140 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3141 (__v8df) __B,
3142 (__v8df) __C,
3143 (__mmask8) __U, __R);
3144 }
3145
3146 extern __inline __m512
3147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3148 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3149 {
3150 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3151 (__v16sf) __B,
3152 (__v16sf) __C,
3153 (__mmask16) -1, __R);
3154 }
3155
3156 extern __inline __m512
3157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3158 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3159 __m512 __C, const int __R)
3160 {
3161 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3162 (__v16sf) __B,
3163 (__v16sf) __C,
3164 (__mmask16) __U, __R);
3165 }
3166
3167 extern __inline __m512
3168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3169 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3170 __mmask16 __U, const int __R)
3171 {
3172 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3173 (__v16sf) __B,
3174 (__v16sf) __C,
3175 (__mmask16) __U, __R);
3176 }
3177
3178 extern __inline __m512
3179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3180 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3181 __m512 __C, const int __R)
3182 {
3183 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3184 (__v16sf) __B,
3185 (__v16sf) __C,
3186 (__mmask16) __U, __R);
3187 }
3188
3189 extern __inline __m512d
3190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3191 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3192 {
3193 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3194 (__v8df) __B,
3195 -(__v8df) __C,
3196 (__mmask8) -1, __R);
3197 }
3198
3199 extern __inline __m512d
3200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3201 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3202 __m512d __C, const int __R)
3203 {
3204 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3205 (__v8df) __B,
3206 (__v8df) __C,
3207 (__mmask8) __U, __R);
3208 }
3209
3210 extern __inline __m512d
3211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3212 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3213 __mmask8 __U, const int __R)
3214 {
3215 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3216 (__v8df) __B,
3217 (__v8df) __C,
3218 (__mmask8) __U, __R);
3219 }
3220
3221 extern __inline __m512d
3222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3223 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3224 __m512d __C, const int __R)
3225 {
3226 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3227 (__v8df) __B,
3228 -(__v8df) __C,
3229 (__mmask8) __U, __R);
3230 }
3231
3232 extern __inline __m512
3233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3234 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3235 {
3236 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3237 (__v16sf) __B,
3238 -(__v16sf) __C,
3239 (__mmask16) -1, __R);
3240 }
3241
3242 extern __inline __m512
3243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3244 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3245 __m512 __C, const int __R)
3246 {
3247 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3248 (__v16sf) __B,
3249 (__v16sf) __C,
3250 (__mmask16) __U, __R);
3251 }
3252
3253 extern __inline __m512
3254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3255 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3256 __mmask16 __U, const int __R)
3257 {
3258 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3259 (__v16sf) __B,
3260 (__v16sf) __C,
3261 (__mmask16) __U, __R);
3262 }
3263
3264 extern __inline __m512
3265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3266 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3267 __m512 __C, const int __R)
3268 {
3269 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3270 (__v16sf) __B,
3271 -(__v16sf) __C,
3272 (__mmask16) __U, __R);
3273 }
3274 #else
/* Macro forms of the fmadd_round_pd family.  Each expands to a call of
   the matching vfmaddpd512 builtin with the arguments reordered to
   (A, B, C, mask, R); the unmasked form passes -1 as the mask.  */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, -1, R)

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, U, R)

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, C, U, R)
3286
/* Macro forms of the fmadd_round_ps family.  Each expands to a call of
   the matching vfmaddps512 builtin with the arguments reordered to
   (A, B, C, mask, R); the unmasked form passes -1 as the mask.  */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, -1, R)

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, U, R)

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (A, B, C, U, R)
3298
/* Macro forms of the fmsub_round_pd family.  The plain, mask and maskz
   forms expand to the vfmaddpd512 builtins with C negated; the mask3
   form expands to the vfmsubpd512 mask3 builtin with C unchanged.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), -1, R)

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), U, R)

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmsubpd512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, -(C), U, R)
3310
/* Macro forms of the fmsub_round_ps family.  The plain, mask and maskz
   forms expand to the vfmaddps512 builtins with C negated; the mask3
   form expands to the vfmsubps512 mask3 builtin with C unchanged.  */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), -1, R)

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), U, R)

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmsubps512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (A, B, -(C), U, R)
3322
/* Macro forms of the fmaddsub_round_pd family.  Every form, including
   the merge-masked one, expands to a vfmaddsubpd512 builtin so that
   the macros agree with the inline-function definitions of the same
   names in the other preprocessor branch.  Arguments are reordered to
   (A, B, C, mask, R); the unmasked form passes -1 as the mask.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, -1, R)

/* Must be the fmaddsub builtin, not plain vfmaddpd512_mask.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, U, R)

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, C, U, R)
3334
/* Macro forms of the fmaddsub_round_ps family.  Each expands to a call
   of the matching vfmaddsubps512 builtin with the arguments reordered
   to (A, B, C, mask, R); the unmasked form passes -1 as the mask.  */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, -1, R)

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, U, R)

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, C, U, R)
3346
/* Macro forms of the fmsubadd_round_pd family.  The plain, mask and
   maskz forms expand to the vfmaddsubpd512 builtins with C negated;
   the mask3 form expands to the vfmsubaddpd512 mask3 builtin with C
   unchanged.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), -1, R)

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), U, R)

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, -(C), U, R)
3358
/* Macro forms of the fmsubadd_round_ps family.  The plain, mask and
   maskz forms expand to the vfmaddsubps512 builtins with C negated;
   the mask3 form expands to the vfmsubaddps512 mask3 builtin with C
   unchanged.  */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), -1, R)

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), U, R)

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmsubaddps512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, -(C), U, R)
3370
/* Macro forms of the fnmadd_round_pd family.  The plain, mask3 and
   maskz forms expand to the vfmaddpd512 builtins with A negated.  The
   merge-masked form expands to the dedicated vfnmaddpd512 mask builtin
   and therefore passes A unchanged, exactly as the inline-function
   definition of the same name does.  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), B, C, -1, R)

/* A is not negated here: the builtin is already the fnmadd one, so a
   second negation in the macro would cancel it.  */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, U, R)

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(A), B, C, U, R)

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), B, C, U, R)
3382
/* Macro forms of the fnmadd_round_ps family.  The plain, mask3 and
   maskz forms expand to the vfmaddps512 builtins with A negated.  The
   merge-masked form expands to the dedicated vfnmaddps512 mask builtin
   and therefore passes A unchanged, exactly as the inline-function
   definition of the same name does.  */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(A), B, C, -1, R)

/* A is not negated here: the builtin is already the fnmadd one, so a
   second negation in the macro would cancel it.  */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, U, R)

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask3 (-(A), B, C, U, R)

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(A), B, C, U, R)
3394
/* Macro forms of the fnmsub_round_pd family.  The plain and maskz
   forms expand to the vfmaddpd512 builtins with both A and C negated;
   the mask and mask3 forms expand to the vfnmsubpd512 builtins with
   the operands unchanged.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(A), B, -(C), -1, R)

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  (__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, U, R)

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  (__m512d) __builtin_ia32_vfnmsubpd512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(A), B, -(C), U, R)
3406
/* Macro forms of the fnmsub_round_ps family.  The plain and maskz
   forms expand to the vfmaddps512 builtins with both A and C negated;
   the mask and mask3 forms expand to the vfnmsubps512 builtins with
   the operands unchanged.  */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_mask (-(A), B, -(C), -1, R)

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  (__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, U, R)

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  (__m512) __builtin_ia32_vfnmsubps512_mask3 (A, B, C, U, R)

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  (__m512) __builtin_ia32_vfmaddps512_maskz (-(A), B, -(C), U, R)
3418 #endif
3419
3420 extern __inline __m512i
3421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3422 _mm512_abs_epi64 (__m512i __A)
3423 {
3424 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3425 (__v8di)
3426 _mm512_undefined_si512 (),
3427 (__mmask8) -1);
3428 }
3429
3430 extern __inline __m512i
3431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3432 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3433 {
3434 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3435 (__v8di) __W,
3436 (__mmask8) __U);
3437 }
3438
3439 extern __inline __m512i
3440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3441 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3442 {
3443 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3444 (__v8di)
3445 _mm512_setzero_si512 (),
3446 (__mmask8) __U);
3447 }
3448
3449 extern __inline __m512i
3450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3451 _mm512_abs_epi32 (__m512i __A)
3452 {
3453 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3454 (__v16si)
3455 _mm512_undefined_si512 (),
3456 (__mmask16) -1);
3457 }
3458
3459 extern __inline __m512i
3460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3461 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3462 {
3463 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3464 (__v16si) __W,
3465 (__mmask16) __U);
3466 }
3467
3468 extern __inline __m512i
3469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3470 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3471 {
3472 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3473 (__v16si)
3474 _mm512_setzero_si512 (),
3475 (__mmask16) __U);
3476 }
3477
3478 extern __inline __m512
3479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3480 _mm512_broadcastss_ps (__m128 __A)
3481 {
3482 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3483 (__v16sf)
3484 _mm512_undefined_ps (),
3485 (__mmask16) -1);
3486 }
3487
3488 extern __inline __m512
3489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3490 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3491 {
3492 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3493 (__v16sf) __O, __M);
3494 }
3495
3496 extern __inline __m512
3497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3498 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3499 {
3500 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3501 (__v16sf)
3502 _mm512_setzero_ps (),
3503 __M);
3504 }
3505
3506 extern __inline __m512d
3507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3508 _mm512_broadcastsd_pd (__m128d __A)
3509 {
3510 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3511 (__v8df)
3512 _mm512_undefined_pd (),
3513 (__mmask8) -1);
3514 }
3515
3516 extern __inline __m512d
3517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3518 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3519 {
3520 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3521 (__v8df) __O, __M);
3522 }
3523
3524 extern __inline __m512d
3525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3526 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3527 {
3528 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3529 (__v8df)
3530 _mm512_setzero_pd (),
3531 __M);
3532 }
3533
3534 extern __inline __m512i
3535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3536 _mm512_broadcastd_epi32 (__m128i __A)
3537 {
3538 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3539 (__v16si)
3540 _mm512_undefined_si512 (),
3541 (__mmask16) -1);
3542 }
3543
3544 extern __inline __m512i
3545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3546 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3547 {
3548 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3549 (__v16si) __O, __M);
3550 }
3551
3552 extern __inline __m512i
3553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3554 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3555 {
3556 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3557 (__v16si)
3558 _mm512_setzero_si512 (),
3559 __M);
3560 }
3561
3562 extern __inline __m512i
3563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3564 _mm512_set1_epi32 (int __A)
3565 {
3566 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3567 (__v16si)
3568 _mm512_undefined_si512 (),
3569 (__mmask16)(-1));
3570 }
3571
3572 extern __inline __m512i
3573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3574 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3575 {
3576 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3577 __M);
3578 }
3579
3580 extern __inline __m512i
3581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3582 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3583 {
3584 return (__m512i)
3585 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3586 (__v16si) _mm512_setzero_si512 (),
3587 __M);
3588 }
3589
3590 extern __inline __m512i
3591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3592 _mm512_broadcastq_epi64 (__m128i __A)
3593 {
3594 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3595 (__v8di)
3596 _mm512_undefined_si512 (),
3597 (__mmask8) -1);
3598 }
3599
3600 extern __inline __m512i
3601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3602 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3603 {
3604 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3605 (__v8di) __O, __M);
3606 }
3607
3608 extern __inline __m512i
3609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3610 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3611 {
3612 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3613 (__v8di)
3614 _mm512_setzero_si512 (),
3615 __M);
3616 }
3617
3618 extern __inline __m512i
3619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3620 _mm512_set1_epi64 (long long __A)
3621 {
3622 #ifdef TARGET_64BIT
3623 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3624 (__v8di)
3625 _mm512_undefined_si512 (),
3626 (__mmask8)(-1));
3627 #else
3628 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
3629 (__v8di)
3630 _mm512_undefined_si512 (),
3631 (__mmask8)(-1));
3632 #endif
3633 }
3634
3635 extern __inline __m512i
3636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3637 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3638 {
3639 #ifdef TARGET_64BIT
3640 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3641 __M);
3642 #else
3643 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O,
3644 __M);
3645 #endif
3646 }
3647
3648 extern __inline __m512i
3649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3650 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3651 {
3652 #ifdef TARGET_64BIT
3653 return (__m512i)
3654 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3655 (__v8di) _mm512_setzero_si512 (),
3656 __M);
3657 #else
3658 return (__m512i)
3659 __builtin_ia32_pbroadcastq512_mem_mask (__A,
3660 (__v8di) _mm512_setzero_si512 (),
3661 __M);
3662 #endif
3663 }
3664
3665 extern __inline __m512
3666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3667 _mm512_broadcast_f32x4 (__m128 __A)
3668 {
3669 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3670 (__v16sf)
3671 _mm512_undefined_ps (),
3672 (__mmask16) -1);
3673 }
3674
3675 extern __inline __m512
3676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3677 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3678 {
3679 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3680 (__v16sf) __O,
3681 __M);
3682 }
3683
3684 extern __inline __m512
3685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3686 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3687 {
3688 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3689 (__v16sf)
3690 _mm512_setzero_ps (),
3691 __M);
3692 }
3693
3694 extern __inline __m512i
3695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3696 _mm512_broadcast_i32x4 (__m128i __A)
3697 {
3698 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3699 (__v16si)
3700 _mm512_undefined_si512 (),
3701 (__mmask16) -1);
3702 }
3703
3704 extern __inline __m512i
3705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3706 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3707 {
3708 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3709 (__v16si) __O,
3710 __M);
3711 }
3712
3713 extern __inline __m512i
3714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3715 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3716 {
3717 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3718 (__v16si)
3719 _mm512_setzero_si512 (),
3720 __M);
3721 }
3722
3723 extern __inline __m512d
3724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3725 _mm512_broadcast_f64x4 (__m256d __A)
3726 {
3727 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3728 (__v8df)
3729 _mm512_undefined_pd (),
3730 (__mmask8) -1);
3731 }
3732
3733 extern __inline __m512d
3734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3735 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3736 {
3737 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3738 (__v8df) __O,
3739 __M);
3740 }
3741
3742 extern __inline __m512d
3743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3744 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3745 {
3746 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3747 (__v8df)
3748 _mm512_setzero_pd (),
3749 __M);
3750 }
3751
3752 extern __inline __m512i
3753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3754 _mm512_broadcast_i64x4 (__m256i __A)
3755 {
3756 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3757 (__v8di)
3758 _mm512_undefined_si512 (),
3759 (__mmask8) -1);
3760 }
3761
3762 extern __inline __m512i
3763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3764 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3765 {
3766 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3767 (__v8di) __O,
3768 __M);
3769 }
3770
3771 extern __inline __m512i
3772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3773 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3774 {
3775 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3776 (__v8di)
3777 _mm512_setzero_si512 (),
3778 __M);
3779 }
3780
3781 typedef enum
3782 {
3783 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
3784 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
3785 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
3786 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
3787 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
3788 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
3789 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
3790 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
3791 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
3792 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
3793 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
3794 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
3795 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
3796 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
3797 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
3798 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
3799 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
3800 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
3801 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
3802 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
3803 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
3804 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
3805 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
3806 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
3807 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
3808 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
3809 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
3810 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
3811 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
3812 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
3813 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
3814 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
3815 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
3816 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
3817 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
3818 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
3819 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
3820 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
3821 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
3822 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
3823 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
3824 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
3825 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
3826 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
3827 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
3828 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
3829 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
3830 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
3831 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
3832 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
3833 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
3834 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
3835 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
3836 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
3837 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
3838 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
3839 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
3840 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
3841 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
3842 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
3843 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
3844 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
3845 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
3846 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
3847 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
3848 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
3849 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
3850 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
3851 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
3852 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
3853 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
3854 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
3855 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
3856 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
3857 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
3858 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
3859 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
3860 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
3861 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
3862 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
3863 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
3864 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
3865 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
3866 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
3867 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
3868 _MM_PERM_DDDD = 0xFF
3869 } _MM_PERM_ENUM;
3870
3871 #ifdef __OPTIMIZE__
3872 extern __inline __m512i
3873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3874 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3875 {
3876 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3877 __mask,
3878 (__v16si)
3879 _mm512_undefined_si512 (),
3880 (__mmask16) -1);
3881 }
3882
3883 extern __inline __m512i
3884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3885 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3886 _MM_PERM_ENUM __mask)
3887 {
3888 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3889 __mask,
3890 (__v16si) __W,
3891 (__mmask16) __U);
3892 }
3893
3894 extern __inline __m512i
3895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3896 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3897 {
3898 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3899 __mask,
3900 (__v16si)
3901 _mm512_setzero_si512 (),
3902 (__mmask16) __U);
3903 }
3904
3905 extern __inline __m512i
3906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3907 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3908 {
3909 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3910 (__v8di) __B, __imm,
3911 (__v8di)
3912 _mm512_undefined_si512 (),
3913 (__mmask8) -1);
3914 }
3915
3916 extern __inline __m512i
3917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3918 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3919 __m512i __B, const int __imm)
3920 {
3921 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3922 (__v8di) __B, __imm,
3923 (__v8di) __W,
3924 (__mmask8) __U);
3925 }
3926
3927 extern __inline __m512i
3928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3929 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3930 const int __imm)
3931 {
3932 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3933 (__v8di) __B, __imm,
3934 (__v8di)
3935 _mm512_setzero_si512 (),
3936 (__mmask8) __U);
3937 }
3938
3939 extern __inline __m512i
3940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3941 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3942 {
3943 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3944 (__v16si) __B,
3945 __imm,
3946 (__v16si)
3947 _mm512_undefined_si512 (),
3948 (__mmask16) -1);
3949 }
3950
3951 extern __inline __m512i
3952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3953 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3954 __m512i __B, const int __imm)
3955 {
3956 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3957 (__v16si) __B,
3958 __imm,
3959 (__v16si) __W,
3960 (__mmask16) __U);
3961 }
3962
3963 extern __inline __m512i
3964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3965 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3966 const int __imm)
3967 {
3968 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3969 (__v16si) __B,
3970 __imm,
3971 (__v16si)
3972 _mm512_setzero_si512 (),
3973 (__mmask16) __U);
3974 }
3975
3976 extern __inline __m512d
3977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3978 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3979 {
3980 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3981 (__v8df) __B, __imm,
3982 (__v8df)
3983 _mm512_undefined_pd (),
3984 (__mmask8) -1);
3985 }
3986
3987 extern __inline __m512d
3988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3989 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3990 __m512d __B, const int __imm)
3991 {
3992 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3993 (__v8df) __B, __imm,
3994 (__v8df) __W,
3995 (__mmask8) __U);
3996 }
3997
3998 extern __inline __m512d
3999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4000 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
4001 const int __imm)
4002 {
4003 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
4004 (__v8df) __B, __imm,
4005 (__v8df)
4006 _mm512_setzero_pd (),
4007 (__mmask8) __U);
4008 }
4009
4010 extern __inline __m512
4011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4012 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
4013 {
4014 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4015 (__v16sf) __B, __imm,
4016 (__v16sf)
4017 _mm512_undefined_ps (),
4018 (__mmask16) -1);
4019 }
4020
4021 extern __inline __m512
4022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4023 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
4024 __m512 __B, const int __imm)
4025 {
4026 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4027 (__v16sf) __B, __imm,
4028 (__v16sf) __W,
4029 (__mmask16) __U);
4030 }
4031
4032 extern __inline __m512
4033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4034 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4035 const int __imm)
4036 {
4037 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4038 (__v16sf) __B, __imm,
4039 (__v16sf)
4040 _mm512_setzero_ps (),
4041 (__mmask16) __U);
4042 }
4043
4044 #else
/* Macro forms of the epi32 shuffles, used when __OPTIMIZE__ is not
   defined.  Each expands to a call of __builtin_ia32_pshufd512_mask.  */
#define _mm512_shuffle_epi32(X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si) (__m512i) (X), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_undefined_si512 (), \
     (__mmask16) -1))

#define _mm512_mask_shuffle_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si) (__m512i) (X), \
     (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_shuffle_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pshufd512_mask ( \
     (__v16si) (__m512i) (X), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
4059
/* Macro forms of the i64x2 shuffles, used when __OPTIMIZE__ is not
   defined.  Each expands to a call of __builtin_ia32_shuf_i64x2_mask.  */
#define _mm512_shuffle_i64x2(X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) _mm512_undefined_si512 (), \
     (__mmask8) -1))

#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i64x2_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
4077
/* Macro forms of the i32x4 shuffles, used when __OPTIMIZE__ is not
   defined.  Each expands to a call of __builtin_ia32_shuf_i32x4_mask.  */
#define _mm512_shuffle_i32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_undefined_si512 (), \
     (__mmask16) -1))

#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_shuf_i32x4_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
4095
/* Macro forms of the f64x2 shuffles, used when __OPTIMIZE__ is not
   defined.  Each expands to a call of __builtin_ia32_shuf_f64x2_mask.  */
#define _mm512_shuffle_f64x2(X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) _mm512_undefined_pd (), \
     (__mmask8) -1))

#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m512d) __builtin_ia32_shuf_f64x2_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) _mm512_setzero_pd (), \
     (__mmask8) (U)))
4113
/* Macro forms of the f32x4 shuffles, used when __OPTIMIZE__ is not
   defined.  Each expands to a call of __builtin_ia32_shuf_f32x4_mask.  */
#define _mm512_shuffle_f32x4(X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) _mm512_undefined_ps (), \
     (__mmask16) -1))

#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m512) __builtin_ia32_shuf_f32x4_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) _mm512_setzero_ps (), \
     (__mmask16) (U)))
4131 #endif
4132
4133 extern __inline __m512i
4134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4135 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4136 {
4137 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4138 (__v16si) __B,
4139 (__v16si)
4140 _mm512_undefined_si512 (),
4141 (__mmask16) -1);
4142 }
4143
4144 extern __inline __m512i
4145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4146 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4147 {
4148 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4149 (__v16si) __B,
4150 (__v16si) __W,
4151 (__mmask16) __U);
4152 }
4153
4154 extern __inline __m512i
4155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4156 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4157 {
4158 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4159 (__v16si) __B,
4160 (__v16si)
4161 _mm512_setzero_si512 (),
4162 (__mmask16) __U);
4163 }
4164
4165 extern __inline __m512i
4166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4167 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4168 {
4169 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4170 (__v16si) __B,
4171 (__v16si)
4172 _mm512_undefined_si512 (),
4173 (__mmask16) -1);
4174 }
4175
4176 extern __inline __m512i
4177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4178 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4179 {
4180 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4181 (__v16si) __B,
4182 (__v16si) __W,
4183 (__mmask16) __U);
4184 }
4185
4186 extern __inline __m512i
4187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4188 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4189 {
4190 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4191 (__v16si) __B,
4192 (__v16si)
4193 _mm512_setzero_si512 (),
4194 (__mmask16) __U);
4195 }
4196
4197 extern __inline __m512i
4198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4199 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4200 {
4201 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4202 (__v8di) __B,
4203 (__v8di)
4204 _mm512_undefined_si512 (),
4205 (__mmask8) -1);
4206 }
4207
4208 extern __inline __m512i
4209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4210 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4211 {
4212 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4213 (__v8di) __B,
4214 (__v8di) __W,
4215 (__mmask8) __U);
4216 }
4217
4218 extern __inline __m512i
4219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4220 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4221 {
4222 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4223 (__v8di) __B,
4224 (__v8di)
4225 _mm512_setzero_si512 (),
4226 (__mmask8) __U);
4227 }
4228
4229 extern __inline __m512i
4230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4231 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4232 {
4233 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4234 (__v8di) __B,
4235 (__v8di)
4236 _mm512_undefined_si512 (),
4237 (__mmask8) -1);
4238 }
4239
4240 extern __inline __m512i
4241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4242 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4243 {
4244 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4245 (__v8di) __B,
4246 (__v8di) __W,
4247 (__mmask8) __U);
4248 }
4249
4250 extern __inline __m512i
4251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4252 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4253 {
4254 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4255 (__v8di) __B,
4256 (__v8di)
4257 _mm512_setzero_si512 (),
4258 (__mmask8) __U);
4259 }
4260
4261 #ifdef __OPTIMIZE__
4262 extern __inline __m256i
4263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4264 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4265 {
4266 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4267 (__v8si)
4268 _mm256_undefined_si256 (),
4269 (__mmask8) -1, __R);
4270 }
4271
4272 extern __inline __m256i
4273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4274 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4275 const int __R)
4276 {
4277 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4278 (__v8si) __W,
4279 (__mmask8) __U, __R);
4280 }
4281
4282 extern __inline __m256i
4283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4284 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4285 {
4286 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4287 (__v8si)
4288 _mm256_setzero_si256 (),
4289 (__mmask8) __U, __R);
4290 }
4291
4292 extern __inline __m256i
4293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4294 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4295 {
4296 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4297 (__v8si)
4298 _mm256_undefined_si256 (),
4299 (__mmask8) -1, __R);
4300 }
4301
4302 extern __inline __m256i
4303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4304 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4305 const int __R)
4306 {
4307 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4308 (__v8si) __W,
4309 (__mmask8) __U, __R);
4310 }
4311
4312 extern __inline __m256i
4313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4314 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4315 {
4316 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4317 (__v8si)
4318 _mm256_setzero_si256 (),
4319 (__mmask8) __U, __R);
4320 }
4321 #else
/* Macro forms of the cvtt_roundpd conversions, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_cvttpd2dq512_mask
   or __builtin_ia32_cvttpd2udq512_mask.  */
#define _mm512_cvtt_roundpd_epi32(A, B) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ( \
     A, (__v8si) _mm256_undefined_si256 (), -1, B))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ( \
     A, (__v8si) (W), U, B))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_cvttpd2dq512_mask ( \
     A, (__v8si) _mm256_setzero_si256 (), U, B))

#define _mm512_cvtt_roundpd_epu32(A, B) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ( \
     A, (__v8si) _mm256_undefined_si256 (), -1, B))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ( \
     A, (__v8si) (W), U, B))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
  ((__m256i) __builtin_ia32_cvttpd2udq512_mask ( \
     A, (__v8si) _mm256_setzero_si256 (), U, B))
4339 #endif
4340
4341 #ifdef __OPTIMIZE__
4342 extern __inline __m256i
4343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4344 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4345 {
4346 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4347 (__v8si)
4348 _mm256_undefined_si256 (),
4349 (__mmask8) -1, __R);
4350 }
4351
4352 extern __inline __m256i
4353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4354 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4355 const int __R)
4356 {
4357 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4358 (__v8si) __W,
4359 (__mmask8) __U, __R);
4360 }
4361
4362 extern __inline __m256i
4363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4364 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4365 {
4366 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4367 (__v8si)
4368 _mm256_setzero_si256 (),
4369 (__mmask8) __U, __R);
4370 }
4371
4372 extern __inline __m256i
4373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4374 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4375 {
4376 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4377 (__v8si)
4378 _mm256_undefined_si256 (),
4379 (__mmask8) -1, __R);
4380 }
4381
4382 extern __inline __m256i
4383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4384 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4385 const int __R)
4386 {
4387 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4388 (__v8si) __W,
4389 (__mmask8) __U, __R);
4390 }
4391
4392 extern __inline __m256i
4393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4394 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4395 {
4396 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4397 (__v8si)
4398 _mm256_setzero_si256 (),
4399 (__mmask8) __U, __R);
4400 }
4401 #else
/* Macro forms of the cvt_roundpd conversions, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_cvtpd2dq512_mask
   or __builtin_ia32_cvtpd2udq512_mask.  */
#define _mm512_cvt_roundpd_epi32(A, B) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ( \
     A, (__v8si) _mm256_undefined_si256 (), -1, B))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ( \
     A, (__v8si) (W), U, B))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2dq512_mask ( \
     A, (__v8si) _mm256_setzero_si256 (), U, B))

#define _mm512_cvt_roundpd_epu32(A, B) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ( \
     A, (__v8si) _mm256_undefined_si256 (), -1, B))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ( \
     A, (__v8si) (W), U, B))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
  ((__m256i) __builtin_ia32_cvtpd2udq512_mask ( \
     A, (__v8si) _mm256_setzero_si256 (), U, B))
4419 #endif
4420
4421 #ifdef __OPTIMIZE__
4422 extern __inline __m512i
4423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4424 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4425 {
4426 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4427 (__v16si)
4428 _mm512_undefined_si512 (),
4429 (__mmask16) -1, __R);
4430 }
4431
4432 extern __inline __m512i
4433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4434 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4435 const int __R)
4436 {
4437 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4438 (__v16si) __W,
4439 (__mmask16) __U, __R);
4440 }
4441
4442 extern __inline __m512i
4443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4444 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4445 {
4446 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4447 (__v16si)
4448 _mm512_setzero_si512 (),
4449 (__mmask16) __U, __R);
4450 }
4451
4452 extern __inline __m512i
4453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4454 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4455 {
4456 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4457 (__v16si)
4458 _mm512_undefined_si512 (),
4459 (__mmask16) -1, __R);
4460 }
4461
4462 extern __inline __m512i
4463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4464 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4465 const int __R)
4466 {
4467 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4468 (__v16si) __W,
4469 (__mmask16) __U, __R);
4470 }
4471
4472 extern __inline __m512i
4473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4474 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4475 {
4476 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4477 (__v16si)
4478 _mm512_setzero_si512 (),
4479 (__mmask16) __U, __R);
4480 }
4481 #else
/* Macro forms of the cvtt_roundps conversions, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_cvttps2dq512_mask
   or __builtin_ia32_cvttps2udq512_mask.  */
#define _mm512_cvtt_roundps_epi32(A, B) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ( \
     A, (__v16si) _mm512_undefined_si512 (), -1, B))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ( \
     A, (__v16si) (W), U, B))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2dq512_mask ( \
     A, (__v16si) _mm512_setzero_si512 (), U, B))

#define _mm512_cvtt_roundps_epu32(A, B) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ( \
     A, (__v16si) _mm512_undefined_si512 (), -1, B))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ( \
     A, (__v16si) (W), U, B))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
  ((__m512i) __builtin_ia32_cvttps2udq512_mask ( \
     A, (__v16si) _mm512_setzero_si512 (), U, B))
4499 #endif
4500
4501 #ifdef __OPTIMIZE__
4502 extern __inline __m512i
4503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4504 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4505 {
4506 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4507 (__v16si)
4508 _mm512_undefined_si512 (),
4509 (__mmask16) -1, __R);
4510 }
4511
4512 extern __inline __m512i
4513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4515 const int __R)
4516 {
4517 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4518 (__v16si) __W,
4519 (__mmask16) __U, __R);
4520 }
4521
4522 extern __inline __m512i
4523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4524 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4525 {
4526 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4527 (__v16si)
4528 _mm512_setzero_si512 (),
4529 (__mmask16) __U, __R);
4530 }
4531
4532 extern __inline __m512i
4533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4534 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4535 {
4536 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4537 (__v16si)
4538 _mm512_undefined_si512 (),
4539 (__mmask16) -1, __R);
4540 }
4541
4542 extern __inline __m512i
4543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4544 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4545 const int __R)
4546 {
4547 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4548 (__v16si) __W,
4549 (__mmask16) __U, __R);
4550 }
4551
4552 extern __inline __m512i
4553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4554 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4555 {
4556 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4557 (__v16si)
4558 _mm512_setzero_si512 (),
4559 (__mmask16) __U, __R);
4560 }
4561 #else
/* Macro forms of the cvt_roundps conversions, used when __OPTIMIZE__ is
   not defined.  Each expands to a call of __builtin_ia32_cvtps2dq512_mask
   or __builtin_ia32_cvtps2udq512_mask.  */
#define _mm512_cvt_roundps_epi32(A, B) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ( \
     A, (__v16si) _mm512_undefined_si512 (), -1, B))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ( \
     A, (__v16si) (W), U, B))

#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2dq512_mask ( \
     A, (__v16si) _mm512_setzero_si512 (), U, B))

#define _mm512_cvt_roundps_epu32(A, B) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ( \
     A, (__v16si) _mm512_undefined_si512 (), -1, B))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ( \
     A, (__v16si) (W), U, B))

#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
  ((__m512i) __builtin_ia32_cvtps2udq512_mask ( \
     A, (__v16si) _mm512_setzero_si512 (), U, B))
4579 #endif
4580
4581 extern __inline __m128d
4582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4583 _mm_cvtu32_sd (__m128d __A, unsigned __B)
4584 {
4585 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4586 }
4587
4588 #ifdef __x86_64__
4589 #ifdef __OPTIMIZE__
4590 extern __inline __m128d
4591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4592 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4593 {
4594 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4595 }
4596
4597 extern __inline __m128d
4598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4599 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4600 {
4601 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4602 }
4603
4604 extern __inline __m128d
4605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4606 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4607 {
4608 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4609 }
4610 #else
/* Macro forms of the 64-bit integer to double conversions, used when
   __OPTIMIZE__ is not defined.  The whole replacement list is
   parenthesized so that the (__m128d) cast always applies to the builtin's
   result, even when the macro invocation is followed by an operator that
   binds tighter than a cast (for example a subscript).  */
#define _mm_cvt_roundu64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtusi2sd64 (A, B, C))

#define _mm_cvt_roundi64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtsi2sd64 (A, B, C))

#define _mm_cvt_roundsi64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtsi2sd64 (A, B, C))
4619 #endif
4620
4621 #endif
4622
4623 #ifdef __OPTIMIZE__
4624 extern __inline __m128
4625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4626 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4627 {
4628 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4629 }
4630
4631 extern __inline __m128
4632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4633 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4634 {
4635 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4636 }
4637
4638 extern __inline __m128
4639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4640 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4641 {
4642 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4643 }
4644 #else
/* Macro forms of the 32-bit integer to float conversions, used when
   __OPTIMIZE__ is not defined.  The whole replacement list is
   parenthesized so that the (__m128) cast always applies to the builtin's
   result, even when the macro invocation is followed by an operator that
   binds tighter than a cast (for example a subscript).  */
#define _mm_cvt_roundu32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtusi2ss32 (A, B, C))

#define _mm_cvt_roundi32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss32 (A, B, C))

#define _mm_cvt_roundsi32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss32 (A, B, C))
4653 #endif
4654
4655 #ifdef __x86_64__
4656 #ifdef __OPTIMIZE__
4657 extern __inline __m128
4658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4659 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4660 {
4661 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4662 }
4663
4664 extern __inline __m128
4665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4666 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4667 {
4668 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4669 }
4670
4671 extern __inline __m128
4672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4673 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4674 {
4675 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4676 }
4677 #else
/* Macro forms of the 64-bit integer to float conversions, used when
   __OPTIMIZE__ is not defined.  The whole replacement list is
   parenthesized so that the (__m128) cast always applies to the builtin's
   result, even when the macro invocation is followed by an operator that
   binds tighter than a cast (for example a subscript).  */
#define _mm_cvt_roundu64_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtusi2ss64 (A, B, C))

#define _mm_cvt_roundi64_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss64 (A, B, C))

#define _mm_cvt_roundsi64_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss64 (A, B, C))
4686 #endif
4687
4688 #endif
4689
4690 extern __inline __m128i
4691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4692 _mm512_cvtepi32_epi8 (__m512i __A)
4693 {
4694 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4695 (__v16qi)
4696 _mm_undefined_si128 (),
4697 (__mmask16) -1);
4698 }
4699
4700 extern __inline void
4701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4702 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4703 {
4704 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4705 }
4706
4707 extern __inline __m128i
4708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4710 {
4711 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4712 (__v16qi) __O, __M);
4713 }
4714
4715 extern __inline __m128i
4716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4717 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4718 {
4719 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4720 (__v16qi)
4721 _mm_setzero_si128 (),
4722 __M);
4723 }
4724
4725 extern __inline __m128i
4726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4727 _mm512_cvtsepi32_epi8 (__m512i __A)
4728 {
4729 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4730 (__v16qi)
4731 _mm_undefined_si128 (),
4732 (__mmask16) -1);
4733 }
4734
4735 extern __inline void
4736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4737 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4738 {
4739 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4740 }
4741
4742 extern __inline __m128i
4743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4744 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4745 {
4746 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4747 (__v16qi) __O, __M);
4748 }
4749
4750 extern __inline __m128i
4751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4752 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4753 {
4754 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4755 (__v16qi)
4756 _mm_setzero_si128 (),
4757 __M);
4758 }
4759
4760 extern __inline __m128i
4761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4762 _mm512_cvtusepi32_epi8 (__m512i __A)
4763 {
4764 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4765 (__v16qi)
4766 _mm_undefined_si128 (),
4767 (__mmask16) -1);
4768 }
4769
4770 extern __inline void
4771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4773 {
4774 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4775 }
4776
4777 extern __inline __m128i
4778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4779 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4780 {
4781 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4782 (__v16qi) __O,
4783 __M);
4784 }
4785
4786 extern __inline __m128i
4787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4788 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4789 {
4790 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4791 (__v16qi)
4792 _mm_setzero_si128 (),
4793 __M);
4794 }
4795
4796 extern __inline __m256i
4797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4798 _mm512_cvtepi32_epi16 (__m512i __A)
4799 {
4800 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4801 (__v16hi)
4802 _mm256_undefined_si256 (),
4803 (__mmask16) -1);
4804 }
4805
4806 extern __inline void
4807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4808 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4809 {
4810 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4811 }
4812
4813 extern __inline __m256i
4814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4815 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4816 {
4817 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4818 (__v16hi) __O, __M);
4819 }
4820
4821 extern __inline __m256i
4822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4823 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4824 {
4825 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4826 (__v16hi)
4827 _mm256_setzero_si256 (),
4828 __M);
4829 }
4830
4831 extern __inline __m256i
4832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4833 _mm512_cvtsepi32_epi16 (__m512i __A)
4834 {
4835 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4836 (__v16hi)
4837 _mm256_undefined_si256 (),
4838 (__mmask16) -1);
4839 }
4840
4841 extern __inline void
4842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4843 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4844 {
4845 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4846 }
4847
4848 extern __inline __m256i
4849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4850 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4851 {
4852 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4853 (__v16hi) __O, __M);
4854 }
4855
4856 extern __inline __m256i
4857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4858 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4859 {
4860 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4861 (__v16hi)
4862 _mm256_setzero_si256 (),
4863 __M);
4864 }
4865
4866 extern __inline __m256i
4867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4868 _mm512_cvtusepi32_epi16 (__m512i __A)
4869 {
4870 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4871 (__v16hi)
4872 _mm256_undefined_si256 (),
4873 (__mmask16) -1);
4874 }
4875
4876 extern __inline void
4877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4878 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4879 {
4880 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4881 }
4882
4883 extern __inline __m256i
4884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4885 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4886 {
4887 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4888 (__v16hi) __O,
4889 __M);
4890 }
4891
4892 extern __inline __m256i
4893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4894 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4895 {
4896 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4897 (__v16hi)
4898 _mm256_setzero_si256 (),
4899 __M);
4900 }
4901
4902 extern __inline __m256i
4903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904 _mm512_cvtepi64_epi32 (__m512i __A)
4905 {
4906 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4907 (__v8si)
4908 _mm256_undefined_si256 (),
4909 (__mmask8) -1);
4910 }
4911
4912 extern __inline void
4913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4914 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4915 {
4916 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4917 }
4918
4919 extern __inline __m256i
4920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4921 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4922 {
4923 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4924 (__v8si) __O, __M);
4925 }
4926
4927 extern __inline __m256i
4928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4929 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4930 {
4931 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4932 (__v8si)
4933 _mm256_setzero_si256 (),
4934 __M);
4935 }
4936
4937 extern __inline __m256i
4938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4939 _mm512_cvtsepi64_epi32 (__m512i __A)
4940 {
4941 __v8si __O;
4942 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4943 (__v8si)
4944 _mm256_undefined_si256 (),
4945 (__mmask8) -1);
4946 }
4947
4948 extern __inline void
4949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4950 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4951 {
4952 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4953 }
4954
4955 extern __inline __m256i
4956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4957 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4958 {
4959 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4960 (__v8si) __O, __M);
4961 }
4962
4963 extern __inline __m256i
4964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4965 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4966 {
4967 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4968 (__v8si)
4969 _mm256_setzero_si256 (),
4970 __M);
4971 }
4972
4973 extern __inline __m256i
4974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4975 _mm512_cvtusepi64_epi32 (__m512i __A)
4976 {
4977 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4978 (__v8si)
4979 _mm256_undefined_si256 (),
4980 (__mmask8) -1);
4981 }
4982
4983 extern __inline void
4984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4985 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4986 {
4987 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4988 }
4989
4990 extern __inline __m256i
4991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4992 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4993 {
4994 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4995 (__v8si) __O, __M);
4996 }
4997
4998 extern __inline __m256i
4999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5000 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
5001 {
5002 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
5003 (__v8si)
5004 _mm256_setzero_si256 (),
5005 __M);
5006 }
5007
5008 extern __inline __m128i
5009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5010 _mm512_cvtepi64_epi16 (__m512i __A)
5011 {
5012 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5013 (__v8hi)
5014 _mm_undefined_si128 (),
5015 (__mmask8) -1);
5016 }
5017
5018 extern __inline void
5019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5020 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5021 {
5022 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5023 }
5024
5025 extern __inline __m128i
5026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5027 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5028 {
5029 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5030 (__v8hi) __O, __M);
5031 }
5032
5033 extern __inline __m128i
5034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5035 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5036 {
5037 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5038 (__v8hi)
5039 _mm_setzero_si128 (),
5040 __M);
5041 }
5042
5043 extern __inline __m128i
5044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5045 _mm512_cvtsepi64_epi16 (__m512i __A)
5046 {
5047 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5048 (__v8hi)
5049 _mm_undefined_si128 (),
5050 (__mmask8) -1);
5051 }
5052
5053 extern __inline void
5054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5055 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5056 {
5057 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5058 }
5059
5060 extern __inline __m128i
5061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5062 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5063 {
5064 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5065 (__v8hi) __O, __M);
5066 }
5067
5068 extern __inline __m128i
5069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5070 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5071 {
5072 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5073 (__v8hi)
5074 _mm_setzero_si128 (),
5075 __M);
5076 }
5077
5078 extern __inline __m128i
5079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5080 _mm512_cvtusepi64_epi16 (__m512i __A)
5081 {
5082 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5083 (__v8hi)
5084 _mm_undefined_si128 (),
5085 (__mmask8) -1);
5086 }
5087
5088 extern __inline void
5089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5090 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5091 {
5092 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5093 }
5094
5095 extern __inline __m128i
5096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5097 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5098 {
5099 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5100 (__v8hi) __O, __M);
5101 }
5102
5103 extern __inline __m128i
5104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5105 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5106 {
5107 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5108 (__v8hi)
5109 _mm_setzero_si128 (),
5110 __M);
5111 }
5112
5113 extern __inline __m128i
5114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5115 _mm512_cvtepi64_epi8 (__m512i __A)
5116 {
5117 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5118 (__v16qi)
5119 _mm_undefined_si128 (),
5120 (__mmask8) -1);
5121 }
5122
5123 extern __inline void
5124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5125 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5126 {
5127 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5128 }
5129
5130 extern __inline __m128i
5131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5132 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5133 {
5134 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5135 (__v16qi) __O, __M);
5136 }
5137
5138 extern __inline __m128i
5139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5140 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5141 {
5142 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5143 (__v16qi)
5144 _mm_setzero_si128 (),
5145 __M);
5146 }
5147
5148 extern __inline __m128i
5149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5150 _mm512_cvtsepi64_epi8 (__m512i __A)
5151 {
5152 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5153 (__v16qi)
5154 _mm_undefined_si128 (),
5155 (__mmask8) -1);
5156 }
5157
5158 extern __inline void
5159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5160 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5161 {
5162 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5163 }
5164
5165 extern __inline __m128i
5166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5167 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5168 {
5169 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5170 (__v16qi) __O, __M);
5171 }
5172
5173 extern __inline __m128i
5174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5175 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5176 {
5177 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5178 (__v16qi)
5179 _mm_setzero_si128 (),
5180 __M);
5181 }
5182
5183 extern __inline __m128i
5184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5185 _mm512_cvtusepi64_epi8 (__m512i __A)
5186 {
5187 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5188 (__v16qi)
5189 _mm_undefined_si128 (),
5190 (__mmask8) -1);
5191 }
5192
5193 extern __inline void
5194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5195 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5196 {
5197 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5198 }
5199
5200 extern __inline __m128i
5201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5202 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5203 {
5204 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5205 (__v16qi) __O,
5206 __M);
5207 }
5208
5209 extern __inline __m128i
5210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5211 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5212 {
5213 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5214 (__v16qi)
5215 _mm_setzero_si128 (),
5216 __M);
5217 }
5218
5219 extern __inline __m512d
5220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5221 _mm512_cvtepi32_pd (__m256i __A)
5222 {
5223 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5224 (__v8df)
5225 _mm512_undefined_pd (),
5226 (__mmask8) -1);
5227 }
5228
5229 extern __inline __m512d
5230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5231 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5232 {
5233 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5234 (__v8df) __W,
5235 (__mmask8) __U);
5236 }
5237
5238 extern __inline __m512d
5239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5240 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5241 {
5242 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5243 (__v8df)
5244 _mm512_setzero_pd (),
5245 (__mmask8) __U);
5246 }
5247
5248 extern __inline __m512d
5249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5250 _mm512_cvtepu32_pd (__m256i __A)
5251 {
5252 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5253 (__v8df)
5254 _mm512_undefined_pd (),
5255 (__mmask8) -1);
5256 }
5257
5258 extern __inline __m512d
5259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5260 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5261 {
5262 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5263 (__v8df) __W,
5264 (__mmask8) __U);
5265 }
5266
5267 extern __inline __m512d
5268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5269 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5270 {
5271 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5272 (__v8df)
5273 _mm512_setzero_pd (),
5274 (__mmask8) __U);
5275 }
5276
5277 #ifdef __OPTIMIZE__
5278 extern __inline __m512
5279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5280 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5281 {
5282 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5283 (__v16sf)
5284 _mm512_undefined_ps (),
5285 (__mmask16) -1, __R);
5286 }
5287
5288 extern __inline __m512
5289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5290 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5291 const int __R)
5292 {
5293 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5294 (__v16sf) __W,
5295 (__mmask16) __U, __R);
5296 }
5297
5298 extern __inline __m512
5299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5300 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5301 {
5302 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5303 (__v16sf)
5304 _mm512_setzero_ps (),
5305 (__mmask16) __U, __R);
5306 }
5307
5308 extern __inline __m512
5309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5310 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5311 {
5312 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5313 (__v16sf)
5314 _mm512_undefined_ps (),
5315 (__mmask16) -1, __R);
5316 }
5317
5318 extern __inline __m512
5319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5320 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5321 const int __R)
5322 {
5323 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5324 (__v16sf) __W,
5325 (__mmask16) __U, __R);
5326 }
5327
5328 extern __inline __m512
5329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5330 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5331 {
5332 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5333 (__v16sf)
5334 _mm512_setzero_ps (),
5335 (__mmask16) __U, __R);
5336 }
5337
5338 #else
/* Macro forms used when __OPTIMIZE__ is not defined.  They take the same
   operands as the inline functions in the other branch and apply the same
   casts to the pass-through and mask operands.  Every expansion is fully
   parenthesized so the leading (__m512) cast always covers the whole
   builtin call.  */
#define _mm512_cvt_roundepi32_ps(A, B)        \
    ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(A),        \
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B)   \
    ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(A),        \
      (__v16sf)(__m512)(W), (__mmask16)(U), (B)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, B)      \
    ((__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si)(A),        \
      (__v16sf) _mm512_setzero_ps (), (__mmask16)(U), (B)))

#define _mm512_cvt_roundepu32_ps(A, B)        \
    ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(A),       \
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B)   \
    ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(A),       \
      (__v16sf)(__m512)(W), (__mmask16)(U), (B)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, B)      \
    ((__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si)(A),       \
      (__v16sf) _mm512_setzero_ps (), (__mmask16)(U), (B)))
5356 #endif
5357
5358 #ifdef __OPTIMIZE__
5359 extern __inline __m256d
5360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5361 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5362 {
5363 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5364 __imm,
5365 (__v4df)
5366 _mm256_undefined_pd (),
5367 (__mmask8) -1);
5368 }
5369
5370 extern __inline __m256d
5371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5372 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5373 const int __imm)
5374 {
5375 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5376 __imm,
5377 (__v4df) __W,
5378 (__mmask8) __U);
5379 }
5380
5381 extern __inline __m256d
5382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5383 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5384 {
5385 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5386 __imm,
5387 (__v4df)
5388 _mm256_setzero_pd (),
5389 (__mmask8) __U);
5390 }
5391
5392 extern __inline __m128
5393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5394 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5395 {
5396 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5397 __imm,
5398 (__v4sf)
5399 _mm_undefined_ps (),
5400 (__mmask8) -1);
5401 }
5402
5403 extern __inline __m128
5404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5405 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5406 const int __imm)
5407 {
5408 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5409 __imm,
5410 (__v4sf) __W,
5411 (__mmask8) __U);
5412 }
5413
5414 extern __inline __m128
5415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5416 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5417 {
5418 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5419 __imm,
5420 (__v4sf)
5421 _mm_setzero_ps (),
5422 (__mmask8) __U);
5423 }
5424
5425 extern __inline __m256i
5426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5427 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5428 {
5429 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5430 __imm,
5431 (__v4di)
5432 _mm256_undefined_si256 (),
5433 (__mmask8) -1);
5434 }
5435
5436 extern __inline __m256i
5437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5438 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5439 const int __imm)
5440 {
5441 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5442 __imm,
5443 (__v4di) __W,
5444 (__mmask8) __U);
5445 }
5446
5447 extern __inline __m256i
5448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5449 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5450 {
5451 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5452 __imm,
5453 (__v4di)
5454 _mm256_setzero_si256 (),
5455 (__mmask8) __U);
5456 }
5457
5458 extern __inline __m128i
5459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5460 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5461 {
5462 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5463 __imm,
5464 (__v4si)
5465 _mm_undefined_si128 (),
5466 (__mmask8) -1);
5467 }
5468
5469 extern __inline __m128i
5470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5471 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5472 const int __imm)
5473 {
5474 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5475 __imm,
5476 (__v4si) __W,
5477 (__mmask8) __U);
5478 }
5479
5480 extern __inline __m128i
5481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5482 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5483 {
5484 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5485 __imm,
5486 (__v4si)
5487 _mm_setzero_si128 (),
5488 (__mmask8) __U);
5489 }
5490 #else
5491
/* Macro forms of the extractf64x4 intrinsics, used when __OPTIMIZE__ is
   not defined.  X is the 512-bit source, C the selector (cast to int), W
   the pass-through vector and U the mask; the unmasked form uses an
   undefined pass-through and an all-ones mask, the maskz form a zero
   pass-through.  */
#define _mm512_extractf64x4_pd(X, C)                                       \
  ((__m256d)                                                               \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), (int) (C),     \
                                     (__v4df)(__m256d)                     \
                                     _mm256_undefined_pd(),                \
                                     (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, X, C)                            \
  ((__m256d)                                                               \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), (int) (C),     \
                                     (__v4df)(__m256d)(W),                 \
                                     (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, X, C)                              \
  ((__m256d)                                                               \
   __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), (int) (C),     \
                                     (__v4df)(__m256d)                     \
                                     _mm256_setzero_pd(),                  \
                                     (__mmask8)(U)))
5509
/* Macro forms of the extractf32x4 intrinsics, used when __OPTIMIZE__ is
   not defined.  X is the 512-bit source, C the selector (cast to int), W
   the pass-through vector and U the mask; the unmasked form uses an
   undefined pass-through and an all-ones mask, the maskz form a zero
   pass-through.  */
#define _mm512_extractf32x4_ps(X, C)                                       \
  ((__m128)                                                                \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), (int) (C),     \
                                     (__v4sf)(__m128)_mm_undefined_ps(),   \
                                     (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, X, C)                            \
  ((__m128)                                                                \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), (int) (C),     \
                                     (__v4sf)(__m128)(W),                  \
                                     (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, X, C)                              \
  ((__m128)                                                                \
   __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), (int) (C),     \
                                     (__v4sf)(__m128)_mm_setzero_ps(),     \
                                     (__mmask8)(U)))
5527
/* Macro forms of the extracti64x4 intrinsics, used when __OPTIMIZE__ is
   not defined.  X is the 512-bit source, C the selector (cast to int), W
   the pass-through vector and U the mask; the unmasked form uses an
   undefined pass-through and an all-ones mask, the maskz form a zero
   pass-through.  */
#define _mm512_extracti64x4_epi64(X, C)                                    \
  ((__m256i)                                                               \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), (int) (C),     \
                                     (__v4di)(__m256i)                     \
                                     _mm256_undefined_si256 (),            \
                                     (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, X, C)                         \
  ((__m256i)                                                               \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), (int) (C),     \
                                     (__v4di)(__m256i)(W),                 \
                                     (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, X, C)                           \
  ((__m256i)                                                               \
   __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), (int) (C),     \
                                     (__v4di)(__m256i)                     \
                                     _mm256_setzero_si256 (),              \
                                     (__mmask8)(U)))
5545
/* Macro forms of the extracti32x4 intrinsics, used when __OPTIMIZE__ is
   not defined.  X is the 512-bit source, C the selector (cast to int), W
   the pass-through vector and U the mask; the unmasked form uses an
   undefined pass-through and an all-ones mask, the maskz form a zero
   pass-through.  */
#define _mm512_extracti32x4_epi32(X, C)                                    \
  ((__m128i)                                                               \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), (int) (C),    \
                                     (__v4si)(__m128i)                     \
                                     _mm_undefined_si128 (),               \
                                     (__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, X, C)                         \
  ((__m128i)                                                               \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), (int) (C),    \
                                     (__v4si)(__m128i)(W),                 \
                                     (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, X, C)                           \
  ((__m128i)                                                               \
   __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), (int) (C),    \
                                     (__v4si)(__m128i)                     \
                                     _mm_setzero_si128 (),                 \
                                     (__mmask8)(U)))
5563 #endif
5564
5565 #ifdef __OPTIMIZE__
5566 extern __inline __m512i
5567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5568 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5569 {
5570 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5571 (__v4si) __B,
5572 __imm,
5573 (__v16si) __A, -1);
5574 }
5575
5576 extern __inline __m512
5577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5578 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5579 {
5580 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5581 (__v4sf) __B,
5582 __imm,
5583 (__v16sf) __A, -1);
5584 }
5585
5586 extern __inline __m512i
5587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5588 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5589 {
5590 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5591 (__v4di) __B,
5592 __imm,
5593 (__v8di)
5594 _mm512_undefined_si512 (),
5595 (__mmask8) -1);
5596 }
5597
5598 extern __inline __m512i
5599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5600 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5601 __m256i __B, const int __imm)
5602 {
5603 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5604 (__v4di) __B,
5605 __imm,
5606 (__v8di) __W,
5607 (__mmask8) __U);
5608 }
5609
5610 extern __inline __m512i
5611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5612 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5613 const int __imm)
5614 {
5615 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5616 (__v4di) __B,
5617 __imm,
5618 (__v8di)
5619 _mm512_setzero_si512 (),
5620 (__mmask8) __U);
5621 }
5622
5623 extern __inline __m512d
5624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5625 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5626 {
5627 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5628 (__v4df) __B,
5629 __imm,
5630 (__v8df)
5631 _mm512_undefined_pd (),
5632 (__mmask8) -1);
5633 }
5634
5635 extern __inline __m512d
5636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5637 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5638 __m256d __B, const int __imm)
5639 {
5640 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5641 (__v4df) __B,
5642 __imm,
5643 (__v8df) __W,
5644 (__mmask8) __U);
5645 }
5646
5647 extern __inline __m512d
5648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5649 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5650 const int __imm)
5651 {
5652 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5653 (__v4df) __B,
5654 __imm,
5655 (__v8df)
5656 _mm512_setzero_pd (),
5657 (__mmask8) __U);
5658 }
5659 #else
/* Macro forms of the 32x4 insert intrinsics, used when __OPTIMIZE__ is not
   defined.  X is the 512-bit base vector, Y the 128-bit operand and C the
   selector (cast to int).  The mask is all ones, so the pass-through
   operand is given as an undefined vector, as the 64x4 insert macros do.
   This keeps X from being expanded, and therefore evaluated, twice.  */
#define _mm512_insertf32x4(X, Y, C)                                     \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C),                                    \
    (__v16sf)(__m512) _mm512_undefined_ps (), (__mmask16)(-1)))

#define _mm512_inserti32x4(X, Y, C)                                     \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C),                                   \
    (__v16si)(__m512i) _mm512_undefined_si512 (), (__mmask16)(-1)))
5667
/* Macro forms of the insertf64x4 intrinsics, used when __OPTIMIZE__ is not
   defined.  X is the 512-bit base vector, Y the 256-bit operand, C the
   selector (cast to int), W the pass-through vector and U the mask; the
   unmasked form uses an undefined pass-through and an all-ones mask, the
   maskz form a zero pass-through.  */
#define _mm512_insertf64x4(X, Y, C)                                     \
  ((__m512d)                                                            \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),              \
                                    (__v4df)(__m256d) (Y),              \
                                    (int) (C),                          \
                                    (__v8df)(__m512d)                   \
                                    _mm512_undefined_pd(),              \
                                    (__mmask8)-1))

#define _mm512_mask_insertf64x4(W, U, X, Y, C)                          \
  ((__m512d)                                                            \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),              \
                                    (__v4df)(__m256d) (Y),              \
                                    (int) (C),                          \
                                    (__v8df)(__m512d)(W),               \
                                    (__mmask8)(U)))

#define _mm512_maskz_insertf64x4(U, X, Y, C)                            \
  ((__m512d)                                                            \
   __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X),              \
                                    (__v4df)(__m256d) (Y),              \
                                    (int) (C),                          \
                                    (__v8df)(__m512d)                   \
                                    _mm512_setzero_pd(),                \
                                    (__mmask8)(U)))
5685
5686 #define _mm512_inserti64x4(X, Y, C) \
5687 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5688 (__v4di)(__m256i) (Y), (int) (C), \
5689 (__v8di)(__m512i)_mm512_undefined_si512 (), \
5690 (__mmask8)-1))
5691
/* Expands to __builtin_ia32_inserti64x4_mask with W as the fourth operand
   and U as the mask.  */
#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ( \
     (__v8di)(__m512i) (X), \
     (__v4di)(__m256i) (Y), \
     (int) (C), \
     (__v8di)(__m512i) (W), \
     (__mmask8) (U)))
5697
/* Expands to __builtin_ia32_inserti64x4_mask with _mm512_setzero_si512 ()
   as the fourth operand and U as the mask.  */
#define _mm512_maskz_inserti64x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ( \
     (__v8di)(__m512i) (X), \
     (__v4di)(__m256i) (Y), \
     (int) (C), \
     (__v8di)(__m512i) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
5703 #endif
5704
5705 extern __inline __m512d
5706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5707 _mm512_loadu_pd (void const *__P)
5708 {
5709 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5710 (__v8df)
5711 _mm512_undefined_pd (),
5712 (__mmask8) -1);
5713 }
5714
5715 extern __inline __m512d
5716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5717 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5718 {
5719 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5720 (__v8df) __W,
5721 (__mmask8) __U);
5722 }
5723
5724 extern __inline __m512d
5725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5726 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5727 {
5728 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5729 (__v8df)
5730 _mm512_setzero_pd (),
5731 (__mmask8) __U);
5732 }
5733
5734 extern __inline void
5735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5736 _mm512_storeu_pd (void *__P, __m512d __A)
5737 {
5738 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5739 (__mmask8) -1);
5740 }
5741
5742 extern __inline void
5743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5744 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5745 {
5746 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5747 (__mmask8) __U);
5748 }
5749
5750 extern __inline __m512
5751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5752 _mm512_loadu_ps (void const *__P)
5753 {
5754 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5755 (__v16sf)
5756 _mm512_undefined_ps (),
5757 (__mmask16) -1);
5758 }
5759
5760 extern __inline __m512
5761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5762 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5763 {
5764 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5765 (__v16sf) __W,
5766 (__mmask16) __U);
5767 }
5768
5769 extern __inline __m512
5770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5771 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5772 {
5773 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5774 (__v16sf)
5775 _mm512_setzero_ps (),
5776 (__mmask16) __U);
5777 }
5778
5779 extern __inline void
5780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781 _mm512_storeu_ps (void *__P, __m512 __A)
5782 {
5783 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5784 (__mmask16) -1);
5785 }
5786
5787 extern __inline void
5788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5789 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5790 {
5791 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5792 (__mmask16) __U);
5793 }
5794
5795 extern __inline __m512i
5796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5797 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5798 {
5799 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5800 (__v8di) __W,
5801 (__mmask8) __U);
5802 }
5803
5804 extern __inline __m512i
5805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5806 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5807 {
5808 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5809 (__v8di)
5810 _mm512_setzero_si512 (),
5811 (__mmask8) __U);
5812 }
5813
5814 extern __inline void
5815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5816 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5817 {
5818 __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
5819 (__mmask8) __U);
5820 }
5821
5822 extern __inline __m512i
5823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5824 _mm512_loadu_si512 (void const *__P)
5825 {
5826 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5827 (__v16si)
5828 _mm512_setzero_si512 (),
5829 (__mmask16) -1);
5830 }
5831
5832 extern __inline __m512i
5833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5834 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5835 {
5836 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5837 (__v16si) __W,
5838 (__mmask16) __U);
5839 }
5840
5841 extern __inline __m512i
5842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5843 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5844 {
5845 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5846 (__v16si)
5847 _mm512_setzero_si512 (),
5848 (__mmask16) __U);
5849 }
5850
5851 extern __inline void
5852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5853 _mm512_storeu_si512 (void *__P, __m512i __A)
5854 {
5855 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5856 (__mmask16) -1);
5857 }
5858
5859 extern __inline void
5860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5861 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5862 {
5863 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5864 (__mmask16) __U);
5865 }
5866
5867 extern __inline __m512d
5868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5869 _mm512_permutevar_pd (__m512d __A, __m512i __C)
5870 {
5871 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5872 (__v8di) __C,
5873 (__v8df)
5874 _mm512_undefined_pd (),
5875 (__mmask8) -1);
5876 }
5877
5878 extern __inline __m512d
5879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5880 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5881 {
5882 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5883 (__v8di) __C,
5884 (__v8df) __W,
5885 (__mmask8) __U);
5886 }
5887
5888 extern __inline __m512d
5889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5890 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5891 {
5892 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5893 (__v8di) __C,
5894 (__v8df)
5895 _mm512_setzero_pd (),
5896 (__mmask8) __U);
5897 }
5898
5899 extern __inline __m512
5900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5901 _mm512_permutevar_ps (__m512 __A, __m512i __C)
5902 {
5903 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5904 (__v16si) __C,
5905 (__v16sf)
5906 _mm512_undefined_ps (),
5907 (__mmask16) -1);
5908 }
5909
5910 extern __inline __m512
5911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5912 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5913 {
5914 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5915 (__v16si) __C,
5916 (__v16sf) __W,
5917 (__mmask16) __U);
5918 }
5919
5920 extern __inline __m512
5921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5922 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5923 {
5924 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5925 (__v16si) __C,
5926 (__v16sf)
5927 _mm512_setzero_ps (),
5928 (__mmask16) __U);
5929 }
5930
5931 extern __inline __m512i
5932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5933 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5934 {
5935 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5936 /* idx */ ,
5937 (__v8di) __A,
5938 (__v8di) __B,
5939 (__mmask8) -1);
5940 }
5941
5942 extern __inline __m512i
5943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5944 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5945 __m512i __B)
5946 {
5947 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5948 /* idx */ ,
5949 (__v8di) __A,
5950 (__v8di) __B,
5951 (__mmask8) __U);
5952 }
5953
5954 extern __inline __m512i
5955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5956 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5957 __mmask8 __U, __m512i __B)
5958 {
5959 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5960 (__v8di) __I
5961 /* idx */ ,
5962 (__v8di) __B,
5963 (__mmask8) __U);
5964 }
5965
5966 extern __inline __m512i
5967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5968 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5969 __m512i __I, __m512i __B)
5970 {
5971 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5972 /* idx */ ,
5973 (__v8di) __A,
5974 (__v8di) __B,
5975 (__mmask8) __U);
5976 }
5977
5978 extern __inline __m512i
5979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5980 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5981 {
5982 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5983 /* idx */ ,
5984 (__v16si) __A,
5985 (__v16si) __B,
5986 (__mmask16) -1);
5987 }
5988
5989 extern __inline __m512i
5990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5991 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5992 __m512i __I, __m512i __B)
5993 {
5994 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5995 /* idx */ ,
5996 (__v16si) __A,
5997 (__v16si) __B,
5998 (__mmask16) __U);
5999 }
6000
6001 extern __inline __m512i
6002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6003 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6004 __mmask16 __U, __m512i __B)
6005 {
6006 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6007 (__v16si) __I
6008 /* idx */ ,
6009 (__v16si) __B,
6010 (__mmask16) __U);
6011 }
6012
6013 extern __inline __m512i
6014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6015 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
6016 __m512i __I, __m512i __B)
6017 {
6018 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
6019 /* idx */ ,
6020 (__v16si) __A,
6021 (__v16si) __B,
6022 (__mmask16) __U);
6023 }
6024
6025 extern __inline __m512d
6026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6027 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6028 {
6029 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6030 /* idx */ ,
6031 (__v8df) __A,
6032 (__v8df) __B,
6033 (__mmask8) -1);
6034 }
6035
6036 extern __inline __m512d
6037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6038 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6039 __m512d __B)
6040 {
6041 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6042 /* idx */ ,
6043 (__v8df) __A,
6044 (__v8df) __B,
6045 (__mmask8) __U);
6046 }
6047
6048 extern __inline __m512d
6049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6050 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6051 __m512d __B)
6052 {
6053 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6054 (__v8di) __I
6055 /* idx */ ,
6056 (__v8df) __B,
6057 (__mmask8) __U);
6058 }
6059
6060 extern __inline __m512d
6061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6062 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6063 __m512d __B)
6064 {
6065 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6066 /* idx */ ,
6067 (__v8df) __A,
6068 (__v8df) __B,
6069 (__mmask8) __U);
6070 }
6071
6072 extern __inline __m512
6073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6074 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6075 {
6076 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6077 /* idx */ ,
6078 (__v16sf) __A,
6079 (__v16sf) __B,
6080 (__mmask16) -1);
6081 }
6082
6083 extern __inline __m512
6084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6085 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6086 {
6087 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6088 /* idx */ ,
6089 (__v16sf) __A,
6090 (__v16sf) __B,
6091 (__mmask16) __U);
6092 }
6093
6094 extern __inline __m512
6095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6096 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6097 __m512 __B)
6098 {
6099 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6100 (__v16si) __I
6101 /* idx */ ,
6102 (__v16sf) __B,
6103 (__mmask16) __U);
6104 }
6105
6106 extern __inline __m512
6107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6108 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6109 __m512 __B)
6110 {
6111 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6112 /* idx */ ,
6113 (__v16sf) __A,
6114 (__v16sf) __B,
6115 (__mmask16) __U);
6116 }
6117
6118 #ifdef __OPTIMIZE__
6119 extern __inline __m512d
6120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6121 _mm512_permute_pd (__m512d __X, const int __C)
6122 {
6123 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6124 (__v8df)
6125 _mm512_undefined_pd (),
6126 (__mmask8) -1);
6127 }
6128
6129 extern __inline __m512d
6130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6131 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6132 {
6133 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6134 (__v8df) __W,
6135 (__mmask8) __U);
6136 }
6137
6138 extern __inline __m512d
6139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6140 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6141 {
6142 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6143 (__v8df)
6144 _mm512_setzero_pd (),
6145 (__mmask8) __U);
6146 }
6147
6148 extern __inline __m512
6149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6150 _mm512_permute_ps (__m512 __X, const int __C)
6151 {
6152 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6153 (__v16sf)
6154 _mm512_undefined_ps (),
6155 (__mmask16) -1);
6156 }
6157
6158 extern __inline __m512
6159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6160 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6161 {
6162 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6163 (__v16sf) __W,
6164 (__mmask16) __U);
6165 }
6166
6167 extern __inline __m512
6168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6169 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6170 {
6171 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6172 (__v16sf)
6173 _mm512_setzero_ps (),
6174 (__mmask16) __U);
6175 }
6176 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vpermilpd512_mask with an all-ones mask and
   _mm512_undefined_pd () as the third operand.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d) (X), \
     (int) (C), \
     (__v8df)(__m512d) _mm512_undefined_pd (), \
     (__mmask8) (-1)))
6181
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vpermilpd512_mask with W as the third operand and U as
   the mask.  */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d) (X), \
     (int) (C), \
     (__v8df)(__m512d) (W), \
     (__mmask8) (U)))
6186
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vpermilpd512_mask with _mm512_setzero_pd () as the third
   operand and U as the mask.  */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ( \
     (__v8df)(__m512d) (X), \
     (int) (C), \
     (__v8df)(__m512d) _mm512_setzero_pd (), \
     (__mmask8) (U)))
6191
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vpermilps512_mask with an all-ones mask and
   _mm512_undefined_ps () as the third operand.  */
#define _mm512_permute_ps(X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512) (X), \
     (int) (C), \
     (__v16sf)(__m512) _mm512_undefined_ps (), \
     (__mmask16) (-1)))
6196
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vpermilps512_mask with W as the third operand and U as
   the mask.  */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512) (X), \
     (int) (C), \
     (__v16sf)(__m512) (W), \
     (__mmask16) (U)))
6201
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_vpermilps512_mask with _mm512_setzero_ps () as the third
   operand and U as the mask.  */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ( \
     (__v16sf)(__m512) (X), \
     (int) (C), \
     (__v16sf)(__m512) _mm512_setzero_ps (), \
     (__mmask16) (U)))
6206 #endif
6207
6208 #ifdef __OPTIMIZE__
6209 extern __inline __m512i
6210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6211 _mm512_permutex_epi64 (__m512i __X, const int __I)
6212 {
6213 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6214 (__v8di)
6215 _mm512_undefined_si512 (),
6216 (__mmask8) (-1));
6217 }
6218
6219 extern __inline __m512i
6220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6221 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6222 __m512i __X, const int __I)
6223 {
6224 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6225 (__v8di) __W,
6226 (__mmask8) __M);
6227 }
6228
6229 extern __inline __m512i
6230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6231 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6232 {
6233 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6234 (__v8di)
6235 _mm512_setzero_si512 (),
6236 (__mmask8) __M);
6237 }
6238
6239 extern __inline __m512d
6240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6241 _mm512_permutex_pd (__m512d __X, const int __M)
6242 {
6243 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6244 (__v8df)
6245 _mm512_undefined_pd (),
6246 (__mmask8) -1);
6247 }
6248
6249 extern __inline __m512d
6250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6251 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6252 {
6253 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6254 (__v8df) __W,
6255 (__mmask8) __U);
6256 }
6257
6258 extern __inline __m512d
6259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6260 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6261 {
6262 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6263 (__v8df)
6264 _mm512_setzero_pd (),
6265 (__mmask8) __U);
6266 }
6267 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_permdf512_mask with an all-ones mask and
   _mm512_undefined_pd () as the third operand.  */
#define _mm512_permutex_pd(X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d) (X), \
     (int) (M), \
     (__v8df)(__m512d) _mm512_undefined_pd (), \
     (__mmask8) -1))
6272
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_permdf512_mask with W as the third operand and U as the
   mask.  */
#define _mm512_mask_permutex_pd(W, U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d) (X), \
     (int) (M), \
     (__v8df)(__m512d) (W), \
     (__mmask8) (U)))
6276
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_permdf512_mask with _mm512_setzero_pd () as the third
   operand and U as the mask.  */
#define _mm512_maskz_permutex_pd(U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ( \
     (__v8df)(__m512d) (X), \
     (int) (M), \
     (__v8df)(__m512d) _mm512_setzero_pd (), \
     (__mmask8) (U)))
6281
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_permdi512_mask with an all-ones mask and
   _mm512_undefined_si512 () as the third operand.  */
#define _mm512_permutex_epi64(X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i) (X), (int) (I), \
     (__v8di)(__m512i) (_mm512_undefined_si512 ()), \
     (__mmask8) (-1)))
6288
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_permdi512_mask with _mm512_setzero_si512 () as the third
   operand and M as the mask.  */
#define _mm512_maskz_permutex_epi64(M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i) (X), (int) (I), \
     (__v8di)(__m512i) (_mm512_setzero_si512 ()), \
     (__mmask8) (M)))
6295
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_permdi512_mask with W as the third operand and M as the
   mask.  */
#define _mm512_mask_permutex_epi64(W, M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ( \
     (__v8di)(__m512i) (X), (int) (I), \
     (__v8di)(__m512i) (W), \
     (__mmask8) (M)))
6301 #endif
6302
6303 extern __inline __m512i
6304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6305 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6306 {
6307 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6308 (__v8di) __X,
6309 (__v8di)
6310 _mm512_setzero_si512 (),
6311 __M);
6312 }
6313
6314 extern __inline __m512i
6315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6316 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6317 {
6318 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6319 (__v8di) __X,
6320 (__v8di)
6321 _mm512_undefined_si512 (),
6322 (__mmask8) -1);
6323 }
6324
6325 extern __inline __m512i
6326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6327 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6328 __m512i __Y)
6329 {
6330 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6331 (__v8di) __X,
6332 (__v8di) __W,
6333 __M);
6334 }
6335
6336 extern __inline __m512i
6337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6338 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6339 {
6340 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6341 (__v16si) __X,
6342 (__v16si)
6343 _mm512_setzero_si512 (),
6344 __M);
6345 }
6346
6347 extern __inline __m512i
6348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6349 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6350 {
6351 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6352 (__v16si) __X,
6353 (__v16si)
6354 _mm512_undefined_si512 (),
6355 (__mmask16) -1);
6356 }
6357
6358 extern __inline __m512i
6359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6360 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6361 __m512i __Y)
6362 {
6363 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6364 (__v16si) __X,
6365 (__v16si) __W,
6366 __M);
6367 }
6368
6369 extern __inline __m512d
6370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6371 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6372 {
6373 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6374 (__v8di) __X,
6375 (__v8df)
6376 _mm512_undefined_pd (),
6377 (__mmask8) -1);
6378 }
6379
6380 extern __inline __m512d
6381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6382 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6383 {
6384 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6385 (__v8di) __X,
6386 (__v8df) __W,
6387 (__mmask8) __U);
6388 }
6389
6390 extern __inline __m512d
6391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6392 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6393 {
6394 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6395 (__v8di) __X,
6396 (__v8df)
6397 _mm512_setzero_pd (),
6398 (__mmask8) __U);
6399 }
6400
6401 extern __inline __m512
6402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6403 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6404 {
6405 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6406 (__v16si) __X,
6407 (__v16sf)
6408 _mm512_undefined_ps (),
6409 (__mmask16) -1);
6410 }
6411
6412 extern __inline __m512
6413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6414 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6415 {
6416 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6417 (__v16si) __X,
6418 (__v16sf) __W,
6419 (__mmask16) __U);
6420 }
6421
6422 extern __inline __m512
6423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6424 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6425 {
6426 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6427 (__v16si) __X,
6428 (__v16sf)
6429 _mm512_setzero_ps (),
6430 (__mmask16) __U);
6431 }
6432
6433 #ifdef __OPTIMIZE__
6434 extern __inline __m512
6435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6436 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6437 {
6438 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6439 (__v16sf) __V, __imm,
6440 (__v16sf)
6441 _mm512_undefined_ps (),
6442 (__mmask16) -1);
6443 }
6444
6445 extern __inline __m512
6446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6447 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6448 __m512 __V, const int __imm)
6449 {
6450 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6451 (__v16sf) __V, __imm,
6452 (__v16sf) __W,
6453 (__mmask16) __U);
6454 }
6455
6456 extern __inline __m512
6457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6458 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6459 {
6460 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6461 (__v16sf) __V, __imm,
6462 (__v16sf)
6463 _mm512_setzero_ps (),
6464 (__mmask16) __U);
6465 }
6466
6467 extern __inline __m512d
6468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6469 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6470 {
6471 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6472 (__v8df) __V, __imm,
6473 (__v8df)
6474 _mm512_undefined_pd (),
6475 (__mmask8) -1);
6476 }
6477
6478 extern __inline __m512d
6479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6480 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6481 __m512d __V, const int __imm)
6482 {
6483 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6484 (__v8df) __V, __imm,
6485 (__v8df) __W,
6486 (__mmask8) __U);
6487 }
6488
6489 extern __inline __m512d
6490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6491 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6492 const int __imm)
6493 {
6494 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6495 (__v8df) __V, __imm,
6496 (__v8df)
6497 _mm512_setzero_pd (),
6498 (__mmask8) __U);
6499 }
6500
6501 extern __inline __m512d
6502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6503 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6504 const int __imm, const int __R)
6505 {
6506 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6507 (__v8df) __B,
6508 (__v8di) __C,
6509 __imm,
6510 (__mmask8) -1, __R);
6511 }
6512
6513 extern __inline __m512d
6514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6515 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6516 __m512i __C, const int __imm, const int __R)
6517 {
6518 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6519 (__v8df) __B,
6520 (__v8di) __C,
6521 __imm,
6522 (__mmask8) __U, __R);
6523 }
6524
6525 extern __inline __m512d
6526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6527 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6528 __m512i __C, const int __imm, const int __R)
6529 {
6530 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6531 (__v8df) __B,
6532 (__v8di) __C,
6533 __imm,
6534 (__mmask8) __U, __R);
6535 }
6536
6537 extern __inline __m512
6538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6539 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6540 const int __imm, const int __R)
6541 {
6542 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6543 (__v16sf) __B,
6544 (__v16si) __C,
6545 __imm,
6546 (__mmask16) -1, __R);
6547 }
6548
6549 extern __inline __m512
6550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6551 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6552 __m512i __C, const int __imm, const int __R)
6553 {
6554 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6555 (__v16sf) __B,
6556 (__v16si) __C,
6557 __imm,
6558 (__mmask16) __U, __R);
6559 }
6560
6561 extern __inline __m512
6562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6563 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6564 __m512i __C, const int __imm, const int __R)
6565 {
6566 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6567 (__v16sf) __B,
6568 (__v16si) __C,
6569 __imm,
6570 (__mmask16) __U, __R);
6571 }
6572
6573 extern __inline __m128d
6574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6575 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6576 const int __imm, const int __R)
6577 {
6578 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6579 (__v2df) __B,
6580 (__v2di) __C, __imm,
6581 (__mmask8) -1, __R);
6582 }
6583
6584 extern __inline __m128d
6585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6586 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6587 __m128i __C, const int __imm, const int __R)
6588 {
6589 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6590 (__v2df) __B,
6591 (__v2di) __C, __imm,
6592 (__mmask8) __U, __R);
6593 }
6594
6595 extern __inline __m128d
6596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6597 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6598 __m128i __C, const int __imm, const int __R)
6599 {
6600 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6601 (__v2df) __B,
6602 (__v2di) __C,
6603 __imm,
6604 (__mmask8) __U, __R);
6605 }
6606
6607 extern __inline __m128
6608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6609 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6610 const int __imm, const int __R)
6611 {
6612 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6613 (__v4sf) __B,
6614 (__v4si) __C, __imm,
6615 (__mmask8) -1, __R);
6616 }
6617
6618 extern __inline __m128
6619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6620 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6621 __m128i __C, const int __imm, const int __R)
6622 {
6623 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6624 (__v4sf) __B,
6625 (__v4si) __C, __imm,
6626 (__mmask8) __U, __R);
6627 }
6628
6629 extern __inline __m128
6630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6631 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6632 __m128i __C, const int __imm, const int __R)
6633 {
6634 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6635 (__v4sf) __B,
6636 (__v4si) __C, __imm,
6637 (__mmask8) __U, __R);
6638 }
6639
6640 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_shufpd512_mask with an all-ones mask and
   _mm512_undefined_pd () as the fourth operand.  */
#define _mm512_shuffle_pd(X, Y, C) \
  ((__m512d) __builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d) (X), \
     (__v8df)(__m512d) (Y), \
     (int) (C), \
     (__v8df)(__m512d) _mm512_undefined_pd (), \
     (__mmask8) -1))
6646
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_shufpd512_mask with W as the fourth operand and U as the
   mask.  */
#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d) (X), \
     (__v8df)(__m512d) (Y), \
     (int) (C), \
     (__v8df)(__m512d) (W), \
     (__mmask8) (U)))
6652
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_shufpd512_mask with _mm512_setzero_pd () as the fourth
   operand and U as the mask.  */
#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
  ((__m512d) __builtin_ia32_shufpd512_mask ( \
     (__v8df)(__m512d) (X), \
     (__v8df)(__m512d) (Y), \
     (int) (C), \
     (__v8df)(__m512d) _mm512_setzero_pd (), \
     (__mmask8) (U)))
6658
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_shufps512_mask with an all-ones mask and
   _mm512_undefined_ps () as the fourth operand.  */
#define _mm512_shuffle_ps(X, Y, C) \
  ((__m512) __builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512) (X), \
     (__v16sf)(__m512) (Y), \
     (int) (C), \
     (__v16sf)(__m512) _mm512_undefined_ps (), \
     (__mmask16) -1))
6664
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_shufps512_mask with W as the fourth operand and U as the
   mask.  */
#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  ((__m512) __builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512) (X), \
     (__v16sf)(__m512) (Y), \
     (int) (C), \
     (__v16sf)(__m512) (W), \
     (__mmask16) (U)))
6670
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_shufps512_mask with _mm512_setzero_ps () as the fourth
   operand and U as the mask.  */
#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  ((__m512) __builtin_ia32_shufps512_mask ( \
     (__v16sf)(__m512) (X), \
     (__v16sf)(__m512) (Y), \
     (int) (C), \
     (__v16sf)(__m512) _mm512_setzero_ps (), \
     (__mmask16) (U)))
6676
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_fixupimmpd512_mask with an all-ones mask.  */
#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df)(__m512d) (X), \
     (__v8df)(__m512d) (Y), \
     (__v8di)(__m512i) (Z), \
     (int) (C), \
     (__mmask8) (-1), \
     (R)))
6681
6682 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
6683 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6684 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6685 (__mmask8)(U), (R)))
6686
6687 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
6688 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
6689 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6690 (__mmask8)(U), (R)))
6691
6692 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
6693 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6694 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6695 (__mmask16)(-1), (R)))
6696
6697 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
6698 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6699 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6700 (__mmask16)(U), (R)))
6701
6702 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
6703 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
6704 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6705 (__mmask16)(U), (R)))
6706
/* Macro forms of the scalar fixupimm_round intrinsics.  Unmasked and mask
   forms expand to the *_mask builtin (with a -1 mask or U respectively);
   the maskz forms expand to the *_maskz builtin.  */
#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
                                             (__v2df)(__m128d)(Y), \
                                             (__v2di)(__m128i)(Z), \
                                             (int)(C), \
                                             (__mmask8)(-1), (R)))

#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
                                             (__v2df)(__m128d)(Y), \
                                             (__v2di)(__m128i)(Z), \
                                             (int)(C), \
                                             (__mmask8)(U), (R)))

#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
  ((__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
                                              (__v2df)(__m128d)(Y), \
                                              (__v2di)(__m128i)(Z), \
                                              (int)(C), \
                                              (__mmask8)(U), (R)))

#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
  ((__m128) __builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
                                            (__v4sf)(__m128)(Y), \
                                            (__v4si)(__m128i)(Z), \
                                            (int)(C), \
                                            (__mmask8)(-1), (R)))

#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
  ((__m128) __builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
                                            (__v4sf)(__m128)(Y), \
                                            (__v4si)(__m128i)(Z), \
                                            (int)(C), \
                                            (__mmask8)(U), (R)))

#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
  ((__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
                                             (__v4sf)(__m128)(Y), \
                                             (__v4si)(__m128i)(Z), \
                                             (int)(C), \
                                             (__mmask8)(U), (R)))
6736 #endif
6737
6738 extern __inline __m512
6739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6740 _mm512_movehdup_ps (__m512 __A)
6741 {
6742 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6743 (__v16sf)
6744 _mm512_undefined_ps (),
6745 (__mmask16) -1);
6746 }
6747
6748 extern __inline __m512
6749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6750 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6751 {
6752 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6753 (__v16sf) __W,
6754 (__mmask16) __U);
6755 }
6756
6757 extern __inline __m512
6758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6759 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6760 {
6761 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6762 (__v16sf)
6763 _mm512_setzero_ps (),
6764 (__mmask16) __U);
6765 }
6766
6767 extern __inline __m512
6768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6769 _mm512_moveldup_ps (__m512 __A)
6770 {
6771 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6772 (__v16sf)
6773 _mm512_undefined_ps (),
6774 (__mmask16) -1);
6775 }
6776
6777 extern __inline __m512
6778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6779 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6780 {
6781 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6782 (__v16sf) __W,
6783 (__mmask16) __U);
6784 }
6785
6786 extern __inline __m512
6787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6788 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6789 {
6790 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6791 (__v16sf)
6792 _mm512_setzero_ps (),
6793 (__mmask16) __U);
6794 }
6795
6796 extern __inline __m512i
6797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6798 _mm512_or_si512 (__m512i __A, __m512i __B)
6799 {
6800 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6801 (__v16si) __B,
6802 (__v16si)
6803 _mm512_undefined_si512 (),
6804 (__mmask16) -1);
6805 }
6806
6807 extern __inline __m512i
6808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6809 _mm512_or_epi32 (__m512i __A, __m512i __B)
6810 {
6811 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6812 (__v16si) __B,
6813 (__v16si)
6814 _mm512_undefined_si512 (),
6815 (__mmask16) -1);
6816 }
6817
6818 extern __inline __m512i
6819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6820 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6821 {
6822 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6823 (__v16si) __B,
6824 (__v16si) __W,
6825 (__mmask16) __U);
6826 }
6827
6828 extern __inline __m512i
6829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6830 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6831 {
6832 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6833 (__v16si) __B,
6834 (__v16si)
6835 _mm512_setzero_si512 (),
6836 (__mmask16) __U);
6837 }
6838
6839 extern __inline __m512i
6840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6841 _mm512_or_epi64 (__m512i __A, __m512i __B)
6842 {
6843 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6844 (__v8di) __B,
6845 (__v8di)
6846 _mm512_undefined_si512 (),
6847 (__mmask8) -1);
6848 }
6849
6850 extern __inline __m512i
6851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6852 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6853 {
6854 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6855 (__v8di) __B,
6856 (__v8di) __W,
6857 (__mmask8) __U);
6858 }
6859
6860 extern __inline __m512i
6861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6862 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6863 {
6864 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6865 (__v8di) __B,
6866 (__v8di)
6867 _mm512_setzero_si512 (),
6868 (__mmask8) __U);
6869 }
6870
6871 extern __inline __m512i
6872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6873 _mm512_xor_si512 (__m512i __A, __m512i __B)
6874 {
6875 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6876 (__v16si) __B,
6877 (__v16si)
6878 _mm512_undefined_si512 (),
6879 (__mmask16) -1);
6880 }
6881
6882 extern __inline __m512i
6883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6884 _mm512_xor_epi32 (__m512i __A, __m512i __B)
6885 {
6886 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6887 (__v16si) __B,
6888 (__v16si)
6889 _mm512_undefined_si512 (),
6890 (__mmask16) -1);
6891 }
6892
6893 extern __inline __m512i
6894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6895 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6896 {
6897 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6898 (__v16si) __B,
6899 (__v16si) __W,
6900 (__mmask16) __U);
6901 }
6902
6903 extern __inline __m512i
6904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6905 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6906 {
6907 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6908 (__v16si) __B,
6909 (__v16si)
6910 _mm512_setzero_si512 (),
6911 (__mmask16) __U);
6912 }
6913
6914 extern __inline __m512i
6915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6916 _mm512_xor_epi64 (__m512i __A, __m512i __B)
6917 {
6918 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6919 (__v8di) __B,
6920 (__v8di)
6921 _mm512_undefined_si512 (),
6922 (__mmask8) -1);
6923 }
6924
6925 extern __inline __m512i
6926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6927 _mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6928 {
6929 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6930 (__v8di) __B,
6931 (__v8di) __W,
6932 (__mmask8) __U);
6933 }
6934
6935 extern __inline __m512i
6936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6937 _mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
6938 {
6939 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6940 (__v8di) __B,
6941 (__v8di)
6942 _mm512_setzero_si512 (),
6943 (__mmask8) __U);
6944 }
6945
6946 #ifdef __OPTIMIZE__
6947 extern __inline __m512i
6948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6949 _mm512_rol_epi32 (__m512i __A, const int __B)
6950 {
6951 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6952 (__v16si)
6953 _mm512_undefined_si512 (),
6954 (__mmask16) -1);
6955 }
6956
6957 extern __inline __m512i
6958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6959 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6960 {
6961 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6962 (__v16si) __W,
6963 (__mmask16) __U);
6964 }
6965
6966 extern __inline __m512i
6967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6968 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6969 {
6970 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6971 (__v16si)
6972 _mm512_setzero_si512 (),
6973 (__mmask16) __U);
6974 }
6975
6976 extern __inline __m512i
6977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6978 _mm512_ror_epi32 (__m512i __A, int __B)
6979 {
6980 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6981 (__v16si)
6982 _mm512_undefined_si512 (),
6983 (__mmask16) -1);
6984 }
6985
6986 extern __inline __m512i
6987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6989 {
6990 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6991 (__v16si) __W,
6992 (__mmask16) __U);
6993 }
6994
6995 extern __inline __m512i
6996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6997 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6998 {
6999 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
7000 (__v16si)
7001 _mm512_setzero_si512 (),
7002 (__mmask16) __U);
7003 }
7004
7005 extern __inline __m512i
7006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7007 _mm512_rol_epi64 (__m512i __A, const int __B)
7008 {
7009 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7010 (__v8di)
7011 _mm512_undefined_si512 (),
7012 (__mmask8) -1);
7013 }
7014
7015 extern __inline __m512i
7016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7017 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
7018 {
7019 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7020 (__v8di) __W,
7021 (__mmask8) __U);
7022 }
7023
7024 extern __inline __m512i
7025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7026 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
7027 {
7028 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
7029 (__v8di)
7030 _mm512_setzero_si512 (),
7031 (__mmask8) __U);
7032 }
7033
7034 extern __inline __m512i
7035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7036 _mm512_ror_epi64 (__m512i __A, int __B)
7037 {
7038 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7039 (__v8di)
7040 _mm512_undefined_si512 (),
7041 (__mmask8) -1);
7042 }
7043
7044 extern __inline __m512i
7045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7046 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
7047 {
7048 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7049 (__v8di) __W,
7050 (__mmask8) __U);
7051 }
7052
7053 extern __inline __m512i
7054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7055 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
7056 {
7057 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7058 (__v8di)
7059 _mm512_setzero_si512 (),
7060 (__mmask8) __U);
7061 }
7062
7063 #else
/* Macro forms of the 32-bit-element rol/ror intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to the prold512/prord512
   builtin with an undefined vector and -1 mask (unmasked), W and U (mask),
   or a zero vector and U (maskz).  */
#define _mm512_rol_epi32(A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
                                           (int)(B), \
                                           (__v16si) \
                                           _mm512_undefined_si512 (), \
                                           (__mmask16)(-1)))

#define _mm512_mask_rol_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
                                           (int)(B), \
                                           (__v16si)(__m512i)(W), \
                                           (__mmask16)(U)))

#define _mm512_maskz_rol_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
                                           (int)(B), \
                                           (__v16si) \
                                           _mm512_setzero_si512 (), \
                                           (__mmask16)(U)))

#define _mm512_ror_epi32(A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
                                           (int)(B), \
                                           (__v16si) \
                                           _mm512_undefined_si512 (), \
                                           (__mmask16)(-1)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
                                           (int)(B), \
                                           (__v16si)(__m512i)(W), \
                                           (__mmask16)(U)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
                                           (int)(B), \
                                           (__v16si) \
                                           _mm512_setzero_si512 (), \
                                           (__mmask16)(U)))
/* Macro forms of the 64-bit-element rol/ror intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expands to the prolq512/prorq512
   builtin with an undefined vector and -1 mask (unmasked), W and U (mask),
   or a zero vector and U (maskz).  */
#define _mm512_rol_epi64(A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
                                           (int)(B), \
                                           (__v8di) \
                                           _mm512_undefined_si512 (), \
                                           (__mmask8)(-1)))

#define _mm512_mask_rol_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
                                           (int)(B), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U)))

#define _mm512_maskz_rol_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
                                           (int)(B), \
                                           (__v8di) \
                                           _mm512_setzero_si512 (), \
                                           (__mmask8)(U)))

#define _mm512_ror_epi64(A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
                                           (int)(B), \
                                           (__v8di) \
                                           _mm512_undefined_si512 (), \
                                           (__mmask8)(-1)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
                                           (int)(B), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
                                           (int)(B), \
                                           (__v8di) \
                                           _mm512_setzero_si512 (), \
                                           (__mmask8)(U)))
7125 #endif
7126
7127 extern __inline __m512i
7128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7129 _mm512_and_si512 (__m512i __A, __m512i __B)
7130 {
7131 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7132 (__v16si) __B,
7133 (__v16si)
7134 _mm512_undefined_si512 (),
7135 (__mmask16) -1);
7136 }
7137
7138 extern __inline __m512i
7139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7140 _mm512_and_epi32 (__m512i __A, __m512i __B)
7141 {
7142 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7143 (__v16si) __B,
7144 (__v16si)
7145 _mm512_undefined_si512 (),
7146 (__mmask16) -1);
7147 }
7148
7149 extern __inline __m512i
7150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7151 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7152 {
7153 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7154 (__v16si) __B,
7155 (__v16si) __W,
7156 (__mmask16) __U);
7157 }
7158
7159 extern __inline __m512i
7160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7161 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7162 {
7163 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7164 (__v16si) __B,
7165 (__v16si)
7166 _mm512_setzero_si512 (),
7167 (__mmask16) __U);
7168 }
7169
7170 extern __inline __m512i
7171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7172 _mm512_and_epi64 (__m512i __A, __m512i __B)
7173 {
7174 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7175 (__v8di) __B,
7176 (__v8di)
7177 _mm512_undefined_si512 (),
7178 (__mmask8) -1);
7179 }
7180
7181 extern __inline __m512i
7182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7183 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7184 {
7185 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7186 (__v8di) __B,
7187 (__v8di) __W, __U);
7188 }
7189
7190 extern __inline __m512i
7191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7192 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7193 {
7194 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7195 (__v8di) __B,
7196 (__v8di)
7197 _mm512_setzero_pd (),
7198 __U);
7199 }
7200
7201 extern __inline __m512i
7202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7203 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7204 {
7205 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7206 (__v16si) __B,
7207 (__v16si)
7208 _mm512_undefined_si512 (),
7209 (__mmask16) -1);
7210 }
7211
7212 extern __inline __m512i
7213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7214 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7215 {
7216 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7217 (__v16si) __B,
7218 (__v16si)
7219 _mm512_undefined_si512 (),
7220 (__mmask16) -1);
7221 }
7222
7223 extern __inline __m512i
7224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7225 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7226 {
7227 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7228 (__v16si) __B,
7229 (__v16si) __W,
7230 (__mmask16) __U);
7231 }
7232
7233 extern __inline __m512i
7234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7235 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7236 {
7237 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7238 (__v16si) __B,
7239 (__v16si)
7240 _mm512_setzero_si512 (),
7241 (__mmask16) __U);
7242 }
7243
7244 extern __inline __m512i
7245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7246 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7247 {
7248 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7249 (__v8di) __B,
7250 (__v8di)
7251 _mm512_undefined_si512 (),
7252 (__mmask8) -1);
7253 }
7254
7255 extern __inline __m512i
7256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7257 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7258 {
7259 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7260 (__v8di) __B,
7261 (__v8di) __W, __U);
7262 }
7263
7264 extern __inline __m512i
7265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7266 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7267 {
7268 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7269 (__v8di) __B,
7270 (__v8di)
7271 _mm512_setzero_pd (),
7272 __U);
7273 }
7274
7275 extern __inline __mmask16
7276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7277 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7278 {
7279 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7280 (__v16si) __B,
7281 (__mmask16) -1);
7282 }
7283
7284 extern __inline __mmask16
7285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7286 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7287 {
7288 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7289 (__v16si) __B, __U);
7290 }
7291
7292 extern __inline __mmask8
7293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7294 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7295 {
7296 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7297 (__v8di) __B,
7298 (__mmask8) -1);
7299 }
7300
7301 extern __inline __mmask8
7302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7303 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7304 {
7305 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7306 }
7307
7308 extern __inline __mmask16
7309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7310 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7311 {
7312 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7313 (__v16si) __B,
7314 (__mmask16) -1);
7315 }
7316
7317 extern __inline __mmask16
7318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7319 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7320 {
7321 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7322 (__v16si) __B, __U);
7323 }
7324
7325 extern __inline __mmask8
7326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7327 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7328 {
7329 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7330 (__v8di) __B,
7331 (__mmask8) -1);
7332 }
7333
7334 extern __inline __mmask8
7335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7336 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7337 {
7338 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7339 (__v8di) __B, __U);
7340 }
7341
7342 extern __inline __m512i
7343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7344 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7345 {
7346 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7347 (__v16si) __B,
7348 (__v16si)
7349 _mm512_undefined_si512 (),
7350 (__mmask16) -1);
7351 }
7352
7353 extern __inline __m512i
7354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7355 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7356 __m512i __B)
7357 {
7358 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7359 (__v16si) __B,
7360 (__v16si) __W,
7361 (__mmask16) __U);
7362 }
7363
7364 extern __inline __m512i
7365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7366 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7367 {
7368 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7369 (__v16si) __B,
7370 (__v16si)
7371 _mm512_setzero_si512 (),
7372 (__mmask16) __U);
7373 }
7374
7375 extern __inline __m512i
7376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7377 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7378 {
7379 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7380 (__v8di) __B,
7381 (__v8di)
7382 _mm512_undefined_si512 (),
7383 (__mmask8) -1);
7384 }
7385
7386 extern __inline __m512i
7387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7388 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7389 {
7390 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7391 (__v8di) __B,
7392 (__v8di) __W,
7393 (__mmask8) __U);
7394 }
7395
7396 extern __inline __m512i
7397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7398 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7399 {
7400 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7401 (__v8di) __B,
7402 (__v8di)
7403 _mm512_setzero_si512 (),
7404 (__mmask8) __U);
7405 }
7406
7407 extern __inline __m512i
7408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7409 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7410 {
7411 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7412 (__v16si) __B,
7413 (__v16si)
7414 _mm512_undefined_si512 (),
7415 (__mmask16) -1);
7416 }
7417
7418 extern __inline __m512i
7419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7420 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7421 __m512i __B)
7422 {
7423 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7424 (__v16si) __B,
7425 (__v16si) __W,
7426 (__mmask16) __U);
7427 }
7428
7429 extern __inline __m512i
7430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7431 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7432 {
7433 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7434 (__v16si) __B,
7435 (__v16si)
7436 _mm512_setzero_si512 (),
7437 (__mmask16) __U);
7438 }
7439
7440 extern __inline __m512i
7441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7442 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7443 {
7444 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7445 (__v8di) __B,
7446 (__v8di)
7447 _mm512_undefined_si512 (),
7448 (__mmask8) -1);
7449 }
7450
7451 extern __inline __m512i
7452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7453 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7454 {
7455 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7456 (__v8di) __B,
7457 (__v8di) __W,
7458 (__mmask8) __U);
7459 }
7460
7461 extern __inline __m512i
7462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7463 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7464 {
7465 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7466 (__v8di) __B,
7467 (__v8di)
7468 _mm512_setzero_si512 (),
7469 (__mmask8) __U);
7470 }
7471
7472 #ifdef __x86_64__
7473 #ifdef __OPTIMIZE__
7474 extern __inline unsigned long long
7475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7476 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7477 {
7478 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7479 }
7480
7481 extern __inline long long
7482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7483 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7484 {
7485 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7486 }
7487
7488 extern __inline long long
7489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7490 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7491 {
7492 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7493 }
7494
7495 extern __inline unsigned long long
7496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7497 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7498 {
7499 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7500 }
7501
7502 extern __inline long long
7503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7504 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7505 {
7506 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7507 }
7508
7509 extern __inline long long
7510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7511 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7512 {
7513 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7514 }
7515 #else
/* Macro forms of the scalar-single to 64-bit integer conversions, used
   when __OPTIMIZE__ is not defined.  Each passes A and B unchanged to the
   builtin and casts the result.  */
#define _mm_cvt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvtss2usi64 (A, B))

#define _mm_cvt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 (A, B))

#define _mm_cvt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvtss2si64 (A, B))

#define _mm_cvtt_roundss_u64(A, B) \
  ((unsigned long long) __builtin_ia32_vcvttss2usi64 (A, B))

#define _mm_cvtt_roundss_i64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 (A, B))

#define _mm_cvtt_roundss_si64(A, B) \
  ((long long) __builtin_ia32_vcvttss2si64 (A, B))
7533 #endif
7534 #endif
7535
7536 #ifdef __OPTIMIZE__
7537 extern __inline unsigned
7538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7539 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7540 {
7541 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7542 }
7543
7544 extern __inline int
7545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7546 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7547 {
7548 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7549 }
7550
7551 extern __inline int
7552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7553 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7554 {
7555 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7556 }
7557
7558 extern __inline unsigned
7559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7560 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7561 {
7562 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7563 }
7564
7565 extern __inline int
7566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7567 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7568 {
7569 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7570 }
7571
7572 extern __inline int
7573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7574 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7575 {
7576 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7577 }
7578 #else
/* Macro forms of the scalar-single to 32-bit integer conversions, used
   when __OPTIMIZE__ is not defined.  Each passes A and B unchanged to the
   builtin and casts the result.  */
#define _mm_cvt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvtss2usi32 (A, B))

#define _mm_cvt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 (A, B))

#define _mm_cvt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvtss2si32 (A, B))

#define _mm_cvtt_roundss_u32(A, B) \
  ((unsigned) __builtin_ia32_vcvttss2usi32 (A, B))

#define _mm_cvtt_roundss_si32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 (A, B))

#define _mm_cvtt_roundss_i32(A, B) \
  ((int) __builtin_ia32_vcvttss2si32 (A, B))
7596 #endif
7597
7598 #ifdef __x86_64__
7599 #ifdef __OPTIMIZE__
7600 extern __inline unsigned long long
7601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7602 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7603 {
7604 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7605 }
7606
7607 extern __inline long long
7608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7609 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7610 {
7611 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7612 }
7613
7614 extern __inline long long
7615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7616 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7617 {
7618 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7619 }
7620
7621 extern __inline unsigned long long
7622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7623 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7624 {
7625 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7626 }
7627
7628 extern __inline long long
7629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7630 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7631 {
7632 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7633 }
7634
7635 extern __inline long long
7636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7637 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7638 {
7639 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7640 }
7641 #else
/* Macro equivalents of the 64-bit scalar-double conversions, selected
   when __OPTIMIZE__ is not defined.  (A, B) go to the builtin as-is.  */
#define _mm_cvt_roundsd_u64(A, B)		\
  ((unsigned long long) __builtin_ia32_vcvtsd2usi64 (A, B))

#define _mm_cvt_roundsd_si64(A, B)		\
  ((long long) __builtin_ia32_vcvtsd2si64 (A, B))

/* Same expansion as _mm_cvt_roundsd_si64.  */
#define _mm_cvt_roundsd_i64(A, B)		\
  ((long long) __builtin_ia32_vcvtsd2si64 (A, B))

#define _mm_cvtt_roundsd_u64(A, B)		\
  ((unsigned long long) __builtin_ia32_vcvttsd2usi64 (A, B))

#define _mm_cvtt_roundsd_si64(A, B)		\
  ((long long) __builtin_ia32_vcvttsd2si64 (A, B))

/* Same expansion as _mm_cvtt_roundsd_si64.  */
#define _mm_cvtt_roundsd_i64(A, B)		\
  ((long long) __builtin_ia32_vcvttsd2si64 (A, B))
7659 #endif
7660 #endif
7661
7662 #ifdef __OPTIMIZE__
7663 extern __inline unsigned
7664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7665 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7666 {
7667 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7668 }
7669
7670 extern __inline int
7671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7672 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7673 {
7674 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7675 }
7676
7677 extern __inline int
7678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7679 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7680 {
7681 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7682 }
7683
7684 extern __inline unsigned
7685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7686 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7687 {
7688 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7689 }
7690
7691 extern __inline int
7692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7693 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7694 {
7695 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7696 }
7697
7698 extern __inline int
7699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7700 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7701 {
7702 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7703 }
7704 #else
/* Macro equivalents of the 32-bit scalar-double conversions, selected
   when __OPTIMIZE__ is not defined.  (A, B) go to the builtin as-is.  */
#define _mm_cvt_roundsd_u32(A, B)		\
  ((unsigned) __builtin_ia32_vcvtsd2usi32 (A, B))

#define _mm_cvt_roundsd_si32(A, B)		\
  ((int) __builtin_ia32_vcvtsd2si32 (A, B))

/* Same expansion as _mm_cvt_roundsd_si32.  */
#define _mm_cvt_roundsd_i32(A, B)		\
  ((int) __builtin_ia32_vcvtsd2si32 (A, B))

#define _mm_cvtt_roundsd_u32(A, B)		\
  ((unsigned) __builtin_ia32_vcvttsd2usi32 (A, B))

#define _mm_cvtt_roundsd_si32(A, B)		\
  ((int) __builtin_ia32_vcvttsd2si32 (A, B))

/* Same expansion as _mm_cvtt_roundsd_si32.  */
#define _mm_cvtt_roundsd_i32(A, B)		\
  ((int) __builtin_ia32_vcvttsd2si32 (A, B))
7722 #endif
7723
7724 extern __inline __m512d
7725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7726 _mm512_movedup_pd (__m512d __A)
7727 {
7728 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7729 (__v8df)
7730 _mm512_undefined_pd (),
7731 (__mmask8) -1);
7732 }
7733
7734 extern __inline __m512d
7735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7736 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7737 {
7738 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7739 (__v8df) __W,
7740 (__mmask8) __U);
7741 }
7742
7743 extern __inline __m512d
7744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7745 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7746 {
7747 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7748 (__v8df)
7749 _mm512_setzero_pd (),
7750 (__mmask8) __U);
7751 }
7752
7753 extern __inline __m512d
7754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7755 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
7756 {
7757 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7758 (__v8df) __B,
7759 (__v8df)
7760 _mm512_undefined_pd (),
7761 (__mmask8) -1);
7762 }
7763
7764 extern __inline __m512d
7765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7766 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7767 {
7768 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7769 (__v8df) __B,
7770 (__v8df) __W,
7771 (__mmask8) __U);
7772 }
7773
7774 extern __inline __m512d
7775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7776 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7777 {
7778 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7779 (__v8df) __B,
7780 (__v8df)
7781 _mm512_setzero_pd (),
7782 (__mmask8) __U);
7783 }
7784
7785 extern __inline __m512d
7786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7787 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
7788 {
7789 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7790 (__v8df) __B,
7791 (__v8df)
7792 _mm512_undefined_pd (),
7793 (__mmask8) -1);
7794 }
7795
7796 extern __inline __m512d
7797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7798 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7799 {
7800 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7801 (__v8df) __B,
7802 (__v8df) __W,
7803 (__mmask8) __U);
7804 }
7805
7806 extern __inline __m512d
7807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7808 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7809 {
7810 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7811 (__v8df) __B,
7812 (__v8df)
7813 _mm512_setzero_pd (),
7814 (__mmask8) __U);
7815 }
7816
7817 extern __inline __m512
7818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7819 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
7820 {
7821 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7822 (__v16sf) __B,
7823 (__v16sf)
7824 _mm512_undefined_ps (),
7825 (__mmask16) -1);
7826 }
7827
7828 extern __inline __m512
7829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7830 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7831 {
7832 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7833 (__v16sf) __B,
7834 (__v16sf) __W,
7835 (__mmask16) __U);
7836 }
7837
7838 extern __inline __m512
7839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7840 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7841 {
7842 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7843 (__v16sf) __B,
7844 (__v16sf)
7845 _mm512_setzero_ps (),
7846 (__mmask16) __U);
7847 }
7848
7849 #ifdef __OPTIMIZE__
7850 extern __inline __m512d
7851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7852 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
7853 {
7854 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7855 (__v8df)
7856 _mm512_undefined_pd (),
7857 (__mmask8) -1, __R);
7858 }
7859
7860 extern __inline __m512d
7861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7862 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7863 const int __R)
7864 {
7865 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7866 (__v8df) __W,
7867 (__mmask8) __U, __R);
7868 }
7869
7870 extern __inline __m512d
7871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7872 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7873 {
7874 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7875 (__v8df)
7876 _mm512_setzero_pd (),
7877 (__mmask8) __U, __R);
7878 }
7879
7880 extern __inline __m512
7881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7882 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
7883 {
7884 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7885 (__v16sf)
7886 _mm512_undefined_ps (),
7887 (__mmask16) -1, __R);
7888 }
7889
7890 extern __inline __m512
7891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7892 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7893 const int __R)
7894 {
7895 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7896 (__v16sf) __W,
7897 (__mmask16) __U, __R);
7898 }
7899
7900 extern __inline __m512
7901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7902 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7903 {
7904 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7905 (__v16sf)
7906 _mm512_setzero_ps (),
7907 (__mmask16) __U, __R);
7908 }
7909
7910 extern __inline __m256i
7911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7912 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
7913 {
7914 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7915 __I,
7916 (__v16hi)
7917 _mm256_undefined_si256 (),
7918 -1);
7919 }
7920
7921 extern __inline __m256i
7922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7923 _mm512_cvtps_ph (__m512 __A, const int __I)
7924 {
7925 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7926 __I,
7927 (__v16hi)
7928 _mm256_undefined_si256 (),
7929 -1);
7930 }
7931
7932 extern __inline __m256i
7933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7934 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7935 const int __I)
7936 {
7937 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7938 __I,
7939 (__v16hi) __U,
7940 (__mmask16) __W);
7941 }
7942
7943 extern __inline __m256i
7944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7945 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7946 {
7947 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7948 __I,
7949 (__v16hi) __U,
7950 (__mmask16) __W);
7951 }
7952
7953 extern __inline __m256i
7954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7955 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7956 {
7957 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7958 __I,
7959 (__v16hi)
7960 _mm256_setzero_si256 (),
7961 (__mmask16) __W);
7962 }
7963
7964 extern __inline __m256i
7965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7966 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7967 {
7968 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7969 __I,
7970 (__v16hi)
7971 _mm256_setzero_si256 (),
7972 (__mmask16) __W);
7973 }
7974 #else
/* Macro forms of the ps->pd and ph->ps conversions, used when
   __OPTIMIZE__ is not defined.

   Each replacement list is wrapped in an outer pair of parentheses so
   the expansion behaves as one primary expression.  Without them the
   leading cast binds more loosely than a following postfix operator,
   so e.g. subscripting the macro's result would be applied to the
   builtin call before the cast.  Macro parameters are parenthesized
   as well.  */
#define _mm512_cvt_roundps_pd(A, B)					\
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A),			\
      (__v8df) _mm512_undefined_pd (), -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B)				\
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A), (__v8df)(W), (U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B)				\
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((A),			\
      (__v8df) _mm512_setzero_pd (), (U), (B)))

#define _mm512_cvt_roundph_ps(A, B)					\
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(A),		\
      (__v16sf) _mm512_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B)				\
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(A),		\
      (__v16sf)(W), (U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B)				\
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(A),		\
      (__v16sf) _mm512_setzero_ps (), (U), (B)))
7992
/* Macro forms of the ps->ph conversions, used when __OPTIMIZE__ is not
   defined.

   The source argument is written as (A) inside the cast chain.  With a
   bare `(__m512) A', a compound argument such as `c ? x : y' would have
   only its first operand cast.  In the mask forms U is the vector
   passed as the third builtin operand and W is the mask, matching the
   inline definitions.  */
#define _mm512_cvt_roundps_ph(A, I)					\
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),	\
      (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))

#define _mm512_cvtps_ph(A, I)						\
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),	\
      (int) (I), (__v16hi) _mm256_undefined_si256 (), -1))

#define _mm512_mask_cvt_roundps_ph(U, W, A, I)				\
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),	\
      (int) (I), (__v16hi)(__m256i)(U), (__mmask16) (W)))

#define _mm512_mask_cvtps_ph(U, W, A, I)				\
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),	\
      (int) (I), (__v16hi)(__m256i)(U), (__mmask16) (W)))

#define _mm512_maskz_cvt_roundps_ph(W, A, I)				\
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),	\
      (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))

#define _mm512_maskz_cvtps_ph(W, A, I)					\
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512)(A),	\
      (int) (I), (__v16hi) _mm256_setzero_si256 (), (__mmask16) (W)))
8011 #endif
8012
8013 #ifdef __OPTIMIZE__
8014 extern __inline __m256
8015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8016 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
8017 {
8018 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8019 (__v8sf)
8020 _mm256_undefined_ps (),
8021 (__mmask8) -1, __R);
8022 }
8023
8024 extern __inline __m256
8025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8026 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
8027 const int __R)
8028 {
8029 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8030 (__v8sf) __W,
8031 (__mmask8) __U, __R);
8032 }
8033
8034 extern __inline __m256
8035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8036 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
8037 {
8038 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
8039 (__v8sf)
8040 _mm256_setzero_ps (),
8041 (__mmask8) __U, __R);
8042 }
8043
8044 extern __inline __m128
8045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8046 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
8047 {
8048 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
8049 (__v2df) __B,
8050 __R);
8051 }
8052
8053 extern __inline __m128d
8054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8055 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
8056 {
8057 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
8058 (__v4sf) __B,
8059 __R);
8060 }
8061 #else
/* Macro forms of the pd->ps, sd->ss and ss->sd conversions, used when
   __OPTIMIZE__ is not defined.

   Each replacement list is wrapped in an outer pair of parentheses so
   the expansion behaves as one primary expression; a bare leading cast
   binds more loosely than a following postfix operator.  Macro
   parameters are parenthesized as well.  */
#define _mm512_cvt_roundpd_ps(A, B)					\
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A),			\
      (__v8sf) _mm256_undefined_ps (), -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B)				\
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A), (__v8sf)(W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B)				\
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((A),			\
      (__v8sf) _mm256_setzero_ps (), (U), (B)))

#define _mm_cvt_roundsd_ss(A, B, C)					\
  ((__m128) __builtin_ia32_cvtsd2ss_round ((A), (B), (C)))

#define _mm_cvt_roundss_sd(A, B, C)					\
  ((__m128d) __builtin_ia32_cvtss2sd_round ((A), (B), (C)))
8076 #endif
8077
8078 extern __inline void
8079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8080 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8081 {
8082 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8083 }
8084
8085 extern __inline void
8086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8087 _mm512_stream_ps (float *__P, __m512 __A)
8088 {
8089 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8090 }
8091
8092 extern __inline void
8093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8094 _mm512_stream_pd (double *__P, __m512d __A)
8095 {
8096 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8097 }
8098
8099 extern __inline __m512i
8100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8101 _mm512_stream_load_si512 (void *__P)
8102 {
8103 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8104 }
8105
/* Constants for mantissa extraction: interval selector taken by the
   getmant intrinsics.  Values are spelled out; the intrinsics OR this
   value into the low bits of their control operand.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,	/* interval [1, 2) */
  _MM_MANT_NORM_p5_2 = 1,	/* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1 = 2,	/* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3	/* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
8114
/* Sign-handling selector taken by the getmant intrinsics, which shift
   this value left by 2 when building their control operand.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,	/* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,	/* sign = 0 */
  _MM_MANT_SIGN_nan = 2		/* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
8121
8122 #ifdef __OPTIMIZE__
8123 extern __inline __m128
8124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8125 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8126 {
8127 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8128 (__v4sf) __B,
8129 __R);
8130 }
8131
8132 extern __inline __m128d
8133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8134 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8135 {
8136 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8137 (__v2df) __B,
8138 __R);
8139 }
8140
8141 extern __inline __m512
8142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8143 _mm512_getexp_round_ps (__m512 __A, const int __R)
8144 {
8145 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8146 (__v16sf)
8147 _mm512_undefined_ps (),
8148 (__mmask16) -1, __R);
8149 }
8150
8151 extern __inline __m512
8152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8153 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8154 const int __R)
8155 {
8156 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8157 (__v16sf) __W,
8158 (__mmask16) __U, __R);
8159 }
8160
8161 extern __inline __m512
8162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8163 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8164 {
8165 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8166 (__v16sf)
8167 _mm512_setzero_ps (),
8168 (__mmask16) __U, __R);
8169 }
8170
8171 extern __inline __m512d
8172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8173 _mm512_getexp_round_pd (__m512d __A, const int __R)
8174 {
8175 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8176 (__v8df)
8177 _mm512_undefined_pd (),
8178 (__mmask8) -1, __R);
8179 }
8180
8181 extern __inline __m512d
8182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8183 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8184 const int __R)
8185 {
8186 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8187 (__v8df) __W,
8188 (__mmask8) __U, __R);
8189 }
8190
8191 extern __inline __m512d
8192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8193 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8194 {
8195 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8196 (__v8df)
8197 _mm512_setzero_pd (),
8198 (__mmask8) __U, __R);
8199 }
8200
8201 extern __inline __m512d
8202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8203 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8204 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8205 {
8206 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8207 (__C << 2) | __B,
8208 _mm512_undefined_pd (),
8209 (__mmask8) -1, __R);
8210 }
8211
8212 extern __inline __m512d
8213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8214 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8215 _MM_MANTISSA_NORM_ENUM __B,
8216 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8217 {
8218 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8219 (__C << 2) | __B,
8220 (__v8df) __W, __U,
8221 __R);
8222 }
8223
8224 extern __inline __m512d
8225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8226 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8227 _MM_MANTISSA_NORM_ENUM __B,
8228 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8229 {
8230 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8231 (__C << 2) | __B,
8232 (__v8df)
8233 _mm512_setzero_pd (),
8234 __U, __R);
8235 }
8236
8237 extern __inline __m512
8238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8239 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8240 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8241 {
8242 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8243 (__C << 2) | __B,
8244 _mm512_undefined_ps (),
8245 (__mmask16) -1, __R);
8246 }
8247
8248 extern __inline __m512
8249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8250 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8251 _MM_MANTISSA_NORM_ENUM __B,
8252 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8253 {
8254 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8255 (__C << 2) | __B,
8256 (__v16sf) __W, __U,
8257 __R);
8258 }
8259
8260 extern __inline __m512
8261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8262 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8263 _MM_MANTISSA_NORM_ENUM __B,
8264 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8265 {
8266 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8267 (__C << 2) | __B,
8268 (__v16sf)
8269 _mm512_setzero_ps (),
8270 __U, __R);
8271 }
8272
8273 extern __inline __m128d
8274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8275 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8276 _MM_MANTISSA_NORM_ENUM __C,
8277 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8278 {
8279 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8280 (__v2df) __B,
8281 (__D << 2) | __C,
8282 __R);
8283 }
8284
8285 extern __inline __m128
8286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8287 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8288 _MM_MANTISSA_NORM_ENUM __C,
8289 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8290 {
8291 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8292 (__v4sf) __B,
8293 (__D << 2) | __C,
8294 __R);
8295 }
8296
8297 #else
/* Macro forms of the getmant intrinsics, used when __OPTIMIZE__ is not
   defined.  The control operand is built as ((sign) << 2) | (norm),
   mirroring the inline definitions.  */
#define _mm512_getmant_round_pd(X, B, C, R)				\
  ((__m512d)__builtin_ia32_getmantpd512_mask (				\
     (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)),			\
     (__v8df)(__m512d)_mm512_undefined_pd(), (__mmask8)-1, (R)))

#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R)			\
  ((__m512d)__builtin_ia32_getmantpd512_mask (				\
     (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)),			\
     (__v8df)(__m512d)(W), (__mmask8)(U), (R)))

#define _mm512_maskz_getmant_round_pd(U, X, B, C, R)			\
  ((__m512d)__builtin_ia32_getmantpd512_mask (				\
     (__v8df)(__m512d)(X), (int)(((C)<<2) | (B)),			\
     (__v8df)(__m512d)_mm512_setzero_pd(), (__mmask8)(U), (R)))

#define _mm512_getmant_round_ps(X, B, C, R)				\
  ((__m512)__builtin_ia32_getmantps512_mask (				\
     (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)),			\
     (__v16sf)(__m512)_mm512_undefined_ps(), (__mmask16)-1, (R)))

#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R)			\
  ((__m512)__builtin_ia32_getmantps512_mask (				\
     (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)),			\
     (__v16sf)(__m512)(W), (__mmask16)(U), (R)))

#define _mm512_maskz_getmant_round_ps(U, X, B, C, R)			\
  ((__m512)__builtin_ia32_getmantps512_mask (				\
     (__v16sf)(__m512)(X), (int)(((C)<<2) | (B)),			\
     (__v16sf)(__m512)_mm512_setzero_ps(), (__mmask16)(U), (R)))

#define _mm_getmant_round_sd(X, Y, C, D, R)				\
  ((__m128d)__builtin_ia32_getmantsd_round (				\
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y),			\
     (int)(((D)<<2) | (C)), (R)))

#define _mm_getmant_round_ss(X, Y, C, D, R)				\
  ((__m128)__builtin_ia32_getmantss_round (				\
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y),				\
     (int)(((D)<<2) | (C)), (R)))
8349
/* Macro forms of the getexp intrinsics, used when __OPTIMIZE__ is not
   defined.  Vector arguments are cast to the builtin's operand type;
   R is passed through untouched.  */
#define _mm_getexp_round_ss(A, B, R)					\
  ((__m128)__builtin_ia32_getexpss128_round(				\
     (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))

#define _mm_getexp_round_sd(A, B, R)					\
  ((__m128d)__builtin_ia32_getexpsd128_round(				\
     (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))

#define _mm512_getexp_round_ps(A, R)					\
  ((__m512)__builtin_ia32_getexpps512_mask(				\
     (__v16sf)(__m512)(A),						\
     (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))

#define _mm512_mask_getexp_round_ps(W, U, A, R)				\
  ((__m512)__builtin_ia32_getexpps512_mask(				\
     (__v16sf)(__m512)(A),						\
     (__v16sf)(__m512)(W), (__mmask16)(U), R))

#define _mm512_maskz_getexp_round_ps(U, A, R)				\
  ((__m512)__builtin_ia32_getexpps512_mask(				\
     (__v16sf)(__m512)(A),						\
     (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))

#define _mm512_getexp_round_pd(A, R)					\
  ((__m512d)__builtin_ia32_getexppd512_mask(				\
     (__v8df)(__m512d)(A),						\
     (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))

#define _mm512_mask_getexp_round_pd(W, U, A, R)				\
  ((__m512d)__builtin_ia32_getexppd512_mask(				\
     (__v8df)(__m512d)(A),						\
     (__v8df)(__m512d)(W), (__mmask8)(U), R))

#define _mm512_maskz_getexp_round_pd(U, A, R)				\
  ((__m512d)__builtin_ia32_getexppd512_mask(				\
     (__v8df)(__m512d)(A),						\
     (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
8379 #endif
8380
8381 #ifdef __OPTIMIZE__
8382 extern __inline __m512
8383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8384 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8385 {
8386 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8387 (__v16sf)
8388 _mm512_undefined_ps (),
8389 -1, __R);
8390 }
8391
8392 extern __inline __m512
8393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8394 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8395 const int __imm, const int __R)
8396 {
8397 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8398 (__v16sf) __A,
8399 (__mmask16) __B, __R);
8400 }
8401
8402 extern __inline __m512
8403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8404 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8405 const int __imm, const int __R)
8406 {
8407 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8408 __imm,
8409 (__v16sf)
8410 _mm512_setzero_ps (),
8411 (__mmask16) __A, __R);
8412 }
8413
8414 extern __inline __m512d
8415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8416 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8417 {
8418 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8419 (__v8df)
8420 _mm512_undefined_pd (),
8421 -1, __R);
8422 }
8423
8424 extern __inline __m512d
8425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8426 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8427 __m512d __C, const int __imm, const int __R)
8428 {
8429 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8430 (__v8df) __A,
8431 (__mmask8) __B, __R);
8432 }
8433
8434 extern __inline __m512d
8435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8436 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8437 const int __imm, const int __R)
8438 {
8439 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8440 __imm,
8441 (__v8df)
8442 _mm512_setzero_pd (),
8443 (__mmask8) __A, __R);
8444 }
8445
8446 extern __inline __m128
8447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8448 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8449 {
8450 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8451 (__v4sf) __B, __imm, __R);
8452 }
8453
8454 extern __inline __m128d
8455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8456 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8457 const int __R)
8458 {
8459 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8460 (__v2df) __B, __imm, __R);
8461 }
8462
8463 #else
/* Macro forms of the roundscale intrinsics, used when __OPTIMIZE__ is
   not defined.  Parameter letters follow the inline definitions: in the
   mask forms A is the third builtin operand, B the mask, C the source
   and D the immediate; in the maskz forms A is the mask and B the
   source.  */
#define _mm512_roundscale_round_ps(A, B, R)				\
  ((__m512) __builtin_ia32_rndscaleps_mask (				\
     (__v16sf)(__m512)(A), (int)(B),					\
     (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))

#define _mm512_mask_roundscale_round_ps(A, B, C, D, R)			\
  ((__m512) __builtin_ia32_rndscaleps_mask (				\
     (__v16sf)(__m512)(C), (int)(D),					\
     (__v16sf)(__m512)(A), (__mmask16)(B), R))

#define _mm512_maskz_roundscale_round_ps(A, B, C, R)			\
  ((__m512) __builtin_ia32_rndscaleps_mask (				\
     (__v16sf)(__m512)(B), (int)(C),					\
     (__v16sf)_mm512_setzero_ps(), (__mmask16)(A), R))

#define _mm512_roundscale_round_pd(A, B, R)				\
  ((__m512d) __builtin_ia32_rndscalepd_mask (				\
     (__v8df)(__m512d)(A), (int)(B),					\
     (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))

#define _mm512_mask_roundscale_round_pd(A, B, C, D, R)			\
  ((__m512d) __builtin_ia32_rndscalepd_mask (				\
     (__v8df)(__m512d)(C), (int)(D),					\
     (__v8df)(__m512d)(A), (__mmask8)(B), R))

#define _mm512_maskz_roundscale_round_pd(A, B, C, R)			\
  ((__m512d) __builtin_ia32_rndscalepd_mask (				\
     (__v8df)(__m512d)(B), (int)(C),					\
     (__v8df)_mm512_setzero_pd(), (__mmask8)(A), R))

#define _mm_roundscale_round_ss(A, B, C, R)				\
  ((__m128) __builtin_ia32_rndscaless_round (				\
     (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), R))

#define _mm_roundscale_round_sd(A, B, C, R)				\
  ((__m128d) __builtin_ia32_rndscalesd_round (				\
     (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), R))
8496 #endif
8497
8498 extern __inline __m512
8499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8500 _mm512_floor_ps (__m512 __A)
8501 {
8502 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8503 _MM_FROUND_FLOOR,
8504 (__v16sf) __A, -1,
8505 _MM_FROUND_CUR_DIRECTION);
8506 }
8507
8508 extern __inline __m512d
8509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8510 _mm512_floor_pd (__m512d __A)
8511 {
8512 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8513 _MM_FROUND_FLOOR,
8514 (__v8df) __A, -1,
8515 _MM_FROUND_CUR_DIRECTION);
8516 }
8517
8518 extern __inline __m512
8519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8520 _mm512_ceil_ps (__m512 __A)
8521 {
8522 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8523 _MM_FROUND_CEIL,
8524 (__v16sf) __A, -1,
8525 _MM_FROUND_CUR_DIRECTION);
8526 }
8527
8528 extern __inline __m512d
8529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8530 _mm512_ceil_pd (__m512d __A)
8531 {
8532 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8533 _MM_FROUND_CEIL,
8534 (__v8df) __A, -1,
8535 _MM_FROUND_CUR_DIRECTION);
8536 }
8537
8538 extern __inline __m512
8539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8540 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8541 {
8542 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8543 _MM_FROUND_FLOOR,
8544 (__v16sf) __W, __U,
8545 _MM_FROUND_CUR_DIRECTION);
8546 }
8547
8548 extern __inline __m512d
8549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8550 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8551 {
8552 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8553 _MM_FROUND_FLOOR,
8554 (__v8df) __W, __U,
8555 _MM_FROUND_CUR_DIRECTION);
8556 }
8557
8558 extern __inline __m512
8559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8560 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8561 {
8562 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8563 _MM_FROUND_CEIL,
8564 (__v16sf) __W, __U,
8565 _MM_FROUND_CUR_DIRECTION);
8566 }
8567
8568 extern __inline __m512d
8569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8570 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8571 {
8572 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8573 _MM_FROUND_CEIL,
8574 (__v8df) __W, __U,
8575 _MM_FROUND_CUR_DIRECTION);
8576 }
8577
8578 #ifdef __OPTIMIZE__
8579 extern __inline __m512i
8580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8581 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8582 {
8583 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8584 (__v16si) __B, __imm,
8585 (__v16si)
8586 _mm512_undefined_si512 (),
8587 (__mmask16) -1);
8588 }
8589
8590 extern __inline __m512i
8591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8592 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8593 __m512i __B, const int __imm)
8594 {
8595 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8596 (__v16si) __B, __imm,
8597 (__v16si) __W,
8598 (__mmask16) __U);
8599 }
8600
8601 extern __inline __m512i
8602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8603 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8604 const int __imm)
8605 {
8606 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8607 (__v16si) __B, __imm,
8608 (__v16si)
8609 _mm512_setzero_si512 (),
8610 (__mmask16) __U);
8611 }
8612
8613 extern __inline __m512i
8614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8615 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8616 {
8617 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8618 (__v8di) __B, __imm,
8619 (__v8di)
8620 _mm512_undefined_si512 (),
8621 (__mmask8) -1);
8622 }
8623
8624 extern __inline __m512i
8625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8626 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8627 __m512i __B, const int __imm)
8628 {
8629 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8630 (__v8di) __B, __imm,
8631 (__v8di) __W,
8632 (__mmask8) __U);
8633 }
8634
8635 extern __inline __m512i
8636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8637 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8638 const int __imm)
8639 {
8640 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8641 (__v8di) __B, __imm,
8642 (__v8di)
8643 _mm512_setzero_si512 (),
8644 (__mmask8) __U);
8645 }
8646 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_alignd512_mask on X, Y and C with an undefined fourth
   operand and an all-ones __mmask16.  */
#define _mm512_alignr_epi32(X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) _mm512_undefined_si512 (), \
     (__mmask16) -1))
8651
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_alignd512_mask on X, Y and C with W as the fourth
   operand and U as the mask.  */
#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))
8656
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_alignd512_mask on X, Y and C with
   _mm512_setzero_si512 () as the fourth operand and U as the mask.  */
#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignd512_mask ( \
     (__v16si) (__m512i) (X), \
     (__v16si) (__m512i) (Y), \
     (int) (C), \
     (__v16si) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
8661
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_alignq512_mask on X, Y and C with an undefined fourth
   operand and an all-ones __mmask8.  */
#define _mm512_alignr_epi64(X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) _mm512_undefined_si512 (), \
     (__mmask8) -1))
8666
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_alignq512_mask on X, Y and C with W as the fourth
   operand and U as the mask.  */
#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))
8670
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_alignq512_mask on X, Y and C with
   _mm512_setzero_si512 () as the fourth operand and U as the mask.  */
#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
  ((__m512i) __builtin_ia32_alignq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (C), \
     (__v8di) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
8675 #endif
8676
8677 extern __inline __mmask16
8678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8679 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8680 {
8681 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8682 (__v16si) __B,
8683 (__mmask16) -1);
8684 }
8685
8686 extern __inline __mmask16
8687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8688 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8689 {
8690 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8691 (__v16si) __B, __U);
8692 }
8693
8694 extern __inline __mmask8
8695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8696 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8697 {
8698 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8699 (__v8di) __B, __U);
8700 }
8701
8702 extern __inline __mmask8
8703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8704 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8705 {
8706 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8707 (__v8di) __B,
8708 (__mmask8) -1);
8709 }
8710
8711 extern __inline __mmask16
8712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8713 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8714 {
8715 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8716 (__v16si) __B,
8717 (__mmask16) -1);
8718 }
8719
8720 extern __inline __mmask16
8721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8722 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8723 {
8724 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8725 (__v16si) __B, __U);
8726 }
8727
8728 extern __inline __mmask8
8729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8730 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8731 {
8732 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8733 (__v8di) __B, __U);
8734 }
8735
8736 extern __inline __mmask8
8737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8738 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8739 {
8740 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8741 (__v8di) __B,
8742 (__mmask8) -1);
8743 }
8744
8745 extern __inline __mmask16
8746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8747 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8748 {
8749 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8750 (__v16si) __Y, 5,
8751 (__mmask16) -1);
8752 }
8753
8754 extern __inline __mmask16
8755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8756 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8757 {
8758 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8759 (__v16si) __Y, 5,
8760 (__mmask16) __M);
8761 }
8762
8763 extern __inline __mmask16
8764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8765 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8766 {
8767 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8768 (__v16si) __Y, 5,
8769 (__mmask16) __M);
8770 }
8771
8772 extern __inline __mmask16
8773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8774 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8775 {
8776 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8777 (__v16si) __Y, 5,
8778 (__mmask16) -1);
8779 }
8780
8781 extern __inline __mmask8
8782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8783 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8784 {
8785 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8786 (__v8di) __Y, 5,
8787 (__mmask8) __M);
8788 }
8789
8790 extern __inline __mmask8
8791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8792 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8793 {
8794 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8795 (__v8di) __Y, 5,
8796 (__mmask8) -1);
8797 }
8798
8799 extern __inline __mmask8
8800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8801 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8802 {
8803 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8804 (__v8di) __Y, 5,
8805 (__mmask8) __M);
8806 }
8807
8808 extern __inline __mmask8
8809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8810 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8811 {
8812 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8813 (__v8di) __Y, 5,
8814 (__mmask8) -1);
8815 }
8816
8817 extern __inline __mmask16
8818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8819 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8820 {
8821 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8822 (__v16si) __Y, 2,
8823 (__mmask16) __M);
8824 }
8825
8826 extern __inline __mmask16
8827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8828 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8829 {
8830 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8831 (__v16si) __Y, 2,
8832 (__mmask16) -1);
8833 }
8834
8835 extern __inline __mmask16
8836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8837 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8838 {
8839 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8840 (__v16si) __Y, 2,
8841 (__mmask16) __M);
8842 }
8843
8844 extern __inline __mmask16
8845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8846 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8847 {
8848 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8849 (__v16si) __Y, 2,
8850 (__mmask16) -1);
8851 }
8852
8853 extern __inline __mmask8
8854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8855 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8856 {
8857 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8858 (__v8di) __Y, 2,
8859 (__mmask8) __M);
8860 }
8861
8862 extern __inline __mmask8
8863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8864 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8865 {
8866 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8867 (__v8di) __Y, 2,
8868 (__mmask8) -1);
8869 }
8870
8871 extern __inline __mmask8
8872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8873 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8874 {
8875 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8876 (__v8di) __Y, 2,
8877 (__mmask8) __M);
8878 }
8879
8880 extern __inline __mmask8
8881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8882 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8883 {
8884 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8885 (__v8di) __Y, 2,
8886 (__mmask8) -1);
8887 }
8888
8889 extern __inline __mmask16
8890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8891 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8892 {
8893 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8894 (__v16si) __Y, 1,
8895 (__mmask16) __M);
8896 }
8897
8898 extern __inline __mmask16
8899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8900 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8901 {
8902 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8903 (__v16si) __Y, 1,
8904 (__mmask16) -1);
8905 }
8906
8907 extern __inline __mmask16
8908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8909 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8910 {
8911 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8912 (__v16si) __Y, 1,
8913 (__mmask16) __M);
8914 }
8915
8916 extern __inline __mmask16
8917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8918 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8919 {
8920 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8921 (__v16si) __Y, 1,
8922 (__mmask16) -1);
8923 }
8924
8925 extern __inline __mmask8
8926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8927 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8928 {
8929 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8930 (__v8di) __Y, 1,
8931 (__mmask8) __M);
8932 }
8933
8934 extern __inline __mmask8
8935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8936 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8937 {
8938 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8939 (__v8di) __Y, 1,
8940 (__mmask8) -1);
8941 }
8942
8943 extern __inline __mmask8
8944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8945 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8946 {
8947 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8948 (__v8di) __Y, 1,
8949 (__mmask8) __M);
8950 }
8951
8952 extern __inline __mmask8
8953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8954 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8955 {
8956 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8957 (__v8di) __Y, 1,
8958 (__mmask8) -1);
8959 }
8960
8961 extern __inline __mmask16
8962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8963 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8964 {
8965 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8966 (__v16si) __Y, 4,
8967 (__mmask16) -1);
8968 }
8969
8970 extern __inline __mmask16
8971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8972 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8973 {
8974 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8975 (__v16si) __Y, 4,
8976 (__mmask16) __M);
8977 }
8978
8979 extern __inline __mmask16
8980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8981 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8982 {
8983 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8984 (__v16si) __Y, 4,
8985 (__mmask16) __M);
8986 }
8987
8988 extern __inline __mmask16
8989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8990 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8991 {
8992 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8993 (__v16si) __Y, 4,
8994 (__mmask16) -1);
8995 }
8996
8997 extern __inline __mmask8
8998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8999 _mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
9000 {
9001 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9002 (__v8di) __Y, 4,
9003 (__mmask8) __M);
9004 }
9005
9006 extern __inline __mmask8
9007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9008 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
9009 {
9010 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9011 (__v8di) __Y, 4,
9012 (__mmask8) -1);
9013 }
9014
9015 extern __inline __mmask8
9016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9017 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
9018 {
9019 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9020 (__v8di) __Y, 4,
9021 (__mmask8) __M);
9022 }
9023
9024 extern __inline __mmask8
9025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9026 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
9027 {
9028 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9029 (__v8di) __Y, 4,
9030 (__mmask8) -1);
9031 }
9032
/* Integer comparison predicate encodings.  Two values have a second
   spelling: GE shares 0x5 with NLT, and GT shares 0x6 with NLE.  */
#define _MM_CMPINT_EQ     0x0
#define _MM_CMPINT_LT     0x1
#define _MM_CMPINT_LE     0x2
#define _MM_CMPINT_UNUSED 0x3
#define _MM_CMPINT_NE     0x4
#define _MM_CMPINT_NLT    0x5
#define _MM_CMPINT_GE     _MM_CMPINT_NLT
#define _MM_CMPINT_NLE    0x6
#define _MM_CMPINT_GT     _MM_CMPINT_NLE
9042
9043 #ifdef __OPTIMIZE__
9044 extern __inline __mmask8
9045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9046 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
9047 {
9048 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9049 (__v8di) __Y, __P,
9050 (__mmask8) -1);
9051 }
9052
9053 extern __inline __mmask16
9054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9055 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
9056 {
9057 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9058 (__v16si) __Y, __P,
9059 (__mmask16) -1);
9060 }
9061
9062 extern __inline __mmask8
9063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9064 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
9065 {
9066 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9067 (__v8di) __Y, __P,
9068 (__mmask8) -1);
9069 }
9070
9071 extern __inline __mmask16
9072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9073 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9074 {
9075 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9076 (__v16si) __Y, __P,
9077 (__mmask16) -1);
9078 }
9079
9080 extern __inline __mmask8
9081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9082 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9083 const int __R)
9084 {
9085 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9086 (__v8df) __Y, __P,
9087 (__mmask8) -1, __R);
9088 }
9089
9090 extern __inline __mmask16
9091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9092 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9093 {
9094 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9095 (__v16sf) __Y, __P,
9096 (__mmask16) -1, __R);
9097 }
9098
9099 extern __inline __mmask8
9100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9101 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9102 const int __P)
9103 {
9104 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9105 (__v8di) __Y, __P,
9106 (__mmask8) __U);
9107 }
9108
9109 extern __inline __mmask16
9110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9111 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9112 const int __P)
9113 {
9114 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9115 (__v16si) __Y, __P,
9116 (__mmask16) __U);
9117 }
9118
9119 extern __inline __mmask8
9120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9121 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9122 const int __P)
9123 {
9124 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9125 (__v8di) __Y, __P,
9126 (__mmask8) __U);
9127 }
9128
9129 extern __inline __mmask16
9130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9131 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9132 const int __P)
9133 {
9134 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9135 (__v16si) __Y, __P,
9136 (__mmask16) __U);
9137 }
9138
9139 extern __inline __mmask8
9140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9141 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9142 const int __P, const int __R)
9143 {
9144 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9145 (__v8df) __Y, __P,
9146 (__mmask8) __U, __R);
9147 }
9148
9149 extern __inline __mmask16
9150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9151 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9152 const int __P, const int __R)
9153 {
9154 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9155 (__v16sf) __Y, __P,
9156 (__mmask16) __U, __R);
9157 }
9158
9159 extern __inline __mmask8
9160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9161 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9162 {
9163 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9164 (__v2df) __Y, __P,
9165 (__mmask8) -1, __R);
9166 }
9167
9168 extern __inline __mmask8
9169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9170 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9171 const int __P, const int __R)
9172 {
9173 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9174 (__v2df) __Y, __P,
9175 (__mmask8) __M, __R);
9176 }
9177
9178 extern __inline __mmask8
9179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9180 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9181 {
9182 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9183 (__v4sf) __Y, __P,
9184 (__mmask8) -1, __R);
9185 }
9186
9187 extern __inline __mmask8
9188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9189 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9190 const int __P, const int __R)
9191 {
9192 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9193 (__v4sf) __Y, __P,
9194 (__mmask8) __M, __R);
9195 }
9196
9197 #else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpq512_mask on X and Y (as __v8di) with predicate P
   and an all-ones __mmask8.  */
#define _mm512_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (P), \
     (__mmask8) -1))
9202
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpd512_mask on X and Y (as __v16si) with predicate P
   and an all-ones __mmask16.  The result is cast to __mmask16, the
   type the inline-function form returns; casting it to __mmask8 would
   drop the upper eight bits of the comparison mask.  */
#define _mm512_cmp_epi32_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
                                            (__v16si)(__m512i)(Y), \
                                            (int)(P), \
                                            (__mmask16)-1))
9207
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_ucmpq512_mask on X and Y (as __v8di) with predicate P
   and an all-ones __mmask8.  */
#define _mm512_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ( \
     (__v8di) (__m512i) (X), \
     (__v8di) (__m512i) (Y), \
     (int) (P), \
     (__mmask8) -1))
9212
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_ucmpd512_mask on X and Y (as __v16si) with predicate P
   and an all-ones __mmask16.  The result is cast to __mmask16, the
   type the inline-function form returns; casting it to __mmask8 would
   drop the upper eight bits of the comparison mask.  */
#define _mm512_cmp_epu32_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
                                             (__v16si)(__m512i)(Y), \
                                             (int)(P), \
                                             (__mmask16)-1))
9217
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmppd512_mask on X and Y (as __v8df) with predicate P,
   an all-ones __mmask8, and R as the final operand.  */
#define _mm512_cmp_round_pd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     R))
9222
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpps512_mask on X and Y (as __v16sf) with predicate P,
   an all-ones __mmask16, and R as the final operand.  */
#define _mm512_cmp_round_ps_mask(X, Y, P, R) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (P), \
     (__mmask16) -1, \
     R))
9227
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpq512_mask on X and Y (as __v8di) with predicate P
   and M as the mask operand.  M is parenthesized before the cast so
   that an expression argument such as `m >> 8' is converted as a
   whole rather than only its first operand.  */
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
                                           (__v8di)(__m512i)(Y), \
                                           (int)(P), \
                                           (__mmask8)(M)))
9232
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpd512_mask on X and Y (as __v16si) with predicate P
   and M as the mask operand.  The result is cast to __mmask16, the
   type the inline-function form returns (a __mmask8 cast would drop
   the upper eight bits), and M is parenthesized before its cast so
   that an expression argument is converted as a whole.  */
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
                                            (__v16si)(__m512i)(Y), \
                                            (int)(P), \
                                            (__mmask16)(M)))
9237
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_ucmpq512_mask on X and Y (as __v8di) with predicate P
   and M as the mask operand.  M is parenthesized before the cast so
   that an expression argument such as `m >> 8' is converted as a
   whole rather than only its first operand.  */
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
                                            (__v8di)(__m512i)(Y), \
                                            (int)(P), \
                                            (__mmask8)(M)))
9242
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_ucmpd512_mask on X and Y (as __v16si) with predicate P
   and M as the mask operand.  The result is cast to __mmask16, the
   type the inline-function form returns (a __mmask8 cast would drop
   the upper eight bits), and M is parenthesized before its cast so
   that an expression argument is converted as a whole.  */
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
                                             (__v16si)(__m512i)(Y), \
                                             (int)(P), \
                                             (__mmask16)(M)))
9247
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmppd512_mask on X and Y (as __v8df) with predicate P,
   M as the mask operand and R as the final operand.  M is
   parenthesized before the cast so that an expression argument is
   converted as a whole rather than only its first operand.  */
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
                                            (__v8df)(__m512d)(Y), \
                                            (int)(P), \
                                            (__mmask8)(M), (R)))
9252
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpps512_mask on X and Y (as __v16sf) with predicate P,
   M as the mask operand and R as the final operand.  M is
   parenthesized before the cast so that an expression argument is
   converted as a whole rather than only its first operand.  */
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
                                             (__v16sf)(__m512)(Y), \
                                             (int)(P), \
                                             (__mmask16)(M), (R)))
9257
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpsd_mask on X and Y (as __v2df) with predicate P,
   an all-ones __mmask8, and R as the final operand.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     R))
9262
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpsd_mask on X and Y (as __v2df) with predicate P,
   M as the mask operand, and R as the final operand.  */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (P), \
     (M), \
     R))
9267
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpss_mask on X and Y (as __v4sf) with predicate P,
   an all-ones __mmask8, and R as the final operand.  */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (P), \
     (__mmask8) -1, \
     R))
9272
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_cmpss_mask on X and Y (as __v4sf) with predicate P,
   M as the mask operand, and R as the final operand.  */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (P), \
     (M), \
     R))
9277 #endif
9278
9279 #ifdef __OPTIMIZE__
9280 extern __inline __m512
9281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9282 _mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
9283 {
9284 __m512 v1_old = _mm512_undefined_ps ();
9285 __mmask16 mask = 0xFFFF;
9286
9287 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9288 __addr,
9289 (__v16si) __index,
9290 mask, __scale);
9291 }
9292
9293 extern __inline __m512
9294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9295 _mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
9296 __m512i __index, float const *__addr, int __scale)
9297 {
9298 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9299 __addr,
9300 (__v16si) __index,
9301 __mask, __scale);
9302 }
9303
9304 extern __inline __m512d
9305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9306 _mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
9307 {
9308 __m512d v1_old = _mm512_undefined_pd ();
9309 __mmask8 mask = 0xFF;
9310
9311 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
9312 __addr,
9313 (__v8si) __index, mask,
9314 __scale);
9315 }
9316
9317 extern __inline __m512d
9318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9319 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9320 __m256i __index, double const *__addr, int __scale)
9321 {
9322 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9323 __addr,
9324 (__v8si) __index,
9325 __mask, __scale);
9326 }
9327
9328 extern __inline __m256
9329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9330 _mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
9331 {
9332 __m256 v1_old = _mm256_undefined_ps ();
9333 __mmask8 mask = 0xFF;
9334
9335 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
9336 __addr,
9337 (__v8di) __index, mask,
9338 __scale);
9339 }
9340
9341 extern __inline __m256
9342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9343 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9344 __m512i __index, float const *__addr, int __scale)
9345 {
9346 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9347 __addr,
9348 (__v8di) __index,
9349 __mask, __scale);
9350 }
9351
9352 extern __inline __m512d
9353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9354 _mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
9355 {
9356 __m512d v1_old = _mm512_undefined_pd ();
9357 __mmask8 mask = 0xFF;
9358
9359 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
9360 __addr,
9361 (__v8di) __index, mask,
9362 __scale);
9363 }
9364
9365 extern __inline __m512d
9366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9367 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9368 __m512i __index, double const *__addr, int __scale)
9369 {
9370 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9371 __addr,
9372 (__v8di) __index,
9373 __mask, __scale);
9374 }
9375
9376 extern __inline __m512i
9377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9378 _mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
9379 {
9380 __m512i v1_old = _mm512_undefined_si512 ();
9381 __mmask16 mask = 0xFFFF;
9382
9383 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
9384 __addr,
9385 (__v16si) __index,
9386 mask, __scale);
9387 }
9388
9389 extern __inline __m512i
9390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9391 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9392 __m512i __index, int const *__addr, int __scale)
9393 {
9394 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9395 __addr,
9396 (__v16si) __index,
9397 __mask, __scale);
9398 }
9399
9400 extern __inline __m512i
9401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9402 _mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
9403 {
9404 __m512i v1_old = _mm512_undefined_si512 ();
9405 __mmask8 mask = 0xFF;
9406
9407 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
9408 __addr,
9409 (__v8si) __index, mask,
9410 __scale);
9411 }
9412
9413 extern __inline __m512i
9414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9415 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9416 __m256i __index, long long const *__addr,
9417 int __scale)
9418 {
9419 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9420 __addr,
9421 (__v8si) __index,
9422 __mask, __scale);
9423 }
9424
9425 extern __inline __m256i
9426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9427 _mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
9428 {
9429 __m256i v1_old = _mm256_undefined_si256 ();
9430 __mmask8 mask = 0xFF;
9431
9432 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
9433 __addr,
9434 (__v8di) __index,
9435 mask, __scale);
9436 }
9437
9438 extern __inline __m256i
9439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9440 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9441 __m512i __index, int const *__addr, int __scale)
9442 {
9443 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9444 __addr,
9445 (__v8di) __index,
9446 __mask, __scale);
9447 }
9448
9449 extern __inline __m512i
9450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9451 _mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
9452 {
9453 __m512i v1_old = _mm512_undefined_si512 ();
9454 __mmask8 mask = 0xFF;
9455
9456 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
9457 __addr,
9458 (__v8di) __index, mask,
9459 __scale);
9460 }
9461
9462 extern __inline __m512i
9463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9464 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9465 __m512i __index, long long const *__addr,
9466 int __scale)
9467 {
9468 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9469 __addr,
9470 (__v8di) __index,
9471 __mask, __scale);
9472 }
9473
9474 extern __inline void
9475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9476 _mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
9477 {
9478 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9479 (__v16si) __index, (__v16sf) __v1, __scale);
9480 }
9481
9482 extern __inline void
9483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9484 _mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
9485 __m512i __index, __m512 __v1, int __scale)
9486 {
9487 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9488 (__v16sf) __v1, __scale);
9489 }
9490
9491 extern __inline void
9492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9493 _mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
9494 int __scale)
9495 {
9496 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9497 (__v8si) __index, (__v8df) __v1, __scale);
9498 }
9499
9500 extern __inline void
9501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9502 _mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
9503 __m256i __index, __m512d __v1, int __scale)
9504 {
9505 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9506 (__v8df) __v1, __scale);
9507 }
9508
9509 extern __inline void
9510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9511 _mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
9512 {
9513 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9514 (__v8di) __index, (__v8sf) __v1, __scale);
9515 }
9516
9517 extern __inline void
9518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9519 _mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
9520 __m512i __index, __m256 __v1, int __scale)
9521 {
9522 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9523 (__v8sf) __v1, __scale);
9524 }
9525
9526 extern __inline void
9527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9528 _mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
9529 int __scale)
9530 {
9531 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9532 (__v8di) __index, (__v8df) __v1, __scale);
9533 }
9534
9535 extern __inline void
9536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9537 _mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
9538 __m512i __index, __m512d __v1, int __scale)
9539 {
9540 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9541 (__v8df) __v1, __scale);
9542 }
9543
9544 extern __inline void
9545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9546 _mm512_i32scatter_epi32 (int *__addr, __m512i __index,
9547 __m512i __v1, int __scale)
9548 {
9549 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9550 (__v16si) __index, (__v16si) __v1, __scale);
9551 }
9552
9553 extern __inline void
9554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9555 _mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
9556 __m512i __index, __m512i __v1, int __scale)
9557 {
9558 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9559 (__v16si) __v1, __scale);
9560 }
9561
9562 extern __inline void
9563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9564 _mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
9565 __m512i __v1, int __scale)
9566 {
9567 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9568 (__v8si) __index, (__v8di) __v1, __scale);
9569 }
9570
9571 extern __inline void
9572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9573 _mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
9574 __m256i __index, __m512i __v1, int __scale)
9575 {
9576 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9577 (__v8di) __v1, __scale);
9578 }
9579
9580 extern __inline void
9581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9582 _mm512_i64scatter_epi32 (int *__addr, __m512i __index,
9583 __m256i __v1, int __scale)
9584 {
9585 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9586 (__v8di) __index, (__v8si) __v1, __scale);
9587 }
9588
9589 extern __inline void
9590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9591 _mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
9592 __m512i __index, __m256i __v1, int __scale)
9593 {
9594 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9595 (__v8si) __v1, __scale);
9596 }
9597
9598 extern __inline void
9599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9600 _mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
9601 __m512i __v1, int __scale)
9602 {
9603 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9604 (__v8di) __index, (__v8di) __v1, __scale);
9605 }
9606
9607 extern __inline void
9608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9609 _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
9610 __m512i __index, __m512i __v1, int __scale)
9611 {
9612 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9613 (__v8di) __v1, __scale);
9614 }
9615 #else
/* Macro forms of the gather intrinsics (the #else branch of the enclosing
   conditional).  Each macro forwards to the matching gather builtin; the
   unmasked forms supply an undefined source vector and an all-ones mask
   constant.

   Every macro argument is parenthesized before it is cast, so that an
   expression argument such as `base + off' or `m1 | m2' is cast as a
   whole rather than having the cast bind to its first operand only.
   Each expansion is also wrapped in parentheses so that it behaves as a
   single primary expression at the point of use.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf) _mm512_undefined_ps (), \
                                          (float const *) (ADDR), \
                                          (__v16si)(__m512i) (INDEX), \
                                          (__mmask16) 0xFFFF, (int) (SCALE)))

#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD), \
                                          (float const *) (ADDR), \
                                          (__v16si)(__m512i) (INDEX), \
                                          (__mmask16) (MASK), (int) (SCALE)))

#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df) _mm512_undefined_pd (), \
                                          (double const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) 0xFF, (int) (SCALE)))

#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD), \
                                          (double const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))

#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) _mm256_undefined_ps (), \
                                          (float const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) 0xFF, (int) (SCALE)))

#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD), \
                                          (float const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))

#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df) _mm512_undefined_pd (), \
                                          (double const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) 0xFF, (int) (SCALE)))

#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD), \
                                          (double const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))

#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si) _mm512_undefined_si512 (), \
                                           (int const *) (ADDR), \
                                           (__v16si)(__m512i) (INDEX), \
                                           (__mmask16) 0xFFFF, (int) (SCALE)))

#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD), \
                                           (int const *) (ADDR), \
                                           (__v16si)(__m512i) (INDEX), \
                                           (__mmask16) (MASK), (int) (SCALE)))

#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di) _mm512_undefined_si512 (), \
                                          (long long const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) 0xFF, (int) (SCALE)))

#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD), \
                                          (long long const *) (ADDR), \
                                          (__v8si)(__m256i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))

#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si) _mm256_undefined_si256 (), \
                                           (int const *) (ADDR), \
                                           (__v8di)(__m512i) (INDEX), \
                                           (__mmask8) 0xFF, (int) (SCALE)))

#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD), \
                                           (int const *) (ADDR), \
                                           (__v8di)(__m512i) (INDEX), \
                                           (__mmask8) (MASK), (int) (SCALE)))

#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di) _mm512_undefined_si512 (), \
                                          (long long const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) 0xFF, (int) (SCALE)))

#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD), \
                                          (long long const *) (ADDR), \
                                          (__v8di)(__m512i) (INDEX), \
                                          (__mmask8) (MASK), (int) (SCALE)))
9711
/* Macro forms of the scatter intrinsics (the #else branch of the
   enclosing conditional).  Each macro forwards to the matching scatter
   builtin; the unmasked forms supply an all-ones mask constant.

   Every macro argument is parenthesized before it is cast, so that an
   expression argument such as `base + off' or `m1 | m2' is cast as a
   whole rather than having the cast bind to its first operand only.

   _mm512_mask_i64scatter_ps casts its mask to __mmask8, matching the
   inline-function form of the same intrinsic and the 0xFF mask used by
   the unmasked _mm512_i64scatter_ps macro.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((float *) (ADDR), (__mmask16) 0xFFFF, \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))

#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16sf ((float *) (ADDR), (__mmask16) (MASK), \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16sf)(__m512) (V1), (int) (SCALE))

#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((double *) (ADDR), (__mmask8) 0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))

#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8df ((double *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))

#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((float *) (ADDR), (__mmask8) 0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))

#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16sf ((float *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))

#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((double *) (ADDR), (__mmask8) 0xFF, \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))

#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8df ((double *) (ADDR), (__mmask8) (MASK), \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8df)(__m512d) (V1), (int) (SCALE))

#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((int *) (ADDR), (__mmask16) 0xFFFF, \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))

#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv16si ((int *) (ADDR), (__mmask16) (MASK), \
                                 (__v16si)(__m512i) (INDEX), \
                                 (__v16si)(__m512i) (V1), (int) (SCALE))

#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((long long *) (ADDR), (__mmask8) 0xFF, \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))

#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8di ((long long *) (ADDR), (__mmask8) (MASK), \
                                (__v8si)(__m256i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))

#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((int *) (ADDR), (__mmask8) 0xFF, \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))

#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv16si ((int *) (ADDR), (__mmask8) (MASK), \
                                 (__v8di)(__m512i) (INDEX), \
                                 (__v8si)(__m256i) (V1), (int) (SCALE))

#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((long long *) (ADDR), (__mmask8) 0xFF, \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))

#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8di ((long long *) (ADDR), (__mmask8) (MASK), \
                                (__v8di)(__m512i) (INDEX), \
                                (__v8di)(__m512i) (V1), (int) (SCALE))
9791 #endif
9792
9793 extern __inline __m512d
9794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9795 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9796 {
9797 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9798 (__v8df) __W,
9799 (__mmask8) __U);
9800 }
9801
9802 extern __inline __m512d
9803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9804 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9805 {
9806 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9807 (__v8df)
9808 _mm512_setzero_pd (),
9809 (__mmask8) __U);
9810 }
9811
9812 extern __inline void
9813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9814 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9815 {
9816 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9817 (__mmask8) __U);
9818 }
9819
9820 extern __inline __m512
9821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9822 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9823 {
9824 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9825 (__v16sf) __W,
9826 (__mmask16) __U);
9827 }
9828
9829 extern __inline __m512
9830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9831 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9832 {
9833 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9834 (__v16sf)
9835 _mm512_setzero_ps (),
9836 (__mmask16) __U);
9837 }
9838
9839 extern __inline void
9840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9841 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9842 {
9843 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9844 (__mmask16) __U);
9845 }
9846
9847 extern __inline __m512i
9848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9849 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9850 {
9851 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9852 (__v8di) __W,
9853 (__mmask8) __U);
9854 }
9855
9856 extern __inline __m512i
9857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9858 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9859 {
9860 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9861 (__v8di)
9862 _mm512_setzero_si512 (),
9863 (__mmask8) __U);
9864 }
9865
9866 extern __inline void
9867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9868 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9869 {
9870 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9871 (__mmask8) __U);
9872 }
9873
9874 extern __inline __m512i
9875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9876 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9877 {
9878 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9879 (__v16si) __W,
9880 (__mmask16) __U);
9881 }
9882
9883 extern __inline __m512i
9884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9885 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9886 {
9887 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9888 (__v16si)
9889 _mm512_setzero_si512 (),
9890 (__mmask16) __U);
9891 }
9892
9893 extern __inline void
9894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9895 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9896 {
9897 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9898 (__mmask16) __U);
9899 }
9900
9901 extern __inline __m512d
9902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9903 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9904 {
9905 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9906 (__v8df) __W,
9907 (__mmask8) __U);
9908 }
9909
9910 extern __inline __m512d
9911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9912 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9913 {
9914 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9915 (__v8df)
9916 _mm512_setzero_pd (),
9917 (__mmask8) __U);
9918 }
9919
9920 extern __inline __m512d
9921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9922 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9923 {
9924 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9925 (__v8df) __W,
9926 (__mmask8) __U);
9927 }
9928
9929 extern __inline __m512d
9930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9931 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9932 {
9933 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9934 (__v8df)
9935 _mm512_setzero_pd (),
9936 (__mmask8) __U);
9937 }
9938
9939 extern __inline __m512
9940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9941 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9942 {
9943 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9944 (__v16sf) __W,
9945 (__mmask16) __U);
9946 }
9947
9948 extern __inline __m512
9949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9950 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9951 {
9952 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9953 (__v16sf)
9954 _mm512_setzero_ps (),
9955 (__mmask16) __U);
9956 }
9957
9958 extern __inline __m512
9959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9960 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9961 {
9962 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9963 (__v16sf) __W,
9964 (__mmask16) __U);
9965 }
9966
9967 extern __inline __m512
9968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9969 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9970 {
9971 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9972 (__v16sf)
9973 _mm512_setzero_ps (),
9974 (__mmask16) __U);
9975 }
9976
9977 extern __inline __m512i
9978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9979 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9980 {
9981 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9982 (__v8di) __W,
9983 (__mmask8) __U);
9984 }
9985
9986 extern __inline __m512i
9987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9988 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9989 {
9990 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9991 (__v8di)
9992 _mm512_setzero_si512 (),
9993 (__mmask8) __U);
9994 }
9995
9996 extern __inline __m512i
9997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9998 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9999 {
10000 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
10001 (__v8di) __W,
10002 (__mmask8) __U);
10003 }
10004
10005 extern __inline __m512i
10006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10007 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
10008 {
10009 return (__m512i)
10010 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
10011 (__v8di)
10012 _mm512_setzero_si512 (),
10013 (__mmask8) __U);
10014 }
10015
10016 extern __inline __m512i
10017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10018 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
10019 {
10020 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
10021 (__v16si) __W,
10022 (__mmask16) __U);
10023 }
10024
10025 extern __inline __m512i
10026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10027 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
10028 {
10029 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
10030 (__v16si)
10031 _mm512_setzero_si512 (),
10032 (__mmask16) __U);
10033 }
10034
10035 extern __inline __m512i
10036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10037 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
10038 {
10039 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
10040 (__v16si) __W,
10041 (__mmask16) __U);
10042 }
10043
10044 extern __inline __m512i
10045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10046 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
10047 {
10048 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
10049 (__v16si)
10050 _mm512_setzero_si512
10051 (), (__mmask16) __U);
10052 }
10053
10054 /* Mask arithmetic operations */
10055 extern __inline __mmask16
10056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10057 _mm512_kand (__mmask16 __A, __mmask16 __B)
10058 {
10059 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
10060 }
10061
10062 extern __inline __mmask16
10063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10064 _mm512_kandn (__mmask16 __A, __mmask16 __B)
10065 {
10066 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
10067 }
10068
10069 extern __inline __mmask16
10070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10071 _mm512_kor (__mmask16 __A, __mmask16 __B)
10072 {
10073 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10074 }
10075
10076 extern __inline int
10077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10078 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10079 {
10080 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10081 (__mmask16) __B);
10082 }
10083
10084 extern __inline int
10085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10086 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10087 {
10088 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10089 (__mmask16) __B);
10090 }
10091
10092 extern __inline __mmask16
10093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10094 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10095 {
10096 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10097 }
10098
10099 extern __inline __mmask16
10100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10101 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10102 {
10103 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10104 }
10105
10106 extern __inline __mmask16
10107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10108 _mm512_knot (__mmask16 __A)
10109 {
10110 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10111 }
10112
10113 extern __inline __mmask16
10114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10115 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10116 {
10117 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
10118 }
10119
10120 #ifdef __OPTIMIZE__
10121 extern __inline __m512i
10122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10123 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10124 const int __imm)
10125 {
10126 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10127 (__v4si) __D,
10128 __imm,
10129 (__v16si)
10130 _mm512_setzero_si512 (),
10131 __B);
10132 }
10133
10134 extern __inline __m512
10135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10136 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10137 const int __imm)
10138 {
10139 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10140 (__v4sf) __D,
10141 __imm,
10142 (__v16sf)
10143 _mm512_setzero_ps (), __B);
10144 }
10145
10146 extern __inline __m512i
10147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10148 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10149 __m128i __D, const int __imm)
10150 {
10151 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10152 (__v4si) __D,
10153 __imm,
10154 (__v16si) __A,
10155 __B);
10156 }
10157
10158 extern __inline __m512
10159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10160 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10161 __m128 __D, const int __imm)
10162 {
10163 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10164 (__v4sf) __D,
10165 __imm,
10166 (__v16sf) __A, __B);
10167 }
10168 #else
/* Macro form of _mm512_maskz_insertf32x4 for builds without __OPTIMIZE__.
   The mask A is cast to __mmask16, the mask type the inline-function form
   takes; a narrower __mmask8 cast would discard the upper eight mask
   bits.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
    (__mmask16)(A)))
10173
/* Macro form of _mm512_maskz_inserti32x4 for builds without __OPTIMIZE__.
   The mask A is cast to __mmask16, the mask type the inline-function form
   takes; a narrower __mmask8 cast would discard the upper eight mask
   bits.  */
#define _mm512_maskz_inserti32x4(A, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))
10178
/* Macro form of _mm512_mask_insertf32x4 for builds without __OPTIMIZE__.
   The mask B is cast to __mmask16, the mask type the inline-function form
   takes; a narrower __mmask8 cast would discard the upper eight mask
   bits.  */
#define _mm512_mask_insertf32x4(A, B, X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
    (__mmask16)(B)))
10183
/* Macro form of _mm512_mask_inserti32x4 for builds without __OPTIMIZE__.
   The mask B is cast to __mmask16, the mask type the inline-function form
   takes; a narrower __mmask8 cast would discard the upper eight mask
   bits.  */
#define _mm512_mask_inserti32x4(A, B, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
    (__mmask16)(B)))
10188 #endif
10189
10190 extern __inline __m512i
10191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10192 _mm512_max_epi64 (__m512i __A, __m512i __B)
10193 {
10194 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10195 (__v8di) __B,
10196 (__v8di)
10197 _mm512_undefined_si512 (),
10198 (__mmask8) -1);
10199 }
10200
10201 extern __inline __m512i
10202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10203 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10204 {
10205 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10206 (__v8di) __B,
10207 (__v8di)
10208 _mm512_setzero_si512 (),
10209 __M);
10210 }
10211
10212 extern __inline __m512i
10213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10214 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10215 {
10216 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10217 (__v8di) __B,
10218 (__v8di) __W, __M);
10219 }
10220
10221 extern __inline __m512i
10222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10223 _mm512_min_epi64 (__m512i __A, __m512i __B)
10224 {
10225 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10226 (__v8di) __B,
10227 (__v8di)
10228 _mm512_undefined_si512 (),
10229 (__mmask8) -1);
10230 }
10231
10232 extern __inline __m512i
10233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10234 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10235 {
10236 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10237 (__v8di) __B,
10238 (__v8di) __W, __M);
10239 }
10240
10241 extern __inline __m512i
10242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10243 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10244 {
10245 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10246 (__v8di) __B,
10247 (__v8di)
10248 _mm512_setzero_si512 (),
10249 __M);
10250 }
10251
10252 extern __inline __m512i
10253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10254 _mm512_max_epu64 (__m512i __A, __m512i __B)
10255 {
10256 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10257 (__v8di) __B,
10258 (__v8di)
10259 _mm512_undefined_si512 (),
10260 (__mmask8) -1);
10261 }
10262
10263 extern __inline __m512i
10264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10265 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10266 {
10267 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10268 (__v8di) __B,
10269 (__v8di)
10270 _mm512_setzero_si512 (),
10271 __M);
10272 }
10273
10274 extern __inline __m512i
10275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10276 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10277 {
10278 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10279 (__v8di) __B,
10280 (__v8di) __W, __M);
10281 }
10282
10283 extern __inline __m512i
10284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10285 _mm512_min_epu64 (__m512i __A, __m512i __B)
10286 {
10287 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10288 (__v8di) __B,
10289 (__v8di)
10290 _mm512_undefined_si512 (),
10291 (__mmask8) -1);
10292 }
10293
10294 extern __inline __m512i
10295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10296 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10297 {
10298 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10299 (__v8di) __B,
10300 (__v8di) __W, __M);
10301 }
10302
10303 extern __inline __m512i
10304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10305 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10306 {
10307 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10308 (__v8di) __B,
10309 (__v8di)
10310 _mm512_setzero_si512 (),
10311 __M);
10312 }
10313
10314 extern __inline __m512i
10315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10316 _mm512_max_epi32 (__m512i __A, __m512i __B)
10317 {
10318 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10319 (__v16si) __B,
10320 (__v16si)
10321 _mm512_undefined_si512 (),
10322 (__mmask16) -1);
10323 }
10324
10325 extern __inline __m512i
10326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10327 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10328 {
10329 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10330 (__v16si) __B,
10331 (__v16si)
10332 _mm512_setzero_si512 (),
10333 __M);
10334 }
10335
10336 extern __inline __m512i
10337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10338 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10339 {
10340 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10341 (__v16si) __B,
10342 (__v16si) __W, __M);
10343 }
10344
10345 extern __inline __m512i
10346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10347 _mm512_min_epi32 (__m512i __A, __m512i __B)
10348 {
10349 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10350 (__v16si) __B,
10351 (__v16si)
10352 _mm512_undefined_si512 (),
10353 (__mmask16) -1);
10354 }
10355
10356 extern __inline __m512i
10357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10358 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10359 {
10360 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10361 (__v16si) __B,
10362 (__v16si)
10363 _mm512_setzero_si512 (),
10364 __M);
10365 }
10366
10367 extern __inline __m512i
10368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10369 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10370 {
10371 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10372 (__v16si) __B,
10373 (__v16si) __W, __M);
10374 }
10375
10376 extern __inline __m512i
10377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10378 _mm512_max_epu32 (__m512i __A, __m512i __B)
10379 {
10380 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10381 (__v16si) __B,
10382 (__v16si)
10383 _mm512_undefined_si512 (),
10384 (__mmask16) -1);
10385 }
10386
10387 extern __inline __m512i
10388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10389 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10390 {
10391 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10392 (__v16si) __B,
10393 (__v16si)
10394 _mm512_setzero_si512 (),
10395 __M);
10396 }
10397
10398 extern __inline __m512i
10399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10400 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10401 {
10402 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10403 (__v16si) __B,
10404 (__v16si) __W, __M);
10405 }
10406
10407 extern __inline __m512i
10408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10409 _mm512_min_epu32 (__m512i __A, __m512i __B)
10410 {
10411 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10412 (__v16si) __B,
10413 (__v16si)
10414 _mm512_undefined_si512 (),
10415 (__mmask16) -1);
10416 }
10417
10418 extern __inline __m512i
10419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10420 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10421 {
10422 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10423 (__v16si) __B,
10424 (__v16si)
10425 _mm512_setzero_si512 (),
10426 __M);
10427 }
10428
10429 extern __inline __m512i
10430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10431 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10432 {
10433 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10434 (__v16si) __B,
10435 (__v16si) __W, __M);
10436 }
10437
10438 extern __inline __m512
10439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10440 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
10441 {
10442 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10443 (__v16sf) __B,
10444 (__v16sf)
10445 _mm512_undefined_ps (),
10446 (__mmask16) -1);
10447 }
10448
10449 extern __inline __m512
10450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10451 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10452 {
10453 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10454 (__v16sf) __B,
10455 (__v16sf) __W,
10456 (__mmask16) __U);
10457 }
10458
10459 extern __inline __m512
10460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10461 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10462 {
10463 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10464 (__v16sf) __B,
10465 (__v16sf)
10466 _mm512_setzero_ps (),
10467 (__mmask16) __U);
10468 }
10469
10470 #ifdef __OPTIMIZE__
10471 extern __inline __m128d
10472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10473 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10474 {
10475 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10476 (__v2df) __B,
10477 __R);
10478 }
10479
10480 extern __inline __m128
10481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10482 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10483 {
10484 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10485 (__v4sf) __B,
10486 __R);
10487 }
10488
10489 extern __inline __m128d
10490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10491 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10492 {
10493 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10494 (__v2df) __B,
10495 __R);
10496 }
10497
10498 extern __inline __m128
10499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10500 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10501 {
10502 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10503 (__v4sf) __B,
10504 __R);
10505 }
10506
10507 #else
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to the same
   builtin (__builtin_ia32_maxsd_round) as the inline definition of
   _mm_max_round_sd; the whole expansion is parenthesized so it composes
   like a function call.  */
#define _mm_max_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_maxsd_round (A, B, C))
10510
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to the same
   builtin (__builtin_ia32_maxss_round) as the inline definition of
   _mm_max_round_ss; the whole expansion is parenthesized so it composes
   like a function call.  */
#define _mm_max_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_maxss_round (A, B, C))
10513
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to the same
   builtin (__builtin_ia32_minsd_round) as the inline definition of
   _mm_min_round_sd; the whole expansion is parenthesized so it composes
   like a function call.  */
#define _mm_min_round_sd(A, B, C)            \
    ((__m128d) __builtin_ia32_minsd_round (A, B, C))
10516
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to the same
   builtin (__builtin_ia32_minss_round) as the inline definition of
   _mm_min_round_ss; the whole expansion is parenthesized so it composes
   like a function call.  */
#define _mm_min_round_ss(A, B, C)            \
    ((__m128) __builtin_ia32_minss_round (A, B, C))
10519 #endif
10520
10521 extern __inline __m512d
10522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10523 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10524 {
10525 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10526 (__v8df) __W,
10527 (__mmask8) __U);
10528 }
10529
10530 extern __inline __m512
10531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10532 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10533 {
10534 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10535 (__v16sf) __W,
10536 (__mmask16) __U);
10537 }
10538
10539 extern __inline __m512i
10540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10541 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10542 {
10543 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10544 (__v8di) __W,
10545 (__mmask8) __U);
10546 }
10547
10548 extern __inline __m512i
10549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10550 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10551 {
10552 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10553 (__v16si) __W,
10554 (__mmask16) __U);
10555 }
10556
10557 #ifdef __OPTIMIZE__
10558 extern __inline __m128d
10559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10560 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10561 {
10562 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10563 (__v2df) __A,
10564 (__v2df) __B,
10565 __R);
10566 }
10567
10568 extern __inline __m128
10569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10570 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10571 {
10572 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10573 (__v4sf) __A,
10574 (__v4sf) __B,
10575 __R);
10576 }
10577
10578 extern __inline __m128d
10579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10580 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10581 {
10582 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10583 (__v2df) __A,
10584 -(__v2df) __B,
10585 __R);
10586 }
10587
10588 extern __inline __m128
10589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10590 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10591 {
10592 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10593 (__v4sf) __A,
10594 -(__v4sf) __B,
10595 __R);
10596 }
10597
10598 extern __inline __m128d
10599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10600 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10601 {
10602 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10603 -(__v2df) __A,
10604 (__v2df) __B,
10605 __R);
10606 }
10607
10608 extern __inline __m128
10609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10610 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10611 {
10612 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10613 -(__v4sf) __A,
10614 (__v4sf) __B,
10615 __R);
10616 }
10617
10618 extern __inline __m128d
10619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10620 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10621 {
10622 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10623 -(__v2df) __A,
10624 -(__v2df) __B,
10625 __R);
10626 }
10627
10628 extern __inline __m128
10629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10630 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10631 {
10632 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10633 -(__v4sf) __A,
10634 -(__v4sf) __B,
10635 __R);
10636 }
10637 #else
/* Macro form used when __OPTIMIZE__ is not defined: forwards A, B, C and
   R to __builtin_ia32_vfmaddsd3_round.  The expansion is parenthesized so
   the cast cannot be split from the call by surrounding operators.  */
#define _mm_fmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, C, R))
10640
/* Macro form used when __OPTIMIZE__ is not defined: forwards A, B, C and
   R to __builtin_ia32_vfmaddss3_round.  The expansion is parenthesized so
   the cast cannot be split from the call by surrounding operators.  */
#define _mm_fmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, C, R))
10643
/* Macro form used when __OPTIMIZE__ is not defined: forwards A, B, the
   negation of C, and R to __builtin_ia32_vfmaddsd3_round.  The expansion
   is parenthesized so the cast cannot be split from the call by
   surrounding operators.  */
#define _mm_fmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, B, -(C), R))
10646
/* Macro form used when __OPTIMIZE__ is not defined: forwards A, B, the
   negation of C, and R to __builtin_ia32_vfmaddss3_round.  The expansion
   is parenthesized so the cast cannot be split from the call by
   surrounding operators.  */
#define _mm_fmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, B, -(C), R))
10649
/* Macro form used when __OPTIMIZE__ is not defined: forwards A, the
   negation of B, C, and R to __builtin_ia32_vfmaddsd3_round.  The
   expansion is parenthesized so the cast cannot be split from the call by
   surrounding operators.  */
#define _mm_fnmadd_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), C, R))
10652
/* Macro form used when __OPTIMIZE__ is not defined: forwards A, the
   negation of B, C, and R to __builtin_ia32_vfmaddss3_round.  The
   expansion is parenthesized so the cast cannot be split from the call by
   surrounding operators.  */
#define _mm_fnmadd_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), C, R))
10655
/* Macro form used when __OPTIMIZE__ is not defined: forwards A, the
   negations of B and C, and R to __builtin_ia32_vfmaddsd3_round.  The
   expansion is parenthesized so the cast cannot be split from the call by
   surrounding operators.  */
#define _mm_fnmsub_round_sd(A, B, C, R)            \
    ((__m128d) __builtin_ia32_vfmaddsd3_round (A, -(B), -(C), R))
10658
/* Macro form used when __OPTIMIZE__ is not defined: forwards A, the
   negations of B and C, and R to __builtin_ia32_vfmaddss3_round.  The
   expansion is parenthesized so the cast cannot be split from the call by
   surrounding operators.  */
#define _mm_fnmsub_round_ss(A, B, C, R)            \
    ((__m128) __builtin_ia32_vfmaddss3_round (A, -(B), -(C), R))
10661 #endif
10662
10663 #ifdef __OPTIMIZE__
10664 extern __inline int
10665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10666 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10667 {
10668 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10669 }
10670
10671 extern __inline int
10672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10673 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10674 {
10675 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10676 }
10677 #else
/* Macro form used when __OPTIMIZE__ is not defined: passes all four
   arguments straight through to __builtin_ia32_vcomiss.  */
#define _mm_comi_round_ss(A, B, P, R) \
    __builtin_ia32_vcomiss(A, B, P, R)
/* Macro form used when __OPTIMIZE__ is not defined: passes all four
   arguments straight through to __builtin_ia32_vcomisd.  */
#define _mm_comi_round_sd(A, B, P, R) \
    __builtin_ia32_vcomisd(A, B, P, R)
10682 #endif
10683
10684 extern __inline __m512d
10685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10686 _mm512_sqrt_pd (__m512d __A)
10687 {
10688 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10689 (__v8df)
10690 _mm512_undefined_pd (),
10691 (__mmask8) -1,
10692 _MM_FROUND_CUR_DIRECTION);
10693 }
10694
10695 extern __inline __m512d
10696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10697 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10698 {
10699 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10700 (__v8df) __W,
10701 (__mmask8) __U,
10702 _MM_FROUND_CUR_DIRECTION);
10703 }
10704
10705 extern __inline __m512d
10706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10707 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10708 {
10709 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10710 (__v8df)
10711 _mm512_setzero_pd (),
10712 (__mmask8) __U,
10713 _MM_FROUND_CUR_DIRECTION);
10714 }
10715
10716 extern __inline __m512
10717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10718 _mm512_sqrt_ps (__m512 __A)
10719 {
10720 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10721 (__v16sf)
10722 _mm512_undefined_ps (),
10723 (__mmask16) -1,
10724 _MM_FROUND_CUR_DIRECTION);
10725 }
10726
10727 extern __inline __m512
10728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10729 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10730 {
10731 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10732 (__v16sf) __W,
10733 (__mmask16) __U,
10734 _MM_FROUND_CUR_DIRECTION);
10735 }
10736
10737 extern __inline __m512
10738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10739 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10740 {
10741 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10742 (__v16sf)
10743 _mm512_setzero_ps (),
10744 (__mmask16) __U,
10745 _MM_FROUND_CUR_DIRECTION);
10746 }
10747
10748 extern __inline __m512d
10749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10750 _mm512_add_pd (__m512d __A, __m512d __B)
10751 {
10752 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10753 (__v8df) __B,
10754 (__v8df)
10755 _mm512_undefined_pd (),
10756 (__mmask8) -1,
10757 _MM_FROUND_CUR_DIRECTION);
10758 }
10759
10760 extern __inline __m512d
10761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10762 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10763 {
10764 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10765 (__v8df) __B,
10766 (__v8df) __W,
10767 (__mmask8) __U,
10768 _MM_FROUND_CUR_DIRECTION);
10769 }
10770
10771 extern __inline __m512d
10772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10773 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10774 {
10775 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10776 (__v8df) __B,
10777 (__v8df)
10778 _mm512_setzero_pd (),
10779 (__mmask8) __U,
10780 _MM_FROUND_CUR_DIRECTION);
10781 }
10782
10783 extern __inline __m512
10784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10785 _mm512_add_ps (__m512 __A, __m512 __B)
10786 {
10787 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10788 (__v16sf) __B,
10789 (__v16sf)
10790 _mm512_undefined_ps (),
10791 (__mmask16) -1,
10792 _MM_FROUND_CUR_DIRECTION);
10793 }
10794
10795 extern __inline __m512
10796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10797 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10798 {
10799 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10800 (__v16sf) __B,
10801 (__v16sf) __W,
10802 (__mmask16) __U,
10803 _MM_FROUND_CUR_DIRECTION);
10804 }
10805
10806 extern __inline __m512
10807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10808 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10809 {
10810 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10811 (__v16sf) __B,
10812 (__v16sf)
10813 _mm512_setzero_ps (),
10814 (__mmask16) __U,
10815 _MM_FROUND_CUR_DIRECTION);
10816 }
10817
10818 extern __inline __m512d
10819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10820 _mm512_sub_pd (__m512d __A, __m512d __B)
10821 {
10822 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10823 (__v8df) __B,
10824 (__v8df)
10825 _mm512_undefined_pd (),
10826 (__mmask8) -1,
10827 _MM_FROUND_CUR_DIRECTION);
10828 }
10829
10830 extern __inline __m512d
10831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10832 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10833 {
10834 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10835 (__v8df) __B,
10836 (__v8df) __W,
10837 (__mmask8) __U,
10838 _MM_FROUND_CUR_DIRECTION);
10839 }
10840
10841 extern __inline __m512d
10842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10843 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10844 {
10845 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10846 (__v8df) __B,
10847 (__v8df)
10848 _mm512_setzero_pd (),
10849 (__mmask8) __U,
10850 _MM_FROUND_CUR_DIRECTION);
10851 }
10852
10853 extern __inline __m512
10854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10855 _mm512_sub_ps (__m512 __A, __m512 __B)
10856 {
10857 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10858 (__v16sf) __B,
10859 (__v16sf)
10860 _mm512_undefined_ps (),
10861 (__mmask16) -1,
10862 _MM_FROUND_CUR_DIRECTION);
10863 }
10864
10865 extern __inline __m512
10866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10867 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10868 {
10869 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10870 (__v16sf) __B,
10871 (__v16sf) __W,
10872 (__mmask16) __U,
10873 _MM_FROUND_CUR_DIRECTION);
10874 }
10875
10876 extern __inline __m512
10877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10878 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10879 {
10880 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10881 (__v16sf) __B,
10882 (__v16sf)
10883 _mm512_setzero_ps (),
10884 (__mmask16) __U,
10885 _MM_FROUND_CUR_DIRECTION);
10886 }
10887
10888 extern __inline __m512d
10889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10890 _mm512_mul_pd (__m512d __A, __m512d __B)
10891 {
10892 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10893 (__v8df) __B,
10894 (__v8df)
10895 _mm512_undefined_pd (),
10896 (__mmask8) -1,
10897 _MM_FROUND_CUR_DIRECTION);
10898 }
10899
10900 extern __inline __m512d
10901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10902 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10903 {
10904 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10905 (__v8df) __B,
10906 (__v8df) __W,
10907 (__mmask8) __U,
10908 _MM_FROUND_CUR_DIRECTION);
10909 }
10910
10911 extern __inline __m512d
10912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10913 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10914 {
10915 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10916 (__v8df) __B,
10917 (__v8df)
10918 _mm512_setzero_pd (),
10919 (__mmask8) __U,
10920 _MM_FROUND_CUR_DIRECTION);
10921 }
10922
10923 extern __inline __m512
10924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10925 _mm512_mul_ps (__m512 __A, __m512 __B)
10926 {
10927 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10928 (__v16sf) __B,
10929 (__v16sf)
10930 _mm512_undefined_ps (),
10931 (__mmask16) -1,
10932 _MM_FROUND_CUR_DIRECTION);
10933 }
10934
10935 extern __inline __m512
10936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10937 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10938 {
10939 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10940 (__v16sf) __B,
10941 (__v16sf) __W,
10942 (__mmask16) __U,
10943 _MM_FROUND_CUR_DIRECTION);
10944 }
10945
10946 extern __inline __m512
10947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10948 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10949 {
10950 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10951 (__v16sf) __B,
10952 (__v16sf)
10953 _mm512_setzero_ps (),
10954 (__mmask16) __U,
10955 _MM_FROUND_CUR_DIRECTION);
10956 }
10957
10958 extern __inline __m512d
10959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10960 _mm512_div_pd (__m512d __M, __m512d __V)
10961 {
10962 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10963 (__v8df) __V,
10964 (__v8df)
10965 _mm512_undefined_pd (),
10966 (__mmask8) -1,
10967 _MM_FROUND_CUR_DIRECTION);
10968 }
10969
10970 extern __inline __m512d
10971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10972 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10973 {
10974 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10975 (__v8df) __V,
10976 (__v8df) __W,
10977 (__mmask8) __U,
10978 _MM_FROUND_CUR_DIRECTION);
10979 }
10980
10981 extern __inline __m512d
10982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10983 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10984 {
10985 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10986 (__v8df) __V,
10987 (__v8df)
10988 _mm512_setzero_pd (),
10989 (__mmask8) __U,
10990 _MM_FROUND_CUR_DIRECTION);
10991 }
10992
10993 extern __inline __m512
10994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10995 _mm512_div_ps (__m512 __A, __m512 __B)
10996 {
10997 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10998 (__v16sf) __B,
10999 (__v16sf)
11000 _mm512_undefined_ps (),
11001 (__mmask16) -1,
11002 _MM_FROUND_CUR_DIRECTION);
11003 }
11004
11005 extern __inline __m512
11006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11007 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11008 {
11009 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11010 (__v16sf) __B,
11011 (__v16sf) __W,
11012 (__mmask16) __U,
11013 _MM_FROUND_CUR_DIRECTION);
11014 }
11015
11016 extern __inline __m512
11017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11018 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
11019 {
11020 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
11021 (__v16sf) __B,
11022 (__v16sf)
11023 _mm512_setzero_ps (),
11024 (__mmask16) __U,
11025 _MM_FROUND_CUR_DIRECTION);
11026 }
11027
11028 extern __inline __m512d
11029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11030 _mm512_max_pd (__m512d __A, __m512d __B)
11031 {
11032 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11033 (__v8df) __B,
11034 (__v8df)
11035 _mm512_undefined_pd (),
11036 (__mmask8) -1,
11037 _MM_FROUND_CUR_DIRECTION);
11038 }
11039
11040 extern __inline __m512d
11041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11042 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11043 {
11044 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11045 (__v8df) __B,
11046 (__v8df) __W,
11047 (__mmask8) __U,
11048 _MM_FROUND_CUR_DIRECTION);
11049 }
11050
11051 extern __inline __m512d
11052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11053 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
11054 {
11055 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
11056 (__v8df) __B,
11057 (__v8df)
11058 _mm512_setzero_pd (),
11059 (__mmask8) __U,
11060 _MM_FROUND_CUR_DIRECTION);
11061 }
11062
11063 extern __inline __m512
11064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11065 _mm512_max_ps (__m512 __A, __m512 __B)
11066 {
11067 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11068 (__v16sf) __B,
11069 (__v16sf)
11070 _mm512_undefined_ps (),
11071 (__mmask16) -1,
11072 _MM_FROUND_CUR_DIRECTION);
11073 }
11074
11075 extern __inline __m512
11076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11077 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11078 {
11079 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11080 (__v16sf) __B,
11081 (__v16sf) __W,
11082 (__mmask16) __U,
11083 _MM_FROUND_CUR_DIRECTION);
11084 }
11085
11086 extern __inline __m512
11087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11088 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
11089 {
11090 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
11091 (__v16sf) __B,
11092 (__v16sf)
11093 _mm512_setzero_ps (),
11094 (__mmask16) __U,
11095 _MM_FROUND_CUR_DIRECTION);
11096 }
11097
11098 extern __inline __m512d
11099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11100 _mm512_min_pd (__m512d __A, __m512d __B)
11101 {
11102 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11103 (__v8df) __B,
11104 (__v8df)
11105 _mm512_undefined_pd (),
11106 (__mmask8) -1,
11107 _MM_FROUND_CUR_DIRECTION);
11108 }
11109
11110 extern __inline __m512d
11111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11112 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11113 {
11114 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11115 (__v8df) __B,
11116 (__v8df) __W,
11117 (__mmask8) __U,
11118 _MM_FROUND_CUR_DIRECTION);
11119 }
11120
11121 extern __inline __m512d
11122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11123 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11124 {
11125 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11126 (__v8df) __B,
11127 (__v8df)
11128 _mm512_setzero_pd (),
11129 (__mmask8) __U,
11130 _MM_FROUND_CUR_DIRECTION);
11131 }
11132
11133 extern __inline __m512
11134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11135 _mm512_min_ps (__m512 __A, __m512 __B)
11136 {
11137 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11138 (__v16sf) __B,
11139 (__v16sf)
11140 _mm512_undefined_ps (),
11141 (__mmask16) -1,
11142 _MM_FROUND_CUR_DIRECTION);
11143 }
11144
11145 extern __inline __m512
11146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11147 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11148 {
11149 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11150 (__v16sf) __B,
11151 (__v16sf) __W,
11152 (__mmask16) __U,
11153 _MM_FROUND_CUR_DIRECTION);
11154 }
11155
11156 extern __inline __m512
11157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11158 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11159 {
11160 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11161 (__v16sf) __B,
11162 (__v16sf)
11163 _mm512_setzero_ps (),
11164 (__mmask16) __U,
11165 _MM_FROUND_CUR_DIRECTION);
11166 }
11167
11168 extern __inline __m512d
11169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11170 _mm512_scalef_pd (__m512d __A, __m512d __B)
11171 {
11172 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11173 (__v8df) __B,
11174 (__v8df)
11175 _mm512_undefined_pd (),
11176 (__mmask8) -1,
11177 _MM_FROUND_CUR_DIRECTION);
11178 }
11179
11180 extern __inline __m512d
11181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11182 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11183 {
11184 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11185 (__v8df) __B,
11186 (__v8df) __W,
11187 (__mmask8) __U,
11188 _MM_FROUND_CUR_DIRECTION);
11189 }
11190
11191 extern __inline __m512d
11192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11193 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11194 {
11195 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11196 (__v8df) __B,
11197 (__v8df)
11198 _mm512_setzero_pd (),
11199 (__mmask8) __U,
11200 _MM_FROUND_CUR_DIRECTION);
11201 }
11202
11203 extern __inline __m512
11204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11205 _mm512_scalef_ps (__m512 __A, __m512 __B)
11206 {
11207 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11208 (__v16sf) __B,
11209 (__v16sf)
11210 _mm512_undefined_ps (),
11211 (__mmask16) -1,
11212 _MM_FROUND_CUR_DIRECTION);
11213 }
11214
11215 extern __inline __m512
11216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11217 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11218 {
11219 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11220 (__v16sf) __B,
11221 (__v16sf) __W,
11222 (__mmask16) __U,
11223 _MM_FROUND_CUR_DIRECTION);
11224 }
11225
11226 extern __inline __m512
11227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11228 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11229 {
11230 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11231 (__v16sf) __B,
11232 (__v16sf)
11233 _mm512_setzero_ps (),
11234 (__mmask16) __U,
11235 _MM_FROUND_CUR_DIRECTION);
11236 }
11237
11238 extern __inline __m128d
11239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11240 _mm_scalef_sd (__m128d __A, __m128d __B)
11241 {
11242 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11243 (__v2df) __B,
11244 _MM_FROUND_CUR_DIRECTION);
11245 }
11246
11247 extern __inline __m128
11248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11249 _mm_scalef_ss (__m128 __A, __m128 __B)
11250 {
11251 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11252 (__v4sf) __B,
11253 _MM_FROUND_CUR_DIRECTION);
11254 }
11255
11256 extern __inline __m512d
11257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11258 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11259 {
11260 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11261 (__v8df) __B,
11262 (__v8df) __C,
11263 (__mmask8) -1,
11264 _MM_FROUND_CUR_DIRECTION);
11265 }
11266
11267 extern __inline __m512d
11268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11269 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11270 {
11271 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11272 (__v8df) __B,
11273 (__v8df) __C,
11274 (__mmask8) __U,
11275 _MM_FROUND_CUR_DIRECTION);
11276 }
11277
11278 extern __inline __m512d
11279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11280 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11281 {
11282 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11283 (__v8df) __B,
11284 (__v8df) __C,
11285 (__mmask8) __U,
11286 _MM_FROUND_CUR_DIRECTION);
11287 }
11288
11289 extern __inline __m512d
11290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11291 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11292 {
11293 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11294 (__v8df) __B,
11295 (__v8df) __C,
11296 (__mmask8) __U,
11297 _MM_FROUND_CUR_DIRECTION);
11298 }
11299
11300 extern __inline __m512
11301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11302 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11303 {
11304 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11305 (__v16sf) __B,
11306 (__v16sf) __C,
11307 (__mmask16) -1,
11308 _MM_FROUND_CUR_DIRECTION);
11309 }
11310
11311 extern __inline __m512
11312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11313 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11314 {
11315 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11316 (__v16sf) __B,
11317 (__v16sf) __C,
11318 (__mmask16) __U,
11319 _MM_FROUND_CUR_DIRECTION);
11320 }
11321
11322 extern __inline __m512
11323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11324 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11325 {
11326 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11327 (__v16sf) __B,
11328 (__v16sf) __C,
11329 (__mmask16) __U,
11330 _MM_FROUND_CUR_DIRECTION);
11331 }
11332
11333 extern __inline __m512
11334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11335 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11336 {
11337 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11338 (__v16sf) __B,
11339 (__v16sf) __C,
11340 (__mmask16) __U,
11341 _MM_FROUND_CUR_DIRECTION);
11342 }
11343
11344 extern __inline __m512d
11345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11346 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11347 {
11348 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11349 (__v8df) __B,
11350 -(__v8df) __C,
11351 (__mmask8) -1,
11352 _MM_FROUND_CUR_DIRECTION);
11353 }
11354
11355 extern __inline __m512d
11356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11357 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11358 {
11359 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11360 (__v8df) __B,
11361 -(__v8df) __C,
11362 (__mmask8) __U,
11363 _MM_FROUND_CUR_DIRECTION);
11364 }
11365
11366 extern __inline __m512d
11367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11368 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11369 {
11370 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11371 (__v8df) __B,
11372 (__v8df) __C,
11373 (__mmask8) __U,
11374 _MM_FROUND_CUR_DIRECTION);
11375 }
11376
11377 extern __inline __m512d
11378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11379 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11380 {
11381 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11382 (__v8df) __B,
11383 -(__v8df) __C,
11384 (__mmask8) __U,
11385 _MM_FROUND_CUR_DIRECTION);
11386 }
11387
11388 extern __inline __m512
11389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11390 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11391 {
11392 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11393 (__v16sf) __B,
11394 -(__v16sf) __C,
11395 (__mmask16) -1,
11396 _MM_FROUND_CUR_DIRECTION);
11397 }
11398
11399 extern __inline __m512
11400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11401 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11402 {
11403 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11404 (__v16sf) __B,
11405 -(__v16sf) __C,
11406 (__mmask16) __U,
11407 _MM_FROUND_CUR_DIRECTION);
11408 }
11409
11410 extern __inline __m512
11411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11412 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11413 {
11414 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11415 (__v16sf) __B,
11416 (__v16sf) __C,
11417 (__mmask16) __U,
11418 _MM_FROUND_CUR_DIRECTION);
11419 }
11420
/* Calls __builtin_ia32_vfmaddps512_maskz on __A, __B and the negated
   __C with the mask __U, passing _MM_FROUND_CUR_DIRECTION as the last
   argument.  */
11421 extern __inline __m512
11422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11423 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11424 {
11425 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11426 (__v16sf) __B,
11427 -(__v16sf) __C,
11428 (__mmask16) __U,
11429 _MM_FROUND_CUR_DIRECTION);
11430 }
11431
/* Calls __builtin_ia32_vfmaddsubpd512_mask on __A, __B and __C with the
   constant mask (__mmask8) -1, passing _MM_FROUND_CUR_DIRECTION as the
   last argument.  */
11432 extern __inline __m512d
11433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11434 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11435 {
11436 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11437 (__v8df) __B,
11438 (__v8df) __C,
11439 (__mmask8) -1,
11440 _MM_FROUND_CUR_DIRECTION);
11441 }
11442
/* Same builtin as _mm512_fmaddsub_pd
   (__builtin_ia32_vfmaddsubpd512_mask), but with the caller-supplied
   mask __U instead of the constant (__mmask8) -1.  */
11443 extern __inline __m512d
11444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11445 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11446 {
11447 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11448 (__v8df) __B,
11449 (__v8df) __C,
11450 (__mmask8) __U,
11451 _MM_FROUND_CUR_DIRECTION);
11452 }
11453
/* Passes __A, __B, __C and the mask __U to
   __builtin_ia32_vfmaddsubpd512_mask3, with _MM_FROUND_CUR_DIRECTION as
   the last argument.  Note the mask is the last parameter of this
   variant.  */
11454 extern __inline __m512d
11455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11456 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11457 {
11458 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11459 (__v8df) __B,
11460 (__v8df) __C,
11461 (__mmask8) __U,
11462 _MM_FROUND_CUR_DIRECTION);
11463 }
11464
/* Passes __A, __B, __C and the mask __U to
   __builtin_ia32_vfmaddsubpd512_maskz, with _MM_FROUND_CUR_DIRECTION as
   the last argument.  Note the mask is the first parameter of this
   variant.  */
11465 extern __inline __m512d
11466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11467 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11468 {
11469 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11470 (__v8df) __B,
11471 (__v8df) __C,
11472 (__mmask8) __U,
11473 _MM_FROUND_CUR_DIRECTION);
11474 }
11475
/* Calls __builtin_ia32_vfmaddsubps512_mask on __A, __B and __C with the
   constant mask (__mmask16) -1, passing _MM_FROUND_CUR_DIRECTION as the
   last argument.  */
11476 extern __inline __m512
11477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11478 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11479 {
11480 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11481 (__v16sf) __B,
11482 (__v16sf) __C,
11483 (__mmask16) -1,
11484 _MM_FROUND_CUR_DIRECTION);
11485 }
11486
/* Same builtin as _mm512_fmaddsub_ps
   (__builtin_ia32_vfmaddsubps512_mask), but with the caller-supplied
   mask __U instead of the constant (__mmask16) -1.  */
11487 extern __inline __m512
11488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11489 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11490 {
11491 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11492 (__v16sf) __B,
11493 (__v16sf) __C,
11494 (__mmask16) __U,
11495 _MM_FROUND_CUR_DIRECTION);
11496 }
11497
/* Passes __A, __B, __C and the mask __U to
   __builtin_ia32_vfmaddsubps512_mask3, with _MM_FROUND_CUR_DIRECTION as
   the last argument.  Note the mask is the last parameter of this
   variant.  */
11498 extern __inline __m512
11499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11500 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11501 {
11502 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11503 (__v16sf) __B,
11504 (__v16sf) __C,
11505 (__mmask16) __U,
11506 _MM_FROUND_CUR_DIRECTION);
11507 }
11508
/* Passes __A, __B, __C and the mask __U to
   __builtin_ia32_vfmaddsubps512_maskz, with _MM_FROUND_CUR_DIRECTION as
   the last argument.  Note the mask is the first parameter of this
   variant.  */
11509 extern __inline __m512
11510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11511 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11512 {
11513 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11514 (__v16sf) __B,
11515 (__v16sf) __C,
11516 (__mmask16) __U,
11517 _MM_FROUND_CUR_DIRECTION);
11518 }
11519
/* Implemented through the fmaddsub builtin: calls
   __builtin_ia32_vfmaddsubpd512_mask on __A, __B and the negated __C
   with the constant mask (__mmask8) -1 and _MM_FROUND_CUR_DIRECTION as
   the last argument.  */
11520 extern __inline __m512d
11521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11522 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11523 {
11524 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11525 (__v8df) __B,
11526 -(__v8df) __C,
11527 (__mmask8) -1,
11528 _MM_FROUND_CUR_DIRECTION);
11529 }
11530
/* Same call as _mm512_fmsubadd_pd (__builtin_ia32_vfmaddsubpd512_mask
   with __C negated), but with the caller-supplied mask __U instead of
   the constant (__mmask8) -1.  */
11531 extern __inline __m512d
11532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11533 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11534 {
11535 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11536 (__v8df) __B,
11537 -(__v8df) __C,
11538 (__mmask8) __U,
11539 _MM_FROUND_CUR_DIRECTION);
11540 }
11541
/* Passes __A, __B, __C and the mask __U to the dedicated
   __builtin_ia32_vfmsubaddpd512_mask3 builtin, with
   _MM_FROUND_CUR_DIRECTION as the last argument.  __C is passed without
   negation, unlike in the other fmsubadd_pd variants.  */
11542 extern __inline __m512d
11543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11544 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11545 {
11546 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11547 (__v8df) __B,
11548 (__v8df) __C,
11549 (__mmask8) __U,
11550 _MM_FROUND_CUR_DIRECTION);
11551 }
11552
/* Calls __builtin_ia32_vfmaddsubpd512_maskz on __A, __B and the negated
   __C with the mask __U, passing _MM_FROUND_CUR_DIRECTION as the last
   argument.  */
11553 extern __inline __m512d
11554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11555 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11556 {
11557 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11558 (__v8df) __B,
11559 -(__v8df) __C,
11560 (__mmask8) __U,
11561 _MM_FROUND_CUR_DIRECTION);
11562 }
11563
/* Implemented through the fmaddsub builtin: calls
   __builtin_ia32_vfmaddsubps512_mask on __A, __B and the negated __C
   with the constant mask (__mmask16) -1 and _MM_FROUND_CUR_DIRECTION as
   the last argument.  */
11564 extern __inline __m512
11565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11566 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11567 {
11568 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11569 (__v16sf) __B,
11570 -(__v16sf) __C,
11571 (__mmask16) -1,
11572 _MM_FROUND_CUR_DIRECTION);
11573 }
11574
/* Same call as _mm512_fmsubadd_ps (__builtin_ia32_vfmaddsubps512_mask
   with __C negated), but with the caller-supplied mask __U instead of
   the constant (__mmask16) -1.  */
11575 extern __inline __m512
11576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11577 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11578 {
11579 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11580 (__v16sf) __B,
11581 -(__v16sf) __C,
11582 (__mmask16) __U,
11583 _MM_FROUND_CUR_DIRECTION);
11584 }
11585
/* Passes __A, __B, __C and the mask __U to the dedicated
   __builtin_ia32_vfmsubaddps512_mask3 builtin, with
   _MM_FROUND_CUR_DIRECTION as the last argument.  __C is passed without
   negation, unlike in the other fmsubadd_ps variants.  */
11586 extern __inline __m512
11587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11588 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11589 {
11590 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11591 (__v16sf) __B,
11592 (__v16sf) __C,
11593 (__mmask16) __U,
11594 _MM_FROUND_CUR_DIRECTION);
11595 }
11596
/* Calls __builtin_ia32_vfmaddsubps512_maskz on __A, __B and the negated
   __C with the mask __U, passing _MM_FROUND_CUR_DIRECTION as the last
   argument.  */
11597 extern __inline __m512
11598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11599 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11600 {
11601 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11602 (__v16sf) __B,
11603 -(__v16sf) __C,
11604 (__mmask16) __U,
11605 _MM_FROUND_CUR_DIRECTION);
11606 }
11607
/* Implemented through the fmadd builtin: calls
   __builtin_ia32_vfmaddpd512_mask on the negated __A, __B and __C with
   the constant mask (__mmask8) -1 and _MM_FROUND_CUR_DIRECTION as the
   last argument.  */
11608 extern __inline __m512d
11609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11610 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11611 {
11612 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11613 (__v8df) __B,
11614 (__v8df) __C,
11615 (__mmask8) -1,
11616 _MM_FROUND_CUR_DIRECTION);
11617 }
11618
/* Passes __A, __B, __C and the mask __U to the dedicated
   __builtin_ia32_vfnmaddpd512_mask builtin, with
   _MM_FROUND_CUR_DIRECTION as the last argument.  __A is passed without
   negation here, unlike in the unmasked, mask3 and maskz fnmadd_pd
   variants.  */
11619 extern __inline __m512d
11620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11621 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11622 {
11623 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11624 (__v8df) __B,
11625 (__v8df) __C,
11626 (__mmask8) __U,
11627 _MM_FROUND_CUR_DIRECTION);
11628 }
11629
/* Calls __builtin_ia32_vfmaddpd512_mask3 on the negated __A, __B and
   __C with the mask __U, passing _MM_FROUND_CUR_DIRECTION as the last
   argument.  */
11630 extern __inline __m512d
11631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11632 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11633 {
11634 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11635 (__v8df) __B,
11636 (__v8df) __C,
11637 (__mmask8) __U,
11638 _MM_FROUND_CUR_DIRECTION);
11639 }
11640
/* Calls __builtin_ia32_vfmaddpd512_maskz on the negated __A, __B and
   __C with the mask __U, passing _MM_FROUND_CUR_DIRECTION as the last
   argument.  */
11641 extern __inline __m512d
11642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11643 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11644 {
11645 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11646 (__v8df) __B,
11647 (__v8df) __C,
11648 (__mmask8) __U,
11649 _MM_FROUND_CUR_DIRECTION);
11650 }
11651
/* Implemented through the fmadd builtin: calls
   __builtin_ia32_vfmaddps512_mask on the negated __A, __B and __C with
   the constant mask (__mmask16) -1 and _MM_FROUND_CUR_DIRECTION as the
   last argument.  */
11652 extern __inline __m512
11653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11654 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11655 {
11656 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11657 (__v16sf) __B,
11658 (__v16sf) __C,
11659 (__mmask16) -1,
11660 _MM_FROUND_CUR_DIRECTION);
11661 }
11662
/* Passes __A, __B, __C and the mask __U to the dedicated
   __builtin_ia32_vfnmaddps512_mask builtin, with
   _MM_FROUND_CUR_DIRECTION as the last argument.  __A is passed without
   negation here, unlike in the unmasked, mask3 and maskz fnmadd_ps
   variants.  */
11663 extern __inline __m512
11664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11665 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11666 {
11667 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11668 (__v16sf) __B,
11669 (__v16sf) __C,
11670 (__mmask16) __U,
11671 _MM_FROUND_CUR_DIRECTION);
11672 }
11673
/* Calls __builtin_ia32_vfmaddps512_mask3 on the negated __A, __B and
   __C with the mask __U, passing _MM_FROUND_CUR_DIRECTION as the last
   argument.  */
11674 extern __inline __m512
11675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11676 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11677 {
11678 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11679 (__v16sf) __B,
11680 (__v16sf) __C,
11681 (__mmask16) __U,
11682 _MM_FROUND_CUR_DIRECTION);
11683 }
11684
/* Calls __builtin_ia32_vfmaddps512_maskz on the negated __A, __B and
   __C with the mask __U, passing _MM_FROUND_CUR_DIRECTION as the last
   argument.  */
11685 extern __inline __m512
11686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11687 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11688 {
11689 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11690 (__v16sf) __B,
11691 (__v16sf) __C,
11692 (__mmask16) __U,
11693 _MM_FROUND_CUR_DIRECTION);
11694 }
11695
/* Implemented through the fmadd builtin: calls
   __builtin_ia32_vfmaddpd512_mask with both __A and __C negated, __B
   unchanged, the constant mask (__mmask8) -1 and
   _MM_FROUND_CUR_DIRECTION as the last argument.  */
11696 extern __inline __m512d
11697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11698 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11699 {
11700 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11701 (__v8df) __B,
11702 -(__v8df) __C,
11703 (__mmask8) -1,
11704 _MM_FROUND_CUR_DIRECTION);
11705 }
11706
/* Passes __A, __B, __C and the mask __U, with no negation, to the
   dedicated __builtin_ia32_vfnmsubpd512_mask builtin, with
   _MM_FROUND_CUR_DIRECTION as the last argument.  */
11707 extern __inline __m512d
11708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11709 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11710 {
11711 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11712 (__v8df) __B,
11713 (__v8df) __C,
11714 (__mmask8) __U,
11715 _MM_FROUND_CUR_DIRECTION);
11716 }
11717
/* Passes __A, __B, __C and the mask __U, with no negation, to the
   dedicated __builtin_ia32_vfnmsubpd512_mask3 builtin, with
   _MM_FROUND_CUR_DIRECTION as the last argument.  */
11718 extern __inline __m512d
11719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11720 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11721 {
11722 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11723 (__v8df) __B,
11724 (__v8df) __C,
11725 (__mmask8) __U,
11726 _MM_FROUND_CUR_DIRECTION);
11727 }
11728
/* Calls __builtin_ia32_vfmaddpd512_maskz with both __A and __C negated,
   __B unchanged and the mask __U, passing _MM_FROUND_CUR_DIRECTION as
   the last argument.  */
11729 extern __inline __m512d
11730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11731 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11732 {
11733 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11734 (__v8df) __B,
11735 -(__v8df) __C,
11736 (__mmask8) __U,
11737 _MM_FROUND_CUR_DIRECTION);
11738 }
11739
/* Implemented through the fmadd builtin: calls
   __builtin_ia32_vfmaddps512_mask with both __A and __C negated, __B
   unchanged, the constant mask (__mmask16) -1 and
   _MM_FROUND_CUR_DIRECTION as the last argument.  */
11740 extern __inline __m512
11741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11742 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11743 {
11744 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11745 (__v16sf) __B,
11746 -(__v16sf) __C,
11747 (__mmask16) -1,
11748 _MM_FROUND_CUR_DIRECTION);
11749 }
11750
/* Passes __A, __B, __C and the mask __U, with no negation, to the
   dedicated __builtin_ia32_vfnmsubps512_mask builtin, with
   _MM_FROUND_CUR_DIRECTION as the last argument.  */
11751 extern __inline __m512
11752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11753 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11754 {
11755 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11756 (__v16sf) __B,
11757 (__v16sf) __C,
11758 (__mmask16) __U,
11759 _MM_FROUND_CUR_DIRECTION);
11760 }
11761
/* Passes __A, __B, __C and the mask __U, with no negation, to the
   dedicated __builtin_ia32_vfnmsubps512_mask3 builtin, with
   _MM_FROUND_CUR_DIRECTION as the last argument.  */
11762 extern __inline __m512
11763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11764 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11765 {
11766 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11767 (__v16sf) __B,
11768 (__v16sf) __C,
11769 (__mmask16) __U,
11770 _MM_FROUND_CUR_DIRECTION);
11771 }
11772
/* Calls __builtin_ia32_vfmaddps512_maskz with both __A and __C negated,
   __B unchanged and the mask __U, passing _MM_FROUND_CUR_DIRECTION as
   the last argument.  */
11773 extern __inline __m512
11774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11775 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11776 {
11777 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11778 (__v16sf) __B,
11779 -(__v16sf) __C,
11780 (__mmask16) __U,
11781 _MM_FROUND_CUR_DIRECTION);
11782 }
11783
/* Calls __builtin_ia32_cvttpd2dq512_mask on __A (a __m512d in, a
   __m256i out).  The second operand is _mm256_undefined_si256 (), the
   mask is the constant (__mmask8) -1, and _MM_FROUND_CUR_DIRECTION is
   the last argument.  */
11784 extern __inline __m256i
11785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11786 _mm512_cvttpd_epi32 (__m512d __A)
11787 {
11788 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11789 (__v8si)
11790 _mm256_undefined_si256 (),
11791 (__mmask8) -1,
11792 _MM_FROUND_CUR_DIRECTION);
11793 }
11794
/* Calls __builtin_ia32_cvttpd2dq512_mask on __A with __W as the second
   (__v8si) operand and the caller-supplied mask __U;
   _MM_FROUND_CUR_DIRECTION is the last argument.  */
11795 extern __inline __m256i
11796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11797 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11798 {
11799 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11800 (__v8si) __W,
11801 (__mmask8) __U,
11802 _MM_FROUND_CUR_DIRECTION);
11803 }
11804
/* Calls __builtin_ia32_cvttpd2dq512_mask on __A with the result of
   _mm256_setzero_si256 () as the second operand and the caller-supplied
   mask __U; _MM_FROUND_CUR_DIRECTION is the last argument.  */
11805 extern __inline __m256i
11806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11807 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11808 {
11809 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11810 (__v8si)
11811 _mm256_setzero_si256 (),
11812 (__mmask8) __U,
11813 _MM_FROUND_CUR_DIRECTION);
11814 }
11815
/* Calls __builtin_ia32_cvttpd2udq512_mask on __A (a __m512d in, a
   __m256i out).  The second operand is _mm256_undefined_si256 (), the
   mask is the constant (__mmask8) -1, and _MM_FROUND_CUR_DIRECTION is
   the last argument.  */
11816 extern __inline __m256i
11817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11818 _mm512_cvttpd_epu32 (__m512d __A)
11819 {
11820 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11821 (__v8si)
11822 _mm256_undefined_si256 (),
11823 (__mmask8) -1,
11824 _MM_FROUND_CUR_DIRECTION);
11825 }
11826
/* Calls __builtin_ia32_cvttpd2udq512_mask on __A with __W as the second
   (__v8si) operand and the caller-supplied mask __U;
   _MM_FROUND_CUR_DIRECTION is the last argument.  */
11827 extern __inline __m256i
11828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11829 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11830 {
11831 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11832 (__v8si) __W,
11833 (__mmask8) __U,
11834 _MM_FROUND_CUR_DIRECTION);
11835 }
11836
/* Calls __builtin_ia32_cvttpd2udq512_mask on __A with the result of
   _mm256_setzero_si256 () as the second operand and the caller-supplied
   mask __U; _MM_FROUND_CUR_DIRECTION is the last argument.  */
11837 extern __inline __m256i
11838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11839 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11840 {
11841 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11842 (__v8si)
11843 _mm256_setzero_si256 (),
11844 (__mmask8) __U,
11845 _MM_FROUND_CUR_DIRECTION);
11846 }
11847
/* Calls __builtin_ia32_cvtpd2dq512_mask on __A (a __m512d in, a __m256i
   out).  The second operand is _mm256_undefined_si256 (), the mask is
   the constant (__mmask8) -1, and _MM_FROUND_CUR_DIRECTION is the last
   argument.  */
11848 extern __inline __m256i
11849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11850 _mm512_cvtpd_epi32 (__m512d __A)
11851 {
11852 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11853 (__v8si)
11854 _mm256_undefined_si256 (),
11855 (__mmask8) -1,
11856 _MM_FROUND_CUR_DIRECTION);
11857 }
11858
/* Calls __builtin_ia32_cvtpd2dq512_mask on __A with __W as the second
   (__v8si) operand and the caller-supplied mask __U;
   _MM_FROUND_CUR_DIRECTION is the last argument.  */
11859 extern __inline __m256i
11860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11861 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11862 {
11863 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11864 (__v8si) __W,
11865 (__mmask8) __U,
11866 _MM_FROUND_CUR_DIRECTION);
11867 }
11868
/* Calls __builtin_ia32_cvtpd2dq512_mask on __A with the result of
   _mm256_setzero_si256 () as the second operand and the caller-supplied
   mask __U; _MM_FROUND_CUR_DIRECTION is the last argument.  */
11869 extern __inline __m256i
11870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11871 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11872 {
11873 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11874 (__v8si)
11875 _mm256_setzero_si256 (),
11876 (__mmask8) __U,
11877 _MM_FROUND_CUR_DIRECTION);
11878 }
11879
/* Calls __builtin_ia32_cvtpd2udq512_mask on __A (a __m512d in, a
   __m256i out).  The second operand is _mm256_undefined_si256 (), the
   mask is the constant (__mmask8) -1, and _MM_FROUND_CUR_DIRECTION is
   the last argument.  */
11880 extern __inline __m256i
11881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11882 _mm512_cvtpd_epu32 (__m512d __A)
11883 {
11884 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11885 (__v8si)
11886 _mm256_undefined_si256 (),
11887 (__mmask8) -1,
11888 _MM_FROUND_CUR_DIRECTION);
11889 }
11890
/* Calls __builtin_ia32_cvtpd2udq512_mask on __A with __W as the second
   (__v8si) operand and the caller-supplied mask __U;
   _MM_FROUND_CUR_DIRECTION is the last argument.  */
11891 extern __inline __m256i
11892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11893 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11894 {
11895 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11896 (__v8si) __W,
11897 (__mmask8) __U,
11898 _MM_FROUND_CUR_DIRECTION);
11899 }
11900
/* Calls __builtin_ia32_cvtpd2udq512_mask on __A with the result of
   _mm256_setzero_si256 () as the second operand and the caller-supplied
   mask __U; _MM_FROUND_CUR_DIRECTION is the last argument.  */
11901 extern __inline __m256i
11902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11903 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11904 {
11905 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11906 (__v8si)
11907 _mm256_setzero_si256 (),
11908 (__mmask8) __U,
11909 _MM_FROUND_CUR_DIRECTION);
11910 }
11911
/* Calls __builtin_ia32_cvttps2dq512_mask on __A (a __m512 in, a __m512i
   out).  The second operand is _mm512_undefined_si512 (), the mask is
   the constant (__mmask16) -1, and _MM_FROUND_CUR_DIRECTION is the last
   argument.  */
11912 extern __inline __m512i
11913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11914 _mm512_cvttps_epi32 (__m512 __A)
11915 {
11916 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11917 (__v16si)
11918 _mm512_undefined_si512 (),
11919 (__mmask16) -1,
11920 _MM_FROUND_CUR_DIRECTION);
11921 }
11922
/* Calls __builtin_ia32_cvttps2dq512_mask on __A with __W as the second
   (__v16si) operand and the caller-supplied mask __U;
   _MM_FROUND_CUR_DIRECTION is the last argument.  */
11923 extern __inline __m512i
11924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11925 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11926 {
11927 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11928 (__v16si) __W,
11929 (__mmask16) __U,
11930 _MM_FROUND_CUR_DIRECTION);
11931 }
11932
/* Calls __builtin_ia32_cvttps2dq512_mask on __A with the result of
   _mm512_setzero_si512 () as the second operand and the caller-supplied
   mask __U; _MM_FROUND_CUR_DIRECTION is the last argument.  */
11933 extern __inline __m512i
11934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11935 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11936 {
11937 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11938 (__v16si)
11939 _mm512_setzero_si512 (),
11940 (__mmask16) __U,
11941 _MM_FROUND_CUR_DIRECTION);
11942 }
11943
/* Calls __builtin_ia32_cvttps2udq512_mask on __A (a __m512 in, a
   __m512i out).  The second operand is _mm512_undefined_si512 (), the
   mask is the constant (__mmask16) -1, and _MM_FROUND_CUR_DIRECTION is
   the last argument.  */
11944 extern __inline __m512i
11945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11946 _mm512_cvttps_epu32 (__m512 __A)
11947 {
11948 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11949 (__v16si)
11950 _mm512_undefined_si512 (),
11951 (__mmask16) -1,
11952 _MM_FROUND_CUR_DIRECTION);
11953 }
11954
/* Calls __builtin_ia32_cvttps2udq512_mask on __A with __W as the second
   (__v16si) operand and the caller-supplied mask __U;
   _MM_FROUND_CUR_DIRECTION is the last argument.  */
11955 extern __inline __m512i
11956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11957 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11958 {
11959 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11960 (__v16si) __W,
11961 (__mmask16) __U,
11962 _MM_FROUND_CUR_DIRECTION);
11963 }
11964
/* Calls __builtin_ia32_cvttps2udq512_mask on __A with the result of
   _mm512_setzero_si512 () as the second operand and the caller-supplied
   mask __U; _MM_FROUND_CUR_DIRECTION is the last argument.  */
11965 extern __inline __m512i
11966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11967 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11968 {
11969 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11970 (__v16si)
11971 _mm512_setzero_si512 (),
11972 (__mmask16) __U,
11973 _MM_FROUND_CUR_DIRECTION);
11974 }
11975
/* Calls __builtin_ia32_cvtps2dq512_mask on __A (a __m512 in, a __m512i
   out).  The second operand is _mm512_undefined_si512 (), the mask is
   the constant (__mmask16) -1, and _MM_FROUND_CUR_DIRECTION is the last
   argument.  */
11976 extern __inline __m512i
11977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11978 _mm512_cvtps_epi32 (__m512 __A)
11979 {
11980 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11981 (__v16si)
11982 _mm512_undefined_si512 (),
11983 (__mmask16) -1,
11984 _MM_FROUND_CUR_DIRECTION);
11985 }
11986
/* Calls __builtin_ia32_cvtps2dq512_mask on __A with __W as the second
   (__v16si) operand and the caller-supplied mask __U;
   _MM_FROUND_CUR_DIRECTION is the last argument.  */
11987 extern __inline __m512i
11988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11989 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11990 {
11991 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11992 (__v16si) __W,
11993 (__mmask16) __U,
11994 _MM_FROUND_CUR_DIRECTION);
11995 }
11996
/* Calls __builtin_ia32_cvtps2dq512_mask on __A with the result of
   _mm512_setzero_si512 () as the second operand and the caller-supplied
   mask __U; _MM_FROUND_CUR_DIRECTION is the last argument.  */
11997 extern __inline __m512i
11998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11999 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
12000 {
12001 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
12002 (__v16si)
12003 _mm512_setzero_si512 (),
12004 (__mmask16) __U,
12005 _MM_FROUND_CUR_DIRECTION);
12006 }
12007
/* Calls __builtin_ia32_cvtps2udq512_mask on __A (a __m512 in, a __m512i
   out).  The second operand is _mm512_undefined_si512 (), the mask is
   the constant (__mmask16) -1, and _MM_FROUND_CUR_DIRECTION is the last
   argument.  */
12008 extern __inline __m512i
12009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12010 _mm512_cvtps_epu32 (__m512 __A)
12011 {
12012 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12013 (__v16si)
12014 _mm512_undefined_si512 (),
12015 (__mmask16) -1,
12016 _MM_FROUND_CUR_DIRECTION);
12017 }
12018
/* Calls __builtin_ia32_cvtps2udq512_mask on __A with __W as the second
   (__v16si) operand and the caller-supplied mask __U;
   _MM_FROUND_CUR_DIRECTION is the last argument.  */
12019 extern __inline __m512i
12020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12021 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
12022 {
12023 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12024 (__v16si) __W,
12025 (__mmask16) __U,
12026 _MM_FROUND_CUR_DIRECTION);
12027 }
12028
/* Calls __builtin_ia32_cvtps2udq512_mask on __A with the result of
   _mm512_setzero_si512 () as the second operand and the caller-supplied
   mask __U; _MM_FROUND_CUR_DIRECTION is the last argument.  */
12029 extern __inline __m512i
12030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12031 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
12032 {
12033 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
12034 (__v16si)
12035 _mm512_setzero_si512 (),
12036 (__mmask16) __U,
12037 _MM_FROUND_CUR_DIRECTION);
12038 }
12039
12040 #ifdef __x86_64__
/* Only available when __x86_64__ is defined.  Passes the vector __A and
   the unsigned 64-bit value __B to __builtin_ia32_cvtusi2ss64, with
   _MM_FROUND_CUR_DIRECTION as the last argument, and returns the
   resulting __m128.  */
12041 extern __inline __m128
12042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12043 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
12044 {
12045 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
12046 _MM_FROUND_CUR_DIRECTION);
12047 }
12048
/* Only available when __x86_64__ is defined.  Passes the vector __A and
   the unsigned 64-bit value __B to __builtin_ia32_cvtusi2sd64, with
   _MM_FROUND_CUR_DIRECTION as the last argument, and returns the
   resulting __m128d.  */
12049 extern __inline __m128d
12050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12051 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
12052 {
12053 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
12054 _MM_FROUND_CUR_DIRECTION);
12055 }
12056 #endif
12057
/* Passes the vector __A and the unsigned value __B to
   __builtin_ia32_cvtusi2ss32, with _MM_FROUND_CUR_DIRECTION as the last
   argument, and returns the resulting __m128.  Unlike the 64-bit
   variants, this one is not guarded by __x86_64__.  */
12058 extern __inline __m128
12059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12060 _mm_cvtu32_ss (__m128 __A, unsigned __B)
12061 {
12062 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
12063 _MM_FROUND_CUR_DIRECTION);
12064 }
12065
/* Calls __builtin_ia32_cvtdq2ps512_mask on __A (a __m512i in, a __m512
   out).  The second operand is _mm512_undefined_ps (), the mask is the
   constant (__mmask16) -1, and _MM_FROUND_CUR_DIRECTION is the last
   argument.  */
12066 extern __inline __m512
12067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12068 _mm512_cvtepi32_ps (__m512i __A)
12069 {
12070 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12071 (__v16sf)
12072 _mm512_undefined_ps (),
12073 (__mmask16) -1,
12074 _MM_FROUND_CUR_DIRECTION);
12075 }
12076
/* Calls __builtin_ia32_cvtdq2ps512_mask on __A with __W as the second
   (__v16sf) operand and the caller-supplied mask __U;
   _MM_FROUND_CUR_DIRECTION is the last argument.  */
12077 extern __inline __m512
12078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12079 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12080 {
12081 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12082 (__v16sf) __W,
12083 (__mmask16) __U,
12084 _MM_FROUND_CUR_DIRECTION);
12085 }
12086
/* Calls __builtin_ia32_cvtdq2ps512_mask on __A with the result of
   _mm512_setzero_ps () as the second operand and the caller-supplied
   mask __U; _MM_FROUND_CUR_DIRECTION is the last argument.  */
12087 extern __inline __m512
12088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12089 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
12090 {
12091 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12092 (__v16sf)
12093 _mm512_setzero_ps (),
12094 (__mmask16) __U,
12095 _MM_FROUND_CUR_DIRECTION);
12096 }
12097
/* Calls __builtin_ia32_cvtudq2ps512_mask on __A (a __m512i in, a __m512
   out).  The second operand is _mm512_undefined_ps (), the mask is the
   constant (__mmask16) -1, and _MM_FROUND_CUR_DIRECTION is the last
   argument.  */
12098 extern __inline __m512
12099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12100 _mm512_cvtepu32_ps (__m512i __A)
12101 {
12102 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12103 (__v16sf)
12104 _mm512_undefined_ps (),
12105 (__mmask16) -1,
12106 _MM_FROUND_CUR_DIRECTION);
12107 }
12108
/* Calls __builtin_ia32_cvtudq2ps512_mask on __A with __W as the second
   (__v16sf) operand and the caller-supplied mask __U;
   _MM_FROUND_CUR_DIRECTION is the last argument.  */
12109 extern __inline __m512
12110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12111 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12112 {
12113 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12114 (__v16sf) __W,
12115 (__mmask16) __U,
12116 _MM_FROUND_CUR_DIRECTION);
12117 }
12118
/* Calls __builtin_ia32_cvtudq2ps512_mask on __A with the result of
   _mm512_setzero_ps () as the second operand and the caller-supplied
   mask __U; _MM_FROUND_CUR_DIRECTION is the last argument.  */
12119 extern __inline __m512
12120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12121 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12122 {
12123 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12124 (__v16sf)
12125 _mm512_setzero_ps (),
12126 (__mmask16) __U,
12127 _MM_FROUND_CUR_DIRECTION);
12128 }
12129
12130 #ifdef __OPTIMIZE__
/* Inline-function form, defined only when __OPTIMIZE__ is set (a macro
   of the same name is provided otherwise).  Calls
   __builtin_ia32_fixupimmpd512_mask on __A, __B and __C (cast to
   __v8di), forwarding __imm as the fourth argument, with the constant
   mask (__mmask8) -1 and _MM_FROUND_CUR_DIRECTION last.  */
12131 extern __inline __m512d
12132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12133 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12134 {
12135 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12136 (__v8df) __B,
12137 (__v8di) __C,
12138 __imm,
12139 (__mmask8) -1,
12140 _MM_FROUND_CUR_DIRECTION);
12141 }
12142
/* Inline-function form, defined only when __OPTIMIZE__ is set.  Same
   builtin as _mm512_fixupimm_pd (__builtin_ia32_fixupimmpd512_mask),
   but with the caller-supplied mask __U instead of the constant
   (__mmask8) -1.  */
12143 extern __inline __m512d
12144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12145 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12146 __m512i __C, const int __imm)
12147 {
12148 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12149 (__v8df) __B,
12150 (__v8di) __C,
12151 __imm,
12152 (__mmask8) __U,
12153 _MM_FROUND_CUR_DIRECTION);
12154 }
12155
/* Inline-function form, defined only when __OPTIMIZE__ is set.  Calls
   __builtin_ia32_fixupimmpd512_maskz on __A, __B and __C (cast to
   __v8di), forwarding __imm as the fourth argument, with the mask __U
   and _MM_FROUND_CUR_DIRECTION last.  */
12156 extern __inline __m512d
12157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12158 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12159 __m512i __C, const int __imm)
12160 {
12161 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12162 (__v8df) __B,
12163 (__v8di) __C,
12164 __imm,
12165 (__mmask8) __U,
12166 _MM_FROUND_CUR_DIRECTION);
12167 }
12168
/* Inline-function form, defined only when __OPTIMIZE__ is set (a macro
   of the same name is provided otherwise).  Calls
   __builtin_ia32_fixupimmps512_mask on __A, __B and __C (cast to
   __v16si), forwarding __imm as the fourth argument, with the constant
   mask (__mmask16) -1 and _MM_FROUND_CUR_DIRECTION last.  */
12169 extern __inline __m512
12170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12171 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12172 {
12173 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12174 (__v16sf) __B,
12175 (__v16si) __C,
12176 __imm,
12177 (__mmask16) -1,
12178 _MM_FROUND_CUR_DIRECTION);
12179 }
12180
/* Inline-function form, defined only when __OPTIMIZE__ is set.  Same
   builtin as _mm512_fixupimm_ps (__builtin_ia32_fixupimmps512_mask),
   but with the caller-supplied mask __U instead of the constant
   (__mmask16) -1.  */
12181 extern __inline __m512
12182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12183 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12184 __m512i __C, const int __imm)
12185 {
12186 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12187 (__v16sf) __B,
12188 (__v16si) __C,
12189 __imm,
12190 (__mmask16) __U,
12191 _MM_FROUND_CUR_DIRECTION);
12192 }
12193
/* Inline-function form, defined only when __OPTIMIZE__ is set.  Calls
   __builtin_ia32_fixupimmps512_maskz on __A, __B and __C (cast to
   __v16si), forwarding __imm as the fourth argument, with the mask __U
   and _MM_FROUND_CUR_DIRECTION last.  */
12194 extern __inline __m512
12195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12196 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12197 __m512i __C, const int __imm)
12198 {
12199 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12200 (__v16sf) __B,
12201 (__v16si) __C,
12202 __imm,
12203 (__mmask16) __U,
12204 _MM_FROUND_CUR_DIRECTION);
12205 }
12206
/* Inline-function form, defined only when __OPTIMIZE__ is set (a macro
   of the same name is provided otherwise).  Calls
   __builtin_ia32_fixupimmsd_mask on __A, __B and __C (cast to __v2di),
   forwarding __imm as the fourth argument, with the constant mask
   (__mmask8) -1 and _MM_FROUND_CUR_DIRECTION last.  */
12207 extern __inline __m128d
12208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12209 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12210 {
12211 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12212 (__v2df) __B,
12213 (__v2di) __C, __imm,
12214 (__mmask8) -1,
12215 _MM_FROUND_CUR_DIRECTION);
12216 }
12217
/* Inline-function form, defined only when __OPTIMIZE__ is set.  Same
   builtin as _mm_fixupimm_sd (__builtin_ia32_fixupimmsd_mask), but with
   the caller-supplied mask __U instead of the constant (__mmask8) -1.  */
12218 extern __inline __m128d
12219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12220 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12221 __m128i __C, const int __imm)
12222 {
12223 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12224 (__v2df) __B,
12225 (__v2di) __C, __imm,
12226 (__mmask8) __U,
12227 _MM_FROUND_CUR_DIRECTION);
12228 }
12229
/* Inline-function form, defined only when __OPTIMIZE__ is set.  Calls
   __builtin_ia32_fixupimmsd_maskz on __A, __B and __C (cast to __v2di),
   forwarding __imm as the fourth argument, with the mask __U and
   _MM_FROUND_CUR_DIRECTION last.  */
12230 extern __inline __m128d
12231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12232 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12233 __m128i __C, const int __imm)
12234 {
12235 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12236 (__v2df) __B,
12237 (__v2di) __C,
12238 __imm,
12239 (__mmask8) __U,
12240 _MM_FROUND_CUR_DIRECTION);
12241 }
12242
/* Inline-function form, defined only when __OPTIMIZE__ is set (a macro
   of the same name is provided otherwise).  Calls
   __builtin_ia32_fixupimmss_mask on __A, __B and __C (cast to __v4si),
   forwarding __imm as the fourth argument, with the constant mask
   (__mmask8) -1 and _MM_FROUND_CUR_DIRECTION last.  */
12243 extern __inline __m128
12244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12245 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12246 {
12247 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12248 (__v4sf) __B,
12249 (__v4si) __C, __imm,
12250 (__mmask8) -1,
12251 _MM_FROUND_CUR_DIRECTION);
12252 }
12253
/* Inline-function form, defined only when __OPTIMIZE__ is set.  Same
   builtin as _mm_fixupimm_ss (__builtin_ia32_fixupimmss_mask), but with
   the caller-supplied mask __U instead of the constant (__mmask8) -1.  */
12254 extern __inline __m128
12255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12256 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12257 __m128i __C, const int __imm)
12258 {
12259 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12260 (__v4sf) __B,
12261 (__v4si) __C, __imm,
12262 (__mmask8) __U,
12263 _MM_FROUND_CUR_DIRECTION);
12264 }
12265
/* Inline-function form, defined only when __OPTIMIZE__ is set.  Calls
   __builtin_ia32_fixupimmss_maskz on __A, __B and __C (cast to __v4si),
   forwarding __imm as the fourth argument, with the mask __U and
   _MM_FROUND_CUR_DIRECTION last.  */
12266 extern __inline __m128
12267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12268 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12269 __m128i __C, const int __imm)
12270 {
12271 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12272 (__v4sf) __B,
12273 (__v4si) __C, __imm,
12274 (__mmask8) __U,
12275 _MM_FROUND_CUR_DIRECTION);
12276 }
12277 #else
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a
   __builtin_ia32_fixupimmpd512_mask call with X, Y, Z cast to vector
   types, C cast to int, and the constant mask (__mmask8)(-1).  */
12278 #define _mm512_fixupimm_pd(X, Y, Z, C) \
12279 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
12280 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12281 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12282
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a
   __builtin_ia32_fixupimmpd512_mask call with X, Y, Z cast to vector
   types, C cast to int, and U cast to __mmask8 as the mask.  */
12283 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
12284 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
12285 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12286 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12287
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a
   __builtin_ia32_fixupimmpd512_maskz call with X, Y, Z cast to vector
   types, C cast to int, and U cast to __mmask8 as the mask.  */
12288 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
12289 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
12290 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12291 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12292
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a
   __builtin_ia32_fixupimmps512_mask call with X, Y, Z cast to vector
   types, C cast to int, and the constant mask (__mmask16)(-1).  */
12293 #define _mm512_fixupimm_ps(X, Y, Z, C) \
12294 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
12295 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12296 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
12297
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a
   __builtin_ia32_fixupimmps512_mask call with X, Y, Z cast to vector
   types, C cast to int, and U cast to __mmask16 as the mask.  */
12298 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
12299 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
12300 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12301 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12302
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a
   __builtin_ia32_fixupimmps512_maskz call with X, Y, Z cast to vector
   types, C cast to int, and U cast to __mmask16 as the mask.  */
12303 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
12304 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
12305 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12306 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12307
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a
   __builtin_ia32_fixupimmsd_mask call with X, Y, Z cast to vector
   types, C cast to int, and the constant mask (__mmask8)(-1).  */
12308 #define _mm_fixupimm_sd(X, Y, Z, C) \
12309 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12310 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12311 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12312
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a
   __builtin_ia32_fixupimmsd_mask call with X, Y, Z cast to vector
   types, C cast to int, and U cast to __mmask8 as the mask.  */
12313 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
12314 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12315 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12316 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12317
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a
   __builtin_ia32_fixupimmsd_maskz call with X, Y, Z cast to vector
   types, C cast to int, and U cast to __mmask8 as the mask.  */
12318 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
12319 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
12320 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12321 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12322
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a
   __builtin_ia32_fixupimmss_mask call with X, Y, Z cast to vector
   types, C cast to int, and the constant mask (__mmask8)(-1).  */
12323 #define _mm_fixupimm_ss(X, Y, Z, C) \
12324 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12325 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12326 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12327
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a
   __builtin_ia32_fixupimmss_mask call with X, Y, Z cast to vector
   types, C cast to int, and U cast to __mmask8 as the mask.  */
12328 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
12329 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12330 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12331 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12332
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to a
   __builtin_ia32_fixupimmss_maskz call with X, Y, Z cast to vector
   types, C cast to int, and U cast to __mmask8 as the mask.  */
12333 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
12334 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
12335 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12336 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12337 #endif
12338
12339 #ifdef __x86_64__
/* Only available when __x86_64__ is defined.  Returns, as unsigned long
   long, the result of __builtin_ia32_vcvtss2usi64 applied to __A with
   _MM_FROUND_CUR_DIRECTION as the second argument.  */
12340 extern __inline unsigned long long
12341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12342 _mm_cvtss_u64 (__m128 __A)
12343 {
12344 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12345 __A,
12346 _MM_FROUND_CUR_DIRECTION);
12347 }
12348
/* Only available when __x86_64__ is defined.  Returns, as unsigned long
   long, the result of __builtin_ia32_vcvttss2usi64 applied to __A with
   _MM_FROUND_CUR_DIRECTION as the second argument.  */
12349 extern __inline unsigned long long
12350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12351 _mm_cvttss_u64 (__m128 __A)
12352 {
12353 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12354 __A,
12355 _MM_FROUND_CUR_DIRECTION);
12356 }
12357
/* Only available when __x86_64__ is defined.  Returns, as long long,
   the result of __builtin_ia32_vcvttss2si64 applied to __A with
   _MM_FROUND_CUR_DIRECTION as the second argument.  */
12358 extern __inline long long
12359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12360 _mm_cvttss_i64 (__m128 __A)
12361 {
12362 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12363 _MM_FROUND_CUR_DIRECTION);
12364 }
12365 #endif /* __x86_64__ */
12366
12367 extern __inline unsigned
12368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12369 _mm_cvtss_u32 (__m128 __A)
12370 {
12371 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12372 _MM_FROUND_CUR_DIRECTION);
12373 }
12374
12375 extern __inline unsigned
12376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12377 _mm_cvttss_u32 (__m128 __A)
12378 {
12379 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12380 _MM_FROUND_CUR_DIRECTION);
12381 }
12382
12383 extern __inline int
12384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12385 _mm_cvttss_i32 (__m128 __A)
12386 {
12387 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12388 _MM_FROUND_CUR_DIRECTION);
12389 }
12390
12391 #ifdef __x86_64__
12392 extern __inline unsigned long long
12393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12394 _mm_cvtsd_u64 (__m128d __A)
12395 {
12396 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12397 __A,
12398 _MM_FROUND_CUR_DIRECTION);
12399 }
12400
12401 extern __inline unsigned long long
12402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12403 _mm_cvttsd_u64 (__m128d __A)
12404 {
12405 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12406 __A,
12407 _MM_FROUND_CUR_DIRECTION);
12408 }
12409
12410 extern __inline long long
12411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12412 _mm_cvttsd_i64 (__m128d __A)
12413 {
12414 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12415 _MM_FROUND_CUR_DIRECTION);
12416 }
12417 #endif /* __x86_64__ */
12418
12419 extern __inline unsigned
12420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12421 _mm_cvtsd_u32 (__m128d __A)
12422 {
12423 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12424 _MM_FROUND_CUR_DIRECTION);
12425 }
12426
12427 extern __inline unsigned
12428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12429 _mm_cvttsd_u32 (__m128d __A)
12430 {
12431 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12432 _MM_FROUND_CUR_DIRECTION);
12433 }
12434
12435 extern __inline int
12436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12437 _mm_cvttsd_i32 (__m128d __A)
12438 {
12439 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12440 _MM_FROUND_CUR_DIRECTION);
12441 }
12442
12443 extern __inline __m512d
12444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12445 _mm512_cvtps_pd (__m256 __A)
12446 {
12447 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12448 (__v8df)
12449 _mm512_undefined_pd (),
12450 (__mmask8) -1,
12451 _MM_FROUND_CUR_DIRECTION);
12452 }
12453
12454 extern __inline __m512d
12455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12456 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12457 {
12458 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12459 (__v8df) __W,
12460 (__mmask8) __U,
12461 _MM_FROUND_CUR_DIRECTION);
12462 }
12463
12464 extern __inline __m512d
12465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12466 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12467 {
12468 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12469 (__v8df)
12470 _mm512_setzero_pd (),
12471 (__mmask8) __U,
12472 _MM_FROUND_CUR_DIRECTION);
12473 }
12474
12475 extern __inline __m512
12476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12477 _mm512_cvtph_ps (__m256i __A)
12478 {
12479 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12480 (__v16sf)
12481 _mm512_undefined_ps (),
12482 (__mmask16) -1,
12483 _MM_FROUND_CUR_DIRECTION);
12484 }
12485
12486 extern __inline __m512
12487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12488 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12489 {
12490 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12491 (__v16sf) __W,
12492 (__mmask16) __U,
12493 _MM_FROUND_CUR_DIRECTION);
12494 }
12495
12496 extern __inline __m512
12497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12498 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12499 {
12500 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12501 (__v16sf)
12502 _mm512_setzero_ps (),
12503 (__mmask16) __U,
12504 _MM_FROUND_CUR_DIRECTION);
12505 }
12506
12507 extern __inline __m256
12508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12509 _mm512_cvtpd_ps (__m512d __A)
12510 {
12511 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12512 (__v8sf)
12513 _mm256_undefined_ps (),
12514 (__mmask8) -1,
12515 _MM_FROUND_CUR_DIRECTION);
12516 }
12517
12518 extern __inline __m256
12519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12520 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12521 {
12522 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12523 (__v8sf) __W,
12524 (__mmask8) __U,
12525 _MM_FROUND_CUR_DIRECTION);
12526 }
12527
12528 extern __inline __m256
12529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12530 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12531 {
12532 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12533 (__v8sf)
12534 _mm256_setzero_ps (),
12535 (__mmask8) __U,
12536 _MM_FROUND_CUR_DIRECTION);
12537 }
12538
12539 #ifdef __OPTIMIZE__
12540 extern __inline __m512
12541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12542 _mm512_getexp_ps (__m512 __A)
12543 {
12544 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12545 (__v16sf)
12546 _mm512_undefined_ps (),
12547 (__mmask16) -1,
12548 _MM_FROUND_CUR_DIRECTION);
12549 }
12550
12551 extern __inline __m512
12552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12553 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12554 {
12555 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12556 (__v16sf) __W,
12557 (__mmask16) __U,
12558 _MM_FROUND_CUR_DIRECTION);
12559 }
12560
12561 extern __inline __m512
12562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12563 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12564 {
12565 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12566 (__v16sf)
12567 _mm512_setzero_ps (),
12568 (__mmask16) __U,
12569 _MM_FROUND_CUR_DIRECTION);
12570 }
12571
12572 extern __inline __m512d
12573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12574 _mm512_getexp_pd (__m512d __A)
12575 {
12576 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12577 (__v8df)
12578 _mm512_undefined_pd (),
12579 (__mmask8) -1,
12580 _MM_FROUND_CUR_DIRECTION);
12581 }
12582
12583 extern __inline __m512d
12584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12585 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12586 {
12587 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12588 (__v8df) __W,
12589 (__mmask8) __U,
12590 _MM_FROUND_CUR_DIRECTION);
12591 }
12592
12593 extern __inline __m512d
12594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12595 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12596 {
12597 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12598 (__v8df)
12599 _mm512_setzero_pd (),
12600 (__mmask8) __U,
12601 _MM_FROUND_CUR_DIRECTION);
12602 }
12603
12604 extern __inline __m128
12605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12606 _mm_getexp_ss (__m128 __A, __m128 __B)
12607 {
12608 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12609 (__v4sf) __B,
12610 _MM_FROUND_CUR_DIRECTION);
12611 }
12612
12613 extern __inline __m128d
12614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12615 _mm_getexp_sd (__m128d __A, __m128d __B)
12616 {
12617 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12618 (__v2df) __B,
12619 _MM_FROUND_CUR_DIRECTION);
12620 }
12621
12622 extern __inline __m512d
12623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12624 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12625 _MM_MANTISSA_SIGN_ENUM __C)
12626 {
12627 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12628 (__C << 2) | __B,
12629 _mm512_undefined_pd (),
12630 (__mmask8) -1,
12631 _MM_FROUND_CUR_DIRECTION);
12632 }
12633
12634 extern __inline __m512d
12635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12636 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12637 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12638 {
12639 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12640 (__C << 2) | __B,
12641 (__v8df) __W, __U,
12642 _MM_FROUND_CUR_DIRECTION);
12643 }
12644
12645 extern __inline __m512d
12646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12647 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12648 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12649 {
12650 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12651 (__C << 2) | __B,
12652 (__v8df)
12653 _mm512_setzero_pd (),
12654 __U,
12655 _MM_FROUND_CUR_DIRECTION);
12656 }
12657
12658 extern __inline __m512
12659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12660 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12661 _MM_MANTISSA_SIGN_ENUM __C)
12662 {
12663 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12664 (__C << 2) | __B,
12665 _mm512_undefined_ps (),
12666 (__mmask16) -1,
12667 _MM_FROUND_CUR_DIRECTION);
12668 }
12669
12670 extern __inline __m512
12671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12672 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12673 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12674 {
12675 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12676 (__C << 2) | __B,
12677 (__v16sf) __W, __U,
12678 _MM_FROUND_CUR_DIRECTION);
12679 }
12680
12681 extern __inline __m512
12682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12683 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12684 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12685 {
12686 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12687 (__C << 2) | __B,
12688 (__v16sf)
12689 _mm512_setzero_ps (),
12690 __U,
12691 _MM_FROUND_CUR_DIRECTION);
12692 }
12693
12694 extern __inline __m128d
12695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12696 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12697 _MM_MANTISSA_SIGN_ENUM __D)
12698 {
12699 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12700 (__v2df) __B,
12701 (__D << 2) | __C,
12702 _MM_FROUND_CUR_DIRECTION);
12703 }
12704
12705 extern __inline __m128
12706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12707 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12708 _MM_MANTISSA_SIGN_ENUM __D)
12709 {
12710 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12711 (__v4sf) __B,
12712 (__D << 2) | __C,
12713 _MM_FROUND_CUR_DIRECTION);
12714 }
12715
12716 #else
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: passes
   _mm512_undefined_pd () and a mask of -1; the immediate is
   ((C) << 2) | (B).  */
#define _mm512_getmant_pd(X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(((C)<<2) | (B)), \
    (__v8df)_mm512_undefined_pd(), \
    (__mmask8)-1, \
    _MM_FROUND_CUR_DIRECTION))
12723
/* Macro form used when __OPTIMIZE__ is not defined.  Passes W as the
   third operand and U as the mask; the immediate is
   ((C) << 2) | (B).  */
#define _mm512_mask_getmant_pd(W, U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(((C)<<2) | (B)), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
12730
/* Macro form used when __OPTIMIZE__ is not defined.  Passes
   _mm512_setzero_pd () as the third operand and U as the mask; the
   immediate is ((C) << 2) | (B).  */
#define _mm512_maskz_getmant_pd(U, X, B, C) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ( \
    (__v8df)(__m512d)(X), \
    (int)(((C)<<2) | (B)), \
    (__v8df)_mm512_setzero_pd(), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: passes
   _mm512_undefined_ps () and a mask of -1; the immediate is
   ((C) << 2) | (B).  */
#define _mm512_getmant_ps(X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
    (__v16sf)(__m512)(X), \
    (int)(((C)<<2) | (B)), \
    (__v16sf)_mm512_undefined_ps(), \
    (__mmask16)-1, \
    _MM_FROUND_CUR_DIRECTION))
12743
/* Macro form used when __OPTIMIZE__ is not defined.  Passes W as the
   third operand and U as the mask; the immediate is
   ((C) << 2) | (B).  */
#define _mm512_mask_getmant_ps(W, U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
    (__v16sf)(__m512)(X), \
    (int)(((C)<<2) | (B)), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U), \
    _MM_FROUND_CUR_DIRECTION))
12750
/* Macro form used when __OPTIMIZE__ is not defined.  Passes
   _mm512_setzero_ps () as the third operand and U as the mask; the
   immediate is ((C) << 2) | (B).  */
#define _mm512_maskz_getmant_ps(U, X, B, C) \
  ((__m512) __builtin_ia32_getmantps512_mask ( \
    (__v16sf)(__m512)(X), \
    (int)(((C)<<2) | (B)), \
    (__v16sf)_mm512_setzero_ps(), \
    (__mmask16)(U), \
    _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_getmantsd_round on X and Y with the immediate
   ((D) << 2) | (C).  */
#define _mm_getmant_sd(X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_round ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (int)(((D)<<2) | (C)), \
    _MM_FROUND_CUR_DIRECTION))
12762
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_getmantss_round on X and Y with the immediate
   ((D) << 2) | (C).  */
#define _mm_getmant_ss(X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_round ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (int)(((D)<<2) | (C)), \
    _MM_FROUND_CUR_DIRECTION))
12768
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_getexpss128_round, the builtin the inline definition
   of _mm_getexp_ss calls with the same three operands, so both
   branches of the conditional resolve to the same builtin.  */
#define _mm_getexp_ss(A, B) \
  ((__m128) __builtin_ia32_getexpss128_round ( \
    (__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), \
    _MM_FROUND_CUR_DIRECTION))
12772
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_getexpsd128_round, the builtin the inline definition
   of _mm_getexp_sd calls with the same three operands, so both
   branches of the conditional resolve to the same builtin.  */
#define _mm_getexp_sd(A, B) \
  ((__m128d) __builtin_ia32_getexpsd128_round ( \
    (__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), \
    _MM_FROUND_CUR_DIRECTION))
12776
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: passes
   _mm512_undefined_ps () and a mask of -1.  */
#define _mm512_getexp_ps(A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
    (__v16sf)(__m512)(A), \
    (__v16sf)_mm512_undefined_ps(), \
    (__mmask16)-1, \
    _MM_FROUND_CUR_DIRECTION))
12780
/* Macro form used when __OPTIMIZE__ is not defined.  Passes W as the
   second vector operand and U as the mask.  */
#define _mm512_mask_getexp_ps(W, U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
    (__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U), \
    _MM_FROUND_CUR_DIRECTION))
12784
/* Macro form used when __OPTIMIZE__ is not defined.  Passes
   _mm512_setzero_ps () as the second vector operand and U as the
   mask.  */
#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512) __builtin_ia32_getexpps512_mask ( \
    (__v16sf)(__m512)(A), \
    (__v16sf)_mm512_setzero_ps(), \
    (__mmask16)(U), \
    _MM_FROUND_CUR_DIRECTION))
12788
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: passes
   _mm512_undefined_pd () and a mask of -1.  */
#define _mm512_getexp_pd(A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
    (__v8df)(__m512d)(A), \
    (__v8df)_mm512_undefined_pd(), \
    (__mmask8)-1, \
    _MM_FROUND_CUR_DIRECTION))
12792
/* Macro form used when __OPTIMIZE__ is not defined.  Passes W as the
   second vector operand and U as the mask.  */
#define _mm512_mask_getexp_pd(W, U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
    (__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
12796
/* Macro form used when __OPTIMIZE__ is not defined.  Passes
   _mm512_setzero_pd () as the second vector operand and U as the
   mask.  */
#define _mm512_maskz_getexp_pd(U, A) \
  ((__m512d) __builtin_ia32_getexppd512_mask ( \
    (__v8df)(__m512d)(A), \
    (__v8df)_mm512_setzero_pd(), \
    (__mmask8)(U), \
    _MM_FROUND_CUR_DIRECTION))
12800 #endif
12801
12802 #ifdef __OPTIMIZE__
12803 extern __inline __m512
12804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12805 _mm512_roundscale_ps (__m512 __A, const int __imm)
12806 {
12807 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
12808 (__v16sf)
12809 _mm512_undefined_ps (),
12810 -1,
12811 _MM_FROUND_CUR_DIRECTION);
12812 }
12813
12814 extern __inline __m512
12815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12816 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12817 const int __imm)
12818 {
12819 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12820 (__v16sf) __A,
12821 (__mmask16) __B,
12822 _MM_FROUND_CUR_DIRECTION);
12823 }
12824
12825 extern __inline __m512
12826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12827 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12828 {
12829 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12830 __imm,
12831 (__v16sf)
12832 _mm512_setzero_ps (),
12833 (__mmask16) __A,
12834 _MM_FROUND_CUR_DIRECTION);
12835 }
12836
12837 extern __inline __m512d
12838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12839 _mm512_roundscale_pd (__m512d __A, const int __imm)
12840 {
12841 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
12842 (__v8df)
12843 _mm512_undefined_pd (),
12844 -1,
12845 _MM_FROUND_CUR_DIRECTION);
12846 }
12847
12848 extern __inline __m512d
12849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12850 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12851 const int __imm)
12852 {
12853 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12854 (__v8df) __A,
12855 (__mmask8) __B,
12856 _MM_FROUND_CUR_DIRECTION);
12857 }
12858
12859 extern __inline __m512d
12860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12861 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12862 {
12863 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12864 __imm,
12865 (__v8df)
12866 _mm512_setzero_pd (),
12867 (__mmask8) __A,
12868 _MM_FROUND_CUR_DIRECTION);
12869 }
12870
12871 extern __inline __m128
12872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12873 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12874 {
12875 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12876 (__v4sf) __B, __imm,
12877 _MM_FROUND_CUR_DIRECTION);
12878 }
12879
12880 extern __inline __m128d
12881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12882 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12883 {
12884 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12885 (__v2df) __B, __imm,
12886 _MM_FROUND_CUR_DIRECTION);
12887 }
12888
12889 #else
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: A is the
   source, B the immediate; passes _mm512_undefined_ps () and a mask
   of -1.  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
    (__v16sf)(__m512)(A), \
    (int)(B), \
    (__v16sf)_mm512_undefined_ps(), \
    (__mmask16)(-1), \
    _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  C is the source,
   D the immediate, A the third operand and B the mask.  */
#define _mm512_mask_roundscale_ps(A, B, C, D) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
    (__v16sf)(__m512)(C), \
    (int)(D), \
    (__v16sf)(__m512)(A), \
    (__mmask16)(B), \
    _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  B is the source,
   C the immediate and A the mask; passes _mm512_setzero_ps () as the
   third operand.  */
#define _mm512_maskz_roundscale_ps(A, B, C) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
    (__v16sf)(__m512)(B), \
    (int)(C), \
    (__v16sf)_mm512_setzero_ps(), \
    (__mmask16)(A), \
    _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: A is the
   source, B the immediate; passes _mm512_undefined_pd () and a mask
   of -1.  */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
    (__v8df)(__m512d)(A), \
    (int)(B), \
    (__v8df)_mm512_undefined_pd(), \
    (__mmask8)(-1), \
    _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  C is the source,
   D the immediate, A the third operand and B the mask.  */
#define _mm512_mask_roundscale_pd(A, B, C, D) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
    (__v8df)(__m512d)(C), \
    (int)(D), \
    (__v8df)(__m512d)(A), \
    (__mmask8)(B), \
    _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  B is the source,
   C the immediate and A the mask; passes _mm512_setzero_pd () as the
   third operand.  */
#define _mm512_maskz_roundscale_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
    (__v8df)(__m512d)(B), \
    (int)(C), \
    (__v8df)_mm512_setzero_pd(), \
    (__mmask8)(A), \
    _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_rndscaless_round on A and B with immediate C.  */
#define _mm_roundscale_ss(A, B, C) \
  ((__m128) __builtin_ia32_rndscaless_round ( \
    (__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), \
    (int)(C), \
    _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_rndscalesd_round on A and B with immediate C.  */
#define _mm_roundscale_sd(A, B, C) \
  ((__m128d) __builtin_ia32_rndscalesd_round ( \
    (__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), \
    (int)(C), \
    _MM_FROUND_CUR_DIRECTION))
12922 #endif
12923
12924 #ifdef __OPTIMIZE__
12925 extern __inline __mmask8
12926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12927 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12928 {
12929 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12930 (__v8df) __Y, __P,
12931 (__mmask8) -1,
12932 _MM_FROUND_CUR_DIRECTION);
12933 }
12934
12935 extern __inline __mmask16
12936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12937 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12938 {
12939 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12940 (__v16sf) __Y, __P,
12941 (__mmask16) -1,
12942 _MM_FROUND_CUR_DIRECTION);
12943 }
12944
12945 extern __inline __mmask16
12946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12947 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12948 {
12949 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12950 (__v16sf) __Y, __P,
12951 (__mmask16) __U,
12952 _MM_FROUND_CUR_DIRECTION);
12953 }
12954
12955 extern __inline __mmask8
12956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12957 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12958 {
12959 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12960 (__v8df) __Y, __P,
12961 (__mmask8) __U,
12962 _MM_FROUND_CUR_DIRECTION);
12963 }
12964
12965 extern __inline __mmask8
12966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12967 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
12968 {
12969 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12970 (__v2df) __Y, __P,
12971 (__mmask8) -1,
12972 _MM_FROUND_CUR_DIRECTION);
12973 }
12974
12975 extern __inline __mmask8
12976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12977 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
12978 {
12979 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12980 (__v2df) __Y, __P,
12981 (__mmask8) __M,
12982 _MM_FROUND_CUR_DIRECTION);
12983 }
12984
12985 extern __inline __mmask8
12986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12987 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
12988 {
12989 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12990 (__v4sf) __Y, __P,
12991 (__mmask8) -1,
12992 _MM_FROUND_CUR_DIRECTION);
12993 }
12994
12995 extern __inline __mmask8
12996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12997 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
12998 {
12999 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
13000 (__v4sf) __Y, __P,
13001 (__mmask8) __M,
13002 _MM_FROUND_CUR_DIRECTION);
13003 }
13004
13005 #else
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: expands
   to __builtin_ia32_cmppd512_mask on X and Y with predicate P and a
   mask of -1.  */
#define _mm512_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (int)(P), \
    (__mmask8)-1, \
    _MM_FROUND_CUR_DIRECTION))
13010
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: expands
   to __builtin_ia32_cmpps512_mask on X and Y with predicate P and a
   mask of -1.  */
#define _mm512_cmp_ps_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (int)(P), \
    (__mmask16)-1, \
    _MM_FROUND_CUR_DIRECTION))
13015
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmppd512_mask on X and Y with predicate P and mask M.
   M is parenthesized before the __mmask8 cast so that an expression
   argument such as `a | b' is converted as a whole rather than only
   its first operand.  */
#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
    (__v8df)(__m512d)(X), \
    (__v8df)(__m512d)(Y), \
    (int)(P), \
    (__mmask8)(M), \
    _MM_FROUND_CUR_DIRECTION))
13020
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpps512_mask on X and Y with predicate P and mask M.
   M is parenthesized before the __mmask16 cast so that an expression
   argument such as `a | b' is converted as a whole rather than only
   its first operand.  */
#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
    (__v16sf)(__m512)(X), \
    (__v16sf)(__m512)(Y), \
    (int)(P), \
    (__mmask16)(M), \
    _MM_FROUND_CUR_DIRECTION))
13025
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: expands
   to __builtin_ia32_cmpsd_mask on X and Y with predicate P and a mask
   of -1.  */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (int)(P), \
    (__mmask8)-1, \
    _MM_FROUND_CUR_DIRECTION))
13030
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpsd_mask on X and Y with predicate P and mask M.
   M is parenthesized and cast to __mmask8, matching both the inline
   definition of this intrinsic and the treatment of the other macro
   arguments.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ( \
    (__v2df)(__m128d)(X), \
    (__v2df)(__m128d)(Y), \
    (int)(P), \
    (__mmask8)(M), \
    _MM_FROUND_CUR_DIRECTION))
13035
/* Macro form used when __OPTIMIZE__ is not defined.  Unmasked: expands
   to __builtin_ia32_cmpss_mask on X and Y with predicate P and a mask
   of -1.  */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (int)(P), \
    (__mmask8)-1, \
    _MM_FROUND_CUR_DIRECTION))
13040
/* Macro form used when __OPTIMIZE__ is not defined.  Expands to
   __builtin_ia32_cmpss_mask on X and Y with predicate P and mask M.
   M is parenthesized and cast to __mmask8, matching both the inline
   definition of this intrinsic and the treatment of the other macro
   arguments.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ( \
    (__v4sf)(__m128)(X), \
    (__v4sf)(__m128)(Y), \
    (int)(P), \
    (__mmask8)(M), \
    _MM_FROUND_CUR_DIRECTION))
13045 #endif
13046
13047 extern __inline __mmask16
13048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13049 _mm512_kmov (__mmask16 __A)
13050 {
13051 return __builtin_ia32_kmov16 (__A);
13052 }
13053
13054 extern __inline __m512
13055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13056 _mm512_castpd_ps (__m512d __A)
13057 {
13058 return (__m512) (__A);
13059 }
13060
13061 extern __inline __m512i
13062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13063 _mm512_castpd_si512 (__m512d __A)
13064 {
13065 return (__m512i) (__A);
13066 }
13067
13068 extern __inline __m512d
13069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13070 _mm512_castps_pd (__m512 __A)
13071 {
13072 return (__m512d) (__A);
13073 }
13074
13075 extern __inline __m512i
13076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13077 _mm512_castps_si512 (__m512 __A)
13078 {
13079 return (__m512i) (__A);
13080 }
13081
13082 extern __inline __m512
13083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13084 _mm512_castsi512_ps (__m512i __A)
13085 {
13086 return (__m512) (__A);
13087 }
13088
13089 extern __inline __m512d
13090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13091 _mm512_castsi512_pd (__m512i __A)
13092 {
13093 return (__m512d) (__A);
13094 }
13095
13096 extern __inline __m128d
13097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13098 _mm512_castpd512_pd128 (__m512d __A)
13099 {
13100 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
13101 }
13102
13103 extern __inline __m128
13104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13105 _mm512_castps512_ps128 (__m512 __A)
13106 {
13107 return _mm512_extractf32x4_ps(__A, 0);
13108 }
13109
13110 extern __inline __m128i
13111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13112 _mm512_castsi512_si128 (__m512i __A)
13113 {
13114 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
13115 }
13116
13117 extern __inline __m256d
13118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13119 _mm512_castpd512_pd256 (__m512d __A)
13120 {
13121 return _mm512_extractf64x4_pd(__A, 0);
13122 }
13123
13124 extern __inline __m256
13125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13126 _mm512_castps512_ps256 (__m512 __A)
13127 {
13128 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
13129 }
13130
13131 extern __inline __m256i
13132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13133 _mm512_castsi512_si256 (__m512i __A)
13134 {
13135 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
13136 }
13137
13138 extern __inline __m512d
13139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13140 _mm512_castpd128_pd512 (__m128d __A)
13141 {
13142 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
13143 }
13144
13145 extern __inline __m512
13146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13147 _mm512_castps128_ps512 (__m128 __A)
13148 {
13149 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
13150 }
13151
13152 extern __inline __m512i
13153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13154 _mm512_castsi128_si512 (__m128i __A)
13155 {
13156 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
13157 }
13158
13159 extern __inline __m512d
13160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13161 _mm512_castpd256_pd512 (__m256d __A)
13162 {
13163 return __builtin_ia32_pd512_256pd (__A);
13164 }
13165
13166 extern __inline __m512
13167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13168 _mm512_castps256_ps512 (__m256 __A)
13169 {
13170 return __builtin_ia32_ps512_256ps (__A);
13171 }
13172
13173 extern __inline __m512i
13174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13175 _mm512_castsi256_si512 (__m256i __A)
13176 {
13177 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
13178 }
13179
13180 extern __inline __mmask16
13181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13182 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13183 {
13184 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13185 (__v16si) __B, 0,
13186 (__mmask16) -1);
13187 }
13188
13189 extern __inline __mmask16
13190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13191 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13192 {
13193 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13194 (__v16si) __B, 0, __U);
13195 }
13196
13197 extern __inline __mmask8
13198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13199 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13200 {
13201 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13202 (__v8di) __B, 0, __U);
13203 }
13204
13205 extern __inline __mmask8
13206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13207 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13208 {
13209 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13210 (__v8di) __B, 0,
13211 (__mmask8) -1);
13212 }
13213
13214 extern __inline __mmask16
13215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13216 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13217 {
13218 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13219 (__v16si) __B, 6,
13220 (__mmask16) -1);
13221 }
13222
13223 extern __inline __mmask16
13224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13225 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13226 {
13227 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13228 (__v16si) __B, 6, __U);
13229 }
13230
13231 extern __inline __mmask8
13232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13233 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13234 {
13235 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13236 (__v8di) __B, 6, __U);
13237 }
13238
13239 extern __inline __mmask8
13240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13241 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13242 {
13243 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13244 (__v8di) __B, 6,
13245 (__mmask8) -1);
13246 }
13247
13248 #ifdef __DISABLE_AVX512F__
13249 #undef __DISABLE_AVX512F__
13250 #pragma GCC pop_options
13251 #endif /* __DISABLE_AVX512F__ */
13252
13253 #endif /* _AVX512FINTRIN_H_INCLUDED */