Add AMD SSE5 support; Add iterator over function arguments; Add stdarg_p, prototype_p...
[gcc.git] / gcc / config / i386 / bmmintrin.h
1 /* Copyright (C) 2007 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with GCC; see the file COPYING. If not, write to
17 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
18 Boston, MA 02110-1301, USA. */
19
20 /* As a special exception, if you include this header file into source
21 files compiled by GCC, this header file does not by itself cause
22 the resulting executable to be covered by the GNU General Public
23 License. This exception does not however invalidate any other
24 reasons why the executable file might be covered by the GNU General
25 Public License. */
26
27 #ifndef _BMMINTRIN_H_INCLUDED
28 #define _BMMINTRIN_H_INCLUDED
29
30 #ifndef __SSE5__
31 # error "SSE5 instruction set not enabled"
32 #else
33
34 /* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */
35 #include <ammintrin.h>
36 #include <mmintrin-common.h>
37
38 /* Floating point multiply/add type instructions */
39 static __inline __m128 __attribute__((__always_inline__))
40 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
41 {
42 return (__m128) __builtin_ia32_fmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
43 }
44
45 static __inline __m128d __attribute__((__always_inline__))
46 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
47 {
48 return (__m128d) __builtin_ia32_fmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
49 }
50
51 static __inline __m128 __attribute__((__always_inline__))
52 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
53 {
54 return (__m128) __builtin_ia32_fmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
55 }
56
57 static __inline __m128d __attribute__((__always_inline__))
58 _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
59 {
60 return (__m128d) __builtin_ia32_fmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
61 }
62
63 static __inline __m128 __attribute__((__always_inline__))
64 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
65 {
66 return (__m128) __builtin_ia32_fmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
67 }
68
69 static __inline __m128d __attribute__((__always_inline__))
70 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
71 {
72 return (__m128d) __builtin_ia32_fmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
73 }
74
75 static __inline __m128 __attribute__((__always_inline__))
76 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
77 {
78 return (__m128) __builtin_ia32_fmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
79 }
80
81 static __inline __m128d __attribute__((__always_inline__))
82 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
83 {
84 return (__m128d) __builtin_ia32_fmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
85 }
86
87 static __inline __m128 __attribute__((__always_inline__))
88 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
89 {
90 return (__m128) __builtin_ia32_fnmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
91 }
92
93 static __inline __m128d __attribute__((__always_inline__))
94 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
95 {
96 return (__m128d) __builtin_ia32_fnmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
97 }
98
99 static __inline __m128 __attribute__((__always_inline__))
100 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
101 {
102 return (__m128) __builtin_ia32_fnmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
103 }
104
105 static __inline __m128d __attribute__((__always_inline__))
106 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
107 {
108 return (__m128d) __builtin_ia32_fnmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
109 }
110
111 static __inline __m128 __attribute__((__always_inline__))
112 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
113 {
114 return (__m128) __builtin_ia32_fnmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
115 }
116
117 static __inline __m128d __attribute__((__always_inline__))
118 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
119 {
120 return (__m128d) __builtin_ia32_fnmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
121 }
122
123 static __inline __m128 __attribute__((__always_inline__))
124 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
125 {
126 return (__m128) __builtin_ia32_fnmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
127 }
128
129 static __inline __m128d __attribute__((__always_inline__))
130 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
131 {
132 return (__m128d) __builtin_ia32_fnmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
133 }
134
/* Integer multiply/add instructions. */
136 static __inline __m128i __attribute__((__always_inline__))
137 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
138 {
139 return (__m128i) __builtin_ia32_pmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
140 }
141
142 static __inline __m128i __attribute__((__always_inline__))
143 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
144 {
145 return (__m128i) __builtin_ia32_pmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
146 }
147
148 static __inline __m128i __attribute__((__always_inline__))
149 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
150 {
151 return (__m128i) __builtin_ia32_pmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
152 }
153
154 static __inline __m128i __attribute__((__always_inline__))
155 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
156 {
157 return (__m128i) __builtin_ia32_pmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
158 }
159
160 static __inline __m128i __attribute__((__always_inline__))
161 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
162 {
163 return (__m128i) __builtin_ia32_pmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
164 }
165
166 static __inline __m128i __attribute__((__always_inline__))
167 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
168 {
169 return (__m128i) __builtin_ia32_pmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
170 }
171
172 static __inline __m128i __attribute__((__always_inline__))
173 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
174 {
175 return (__m128i) __builtin_ia32_pmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
176 }
177
178 static __inline __m128i __attribute__((__always_inline__))
179 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
180 {
181 return (__m128i) __builtin_ia32_pmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
182 }
183
184 static __inline __m128i __attribute__((__always_inline__))
185 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
186 {
187 return (__m128i) __builtin_ia32_pmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
188 }
189
190 static __inline __m128i __attribute__((__always_inline__))
191 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
192 {
193 return (__m128i) __builtin_ia32_pmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
194 }
195
196 static __inline __m128i __attribute__((__always_inline__))
197 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
198 {
199 return (__m128i) __builtin_ia32_pmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
200 }
201
202 static __inline __m128i __attribute__((__always_inline__))
203 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
204 {
205 return (__m128i) __builtin_ia32_pmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
206 }
207
208 /* Packed Integer Horizontal Add and Subtract */
209 static __inline __m128i __attribute__((__always_inline__))
210 _mm_haddw_epi8(__m128i __A)
211 {
212 return (__m128i) __builtin_ia32_phaddbw ((__v16qi)__A);
213 }
214
215 static __inline __m128i __attribute__((__always_inline__))
216 _mm_haddd_epi8(__m128i __A)
217 {
218 return (__m128i) __builtin_ia32_phaddbd ((__v16qi)__A);
219 }
220
221 static __inline __m128i __attribute__((__always_inline__))
222 _mm_haddq_epi8(__m128i __A)
223 {
224 return (__m128i) __builtin_ia32_phaddbq ((__v16qi)__A);
225 }
226
227 static __inline __m128i __attribute__((__always_inline__))
228 _mm_haddd_epi16(__m128i __A)
229 {
230 return (__m128i) __builtin_ia32_phaddwd ((__v8hi)__A);
231 }
232
233 static __inline __m128i __attribute__((__always_inline__))
234 _mm_haddq_epi16(__m128i __A)
235 {
236 return (__m128i) __builtin_ia32_phaddwq ((__v8hi)__A);
237 }
238
239 static __inline __m128i __attribute__((__always_inline__))
240 _mm_haddq_epi32(__m128i __A)
241 {
242 return (__m128i) __builtin_ia32_phadddq ((__v4si)__A);
243 }
244
245 static __inline __m128i __attribute__((__always_inline__))
246 _mm_haddw_epu8(__m128i __A)
247 {
248 return (__m128i) __builtin_ia32_phaddubw ((__v16qi)__A);
249 }
250
251 static __inline __m128i __attribute__((__always_inline__))
252 _mm_haddd_epu8(__m128i __A)
253 {
254 return (__m128i) __builtin_ia32_phaddubd ((__v16qi)__A);
255 }
256
257 static __inline __m128i __attribute__((__always_inline__))
258 _mm_haddq_epu8(__m128i __A)
259 {
260 return (__m128i) __builtin_ia32_phaddubq ((__v16qi)__A);
261 }
262
263 static __inline __m128i __attribute__((__always_inline__))
264 _mm_haddd_epu16(__m128i __A)
265 {
266 return (__m128i) __builtin_ia32_phadduwd ((__v8hi)__A);
267 }
268
269 static __inline __m128i __attribute__((__always_inline__))
270 _mm_haddq_epu16(__m128i __A)
271 {
272 return (__m128i) __builtin_ia32_phadduwq ((__v8hi)__A);
273 }
274
275 static __inline __m128i __attribute__((__always_inline__))
276 _mm_haddq_epu32(__m128i __A)
277 {
278 return (__m128i) __builtin_ia32_phaddudq ((__v4si)__A);
279 }
280
281 static __inline __m128i __attribute__((__always_inline__))
282 _mm_hsubw_epi8(__m128i __A)
283 {
284 return (__m128i) __builtin_ia32_phsubbw ((__v16qi)__A);
285 }
286
287 static __inline __m128i __attribute__((__always_inline__))
288 _mm_hsubd_epi16(__m128i __A)
289 {
290 return (__m128i) __builtin_ia32_phsubwd ((__v8hi)__A);
291 }
292
293 static __inline __m128i __attribute__((__always_inline__))
294 _mm_hsubq_epi32(__m128i __A)
295 {
296 return (__m128i) __builtin_ia32_phsubdq ((__v4si)__A);
297 }
298
299 /* Vector conditional move and permute */
300 static __inline __m128i __attribute__((__always_inline__))
301 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
302 {
303 return (__m128i) __builtin_ia32_pcmov (__A, __B, __C);
304 }
305
306 static __inline __m128i __attribute__((__always_inline__))
307 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
308 {
309 return (__m128i) __builtin_ia32_pperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
310 }
311
312 static __inline __m128 __attribute__((__always_inline__))
313 _mm_perm_ps(__m128 __A, __m128 __B, __m128i __C)
314 {
315 return (__m128) __builtin_ia32_permps ((__m128)__A, (__m128)__B, (__v16qi)__C);
316 }
317
318 static __inline __m128d __attribute__((__always_inline__))
319 _mm_perm_pd(__m128d __A, __m128d __B, __m128i __C)
320 {
321 return (__m128d) __builtin_ia32_permpd ((__m128d)__A, (__m128d)__B, (__v16qi)__C);
322 }
323
324 /* Packed Integer Rotates and Shifts */
325
326 /* Rotates - Non-Immediate form */
327 static __inline __m128i __attribute__((__always_inline__))
328 _mm_rot_epi8(__m128i __A, __m128i __B)
329 {
330 return (__m128i) __builtin_ia32_protb ((__v16qi)__A, (__v16qi)__B);
331 }
332
333 static __inline __m128i __attribute__((__always_inline__))
334 _mm_rot_epi16(__m128i __A, __m128i __B)
335 {
336 return (__m128i) __builtin_ia32_protw ((__v8hi)__A, (__v8hi)__B);
337 }
338
339 static __inline __m128i __attribute__((__always_inline__))
340 _mm_rot_epi32(__m128i __A, __m128i __B)
341 {
342 return (__m128i) __builtin_ia32_protd ((__v4si)__A, (__v4si)__B);
343 }
344
345 static __inline __m128i __attribute__((__always_inline__))
346 _mm_rot_epi64(__m128i __A, __m128i __B)
347 {
348 return (__m128i) __builtin_ia32_protq ((__v2di)__A, (__v2di)__B);
349 }
350
351
/* Rotates - Immediate form.
   Each variant casts __A to the element-width vector type and forwards the
   count __B unchanged to the matching __builtin_ia32_prot*i builtin.
   NOTE(review): the macro fallback for non-optimizing builds suggests the
   builtins need a literal constant count, which an un-inlined function
   parameter cannot supply -- confirm against the builtin definitions.  */
#ifdef __OPTIMIZE__
static __inline __m128i __attribute__((__always_inline__))
_mm_roti_epi8(__m128i __A, int __B)
{
  return (__m128i) __builtin_ia32_protbi ((__v16qi)__A, __B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_roti_epi16(__m128i __A, int __B)
{
  return (__m128i) __builtin_ia32_protwi ((__v8hi)__A, __B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_roti_epi32(__m128i __A, int __B)
{
  return (__m128i) __builtin_ia32_protdi ((__v4si)__A, __B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_roti_epi64(__m128i __A, int __B)
{
  return (__m128i) __builtin_ia32_protqi ((__v2di)__A, __B);
}
#else
/* Macro forms.  Each expansion is one fully parenthesized expression cast to
   __m128i; both arguments are parenthesized so that expression arguments
   expand safely.  */
#define _mm_roti_epi8(A, B) \
  ((__m128i) __builtin_ia32_protbi ((__v16qi)(A), (B)))
#define _mm_roti_epi16(A, B) \
  ((__m128i) __builtin_ia32_protwi ((__v8hi)(A), (B)))
#define _mm_roti_epi32(A, B) \
  ((__m128i) __builtin_ia32_protdi ((__v4si)(A), (B)))
#define _mm_roti_epi64(A, B) \
  ((__m128i) __builtin_ia32_protqi ((__v2di)(A), (B)))
#endif
383
384 /* pshl */
385
386 static __inline __m128i __attribute__((__always_inline__))
387 _mm_shl_epi8(__m128i __A, __m128i __B)
388 {
389 return (__m128i) __builtin_ia32_pshlb ((__v16qi)__A, (__v16qi)__B);
390 }
391
392 static __inline __m128i __attribute__((__always_inline__))
393 _mm_shl_epi16(__m128i __A, __m128i __B)
394 {
395 return (__m128i) __builtin_ia32_pshlw ((__v8hi)__A, (__v8hi)__B);
396 }
397
398 static __inline __m128i __attribute__((__always_inline__))
399 _mm_shl_epi32(__m128i __A, __m128i __B)
400 {
401 return (__m128i) __builtin_ia32_pshld ((__v4si)__A, (__v4si)__B);
402 }
403
404 static __inline __m128i __attribute__((__always_inline__))
405 _mm_shl_epi64(__m128i __A, __m128i __B)
406 {
407 return (__m128i) __builtin_ia32_pshlq ((__v2di)__A, (__v2di)__B);
408 }
409
410 /* psha */
411 static __inline __m128i __attribute__((__always_inline__))
412 _mm_sha_epi8(__m128i __A, __m128i __B)
413 {
414 return (__m128i) __builtin_ia32_pshab ((__v16qi)__A, (__v16qi)__B);
415 }
416
417 static __inline __m128i __attribute__((__always_inline__))
418 _mm_sha_epi16(__m128i __A, __m128i __B)
419 {
420 return (__m128i) __builtin_ia32_pshaw ((__v8hi)__A, (__v8hi)__B);
421 }
422
423 static __inline __m128i __attribute__((__always_inline__))
424 _mm_sha_epi32(__m128i __A, __m128i __B)
425 {
426 return (__m128i) __builtin_ia32_pshad ((__v4si)__A, (__v4si)__B);
427 }
428
429 static __inline __m128i __attribute__((__always_inline__))
430 _mm_sha_epi64(__m128i __A, __m128i __B)
431 {
432 return (__m128i) __builtin_ia32_pshaq ((__v2di)__A, (__v2di)__B);
433 }
434
435 /* Compare and Predicate Generation */
436
437 /* com (floating point, packed single) */
438 static __inline __m128 __attribute__((__always_inline__))
439 _mm_comeq_ps(__m128 __A, __m128 __B)
440 {
441 return (__m128) __builtin_ia32_comeqps ((__v4sf)__A, (__v4sf)__B);
442 }
443
444 static __inline __m128 __attribute__((__always_inline__))
445 _mm_comlt_ps(__m128 __A, __m128 __B)
446 {
447 return (__m128) __builtin_ia32_comltps ((__v4sf)__A, (__v4sf)__B);
448 }
449
450 static __inline __m128 __attribute__((__always_inline__))
451 _mm_comle_ps(__m128 __A, __m128 __B)
452 {
453 return (__m128) __builtin_ia32_comleps ((__v4sf)__A, (__v4sf)__B);
454 }
455
456 static __inline __m128 __attribute__((__always_inline__))
457 _mm_comunord_ps(__m128 __A, __m128 __B)
458 {
459 return (__m128) __builtin_ia32_comunordps ((__v4sf)__A, (__v4sf)__B);
460 }
461
462 static __inline __m128 __attribute__((__always_inline__))
463 _mm_comneq_ps(__m128 __A, __m128 __B)
464 {
465 return (__m128) __builtin_ia32_comuneqps ((__v4sf)__A, (__v4sf)__B);
466 }
467
468 static __inline __m128 __attribute__((__always_inline__))
469 _mm_comnlt_ps(__m128 __A, __m128 __B)
470 {
471 return (__m128) __builtin_ia32_comunltps ((__v4sf)__A, (__v4sf)__B);
472 }
473
474 static __inline __m128 __attribute__((__always_inline__))
475 _mm_comnle_ps(__m128 __A, __m128 __B)
476 {
477 return (__m128) __builtin_ia32_comunleps ((__v4sf)__A, (__v4sf)__B);
478 }
479
480
481 static __inline __m128 __attribute__((__always_inline__))
482 _mm_comord_ps(__m128 __A, __m128 __B)
483 {
484 return (__m128) __builtin_ia32_comordps ((__v4sf)__A, (__v4sf)__B);
485 }
486
487
488 static __inline __m128 __attribute__((__always_inline__))
489 _mm_comueq_ps(__m128 __A, __m128 __B)
490 {
491 return (__m128) __builtin_ia32_comueqps ((__v4sf)__A, (__v4sf)__B);
492 }
493
494 static __inline __m128 __attribute__((__always_inline__))
495 _mm_comnge_ps(__m128 __A, __m128 __B)
496 {
497 return (__m128) __builtin_ia32_comungeps ((__v4sf)__A, (__v4sf)__B);
498 }
499
500 static __inline __m128 __attribute__((__always_inline__))
501 _mm_comngt_ps(__m128 __A, __m128 __B)
502 {
503 return (__m128) __builtin_ia32_comungtps ((__v4sf)__A, (__v4sf)__B);
504 }
505
506 static __inline __m128 __attribute__((__always_inline__))
507 _mm_comfalse_ps(__m128 __A, __m128 __B)
508 {
509 return (__m128) __builtin_ia32_comfalseps ((__v4sf)__A, (__v4sf)__B);
510 }
511
512 static __inline __m128 __attribute__((__always_inline__))
513 _mm_comoneq_ps(__m128 __A, __m128 __B)
514 {
515 return (__m128) __builtin_ia32_comneqps ((__v4sf)__A, (__v4sf)__B);
516 }
517
518 static __inline __m128 __attribute__((__always_inline__))
519 _mm_comge_ps(__m128 __A, __m128 __B)
520 {
521 return (__m128) __builtin_ia32_comgeps ((__v4sf)__A, (__v4sf)__B);
522 }
523
524 static __inline __m128 __attribute__((__always_inline__))
525 _mm_comgt_ps(__m128 __A, __m128 __B)
526 {
527 return (__m128) __builtin_ia32_comgtps ((__v4sf)__A, (__v4sf)__B);
528 }
529
530 static __inline __m128 __attribute__((__always_inline__))
531 _mm_comtrue_ps(__m128 __A, __m128 __B)
532 {
533 return (__m128) __builtin_ia32_comtrueps ((__v4sf)__A, (__v4sf)__B);
534 }
535
536 /* com (floating point, packed double) */
537
538 static __inline __m128d __attribute__((__always_inline__))
539 _mm_comeq_pd(__m128d __A, __m128d __B)
540 {
541 return (__m128d) __builtin_ia32_comeqpd ((__v2df)__A, (__v2df)__B);
542 }
543
544 static __inline __m128d __attribute__((__always_inline__))
545 _mm_comlt_pd(__m128d __A, __m128d __B)
546 {
547 return (__m128d) __builtin_ia32_comltpd ((__v2df)__A, (__v2df)__B);
548 }
549
550 static __inline __m128d __attribute__((__always_inline__))
551 _mm_comle_pd(__m128d __A, __m128d __B)
552 {
553 return (__m128d) __builtin_ia32_comlepd ((__v2df)__A, (__v2df)__B);
554 }
555
556 static __inline __m128d __attribute__((__always_inline__))
557 _mm_comunord_pd(__m128d __A, __m128d __B)
558 {
559 return (__m128d) __builtin_ia32_comunordpd ((__v2df)__A, (__v2df)__B);
560 }
561
562 static __inline __m128d __attribute__((__always_inline__))
563 _mm_comneq_pd(__m128d __A, __m128d __B)
564 {
565 return (__m128d) __builtin_ia32_comuneqpd ((__v2df)__A, (__v2df)__B);
566 }
567
568 static __inline __m128d __attribute__((__always_inline__))
569 _mm_comnlt_pd(__m128d __A, __m128d __B)
570 {
571 return (__m128d) __builtin_ia32_comunltpd ((__v2df)__A, (__v2df)__B);
572 }
573
574 static __inline __m128d __attribute__((__always_inline__))
575 _mm_comnle_pd(__m128d __A, __m128d __B)
576 {
577 return (__m128d) __builtin_ia32_comunlepd ((__v2df)__A, (__v2df)__B);
578 }
579
580
581 static __inline __m128d __attribute__((__always_inline__))
582 _mm_comord_pd(__m128d __A, __m128d __B)
583 {
584 return (__m128d) __builtin_ia32_comordpd ((__v2df)__A, (__v2df)__B);
585 }
586
587 static __inline __m128d __attribute__((__always_inline__))
588 _mm_comueq_pd(__m128d __A, __m128d __B)
589 {
590 return (__m128d) __builtin_ia32_comueqpd ((__v2df)__A, (__v2df)__B);
591 }
592
593 static __inline __m128d __attribute__((__always_inline__))
594 _mm_comnge_pd(__m128d __A, __m128d __B)
595 {
596 return (__m128d) __builtin_ia32_comungepd ((__v2df)__A, (__v2df)__B);
597 }
598
599 static __inline __m128d __attribute__((__always_inline__))
600 _mm_comngt_pd(__m128d __A, __m128d __B)
601 {
602 return (__m128d) __builtin_ia32_comungtpd ((__v2df)__A, (__v2df)__B);
603 }
604
605 static __inline __m128d __attribute__((__always_inline__))
606 _mm_comfalse_pd(__m128d __A, __m128d __B)
607 {
608 return (__m128d) __builtin_ia32_comfalsepd ((__v2df)__A, (__v2df)__B);
609 }
610
611 static __inline __m128d __attribute__((__always_inline__))
612 _mm_comoneq_pd(__m128d __A, __m128d __B)
613 {
614 return (__m128d) __builtin_ia32_comneqpd ((__v2df)__A, (__v2df)__B);
615 }
616
617 static __inline __m128d __attribute__((__always_inline__))
618 _mm_comge_pd(__m128d __A, __m128d __B)
619 {
620 return (__m128d) __builtin_ia32_comgepd ((__v2df)__A, (__v2df)__B);
621 }
622
623 static __inline __m128d __attribute__((__always_inline__))
624 _mm_comgt_pd(__m128d __A, __m128d __B)
625 {
626 return (__m128d) __builtin_ia32_comgtpd ((__v2df)__A, (__v2df)__B);
627 }
628
629 static __inline __m128d __attribute__((__always_inline__))
630 _mm_comtrue_pd(__m128d __A, __m128d __B)
631 {
632 return (__m128d) __builtin_ia32_comtruepd ((__v2df)__A, (__v2df)__B);
633 }
634
635 /* com (floating point, scalar single) */
636 static __inline __m128 __attribute__((__always_inline__))
637 _mm_comeq_ss(__m128 __A, __m128 __B)
638 {
639 return (__m128) __builtin_ia32_comeqss ((__v4sf)__A, (__v4sf)__B);
640 }
641
642 static __inline __m128 __attribute__((__always_inline__))
643 _mm_comlt_ss(__m128 __A, __m128 __B)
644 {
645 return (__m128) __builtin_ia32_comltss ((__v4sf)__A, (__v4sf)__B);
646 }
647
648 static __inline __m128 __attribute__((__always_inline__))
649 _mm_comle_ss(__m128 __A, __m128 __B)
650 {
651 return (__m128) __builtin_ia32_comless ((__v4sf)__A, (__v4sf)__B);
652 }
653
654 static __inline __m128 __attribute__((__always_inline__))
655 _mm_comunord_ss(__m128 __A, __m128 __B)
656 {
657 return (__m128) __builtin_ia32_comunordss ((__v4sf)__A, (__v4sf)__B);
658 }
659
660 static __inline __m128 __attribute__((__always_inline__))
661 _mm_comneq_ss(__m128 __A, __m128 __B)
662 {
663 return (__m128) __builtin_ia32_comuneqss ((__v4sf)__A, (__v4sf)__B);
664 }
665
666 static __inline __m128 __attribute__((__always_inline__))
667 _mm_comnlt_ss(__m128 __A, __m128 __B)
668 {
669 return (__m128) __builtin_ia32_comunltss ((__v4sf)__A, (__v4sf)__B);
670 }
671
672 static __inline __m128 __attribute__((__always_inline__))
673 _mm_comnle_ss(__m128 __A, __m128 __B)
674 {
675 return (__m128) __builtin_ia32_comunless ((__v4sf)__A, (__v4sf)__B);
676 }
677
678
679 static __inline __m128 __attribute__((__always_inline__))
680 _mm_comord_ss(__m128 __A, __m128 __B)
681 {
682 return (__m128) __builtin_ia32_comordss ((__v4sf)__A, (__v4sf)__B);
683 }
684
685 static __inline __m128 __attribute__((__always_inline__))
686 _mm_comueq_ss(__m128 __A, __m128 __B)
687 {
688 return (__m128) __builtin_ia32_comueqss ((__v4sf)__A, (__v4sf)__B);
689 }
690
691 static __inline __m128 __attribute__((__always_inline__))
692 _mm_comnge_ss(__m128 __A, __m128 __B)
693 {
694 return (__m128) __builtin_ia32_comungess ((__v4sf)__A, (__v4sf)__B);
695 }
696
697 static __inline __m128 __attribute__((__always_inline__))
698 _mm_comngt_ss(__m128 __A, __m128 __B)
699 {
700 return (__m128) __builtin_ia32_comungtss ((__v4sf)__A, (__v4sf)__B);
701 }
702
703 static __inline __m128 __attribute__((__always_inline__))
704 _mm_comfalse_ss(__m128 __A, __m128 __B)
705 {
706 return (__m128) __builtin_ia32_comfalsess ((__v4sf)__A, (__v4sf)__B);
707 }
708
709 static __inline __m128 __attribute__((__always_inline__))
710 _mm_comoneq_ss(__m128 __A, __m128 __B)
711 {
712 return (__m128) __builtin_ia32_comneqss ((__v4sf)__A, (__v4sf)__B);
713 }
714
715 static __inline __m128 __attribute__((__always_inline__))
716 _mm_comge_ss(__m128 __A, __m128 __B)
717 {
718 return (__m128) __builtin_ia32_comgess ((__v4sf)__A, (__v4sf)__B);
719 }
720
721 static __inline __m128 __attribute__((__always_inline__))
722 _mm_comgt_ss(__m128 __A, __m128 __B)
723 {
724 return (__m128) __builtin_ia32_comgtss ((__v4sf)__A, (__v4sf)__B);
725 }
726
727 static __inline __m128 __attribute__((__always_inline__))
728 _mm_comtrue_ss(__m128 __A, __m128 __B)
729 {
730 return (__m128) __builtin_ia32_comtruess ((__v4sf)__A, (__v4sf)__B);
731 }
732
733 /* com (floating point, scalar double) */
734
735 static __inline __m128d __attribute__((__always_inline__))
736 _mm_comeq_sd(__m128d __A, __m128d __B)
737 {
738 return (__m128d) __builtin_ia32_comeqsd ((__v2df)__A, (__v2df)__B);
739 }
740
741 static __inline __m128d __attribute__((__always_inline__))
742 _mm_comlt_sd(__m128d __A, __m128d __B)
743 {
744 return (__m128d) __builtin_ia32_comltsd ((__v2df)__A, (__v2df)__B);
745 }
746
747 static __inline __m128d __attribute__((__always_inline__))
748 _mm_comle_sd(__m128d __A, __m128d __B)
749 {
750 return (__m128d) __builtin_ia32_comlesd ((__v2df)__A, (__v2df)__B);
751 }
752
753 static __inline __m128d __attribute__((__always_inline__))
754 _mm_comunord_sd(__m128d __A, __m128d __B)
755 {
756 return (__m128d) __builtin_ia32_comunordsd ((__v2df)__A, (__v2df)__B);
757 }
758
759 static __inline __m128d __attribute__((__always_inline__))
760 _mm_comneq_sd(__m128d __A, __m128d __B)
761 {
762 return (__m128d) __builtin_ia32_comuneqsd ((__v2df)__A, (__v2df)__B);
763 }
764
765 static __inline __m128d __attribute__((__always_inline__))
766 _mm_comnlt_sd(__m128d __A, __m128d __B)
767 {
768 return (__m128d) __builtin_ia32_comunltsd ((__v2df)__A, (__v2df)__B);
769 }
770
771 static __inline __m128d __attribute__((__always_inline__))
772 _mm_comnle_sd(__m128d __A, __m128d __B)
773 {
774 return (__m128d) __builtin_ia32_comunlesd ((__v2df)__A, (__v2df)__B);
775 }
776
777
778 static __inline __m128d __attribute__((__always_inline__))
779 _mm_comord_sd(__m128d __A, __m128d __B)
780 {
781 return (__m128d) __builtin_ia32_comordsd ((__v2df)__A, (__v2df)__B);
782 }
783
784 static __inline __m128d __attribute__((__always_inline__))
785 _mm_comueq_sd(__m128d __A, __m128d __B)
786 {
787 return (__m128d) __builtin_ia32_comueqsd ((__v2df)__A, (__v2df)__B);
788 }
789
790 static __inline __m128d __attribute__((__always_inline__))
791 _mm_comnge_sd(__m128d __A, __m128d __B)
792 {
793 return (__m128d) __builtin_ia32_comungesd ((__v2df)__A, (__v2df)__B);
794 }
795
796 static __inline __m128d __attribute__((__always_inline__))
797 _mm_comngt_sd(__m128d __A, __m128d __B)
798 {
799 return (__m128d) __builtin_ia32_comungtsd ((__v2df)__A, (__v2df)__B);
800 }
801
802 static __inline __m128d __attribute__((__always_inline__))
803 _mm_comfalse_sd(__m128d __A, __m128d __B)
804 {
805 return (__m128d) __builtin_ia32_comfalsesd ((__v2df)__A, (__v2df)__B);
806 }
807
808 static __inline __m128d __attribute__((__always_inline__))
809 _mm_comoneq_sd(__m128d __A, __m128d __B)
810 {
811 return (__m128d) __builtin_ia32_comneqsd ((__v2df)__A, (__v2df)__B);
812 }
813
814 static __inline __m128d __attribute__((__always_inline__))
815 _mm_comge_sd(__m128d __A, __m128d __B)
816 {
817 return (__m128d) __builtin_ia32_comgesd ((__v2df)__A, (__v2df)__B);
818 }
819
820 static __inline __m128d __attribute__((__always_inline__))
821 _mm_comgt_sd(__m128d __A, __m128d __B)
822 {
823 return (__m128d) __builtin_ia32_comgtsd ((__v2df)__A, (__v2df)__B);
824 }
825
826 static __inline __m128d __attribute__((__always_inline__))
827 _mm_comtrue_sd(__m128d __A, __m128d __B)
828 {
829 return (__m128d) __builtin_ia32_comtruesd ((__v2df)__A, (__v2df)__B);
830 }
831
832
/* pcom (integer, unsigned bytes) */
834
835 static __inline __m128i __attribute__((__always_inline__))
836 _mm_comlt_epu8(__m128i __A, __m128i __B)
837 {
838 return (__m128i) __builtin_ia32_pcomltub ((__v16qi)__A, (__v16qi)__B);
839 }
840
841 static __inline __m128i __attribute__((__always_inline__))
842 _mm_comle_epu8(__m128i __A, __m128i __B)
843 {
844 return (__m128i) __builtin_ia32_pcomleub ((__v16qi)__A, (__v16qi)__B);
845 }
846
847 static __inline __m128i __attribute__((__always_inline__))
848 _mm_comgt_epu8(__m128i __A, __m128i __B)
849 {
850 return (__m128i) __builtin_ia32_pcomgtub ((__v16qi)__A, (__v16qi)__B);
851 }
852
853 static __inline __m128i __attribute__((__always_inline__))
854 _mm_comge_epu8(__m128i __A, __m128i __B)
855 {
856 return (__m128i) __builtin_ia32_pcomgeub ((__v16qi)__A, (__v16qi)__B);
857 }
858
859 static __inline __m128i __attribute__((__always_inline__))
860 _mm_comeq_epu8(__m128i __A, __m128i __B)
861 {
862 return (__m128i) __builtin_ia32_pcomequb ((__v16qi)__A, (__v16qi)__B);
863 }
864
865 static __inline __m128i __attribute__((__always_inline__))
866 _mm_comneq_epu8(__m128i __A, __m128i __B)
867 {
868 return (__m128i) __builtin_ia32_pcomnequb ((__v16qi)__A, (__v16qi)__B);
869 }
870
871 static __inline __m128i __attribute__((__always_inline__))
872 _mm_comfalse_epu8(__m128i __A, __m128i __B)
873 {
874 return (__m128i) __builtin_ia32_pcomfalseub ((__v16qi)__A, (__v16qi)__B);
875 }
876
877 static __inline __m128i __attribute__((__always_inline__))
878 _mm_comtrue_epu8(__m128i __A, __m128i __B)
879 {
880 return (__m128i) __builtin_ia32_pcomtrueub ((__v16qi)__A, (__v16qi)__B);
881 }
882
/* pcom (integer, unsigned words) */
884
885 static __inline __m128i __attribute__((__always_inline__))
886 _mm_comlt_epu16(__m128i __A, __m128i __B)
887 {
888 return (__m128i) __builtin_ia32_pcomltuw ((__v8hi)__A, (__v8hi)__B);
889 }
890
891 static __inline __m128i __attribute__((__always_inline__))
892 _mm_comle_epu16(__m128i __A, __m128i __B)
893 {
894 return (__m128i) __builtin_ia32_pcomleuw ((__v8hi)__A, (__v8hi)__B);
895 }
896
897 static __inline __m128i __attribute__((__always_inline__))
898 _mm_comgt_epu16(__m128i __A, __m128i __B)
899 {
900 return (__m128i) __builtin_ia32_pcomgtuw ((__v8hi)__A, (__v8hi)__B);
901 }
902
903 static __inline __m128i __attribute__((__always_inline__))
904 _mm_comge_epu16(__m128i __A, __m128i __B)
905 {
906 return (__m128i) __builtin_ia32_pcomgeuw ((__v8hi)__A, (__v8hi)__B);
907 }
908
909 static __inline __m128i __attribute__((__always_inline__))
910 _mm_comeq_epu16(__m128i __A, __m128i __B)
911 {
912 return (__m128i) __builtin_ia32_pcomequw ((__v8hi)__A, (__v8hi)__B);
913 }
914
915 static __inline __m128i __attribute__((__always_inline__))
916 _mm_comneq_epu16(__m128i __A, __m128i __B)
917 {
918 return (__m128i) __builtin_ia32_pcomnequw ((__v8hi)__A, (__v8hi)__B);
919 }
920
921 static __inline __m128i __attribute__((__always_inline__))
922 _mm_comfalse_epu16(__m128i __A, __m128i __B)
923 {
924 return (__m128i) __builtin_ia32_pcomfalseuw ((__v8hi)__A, (__v8hi)__B);
925 }
926
927 static __inline __m128i __attribute__((__always_inline__))
928 _mm_comtrue_epu16(__m128i __A, __m128i __B)
929 {
930 return (__m128i) __builtin_ia32_pcomtrueuw ((__v8hi)__A, (__v8hi)__B);
931 }
932
/* pcom (integer, unsigned double words) */
934
935 static __inline __m128i __attribute__((__always_inline__))
936 _mm_comlt_epu32(__m128i __A, __m128i __B)
937 {
938 return (__m128i) __builtin_ia32_pcomltud ((__v4si)__A, (__v4si)__B);
939 }
940
941 static __inline __m128i __attribute__((__always_inline__))
942 _mm_comle_epu32(__m128i __A, __m128i __B)
943 {
944 return (__m128i) __builtin_ia32_pcomleud ((__v4si)__A, (__v4si)__B);
945 }
946
947 static __inline __m128i __attribute__((__always_inline__))
948 _mm_comgt_epu32(__m128i __A, __m128i __B)
949 {
950 return (__m128i) __builtin_ia32_pcomgtud ((__v4si)__A, (__v4si)__B);
951 }
952
953 static __inline __m128i __attribute__((__always_inline__))
954 _mm_comge_epu32(__m128i __A, __m128i __B)
955 {
956 return (__m128i) __builtin_ia32_pcomgeud ((__v4si)__A, (__v4si)__B);
957 }
958
959 static __inline __m128i __attribute__((__always_inline__))
960 _mm_comeq_epu32(__m128i __A, __m128i __B)
961 {
962 return (__m128i) __builtin_ia32_pcomequd ((__v4si)__A, (__v4si)__B);
963 }
964
965 static __inline __m128i __attribute__((__always_inline__))
966 _mm_comneq_epu32(__m128i __A, __m128i __B)
967 {
968 return (__m128i) __builtin_ia32_pcomnequd ((__v4si)__A, (__v4si)__B);
969 }
970
971 static __inline __m128i __attribute__((__always_inline__))
972 _mm_comfalse_epu32(__m128i __A, __m128i __B)
973 {
974 return (__m128i) __builtin_ia32_pcomfalseud ((__v4si)__A, (__v4si)__B);
975 }
976
977 static __inline __m128i __attribute__((__always_inline__))
978 _mm_comtrue_epu32(__m128i __A, __m128i __B)
979 {
980 return (__m128i) __builtin_ia32_pcomtrueud ((__v4si)__A, (__v4si)__B);
981 }
982
983 /*pcom (integer, unsigned quad words) */
984
985 static __inline __m128i __attribute__((__always_inline__))
986 _mm_comlt_epu64(__m128i __A, __m128i __B)
987 {
988 return (__m128i) __builtin_ia32_pcomltuq ((__v2di)__A, (__v2di)__B);
989 }
990
991 static __inline __m128i __attribute__((__always_inline__))
992 _mm_comle_epu64(__m128i __A, __m128i __B)
993 {
994 return (__m128i) __builtin_ia32_pcomleuq ((__v2di)__A, (__v2di)__B);
995 }
996
997 static __inline __m128i __attribute__((__always_inline__))
998 _mm_comgt_epu64(__m128i __A, __m128i __B)
999 {
1000 return (__m128i) __builtin_ia32_pcomgtuq ((__v2di)__A, (__v2di)__B);
1001 }
1002
1003 static __inline __m128i __attribute__((__always_inline__))
1004 _mm_comge_epu64(__m128i __A, __m128i __B)
1005 {
1006 return (__m128i) __builtin_ia32_pcomgeuq ((__v2di)__A, (__v2di)__B);
1007 }
1008
1009 static __inline __m128i __attribute__((__always_inline__))
1010 _mm_comeq_epu64(__m128i __A, __m128i __B)
1011 {
1012 return (__m128i) __builtin_ia32_pcomequq ((__v2di)__A, (__v2di)__B);
1013 }
1014
1015 static __inline __m128i __attribute__((__always_inline__))
1016 _mm_comneq_epu64(__m128i __A, __m128i __B)
1017 {
1018 return (__m128i) __builtin_ia32_pcomnequq ((__v2di)__A, (__v2di)__B);
1019 }
1020
1021 static __inline __m128i __attribute__((__always_inline__))
1022 _mm_comfalse_epu64(__m128i __A, __m128i __B)
1023 {
1024 return (__m128i) __builtin_ia32_pcomfalseuq ((__v2di)__A, (__v2di)__B);
1025 }
1026
1027 static __inline __m128i __attribute__((__always_inline__))
1028 _mm_comtrue_epu64(__m128i __A, __m128i __B)
1029 {
1030 return (__m128i) __builtin_ia32_pcomtrueuq ((__v2di)__A, (__v2di)__B);
1031 }
1032
1033 /*pcom (integer, signed bytes) */
1034
1035 static __inline __m128i __attribute__((__always_inline__))
1036 _mm_comlt_epi8(__m128i __A, __m128i __B)
1037 {
1038 return (__m128i) __builtin_ia32_pcomltb ((__v16qi)__A, (__v16qi)__B);
1039 }
1040
1041 static __inline __m128i __attribute__((__always_inline__))
1042 _mm_comle_epi8(__m128i __A, __m128i __B)
1043 {
1044 return (__m128i) __builtin_ia32_pcomleb ((__v16qi)__A, (__v16qi)__B);
1045 }
1046
1047 static __inline __m128i __attribute__((__always_inline__))
1048 _mm_comgt_epi8(__m128i __A, __m128i __B)
1049 {
1050 return (__m128i) __builtin_ia32_pcomgtb ((__v16qi)__A, (__v16qi)__B);
1051 }
1052
1053 static __inline __m128i __attribute__((__always_inline__))
1054 _mm_comge_epi8(__m128i __A, __m128i __B)
1055 {
1056 return (__m128i) __builtin_ia32_pcomgeb ((__v16qi)__A, (__v16qi)__B);
1057 }
1058
1059 static __inline __m128i __attribute__((__always_inline__))
1060 _mm_comeq_epi8(__m128i __A, __m128i __B)
1061 {
1062 return (__m128i) __builtin_ia32_pcomeqb ((__v16qi)__A, (__v16qi)__B);
1063 }
1064
1065 static __inline __m128i __attribute__((__always_inline__))
1066 _mm_comneq_epi8(__m128i __A, __m128i __B)
1067 {
1068 return (__m128i) __builtin_ia32_pcomneqb ((__v16qi)__A, (__v16qi)__B);
1069 }
1070
1071 static __inline __m128i __attribute__((__always_inline__))
1072 _mm_comfalse_epi8(__m128i __A, __m128i __B)
1073 {
1074 return (__m128i) __builtin_ia32_pcomfalseb ((__v16qi)__A, (__v16qi)__B);
1075 }
1076
1077 static __inline __m128i __attribute__((__always_inline__))
1078 _mm_comtrue_epi8(__m128i __A, __m128i __B)
1079 {
1080 return (__m128i) __builtin_ia32_pcomtrueb ((__v16qi)__A, (__v16qi)__B);
1081 }
1082
1083 /*pcom (integer, signed words) */
1084
1085 static __inline __m128i __attribute__((__always_inline__))
1086 _mm_comlt_epi16(__m128i __A, __m128i __B)
1087 {
1088 return (__m128i) __builtin_ia32_pcomltw ((__v8hi)__A, (__v8hi)__B);
1089 }
1090
1091 static __inline __m128i __attribute__((__always_inline__))
1092 _mm_comle_epi16(__m128i __A, __m128i __B)
1093 {
1094 return (__m128i) __builtin_ia32_pcomlew ((__v8hi)__A, (__v8hi)__B);
1095 }
1096
1097 static __inline __m128i __attribute__((__always_inline__))
1098 _mm_comgt_epi16(__m128i __A, __m128i __B)
1099 {
1100 return (__m128i) __builtin_ia32_pcomgtw ((__v8hi)__A, (__v8hi)__B);
1101 }
1102
1103 static __inline __m128i __attribute__((__always_inline__))
1104 _mm_comge_epi16(__m128i __A, __m128i __B)
1105 {
1106 return (__m128i) __builtin_ia32_pcomgew ((__v8hi)__A, (__v8hi)__B);
1107 }
1108
1109 static __inline __m128i __attribute__((__always_inline__))
1110 _mm_comeq_epi16(__m128i __A, __m128i __B)
1111 {
1112 return (__m128i) __builtin_ia32_pcomeqw ((__v8hi)__A, (__v8hi)__B);
1113 }
1114
1115 static __inline __m128i __attribute__((__always_inline__))
1116 _mm_comneq_epi16(__m128i __A, __m128i __B)
1117 {
1118 return (__m128i) __builtin_ia32_pcomneqw ((__v8hi)__A, (__v8hi)__B);
1119 }
1120
1121 static __inline __m128i __attribute__((__always_inline__))
1122 _mm_comfalse_epi16(__m128i __A, __m128i __B)
1123 {
1124 return (__m128i) __builtin_ia32_pcomfalsew ((__v8hi)__A, (__v8hi)__B);
1125 }
1126
1127 static __inline __m128i __attribute__((__always_inline__))
1128 _mm_comtrue_epi16(__m128i __A, __m128i __B)
1129 {
1130 return (__m128i) __builtin_ia32_pcomtruew ((__v8hi)__A, (__v8hi)__B);
1131 }
1132
1133 /*pcom (integer, signed double words) */
1134
1135 static __inline __m128i __attribute__((__always_inline__))
1136 _mm_comlt_epi32(__m128i __A, __m128i __B)
1137 {
1138 return (__m128i) __builtin_ia32_pcomltd ((__v4si)__A, (__v4si)__B);
1139 }
1140
1141 static __inline __m128i __attribute__((__always_inline__))
1142 _mm_comle_epi32(__m128i __A, __m128i __B)
1143 {
1144 return (__m128i) __builtin_ia32_pcomled ((__v4si)__A, (__v4si)__B);
1145 }
1146
1147 static __inline __m128i __attribute__((__always_inline__))
1148 _mm_comgt_epi32(__m128i __A, __m128i __B)
1149 {
1150 return (__m128i) __builtin_ia32_pcomgtd ((__v4si)__A, (__v4si)__B);
1151 }
1152
1153 static __inline __m128i __attribute__((__always_inline__))
1154 _mm_comge_epi32(__m128i __A, __m128i __B)
1155 {
1156 return (__m128i) __builtin_ia32_pcomged ((__v4si)__A, (__v4si)__B);
1157 }
1158
1159 static __inline __m128i __attribute__((__always_inline__))
1160 _mm_comeq_epi32(__m128i __A, __m128i __B)
1161 {
1162 return (__m128i) __builtin_ia32_pcomeqd ((__v4si)__A, (__v4si)__B);
1163 }
1164
1165 static __inline __m128i __attribute__((__always_inline__))
1166 _mm_comneq_epi32(__m128i __A, __m128i __B)
1167 {
1168 return (__m128i) __builtin_ia32_pcomneqd ((__v4si)__A, (__v4si)__B);
1169 }
1170
1171 static __inline __m128i __attribute__((__always_inline__))
1172 _mm_comfalse_epi32(__m128i __A, __m128i __B)
1173 {
1174 return (__m128i) __builtin_ia32_pcomfalsed ((__v4si)__A, (__v4si)__B);
1175 }
1176
1177 static __inline __m128i __attribute__((__always_inline__))
1178 _mm_comtrue_epi32(__m128i __A, __m128i __B)
1179 {
1180 return (__m128i) __builtin_ia32_pcomtrued ((__v4si)__A, (__v4si)__B);
1181 }
1182
1183 /*pcom (integer, signed quad words) */
1184
1185 static __inline __m128i __attribute__((__always_inline__))
1186 _mm_comlt_epi64(__m128i __A, __m128i __B)
1187 {
1188 return (__m128i) __builtin_ia32_pcomltq ((__v2di)__A, (__v2di)__B);
1189 }
1190
1191 static __inline __m128i __attribute__((__always_inline__))
1192 _mm_comle_epi64(__m128i __A, __m128i __B)
1193 {
1194 return (__m128i) __builtin_ia32_pcomleq ((__v2di)__A, (__v2di)__B);
1195 }
1196
1197 static __inline __m128i __attribute__((__always_inline__))
1198 _mm_comgt_epi64(__m128i __A, __m128i __B)
1199 {
1200 return (__m128i) __builtin_ia32_pcomgtq ((__v2di)__A, (__v2di)__B);
1201 }
1202
1203 static __inline __m128i __attribute__((__always_inline__))
1204 _mm_comge_epi64(__m128i __A, __m128i __B)
1205 {
1206 return (__m128i) __builtin_ia32_pcomgeq ((__v2di)__A, (__v2di)__B);
1207 }
1208
1209 static __inline __m128i __attribute__((__always_inline__))
1210 _mm_comeq_epi64(__m128i __A, __m128i __B)
1211 {
1212 return (__m128i) __builtin_ia32_pcomeqq ((__v2di)__A, (__v2di)__B);
1213 }
1214
1215 static __inline __m128i __attribute__((__always_inline__))
1216 _mm_comneq_epi64(__m128i __A, __m128i __B)
1217 {
1218 return (__m128i) __builtin_ia32_pcomneqq ((__v2di)__A, (__v2di)__B);
1219 }
1220
1221 static __inline __m128i __attribute__((__always_inline__))
1222 _mm_comfalse_epi64(__m128i __A, __m128i __B)
1223 {
1224 return (__m128i) __builtin_ia32_pcomfalseq ((__v2di)__A, (__v2di)__B);
1225 }
1226
1227 static __inline __m128i __attribute__((__always_inline__))
1228 _mm_comtrue_epi64(__m128i __A, __m128i __B)
1229 {
1230 return (__m128i) __builtin_ia32_pcomtrueq ((__v2di)__A, (__v2di)__B);
1231 }
1232
1233 /* FRCZ */
1234 static __inline __m128 __attribute__((__always_inline__))
1235 _mm_frcz_ps (__m128 __A)
1236 {
1237 return (__m128) __builtin_ia32_frczps ((__v4sf)__A);
1238 }
1239
1240 static __inline __m128d __attribute__((__always_inline__))
1241 _mm_frcz_pd (__m128d __A)
1242 {
1243 return (__m128d) __builtin_ia32_frczpd ((__v2df)__A);
1244 }
1245
1246 static __inline __m128 __attribute__((__always_inline__))
1247 _mm_frcz_ss (__m128 __A, __m128 __B)
1248 {
1249 return (__m128) __builtin_ia32_frczss ((__v4sf)__A, (__v4sf)__B);
1250 }
1251
1252 static __inline __m128d __attribute__((__always_inline__))
1253 _mm_frcz_sd (__m128d __A, __m128d __B)
1254 {
1255 return (__m128d) __builtin_ia32_frczsd ((__v2df)__A, (__v2df)__B);
1256 }
1257
1258 #endif /* __SSE5__ */
1259
1260 #endif /* _BMMINTRIN_H_INCLUDED */