1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2013 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
87 ;; For AVX512F support
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
106 UNSPEC_COMPRESS_STORE
111 ;; For the embedded rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
118 ;; For AVX512ER support
124 (define_c_enum "unspecv" [
134 ;; All vector modes including V?TImode, used in move patterns.
;; The widest modes (V64QI, V32HI, V16SI, V8DI, V16SF, V8DF) are gated on
;; TARGET_AVX512F, the next tier (including V2TI) on TARGET_AVX, and the
;; remaining modes (including V1TI) carry no condition.
135 (define_mode_iterator VMOVE
136 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
137 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
138 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
139 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
140 (V2TI "TARGET_AVX") V1TI
141 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
142 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; General vector mode iterator.  Compared with VMOVE this list has no
;; V64QI, V32HI or V?TI entries, and V2DF is conditional on TARGET_SSE2.
145 (define_mode_iterator V
146 [(V32QI "TARGET_AVX") V16QI
147 (V16HI "TARGET_AVX") V8HI
148 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
149 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
150 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
151 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
153 ;; All 128bit vector modes
;; Only V2DF carries a condition (TARGET_SSE2); the rest are unconditional.
154 (define_mode_iterator V_128
155 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
157 ;; All 256bit vector modes
;; No per-mode condition is attached to any entry of this iterator.
158 (define_mode_iterator V_256
159 [V32QI V16HI V8SI V4DI V8SF V4DF])
161 ;; All 512bit vector modes
;; No per-mode condition is attached to any entry of this iterator.
162 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
164 ;; All 256bit and 512bit vector modes
;; The 256-bit modes are listed without a condition; every 512-bit mode
;; is gated on TARGET_AVX512F.
165 (define_mode_iterator V_256_512
166 [V32QI V16HI V8SI V4DI V8SF V4DF
167 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
168 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
170 ;; All vector float modes
;; V16SF/V8DF need TARGET_AVX512F, V8SF/V4DF need TARGET_AVX and V2DF needs
;; TARGET_SSE2; V4SF is the only unconditional entry.
171 (define_mode_iterator VF
172 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
173 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
175 ;; 128- and 256-bit float vector modes
;; Same as VF without the TARGET_AVX512F entries (V16SF, V8DF).
176 (define_mode_iterator VF_128_256
177 [(V8SF "TARGET_AVX") V4SF
178 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
180 ;; All SFmode vector float modes
;; V16SF requires TARGET_AVX512F, V8SF requires TARGET_AVX, V4SF is
;; unconditional.
181 (define_mode_iterator VF1
182 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
184 ;; 128- and 256-bit SF vector modes
;; VF1 without the V16SF entry.
185 (define_mode_iterator VF1_128_256
186 [(V8SF "TARGET_AVX") V4SF])
188 ;; All DFmode vector float modes
;; Note that V2DF is unconditional here, whereas VF and VF_128 gate it on
;; TARGET_SSE2.
189 (define_mode_iterator VF2
190 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
192 ;; 128- and 256-bit DF vector modes
;; VF2 without the V8DF entry.
193 (define_mode_iterator VF2_128_256
194 [(V4DF "TARGET_AVX") V2DF])
;; 256- and 512-bit DF vector modes (VF2 without the V2DF entry).
196 (define_mode_iterator VF2_512_256
197 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")])
199 ;; All 128bit vector float modes
;; V4SF is unconditional; V2DF requires TARGET_SSE2.
200 (define_mode_iterator VF_128
201 [V4SF (V2DF "TARGET_SSE2")])
203 ;; All 256bit vector float modes
204 (define_mode_iterator VF_256
207 ;; All 512bit vector float modes
208 (define_mode_iterator VF_512
211 ;; All vector integer modes
;; The only entries gated on TARGET_AVX512F are V16SI and V8DI (there is no
;; V64QI or V32HI here); V32QI/V16HI/V8SI/V4DI require TARGET_AVX and the
;; remaining modes are unconditional.
212 (define_mode_iterator VI
213 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
214 (V32QI "TARGET_AVX") V16QI
215 (V16HI "TARGET_AVX") V8HI
216 (V8SI "TARGET_AVX") V4SI
217 (V4DI "TARGET_AVX") V2DI])
;; Same set of modes as VI, but V32QI/V16HI/V8SI/V4DI are gated on
;; TARGET_AVX2 rather than TARGET_AVX.
219 (define_mode_iterator VI_AVX2
220 [(V32QI "TARGET_AVX2") V16QI
221 (V16HI "TARGET_AVX2") V8HI
222 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
223 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
225 ;; All QImode vector integer modes
;; V32QI requires TARGET_AVX; V16QI is unconditional.  There is no V64QI
;; entry.
226 (define_mode_iterator VI1
227 [(V32QI "TARGET_AVX") V16QI])
;; Integer modes handled by the unaligned load/store patterns
;; (<sse2_avx_avx512f>_loaddqu<mode> and <sse2_avx_avx512f>_storedqu<mode>):
;; the QImode vectors V16QI and V32QI (TARGET_AVX), plus V16SI and V8DI
;; under TARGET_AVX512F.
229 (define_mode_iterator VI_UNALIGNED_LOADSTORE
230 [(V32QI "TARGET_AVX") V16QI
231 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
233 ;; All DImode vector integer modes
;; V8DI requires TARGET_AVX512F, V4DI requires TARGET_AVX, V2DI is
;; unconditional.
234 (define_mode_iterator VI8
235 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; Per-element-size integer iterators.  As the mode lists show, the digits
;; after "VI" name the element width in bytes (1 = QI, 2 = HI, 4 = SI,
;; 8 = DI) and the suffix names the target flag(s) that gate the wider
;; modes; the 128-bit mode is unconditional wherever it is listed.

;; QImode vectors; V32QI requires TARGET_AVX2.
237 (define_mode_iterator VI1_AVX2
238 [(V32QI "TARGET_AVX2") V16QI])
;; HImode vectors; V16HI requires TARGET_AVX2.
240 (define_mode_iterator VI2_AVX2
241 [(V16HI "TARGET_AVX2") V8HI])
;; HImode vectors including V32HI (TARGET_AVX512F).
243 (define_mode_iterator VI2_AVX512F
244 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
;; SImode vectors; V8SI requires only TARGET_AVX.
246 (define_mode_iterator VI4_AVX
247 [(V8SI "TARGET_AVX") V4SI])
;; SImode vectors; V8SI requires TARGET_AVX2.
249 (define_mode_iterator VI4_AVX2
250 [(V8SI "TARGET_AVX2") V4SI])
;; SImode vectors including V16SI (TARGET_AVX512F).
252 (define_mode_iterator VI4_AVX512F
253 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; All SImode vectors plus V8DI only; V4DI and V2DI are not included.
255 (define_mode_iterator VI48_AVX512F
256 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
257 (V8DI "TARGET_AVX512F")])
;; DImode vectors; V4DI requires TARGET_AVX2.
259 (define_mode_iterator VI8_AVX2
260 [(V4DI "TARGET_AVX2") V2DI])
;; DImode vectors including V8DI (TARGET_AVX512F).
262 (define_mode_iterator VI8_AVX2_AVX512F
263 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
266 (define_mode_iterator V8FI
270 (define_mode_iterator V16FI
;; Whole-register integer modes: V2TI (TARGET_AVX2) and V1TI.
273 ;; ??? We should probably use TImode instead.
274 (define_mode_iterator VIMAX_AVX2
275 [(V2TI "TARGET_AVX2") V1TI])
;; Like VIMAX_AVX2, but the 128-bit entry is scalar TI rather than V1TI.
277 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
278 (define_mode_iterator SSESCALARMODE
279 [(V2TI "TARGET_AVX2") TI])
;; Multi-element-size integer iterators.  The digits after "VI" list the
;; element widths in bytes that are covered (1 = QI, 2 = HI, 4 = SI,
;; 8 = DI); the suffix names the target flags that gate the wider modes.
;; 128-bit modes are unconditional wherever they are listed.

;; QI and HI elements, 128/256-bit.
281 (define_mode_iterator VI12_AVX2
282 [(V32QI "TARGET_AVX2") V16QI
283 (V16HI "TARGET_AVX2") V8HI])
;; HI and SI elements, 128/256-bit.
285 (define_mode_iterator VI24_AVX2
286 [(V16HI "TARGET_AVX2") V8HI
287 (V8SI "TARGET_AVX2") V4SI])
;; QI/HI/SI elements at 128/256-bit, plus the 512-bit V16SI and V8DI.
;; V8DI is the only DImode entry.
289 (define_mode_iterator VI124_AVX2_48_AVX512F
290 [(V32QI "TARGET_AVX2") V16QI
291 (V16HI "TARGET_AVX2") V8HI
292 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
293 (V8DI "TARGET_AVX512F")])
;; QI/HI/SI elements; 512-bit entries exist for HI (V32HI) and SI (V16SI)
;; but not for QI.
295 (define_mode_iterator VI124_AVX512F
296 [(V32QI "TARGET_AVX2") V16QI
297 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
298 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; QI/HI/SI elements, 128/256-bit only.
300 (define_mode_iterator VI124_AVX2
301 [(V32QI "TARGET_AVX2") V16QI
302 (V16HI "TARGET_AVX2") V8HI
303 (V8SI "TARGET_AVX2") V4SI])
;; HI/SI/DI elements, 128/256-bit only.
305 (define_mode_iterator VI248_AVX2
306 [(V16HI "TARGET_AVX2") V8HI
307 (V8SI "TARGET_AVX2") V4SI
308 (V4DI "TARGET_AVX2") V2DI])
;; As VI248_AVX2, with V8DI added under TARGET_AVX512F.
310 (define_mode_iterator VI248_AVX2_8_AVX512F
311 [(V16HI "TARGET_AVX2") V8HI
312 (V8SI "TARGET_AVX2") V4SI
313 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; SI and DI elements at all three widths.
315 (define_mode_iterator VI48_AVX2_48_AVX512F
316 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
317 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
319 (define_mode_iterator V48_AVX2
322 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
323 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
325 (define_mode_attr sse2_avx_avx512f
326 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
327 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
329 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
330 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
;; ISA-name attributes.  Each maps a vector mode to the name of an
;; instruction-set extension; the attribute's own name spells the value used
;; for the narrow modes followed by the value used for the wider ones.
;; NOTE(review): presumably these are spliced into pattern names in the same
;; way <sse2_avx_avx512f> is -- confirm against the patterns that use them.

;; "sse2" for 128-bit modes, "avx2" for 256-bit modes, "avx512f" for
;; V16SI and V8DI.
332 (define_mode_attr sse2_avx2
333 [(V16QI "sse2") (V32QI "avx2")
334 (V8HI "sse2") (V16HI "avx2")
335 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
336 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
337 (V1TI "sse2") (V2TI "avx2")])
;; "ssse3" or "avx2".  Also covers V4HI and scalar TI, which map to "ssse3".
339 (define_mode_attr ssse3_avx2
340 [(V16QI "ssse3") (V32QI "avx2")
341 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2")
342 (V4SI "ssse3") (V8SI "avx2")
343 (V2DI "ssse3") (V4DI "avx2")
344 (TI "ssse3") (V2TI "avx2")])
;; "sse4_1" or "avx2"; V16SI is the only entry mapping to "avx512f".
346 (define_mode_attr sse4_1_avx2
347 [(V16QI "sse4_1") (V32QI "avx2")
348 (V8HI "sse4_1") (V16HI "avx2")
349 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
350 (V2DI "sse4_1") (V4DI "avx2")])
;; "avx" for the float modes, "avx2" for the integer modes, regardless of
;; vector width.
352 (define_mode_attr avx_avx2
353 [(V4SF "avx") (V2DF "avx")
354 (V8SF "avx") (V4DF "avx")
355 (V4SI "avx2") (V2DI "avx2")
356 (V8SI "avx2") (V4DI "avx2")])
;; "vec" for 128-bit integer modes, "avx2" for 256-bit integer modes.
358 (define_mode_attr vec_avx2
359 [(V16QI "vec") (V32QI "avx2")
360 (V8HI "vec") (V16HI "avx2")
361 (V4SI "vec") (V8SI "avx2")
362 (V2DI "vec") (V4DI "avx2")])
;; "avx512f" for the 512-bit modes listed, "avx2" for everything else.
;; There are no V4SF or V2DF entries.
364 (define_mode_attr avx2_avx512f
365 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
366 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
367 (V8SF "avx2") (V16SF "avx512f")
368 (V4DF "avx2") (V8DF "avx512f")])
;; Mnemonic infix selecting the floating-point ("f") or integer ("i")
;; flavour of an instruction.  The value is spliced directly into mnemonics
;; such as vextract<shuffletype>64x4 and vbroadcast<shuffletype>32x4 in
;; *mov<mode>_internal, so every entry must be exactly "f" or "i": float
;; vector modes map to "f" and all integer vector modes (including V16HI
;; and the V?TI modes) map to "i".
370 (define_mode_attr shuffletype
371 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
372 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
373 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
374 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
375 (V64QI "i") (V1TI "i") (V2TI "i")])
;; Mapping of the listed 512-bit modes to the mode with the same element
;; type and a quarter of the elements.
377 (define_mode_attr ssequartermode
378 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
;; Two different kinds of "double" are mixed in this table:
;;  - for the SF/SI/DI/DF entries the element type is kept and the element
;;    count doubles (e.g. V16SF -> V32SF);
;;  - for the HI and QI entries the element count is kept and the element
;;    type widens to the next integer mode (e.g. V8HI -> V8SI).
380 (define_mode_attr ssedoublemode
381 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
382 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
383 (V32QI "V32HI") (V16QI "V16HI")])
;; Mapping of DImode vectors to the QImode vector of the same total size.
385 (define_mode_attr ssebytemode
386 [(V4DI "V32QI") (V2DI "V16QI")])
;; Fixed-width integer iterators; none of their entries carries a condition.
388 ;; All 128bit vector integer modes
389 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
391 ;; All 256bit vector integer modes
392 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
394 ;; All 512bit vector integer modes
395 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
397 ;; Various 128bit vector integer mode combinations
;; The digits after "VI" list the element widths in bytes that are included
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).  No entry is conditional.
398 (define_mode_iterator VI12_128 [V16QI V8HI])
399 (define_mode_iterator VI14_128 [V16QI V4SI])
400 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
401 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
402 (define_mode_iterator VI24_128 [V8HI V4SI])
403 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
404 (define_mode_iterator VI48_128 [V4SI V2DI])
406 ;; Various 256bit and 512bit vector integer mode combinations
;; VI124_256_48_512: the QI/HI/SI 256-bit modes unconditionally, plus the
;; SI/DI 512-bit modes under TARGET_AVX512F.  V4DI is not included.
407 (define_mode_iterator VI124_256_48_512
408 [V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")])
;; SI and DI element vectors of one fixed width, without conditions.
409 (define_mode_iterator VI48_256 [V8SI V4DI])
410 (define_mode_iterator VI48_512 [V16SI V8DI])
412 ;; Int-float size matches
;; Each iterator pairs an integer vector mode with the float vector mode of
;; the same element count: the digit is the element width in bytes (4 = SI/SF,
;; 8 = DI/DF) and the suffix is the vector width in bits.
413 (define_mode_iterator VI4F_128 [V4SI V4SF])
414 (define_mode_iterator VI8F_128 [V2DI V2DF])
415 (define_mode_iterator VI4F_256 [V8SI V8SF])
416 (define_mode_iterator VI8F_256 [V4DI V4DF])
;; The 256-bit pair is unconditional; the 512-bit pair requires
;; TARGET_AVX512F.
417 (define_mode_iterator VI8F_256_512
418 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
419 (define_mode_iterator VI48F_256_512
421 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
422 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; 512-bit modes with SI/SF or DI/DF elements, without per-mode conditions.
;; Used by the avx512f_load<mode>_mask, avx512f_blendm<mode> and
;; avx512f_store<mode>_mask patterns.
423 (define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
425 ;; Mapping from float mode to required SSE level
;; Covers the scalar SF/DF modes as well as the vector modes.  The value is
;; used as a pattern-name prefix, e.g. <sse>_loadu<ssemodesuffix>... and
;; <sse>_movnt<mode>.
426 (define_mode_attr sse
427 [(SF "sse") (DF "sse2")
428 (V4SF "sse") (V2DF "sse2")
429 (V16SF "avx512f") (V8SF "avx")
430 (V8DF "avx512f") (V4DF "avx")])
;; Pattern-name prefix for QImode and DImode integer vectors: "sse2" at
;; 128 bits, "avx" at 256 bits, "avx512f" at 512 bits.  Used by
;; <sse2>_movnt<mode>.
432 (define_mode_attr sse2
433 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
434 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
;; Pattern-name prefix for QImode vectors: "sse3" at 128 bits, "avx" at
;; 256 bits.  Used by <sse3>_lddqu<avxsizesuffix>.
436 (define_mode_attr sse3
437 [(V16QI "sse3") (V32QI "avx")])
439 (define_mode_attr sse4_1
440 [(V4SF "sse4_1") (V2DF "sse4_1")
441 (V8SF "avx") (V4DF "avx")
;; Vector width as a pattern-name suffix: "512" or "256" for the wide modes
;; and the empty string for 128-bit modes.  Used in names such as
;; <sse>_loadu<ssemodesuffix><avxsizesuffix> and <sse3>_lddqu<avxsizesuffix>.
444 (define_mode_attr avxsizesuffix
445 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
446 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
447 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
448 (V16SF "512") (V8DF "512")
449 (V8SF "256") (V4DF "256")
450 (V4SF "") (V2DF "")])
452 ;; SSE instruction mode
453 (define_mode_attr sseinsnmode
454 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI")
455 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
456 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
457 (V16SF "V16SF") (V8DF "V8DF")
458 (V8SF "V8SF") (V4DF "V4DF")
459 (V4SF "V4SF") (V2DF "V2DF")
462 ;; Mapping of vector modes to corresponding mask size
463 (define_mode_attr avx512fmaskmode
465 (V16HI "HI") (V8HI "QI")
466 (V16SI "HI") (V8SI "QI") (V4SI "QI")
467 (V8DI "QI") (V4DI "QI") (V2DI "QI")
468 (V16SF "HI") (V8SF "QI") (V4SF "QI")
469 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
471 ;; Mapping of vector float modes to an integer mode of the same size
;; Float modes map to the integer vector with the same element count and
;; element width (V4SF -> V4SI, V2DF -> V2DI, ...); integer vector modes map
;; to themselves.
472 (define_mode_attr sseintvecmode
473 [(V16SF "V16SI") (V8DF "V8DI")
474 (V8SF "V8SI") (V4DF "V4DI")
475 (V4SF "V4SI") (V2DF "V2DI")
476 (V16SI "V16SI") (V8DI "V8DI")
477 (V8SI "V8SI") (V4DI "V4DI")
478 (V4SI "V4SI") (V2DI "V2DI")
479 (V16HI "V16HI") (V8HI "V8HI")
480 (V32QI "V32QI") (V16QI "V16QI")])
482 (define_mode_attr sseintvecmodelower
484 (V8SF "v8si") (V4DF "v4di")
485 (V4SF "v4si") (V2DF "v2di")
486 (V8SI "v8si") (V4DI "v4di")
487 (V4SI "v4si") (V2DI "v2di")
488 (V16HI "v16hi") (V8HI "v8hi")
489 (V32QI "v32qi") (V16QI "v16qi")])
491 ;; Mapping of vector modes to a vector mode of double size
;; The element type is kept and the element count doubles.  Only 128-bit and
;; 256-bit source modes are listed.
492 (define_mode_attr ssedoublevecmode
493 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
494 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
495 (V8SF "V16SF") (V4DF "V8DF")
496 (V4SF "V8SF") (V2DF "V4DF")])
498 ;; Mapping of vector modes to a vector mode of half size
499 (define_mode_attr ssehalfvecmode
500 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
501 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
502 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
503 (V16SF "V8SF") (V8DF "V4DF")
504 (V8SF "V4SF") (V4DF "V2DF")
507 ;; Mapping of vector modes to packed single mode of the same size
;; Every 512-bit mode maps to V16SF, every 256-bit mode to V8SF and every
;; 128-bit mode to V4SF.  The move patterns select this as their "mode"
;; attribute when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL (or, for stores,
;; TARGET_SSE_TYPELESS_STORES) holds.
508 (define_mode_attr ssePSmode
509 [(V16SI "V16SF") (V8DF "V16SF")
510 (V16SF "V16SF") (V8DI "V16SF")
511 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
512 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
513 (V8SI "V8SF") (V4SI "V4SF")
514 (V4DI "V8SF") (V2DI "V4SF")
515 (V2TI "V8SF") (V1TI "V4SF")
516 (V8SF "V8SF") (V4SF "V4SF")
517 (V4DF "V8SF") (V2DF "V4SF")])
519 ;; Mapping of vector modes back to the scalar modes
;; I.e. the element mode of each vector mode (V4SF -> SF, V16QI -> QI, ...).
;; The V?TI modes have no entry.
520 (define_mode_attr ssescalarmode
521 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
522 (V32HI "HI") (V16HI "HI") (V8HI "HI")
523 (V16SI "SI") (V8SI "SI") (V4SI "SI")
524 (V8DI "DI") (V4DI "DI") (V2DI "DI")
525 (V16SF "SF") (V8SF "SF") (V4SF "SF")
526 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
528 ;; Mapping of vector modes to the 128bit modes
;; The element type is kept; 128-bit modes map to themselves.
529 (define_mode_attr ssexmmmode
530 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
531 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
532 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
533 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
534 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
535 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
537 ;; Pointer size override for scalar modes (Intel asm dialect)
538 (define_mode_attr iptr
539 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
540 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
541 (V8SF "k") (V4DF "q")
542 (V4SF "k") (V2DF "q")
545 ;; Number of scalar elements in each vector type
;; The value is the element count as a decimal string.  Note that V32HI has
;; no entry in this table.
546 (define_mode_attr ssescalarnum
547 [(V64QI "64") (V16SI "16") (V8DI "8")
548 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
549 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
550 (V16SF "16") (V8DF "8")
551 (V8SF "8") (V4DF "4")
552 (V4SF "4") (V2DF "2")])
554 ;; Mask of scalar elements in each vector type
;; Each value is the element count minus one (e.g. 31 for V32QI, 1 for
;; V2DF).  Only 128-bit and 256-bit modes are listed.
555 (define_mode_attr ssescalarnummask
556 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
557 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
558 (V8SF "7") (V4DF "3")
559 (V4SF "3") (V2DF "1")])
;; Element width in bits as a decimal string; the masked move patterns use
;; it as a mnemonic suffix (vmovdqa<ssescalarsize>).  The table is partial:
;; there are no QImode vector entries, and V16SF and V8DF are the only float
;; modes listed.
561 (define_mode_attr ssescalarsize
562 [(V8DI "64") (V4DI "64") (V2DI "64")
563 (V32HI "16") (V16HI "16") (V8HI "16")
564 (V16SI "32") (V8SI "32") (V4SI "32")
565 (V16SF "32") (V8DF "64")])
567 ;; SSE prefix for integer vector modes
568 (define_mode_attr sseintprefix
569 [(V2DI "p") (V2DF "")
574 (V16SI "p") (V16SF "")])
576 ;; SSE scalar suffix for vector modes
577 (define_mode_attr ssescalarmodesuffix
579 (V8SF "ss") (V4DF "sd")
580 (V4SF "ss") (V2DF "sd")
581 (V8SI "ss") (V4DI "sd")
584 ;; Pack/unpack vector modes
;; sseunpackmode: the same-size vector with elements twice as wide and half
;; as many of them (V16QI -> V8HI, V4SI -> V2DI, ...).
585 (define_mode_attr sseunpackmode
586 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
587 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
588 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; ssepackmode: the inverse mapping -- the same-size vector with elements
;; half as wide and twice as many of them (V8HI -> V16QI, V2DI -> V4SI, ...).
590 (define_mode_attr ssepackmode
591 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
592 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
593 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
595 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one (7 for QI elements up
;; to 63 for DI elements).  Only 128-bit integer modes are listed.
596 (define_mode_attr sserotatemax
597 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
599 ;; Mapping of mode to cast intrinsic name
;; Covers exactly the modes of the AVX256MODE2P iterator (V8SI, V8SF, V4DF).
600 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
602 ;; Instruction suffix for sign and zero extensions.
;; This is a code attribute: it is keyed on the rtx code (sign_extend or
;; zero_extend), not on a machine mode.
603 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
605 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
606 ;; i64x4 or f64x4 for 512bit modes.
;; The 256-bit integer entries use "%~128" rather than a literal prefix.
;; NOTE(review): presumably "%~" is an output-template punctuation code that
;; prints "i" or "f" depending on TARGET_AVX2, per the comment above --
;; confirm against the target's print_operand implementation.
607 (define_mode_attr i128
608 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
609 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
610 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
;; 256-bit modes V8SI, V8SF and V4DF, without per-mode conditions.  These
;; are the same modes the castmode attribute is defined for.
613 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
615 ;; Mapping of immediate bits for blend instructions
;; Each value is an all-ones mask with one bit per vector element
;; (2^8-1 for V8SF, 2^4-1 for V4SF/V4DF, 2^2-1 for V2DF).
616 (define_mode_attr blendbits
617 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
619 ;; Mapping suffixes for broadcast
;; Only the 512-bit SI/SF/DI/DF modes are listed.
620 (define_mode_attr bcstscalarsuff
621 [(V16SI "d") (V16SF "ss") (V8DI "q") (V8DF "sd")])
623 ;; Include define_subst patterns for instructions with mask
626 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
628 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
632 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
634 ;; All of these patterns are enabled for SSE1 as well as SSE2.
635 ;; This is essential for maintaining stable calling conventions.
637 (define_expand "mov<mode>"
638 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
639 (match_operand:VMOVE 1 "nonimmediate_operand"))]
642 ix86_expand_vector_move (<MODE>mode, operands);
646 (define_insn "*mov<mode>_internal"
647 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
648 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
650 && (register_operand (operands[0], <MODE>mode)
651 || register_operand (operands[1], <MODE>mode))"
653 int mode = get_attr_mode (insn);
654 switch (which_alternative)
657 return standard_sse_constant_opcode (insn, operands[1]);
660 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
661 in avx512f, so we need to use workarounds, to access sse registers
662 16-31, which are evex-only. */
663 if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64
664 && (EXT_REX_SSE_REGNO_P (REGNO (operands[0]))
665 || EXT_REX_SSE_REGNO_P (REGNO (operands[1]))))
667 if (memory_operand (operands[0], <MODE>mode))
669 if (GET_MODE_SIZE (<MODE>mode) == 32)
670 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
671 else if (GET_MODE_SIZE (<MODE>mode) == 16)
672 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
676 else if (memory_operand (operands[1], <MODE>mode))
678 if (GET_MODE_SIZE (<MODE>mode) == 32)
679 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
680 else if (GET_MODE_SIZE (<MODE>mode) == 16)
681 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
686 /* Reg -> reg move is always aligned. Just use wider move. */
691 return "vmovaps\t{%g1, %g0|%g0, %g1}";
694 return "vmovapd\t{%g1, %g0|%g0, %g1}";
697 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
708 && (misaligned_operand (operands[0], <MODE>mode)
709 || misaligned_operand (operands[1], <MODE>mode)))
710 return "vmovups\t{%1, %0|%0, %1}";
712 return "%vmovaps\t{%1, %0|%0, %1}";
718 && (misaligned_operand (operands[0], <MODE>mode)
719 || misaligned_operand (operands[1], <MODE>mode)))
720 return "vmovupd\t{%1, %0|%0, %1}";
722 return "%vmovapd\t{%1, %0|%0, %1}";
727 && (misaligned_operand (operands[0], <MODE>mode)
728 || misaligned_operand (operands[1], <MODE>mode)))
729 return "vmovdqu\t{%1, %0|%0, %1}";
731 return "%vmovdqa\t{%1, %0|%0, %1}";
733 if (misaligned_operand (operands[0], <MODE>mode)
734 || misaligned_operand (operands[1], <MODE>mode))
735 return "vmovdqu64\t{%1, %0|%0, %1}";
737 return "vmovdqa64\t{%1, %0|%0, %1}";
746 [(set_attr "type" "sselog1,ssemov,ssemov")
747 (set_attr "prefix" "maybe_vex")
749 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
750 (const_string "<ssePSmode>")
751 (and (eq_attr "alternative" "2")
752 (match_test "TARGET_SSE_TYPELESS_STORES"))
753 (const_string "<ssePSmode>")
754 (match_test "TARGET_AVX")
755 (const_string "<sseinsnmode>")
756 (ior (not (match_test "TARGET_SSE2"))
757 (match_test "optimize_function_for_size_p (cfun)"))
758 (const_string "V4SF")
759 (and (eq_attr "alternative" "0")
760 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
763 (const_string "<sseinsnmode>")))])
765 (define_insn "avx512f_load<mode>_mask"
766 [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
768 (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
769 (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
770 (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))]
773 switch (MODE_<sseinsnmode>)
777 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
779 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
782 [(set_attr "type" "ssemov")
783 (set_attr "prefix" "evex")
784 (set_attr "memory" "none,load")
785 (set_attr "mode" "<sseinsnmode>")])
787 (define_insn "avx512f_blendm<mode>"
788 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
790 (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
791 (match_operand:VI48F_512 1 "register_operand" "v")
792 (match_operand:<avx512fmaskmode> 3 "register_operand" "k")))]
794 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
795 [(set_attr "type" "ssemov")
796 (set_attr "prefix" "evex")
797 (set_attr "mode" "<sseinsnmode>")])
799 (define_insn "avx512f_store<mode>_mask"
800 [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
802 (match_operand:VI48F_512 1 "register_operand" "v")
804 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
807 switch (MODE_<sseinsnmode>)
811 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
813 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
816 [(set_attr "type" "ssemov")
817 (set_attr "prefix" "evex")
818 (set_attr "memory" "store")
819 (set_attr "mode" "<sseinsnmode>")])
821 (define_insn "sse2_movq128"
822 [(set (match_operand:V2DI 0 "register_operand" "=x")
825 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
826 (parallel [(const_int 0)]))
829 "%vmovq\t{%1, %0|%0, %q1}"
830 [(set_attr "type" "ssemov")
831 (set_attr "prefix" "maybe_vex")
832 (set_attr "mode" "TI")])
834 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
835 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
836 ;; from memory, we'd prefer to load the memory directly into the %xmm
837 ;; register. To facilitate this happy circumstance, this pattern won't
838 ;; split until after register allocation. If the 64-bit value didn't
839 ;; come from memory, this is the best we can do. This is much better
840 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from there.
843 (define_insn_and_split "movdi_to_sse"
845 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
846 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
847 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
848 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
850 "&& reload_completed"
853 if (register_operand (operands[1], DImode))
855 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
856 Assemble the 64-bit DImode value in an xmm register. */
857 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
858 gen_rtx_SUBREG (SImode, operands[1], 0)));
859 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
860 gen_rtx_SUBREG (SImode, operands[1], 4)));
861 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
864 else if (memory_operand (operands[1], DImode))
866 rtx tmp = gen_reg_rtx (V2DImode);
867 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
868 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
875 [(set (match_operand:V4SF 0 "register_operand")
876 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
877 "TARGET_SSE && reload_completed"
880 (vec_duplicate:V4SF (match_dup 1))
884 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
885 operands[2] = CONST0_RTX (V4SFmode);
889 [(set (match_operand:V2DF 0 "register_operand")
890 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
891 "TARGET_SSE2 && reload_completed"
892 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
894 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
895 operands[2] = CONST0_RTX (DFmode);
898 (define_expand "push<mode>1"
899 [(match_operand:VMOVE 0 "register_operand")]
902 ix86_expand_push (<MODE>mode, operands[0]);
906 (define_expand "movmisalign<mode>"
907 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
908 (match_operand:VMOVE 1 "nonimmediate_operand"))]
911 ix86_expand_vector_move_misalign (<MODE>mode, operands);
915 (define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
916 [(set (match_operand:VF 0 "register_operand" "=v")
918 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
920 "TARGET_SSE && <mask_mode512bit_condition>"
922 switch (get_attr_mode (insn))
927 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
929 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
932 [(set_attr "type" "ssemov")
933 (set_attr "movu" "1")
934 (set_attr "ssememalign" "8")
935 (set_attr "prefix" "maybe_vex")
937 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
938 (const_string "<ssePSmode>")
939 (match_test "TARGET_AVX")
940 (const_string "<MODE>")
941 (match_test "optimize_function_for_size_p (cfun)")
942 (const_string "V4SF")
944 (const_string "<MODE>")))])
946 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
947 [(set (match_operand:VF 0 "memory_operand" "=m")
949 [(match_operand:VF 1 "register_operand" "v")]
953 switch (get_attr_mode (insn))
958 return "%vmovups\t{%1, %0|%0, %1}";
960 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
963 [(set_attr "type" "ssemov")
964 (set_attr "movu" "1")
965 (set_attr "ssememalign" "8")
966 (set_attr "prefix" "maybe_vex")
968 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
969 (match_test "TARGET_SSE_TYPELESS_STORES"))
970 (const_string "<ssePSmode>")
971 (match_test "TARGET_AVX")
972 (const_string "<MODE>")
973 (match_test "optimize_function_for_size_p (cfun)")
974 (const_string "V4SF")
976 (const_string "<MODE>")))])
978 (define_insn "avx512f_storeu<ssemodesuffix>512_mask"
979 [(set (match_operand:VF_512 0 "memory_operand" "=m")
982 [(match_operand:VF_512 1 "register_operand" "v")]
985 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
988 switch (get_attr_mode (insn))
991 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
993 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
996 [(set_attr "type" "ssemov")
997 (set_attr "movu" "1")
998 (set_attr "memory" "store")
999 (set_attr "prefix" "evex")
1000 (set_attr "mode" "<sseinsnmode>")])
1002 (define_insn "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1003 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
1004 (unspec:VI_UNALIGNED_LOADSTORE
1005 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
1007 "TARGET_SSE2 && <mask_mode512bit_condition>"
1009 switch (get_attr_mode (insn))
1013 return "%vmovups\t{%1, %0|%0, %1}";
1015 if (<MODE>mode == V8DImode)
1016 return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1018 return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1020 return "%vmovdqu\t{%1, %0|%0, %1}";
1023 [(set_attr "type" "ssemov")
1024 (set_attr "movu" "1")
1025 (set_attr "ssememalign" "8")
1026 (set (attr "prefix_data16")
1028 (match_test "TARGET_AVX")
1030 (const_string "1")))
1031 (set_attr "prefix" "maybe_vex")
1033 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1034 (const_string "<ssePSmode>")
1035 (match_test "TARGET_AVX")
1036 (const_string "<sseinsnmode>")
1037 (match_test "optimize_function_for_size_p (cfun)")
1038 (const_string "V4SF")
1040 (const_string "<sseinsnmode>")))])
1042 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1043 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m")
1044 (unspec:VI_UNALIGNED_LOADSTORE
1045 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")]
1049 switch (get_attr_mode (insn))
1053 return "%vmovups\t{%1, %0|%0, %1}";
1055 if (<MODE>mode == V8DImode)
1056 return "vmovdqu64\t{%1, %0|%0, %1}";
1058 return "vmovdqu32\t{%1, %0|%0, %1}";
1060 return "%vmovdqu\t{%1, %0|%0, %1}";
1063 [(set_attr "type" "ssemov")
1064 (set_attr "movu" "1")
1065 (set_attr "ssememalign" "8")
1066 (set (attr "prefix_data16")
1068 (match_test "TARGET_AVX")
1070 (const_string "1")))
1071 (set_attr "prefix" "maybe_vex")
1073 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1074 (match_test "TARGET_SSE_TYPELESS_STORES"))
1075 (const_string "<ssePSmode>")
1076 (match_test "TARGET_AVX")
1077 (const_string "<sseinsnmode>")
1078 (match_test "optimize_function_for_size_p (cfun)")
1079 (const_string "V4SF")
1081 (const_string "<sseinsnmode>")))])
1083 (define_insn "avx512f_storedqu<mode>_mask"
1084 [(set (match_operand:VI48_512 0 "memory_operand" "=m")
1087 [(match_operand:VI48_512 1 "register_operand" "v")]
1090 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
1093 if (<MODE>mode == V8DImode)
1094 return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1096 return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1098 [(set_attr "type" "ssemov")
1099 (set_attr "movu" "1")
1100 (set_attr "memory" "store")
1101 (set_attr "prefix" "evex")
1102 (set_attr "mode" "<sseinsnmode>")])
1104 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1105 [(set (match_operand:VI1 0 "register_operand" "=x")
1106 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1109 "%vlddqu\t{%1, %0|%0, %1}"
1110 [(set_attr "type" "ssemov")
1111 (set_attr "movu" "1")
1112 (set_attr "ssememalign" "8")
1113 (set (attr "prefix_data16")
1115 (match_test "TARGET_AVX")
1117 (const_string "0")))
1118 (set (attr "prefix_rep")
1120 (match_test "TARGET_AVX")
1122 (const_string "1")))
1123 (set_attr "prefix" "maybe_vex")
1124 (set_attr "mode" "<sseinsnmode>")])
1126 (define_insn "sse2_movnti<mode>"
1127 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1128 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1131 "movnti\t{%1, %0|%0, %1}"
1132 [(set_attr "type" "ssemov")
1133 (set_attr "prefix_data16" "0")
1134 (set_attr "mode" "<MODE>")])
1136 (define_insn "<sse>_movnt<mode>"
1137 [(set (match_operand:VF 0 "memory_operand" "=m")
1139 [(match_operand:VF 1 "register_operand" "v")]
1142 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1143 [(set_attr "type" "ssemov")
1144 (set_attr "prefix" "maybe_vex")
1145 (set_attr "mode" "<MODE>")])
1147 (define_insn "<sse2>_movnt<mode>"
1148 [(set (match_operand:VI8 0 "memory_operand" "=m")
1149 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1152 "%vmovntdq\t{%1, %0|%0, %1}"
1153 [(set_attr "type" "ssecvt")
1154 (set (attr "prefix_data16")
1156 (match_test "TARGET_AVX")
1158 (const_string "1")))
1159 (set_attr "prefix" "maybe_vex")
1160 (set_attr "mode" "<sseinsnmode>")])
1162 ; Expand patterns for non-temporal stores. At the moment, only those
1163 ; that directly map to insns are defined; it would be possible to
1164 ; define patterns for other modes that would expand to several insns.
1166 ;; Modes handled by storent patterns.
;; Scalar entries: SI needs TARGET_SSE2, DI additionally needs TARGET_64BIT,
;; and SF/DF need TARGET_SSE4A.  Vector entries: the DImode integer vectors
;; and the SF/DF float vectors, gated on TARGET_AVX512F (512-bit),
;; TARGET_AVX (256-bit) or TARGET_SSE2 (V2DI, V2DF); V4SF is unconditional.
1167 (define_mode_iterator STORENT_MODE
1168 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1169 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1170 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1171 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1172 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1174 (define_expand "storent<mode>"
1175 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1176 (unspec:STORENT_MODE
1177 [(match_operand:STORENT_MODE 1 "register_operand")]
1181 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1183 ;; Parallel floating point arithmetic
1185 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander on VF vector modes.  All the work is delegated to
;; ix86_expand_fp_absneg_operator, which receives the RTL code, the mode
;; and the operand array; the expander then finishes with DONE.
;; NOTE(review): the code iterator is presumably abs/neg, judging by the
;; helper name -- confirm against the iterator definition.
1187 (define_expand "<code><mode>2"
1188 [(set (match_operand:VF 0 "register_operand")
1190 (match_operand:VF 1 "register_operand")))]
1192 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Combined abs/neg insn-and-split on VF modes.  Operand 3 is the
;; absneg_operator applied to operand 1; operand 2 is carried in a (use).
;; Once reload has completed the insn is split into a single logical
;; operation on the two inputs: XOR when operand 3 is NEG, AND otherwise.
;; If operand 1 is in memory, the two inputs are swapped so that the
;; memory operand becomes the second source.  The first two alternatives
;; are non-AVX, the last two AVX.
1194 (define_insn_and_split "*absneg<mode>2"
1195 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1196 (match_operator:VF 3 "absneg_operator"
1197 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1198 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1201 "&& reload_completed"
1204 enum rtx_code absneg_op;
1210 if (MEM_P (operands[1]))
1211 op1 = operands[2], op2 = operands[1];
1213 op1 = operands[1], op2 = operands[2];
1218 if (rtx_equal_p (operands[0], operands[1]))
1224 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1225 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1226 t = gen_rtx_SET (VOIDmode, operands[0], t);
1230 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Vector floating-point add/subtract expander on VF modes, with an
;; optional masked variant generated through <mask_name>.  Both sources
;; may be memory at this point; ix86_fixup_binary_operands_no_copy is
;; called to adjust the operands before the insn pattern is matched.
1232 (define_expand "<plusminus_insn><mode>3<mask_name>"
1233 [(set (match_operand:VF 0 "register_operand")
1235 (match_operand:VF 1 "nonimmediate_operand")
1236 (match_operand:VF 2 "nonimmediate_operand")))]
1237 "TARGET_SSE && <mask_mode512bit_condition>"
1238 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for vector floating-point add/subtract on VF modes.
;; Alternative 0 (noavx) is the two-operand form, with the destination
;; tied to operand 1; alternative 1 (avx) is the three-operand "v" form
;; and can print a mask operand.  The condition also requires
;; ix86_binary_operator_ok for the operands.
1240 (define_insn "*<plusminus_insn><mode>3<mask_name>"
1241 [(set (match_operand:VF 0 "register_operand" "=x,v")
1243 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,v")
1244 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1245 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
1247 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1248 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1249 [(set_attr "isa" "noavx,avx")
1250 (set_attr "type" "sseadd")
1251 (set_attr "prefix" "<mask_prefix3>")
1252 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed add/subtract on 128-bit vector operands (VF_128).
;; The mnemonic takes the scalar mode suffix and the "mode" attribute is
;; the scalar element mode.  Alternative 0 is the two-operand non-AVX
;; form, alternative 1 the three-operand VEX form.
1254 (define_insn "<sse>_vm<plusminus_insn><mode>3"
1255 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1258 (match_operand:VF_128 1 "register_operand" "0,v")
1259 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1264 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1265 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1266 [(set_attr "isa" "noavx,avx")
1267 (set_attr "type" "sseadd")
1268 (set_attr "prefix" "orig,vex")
1269 (set_attr "mode" "<ssescalarmode>")])
;; Vector floating-point multiply expander on VF modes, with an optional
;; masked variant.  ix86_fixup_binary_operands_no_copy is called with
;; MULT to adjust the operands before matching.
1271 (define_expand "mul<mode>3<mask_name>"
1272 [(set (match_operand:VF 0 "register_operand")
1274 (match_operand:VF 1 "nonimmediate_operand")
1275 (match_operand:VF 2 "nonimmediate_operand")))]
1276 "TARGET_SSE && <mask_mode512bit_condition>"
1277 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Insn for vector floating-point multiply on VF modes.  Operand 1 is
;; marked commutative ("%").  Alternative 0 is the two-operand non-AVX
;; form, alternative 1 the three-operand "v" form, which can print a
;; mask operand.
1279 (define_insn "*mul<mode>3<mask_name>"
1280 [(set (match_operand:VF 0 "register_operand" "=x,v")
1282 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
1283 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1284 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
1286 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1287 vmul<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1288 [(set_attr "isa" "noavx,avx")
1289 (set_attr "type" "ssemul")
1290 (set_attr "prefix" "<mask_prefix3>")
1291 (set_attr "btver2_decode" "direct,double")
1292 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed multiply/divide on 128-bit vector operands (VF_128).
;; The same <multdiv_mnemonic> text supplies both the mnemonic and the
;; tail of the "type" attribute.  Alternative 0 is the two-operand
;; non-AVX form, alternative 1 the three-operand VEX form.
1294 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3"
1295 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1298 (match_operand:VF_128 1 "register_operand" "0,v")
1299 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1304 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1305 v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1306 [(set_attr "isa" "noavx,avx")
1307 (set_attr "type" "sse<multdiv_mnemonic>")
1308 (set_attr "prefix" "orig,vex")
1309 (set_attr "btver2_decode" "direct,double")
1310 (set_attr "mode" "<ssescalarmode>")])
;; Vector division expander for the VF2 modes.  The dividend must be a
;; register; the divisor may be memory.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before matching.
1312 (define_expand "div<mode>3"
1313 [(set (match_operand:VF2 0 "register_operand")
1314 (div:VF2 (match_operand:VF2 1 "register_operand")
1315 (match_operand:VF2 2 "nonimmediate_operand")))]
1317 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Vector division expander for the VF1 modes.  After the usual operand
;; fixup, a guarded path calls ix86_emit_swdivsf to emit the division.
;; The guard includes TARGET_RECIP_VEC_DIV, not optimising for size,
;; finite-math-only without trapping math, and unsafe math
;; optimisations.
1319 (define_expand "div<mode>3"
1320 [(set (match_operand:VF1 0 "register_operand")
1321 (div:VF1 (match_operand:VF1 1 "register_operand")
1322 (match_operand:VF1 2 "nonimmediate_operand")))]
1325 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1328 && TARGET_RECIP_VEC_DIV
1329 && !optimize_insn_for_size_p ()
1330 && flag_finite_math_only && !flag_trapping_math
1331 && flag_unsafe_math_optimizations)
1333 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Insn for vector floating-point division on VF modes.  Operand 1 must
;; be a register.  Alternative 0 is the two-operand non-AVX form,
;; alternative 1 the three-operand "v" form, which can print a mask
;; operand.
1338 (define_insn "<sse>_div<mode>3<mask_name>"
1339 [(set (match_operand:VF 0 "register_operand" "=x,v")
1341 (match_operand:VF 1 "register_operand" "0,v")
1342 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1343 "TARGET_SSE && <mask_mode512bit_condition>"
1345 div<ssemodesuffix>\t{%2, %0|%0, %2}
1346 vdiv<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1347 [(set_attr "isa" "noavx,avx")
1348 (set_attr "type" "ssediv")
1349 (set_attr "prefix" "<mask_prefix3>")
1350 (set_attr "mode" "<MODE>")])
;; Packed single-precision reciprocal for the 128- and 256-bit VF1
;; modes, represented as UNSPEC_RCP and emitted as rcpps (maybe_vex
;; prefix).
1352 (define_insn "<sse>_rcp<mode>2"
1353 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1355 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1357 "%vrcpps\t{%1, %0|%0, %1}"
1358 [(set_attr "type" "sse")
1359 (set_attr "atom_sse_attr" "rcp")
1360 (set_attr "btver2_sse_attr" "rcp")
1361 (set_attr "prefix" "maybe_vex")
1362 (set_attr "mode" "<MODE>")])
;; Scalar single-precision reciprocal on V4SF operands, emitted as rcpss
;; (non-AVX, destination tied to operand 2) or vrcpss (AVX, three
;; operands).  Operand 1 is the unspec input and is printed with the %k
;; modifier in Intel syntax; operand 2 is the other vector input.  The
;; "mode" attribute is SF.
1364 (define_insn "sse_vmrcpv4sf2"
1365 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1367 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1369 (match_operand:V4SF 2 "register_operand" "0,x")
1373 rcpss\t{%1, %0|%0, %k1}
1374 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1375 [(set_attr "isa" "noavx,avx")
1376 (set_attr "type" "sse")
1377 (set_attr "ssememalign" "32")
1378 (set_attr "atom_sse_attr" "rcp")
1379 (set_attr "btver2_sse_attr" "rcp")
1380 (set_attr "prefix" "orig,vex")
1381 (set_attr "mode" "SF")])
;; 512-bit (VF_512) packed reciprocal emitted as vrcp14 with the
;; per-mode suffix and EVEX prefix.  A masked variant is generated
;; through <mask_name>, with the mask printed via <mask_operand2>.
1383 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1384 [(set (match_operand:VF_512 0 "register_operand" "=v")
1386 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1389 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1390 [(set_attr "type" "sse")
1391 (set_attr "prefix" "evex")
1392 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed vrcp14 on 128-bit vector operands (VF_128), EVEX
;; encoded.  It takes two inputs: operand 1 is a register, operand 2 a
;; register or memory.
1394 (define_insn "*srcp14<mode>"
1395 [(set (match_operand:VF_128 0 "register_operand" "=v")
1398 [(match_operand:VF_128 1 "register_operand" "v")
1399 (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
1404 "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1405 [(set_attr "type" "sse")
1406 (set_attr "prefix" "evex")
1407 (set_attr "mode" "<MODE>")])
;; Vector square-root expander for the VF2 modes; the pattern is a plain
;; (sqrt ...) of a register-or-memory operand.
1409 (define_expand "sqrt<mode>2"
1410 [(set (match_operand:VF2 0 "register_operand")
1411 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Vector square-root expander for the VF1 modes.  A guarded path calls
;; ix86_emit_swsqrtsf with its last argument false.  The guard includes
;; TARGET_RECIP_VEC_SQRT, not optimising for size, finite-math-only
;; without trapping math, and unsafe math optimisations.
1414 (define_expand "sqrt<mode>2"
1415 [(set (match_operand:VF1 0 "register_operand")
1416 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1420 && TARGET_RECIP_VEC_SQRT
1421 && !optimize_insn_for_size_p ()
1422 && flag_finite_math_only && !flag_trapping_math
1423 && flag_unsafe_math_optimizations)
1425 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Insn for vector square root on VF modes, emitted as sqrt plus the
;; per-mode suffix.  It has a single alternative, and the template can
;; print a mask operand through <mask_operand2>.
1430 (define_insn "<sse>_sqrt<mode>2<mask_name>"
1431 [(set (match_operand:VF 0 "register_operand" "=v")
1432 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "vm")))]
1433 "TARGET_SSE && <mask_mode512bit_condition>"
1434 "%vsqrt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1435 [(set_attr "type" "sse")
1436 (set_attr "atom_sse_attr" "sqrt")
1437 (set_attr "btver2_sse_attr" "sqrt")
1438 (set_attr "prefix" "maybe_vex")
1439 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed square root on 128-bit vector operands (VF_128).
;; Operand 1 is the value whose root is taken; operand 2 is the other
;; vector input, tied to the destination in the non-AVX alternative.
1441 (define_insn "<sse>_vmsqrt<mode>2"
1442 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1445 (match_operand:VF_128 1 "nonimmediate_operand" "xm,vm"))
1446 (match_operand:VF_128 2 "register_operand" "0,v")
1450 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1451 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1452 [(set_attr "isa" "noavx,avx")
1453 (set_attr "type" "sse")
1454 (set_attr "atom_sse_attr" "sqrt")
1455 (set_attr "prefix" "orig,vex")
1456 (set_attr "btver2_sse_attr" "sqrt")
1457 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal square-root expander for the 128- and 256-bit VF1 modes
;; (UNSPEC_RSQRT).  The body calls ix86_emit_swsqrtsf with its last
;; argument true.
1459 (define_expand "rsqrt<mode>2"
1460 [(set (match_operand:VF1_128_256 0 "register_operand")
1462 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1465 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed single-precision reciprocal square root for the 128- and
;; 256-bit VF1 modes, represented as UNSPEC_RSQRT and emitted as rsqrtps
;; (maybe_vex prefix).
1469 (define_insn "<sse>_rsqrt<mode>2"
1470 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1472 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1474 "%vrsqrtps\t{%1, %0|%0, %1}"
1475 [(set_attr "type" "sse")
1476 (set_attr "prefix" "maybe_vex")
1477 (set_attr "mode" "<MODE>")])
;; 512-bit (VF_512) packed reciprocal square root emitted as vrsqrt14
;; with the per-mode suffix and EVEX prefix.  A masked variant is
;; generated through <mask_name>.
1479 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1480 [(set (match_operand:VF_512 0 "register_operand" "=v")
1482 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1485 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1486 [(set_attr "type" "sse")
1487 (set_attr "prefix" "evex")
1488 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed vrsqrt14 on 128-bit vector operands (VF_128), EVEX
;; encoded.  It takes two inputs: operand 1 is a register, operand 2 a
;; register or memory.
1490 (define_insn "*rsqrt14<mode>"
1491 [(set (match_operand:VF_128 0 "register_operand" "=v")
1494 [(match_operand:VF_128 1 "register_operand" "v")
1495 (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
1500 "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1501 [(set_attr "type" "sse")
1502 (set_attr "prefix" "evex")
1503 (set_attr "mode" "<MODE>")])
;; Scalar single-precision reciprocal square root on V4SF operands,
;; emitted as rsqrtss (non-AVX, destination tied to operand 2) or
;; vrsqrtss (AVX, three operands).  Operand 1 is the unspec input and is
;; printed with the %k modifier in Intel syntax.  The "mode" attribute
;; is SF.
1505 (define_insn "sse_vmrsqrtv4sf2"
1506 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1508 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1510 (match_operand:V4SF 2 "register_operand" "0,x")
1514 rsqrtss\t{%1, %0|%0, %k1}
1515 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1516 [(set_attr "isa" "noavx,avx")
1517 (set_attr "type" "sse")
1518 (set_attr "ssememalign" "32")
1519 (set_attr "prefix" "orig,vex")
1520 (set_attr "mode" "SF")])
1522 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1523 ;; isn't really correct, as those rtl operators aren't defined when
1524 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Vector floating-point min/max expander on VF modes, with an optional
;; masked variant.  When finite-math-only is off, operand 1 is first
;; forced into a register, which matches the register-only operand 1 of
;; the !flag_finite_math_only insn.  The usual binary-operand fixup
;; follows.
1526 (define_expand "<code><mode>3<mask_name>"
1527 [(set (match_operand:VF 0 "register_operand")
1529 (match_operand:VF 1 "nonimmediate_operand")
1530 (match_operand:VF 2 "nonimmediate_operand")))]
1531 "TARGET_SSE && <mask_mode512bit_condition>"
1533 if (!flag_finite_math_only)
1534 operands[1] = force_reg (<MODE>mode, operands[1]);
1535 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max insn used only under flag_finite_math_only.  In this variant
;; operand 1 is marked commutative ("%") and may be memory, subject to
;; ix86_binary_operator_ok.  Alternative 0 is the two-operand non-AVX
;; form, alternative 1 the three-operand "v" form.
1538 (define_insn "*<code><mode>3_finite<mask_name>"
1539 [(set (match_operand:VF 0 "register_operand" "=x,v")
1541 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
1542 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1543 "TARGET_SSE && flag_finite_math_only
1544 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1545 && <mask_mode512bit_condition>"
1547 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1548 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1549 [(set_attr "isa" "noavx,avx")
1550 (set_attr "type" "sseadd")
1551 (set_attr "btver2_sse_attr" "maxmin")
1552 (set_attr "prefix" "<mask_prefix3>")
1553 (set_attr "mode" "<MODE>")])
;; Min/max insn used when flag_finite_math_only is off.  Unlike the
;; _finite variant, operand 1 must be a register and carries no
;; commutative marker, so the operand order is kept as given.
1555 (define_insn "*<code><mode>3<mask_name>"
1556 [(set (match_operand:VF 0 "register_operand" "=x,v")
1558 (match_operand:VF 1 "register_operand" "0,v")
1559 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1560 "TARGET_SSE && !flag_finite_math_only
1561 && <mask_mode512bit_condition>"
1563 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1564 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1565 [(set_attr "isa" "noavx,avx")
1566 (set_attr "type" "sseadd")
1567 (set_attr "btver2_sse_attr" "maxmin")
1568 (set_attr "prefix" "<mask_prefix3>")
1569 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed min/max on 128-bit vector operands (VF_128).
;; Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand VEX form; the "mode" attribute is the scalar element
;; mode.
1571 (define_insn "<sse>_vm<code><mode>3"
1572 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1575 (match_operand:VF_128 1 "register_operand" "0,v")
1576 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1581 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1582 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1583 [(set_attr "isa" "noavx,avx")
1584 (set_attr "type" "sse")
1585 (set_attr "btver2_sse_attr" "maxmin")
1586 (set_attr "prefix" "orig,vex")
1587 (set_attr "mode" "<ssescalarmode>")])
1589 ;; These versions of the min/max patterns implement exactly the operations
1590 ;; min = (op1 < op2 ? op1 : op2)
1591 ;; max = (!(op1 < op2) ? op1 : op2)
1592 ;; Their operands are not commutative, and thus they may be used in the
1593 ;; presence of -0.0 and NaN.
;; Non-commutative minimum on VF modes, emitted as min/vmin with the
;; per-mode suffix.  Operand 1 must be a register and operand 2 may be
;; memory; the operand order is fixed, with no "%" marker.
1595 (define_insn "*ieee_smin<mode>3"
1596 [(set (match_operand:VF 0 "register_operand" "=v,v")
1598 [(match_operand:VF 1 "register_operand" "0,v")
1599 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1603 min<ssemodesuffix>\t{%2, %0|%0, %2}
1604 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1605 [(set_attr "isa" "noavx,avx")
1606 (set_attr "type" "sseadd")
1607 (set_attr "prefix" "orig,vex")
1608 (set_attr "mode" "<MODE>")])
;; Non-commutative maximum on VF modes, emitted as max/vmax with the
;; per-mode suffix.  Operand 1 must be a register and operand 2 may be
;; memory; the operand order is fixed, with no "%" marker.
1610 (define_insn "*ieee_smax<mode>3"
1611 [(set (match_operand:VF 0 "register_operand" "=v,v")
1613 [(match_operand:VF 1 "register_operand" "0,v")
1614 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1618 max<ssemodesuffix>\t{%2, %0|%0, %2}
1619 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1620 [(set_attr "isa" "noavx,avx")
1621 (set_attr "type" "sseadd")
1622 (set_attr "prefix" "orig,vex")
1623 (set_attr "mode" "<MODE>")])
;; 256-bit double-precision add/subtract, emitted as vaddsubpd (VEX
;; only, three operands).  The pattern includes a (minus op1 op2) built
;; from match_dup of the two inputs.
1625 (define_insn "avx_addsubv4df3"
1626 [(set (match_operand:V4DF 0 "register_operand" "=x")
1629 (match_operand:V4DF 1 "register_operand" "x")
1630 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1631 (minus:V4DF (match_dup 1) (match_dup 2))
1634 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1635 [(set_attr "type" "sseadd")
1636 (set_attr "prefix" "vex")
1637 (set_attr "mode" "V4DF")])
;; 128-bit double-precision add/subtract.  Alternative 0 emits the
;; two-operand addsubpd, with the destination tied to operand 1;
;; alternative 1 emits the three-operand vaddsubpd.
1639 (define_insn "sse3_addsubv2df3"
1640 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1643 (match_operand:V2DF 1 "register_operand" "0,x")
1644 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1645 (minus:V2DF (match_dup 1) (match_dup 2))
1649 addsubpd\t{%2, %0|%0, %2}
1650 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1651 [(set_attr "isa" "noavx,avx")
1652 (set_attr "type" "sseadd")
1653 (set_attr "atom_unit" "complex")
1654 (set_attr "prefix" "orig,vex")
1655 (set_attr "mode" "V2DF")])
;; 256-bit single-precision add/subtract, emitted as vaddsubps (VEX
;; only, three operands).  The pattern includes a (minus op1 op2) built
;; from match_dup of the two inputs.
1657 (define_insn "avx_addsubv8sf3"
1658 [(set (match_operand:V8SF 0 "register_operand" "=x")
1661 (match_operand:V8SF 1 "register_operand" "x")
1662 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1663 (minus:V8SF (match_dup 1) (match_dup 2))
1666 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1667 [(set_attr "type" "sseadd")
1668 (set_attr "prefix" "vex")
1669 (set_attr "mode" "V8SF")])
;; 128-bit single-precision add/subtract.  Alternative 0 emits the
;; two-operand addsubps and alternative 1 the three-operand vaddsubps.
;; prefix_rep is set to 1 for the non-AVX alternative only.
1671 (define_insn "sse3_addsubv4sf3"
1672 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1675 (match_operand:V4SF 1 "register_operand" "0,x")
1676 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1677 (minus:V4SF (match_dup 1) (match_dup 2))
1681 addsubps\t{%2, %0|%0, %2}
1682 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1683 [(set_attr "isa" "noavx,avx")
1684 (set_attr "type" "sseadd")
1685 (set_attr "prefix" "orig,vex")
1686 (set_attr "prefix_rep" "1,*")
1687 (set_attr "mode" "V4SF")])
;; 256-bit double-precision horizontal add/subtract, emitted as
;; vhaddpd/vhsubpd.  The pattern pairs adjacent elements with
;; vec_select: elements 0 and 1 of operand 1, 0 and 1 of operand 2, then
;; 2 and 3 of operand 1, and 2 and 3 of operand 2.
1689 (define_insn "avx_h<plusminus_insn>v4df3"
1690 [(set (match_operand:V4DF 0 "register_operand" "=x")
1695 (match_operand:V4DF 1 "register_operand" "x")
1696 (parallel [(const_int 0)]))
1697 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1700 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1701 (parallel [(const_int 0)]))
1702 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1705 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1706 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1708 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1709 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1711 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1712 [(set_attr "type" "sseadd")
1713 (set_attr "prefix" "vex")
1714 (set_attr "mode" "V4DF")])
;; Expander for 128-bit double-precision horizontal add.  It builds the
;; canonical form, with element 0 paired with element 1 for each of the
;; two inputs.  The *sse3_haddv2df3 insn accepts the element indices as
;; operands.
1716 (define_expand "sse3_haddv2df3"
1717 [(set (match_operand:V2DF 0 "register_operand")
1721 (match_operand:V2DF 1 "register_operand")
1722 (parallel [(const_int 0)]))
1723 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1726 (match_operand:V2DF 2 "nonimmediate_operand")
1727 (parallel [(const_int 0)]))
1728 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; Insn for 128-bit double-precision horizontal add, emitted as
;; haddpd/vhaddpd.  The selected element indices are operands 3 to 6,
;; each 0 or 1.  The condition requires the two indices within each pair
;; to differ, so either element order of a pair is accepted.
1731 (define_insn "*sse3_haddv2df3"
1732 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1736 (match_operand:V2DF 1 "register_operand" "0,x")
1737 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1740 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1743 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1744 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1747 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1749 && INTVAL (operands[3]) != INTVAL (operands[4])
1750 && INTVAL (operands[5]) != INTVAL (operands[6])"
1752 haddpd\t{%2, %0|%0, %2}
1753 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1754 [(set_attr "isa" "noavx,avx")
1755 (set_attr "type" "sseadd")
1756 (set_attr "prefix" "orig,vex")
1757 (set_attr "mode" "V2DF")])
;; Insn for 128-bit double-precision horizontal subtract, emitted as
;; hsubpd/vhsubpd.  Unlike the hadd insn, the element indices are fixed
;; constants: element 0 then element 1 of each input.
1759 (define_insn "sse3_hsubv2df3"
1760 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1764 (match_operand:V2DF 1 "register_operand" "0,x")
1765 (parallel [(const_int 0)]))
1766 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1769 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1770 (parallel [(const_int 0)]))
1771 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1774 hsubpd\t{%2, %0|%0, %2}
1775 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
1776 [(set_attr "isa" "noavx,avx")
1777 (set_attr "type" "sseadd")
1778 (set_attr "prefix" "orig,vex")
1779 (set_attr "mode" "V2DF")])
;; Scalar (DF) result form of the horizontal add of one V2DF register.
;; The two selected element indices (operands 2 and 3) must differ.  The
;; same register is used for both sources: "haddpd %0, %0" in the
;; non-AVX alternative, "vhaddpd %1, %1, %0" in the AVX one.
1781 (define_insn "*sse3_haddv2df3_low"
1782 [(set (match_operand:DF 0 "register_operand" "=x,x")
1785 (match_operand:V2DF 1 "register_operand" "0,x")
1786 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
1789 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
1791 && INTVAL (operands[2]) != INTVAL (operands[3])"
1793 haddpd\t{%0, %0|%0, %0}
1794 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
1795 [(set_attr "isa" "noavx,avx")
1796 (set_attr "type" "sseadd1")
1797 (set_attr "prefix" "orig,vex")
1798 (set_attr "mode" "V2DF")])
;; Scalar (DF) result form of the horizontal subtract of one V2DF
;; register, with fixed element indices 0 and 1.  The same register is
;; used for both sources of hsubpd/vhsubpd.
1800 (define_insn "*sse3_hsubv2df3_low"
1801 [(set (match_operand:DF 0 "register_operand" "=x,x")
1804 (match_operand:V2DF 1 "register_operand" "0,x")
1805 (parallel [(const_int 0)]))
1808 (parallel [(const_int 1)]))))]
1811 hsubpd\t{%0, %0|%0, %0}
1812 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
1813 [(set_attr "isa" "noavx,avx")
1814 (set_attr "type" "sseadd1")
1815 (set_attr "prefix" "orig,vex")
1816 (set_attr "mode" "V2DF")])
;; 256-bit single-precision horizontal add/subtract, emitted as
;; vhaddps/vhsubps.  Adjacent element pairs are selected in this order:
;; operand 1 pairs (0,1) and (2,3), operand 2 pairs (0,1) and (2,3),
;; then operand 1 pairs (4,5) and (6,7), and operand 2 pairs (4,5) and
;; (6,7).
1818 (define_insn "avx_h<plusminus_insn>v8sf3"
1819 [(set (match_operand:V8SF 0 "register_operand" "=x")
1825 (match_operand:V8SF 1 "register_operand" "x")
1826 (parallel [(const_int 0)]))
1827 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1829 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1830 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1834 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1835 (parallel [(const_int 0)]))
1836 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1838 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1839 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1843 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1844 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1846 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1847 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1850 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1851 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1853 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1854 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1856 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1857 [(set_attr "type" "sseadd")
1858 (set_attr "prefix" "vex")
1859 (set_attr "mode" "V8SF")])
;; 128-bit single-precision horizontal add/subtract, emitted as
;; haddps/hsubps (non-AVX, two operands) or the "v" forms (AVX, three
;; operands).  Element pairs (0,1) and (2,3) are taken from operand 1
;; and then from operand 2.  prefix_rep is 1 for the non-AVX alternative
;; only.
1861 (define_insn "sse3_h<plusminus_insn>v4sf3"
1862 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1867 (match_operand:V4SF 1 "register_operand" "0,x")
1868 (parallel [(const_int 0)]))
1869 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1871 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1872 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1876 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1877 (parallel [(const_int 0)]))
1878 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1880 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1881 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1884 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1885 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1886 [(set_attr "isa" "noavx,avx")
1887 (set_attr "type" "sseadd")
1888 (set_attr "atom_unit" "complex")
1889 (set_attr "prefix" "orig,vex")
1890 (set_attr "prefix_rep" "1,*")
1891 (set_attr "mode" "V4SF")])
;; Sum reduction of a V8DF register.  It is delegated to
;; ix86_expand_reduc, with gen_addv8df3 as the combining operation.
1893 (define_expand "reduc_splus_v8df"
1894 [(match_operand:V8DF 0 "register_operand")
1895 (match_operand:V8DF 1 "register_operand")]
1898 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
;; Sum reduction of a V4DF register, emitted as three insns:
;;   1. horizontal add of the input with itself into tmp;
;;   2. vperm2f128 of tmp with itself, immediate 1, into tmp2;
;;   3. vector add of tmp and tmp2 into the destination.
1902 (define_expand "reduc_splus_v4df"
1903 [(match_operand:V4DF 0 "register_operand")
1904 (match_operand:V4DF 1 "register_operand")]
1907 rtx tmp = gen_reg_rtx (V4DFmode);
1908 rtx tmp2 = gen_reg_rtx (V4DFmode);
1909 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1910 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1911 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF register: a single horizontal add of the
;; input with itself, written directly to the destination.
1915 (define_expand "reduc_splus_v2df"
1916 [(match_operand:V2DF 0 "register_operand")
1917 (match_operand:V2DF 1 "register_operand")]
1920 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction of a V16SF register.  It is delegated to
;; ix86_expand_reduc, with gen_addv16sf3 as the combining operation.
1924 (define_expand "reduc_splus_v16sf"
1925 [(match_operand:V16SF 0 "register_operand")
1926 (match_operand:V16SF 1 "register_operand")]
1929 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
;; Sum reduction of a V8SF register, emitted as four insns:
;;   1. horizontal add of the input with itself into tmp;
;;   2. horizontal add of tmp with itself into tmp2;
;;   3. vperm2f128 of tmp2 with itself, immediate 1, back into tmp
;;      (tmp is reused as the destination);
;;   4. vector add of tmp and tmp2 into the destination.
1933 (define_expand "reduc_splus_v8sf"
1934 [(match_operand:V8SF 0 "register_operand")
1935 (match_operand:V8SF 1 "register_operand")]
1938 rtx tmp = gen_reg_rtx (V8SFmode);
1939 rtx tmp2 = gen_reg_rtx (V8SFmode);
1940 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1941 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1942 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1943 emit_insn (gen_addv4sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF register.  The body contains two strategies:
;; two successive horizontal adds through a temporary, and the generic
;; ix86_expand_reduc helper with gen_addv4sf3.
;; NOTE(review): the choice between them presumably depends on whether
;; the horizontal-add instruction is available -- confirm against the
;; condition in the body.
1947 (define_expand "reduc_splus_v4sf"
1948 [(match_operand:V4SF 0 "register_operand")
1949 (match_operand:V4SF 1 "register_operand")]
1954 rtx tmp = gen_reg_rtx (V4SFmode);
1955 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1956 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1959 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1963 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Every entry is guarded by a target condition: AVX2 for the 256-bit
;; integer modes, AVX for the 256-bit float modes, SSE for V4SF, and
;; AVX512F for the 512-bit modes.
1964 (define_mode_iterator REDUC_SMINMAX_MODE
1965 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1966 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1967 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1968 (V4SF "TARGET_SSE") (V16SI "TARGET_AVX512F")
1969 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
1970 (V8DF "TARGET_AVX512F")])
;; Min/max reduction over the smaxmin code iterator, for every mode in
;; REDUC_SMINMAX_MODE.  It is delegated to ix86_expand_reduc, with the
;; matching element-wise <code><mode>3 generator as the combining
;; operation.
1972 (define_expand "reduc_<code>_<mode>"
1973 [(smaxmin:REDUC_SMINMAX_MODE
1974 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
1975 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
1978 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander for the VI48_512 integer modes.  It is delegated
;; to ix86_expand_reduc, with the matching <code><mode>3 generator.
1982 (define_expand "reduc_<code>_<mode>"
1984 (match_operand:VI48_512 0 "register_operand")
1985 (match_operand:VI48_512 1 "register_operand"))]
1988 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander for the VI_256 integer modes.  It is delegated to
;; ix86_expand_reduc, with the matching <code><mode>3 generator.
1992 (define_expand "reduc_<code>_<mode>"
1994 (match_operand:VI_256 0 "register_operand")
1995 (match_operand:VI_256 1 "register_operand"))]
1998 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction of a V8HI register.  It is delegated to
;; ix86_expand_reduc, with gen_uminv8hi3 as the combining operation.
2002 (define_expand "reduc_umin_v8hi"
2004 (match_operand:V8HI 0 "register_operand")
2005 (match_operand:V8HI 1 "register_operand"))]
2008 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
2012 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2014 ;; Parallel floating point comparisons
2016 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed floating-point compare for the 128- and 256-bit VF modes.
;; The comparison is chosen by immediate operand 3, restricted to 0..31,
;; and the insn is emitted as the four-operand vcmp.
2018 (define_insn "avx_cmp<mode>3"
2019 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2021 [(match_operand:VF_128_256 1 "register_operand" "x")
2022 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2023 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2026 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2027 [(set_attr "type" "ssecmp")
2028 (set_attr "length_immediate" "1")
2029 (set_attr "prefix" "vex")
2030 (set_attr "mode" "<MODE>")])
;; AVX scalar-suffixed floating-point compare on 128-bit vector operands
;; (VF_128).  The comparison is chosen by immediate operand 3 (0..31),
;; and the "mode" attribute is the scalar element mode.
2032 (define_insn "avx_vmcmp<mode>3"
2033 [(set (match_operand:VF_128 0 "register_operand" "=x")
2036 [(match_operand:VF_128 1 "register_operand" "x")
2037 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2038 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2043 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2044 [(set_attr "type" "ssecmp")
2045 (set_attr "length_immediate" "1")
2046 (set_attr "prefix" "vex")
2047 (set_attr "mode" "<ssescalarmode>")])
;; Packed compare producing a vector mask.  This variant is restricted
;; to comparison codes whose RTX class is RTX_COMM_COMPARE, so operand 1
;; can carry the commutative "%" marker.  The comparison is printed from
;; operator 3 via %D3.
2049 (define_insn "*<sse>_maskcmp<mode>3_comm"
2050 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2051 (match_operator:VF_128_256 3 "sse_comparison_operator"
2052 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2053 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2055 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2057 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2058 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2059 [(set_attr "isa" "noavx,avx")
2060 (set_attr "type" "ssecmp")
2061 (set_attr "length_immediate" "1")
2062 (set_attr "prefix" "orig,vex")
2063 (set_attr "mode" "<MODE>")])
;; Packed compare producing a vector mask, for any
;; sse_comparison_operator.  The operand order is fixed, with no "%"
;; marker.  Alternative 0 is the two-operand non-AVX form, alternative 1
;; the three-operand VEX form.
2065 (define_insn "<sse>_maskcmp<mode>3"
2066 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2067 (match_operator:VF_128_256 3 "sse_comparison_operator"
2068 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2069 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2072 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2073 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2074 [(set_attr "isa" "noavx,avx")
2075 (set_attr "type" "ssecmp")
2076 (set_attr "length_immediate" "1")
2077 (set_attr "prefix" "orig,vex")
2078 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed compare on 128-bit vector operands (VF_128), for any
;; sse_comparison_operator.  length_immediate is given explicitly as 1
;; only for the non-AVX alternative.
2080 (define_insn "<sse>_vmmaskcmp<mode>3"
2081 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2083 (match_operator:VF_128 3 "sse_comparison_operator"
2084 [(match_operand:VF_128 1 "register_operand" "0,x")
2085 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2090 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2091 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2092 [(set_attr "isa" "noavx,avx")
2093 (set_attr "type" "ssecmp")
2094 (set_attr "length_immediate" "1,*")
2095 (set_attr "prefix" "orig,vex")
2096 (set_attr "mode" "<ssescalarmode>")])
;; Predicate for the comparison immediate of the 512-bit compare insn:
;; the float modes accept 0..31, the integer modes 0..7.
2098 (define_mode_attr cmp_imm_predicate
2099 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2100 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")])
;; 512-bit compare for the VI48F_512 modes.  The result goes to a mask
;; register (constraint "k") in the mode's <avx512fmaskmode>.  The
;; comparison is chosen by immediate operand 3, whose accepted range is
;; given per mode by <cmp_imm_predicate>.
2102 (define_insn "avx512f_cmp<mode>3"
2103 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2104 (unspec:<avx512fmaskmode>
2105 [(match_operand:VI48F_512 1 "register_operand" "v")
2106 (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
2107 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2110 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2111 [(set_attr "type" "ssecmp")
2112 (set_attr "length_immediate" "1")
2113 (set_attr "prefix" "evex")
2114 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit unsigned integer compare for the VI48_512 modes
;; (UNSPEC_UNSIGNED_PCMP), emitted as vpcmpu.  The result goes to a mask
;; register, and the comparison is chosen by immediate operand 3 (0..7).
2116 (define_insn "avx512f_ucmp<mode>3"
2117 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2118 (unspec:<avx512fmaskmode>
2119 [(match_operand:VI48_512 1 "register_operand" "v")
2120 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
2121 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2122 UNSPEC_UNSIGNED_PCMP))]
2124 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2125 [(set_attr "type" "ssecmp")
2126 (set_attr "length_immediate" "1")
2127 (set_attr "prefix" "evex")
2128 (set_attr "mode" "<sseinsnmode>")])
;; EVEX scalar-suffixed floating-point compare on 128-bit vector
;; operands (VF_128), writing a mask register.  The unspec result is
;; combined with an (and ...) in the mask mode, and the comparison is
;; chosen by immediate operand 3 (0..31).
2130 (define_insn "avx512f_vmcmp<mode>3"
2131 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2132 (and:<avx512fmaskmode>
2133 (unspec:<avx512fmaskmode>
2134 [(match_operand:VF_128 1 "register_operand" "v")
2135 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2136 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2140 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2141 [(set_attr "type" "ssecmp")
2142 (set_attr "length_immediate" "1")
2143 (set_attr "prefix" "evex")
2144 (set_attr "mode" "<ssescalarmode>")])
;; Masked form of the EVEX scalar-suffixed compare.  Operand 4 is an
;; extra mask register that is and-ed into the result; it is printed
;; after the destination as {%4}.
2146 (define_insn "avx512f_vmcmp<mode>3_mask"
2147 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2148 (and:<avx512fmaskmode>
2149 (unspec:<avx512fmaskmode>
2150 [(match_operand:VF_128 1 "register_operand" "v")
2151 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2152 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2154 (and:<avx512fmaskmode>
2155 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")
2158 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0%{%4%}|%0%{%4%}, %1, %2, %3}"
2159 [(set_attr "type" "ssecmp")
2160 (set_attr "length_immediate" "1")
2161 (set_attr "prefix" "evex")
2162 (set_attr "mode" "<ssescalarmode>")])
;; EVEX floating-point compare expressed with an
;; sse_comparison_operator rather than an immediate.  The result goes to
;; a mask register, and the comparison is printed from operator 3 via
;; %D3.
2164 (define_insn "avx512f_maskcmp<mode>3"
2165 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2166 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2167 [(match_operand:VF 1 "register_operand" "v")
2168 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2170 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2171 [(set_attr "type" "ssecmp")
2172 (set_attr "length_immediate" "1")
2173 (set_attr "prefix" "evex")
2174 (set_attr "mode" "<sseinsnmode>")])
;; Scalar compare that sets the flags register in CCFP mode, emitted as
;; comi with the per-mode suffix.  Element 0 is selected from each of
;; the two vector operands.  prefix_rep is 0, and prefix_data16 depends
;; on whether the mode is DF.
2176 (define_insn "<sse>_comi"
2177 [(set (reg:CCFP FLAGS_REG)
2180 (match_operand:<ssevecmode> 0 "register_operand" "v")
2181 (parallel [(const_int 0)]))
2183 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "vm")
2184 (parallel [(const_int 0)]))))]
2185 "SSE_FLOAT_MODE_P (<MODE>mode)"
2186 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
2187 [(set_attr "type" "ssecomi")
2188 (set_attr "prefix" "maybe_vex")
2189 (set_attr "prefix_rep" "0")
2190 (set (attr "prefix_data16")
2191 (if_then_else (eq_attr "mode" "DF")
2193 (const_string "0")))
2194 (set_attr "mode" "<MODE>")])
;; Scalar compare that sets the flags register in CCFPU mode, emitted as
;; ucomi with the per-mode suffix.  Element 0 is selected from each of
;; the two vector operands.  prefix_rep is 0, and prefix_data16 depends
;; on whether the mode is DF.
2196 (define_insn "<sse>_ucomi"
2197 [(set (reg:CCFPU FLAGS_REG)
2200 (match_operand:<ssevecmode> 0 "register_operand" "v")
2201 (parallel [(const_int 0)]))
2203 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "vm")
2204 (parallel [(const_int 0)]))))]
2205 "SSE_FLOAT_MODE_P (<MODE>mode)"
2206 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
2207 [(set_attr "type" "ssecomi")
2208 (set_attr "prefix" "maybe_vex")
2209 (set_attr "prefix_rep" "0")
2210 (set (attr "prefix_data16")
2211 (if_then_else (eq_attr "mode" "DF")
2213 (const_string "0")))
2214 (set_attr "mode" "<MODE>")])
;; Vector conditional select for 512-bit modes.  Operand 3 compares the
;; VF_512 operands 4 and 5; operands 1 and 2 are the V_512 values to
;; choose between.  The condition requires the data mode and the
;; comparison mode to have the same number of elements.  Expansion is
;; done by ix86_expand_fp_vcond, whose boolean result is captured in
;; "ok".
2216 (define_expand "vcond<V_512:mode><VF_512:mode>"
2217 [(set (match_operand:V_512 0 "register_operand")
2219 (match_operator 3 ""
2220 [(match_operand:VF_512 4 "nonimmediate_operand")
2221 (match_operand:VF_512 5 "nonimmediate_operand")])
2222 (match_operand:V_512 1 "general_operand")
2223 (match_operand:V_512 2 "general_operand")))]
2225 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2226 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2228 bool ok = ix86_expand_fp_vcond (operands);
;; vcond expander with a V_256 result and VF_256 comparison inputs.
;; Operator 3 compares operands 4 and 5; operands 1 and 2 are the two
;; V_256 value operands.  Requires equal unit counts in both modes and
;; delegates to ix86_expand_fp_vcond.
2233 (define_expand "vcond<V_256:mode><VF_256:mode>"
2234 [(set (match_operand:V_256 0 "register_operand")
2236 (match_operator 3 ""
2237 [(match_operand:VF_256 4 "nonimmediate_operand")
2238 (match_operand:VF_256 5 "nonimmediate_operand")])
2239 (match_operand:V_256 1 "general_operand")
2240 (match_operand:V_256 2 "general_operand")))]
2242 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2243 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2245 bool ok = ix86_expand_fp_vcond (operands);
;; vcond expander with a V_128 result and VF_128 comparison inputs.
;; Operator 3 compares operands 4 and 5; operands 1 and 2 are the two
;; V_128 value operands.  Requires equal unit counts in both modes and
;; delegates to ix86_expand_fp_vcond.
2250 (define_expand "vcond<V_128:mode><VF_128:mode>"
2251 [(set (match_operand:V_128 0 "register_operand")
2253 (match_operator 3 ""
2254 [(match_operand:VF_128 4 "nonimmediate_operand")
2255 (match_operand:VF_128 5 "nonimmediate_operand")])
2256 (match_operand:V_128 1 "general_operand")
2257 (match_operand:V_128 2 "general_operand")))]
2259 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2260 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2262 bool ok = ix86_expand_fp_vcond (operands);
2267 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2269 ;; Parallel floating point logical operations
2271 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector FP and-not.  Two alternatives: a two-operand form with operand 1
;; tied to the destination (isa noavx) and a three-operand form (isa avx).
;; The output code formats the mnemonic into a static buffer: the suffix is
;; chosen from get_attr_mode, the template is andn%s or vandn%s, and for
;; 64-byte modes vpandn%s is used instead (see the in-line comment).
;; The "mode" attribute is computed from tuning/target tests.
2273 (define_insn "<sse>_andnot<mode>3"
2274 [(set (match_operand:VF 0 "register_operand" "=x,v")
2277 (match_operand:VF 1 "register_operand" "0,v"))
2278 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2281 static char buf[32];
2285 switch (get_attr_mode (insn))
2292 suffix = "<ssemodesuffix>";
2295 switch (which_alternative)
2298 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2301 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2307 /* There is no vandnp[sd]. Use vpandnq. */
2308 if (GET_MODE_SIZE (<MODE>mode) == 64)
2311 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2314 snprintf (buf, sizeof (buf), ops, suffix);
2317 [(set_attr "isa" "noavx,avx")
2318 (set_attr "type" "sselog")
2319 (set_attr "prefix" "orig,maybe_evex")
2321 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2322 (const_string "<ssePSmode>")
2323 (match_test "TARGET_AVX")
2324 (const_string "<MODE>")
2325 (match_test "optimize_function_for_size_p (cfun)")
2326 (const_string "V4SF")
2328 (const_string "<MODE>")))])
;; Expander for the any_logic operations on VF_128_256 modes.  Both inputs
;; may be memory at expand time; ix86_fixup_binary_operands_no_copy is
;; called on the operands before the pattern is emitted.
2330 (define_expand "<code><mode>3"
2331 [(set (match_operand:VF_128_256 0 "register_operand")
2332 (any_logic:VF_128_256
2333 (match_operand:VF_128_256 1 "nonimmediate_operand")
2334 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
2336 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; VF_512 counterpart of the <code><mode>3 expander: same operand
;; predicates and the same ix86_fixup_binary_operands_no_copy preparation.
2338 (define_expand "<code><mode>3"
2339 [(set (match_operand:VF_512 0 "register_operand")
2341 (match_operand:VF_512 1 "nonimmediate_operand")
2342 (match_operand:VF_512 2 "nonimmediate_operand")))]
2344 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matcher for commutative vector FP logic operations ("%" on operand 1).
;; Valid when TARGET_SSE and ix86_binary_operator_ok accept the operands.
;; Like the and-not pattern, the mnemonic is formatted at output time:
;; <logic>%s or v<logic>%s with a suffix from get_attr_mode, and
;; vp<logic>%s for 64-byte modes (see the in-line comment).
2346 (define_insn "*<code><mode>3"
2347 [(set (match_operand:VF 0 "register_operand" "=x,v")
2349 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
2350 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2351 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2353 static char buf[32];
2357 switch (get_attr_mode (insn))
2364 suffix = "<ssemodesuffix>";
2367 switch (which_alternative)
2370 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2373 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2379 /* There is no v<logic>p[sd]. Use vp<logic>q. */
2380 if (GET_MODE_SIZE (<MODE>mode) == 64)
2383 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2386 snprintf (buf, sizeof (buf), ops, suffix);
2389 [(set_attr "isa" "noavx,avx")
2390 (set_attr "type" "sselog")
2391 (set_attr "prefix" "orig,maybe_evex")
2393 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2394 (const_string "<ssePSmode>")
2395 (match_test "TARGET_AVX")
2396 (const_string "<MODE>")
2397 (match_test "optimize_function_for_size_p (cfun)")
2398 (const_string "V4SF")
2400 (const_string "<MODE>")))])
;; copysign expander for VF modes.  Operand 3 is the mask returned by
;; ix86_build_signbit_mask (<MODE>mode, 1, 0).  Operand 1 is combined with
;; the complement of that mask and operand 2 with the mask itself; the two
;; results live in fresh pseudos (operands 4 and 5), which are then IORed
;; into operand 0.
2402 (define_expand "copysign<mode>3"
2405 (not:VF (match_dup 3))
2406 (match_operand:VF 1 "nonimmediate_operand")))
2408 (and:VF (match_dup 3)
2409 (match_operand:VF 2 "nonimmediate_operand")))
2410 (set (match_operand:VF 0 "register_operand")
2411 (ior:VF (match_dup 4) (match_dup 5)))]
2414 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2416 operands[4] = gen_reg_rtx (<MODE>mode);
2417 operands[5] = gen_reg_rtx (<MODE>mode);
2420 ;; Also define scalar versions. These are used for abs, neg, and
2421 ;; conditional move. Using subregs into vector modes causes register
2422 ;; allocation lossage. These patterns do not allow memory operands
2423 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) and-not, register operands only.  The mnemonic is
;; andn%s (isa noavx, operand 1 tied to the destination) or vandn%s
;; (isa avx); the suffix is "ps" when the insn mode attribute is V4SF and
;; <ssevecmodesuffix> otherwise.
2425 (define_insn "*andnot<mode>3"
2426 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2429 (match_operand:MODEF 1 "register_operand" "0,x"))
2430 (match_operand:MODEF 2 "register_operand" "x,x")))]
2431 "SSE_FLOAT_MODE_P (<MODE>mode)"
2433 static char buf[32];
2436 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2438 switch (which_alternative)
2441 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2444 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2450 snprintf (buf, sizeof (buf), ops, suffix);
2453 [(set_attr "isa" "noavx,avx")
2454 (set_attr "type" "sselog")
2455 (set_attr "prefix" "orig,vex")
2457 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2458 (const_string "V4SF")
2459 (match_test "TARGET_AVX")
2460 (const_string "<ssevecmode>")
2461 (match_test "optimize_function_for_size_p (cfun)")
2462 (const_string "V4SF")
2464 (const_string "<ssevecmode>")))])
;; TFmode and-not; operand 2 may be in memory.  The base mnemonic is
;; "andnps" when the insn mode attribute is V4SF and "pandn" otherwise,
;; printed bare (isa noavx) or with a "v" prefix and three operands
;; (isa avx).  prefix_data16 is computed from alternative 0 combined with
;; mode TI.
2466 (define_insn "*andnottf3"
2467 [(set (match_operand:TF 0 "register_operand" "=x,x")
2469 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2470 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2473 static char buf[32];
2476 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2478 switch (which_alternative)
2481 ops = "%s\t{%%2, %%0|%%0, %%2}";
2484 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2490 snprintf (buf, sizeof (buf), ops, tmp);
2493 [(set_attr "isa" "noavx,avx")
2494 (set_attr "type" "sselog")
2495 (set (attr "prefix_data16")
2497 (and (eq_attr "alternative" "0")
2498 (eq_attr "mode" "TI"))
2500 (const_string "*")))
2501 (set_attr "prefix" "orig,vex")
2503 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2504 (const_string "V4SF")
2505 (match_test "TARGET_AVX")
2507 (ior (not (match_test "TARGET_SSE2"))
2508 (match_test "optimize_function_for_size_p (cfun)"))
2509 (const_string "V4SF")
2511 (const_string "TI")))])
;; Scalar (MODEF) commutative logic operation, register operands only.
;; Emits <logic>%s (isa noavx) or v<logic>%s (isa avx); the suffix is
;; "ps" when the insn mode attribute is V4SF, else <ssevecmodesuffix>.
2513 (define_insn "*<code><mode>3"
2514 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2516 (match_operand:MODEF 1 "register_operand" "%0,x")
2517 (match_operand:MODEF 2 "register_operand" "x,x")))]
2518 "SSE_FLOAT_MODE_P (<MODE>mode)"
2520 static char buf[32];
2523 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2525 switch (which_alternative)
2528 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2531 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2537 snprintf (buf, sizeof (buf), ops, suffix);
2540 [(set_attr "isa" "noavx,avx")
2541 (set_attr "type" "sselog")
2542 (set_attr "prefix" "orig,vex")
2544 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2545 (const_string "V4SF")
2546 (match_test "TARGET_AVX")
2547 (const_string "<ssevecmode>")
2548 (match_test "optimize_function_for_size_p (cfun)")
2549 (const_string "V4SF")
2551 (const_string "<ssevecmode>")))])
;; Expander for TFmode logic operations; prepares the operands with
;; ix86_fixup_binary_operands_no_copy before emitting the pattern.
2553 (define_expand "<code>tf3"
2554 [(set (match_operand:TF 0 "register_operand")
2556 (match_operand:TF 1 "nonimmediate_operand")
2557 (match_operand:TF 2 "nonimmediate_operand")))]
2559 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Matcher for commutative TFmode logic operations; the condition includes
;; ix86_binary_operator_ok.  The base mnemonic is "<logic>ps" when the
;; insn mode attribute is V4SF and "p<logic>" otherwise, printed bare
;; (isa noavx) or "v"-prefixed with three operands (isa avx).
2561 (define_insn "*<code>tf3"
2562 [(set (match_operand:TF 0 "register_operand" "=x,x")
2564 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
2565 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2567 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
2569 static char buf[32];
2572 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
2574 switch (which_alternative)
2577 ops = "%s\t{%%2, %%0|%%0, %%2}";
2580 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2586 snprintf (buf, sizeof (buf), ops, tmp);
2589 [(set_attr "isa" "noavx,avx")
2590 (set_attr "type" "sselog")
2591 (set (attr "prefix_data16")
2593 (and (eq_attr "alternative" "0")
2594 (eq_attr "mode" "TI"))
2596 (const_string "*")))
2597 (set_attr "prefix" "orig,vex")
2599 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2600 (const_string "V4SF")
2601 (match_test "TARGET_AVX")
2603 (ior (not (match_test "TARGET_SSE2"))
2604 (match_test "optimize_function_for_size_p (cfun)"))
2605 (const_string "V4SF")
2607 (const_string "TI")))])
2609 ;; There is no floating point xor for V16SF and V8DF in avx512f
2610 ;; but we need them for negation. Instead we use int versions of
2611 ;; xor. Maybe there could be a better way to do that.
;; Element-size suffix appended to the integer logic mnemonic used for
;; 512-bit FP modes: "d" for V16SF, "q" for V8DF.
2613 (define_mode_attr avx512flogicsuff
2614 [(V16SF "d") (V8DF "q")])
;; Logic operation on VF_512 operands emitted as the integer instruction
;; vp<logic><avx512flogicsuff>; operand 2 may be in memory.  EVEX only.
2616 (define_insn "avx512f_<logic><mode>"
2617 [(set (match_operand:VF_512 0 "register_operand" "=v")
2619 (match_operand:VF_512 1 "register_operand" "v")
2620 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2622 "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
2623 [(set_attr "type" "sselog")
2624 (set_attr "prefix" "evex")])
2626 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2628 ;; FMA floating point multiply/accumulate instructions. These include
2629 ;; scalar versions of the instructions as well as vector versions.
2631 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2633 ;; The standard names for scalar FMA are only available with SSE math enabled.
2634 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
2635 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
2636 ;; and TARGET_FMA4 are both false.
2637 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
2638 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
2639 ;; GAS to allow proper prefix selection. However, for the moment all hardware
2640 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-name FMA expanders.  Scalar SF/DF additionally
;; require TARGET_SSE_MATH; 128/256-bit vectors need FMA or FMA4; the
;; 512-bit vectors need AVX512F.
2641 (define_mode_iterator FMAMODEM
2642 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2643 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2644 (V4SF "TARGET_FMA || TARGET_FMA4")
2645 (V2DF "TARGET_FMA || TARGET_FMA4")
2646 (V8SF "TARGET_FMA || TARGET_FMA4")
2647 (V4DF "TARGET_FMA || TARGET_FMA4")
2648 (V16SF "TARGET_AVX512F")
2649 (V8DF "TARGET_AVX512F")])
;; Standard-name fma expander over FMAMODEM: three nonimmediate inputs,
;; none of them negated.
2651 (define_expand "fma<mode>4"
2652 [(set (match_operand:FMAMODEM 0 "register_operand")
2654 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2655 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2656 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
;; Standard-name fms expander over FMAMODEM: operand 3 (the addend) is
;; wrapped in neg.
2659 (define_expand "fms<mode>4"
2660 [(set (match_operand:FMAMODEM 0 "register_operand")
2662 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2663 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2664 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
;; Standard-name fnma expander over FMAMODEM: operand 1 is wrapped in neg.
2667 (define_expand "fnma<mode>4"
2668 [(set (match_operand:FMAMODEM 0 "register_operand")
2670 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2671 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2672 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
;; Standard-name fnms expander over FMAMODEM: both operand 1 and operand 3
;; are wrapped in neg.
2675 (define_expand "fnms<mode>4"
2676 [(set (match_operand:FMAMODEM 0 "register_operand")
2678 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2679 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2680 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2683 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM but without the TARGET_SSE_MATH requirement
;; on the scalar SF/DF entries.
2684 (define_mode_iterator FMAMODE [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2685 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2686 (V4SF "TARGET_FMA || TARGET_FMA4")
2687 (V2DF "TARGET_FMA || TARGET_FMA4")
2688 (V8SF "TARGET_FMA || TARGET_FMA4")
2689 (V4DF "TARGET_FMA || TARGET_FMA4")
2690 (V16SF "TARGET_AVX512F")
2691 (V8DF "TARGET_AVX512F")])
;; Named expander over FMAMODE taking three nonimmediate inputs; it has no
;; preparation code visible here, so the pattern is emitted as written.
2693 (define_expand "fma4i_fmadd_<mode>"
2694 [(set (match_operand:FMAMODE 0 "register_operand")
2696 (match_operand:FMAMODE 1 "nonimmediate_operand")
2697 (match_operand:FMAMODE 2 "nonimmediate_operand")
2698 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
;; Fused multiply-add matcher with five alternatives.  The first three
;; (isa fma_avx512f) are the three-operand 132/213/231 forms, selected by
;; which input is tied to the destination; the last two (isa fma4) are the
;; four-operand form with the memory operand in position 3 or 2.
2701 (define_insn "*fma_fmadd_<mode>"
2702 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2704 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
2705 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2706 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
2709 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2710 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2711 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2712 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2713 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2714 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2715 (set_attr "type" "ssemuladd")
2716 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmadd.  Operand 1 is tied to the destination in both
;; alternatives and operand 4 is the mask register, printed as {%4} after
;; the destination.  The alternatives pick the 132 or 213 form depending
;; on which of operands 2/3 may be in memory.
2718 (define_insn "avx512f_fmadd_<mode>_mask"
2719 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2722 (match_operand:VF_512 1 "register_operand" "0,0")
2723 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2724 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))
2726 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2729 vfmadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2730 vfmadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2731 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2732 (set_attr "type" "ssemuladd")
2733 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmadd, "mask3" flavour: operand 3 (the addend) is tied to
;; the destination and operand 4 is the mask register, printed as {%4}
;; after the destination.  Always emits the 231 form.
;; Operands 0 and 1 use the "v" constraint, matching the other *_mask3
;; patterns in this file (fmsub, fnmadd, fnmsub, fmaddsub, fmsubadd);
;; "x" would needlessly restrict this EVEX-only insn to the legacy SSE
;; register class.
2735 (define_insn "avx512f_fmadd_<mode>_mask3"
2736 [(set (match_operand:VF_512 0 "register_operand" "=v")
2739 (match_operand:VF_512 1 "register_operand" "v")
2740 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2741 (match_operand:VF_512 3 "register_operand" "0"))
2743 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2745 "vfmadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2746 [(set_attr "isa" "fma_avx512f")
2747 (set_attr "type" "ssemuladd")
2748 (set_attr "mode" "<MODE>")])
;; fmsub matcher, same alternative layout as *fma_fmadd_<mode>: three
;; three-operand 132/213/231 forms (isa fma_avx512f) followed by two
;; four-operand forms (isa fma4).
2750 (define_insn "*fma_fmsub_<mode>"
2751 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2753 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
2754 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2756 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
2759 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2760 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2761 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2762 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2763 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2764 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2765 (set_attr "type" "ssemuladd")
2766 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmsub: operand 1 tied to the destination, mask in
;; operand 4 (printed as {%4}); 132 or 213 form per alternative.
2768 (define_insn "avx512f_fmsub_<mode>_mask"
2769 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2772 (match_operand:VF_512 1 "register_operand" "0,0")
2773 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2775 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")))
2777 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2780 vfmsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2781 vfmsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2782 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2783 (set_attr "type" "ssemuladd")
2784 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmsub, "mask3" flavour: operand 3 tied to the
;; destination, mask in operand 4; emits the 231 form.
2786 (define_insn "avx512f_fmsub_<mode>_mask3"
2787 [(set (match_operand:VF_512 0 "register_operand" "=v")
2790 (match_operand:VF_512 1 "register_operand" "v")
2791 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2793 (match_operand:VF_512 3 "register_operand" "0")))
2795 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2797 "vfmsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2798 [(set_attr "isa" "fma_avx512f")
2799 (set_attr "type" "ssemuladd")
2800 (set_attr "mode" "<MODE>")])
;; fnmadd matcher, same alternative layout as *fma_fmadd_<mode>: three
;; three-operand 132/213/231 forms (isa fma_avx512f) followed by two
;; four-operand forms (isa fma4).
2802 (define_insn "*fma_fnmadd_<mode>"
2803 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2806 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
2807 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2808 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
2811 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2812 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2813 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2814 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2815 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2816 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2817 (set_attr "type" "ssemuladd")
2818 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fnmadd: operand 1 tied to the destination, mask in
;; operand 4 (printed as {%4}); 132 or 213 form per alternative.
2820 (define_insn "avx512f_fnmadd_<mode>_mask"
2821 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2825 (match_operand:VF_512 1 "register_operand" "0,0"))
2826 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2827 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))
2829 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2832 vfnmadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2833 vfnmadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2834 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2835 (set_attr "type" "ssemuladd")
2836 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fnmadd, "mask3" flavour: operand 3 tied to the
;; destination, mask in operand 4; emits the 231 form.
2838 (define_insn "avx512f_fnmadd_<mode>_mask3"
2839 [(set (match_operand:VF_512 0 "register_operand" "=v")
2843 (match_operand:VF_512 1 "register_operand" "v"))
2844 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2845 (match_operand:VF_512 3 "register_operand" "0"))
2847 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2849 "vfnmadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2850 [(set_attr "isa" "fma_avx512f")
2851 (set_attr "type" "ssemuladd")
2852 (set_attr "mode" "<MODE>")])
;; fnmsub matcher, same alternative layout as *fma_fmadd_<mode>: three
;; three-operand 132/213/231 forms (isa fma_avx512f) followed by two
;; four-operand forms (isa fma4).
2854 (define_insn "*fma_fnmsub_<mode>"
2855 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2858 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
2859 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2861 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
2864 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2865 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2866 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2867 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2868 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2869 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2870 (set_attr "type" "ssemuladd")
2871 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fnmsub: operand 1 tied to the destination, mask in
;; operand 4 (printed as {%4}); 132 or 213 form per alternative.
2873 (define_insn "avx512f_fnmsub_<mode>_mask"
2874 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2878 (match_operand:VF_512 1 "register_operand" "0,0"))
2879 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2881 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")))
2883 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2886 vfnmsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2887 vfnmsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2888 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2889 (set_attr "type" "ssemuladd")
2890 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fnmsub, "mask3" flavour: operand 3 tied to the
;; destination, mask in operand 4; emits the 231 form.
2892 (define_insn "avx512f_fnmsub_<mode>_mask3"
2893 [(set (match_operand:VF_512 0 "register_operand" "=v")
2897 (match_operand:VF_512 1 "register_operand" "v"))
2898 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2900 (match_operand:VF_512 3 "register_operand" "0")))
2902 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2904 "vfnmsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2905 [(set_attr "isa" "fma_avx512f")
2906 (set_attr "type" "ssemuladd")
2907 (set_attr "mode" "<MODE>")])
2909 ;; FMA parallel floating point multiply addsub and subadd operations.
2911 ;; It would be possible to represent these without the UNSPEC as
2914 ;; (fma op1 op2 op3)
2915 ;; (fma op1 op2 (neg op3))
2918 ;; But this doesn't seem useful in practice.
;; Named expander for fmaddsub on VF modes: three nonimmediate inputs
;; grouped in a vector of operands; enabled for FMA, FMA4 or AVX512F.
2920 (define_expand "fmaddsub_<mode>"
2921 [(set (match_operand:VF 0 "register_operand")
2923 [(match_operand:VF 1 "nonimmediate_operand")
2924 (match_operand:VF 2 "nonimmediate_operand")
2925 (match_operand:VF 3 "nonimmediate_operand")]
2927 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; fmaddsub matcher on VF modes.  Alternatives follow the same layout as
;; the plain FMA matchers: three-operand 132/213/231 forms
;; (isa fma_avx512f) and two four-operand forms (isa fma4).
2929 (define_insn "*fma_fmaddsub_<mode>"
2930 [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
2932 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x")
2933 (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m")
2934 (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x")]
2936 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)"
2938 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2939 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2940 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2941 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2942 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2943 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2944 (set_attr "type" "ssemuladd")
2945 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmaddsub: operand 1 tied to the destination, mask in
;; operand 4 (printed as {%4}); 132 or 213 form per alternative.
2947 (define_insn "avx512f_fmaddsub_<mode>_mask"
2948 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2951 [(match_operand:VF_512 1 "register_operand" "0,0")
2952 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2953 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")]
2956 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2959 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2960 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2961 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2962 (set_attr "type" "ssemuladd")
2963 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmaddsub, "mask3" flavour: operand 3 tied to the
;; destination, mask in operand 4; emits the 231 form.
2965 (define_insn "avx512f_fmaddsub_<mode>_mask3"
2966 [(set (match_operand:VF_512 0 "register_operand" "=v")
2969 [(match_operand:VF_512 1 "register_operand" "v")
2970 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2971 (match_operand:VF_512 3 "register_operand" "0")]
2974 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2976 "vfmaddsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2977 [(set_attr "isa" "fma_avx512f")
2978 (set_attr "type" "ssemuladd")
2979 (set_attr "mode" "<MODE>")])
;; fmsubadd matcher on VF modes, same alternative layout as
;; *fma_fmaddsub_<mode>: three-operand 132/213/231 forms
;; (isa fma_avx512f) and two four-operand forms (isa fma4).
2981 (define_insn "*fma_fmsubadd_<mode>"
2982 [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
2984 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x")
2985 (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m")
2987 (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x"))]
2989 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)"
2991 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2992 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2993 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2994 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2995 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2996 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2997 (set_attr "type" "ssemuladd")
2998 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmsubadd: operand 1 tied to the destination, mask in
;; operand 4 (printed as {%4}); 132 or 213 form per alternative.
3000 (define_insn "avx512f_fmsubadd_<mode>_mask"
3001 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3004 [(match_operand:VF_512 1 "register_operand" "0,0")
3005 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
3007 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))]
3010 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
3013 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
3014 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
3015 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3016 (set_attr "type" "ssemuladd")
3017 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmsubadd, "mask3" flavour: operand 3 tied to the
;; destination, mask in operand 4; emits the 231 form.
3019 (define_insn "avx512f_fmsubadd_<mode>_mask3"
3020 [(set (match_operand:VF_512 0 "register_operand" "=v")
3023 [(match_operand:VF_512 1 "register_operand" "v")
3024 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
3026 (match_operand:VF_512 3 "register_operand" "0"))]
3029 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
3031 "vfmsubadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
3032 [(set_attr "isa" "fma_avx512f")
3033 (set_attr "type" "ssemuladd")
3034 (set_attr "mode" "<MODE>")])
3036 ;; FMA3 floating point scalar intrinsics. These merge result with
3037 ;; high-order elements from the destination register.
;; Named expander for the scalar fmadd intrinsic on VF_128 modes; takes
;; three nonimmediate VF_128 inputs.
3039 (define_expand "fmai_vmfmadd_<mode>"
3040 [(set (match_operand:VF_128 0 "register_operand")
3043 (match_operand:VF_128 1 "nonimmediate_operand")
3044 (match_operand:VF_128 2 "nonimmediate_operand")
3045 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Scalar fmadd on VF_128 operands (FMA or AVX512F).  Operand 1 is tied to
;; the destination in both alternatives; the 132 or 213 form is chosen by
;; which of operands 2/3 may be in memory.  Uses the scalar mnemonic
;; suffix <ssescalarmodesuffix>.
3050 (define_insn "*fmai_fmadd_<mode>"
3051 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3054 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
3055 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
3056 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))
3059 "TARGET_FMA || TARGET_AVX512F"
3061 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
3062 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
3063 [(set_attr "type" "ssemuladd")
3064 (set_attr "mode" "<MODE>")])
;; Scalar fmsub on VF_128 operands (FMA or AVX512F); operand 1 tied to the
;; destination, 132 or 213 form per alternative.
3066 (define_insn "*fmai_fmsub_<mode>"
3067 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3070 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
3071 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
3073 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
3076 "TARGET_FMA || TARGET_AVX512F"
3078 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
3079 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
3080 [(set_attr "type" "ssemuladd")
3081 (set_attr "mode" "<MODE>")])
;; Scalar fnmadd on VF_128 operands (FMA or AVX512F).  Note the operand
;; order in the RTL: operand 2 appears first and operand 1 (tied to the
;; destination) second.  132 or 213 form per alternative.
3083 (define_insn "*fmai_fnmadd_<mode>"
3084 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3088 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v"))
3089 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
3090 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))
3093 "TARGET_FMA || TARGET_AVX512F"
3095 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
3096 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
3097 [(set_attr "type" "ssemuladd")
3098 (set_attr "mode" "<MODE>")])
;; Scalar fnmsub on VF_128 operands (FMA or AVX512F).  As in the fnmadd
;; variant, operand 2 appears first in the RTL and operand 1 (tied to the
;; destination) second.  132 or 213 form per alternative.
3100 (define_insn "*fmai_fnmsub_<mode>"
3101 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3105 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v"))
3106 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
3108 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
3111 "TARGET_FMA || TARGET_AVX512F"
3113 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
3114 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
3115 [(set_attr "type" "ssemuladd")
3116 (set_attr "mode" "<MODE>")])
3118 ;; FMA4 floating point scalar intrinsics. These write the
3119 ;; entire destination register, with the high-order elements zeroed.
;; Named expander for the FMA4 scalar fmadd intrinsic on VF_128 modes.
;; The preparation code sets operand 4 to the all-zero constant of the
;; vector mode (CONST0_RTX).
3121 (define_expand "fma4i_vmfmadd_<mode>"
3122 [(set (match_operand:VF_128 0 "register_operand")
3125 (match_operand:VF_128 1 "nonimmediate_operand")
3126 (match_operand:VF_128 2 "nonimmediate_operand")
3127 (match_operand:VF_128 3 "nonimmediate_operand"))
3131 "operands[4] = CONST0_RTX (<MODE>mode);")
;; FMA4 scalar fmadd: four-operand form with a separate destination.
;; Operand 4 must be a zero constant (const0_operand).  The two
;; alternatives differ in whether operand 3 or operand 2 may be in memory.
3133 (define_insn "*fma4i_vmfmadd_<mode>"
3134 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3137 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3138 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3139 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3140 (match_operand:VF_128 4 "const0_operand")
3143 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3144 [(set_attr "type" "ssemuladd")
3145 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fmsub: four-operand form, operand 4 a zero constant; the
;; alternatives differ in which of operands 2/3 may be in memory.
3147 (define_insn "*fma4i_vmfmsub_<mode>"
3148 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3151 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3152 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3154 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3155 (match_operand:VF_128 4 "const0_operand")
3158 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3159 [(set_attr "type" "ssemuladd")
3160 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fnmadd: four-operand form, operand 4 a zero constant; the
;; alternatives differ in which of operands 2/3 may be in memory.
3162 (define_insn "*fma4i_vmfnmadd_<mode>"
3163 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3167 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3168 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3169 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3170 (match_operand:VF_128 4 "const0_operand")
3173 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3174 [(set_attr "type" "ssemuladd")
3175 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fnmsub: four-operand form, operand 4 a zero constant; the
;; alternatives differ in which of operands 2/3 may be in memory.
3177 (define_insn "*fma4i_vmfnmsub_<mode>"
3178 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3182 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3183 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3185 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3186 (match_operand:VF_128 4 "const0_operand")
3189 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3190 [(set_attr "type" "ssemuladd")
3191 (set_attr "mode" "<MODE>")])
3193 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3195 ;; Parallel single-precision floating point conversion operations
3197 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts V2SI operand 2 (constraint "ym") to V2SF with
;; float; operand 1 is a V4SF register tied to the destination.
3199 (define_insn "sse_cvtpi2ps"
3200 [(set (match_operand:V4SF 0 "register_operand" "=x")
3203 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3204 (match_operand:V4SF 1 "register_operand" "0")
3207 "cvtpi2ps\t{%2, %0|%0, %2}"
3208 [(set_attr "type" "ssecvt")
3209 (set_attr "mode" "V4SF")])
;; cvtps2pi: an unspec turns V4SF operand 1 into V4SI, of which elements
;; 0 and 1 are selected as the V2SI result in a "y"-constrained register.
3211 (define_insn "sse_cvtps2pi"
3212 [(set (match_operand:V2SI 0 "register_operand" "=y")
3214 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3216 (parallel [(const_int 0) (const_int 1)])))]
3218 "cvtps2pi\t{%1, %0|%0, %q1}"
3219 [(set_attr "type" "ssecvt")
3220 (set_attr "unit" "mmx")
3221 (set_attr "mode" "DI")])
;; cvttps2pi: like sse_cvtps2pi but the conversion is expressed with the
;; RTL fix operator instead of an unspec; elements 0 and 1 of the V4SI
;; result form the V2SI destination.
3223 (define_insn "sse_cvttps2pi"
3224 [(set (match_operand:V2SI 0 "register_operand" "=y")
3226 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3227 (parallel [(const_int 0) (const_int 1)])))]
3229 "cvttps2pi\t{%1, %0|%0, %q1}"
3230 [(set_attr "type" "ssecvt")
3231 (set_attr "unit" "mmx")
3232 (set_attr "prefix_rep" "0")
3233 (set_attr "mode" "SF")])
;; cvtsi2ss: converts SI operand 2 to SF with float.  Operand 1 is the
;; V4SF source vector.  Alternatives 0 and 1 (isa noavx, register or
;; memory source) tie operand 1 to the destination; alternative 2
;; (isa avx) uses the three-operand vcvtsi2ss form.  The per-CPU decode
;; attributes differ between the register and memory alternatives.
3235 (define_insn "sse_cvtsi2ss"
3236 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3239 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
3240 (match_operand:V4SF 1 "register_operand" "0,0,v")
3244 cvtsi2ss\t{%2, %0|%0, %2}
3245 cvtsi2ss\t{%2, %0|%0, %2}
3246 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
3247 [(set_attr "isa" "noavx,noavx,avx")
3248 (set_attr "type" "sseicvt")
3249 (set_attr "athlon_decode" "vector,double,*")
3250 (set_attr "amdfam10_decode" "vector,double,*")
3251 (set_attr "bdver1_decode" "double,direct,*")
3252 (set_attr "btver2_decode" "double,double,double")
3253 (set_attr "prefix" "orig,orig,maybe_evex")
3254 (set_attr "mode" "SF")])
;; 64-bit variant of sse_cvtsi2ss: the integer source is DImode and the
;; pattern requires TARGET_SSE && TARGET_64BIT.  Same alternative layout;
;; additionally sets length_vex for the AVX alternative and prefix_rex for
;; the two legacy alternatives.
3256 (define_insn "sse_cvtsi2ssq"
3257 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3260 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
3261 (match_operand:V4SF 1 "register_operand" "0,0,v")
3263 "TARGET_SSE && TARGET_64BIT"
3265 cvtsi2ssq\t{%2, %0|%0, %2}
3266 cvtsi2ssq\t{%2, %0|%0, %2}
3267 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
3268 [(set_attr "isa" "noavx,noavx,avx")
3269 (set_attr "type" "sseicvt")
3270 (set_attr "athlon_decode" "vector,double,*")
3271 (set_attr "amdfam10_decode" "vector,double,*")
3272 (set_attr "bdver1_decode" "double,direct,*")
3273 (set_attr "btver2_decode" "double,double,double")
3274 (set_attr "length_vex" "*,*,4")
3275 (set_attr "prefix_rex" "1,1,*")
3276 (set_attr "prefix" "orig,orig,maybe_evex")
3277 (set_attr "mode" "SF")])
;; cvtss2si: convert element 0 of the V4SF operand 1 to a 32-bit integer in
;; a general register.  The conversion is an UNSPEC_FIX_NOTRUNC unspec.
;; Alternative 0 takes the source from a register ("v"), alternative 1 from
;; memory ("m"); the *_decode attributes list one value per alternative.
3279 (define_insn "sse_cvtss2si"
3280 [(set (match_operand:SI 0 "register_operand" "=r,r")
3283 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
3284 (parallel [(const_int 0)]))]
3285 UNSPEC_FIX_NOTRUNC))]
3287 "%vcvtss2si\t{%1, %0|%0, %k1}"
3288 [(set_attr "type" "sseicvt")
3289 (set_attr "athlon_decode" "double,vector")
3290 (set_attr "bdver1_decode" "double,double")
3291 (set_attr "prefix_rep" "1")
3292 (set_attr "prefix" "maybe_vex")
3293 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose source operand is a scalar SFmode value
;; rather than element 0 of a V4SF vector.  Emits the same cvtss2si template
;; and uses the same UNSPEC_FIX_NOTRUNC unspec.
3295 (define_insn "sse_cvtss2si_2"
3296 [(set (match_operand:SI 0 "register_operand" "=r,r")
3297 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3298 UNSPEC_FIX_NOTRUNC))]
3300 "%vcvtss2si\t{%1, %0|%0, %k1}"
3301 [(set_attr "type" "sseicvt")
3302 (set_attr "athlon_decode" "double,vector")
3303 (set_attr "amdfam10_decode" "double,double")
3304 (set_attr "bdver1_decode" "double,double")
3305 (set_attr "prefix_rep" "1")
3306 (set_attr "prefix" "maybe_vex")
3307 (set_attr "mode" "SI")])
;; 64-bit-result variant of sse_cvtss2si: element 0 of the V4SF operand 1 is
;; converted (UNSPEC_FIX_NOTRUNC) into a DImode general register.
;; Enabled only for TARGET_SSE && TARGET_64BIT.
3309 (define_insn "sse_cvtss2siq"
3310 [(set (match_operand:DI 0 "register_operand" "=r,r")
3313 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
3314 (parallel [(const_int 0)]))]
3315 UNSPEC_FIX_NOTRUNC))]
3316 "TARGET_SSE && TARGET_64BIT"
3317 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3318 [(set_attr "type" "sseicvt")
3319 (set_attr "athlon_decode" "double,vector")
3320 (set_attr "bdver1_decode" "double,double")
3321 (set_attr "prefix_rep" "1")
3322 (set_attr "prefix" "maybe_vex")
3323 (set_attr "mode" "DI")])
;; Variant of sse_cvtss2siq whose source is a scalar SFmode operand.
;; DImode result via UNSPEC_FIX_NOTRUNC; TARGET_SSE && TARGET_64BIT only.
3325 (define_insn "sse_cvtss2siq_2"
3326 [(set (match_operand:DI 0 "register_operand" "=r,r")
3327 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3328 UNSPEC_FIX_NOTRUNC))]
3329 "TARGET_SSE && TARGET_64BIT"
3330 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3331 [(set_attr "type" "sseicvt")
3332 (set_attr "athlon_decode" "double,vector")
3333 (set_attr "amdfam10_decode" "double,double")
3334 (set_attr "bdver1_decode" "double,double")
3335 (set_attr "prefix_rep" "1")
3336 (set_attr "prefix" "maybe_vex")
3337 (set_attr "mode" "DI")])
;; cvttss2si: convert element 0 of the V4SF operand 1 to a 32-bit integer in
;; a general register using the cvttss2si instruction.
;; Alternative 0: register source; alternative 1: memory source.
3339 (define_insn "sse_cvttss2si"
3340 [(set (match_operand:SI 0 "register_operand" "=r,r")
3343 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
3344 (parallel [(const_int 0)]))))]
3346 "%vcvttss2si\t{%1, %0|%0, %k1}"
3347 [(set_attr "type" "sseicvt")
3348 (set_attr "athlon_decode" "double,vector")
3349 (set_attr "amdfam10_decode" "double,double")
3350 (set_attr "bdver1_decode" "double,double")
3351 (set_attr "prefix_rep" "1")
3352 (set_attr "prefix" "maybe_vex")
3353 (set_attr "mode" "SI")])
;; 64-bit-result variant of sse_cvttss2si; TARGET_SSE && TARGET_64BIT only.
;; NOTE(review): the source constraint here is "v,vm", whereas every sibling
;; pattern in this group uses "v,m".  With "vm" a register source can also
;; match alternative 1, whose athlon_decode value is "vector" -- confirm
;; whether "v,m" was intended.
3355 (define_insn "sse_cvttss2siq"
3356 [(set (match_operand:DI 0 "register_operand" "=r,r")
3359 (match_operand:V4SF 1 "nonimmediate_operand" "v,vm")
3360 (parallel [(const_int 0)]))))]
3361 "TARGET_SSE && TARGET_64BIT"
3362 "%vcvttss2si{q}\t{%1, %0|%0, %k1}"
3363 [(set_attr "type" "sseicvt")
3364 (set_attr "athlon_decode" "double,vector")
3365 (set_attr "amdfam10_decode" "double,double")
3366 (set_attr "bdver1_decode" "double,double")
3367 (set_attr "prefix_rep" "1")
3368 (set_attr "prefix" "maybe_vex")
3369 (set_attr "mode" "DI")])
;; vcvtusi2ss / vcvtusi2sd (selected by <ssescalarmodesuffix>): convert the
;; unsigned 32-bit integer operand 2 to the scalar mode of the VF_128
;; iterator, duplicate it, and combine it with the vector operand 1.
;; Always EVEX-encoded ("prefix" is "evex").
3371 (define_insn "cvtusi2<ssescalarmodesuffix>32"
3372 [(set (match_operand:VF_128 0 "register_operand" "=v")
3374 (vec_duplicate:VF_128
3375 (unsigned_float:<ssescalarmode>
3376 (match_operand:SI 2 "nonimmediate_operand" "rm")))
3377 (match_operand:VF_128 1 "register_operand" "v")
3380 "vcvtusi2<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3381 [(set_attr "type" "sseicvt")
3382 (set_attr "prefix" "evex")
3383 (set_attr "mode" "<ssescalarmode>")])
;; 64-bit-source variant of cvtusi2<ssescalarmodesuffix>32: operand 2 is an
;; unsigned DImode integer.  Enabled only for TARGET_AVX512F && TARGET_64BIT.
3385 (define_insn "cvtusi2<ssescalarmodesuffix>64"
3386 [(set (match_operand:VF_128 0 "register_operand" "=v")
3388 (vec_duplicate:VF_128
3389 (unsigned_float:<ssescalarmode>
3390 (match_operand:DI 2 "nonimmediate_operand" "rm")))
3391 (match_operand:VF_128 1 "register_operand" "v")
3393 "TARGET_AVX512F && TARGET_64BIT"
3394 "vcvtusi2<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3395 [(set_attr "type" "sseicvt")
3396 (set_attr "prefix" "evex")
3397 (set_attr "mode" "<ssescalarmode>")])
;; cvtdq2ps: convert a vector of integers (<sseintvecmode>) to the matching
;; single-precision vector mode of the VF1 iterator.  <mask_name> and
;; <mask_operand2> provide the masked variant; the condition additionally
;; requires <mask_mode512bit_condition>.
3399 (define_insn "float<sseintvecmodelower><mode>2<mask_name>"
3400 [(set (match_operand:VF1 0 "register_operand" "=v")
3402 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))]
3403 "TARGET_SSE2 && <mask_mode512bit_condition>"
3404 "%vcvtdq2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3405 [(set_attr "type" "ssecvt")
3406 (set_attr "prefix" "maybe_vex")
3407 (set_attr "mode" "<sseinsnmode>")])
;; vcvtudq2ps: unsigned V16SI -> V16SF conversion (RTL "unsigned_float"),
;; with an optional mask via <mask_name> / <mask_operand2>.  EVEX-encoded.
3409 (define_insn "ufloatv16siv16sf2<mask_name>"
3410 [(set (match_operand:V16SF 0 "register_operand" "=v")
3411 (unsigned_float:V16SF
3412 (match_operand:V16SI 1 "nonimmediate_operand" "vm")))]
3414 "vcvtudq2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3415 [(set_attr "type" "ssecvt")
3416 (set_attr "prefix" "evex")
3417 (set_attr "mode" "V16SF")])
;; Expander for unsigned integer vector -> single-precision vector
;; conversion.  Available with SSE2 for V4SFmode, and for the other VF1
;; modes only when TARGET_AVX2.  All of the work is delegated to
;; ix86_expand_vector_convert_uns_vsivsf (destination, source).
3419 (define_expand "floatuns<sseintvecmodelower><mode>2"
3420 [(match_operand:VF1 0 "register_operand")
3421 (match_operand:<sseintvecmode> 1 "register_operand")]
3422 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
3424 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
3429 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each integer vector mode to the lower-case name of the
;; single-precision float vector mode with the same element count.
3430 (define_mode_attr sf2simodelower
3431 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; cvtps2dq: convert a packed single-precision vector (<ssePSmode>) to the
;; integer vector mode of the VI4_AVX iterator through UNSPEC_FIX_NOTRUNC.
;; "prefix_data16" is computed conditionally on TARGET_AVX; the non-AVX
;; value is "1".
3433 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode>"
3434 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
3436 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
3437 UNSPEC_FIX_NOTRUNC))]
3439 "%vcvtps2dq\t{%1, %0|%0, %1}"
3440 [(set_attr "type" "ssecvt")
3441 (set (attr "prefix_data16")
3443 (match_test "TARGET_AVX")
3445 (const_string "1")))
3446 (set_attr "prefix" "maybe_vex")
3447 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit vcvtps2dq: V16SF -> V16SI through UNSPEC_FIX_NOTRUNC, with an
;; optional mask (<mask_name> / <mask_operand2>).  EVEX-encoded.
3449 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name>"
3450 [(set (match_operand:V16SI 0 "register_operand" "=v")
3452 [(match_operand:V16SF 1 "nonimmediate_operand" "vm")]
3453 UNSPEC_FIX_NOTRUNC))]
3455 "vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3456 [(set_attr "type" "ssecvt")
3457 (set_attr "prefix" "evex")
3458 (set_attr "mode" "XI")])
;; vcvtps2udq: unsigned counterpart of avx512f_fix_notruncv16sfv16si.
;; V16SF -> V16SI through UNSPEC_UNSIGNED_FIX_NOTRUNC, optionally masked.
3460 (define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name>"
3461 [(set (match_operand:V16SI 0 "register_operand" "=v")
3463 [(match_operand:V16SF 1 "nonimmediate_operand" "vm")]
3464 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3466 "vcvtps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3467 [(set_attr "type" "ssecvt")
3468 (set_attr "prefix" "evex")
3469 (set_attr "mode" "XI")])
;; vcvttps2dq / vcvttps2udq: V16SF -> V16SI conversion.  <fixsuffix> is
;; spliced into both the pattern name and the mnemonic, so a single
;; definition yields the variants named by that attribute.  Optionally
;; masked; EVEX-encoded.
3471 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name>"
3472 [(set (match_operand:V16SI 0 "register_operand" "=v")
3474 (match_operand:V16SF 1 "nonimmediate_operand" "vm")))]
3476 "vcvttps2<fixsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3477 [(set_attr "type" "ssecvt")
3478 (set_attr "prefix" "evex")
3479 (set_attr "mode" "XI")])
;; 256-bit vcvttps2dq: V8SF -> V8SI using the RTL "fix" code.  VEX-encoded.
3481 (define_insn "fix_truncv8sfv8si2"
3482 [(set (match_operand:V8SI 0 "register_operand" "=x")
3483 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
3485 "vcvttps2dq\t{%1, %0|%0, %1}"
3486 [(set_attr "type" "ssecvt")
3487 (set_attr "prefix" "vex")
3488 (set_attr "mode" "OI")])
;; 128-bit cvttps2dq: V4SF -> V4SI using the RTL "fix" code.
;; "prefix_rep" and "prefix_data16" are computed conditionally on
;; TARGET_AVX; the non-AVX values are "1" and "0" respectively.
;; NOTE(review): "prefix_data16" is assigned twice below -- once by the
;; conditional (set (attr ...)) form and again by the unconditional
;; (set_attr "prefix_data16" "0").  The second looks redundant; confirm
;; which one the attribute generator honours before removing either.
3490 (define_insn "fix_truncv4sfv4si2"
3491 [(set (match_operand:V4SI 0 "register_operand" "=x")
3492 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3494 "%vcvttps2dq\t{%1, %0|%0, %1}"
3495 [(set_attr "type" "ssecvt")
3496 (set (attr "prefix_rep")
3498 (match_test "TARGET_AVX")
3500 (const_string "1")))
3501 (set (attr "prefix_data16")
3503 (match_test "TARGET_AVX")
3505 (const_string "0")))
3506 (set_attr "prefix_data16" "0")
3507 (set_attr "prefix" "maybe_vex")
3508 (set_attr "mode" "TI")])
;; Expander for float vector -> unsigned integer vector conversion, built
;; on top of the signed fix_trunc pattern:
;;   1. ix86_expand_adjust_ufix_to_sfix_si returns an adjusted input in
;;      tmp[0] and stores a second value through &tmp[2];
;;   2. the signed fix_trunc pattern converts tmp[0] into tmp[1];
;;   3. tmp[1] XOR tmp[2] is written to operand 0.
;; (tmp[2] is presumably a per-element correction mask -- see the helper.)
3510 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
3511 [(match_operand:<sseintvecmode> 0 "register_operand")
3512 (match_operand:VF1 1 "register_operand")]
3516 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3517 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
3518 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
3519 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
3523 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3525 ;; Parallel double-precision floating point conversion operations
3527 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: convert two packed 32-bit integers (V2SI) to V2DF.
;; Alternative 0 takes the source with constraint "y" and is attributed to
;; the MMX unit with a data16 prefix; alternative 1 takes it from memory.
3529 (define_insn "sse2_cvtpi2pd"
3530 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3531 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
3533 "cvtpi2pd\t{%1, %0|%0, %1}"
3534 [(set_attr "type" "ssecvt")
3535 (set_attr "unit" "mmx,*")
3536 (set_attr "prefix_data16" "1,*")
3537 (set_attr "mode" "V2DF")])
;; cvtpd2pi: convert V2DF to two packed 32-bit integers (V2SI, destination
;; constraint "y") through UNSPEC_FIX_NOTRUNC.  MMX unit, data16 prefix.
3539 (define_insn "sse2_cvtpd2pi"
3540 [(set (match_operand:V2SI 0 "register_operand" "=y")
3541 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3542 UNSPEC_FIX_NOTRUNC))]
3544 "cvtpd2pi\t{%1, %0|%0, %1}"
3545 [(set_attr "type" "ssecvt")
3546 (set_attr "unit" "mmx")
3547 (set_attr "bdver1_decode" "double")
3548 (set_attr "btver2_decode" "direct")
3549 (set_attr "prefix_data16" "1")
3550 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF -> V2SI using the RTL "fix" code; destination constraint
;; "y", MMX unit, data16 prefix.
;; NOTE(review): "mode" is "TI" here but "DI" on the otherwise parallel
;; sse2_cvtpd2pi -- confirm whether the difference is intentional.
3552 (define_insn "sse2_cvttpd2pi"
3553 [(set (match_operand:V2SI 0 "register_operand" "=y")
3554 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
3556 "cvttpd2pi\t{%1, %0|%0, %1}"
3557 [(set_attr "type" "ssecvt")
3558 (set_attr "unit" "mmx")
3559 (set_attr "bdver1_decode" "double")
3560 (set_attr "prefix_data16" "1")
3561 (set_attr "mode" "TI")])
;; cvtsi2sd: convert the 32-bit integer operand 2 to DFmode and combine it
;; with the V2DF operand 1.  Alternatives 0/1 are the two-operand legacy
;; forms (operand 1 tied to the destination); alternative 2 is the AVX
;; three-operand vcvtsi2sd.
;; NOTE(review): alternative 2 uses constraint "x" and prefix "vex" here,
;; while sse_cvtsi2ss and sse2_cvtsi2sdq use "v" with "maybe_evex" --
;; confirm whether this pattern was meant to be updated the same way.
3563 (define_insn "sse2_cvtsi2sd"
3564 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
3567 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
3568 (match_operand:V2DF 1 "register_operand" "0,0,x")
3572 cvtsi2sd\t{%2, %0|%0, %2}
3573 cvtsi2sd\t{%2, %0|%0, %2}
3574 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
3575 [(set_attr "isa" "noavx,noavx,avx")
3576 (set_attr "type" "sseicvt")
3577 (set_attr "athlon_decode" "double,direct,*")
3578 (set_attr "amdfam10_decode" "vector,double,*")
3579 (set_attr "bdver1_decode" "double,direct,*")
3580 (set_attr "btver2_decode" "double,double,double")
3581 (set_attr "prefix" "orig,orig,vex")
3582 (set_attr "mode" "DF")])
;; cvtsi2sdq: 64-bit-integer variant of sse2_cvtsi2sd.  Operand 2 is DImode;
;; enabled only for TARGET_SSE2 && TARGET_64BIT.  The legacy alternatives
;; set "prefix_rex" to 1; the AVX alternative fixes "length_vex" at 4.
3584 (define_insn "sse2_cvtsi2sdq"
3585 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
3588 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
3589 (match_operand:V2DF 1 "register_operand" "0,0,v")
3591 "TARGET_SSE2 && TARGET_64BIT"
3593 cvtsi2sdq\t{%2, %0|%0, %2}
3594 cvtsi2sdq\t{%2, %0|%0, %2}
3595 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
3596 [(set_attr "isa" "noavx,noavx,avx")
3597 (set_attr "type" "sseicvt")
3598 (set_attr "athlon_decode" "double,direct,*")
3599 (set_attr "amdfam10_decode" "vector,double,*")
3600 (set_attr "bdver1_decode" "double,direct,*")
3601 (set_attr "length_vex" "*,*,4")
3602 (set_attr "prefix_rex" "1,1,*")
3603 (set_attr "prefix" "orig,orig,maybe_evex")
3604 (set_attr "mode" "DF")])
;; vcvtss2usi: convert element 0 of the V4SF operand 1 to an unsigned
;; 32-bit integer in a general register (UNSPEC_UNSIGNED_FIX_NOTRUNC).
;; EVEX-encoded.
3606 (define_insn "avx512f_vcvtss2usi"
3607 [(set (match_operand:SI 0 "register_operand" "=r")
3610 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3611 (parallel [(const_int 0)]))]
3612 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3614 "vcvtss2usi\t{%1, %0|%0, %1}"
3615 [(set_attr "type" "sseicvt")
3616 (set_attr "prefix" "evex")
3617 (set_attr "mode" "SI")])
;; 64-bit-result variant of avx512f_vcvtss2usi (DImode destination).
;; Enabled only for TARGET_AVX512F && TARGET_64BIT.
3619 (define_insn "avx512f_vcvtss2usiq"
3620 [(set (match_operand:DI 0 "register_operand" "=r")
3623 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3624 (parallel [(const_int 0)]))]
3625 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3626 "TARGET_AVX512F && TARGET_64BIT"
3627 "vcvtss2usi\t{%1, %0|%0, %1}"
3628 [(set_attr "type" "sseicvt")
3629 (set_attr "prefix" "evex")
3630 (set_attr "mode" "DI")])
;; vcvttss2usi: convert element 0 of the V4SF operand 1 to a 32-bit integer
;; in a general register using the vcvttss2usi instruction.  EVEX-encoded.
3632 (define_insn "avx512f_vcvttss2usi"
3633 [(set (match_operand:SI 0 "register_operand" "=r")
3636 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3637 (parallel [(const_int 0)]))))]
3639 "vcvttss2usi\t{%1, %0|%0, %1}"
3640 [(set_attr "type" "sseicvt")
3641 (set_attr "prefix" "evex")
3642 (set_attr "mode" "SI")])
;; 64-bit-result variant of avx512f_vcvttss2usi (DImode destination).
;; Enabled only for TARGET_AVX512F && TARGET_64BIT.
3644 (define_insn "avx512f_vcvttss2usiq"
3645 [(set (match_operand:DI 0 "register_operand" "=r")
3648 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3649 (parallel [(const_int 0)]))))]
3650 "TARGET_AVX512F && TARGET_64BIT"
3651 "vcvttss2usi\t{%1, %0|%0, %1}"
3652 [(set_attr "type" "sseicvt")
3653 (set_attr "prefix" "evex")
3654 (set_attr "mode" "DI")])
;; vcvtsd2usi: convert element 0 of the V2DF operand 1 to an unsigned
;; 32-bit integer in a general register (UNSPEC_UNSIGNED_FIX_NOTRUNC).
;; EVEX-encoded.
3656 (define_insn "avx512f_vcvtsd2usi"
3657 [(set (match_operand:SI 0 "register_operand" "=r")
3660 (match_operand:V2DF 1 "nonimmediate_operand" "vm")
3661 (parallel [(const_int 0)]))]
3662 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3664 "vcvtsd2usi\t{%1, %0|%0, %1}"
3665 [(set_attr "type" "sseicvt")
3666 (set_attr "prefix" "evex")
3667 (set_attr "mode" "SI")])
;; 64-bit-result variant of avx512f_vcvtsd2usi (DImode destination).
;; Enabled only for TARGET_AVX512F && TARGET_64BIT.
3669 (define_insn "avx512f_vcvtsd2usiq"
3670 [(set (match_operand:DI 0 "register_operand" "=r")
3673 (match_operand:V2DF 1 "nonimmediate_operand" "vm")
3674 (parallel [(const_int 0)]))]
3675 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3676 "TARGET_AVX512F && TARGET_64BIT"
3677 "vcvtsd2usi\t{%1, %0|%0, %1}"
3678 [(set_attr "type" "sseicvt")
3679 (set_attr "prefix" "evex")
3680 (set_attr "mode" "DI")])
;; vcvttsd2usi: convert element 0 of the V2DF operand 1 to a 32-bit integer
;; in a general register using the vcvttsd2usi instruction.  EVEX-encoded.
3682 (define_insn "avx512f_vcvttsd2usi"
3683 [(set (match_operand:SI 0 "register_operand" "=r")
3686 (match_operand:V2DF 1 "nonimmediate_operand" "vm")
3687 (parallel [(const_int 0)]))))]
3689 "vcvttsd2usi\t{%1, %0|%0, %1}"
3690 [(set_attr "type" "sseicvt")
3691 (set_attr "prefix" "evex")
3692 (set_attr "mode" "SI")])
;; 64-bit-result variant of avx512f_vcvttsd2usi (DImode destination).
;; Enabled only for TARGET_AVX512F && TARGET_64BIT.
3694 (define_insn "avx512f_vcvttsd2usiq"
3695 [(set (match_operand:DI 0 "register_operand" "=r")
3698 (match_operand:V2DF 1 "nonimmediate_operand" "vm")
3699 (parallel [(const_int 0)]))))]
3700 "TARGET_AVX512F && TARGET_64BIT"
3701 "vcvttsd2usi\t{%1, %0|%0, %1}"
3702 [(set_attr "type" "sseicvt")
3703 (set_attr "prefix" "evex")
3704 (set_attr "mode" "DI")])
;; cvtsd2si: convert element 0 of the V2DF operand 1 to a 32-bit integer in
;; a general register through UNSPEC_FIX_NOTRUNC.
;; Alternative 0: register source; alternative 1: memory source.
3706 (define_insn "sse2_cvtsd2si"
3707 [(set (match_operand:SI 0 "register_operand" "=r,r")
3710 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
3711 (parallel [(const_int 0)]))]
3712 UNSPEC_FIX_NOTRUNC))]
3714 "%vcvtsd2si\t{%1, %0|%0, %q1}"
3715 [(set_attr "type" "sseicvt")
3716 (set_attr "athlon_decode" "double,vector")
3717 (set_attr "bdver1_decode" "double,double")
3718 (set_attr "btver2_decode" "double,double")
3719 (set_attr "prefix_rep" "1")
3720 (set_attr "prefix" "maybe_vex")
3721 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source operand is a scalar DFmode value
;; rather than element 0 of a V2DF vector.  Same template and unspec.
3723 (define_insn "sse2_cvtsd2si_2"
3724 [(set (match_operand:SI 0 "register_operand" "=r,r")
3725 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
3726 UNSPEC_FIX_NOTRUNC))]
3728 "%vcvtsd2si\t{%1, %0|%0, %q1}"
3729 [(set_attr "type" "sseicvt")
3730 (set_attr "athlon_decode" "double,vector")
3731 (set_attr "amdfam10_decode" "double,double")
3732 (set_attr "bdver1_decode" "double,double")
3733 (set_attr "prefix_rep" "1")
3734 (set_attr "prefix" "maybe_vex")
3735 (set_attr "mode" "SI")])
;; 64-bit-result variant of sse2_cvtsd2si: element 0 of the V2DF operand 1
;; is converted (UNSPEC_FIX_NOTRUNC) into a DImode general register.
;; Enabled only for TARGET_SSE2 && TARGET_64BIT.
3737 (define_insn "sse2_cvtsd2siq"
3738 [(set (match_operand:DI 0 "register_operand" "=r,r")
3741 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
3742 (parallel [(const_int 0)]))]
3743 UNSPEC_FIX_NOTRUNC))]
3744 "TARGET_SSE2 && TARGET_64BIT"
3745 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
3746 [(set_attr "type" "sseicvt")
3747 (set_attr "athlon_decode" "double,vector")
3748 (set_attr "bdver1_decode" "double,double")
3749 (set_attr "prefix_rep" "1")
3750 (set_attr "prefix" "maybe_vex")
3751 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq whose source is a scalar DFmode operand.
;; DImode result via UNSPEC_FIX_NOTRUNC; TARGET_SSE2 && TARGET_64BIT only.
3753 (define_insn "sse2_cvtsd2siq_2"
3754 [(set (match_operand:DI 0 "register_operand" "=r,r")
3755 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
3756 UNSPEC_FIX_NOTRUNC))]
3757 "TARGET_SSE2 && TARGET_64BIT"
3758 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
3759 [(set_attr "type" "sseicvt")
3760 (set_attr "athlon_decode" "double,vector")
3761 (set_attr "amdfam10_decode" "double,double")
3762 (set_attr "bdver1_decode" "double,double")
3763 (set_attr "prefix_rep" "1")
3764 (set_attr "prefix" "maybe_vex")
3765 (set_attr "mode" "DI")])
;; cvttsd2si: convert element 0 of the V2DF operand 1 to a 32-bit integer in
;; a general register using the cvttsd2si instruction.
;; Alternative 0: register source; alternative 1: memory source.
3767 (define_insn "sse2_cvttsd2si"
3768 [(set (match_operand:SI 0 "register_operand" "=r,r")
3771 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
3772 (parallel [(const_int 0)]))))]
3774 "%vcvttsd2si\t{%1, %0|%0, %q1}"
3775 [(set_attr "type" "sseicvt")
3776 (set_attr "athlon_decode" "double,vector")
3777 (set_attr "amdfam10_decode" "double,double")
3778 (set_attr "bdver1_decode" "double,double")
3779 (set_attr "btver2_decode" "double,double")
3780 (set_attr "prefix_rep" "1")
3781 (set_attr "prefix" "maybe_vex")
3782 (set_attr "mode" "SI")])
;; 64-bit-result variant of sse2_cvttsd2si (DImode destination).
;; Enabled only for TARGET_SSE2 && TARGET_64BIT.
3784 (define_insn "sse2_cvttsd2siq"
3785 [(set (match_operand:DI 0 "register_operand" "=r,r")
3788 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
3789 (parallel [(const_int 0)]))))]
3790 "TARGET_SSE2 && TARGET_64BIT"
3791 "%vcvttsd2si{q}\t{%1, %0|%0, %q1}"
3792 [(set_attr "type" "sseicvt")
3793 (set_attr "athlon_decode" "double,vector")
3794 (set_attr "amdfam10_decode" "double,double")
3795 (set_attr "bdver1_decode" "double,double")
3796 (set_attr "prefix_rep" "1")
3797 (set_attr "prefix" "maybe_vex")
3798 (set_attr "mode" "DI")])
3800 ;; For float<si2dfmode><mode>2 insn pattern
;; si2dfmode maps a double-precision vector mode to the 32-bit integer
;; vector mode with the same element count; si2dfmodelower is the same
;; mapping spelled in lower case for use inside pattern names.
3801 (define_mode_attr si2dfmode
3802 [(V8DF "V8SI") (V4DF "V4SI")])
3803 (define_mode_attr si2dfmodelower
3804 [(V8DF "v8si") (V4DF "v4si")])
;; vcvtdq2pd: convert a 32-bit integer vector (<si2dfmode>) to the
;; double-precision vector mode of the VF2_512_256 iterator (RTL "float").
;; Optionally masked; requires TARGET_AVX && <mask_mode512bit_condition>.
3806 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
3807 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
3808 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
3809 "TARGET_AVX && <mask_mode512bit_condition>"
3810 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3811 [(set_attr "type" "ssecvt")
3812 (set_attr "prefix" "maybe_vex")
3813 (set_attr "mode" "<MODE>")])
;; vcvtudq2pd: unsigned V8SI -> V8DF conversion (RTL "unsigned_float"),
;; optionally masked.  EVEX-encoded.
3815 (define_insn "ufloatv8siv8df<mask_name>"
3816 [(set (match_operand:V8DF 0 "register_operand" "=v")
3817 (unsigned_float:V8DF
3818 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
3820 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3821 [(set_attr "type" "ssecvt")
3822 (set_attr "prefix" "evex")
3823 (set_attr "mode" "V8DF")])
;; vcvtdq2pd on the low half of a V16SI operand: the parallel names
;; elements 0..7, which are converted to form the V8DF result.
;; The template prints operand 1 with the %t modifier.  EVEX-encoded.
3825 (define_insn "avx512f_cvtdq2pd512_2"
3826 [(set (match_operand:V8DF 0 "register_operand" "=v")
3829 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
3830 (parallel [(const_int 0) (const_int 1)
3831 (const_int 2) (const_int 3)
3832 (const_int 4) (const_int 5)
3833 (const_int 6) (const_int 7)]))))]
3835 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
3836 [(set_attr "type" "ssecvt")
3837 (set_attr "prefix" "evex")
3838 (set_attr "mode" "V8DF")])
;; vcvtdq2pd on the low half of a V8SI operand: the parallel names elements
;; 0..3, which are converted to form the V4DF result.
;; The template prints operand 1 with the %x modifier.  VEX-encoded.
3840 (define_insn "avx_cvtdq2pd256_2"
3841 [(set (match_operand:V4DF 0 "register_operand" "=x")
3844 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
3845 (parallel [(const_int 0) (const_int 1)
3846 (const_int 2) (const_int 3)]))))]
3848 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
3849 [(set_attr "type" "ssecvt")
3850 (set_attr "prefix" "vex")
3851 (set_attr "mode" "V4DF")])
;; cvtdq2pd: elements 0 and 1 of the V4SI operand 1 are converted to form
;; the V2DF result.  "ssememalign" is set to 64 for this insn.
3853 (define_insn "sse2_cvtdq2pd"
3854 [(set (match_operand:V2DF 0 "register_operand" "=x")
3857 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3858 (parallel [(const_int 0) (const_int 1)]))))]
3860 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
3861 [(set_attr "type" "ssecvt")
3862 (set_attr "prefix" "maybe_vex")
3863 (set_attr "ssememalign" "64")
3864 (set_attr "mode" "V2DF")])
;; 512-bit vcvtpd2dq: V8DF -> V8SI through UNSPEC_FIX_NOTRUNC, optionally
;; masked.  EVEX-encoded.
3866 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name>"
3867 [(set (match_operand:V8SI 0 "register_operand" "=v")
3869 [(match_operand:V8DF 1 "nonimmediate_operand" "vm")]
3870 UNSPEC_FIX_NOTRUNC))]
3872 "vcvtpd2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3873 [(set_attr "type" "ssecvt")
3874 (set_attr "prefix" "evex")
3875 (set_attr "mode" "OI")])
;; 256-bit vcvtpd2dq: V4DF -> V4SI through UNSPEC_FIX_NOTRUNC.  The AT&T
;; mnemonic carries an explicit {y} suffix.  VEX-encoded.
3877 (define_insn "avx_cvtpd2dq256"
3878 [(set (match_operand:V4SI 0 "register_operand" "=x")
3879 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3880 UNSPEC_FIX_NOTRUNC))]
3882 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
3883 [(set_attr "type" "ssecvt")
3884 (set_attr "prefix" "vex")
3885 (set_attr "mode" "OI")])
;; Expander producing a V8SI result from a V4DF -> V4SI conversion.
;; The preparation statement sets operand 2 to the V4SI zero constant,
;; matching the const0_operand required by *avx_cvtpd2dq256_2.
3887 (define_expand "avx_cvtpd2dq256_2"
3888 [(set (match_operand:V8SI 0 "register_operand")
3890 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
3894 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matched by the avx_cvtpd2dq256_2 expander: the V4SI conversion
;; result is paired with a zero V4SI (operand 2, const0_operand) to form the
;; V8SI destination.  The destination is printed with the %x modifier.
3896 (define_insn "*avx_cvtpd2dq256_2"
3897 [(set (match_operand:V8SI 0 "register_operand" "=x")
3899 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3901 (match_operand:V4SI 2 "const0_operand")))]
3903 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
3904 [(set_attr "type" "ssecvt")
3905 (set_attr "prefix" "vex")
3906 (set_attr "btver2_decode" "vector")
3907 (set_attr "mode" "OI")])
;; Expander producing a V4SI result from a V2DF -> V2SI conversion.
;; The preparation statement sets operand 2 to the V2SI zero constant,
;; matching the const0_operand required by *sse2_cvtpd2dq.
3909 (define_expand "sse2_cvtpd2dq"
3910 [(set (match_operand:V4SI 0 "register_operand")
3912 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
3916 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvtpd2dq expander: the V2SI conversion result
;; is paired with a zero V2SI (operand 2) to form the V4SI destination.
;; The output code returns one of two templates: vcvtpd2dq with an explicit
;; {x} suffix, or the legacy cvtpd2dq.
3918 (define_insn "*sse2_cvtpd2dq"
3919 [(set (match_operand:V4SI 0 "register_operand" "=x")
3921 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3923 (match_operand:V2SI 2 "const0_operand")))]
3927 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
3929 return "cvtpd2dq\t{%1, %0|%0, %1}";
3931 [(set_attr "type" "ssecvt")
3932 (set_attr "prefix_rep" "1")
3933 (set_attr "prefix_data16" "0")
3934 (set_attr "prefix" "maybe_vex")
3935 (set_attr "mode" "TI")
3936 (set_attr "amdfam10_decode" "double")
3937 (set_attr "athlon_decode" "vector")
3938 (set_attr "bdver1_decode" "double")])
;; vcvtpd2udq: V8DF -> V8SI through UNSPEC_UNSIGNED_FIX_NOTRUNC, optionally
;; masked.  EVEX-encoded.
;; NOTE(review): unlike avx512f_ufix_notruncv16sfv16si and
;; avx512f_cvtpd2dq512, this name has no <mask_codefor> prefix -- confirm
;; that the omission is intentional.
3940 (define_insn "avx512f_ufix_notruncv8dfv8si<mask_name>"
3941 [(set (match_operand:V8SI 0 "register_operand" "=v")
3943 [(match_operand:V8DF 1 "nonimmediate_operand" "vm")]
3944 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3946 "vcvtpd2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3947 [(set_attr "type" "ssecvt")
3948 (set_attr "prefix" "evex")
3949 (set_attr "mode" "OI")])
;; vcvttpd2dq / vcvttpd2udq: V8DF -> V8SI conversion.  <fixsuffix> is
;; spliced into both the pattern name and the mnemonic.  Optionally masked;
;; EVEX-encoded.
3951 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name>"
3952 [(set (match_operand:V8SI 0 "register_operand" "=v")
3954 (match_operand:V8DF 1 "nonimmediate_operand" "vm")))]
3956 "vcvttpd2<fixsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3957 [(set_attr "type" "ssecvt")
3958 (set_attr "prefix" "evex")
3959 (set_attr "mode" "OI")])
;; 256-bit vcvttpd2dq: V4DF -> V4SI using the RTL "fix" code.  The AT&T
;; mnemonic carries an explicit {y} suffix.  VEX-encoded.
3961 (define_insn "fix_truncv4dfv4si2"
3962 [(set (match_operand:V4SI 0 "register_operand" "=x")
3963 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3965 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
3966 [(set_attr "type" "ssecvt")
3967 (set_attr "prefix" "vex")
3968 (set_attr "mode" "OI")])
;; Expander producing a V8SI result from a V4DF -> V4SI "fix" conversion.
;; The preparation statement sets operand 2 to the V4SI zero constant,
;; matching the const0_operand required by *avx_cvttpd2dq256_2.
3970 (define_expand "avx_cvttpd2dq256_2"
3971 [(set (match_operand:V8SI 0 "register_operand")
3973 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
3976 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matched by the avx_cvttpd2dq256_2 expander: the V4SI "fix" result is
;; paired with a zero V4SI (operand 2) to form the V8SI destination, which
;; is printed with the %x modifier.
3978 (define_insn "*avx_cvttpd2dq256_2"
3979 [(set (match_operand:V8SI 0 "register_operand" "=x")
3981 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
3982 (match_operand:V4SI 2 "const0_operand")))]
3984 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
3985 [(set_attr "type" "ssecvt")
3986 (set_attr "prefix" "vex")
3987 (set_attr "btver2_decode" "vector")
3988 (set_attr "mode" "OI")])
;; Expander producing a V4SI result from a V2DF -> V2SI "fix" conversion.
;; The preparation statement sets operand 2 to the V2SI zero constant,
;; matching the const0_operand required by *sse2_cvttpd2dq.
3990 (define_expand "sse2_cvttpd2dq"
3991 [(set (match_operand:V4SI 0 "register_operand")
3993 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
3996 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvttpd2dq expander: the V2SI "fix" result is
;; paired with a zero V2SI (operand 2) to form the V4SI destination.
;; The output code returns one of two templates: vcvttpd2dq with an explicit
;; {x} suffix, or the legacy cvttpd2dq.
3998 (define_insn "*sse2_cvttpd2dq"
3999 [(set (match_operand:V4SI 0 "register_operand" "=x")
4001 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4002 (match_operand:V2SI 2 "const0_operand")))]
4006 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
4008 return "cvttpd2dq\t{%1, %0|%0, %1}";
4010 [(set_attr "type" "ssecvt")
4011 (set_attr "amdfam10_decode" "double")
4012 (set_attr "athlon_decode" "vector")
4013 (set_attr "bdver1_decode" "double")
4014 (set_attr "prefix" "maybe_vex")
4015 (set_attr "mode" "TI")])
;; cvtsd2ss: narrow the V2DF operand 2 with float_truncate:V2SF and combine
;; the result with the V4SF operand 1.  Alternatives 0/1 are the legacy
;; two-operand forms (register / memory source); alternative 2 is the AVX
;; three-operand vcvtsd2ss.
;; NOTE(review): alternative 2 allows the "v" constraint but its "prefix" is
;; "vex", whereas sse_cvtsi2ss pairs "v" with "maybe_evex" -- confirm.
4017 (define_insn "sse2_cvtsd2ss"
4018 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4021 (float_truncate:V2SF
4022 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,vm")))
4023 (match_operand:V4SF 1 "register_operand" "0,0,v")
4027 cvtsd2ss\t{%2, %0|%0, %2}
4028 cvtsd2ss\t{%2, %0|%0, %q2}
4029 vcvtsd2ss\t{%2, %1, %0|%0, %1, %q2}"
4030 [(set_attr "isa" "noavx,noavx,avx")
4031 (set_attr "type" "ssecvt")
4032 (set_attr "athlon_decode" "vector,double,*")
4033 (set_attr "amdfam10_decode" "vector,double,*")
4034 (set_attr "bdver1_decode" "direct,direct,*")
4035 (set_attr "btver2_decode" "double,double,double")
4036 (set_attr "prefix" "orig,orig,vex")
4037 (set_attr "mode" "SF")])
;; cvtss2sd: elements 0 and 1 of the V4SF operand 2 are named by the
;; parallel and the converted value is combined with the V2DF operand 1.
;; Alternatives 0/1 are the legacy two-operand forms; alternative 2 is the
;; AVX three-operand vcvtss2sd.
;; NOTE(review): alternative 2 allows the "v" constraint but its "prefix" is
;; "vex", whereas sse_cvtsi2ss pairs "v" with "maybe_evex" -- confirm.
4039 (define_insn "sse2_cvtss2sd"
4040 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4044 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,vm")
4045 (parallel [(const_int 0) (const_int 1)])))
4046 (match_operand:V2DF 1 "register_operand" "0,0,v")
4050 cvtss2sd\t{%2, %0|%0, %2}
4051 cvtss2sd\t{%2, %0|%0, %k2}
4052 vcvtss2sd\t{%2, %1, %0|%0, %1, %k2}"
4053 [(set_attr "isa" "noavx,noavx,avx")
4054 (set_attr "type" "ssecvt")
4055 (set_attr "amdfam10_decode" "vector,double,*")
4056 (set_attr "athlon_decode" "direct,direct,*")
4057 (set_attr "bdver1_decode" "direct,direct,*")
4058 (set_attr "btver2_decode" "double,double,double")
4059 (set_attr "prefix" "orig,orig,vex")
4060 (set_attr "mode" "DF")])
;; 512-bit vcvtpd2ps: narrow V8DF to V8SF (float_truncate), optionally
;; masked.  EVEX-encoded.
4062 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name>"
4063 [(set (match_operand:V8SF 0 "register_operand" "=v")
4064 (float_truncate:V8SF
4065 (match_operand:V8DF 1 "nonimmediate_operand" "vm")))]
4067 "vcvtpd2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4068 [(set_attr "type" "ssecvt")
4069 (set_attr "prefix" "evex")
4070 (set_attr "mode" "V8SF")])
;; 256-bit vcvtpd2ps: narrow V4DF to V4SF (float_truncate).  The AT&T
;; mnemonic carries an explicit {y} suffix.  VEX-encoded.
4072 (define_insn "avx_cvtpd2ps256"
4073 [(set (match_operand:V4SF 0 "register_operand" "=x")
4074 (float_truncate:V4SF
4075 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
4077 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
4078 [(set_attr "type" "ssecvt")
4079 (set_attr "prefix" "vex")
4080 (set_attr "btver2_decode" "vector")
4081 (set_attr "mode" "V4SF")])
;; Expander producing a V4SF result from a V2DF -> V2SF float_truncate.
;; The preparation statement sets operand 2 to the V2SF zero constant,
;; matching the const0_operand required by *sse2_cvtpd2ps.
4083 (define_expand "sse2_cvtpd2ps"
4084 [(set (match_operand:V4SF 0 "register_operand")
4086 (float_truncate:V2SF
4087 (match_operand:V2DF 1 "nonimmediate_operand"))
4090 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matched by the sse2_cvtpd2ps expander: the V2SF float_truncate
;; result is paired with a zero V2SF (operand 2) to form the V4SF
;; destination.  The output code returns one of two templates: vcvtpd2ps
;; with an explicit {x} suffix, or the legacy cvtpd2ps.
4092 (define_insn "*sse2_cvtpd2ps"
4093 [(set (match_operand:V4SF 0 "register_operand" "=x")
4095 (float_truncate:V2SF
4096 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4097 (match_operand:V2SF 2 "const0_operand")))]
4101 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
4103 return "cvtpd2ps\t{%1, %0|%0, %1}";
4105 [(set_attr "type" "ssecvt")
4106 (set_attr "amdfam10_decode" "double")
4107 (set_attr "athlon_decode" "vector")
4108 (set_attr "bdver1_decode" "double")
4109 (set_attr "prefix_data16" "1")
4110 (set_attr "prefix" "maybe_vex")
4111 (set_attr "mode" "V4SF")])
4113 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a double-precision vector mode to the single-precision vector mode
;; with the same element count.
4114 (define_mode_attr sf2dfmode
4115 [(V8DF "V8SF") (V4DF "V4SF")])
;; vcvtps2pd: widen a single-precision vector (<sf2dfmode>) to the
;; double-precision vector mode of the VF2_512_256 iterator (float_extend).
;; Optionally masked; requires TARGET_AVX && <mask_mode512bit_condition>.
4117 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name>"
4118 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4119 (float_extend:VF2_512_256
4120 (match_operand:<sf2dfmode> 1 "nonimmediate_operand" "vm")))]
4121 "TARGET_AVX && <mask_mode512bit_condition>"
4122 "vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4123 [(set_attr "type" "ssecvt")
4124 (set_attr "prefix" "maybe_vex")
4125 (set_attr "mode" "<MODE>")])
;; vcvtps2pd on the low half of a V8SF operand: the parallel names elements
;; 0..3, which are converted to form the V4DF result.
;; The template prints operand 1 with the %x modifier.  VEX-encoded.
4127 (define_insn "*avx_cvtps2pd256_2"
4128 [(set (match_operand:V4DF 0 "register_operand" "=x")
4131 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4132 (parallel [(const_int 0) (const_int 1)
4133 (const_int 2) (const_int 3)]))))]
4135 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4136 [(set_attr "type" "ssecvt")
4137 (set_attr "prefix" "vex")
4138 (set_attr "mode" "V4DF")])
;; Unpack-low for V16SF: the parallel names elements 0..7 of operand 1,
;; which vcvtps2pd converts to form the V8DF result.
;; The template prints operand 1 with the %t modifier.  EVEX-encoded.
4140 (define_insn "vec_unpacks_lo_v16sf"
4141 [(set (match_operand:V8DF 0 "register_operand" "=v")
4144 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4145 (parallel [(const_int 0) (const_int 1)
4146 (const_int 2) (const_int 3)
4147 (const_int 4) (const_int 5)
4148 (const_int 6) (const_int 7)]))))]
4150 "vcvtps2pd\t{%t1, %0|%0, %t1}"
4151 [(set_attr "type" "ssecvt")
4152 (set_attr "prefix" "evex")
4153 (set_attr "mode" "V8DF")])
;; cvtps2pd: elements 0 and 1 of the V4SF operand 1 are converted to form
;; the V2DF result.  "prefix_data16" is forced to "0".
4155 (define_insn "sse2_cvtps2pd"
4156 [(set (match_operand:V2DF 0 "register_operand" "=x")
4159 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4160 (parallel [(const_int 0) (const_int 1)]))))]
4162 "%vcvtps2pd\t{%1, %0|%0, %q1}"
4163 [(set_attr "type" "ssecvt")
4164 (set_attr "amdfam10_decode" "direct")
4165 (set_attr "athlon_decode" "double")
4166 (set_attr "bdver1_decode" "double")
4167 (set_attr "prefix_data16" "0")
4168 (set_attr "prefix" "maybe_vex")
4169 (set_attr "mode" "V2DF")])
;; Unpack-high for V4SF, done in two steps:
;;   1. a shuffle of operand 1 selecting indices 6, 7, 2, 3 goes into a
;;      fresh V4SF temporary (operand 2, created by the preparation code);
;;   2. elements 0 and 1 of that temporary are converted into the V2DF
;;      destination.
;; (Indices above 3 presumably address a concatenation of operand 1 with
;; itself -- confirm against the complete pattern.)
4171 (define_expand "vec_unpacks_hi_v4sf"
4176 (match_operand:V4SF 1 "nonimmediate_operand"))
4177 (parallel [(const_int 6) (const_int 7)
4178 (const_int 2) (const_int 3)])))
4179 (set (match_operand:V2DF 0 "register_operand")
4183 (parallel [(const_int 0) (const_int 1)]))))]
4185 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Unpack-high for V8SF: elements 4..7 of operand 1 are first placed in a
;; fresh V4SF temporary (operand 2), which then feeds the set of the V4DF
;; destination.
4187 (define_expand "vec_unpacks_hi_v8sf"
4190 (match_operand:V8SF 1 "nonimmediate_operand")
4191 (parallel [(const_int 4) (const_int 5)
4192 (const_int 6) (const_int 7)])))
4193 (set (match_operand:V4DF 0 "register_operand")
4197 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Unpack-high for V16SF: elements 8..15 of operand 1 are first placed in a
;; fresh V8SF temporary (operand 2), which then feeds the set of the V8DF
;; destination.
4199 (define_expand "vec_unpacks_hi_v16sf"
4202 (match_operand:V16SF 1 "nonimmediate_operand")
4203 (parallel [(const_int 8) (const_int 9)
4204 (const_int 10) (const_int 11)
4205 (const_int 12) (const_int 13)
4206 (const_int 14) (const_int 15)])))
4207 (set (match_operand:V8DF 0 "register_operand")
4211 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Unpack-low for V4SF: elements 0 and 1 of operand 1 are converted into
;; the V2DF destination.  No temporary is needed.
4213 (define_expand "vec_unpacks_lo_v4sf"
4214 [(set (match_operand:V2DF 0 "register_operand")
4217 (match_operand:V4SF 1 "nonimmediate_operand")
4218 (parallel [(const_int 0) (const_int 1)]))))]
;; Unpack-low for V8SF: elements 0..3 of operand 1 are converted into the
;; V4DF destination.  No temporary is needed.
4221 (define_expand "vec_unpacks_lo_v8sf"
4222 [(set (match_operand:V4DF 0 "register_operand")
4225 (match_operand:V8SF 1 "nonimmediate_operand")
4226 (parallel [(const_int 0) (const_int 1)
4227 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the float vector mode produced by the
;; vec_unpack*_float_* expanders: half as many elements, each twice as
;; wide (e.g. V8HI -> V4SF, V4SI -> V2DF).
4230 (define_mode_attr sseunpackfltmode
4231 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
4232 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Signed unpack-high followed by int -> float conversion, for the modes of
;; the VI2_AVX512F iterator.  The C body unpacks into a <sseunpackmode>
;; temporary with vec_unpacks_hi_<mode>, then emits a SET of operand 0 to
;; the FLOAT of that temporary in <sseunpackfltmode>.
4234 (define_expand "vec_unpacks_float_hi_<mode>"
4235 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4236 (match_operand:VI2_AVX512F 1 "register_operand")]
4239 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4241 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
4242 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4243 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed unpack-low followed by int -> float conversion; identical to
;; vec_unpacks_float_hi_<mode> except that vec_unpacks_lo_<mode> supplies
;; the temporary.
4247 (define_expand "vec_unpacks_float_lo_<mode>"
4248 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4249 (match_operand:VI2_AVX512F 1 "register_operand")]
4252 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4254 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
4255 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4256 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-high followed by int -> float conversion.  The temporary
;; comes from vec_unpacku_hi_<mode>; the conversion itself still uses the
;; signed FLOAT code.
;; (Presumably safe because the unsigned unpack leaves the widened values
;; non-negative -- confirm against vec_unpacku_hi_<mode>.)
4260 (define_expand "vec_unpacku_float_hi_<mode>"
4261 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4262 (match_operand:VI2_AVX512F 1 "register_operand")]
4265 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4267 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
4268 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4269 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-low followed by int -> float conversion.  The temporary
;; comes from vec_unpacku_lo_<mode>; the conversion itself still uses the
;; signed FLOAT code.
;; (Presumably safe because the unsigned unpack leaves the widened values
;; non-negative -- confirm against vec_unpacku_lo_<mode>.)
4273 (define_expand "vec_unpacku_float_lo_<mode>"
4274 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4275 (match_operand:VI2_AVX512F 1 "register_operand")]
4278 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4280 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
4281 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4282 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed unpack-high + float conversion for V4SI, in two steps:
;;   1. elements 2, 3, 2, 3 of operand 1 are shuffled into a fresh V4SI
;;      temporary (operand 2), bringing the high half down to 0 and 1;
;;   2. elements 0 and 1 of that temporary are converted into the V2DF
;;      destination.
4286 (define_expand "vec_unpacks_float_hi_v4si"
4289 (match_operand:V4SI 1 "nonimmediate_operand")
4290 (parallel [(const_int 2) (const_int 3)
4291 (const_int 2) (const_int 3)])))
4292 (set (match_operand:V2DF 0 "register_operand")
4296 (parallel [(const_int 0) (const_int 1)]))))]
4298 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed unpack-low + float conversion for V4SI: elements 0 and 1 of
;; operand 1 are converted into the V2DF destination.
4300 (define_expand "vec_unpacks_float_lo_v4si"
4301 [(set (match_operand:V2DF 0 "register_operand")
4304 (match_operand:V4SI 1 "nonimmediate_operand")
4305 (parallel [(const_int 0) (const_int 1)]))))]
;; Signed unpack-high + float conversion for V8SI: elements 4..7 of operand
;; 1 are first placed in a fresh V4SI temporary (operand 2), which then
;; feeds the set of the V4DF destination.
4308 (define_expand "vec_unpacks_float_hi_v8si"
4311 (match_operand:V8SI 1 "nonimmediate_operand")
4312 (parallel [(const_int 4) (const_int 5)
4313 (const_int 6) (const_int 7)])))
4314 (set (match_operand:V4DF 0 "register_operand")
4318 "operands[2] = gen_reg_rtx (V4SImode);")
;; Single-set expander: V4DF operand 0 is computed from V8SI operand 1
;; through the selector (0 1 2 3).
4320 (define_expand "vec_unpacks_float_lo_v8si"
4321 [(set (match_operand:V4DF 0 "register_operand")
4324 (match_operand:V8SI 1 "nonimmediate_operand")
4325 (parallel [(const_int 0) (const_int 1)
4326 (const_int 2) (const_int 3)]))))]
;; Two-step expander.  The first step applies the selector (8 .. 15) to
;; V16SI operand 1; the second sets V8DF operand 0.  Operand 2 is a fresh
;; V8SI pseudo created by the preparation statement.
4329 (define_expand "vec_unpacks_float_hi_v16si"
4332 (match_operand:V16SI 1 "nonimmediate_operand")
4333 (parallel [(const_int 8) (const_int 9)
4334 (const_int 10) (const_int 11)
4335 (const_int 12) (const_int 13)
4336 (const_int 14) (const_int 15)])))
4337 (set (match_operand:V8DF 0 "register_operand")
4341 "operands[2] = gen_reg_rtx (V8SImode);")
;; Single-set expander: V8DF operand 0 is computed from V16SI operand 1
;; through the selector (0 .. 7).
4343 (define_expand "vec_unpacks_float_lo_v16si"
4344 [(set (match_operand:V8DF 0 "register_operand")
4347 (match_operand:V16SI 1 "nonimmediate_operand")
4348 (parallel [(const_int 0) (const_int 1)
4349 (const_int 2) (const_int 3)
4350 (const_int 4) (const_int 5)
4351 (const_int 6) (const_int 7)]))))]
;; Multi-step expander for V4SI operand 1 -> V2DF operand 0.
;; Visible steps: the selector (2 3 2 3) on operand 1, then the selector
;; (0 1), an LT compare of operand 6 against operand 3, an AND of
;; operand 7 with operand 4, and finally operand 0 = operand 6 + operand 8.
;; The preparation code sets operand 3 to a zero V2DF vector and operand 4
;; to a V2DF vector built from 2^32 (real_ldexp of dconst1 by 32), creates
;; a V4SI pseudo for operand 5 and V2DF pseudos for operands 6..8.
4354 (define_expand "vec_unpacku_float_hi_v4si"
4357 (match_operand:V4SI 1 "nonimmediate_operand")
4358 (parallel [(const_int 2) (const_int 3)
4359 (const_int 2) (const_int 3)])))
4364 (parallel [(const_int 0) (const_int 1)]))))
4366 (lt:V2DF (match_dup 6) (match_dup 3)))
4368 (and:V2DF (match_dup 7) (match_dup 4)))
4369 (set (match_operand:V2DF 0 "register_operand")
4370 (plus:V2DF (match_dup 6) (match_dup 8)))]
4373 REAL_VALUE_TYPE TWO32r;
4377 real_ldexp (&TWO32r, &dconst1, 32);
4378 x = const_double_from_real_value (TWO32r, DFmode);
4380 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4381 operands[4] = force_reg (V2DFmode,
4382 ix86_build_const_vector (V2DFmode, 1, x));
4384 operands[5] = gen_reg_rtx (V4SImode);
4386 for (i = 6; i < 9; i++)
4387 operands[i] = gen_reg_rtx (V2DFmode);
;; Multi-step expander for V4SI operand 1 -> V2DF operand 0.
;; Visible steps: the selector (0 1) on operand 1, an LT compare of
;; operand 5 against operand 3, an AND of operand 6 with operand 4, and
;; finally operand 0 = operand 5 + operand 7.
;; The preparation code sets operand 3 to a zero V2DF vector and operand 4
;; to a V2DF vector built from 2^32 (real_ldexp of dconst1 by 32), and
;; creates V2DF pseudos for operands 5..7.
4390 (define_expand "vec_unpacku_float_lo_v4si"
4394 (match_operand:V4SI 1 "nonimmediate_operand")
4395 (parallel [(const_int 0) (const_int 1)]))))
4397 (lt:V2DF (match_dup 5) (match_dup 3)))
4399 (and:V2DF (match_dup 6) (match_dup 4)))
4400 (set (match_operand:V2DF 0 "register_operand")
4401 (plus:V2DF (match_dup 5) (match_dup 7)))]
4404 REAL_VALUE_TYPE TWO32r;
4408 real_ldexp (&TWO32r, &dconst1, 32);
4409 x = const_double_from_real_value (TWO32r, DFmode);
4411 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4412 operands[4] = force_reg (V2DFmode,
4413 ix86_build_const_vector (V2DFmode, 1, x));
4415 for (i = 5; i < 8; i++)
4416 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander written in C for V8SI operand 1 -> V4DF operand 0.
;; Emitted sequence: gen_vec_extract_hi_v8si into a V4SI pseudo (tmp[5]),
;; gen_floatv4siv4df2 into tmp[2], tmp[3] = (tmp[2] LT zero vector),
;; tmp[4] = tmp[3] AND a vector built from 2^32, operand 0 = tmp[2] + tmp[4].
4419 (define_expand "vec_unpacku_float_hi_v8si"
4420 [(match_operand:V4DF 0 "register_operand")
4421 (match_operand:V8SI 1 "register_operand")]
4424 REAL_VALUE_TYPE TWO32r;
4428 real_ldexp (&TWO32r, &dconst1, 32);
4429 x = const_double_from_real_value (TWO32r, DFmode);
4431 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
4432 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
4433 tmp[5] = gen_reg_rtx (V4SImode);
4435 for (i = 2; i < 5; i++)
4436 tmp[i] = gen_reg_rtx (V4DFmode);
4437 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
4438 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
4439 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
4440 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
4441 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
4442 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expander written in C for V8SI operand 1 -> V4DF operand 0.
;; Emitted sequence: gen_avx_cvtdq2pd256_2 from operand 1 into tmp[2],
;; tmp[3] = (tmp[2] LT zero vector), tmp[4] = tmp[3] AND a vector built
;; from 2^32, operand 0 = tmp[2] + tmp[4].
4446 (define_expand "vec_unpacku_float_lo_v8si"
4447 [(match_operand:V4DF 0 "register_operand")
4448 (match_operand:V8SI 1 "nonimmediate_operand")]
4451 REAL_VALUE_TYPE TWO32r;
4455 real_ldexp (&TWO32r, &dconst1, 32);
4456 x = const_double_from_real_value (TWO32r, DFmode);
4458 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
4459 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
4461 for (i = 2; i < 5; i++)
4462 tmp[i] = gen_reg_rtx (V4DFmode);
4463 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
4464 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
4465 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
4466 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
4467 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expander written in C for V16SI operand 1 -> V8DF operand 0.
;; Emitted sequence: gen_avx512f_cvtdq2pd512_2 from operand 1 into tmp[2],
;; a QImode pseudo k = (tmp[2] LT zero vector), then gen_addv8df3_mask
;; with tmp[2], the 2^32 vector tmp[1] and k, and finally a move of tmp[2]
;; into operand 0.
4471 (define_expand "vec_unpacku_float_lo_v16si"
4472 [(match_operand:V8DF 0 "register_operand")
4473 (match_operand:V16SI 1 "nonimmediate_operand")]
4476 REAL_VALUE_TYPE TWO32r;
4479 real_ldexp (&TWO32r, &dconst1, 32);
4480 x = const_double_from_real_value (TWO32r, DFmode);
4482 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
4483 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
4484 tmp[2] = gen_reg_rtx (V8DFmode);
4485 k = gen_reg_rtx (QImode);
4487 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
4488 emit_insn (gen_rtx_SET (VOIDmode, k,
4489 gen_rtx_LT (QImode, tmp[2], tmp[0])));
4490 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
4491 emit_move_insn (operands[0], tmp[2]);
;; Expander: float_truncate operands 1 and 2 (VF2_512_256) to <sf2dfmode>,
;; then set <ssePSmode> operand 0 to a vec_concat.  Operands 3 and 4 are
;; fresh <sf2dfmode> pseudos created by the preparation statements.
4495 (define_expand "vec_pack_trunc_<mode>"
4497 (float_truncate:<sf2dfmode>
4498 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
4500 (float_truncate:<sf2dfmode>
4501 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
4502 (set (match_operand:<ssePSmode> 0 "register_operand")
4503 (vec_concat:<ssePSmode>
4508 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
4509 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; Expander written in C: two V2DF inputs -> V4SF operand 0.
;; When TARGET_AVX && !TARGET_PREFER_AVX128: concatenate the inputs into a
;; V4DF pseudo (operand 1 forced into a register) and emit
;; gen_avx_cvtpd2ps256.  The second visible statement group (presumably
;; the else branch) converts each input with gen_sse2_cvtpd2ps into a
;; V4SF pseudo and combines the two with gen_sse_movlhps.
4512 (define_expand "vec_pack_trunc_v2df"
4513 [(match_operand:V4SF 0 "register_operand")
4514 (match_operand:V2DF 1 "nonimmediate_operand")
4515 (match_operand:V2DF 2 "nonimmediate_operand")]
4520 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4522 tmp0 = gen_reg_rtx (V4DFmode);
4523 tmp1 = force_reg (V2DFmode, operands[1]);
4525 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4526 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
4530 tmp0 = gen_reg_rtx (V4SFmode);
4531 tmp1 = gen_reg_rtx (V4SFmode);
4533 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
4534 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
4535 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Expander written in C: convert each V8DF input to a V8SI pseudo with
;; gen_fix_truncv8dfv8si2, then concatenate the two halves into V16SI
;; operand 0 with gen_avx_vec_concatv16si.
4540 (define_expand "vec_pack_sfix_trunc_v8df"
4541 [(match_operand:V16SI 0 "register_operand")
4542 (match_operand:V8DF 1 "nonimmediate_operand")
4543 (match_operand:V8DF 2 "nonimmediate_operand")]
4548 r1 = gen_reg_rtx (V8SImode);
4549 r2 = gen_reg_rtx (V8SImode);
4551 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
4552 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
4553 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Expander written in C: convert each V4DF input to a V4SI pseudo with
;; gen_fix_truncv4dfv4si2, then concatenate the two halves into V8SI
;; operand 0 with gen_avx_vec_concatv8si.
4557 (define_expand "vec_pack_sfix_trunc_v4df"
4558 [(match_operand:V8SI 0 "register_operand")
4559 (match_operand:V4DF 1 "nonimmediate_operand")
4560 (match_operand:V4DF 2 "nonimmediate_operand")]
4565 r1 = gen_reg_rtx (V4SImode);
4566 r2 = gen_reg_rtx (V4SImode);
4568 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
4569 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
4570 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Expander written in C: two V2DF inputs -> V4SI operand 0.
;; When TARGET_AVX && !TARGET_PREFER_AVX128: concatenate the inputs into a
;; V4DF pseudo and emit gen_fix_truncv4dfv4si2 directly into operand 0.
;; The second visible statement group (presumably the else branch)
;; converts each input with gen_sse2_cvttpd2dq, interleaves the V2DI
;; lowparts of the two results with gen_vec_interleave_lowv2di, and moves
;; the V4SI lowpart of that into operand 0.
4574 (define_expand "vec_pack_sfix_trunc_v2df"
4575 [(match_operand:V4SI 0 "register_operand")
4576 (match_operand:V2DF 1 "nonimmediate_operand")
4577 (match_operand:V2DF 2 "nonimmediate_operand")]
4580 rtx tmp0, tmp1, tmp2;
4582 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4584 tmp0 = gen_reg_rtx (V4DFmode);
4585 tmp1 = force_reg (V2DFmode, operands[1]);
4587 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4588 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
4592 tmp0 = gen_reg_rtx (V4SImode);
4593 tmp1 = gen_reg_rtx (V4SImode);
4594 tmp2 = gen_reg_rtx (V2DImode);
4596 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
4597 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
4598 emit_insn (gen_vec_interleave_lowv2di (tmp2,
4599 gen_lowpart (V2DImode, tmp0),
4600 gen_lowpart (V2DImode, tmp1)));
4601 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Maps a double-precision vector mode to the SI vector mode with twice as
;; many elements: V8DF -> V16SI, V4DF -> V8SI, V2DF -> V4SI.
4606 (define_mode_attr ssepackfltmode
4607 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Expander written in C: two VF2_128_256 inputs -> <ssepackfltmode>
;; operand 0.
;; Each input is passed through ix86_expand_adjust_ufix_to_sfix_si, which
;; returns an adjusted value (tmp[0], tmp[1]) and fills a second value
;; through its pointer argument (tmp[2], tmp[3]).  The adjusted values are
;; packed into tmp[4] with gen_vec_pack_sfix_trunc_<mode>; tmp[2] and
;; tmp[3] are combined into tmp[5] by ix86_expand_vec_extract_even_odd
;; (directly when the result mode is V4SImode or TARGET_AVX2 holds, and
;; otherwise via V8SF lowparts); operand 0 receives tmp[4] XOR tmp[5].
;; NOTE(review): the exact meaning of the trailing 0 argument to
;; ix86_expand_vec_extract_even_odd is defined outside this excerpt.
4609 (define_expand "vec_pack_ufix_trunc_<mode>"
4610 [(match_operand:<ssepackfltmode> 0 "register_operand")
4611 (match_operand:VF2_128_256 1 "register_operand")
4612 (match_operand:VF2_128_256 2 "register_operand")]
4616 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4617 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
4618 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
4619 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
4620 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
4622 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
4623 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
4627 tmp[5] = gen_reg_rtx (V8SFmode);
4628 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
4629 gen_lowpart (V8SFmode, tmp[3]), 0);
4630 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
4632 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
4633 operands[0], 0, OPTAB_DIRECT);
4634 if (tmp[6] != operands[0])
4635 emit_move_insn (operands[0], tmp[6]);
;; Expander written in C: convert each V4DF input to a V4SI pseudo with
;; gen_avx_cvtpd2dq256, then concatenate the two halves into V8SI
;; operand 0 with gen_avx_vec_concatv8si.
4639 (define_expand "vec_pack_sfix_v4df"
4640 [(match_operand:V8SI 0 "register_operand")
4641 (match_operand:V4DF 1 "nonimmediate_operand")
4642 (match_operand:V4DF 2 "nonimmediate_operand")]
4647 r1 = gen_reg_rtx (V4SImode);
4648 r2 = gen_reg_rtx (V4SImode);
4650 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
4651 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
4652 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Expander written in C: two V2DF inputs -> V4SI operand 0.
;; When TARGET_AVX && !TARGET_PREFER_AVX128: concatenate the inputs into a
;; V4DF pseudo and emit gen_avx_cvtpd2dq256 directly into operand 0.
;; The second visible statement group (presumably the else branch)
;; converts each input with gen_sse2_cvtpd2dq, interleaves the V2DI
;; lowparts of the two results with gen_vec_interleave_lowv2di, and moves
;; the V4SI lowpart of that into operand 0.
4656 (define_expand "vec_pack_sfix_v2df"
4657 [(match_operand:V4SI 0 "register_operand")
4658 (match_operand:V2DF 1 "nonimmediate_operand")
4659 (match_operand:V2DF 2 "nonimmediate_operand")]
4662 rtx tmp0, tmp1, tmp2;
4664 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4666 tmp0 = gen_reg_rtx (V4DFmode);
4667 tmp1 = force_reg (V2DFmode, operands[1]);
4669 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4670 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
4674 tmp0 = gen_reg_rtx (V4SImode);
4675 tmp1 = gen_reg_rtx (V4SImode);
4676 tmp2 = gen_reg_rtx (V2DImode);
4678 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
4679 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
4680 emit_insn (gen_vec_interleave_lowv2di (tmp2,
4681 gen_lowpart (V2DImode, tmp0),
4682 gen_lowpart (V2DImode, tmp1)));
4683 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
4688 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4690 ;; Parallel single-precision floating point element swizzling
4692 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps: obtains a destination from
;; ix86_fixup_binary_operands, emits gen_sse_movhlps into it, and copies
;; the result to operand 0 when the helper returned a different rtx.
4694 (define_expand "sse_movhlps_exp"
4695 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4698 (match_operand:V4SF 1 "nonimmediate_operand")
4699 (match_operand:V4SF 2 "nonimmediate_operand"))
4700 (parallel [(const_int 6)
4706 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4708 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
4710 /* Fix up the destination if needed. */
4711 if (dst != operands[0])
4712 emit_move_insn (operands[0], dst);
;; Insn with five alternatives, valid under TARGET_SSE when operands 1 and
;; 2 are not both memory:
;;   0/1: register forms, movhlps (noavx) and vmovhlps (avx);
;;   2/3: operand 2 in offsettable memory, movlps/vmovlps using %H2;
;;   4:   memory destination, %vmovhps storing operand 2.
4717 (define_insn "sse_movhlps"
4718 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
4721 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
4722 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
4723 (parallel [(const_int 6)
4727 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4729 movhlps\t{%2, %0|%0, %2}
4730 vmovhlps\t{%2, %1, %0|%0, %1, %2}
4731 movlps\t{%H2, %0|%0, %H2}
4732 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
4733 %vmovhps\t{%2, %0|%q0, %2}"
4734 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4735 (set_attr "type" "ssemov")
4736 (set_attr "ssememalign" "64")
4737 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4738 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps: obtains a destination from
;; ix86_fixup_binary_operands, emits gen_sse_movlhps into it, and copies
;; the result to operand 0 when the helper returned a different rtx.
4740 (define_expand "sse_movlhps_exp"
4741 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4744 (match_operand:V4SF 1 "nonimmediate_operand")
4745 (match_operand:V4SF 2 "nonimmediate_operand"))
4746 (parallel [(const_int 0)
4752 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4754 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
4756 /* Fix up the destination if needed. */
4757 if (dst != operands[0])
4758 emit_move_insn (operands[0], dst);
;; Insn with five alternatives, valid under TARGET_SSE when
;; ix86_binary_operator_ok accepts the operands:
;;   0/1: register forms, movlhps (noavx) and vmovlhps (avx);
;;   2/3: operand 2 in memory, movhps/vmovhps;
;;   4:   offsettable memory destination, %vmovlps storing operand 2 at %H0.
4763 (define_insn "sse_movlhps"
4764 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
4767 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
4768 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
4769 (parallel [(const_int 0)
4773 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
4775 movlhps\t{%2, %0|%0, %2}
4776 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4777 movhps\t{%2, %0|%0, %q2}
4778 vmovhps\t{%2, %1, %0|%0, %1, %q2}
4779 %vmovlps\t{%2, %H0|%H0, %2}"
4780 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4781 (set_attr "type" "ssemov")
4782 (set_attr "ssememalign" "64")
4783 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4784 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; 512-bit vunpckhps (EVEX, optionally masked via <mask_name>).  The
;; selector interleaves elements 2,3 / 6,7 / 10,11 / 14,15 of operand 1
;; with the matching elements of operand 2 (indices offset by 16).
4786 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
4787 [(set (match_operand:V16SF 0 "register_operand" "=v")
4790 (match_operand:V16SF 1 "register_operand" "v")
4791 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
4792 (parallel [(const_int 2) (const_int 18)
4793 (const_int 3) (const_int 19)
4794 (const_int 6) (const_int 22)
4795 (const_int 7) (const_int 23)
4796 (const_int 10) (const_int 26)
4797 (const_int 11) (const_int 27)
4798 (const_int 14) (const_int 30)
4799 (const_int 15) (const_int 31)])))]
4801 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
4802 [(set_attr "type" "sselog")
4803 (set_attr "prefix" "evex")
4804 (set_attr "mode" "V16SF")])
4806 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps (VEX).  The selector interleaves elements 2,3 and 6,7
;; of operand 1 with the matching elements of operand 2 (indices offset
;; by 8).
4807 (define_insn "avx_unpckhps256"
4808 [(set (match_operand:V8SF 0 "register_operand" "=x")
4811 (match_operand:V8SF 1 "register_operand" "x")
4812 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4813 (parallel [(const_int 2) (const_int 10)
4814 (const_int 3) (const_int 11)
4815 (const_int 6) (const_int 14)
4816 (const_int 7) (const_int 15)])))]
4818 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
4819 [(set_attr "type" "sselog")
4820 (set_attr "prefix" "vex")
4821 (set_attr "mode" "V8SF")])
;; Three-step expander for a V8SF interleave of the upper halves.
;; Step 1 uses the selector (0 8 1 9 4 12 5 13), step 2 uses the selector
;; (2 10 3 11 6 14 7 15), and the final step sets operand 0 using the
;; selector (4 5 6 7 12 13 14 15).  Operands 3 and 4 are fresh V8SF
;; pseudos created by the preparation statements.
4823 (define_expand "vec_interleave_highv8sf"
4827 (match_operand:V8SF 1 "register_operand" "x")
4828 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4829 (parallel [(const_int 0) (const_int 8)
4830 (const_int 1) (const_int 9)
4831 (const_int 4) (const_int 12)
4832 (const_int 5) (const_int 13)])))
4838 (parallel [(const_int 2) (const_int 10)
4839 (const_int 3) (const_int 11)
4840 (const_int 6) (const_int 14)
4841 (const_int 7) (const_int 15)])))
4842 (set (match_operand:V8SF 0 "register_operand")
4847 (parallel [(const_int 4) (const_int 5)
4848 (const_int 6) (const_int 7)
4849 (const_int 12) (const_int 13)
4850 (const_int 14) (const_int 15)])))]
4853 operands[3] = gen_reg_rtx (V8SFmode);
4854 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF interleave using the selector (2 6 3 7).  Two alternatives:
;; unpckhps with operand 1 tied to the destination (noavx) and the
;; three-operand vunpckhps (avx).
4857 (define_insn "vec_interleave_highv4sf"
4858 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4861 (match_operand:V4SF 1 "register_operand" "0,x")
4862 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
4863 (parallel [(const_int 2) (const_int 6)
4864 (const_int 3) (const_int 7)])))]
4867 unpckhps\t{%2, %0|%0, %2}
4868 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
4869 [(set_attr "isa" "noavx,avx")
4870 (set_attr "type" "sselog")
4871 (set_attr "prefix" "orig,vex")
4872 (set_attr "mode" "V4SF")])
;; 512-bit vunpcklps (EVEX, optionally masked via <mask_name>).  The
;; selector interleaves elements 0,1 / 4,5 / 8,9 / 12,13 of operand 1
;; with the matching elements of operand 2 (indices offset by 16).
4874 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
4875 [(set (match_operand:V16SF 0 "register_operand" "=v")
4878 (match_operand:V16SF 1 "register_operand" "v")
4879 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
4880 (parallel [(const_int 0) (const_int 16)
4881 (const_int 1) (const_int 17)
4882 (const_int 4) (const_int 20)
4883 (const_int 5) (const_int 21)
4884 (const_int 8) (const_int 24)
4885 (const_int 9) (const_int 25)
4886 (const_int 12) (const_int 28)
4887 (const_int 13) (const_int 29)])))]
4889 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
4890 [(set_attr "type" "sselog")
4891 (set_attr "prefix" "evex")
4892 (set_attr "mode" "V16SF")])
4894 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps (VEX).  The selector interleaves elements 0,1 and 4,5
;; of operand 1 with the matching elements of operand 2 (indices offset
;; by 8).
4895 (define_insn "avx_unpcklps256"
4896 [(set (match_operand:V8SF 0 "register_operand" "=x")
4899 (match_operand:V8SF 1 "register_operand" "x")
4900 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4901 (parallel [(const_int 0) (const_int 8)
4902 (const_int 1) (const_int 9)
4903 (const_int 4) (const_int 12)
4904 (const_int 5) (const_int 13)])))]
4906 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
4907 [(set_attr "type" "sselog")
4908 (set_attr "prefix" "vex")
4909 (set_attr "mode" "V8SF")])
;; Three-step expander for a V8SF interleave of the lower halves.
;; Step 1 uses the selector (0 8 1 9 4 12 5 13), step 2 uses the selector
;; (2 10 3 11 6 14 7 15), and the final step sets operand 0 using the
;; selector (0 1 2 3 8 9 10 11).  Operands 3 and 4 are fresh V8SF pseudos
;; created by the preparation statements.
4911 (define_expand "vec_interleave_lowv8sf"
4915 (match_operand:V8SF 1 "register_operand" "x")
4916 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4917 (parallel [(const_int 0) (const_int 8)
4918 (const_int 1) (const_int 9)
4919 (const_int 4) (const_int 12)
4920 (const_int 5) (const_int 13)])))
4926 (parallel [(const_int 2) (const_int 10)
4927 (const_int 3) (const_int 11)
4928 (const_int 6) (const_int 14)
4929 (const_int 7) (const_int 15)])))
4930 (set (match_operand:V8SF 0 "register_operand")
4935 (parallel [(const_int 0) (const_int 1)
4936 (const_int 2) (const_int 3)
4937 (const_int 8) (const_int 9)
4938 (const_int 10) (const_int 11)])))]
4941 operands[3] = gen_reg_rtx (V8SFmode);
4942 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF interleave using the selector (0 4 1 5).  Two alternatives:
;; unpcklps with operand 1 tied to the destination (noavx) and the
;; three-operand vunpcklps (avx).
4945 (define_insn "vec_interleave_lowv4sf"
4946 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4949 (match_operand:V4SF 1 "register_operand" "0,x")
4950 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
4951 (parallel [(const_int 0) (const_int 4)
4952 (const_int 1) (const_int 5)])))]
4955 unpcklps\t{%2, %0|%0, %2}
4956 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
4957 [(set_attr "isa" "noavx,avx")
4958 (set_attr "type" "sselog")
4959 (set_attr "prefix" "orig,vex")
4960 (set_attr "mode" "V4SF")])
4962 ;; These are modeled with the same vec_concat as the others so that we
4963 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup (VEX): the selector (1 1 3 3 5 5 7 7) duplicates each
;; odd-indexed element of V8SF operand 1.
4964 (define_insn "avx_movshdup256"
4965 [(set (match_operand:V8SF 0 "register_operand" "=x")
4968 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4970 (parallel [(const_int 1) (const_int 1)
4971 (const_int 3) (const_int 3)
4972 (const_int 5) (const_int 5)
4973 (const_int 7) (const_int 7)])))]
4975 "vmovshdup\t{%1, %0|%0, %1}"
4976 [(set_attr "type" "sse")
4977 (set_attr "prefix" "vex")
4978 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF operand 1 (register or memory); emitted as
;; %vmovshdup, so the VEX form is used when applicable (prefix maybe_vex).
4980 (define_insn "sse3_movshdup"
4981 [(set (match_operand:V4SF 0 "register_operand" "=x")
4984 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4986 (parallel [(const_int 1)
4991 "%vmovshdup\t{%1, %0|%0, %1}"
4992 [(set_attr "type" "sse")
4993 (set_attr "prefix_rep" "1")
4994 (set_attr "prefix" "maybe_vex")
4995 (set_attr "mode" "V4SF")])
;; 512-bit vmovshdup (EVEX, optionally masked via <mask_name>): the
;; selector duplicates each odd-indexed element (1 1 3 3 ... 15 15) of
;; V16SF operand 1.
4997 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
4998 [(set (match_operand:V16SF 0 "register_operand" "=v")
5001 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5003 (parallel [(const_int 1) (const_int 1)
5004 (const_int 3) (const_int 3)
5005 (const_int 5) (const_int 5)
5006 (const_int 7) (const_int 7)
5007 (const_int 9) (const_int 9)
5008 (const_int 11) (const_int 11)
5009 (const_int 13) (const_int 13)
5010 (const_int 15) (const_int 15)])))]
5012 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5013 [(set_attr "type" "sse")
5014 (set_attr "prefix" "evex")
5015 (set_attr "mode" "V16SF")])
;; 256-bit vmovsldup (VEX): the selector (0 0 2 2 4 4 6 6) duplicates each
;; even-indexed element of V8SF operand 1.
5017 (define_insn "avx_movsldup256"
5018 [(set (match_operand:V8SF 0 "register_operand" "=x")
5021 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5023 (parallel [(const_int 0) (const_int 0)
5024 (const_int 2) (const_int 2)
5025 (const_int 4) (const_int 4)
5026 (const_int 6) (const_int 6)])))]
5028 "vmovsldup\t{%1, %0|%0, %1}"
5029 [(set_attr "type" "sse")
5030 (set_attr "prefix" "vex")
5031 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF operand 1 (register or memory); emitted as
;; %vmovsldup, so the VEX form is used when applicable (prefix maybe_vex).
5033 (define_insn "sse3_movsldup"
5034 [(set (match_operand:V4SF 0 "register_operand" "=x")
5037 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
5039 (parallel [(const_int 0)
5044 "%vmovsldup\t{%1, %0|%0, %1}"
5045 [(set_attr "type" "sse")
5046 (set_attr "prefix_rep" "1")
5047 (set_attr "prefix" "maybe_vex")
5048 (set_attr "mode" "V4SF")])
;; 512-bit vmovsldup (EVEX, optionally masked via <mask_name>): the
;; selector duplicates each even-indexed element (0 0 2 2 ... 14 14) of
;; V16SF operand 1.
5050 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
5051 [(set (match_operand:V16SF 0 "register_operand" "=v")
5054 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5056 (parallel [(const_int 0) (const_int 0)
5057 (const_int 2) (const_int 2)
5058 (const_int 4) (const_int 4)
5059 (const_int 6) (const_int 6)
5060 (const_int 8) (const_int 8)
5061 (const_int 10) (const_int 10)
5062 (const_int 12) (const_int 12)
5063 (const_int 14) (const_int 14)])))]
5065 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5066 [(set_attr "type" "sse")
5067 (set_attr "prefix" "evex")
5068 (set_attr "mode" "V16SF")])
;; Expander taking the shuffle immediate as const_int operand 3.
;; The immediate is split into four 2-bit fields and turned into the
;; eight explicit selector indices expected by avx_shufps256_1:
;; fields 0 and 1 unchanged, fields 2 and 3 offset by 8, followed by the
;; same four values offset by a further 4 (i.e. +4, +4, +12, +12).
5070 (define_expand "avx_shufps256"
5071 [(match_operand:V8SF 0 "register_operand")
5072 (match_operand:V8SF 1 "register_operand")
5073 (match_operand:V8SF 2 "nonimmediate_operand")
5074 (match_operand:SI 3 "const_int_operand")]
5077 int mask = INTVAL (operands[3]);
5078 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
5079 GEN_INT ((mask >> 0) & 3),
5080 GEN_INT ((mask >> 2) & 3),
5081 GEN_INT (((mask >> 4) & 3) + 8),
5082 GEN_INT (((mask >> 6) & 3) + 8),
5083 GEN_INT (((mask >> 0) & 3) + 4),
5084 GEN_INT (((mask >> 2) & 3) + 4),
5085 GEN_INT (((mask >> 4) & 3) + 12),
5086 GEN_INT (((mask >> 6) & 3) + 12)));
5090 ;; Each 2-bit field of the mask selects 2 elements (indices n and n + 4).
;; 256-bit vshufps with the selector spelled out as eight const_int
;; operands (3..10).  The visible part of the condition requires operands
;; 7..10 to equal operands 3..6 plus 4.  The output code rebuilds the
;; 8-bit immediate from operands 3..6 (operands 5 and 6 have their offset
;; of 8 removed) and stores it back into operand 3 for the template.
5091 (define_insn "avx_shufps256_1"
5092 [(set (match_operand:V8SF 0 "register_operand" "=x")
5095 (match_operand:V8SF 1 "register_operand" "x")
5096 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5097 (parallel [(match_operand 3 "const_0_to_3_operand" )
5098 (match_operand 4 "const_0_to_3_operand" )
5099 (match_operand 5 "const_8_to_11_operand" )
5100 (match_operand 6 "const_8_to_11_operand" )
5101 (match_operand 7 "const_4_to_7_operand" )
5102 (match_operand 8 "const_4_to_7_operand" )
5103 (match_operand 9 "const_12_to_15_operand")
5104 (match_operand 10 "const_12_to_15_operand")])))]
5106 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
5107 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
5108 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
5109 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
5112 mask = INTVAL (operands[3]);
5113 mask |= INTVAL (operands[4]) << 2;
5114 mask |= (INTVAL (operands[5]) - 8) << 4;
5115 mask |= (INTVAL (operands[6]) - 8) << 6;
5116 operands[3] = GEN_INT (mask);
5118 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5120 [(set_attr "type" "sseshuf")
5121 (set_attr "length_immediate" "1")
5122 (set_attr "prefix" "vex")
5123 (set_attr "mode" "V8SF")])
;; Expander taking the shuffle immediate as const_int operand 3.
;; The immediate is split into four 2-bit fields; the first two are
;; passed unchanged and the last two offset by 4, giving the four
;; selector indices expected by sse_shufps_v4sf.
5125 (define_expand "sse_shufps"
5126 [(match_operand:V4SF 0 "register_operand")
5127 (match_operand:V4SF 1 "register_operand")
5128 (match_operand:V4SF 2 "nonimmediate_operand")
5129 (match_operand:SI 3 "const_int_operand")]
5132 int mask = INTVAL (operands[3]);
5133 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
5134 GEN_INT ((mask >> 0) & 3),
5135 GEN_INT ((mask >> 2) & 3),
5136 GEN_INT (((mask >> 4) & 3) + 4),
5137 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps on VI4F_128 modes: a vec_select over the concatenation of
;; operands 1 and 2, with the four selector indices given as operands
;; 3..6 (two in 0..3, two in 4..7).  The output code rebuilds the 8-bit
;; immediate from those indices (operands 5 and 6 have their offset of 4
;; removed), stores it in operand 3, and returns the shufps template for
;; the noavx alternative or the vshufps template for the avx alternative.
5141 (define_insn "sse_shufps_<mode>"
5142 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
5143 (vec_select:VI4F_128
5144 (vec_concat:<ssedoublevecmode>
5145 (match_operand:VI4F_128 1 "register_operand" "0,x")
5146 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
5147 (parallel [(match_operand 3 "const_0_to_3_operand")
5148 (match_operand 4 "const_0_to_3_operand")
5149 (match_operand 5 "const_4_to_7_operand")
5150 (match_operand 6 "const_4_to_7_operand")])))]
5154 mask |= INTVAL (operands[3]) << 0;
5155 mask |= INTVAL (operands[4]) << 2;
5156 mask |= (INTVAL (operands[5]) - 4) << 4;
5157 mask |= (INTVAL (operands[6]) - 4) << 6;
5158 operands[3] = GEN_INT (mask);
5160 switch (which_alternative)
5163 return "shufps\t{%3, %2, %0|%0, %2, %3}";
5165 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5170 [(set_attr "isa" "noavx,avx")
5171 (set_attr "type" "sseshuf")
5172 (set_attr "length_immediate" "1")
5173 (set_attr "prefix" "orig,vex")
5174 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Three alternatives: store to memory with %vmovhps, register to register
;; with %vmovhlps, and load from offsettable memory with %vmovlps using
;; the %H1 operand form.
5176 (define_insn "sse_storehps"
5177 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5179 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
5180 (parallel [(const_int 2) (const_int 3)])))]
5183 %vmovhps\t{%1, %0|%q0, %1}
5184 %vmovhlps\t{%1, %d0|%d0, %1}
5185 %vmovlps\t{%H1, %d0|%d0, %H1}"
5186 [(set_attr "type" "ssemov")
5187 (set_attr "ssememalign" "64")
5188 (set_attr "prefix" "maybe_vex")
5189 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadhps: obtains a destination from
;; ix86_fixup_binary_operands, emits gen_sse_loadhps into it, and copies
;; the result to operand 0 when the helper returned a different rtx.
5191 (define_expand "sse_loadhps_exp"
5192 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5195 (match_operand:V4SF 1 "nonimmediate_operand")
5196 (parallel [(const_int 0) (const_int 1)]))
5197 (match_operand:V2SF 2 "nonimmediate_operand")))]
5200 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5202 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
5204 /* Fix up the destination if needed. */
5205 if (dst != operands[0])
5206 emit_move_insn (operands[0], dst);
;; Combines elements 0 and 1 of V4SF operand 1 with V2SF operand 2.
;; Five alternatives:
;;   0/1: operand 2 in memory, movhps/vmovhps;
;;   2/3: operand 2 in a register, movlhps/vmovlhps;
;;   4:   offsettable memory destination, %vmovlps storing operand 2 at %H0.
5211 (define_insn "sse_loadhps"
5212 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5215 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5216 (parallel [(const_int 0) (const_int 1)]))
5217 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
5220 movhps\t{%2, %0|%0, %q2}
5221 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5222 movlhps\t{%2, %0|%0, %2}
5223 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5224 %vmovlps\t{%2, %H0|%H0, %2}"
5225 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5226 (set_attr "type" "ssemov")
5227 (set_attr "ssememalign" "64")
5228 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5229 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Three alternatives: store to memory with %vmovlps, register to register
;; with %vmovaps, and load from memory with %vmovlps.
5231 (define_insn "sse_storelps"
5232 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5234 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
5235 (parallel [(const_int 0) (const_int 1)])))]
5238 %vmovlps\t{%1, %0|%q0, %1}
5239 %vmovaps\t{%1, %0|%0, %1}
5240 %vmovlps\t{%1, %d0|%d0, %q1}"
5241 [(set_attr "type" "ssemov")
5242 (set_attr "prefix" "maybe_vex")
5243 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadlps: obtains a destination from
;; ix86_fixup_binary_operands, emits gen_sse_loadlps into it, and copies
;; the result to operand 0 when the helper returned a different rtx.
5245 (define_expand "sse_loadlps_exp"
5246 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5248 (match_operand:V2SF 2 "nonimmediate_operand")
5250 (match_operand:V4SF 1 "nonimmediate_operand")
5251 (parallel [(const_int 2) (const_int 3)]))))]
5254 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5256 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
5258 /* Fix up the destination if needed. */
5259 if (dst != operands[0])
5260 emit_move_insn (operands[0], dst);
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Five alternatives:
;;   0/1: register forms via shufps/vshufps with immediate 0xe4;
;;   2/3: operand 2 in memory, movlps/vmovlps;
;;   4:   memory destination, %vmovlps storing operand 2.
5265 (define_insn "sse_loadlps"
5266 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5268 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
5270 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
5271 (parallel [(const_int 2) (const_int 3)]))))]
5274 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
5275 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
5276 movlps\t{%2, %0|%0, %q2}
5277 vmovlps\t{%2, %1, %0|%0, %1, %q2}
5278 %vmovlps\t{%2, %0|%q0, %2}"
5279 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5280 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
5281 (set_attr "ssememalign" "64")
5282 (set_attr "length_immediate" "1,1,*,*,*")
5283 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5284 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only movss between V4SF operands.  Two alternatives: movss
;; with operand 1 tied to the destination (noavx) and the three-operand
;; vmovss (avx).
5286 (define_insn "sse_movss"
5287 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5289 (match_operand:V4SF 2 "register_operand" " x,x")
5290 (match_operand:V4SF 1 "register_operand" " 0,x")
5294 movss\t{%2, %0|%0, %2}
5295 vmovss\t{%2, %1, %0|%0, %1, %2}"
5296 [(set_attr "isa" "noavx,avx")
5297 (set_attr "type" "ssemov")
5298 (set_attr "prefix" "orig,vex")
5299 (set_attr "mode" "SF")])
;; vbroadcastss from a register: duplicates element 0 of V4SF operand 1
;; across every element of the VF1_128_256 destination.
5301 (define_insn "avx2_vec_dup<mode>"
5302 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
5303 (vec_duplicate:VF1_128_256
5305 (match_operand:V4SF 1 "register_operand" "x")
5306 (parallel [(const_int 0)]))))]
5308 "vbroadcastss\t{%1, %0|%0, %1}"
5309 [(set_attr "type" "sselog1")
5310 (set_attr "prefix" "vex")
5311 (set_attr "mode" "<MODE>")])
;; vbroadcastss into a V8SF destination from element 0 of V8SF operand 1;
;; the template prints the source with the %x1 operand form.
5313 (define_insn "avx2_vec_dupv8sf_1"
5314 [(set (match_operand:V8SF 0 "register_operand" "=x")
5317 (match_operand:V8SF 1 "register_operand" "x")
5318 (parallel [(const_int 0)]))))]
5320 "vbroadcastss\t{%x1, %0|%0, %x1}"
5321 [(set_attr "type" "sselog1")
5322 (set_attr "prefix" "vex")
5323 (set_attr "mode" "V8SF")])
;; Broadcasts SF operand 1 into V4SF operand 0.  Three alternatives:
;;   0: register source, vshufps with immediate 0 (avx);
;;   1: memory source, vbroadcastss (avx);
;;   2: source tied to the destination, shufps with immediate 0 (noavx).
5325 (define_insn "vec_dupv4sf"
5326 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
5328 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
5331 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
5332 vbroadcastss\t{%1, %0|%0, %1}
5333 shufps\t{$0, %0, %0|%0, %0, 0}"
5334 [(set_attr "isa" "avx,avx,noavx")
5335 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
5336 (set_attr "length_immediate" "1,0,1")
5337 (set_attr "prefix_extra" "0,1,*")
5338 (set_attr "prefix" "vex,vex,orig")
5339 (set_attr "mode" "V4SF")])
5341 ;; Although insertps takes register source, we prefer
5342 ;; unpcklps with register source since it is shorter.
;; Builds a V2SF value from SF operands 1 and 2.  Seven alternatives:
;;   0/1: register operand 2, unpcklps/vunpcklps;
;;   2/3: memory operand 2, insertps/vinsertps with immediate 0x10;
;;   4:   memory operand 1 with constant ("C") operand 2, %vmovss;
;;   5/6: MMX register destination, punpckldq and movd.
5343 (define_insn "*vec_concatv2sf_sse4_1"
5344 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
5346 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
5347 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
5350 unpcklps\t{%2, %0|%0, %2}
5351 vunpcklps\t{%2, %1, %0|%0, %1, %2}
5352 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
5353 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
5354 %vmovss\t{%1, %0|%0, %1}
5355 punpckldq\t{%2, %0|%0, %2}
5356 movd\t{%1, %0|%0, %1}"
5357 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
5358 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
5359 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
5360 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
5361 (set_attr "length_immediate" "*,*,1,1,*,*,*")
5362 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
5363 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
5365 ;; ??? In theory we can match memory for the MMX alternative, but allowing
5366 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
5367 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF value from SF operands 1 and 2.  Four alternatives:
;;   0: SSE registers, unpcklps;
;;   1: memory operand 1 with constant ("C") operand 2, movss;
;;   2/3: MMX register destination, punpckldq and movd.
5368 (define_insn "*vec_concatv2sf_sse"
5369 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
5371 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
5372 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
5375 unpcklps\t{%2, %0|%0, %2}
5376 movss\t{%1, %0|%0, %1}
5377 punpckldq\t{%2, %0|%0, %2}
5378 movd\t{%1, %0|%0, %1}"
5379 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5380 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF value from V2SF operands 1 and 2.  Four alternatives:
;;   0/1: register operand 2, movlhps/vmovlhps;
;;   2/3: memory operand 2, movhps/vmovhps.
5382 (define_insn "*vec_concatv4sf"
5383 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
5385 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
5386 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
5389 movlhps\t{%2, %0|%0, %2}
5390 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5391 movhps\t{%2, %0|%0, %q2}
5392 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
5393 [(set_attr "isa" "noavx,avx,noavx,avx")
5394 (set_attr "type" "ssemov")
5395 (set_attr "prefix" "orig,vex,orig,vex")
5396 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Expander for all V_128 modes: delegates to ix86_expand_vector_init,
;; passing false as the first argument, then operand 0 and operand 1.
5398 (define_expand "vec_init<mode>"
5399 [(match_operand:V_128 0 "register_operand")
5403 ix86_expand_vector_init (false, operands[0], operands[1]);
5407 ;; Avoid combining registers from different units in a single alternative,
5408 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines a vec_duplicate of scalar operand 2 with VI4F_128 operand 1.
;; The constraint strings define eleven alternatives:
;;   0-3:  operand 1 is a constant ("C"), operand 2 comes from an SSE
;;         register, memory or a general register;
;;   4-7:  operand 1 is a register (tied to the destination or separate),
;;         using movss/vmovss or pinsrd/vpinsrd;
;;   8-10: memory destination tied to operand 1.
;; The "type" attribute is sselog for alternatives 0, 6 and 7, imov for
;; 9, fmov for 10, and ssemov otherwise.
5409 (define_insn "vec_set<mode>_0"
5410 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
5411 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
5413 (vec_duplicate:VI4F_128
5414 (match_operand:<ssescalarmode> 2 "general_operand"
5415 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
5416 (match_operand:VI4F_128 1 "vector_move_operand"
5417 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
5421 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
5422 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
5423 %vmovd\t{%2, %0|%0, %2}
5424 movss\t{%2, %0|%0, %2}
5425 movss\t{%2, %0|%0, %2}
5426 vmovss\t{%2, %1, %0|%0, %1, %2}
5427 pinsrd\t{$0, %2, %0|%0, %2, 0}
5428 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
5432 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
5434 (cond [(eq_attr "alternative" "0,6,7")
5435 (const_string "sselog")
5436 (eq_attr "alternative" "9")
5437 (const_string "imov")
5438 (eq_attr "alternative" "10")
5439 (const_string "fmov")
5441 (const_string "ssemov")))
5442 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
5443 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
5444 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
5445 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
5447 ;; A subset is vec_setv4sf.
;; *vec_setv4sf_sse4_1: inserts the SF operand 2 into the V4SF operand 1.
;; Operand 3 is a const_int; the insn condition requires exact_log2 of it to
;; be a valid index below GET_MODE_NUNITS (V4SFmode) (the unsigned cast also
;; rejects the -1 that exact_log2 returns for non-powers-of-two).  Before
;; printing, operand 3 is rewritten to exact_log2 (operand 3) << 4, which is
;; the immediate handed to insertps (noavx, operand 1 tied to the
;; destination) or vinsertps (avx, three-operand form).
5448 (define_insn "*vec_setv4sf_sse4_1"
5449 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5452 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
5453 (match_operand:V4SF 1 "register_operand" "0,x")
5454 (match_operand:SI 3 "const_int_operand")))]
5456 && ((unsigned) exact_log2 (INTVAL (operands[3]))
5457 < GET_MODE_NUNITS (V4SFmode))"
5459 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
5460 switch (which_alternative)
5463 return "insertps\t{%3, %2, %0|%0, %2, %3}";
5465 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5470 [(set_attr "isa" "noavx,avx")
5471 (set_attr "type" "sselog")
5472 (set_attr "prefix_data16" "1,*")
5473 (set_attr "prefix_extra" "1")
5474 (set_attr "length_immediate" "1")
5475 (set_attr "prefix" "orig,vex")
5476 (set_attr "mode" "V4SF")])
;; sse4_1_insertps: unspec form of (v)insertps with an 8-bit immediate in
;; operand 3 (const_0_to_255_operand).  When operand 2 is a memory
;; reference, the visible output code computes count_s = immediate >> 6,
;; masks the immediate down to its low six bits and re-addresses the memory
;; operand as SFmode at byte offset count_s * 4.  Alternative 0 is the
;; legacy two-operand form (noavx), alternative 1 the VEX three-operand form.
5478 (define_insn "sse4_1_insertps"
5479 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5480 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
5481 (match_operand:V4SF 1 "register_operand" "0,x")
5482 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
5486 if (MEM_P (operands[2]))
5488 unsigned count_s = INTVAL (operands[3]) >> 6;
5490 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
5491 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
5493 switch (which_alternative)
5496 return "insertps\t{%3, %2, %0|%0, %2, %3}";
5498 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5503 [(set_attr "isa" "noavx,avx")
5504 (set_attr "type" "sselog")
5505 (set_attr "prefix_data16" "1,*")
5506 (set_attr "prefix_extra" "1")
5507 (set_attr "length_immediate" "1")
5508 (set_attr "prefix" "orig,vex")
5509 (set_attr "mode" "V4SF")])
;; Post-reload split (condition: TARGET_SSE && reload_completed) for a
;; VI4F_128 memory destination whose source contains a vec_duplicate of the
;; non-memory scalar operand 1.  The replacement is a plain scalar set:
;; operand 0 is re-addressed in <ssescalarmode> at offset 0 and assigned
;; operand 1.
5512 [(set (match_operand:VI4F_128 0 "memory_operand")
5514 (vec_duplicate:VI4F_128
5515 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
5518 "TARGET_SSE && reload_completed"
5519 [(set (match_dup 0) (match_dup 1))]
5520 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; vec_set<mode>: expander for every mode in iterator V.  Operand 0 is the
;; vector register, operand 1 the scalar register and operand 2 a const_int
;; whose INTVAL is forwarded, together with false as the first argument, to
;; ix86_expand_vector_set.
5522 (define_expand "vec_set<mode>"
5523 [(match_operand:V 0 "register_operand")
5524 (match_operand:<ssescalarmode> 1 "register_operand")
5525 (match_operand 2 "const_int_operand")]
5528 ix86_expand_vector_set (false, operands[0], operands[1],
5529 INTVAL (operands[2]));
;; *vec_extractv4sf_0: extracts element 0 of the V4SF operand 1 into the SF
;; operand 0 (destinations: SSE reg, memory, x87 reg, GPR).  The insn
;; condition forbids memory-to-memory.  After reload it is split into a
;; plain SF move: a register source is re-expressed as an SFmode register
;; with the same REGNO, any other source is re-addressed as SFmode at
;; offset 0.
5533 (define_insn_and_split "*vec_extractv4sf_0"
5534 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
5536 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
5537 (parallel [(const_int 0)])))]
5538 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5540 "&& reload_completed"
5541 [(set (match_dup 0) (match_dup 1))]
5543 if (REG_P (operands[1]))
5544 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
5546 operands[1] = adjust_address (operands[1], SFmode, 0);
;; *sse4_1_extractps: extracts element operand 2 (0..3) of the V4SF register
;; operand 1 into the SF operand 0.  Alternative 0 (GPR or memory
;; destination) prints %vextractps directly; the attribute lists give real
;; values only for that alternative.  Alternatives 1 and 2 have an SSE
;; register destination and are split after reload (split condition:
;; reload_completed && SSE_REG_P (operands[0])): the destination is viewed
;; as a V4SF register and, depending on the index, either sse_shufps_v4sf
;; or vec_interleave_highv4sf is emitted.  The case labels selecting between
;; the two are not visible in this chunk.
5549 (define_insn_and_split "*sse4_1_extractps"
5550 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
5552 (match_operand:V4SF 1 "register_operand" "x,0,x")
5553 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
5556 %vextractps\t{%2, %1, %0|%0, %1, %2}
5559 "&& reload_completed && SSE_REG_P (operands[0])"
5562 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
5563 switch (INTVAL (operands[2]))
5567 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
5568 operands[2], operands[2],
5569 GEN_INT (INTVAL (operands[2]) + 4),
5570 GEN_INT (INTVAL (operands[2]) + 4)));
5573 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
5576 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
5581 [(set_attr "isa" "*,noavx,avx")
5582 (set_attr "type" "sselog,*,*")
5583 (set_attr "prefix_data16" "1,*,*")
5584 (set_attr "prefix_extra" "1,*,*")
5585 (set_attr "length_immediate" "1,*,*")
5586 (set_attr "prefix" "maybe_vex,*,*")
5587 (set_attr "mode" "V4SF,*,*")])
;; *vec_extractv4sf_mem: extracts element operand 2 (0..3) from a V4SF held
;; in offsettable memory ("o") into an SSE, general or x87 register.  After
;; reload the insn is split into a plain SF load from the original address
;; plus INTVAL (operands[2]) * 4 bytes.
5589 (define_insn_and_split "*vec_extractv4sf_mem"
5590 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
5592 (match_operand:V4SF 1 "memory_operand" "o,o,o")
5593 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
5596 "&& reload_completed"
5597 [(set (match_dup 0) (match_dup 1))]
5599 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; avx512f_vextract<shuffletype>32x4_mask: expander for the masked
;; extraction of one quarter of a V16FI register.
;;   operand 0: destination, <ssequartermode>, register or memory
;;   operand 1: V16FI source register
;;   operand 2: quarter selector, 0..3
;;   operand 3: <ssequartermode> value passed through to the _1_mask insn
;;   operand 4: QI mask register
;; A memory destination combined with a CONST_VECTOR operand 3 is first
;; replaced by a register via force_reg.  The switch then calls
;; gen_avx512f_vextract<shuffletype>32x4_1_mask with four consecutive element
;; indices: (0,1,2,3), (4,5,6,7), (8,9,10,11) or (12,13,14,15).  The case
;; labels are not visible in this chunk; presumably selector k picks the
;; k-th group.
;; NOTE(review): after force_reg nothing visible here copies the result back
;; to the original memory destination - confirm against the full file.
5602 (define_expand "avx512f_vextract<shuffletype>32x4_mask"
5603 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
5604 (match_operand:V16FI 1 "register_operand")
5605 (match_operand:SI 2 "const_0_to_3_operand")
5606 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
5607 (match_operand:QI 4 "register_operand")]
5610 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
5611 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
5612 switch (INTVAL (operands[2]))
5615 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5616 operands[1], GEN_INT (0), GEN_INT (1), GEN_INT (2),
5617 GEN_INT (3), operands[3], operands[4]));
5620 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5621 operands[1], GEN_INT (4), GEN_INT (5), GEN_INT (6),
5622 GEN_INT (7), operands[3], operands[4]));
5625 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5626 operands[1], GEN_INT (8), GEN_INT (9), GEN_INT (10),
5627 GEN_INT (11), operands[3], operands[4]));
5630 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5631 operands[1], GEN_INT (12), GEN_INT (13), GEN_INT (14),
5632 GEN_INT (15), operands[3], operands[4]));
;; avx512f_vextract<shuffletype>32x4_1_maskm: masked store of four
;; consecutive elements of the V16FI register operand 1 into the memory
;; operand 0.  Operand 6 is the merge source and is tied to the destination
;; ("0"); operand 7 is the mask register.
;;
;; The insn condition must only *compare* the four selector operands.  It
;; requires them to be consecutive (op2 + 1 == op3, op3 + 1 == op4,
;; op4 + 1 == op5) and op2 to be a multiple of 4, because the output code
;; turns op2 into the instruction immediate with ">> 2", i.e. only whole
;; 128-bit quarters can be encoded.  The previous text used "=" instead of
;; "==", which assigned into the CONST_INT operands and made the test true
;; for almost any selector values.
5640 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
5641 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
5642 (vec_merge:<ssequartermode>
5643 (vec_select:<ssequartermode>
5644 (match_operand:V16FI 1 "register_operand" "v")
5645 (parallel [(match_operand 2 "const_0_to_15_operand")
5646 (match_operand 3 "const_0_to_15_operand")
5647 (match_operand 4 "const_0_to_15_operand")
5648 (match_operand 5 "const_0_to_15_operand")]))
5649 (match_operand:<ssequartermode> 6 "memory_operand" "0")
5650 (match_operand:QI 7 "register_operand" "k")))]
5651 "TARGET_AVX512F && (INTVAL (operands[2]) % 4 == 0) && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
5652 && (INTVAL (operands[3]) == INTVAL (operands[4]) - 1)
5653 && (INTVAL (operands[4]) == INTVAL (operands[5]) - 1)"
5655 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5656 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
5658 [(set_attr "type" "sselog")
5659 (set_attr "prefix_extra" "1")
5660 (set_attr "length_immediate" "1")
5661 (set_attr "memory" "store")
5662 (set_attr "prefix" "evex")
5663 (set_attr "mode" "<sseinsnmode>")])
;; <mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>: extracts
;; four consecutive elements of the V16FI register operand 1 into operand 0
;; (predicate and constraint come from the store_mask substitution
;; attributes).  The "memory" attribute is "store" when operand 0 is a MEM
;; and "none" otherwise.
;;
;; The insn condition must only *compare* the four selector operands.  It
;; requires them to be consecutive and op2 to be a multiple of 4, because the
;; output code derives the immediate from op2 with ">> 2" (only whole 128-bit
;; quarters can be encoded).  The previous text used "=" instead of "==",
;; which assigned into the CONST_INT operands and made the test true for
;; almost any selector values.
5665 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
5666 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5667 (vec_select:<ssequartermode>
5668 (match_operand:V16FI 1 "register_operand" "v")
5669 (parallel [(match_operand 2 "const_0_to_15_operand")
5670 (match_operand 3 "const_0_to_15_operand")
5671 (match_operand 4 "const_0_to_15_operand")
5672 (match_operand 5 "const_0_to_15_operand")])))]
5673 "TARGET_AVX512F && (INTVAL (operands[2]) % 4 == 0) && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
5674 && (INTVAL (operands[3]) == INTVAL (operands[4]) - 1)
5675 && (INTVAL (operands[4]) == INTVAL (operands[5]) - 1)"
5677 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5678 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
5680 [(set_attr "type" "sselog")
5681 (set_attr "prefix_extra" "1")
5682 (set_attr "length_immediate" "1")
5683 (set (attr "memory")
5684 (if_then_else (match_test "MEM_P (operands[0])")
5685 (const_string "store")
5686 (const_string "none")))
5687 (set_attr "prefix" "evex")
5688 (set_attr "mode" "<sseinsnmode>")])
;; avx512f_vextract<shuffletype>64x4_mask: expander for the masked
;; extraction of one half of a V8FI register.
;;   operand 0: destination, <ssehalfvecmode>, register or memory
;;   operand 1: V8FI source register
;;   operand 2: half selector, 0 or 1
;;   operand 3: <ssehalfvecmode> value passed through to the _mask insn
;;   operand 4: QI mask register
;; The selector chooses between gen_vec_extract_lo_<mode>_mask and
;; gen_vec_extract_hi_<mode>_mask (case labels are not visible in this
;; chunk), which is then called with operands 0, 1, 3 and 4.
;;
;; A memory destination combined with a CONST_VECTOR operand 3 is first
;; replaced by a register.  That register must have the mode of operand 0,
;; i.e. <ssehalfvecmode>; the previous text passed <ssequartermode> (copied
;; from the 32x4 expander), which is half the required width.
;; NOTE(review): after force_reg nothing visible here copies the result back
;; to the original memory destination - confirm against the full file.
5690 (define_expand "avx512f_vextract<shuffletype>64x4_mask"
5691 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5692 (match_operand:V8FI 1 "register_operand")
5693 (match_operand:SI 2 "const_0_to_1_operand")
5694 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
5695 (match_operand:QI 4 "register_operand")]
5698 rtx (*insn)(rtx, rtx, rtx, rtx);
5700 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
5701 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
5703 switch (INTVAL (operands[2]))
5706 insn = gen_vec_extract_lo_<mode>_mask;
5709 insn = gen_vec_extract_hi_<mode>_mask;
5715 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; Post-reload split (condition includes reload_completed and forbids
;; memory-to-memory) for extracting elements 0..3 of a V8FI operand.  The
;; low half needs no shuffle: the source is rewritten in <ssehalfvecmode>,
;; using gen_rtx_REG with the same REGNO or gen_lowpart, and a plain
;; emit_move_insn is issued.  The test choosing between the two rewrites is
;; not visible in this chunk.
5720 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5721 (vec_select:<ssehalfvecmode>
5722 (match_operand:V8FI 1 "nonimmediate_operand")
5723 (parallel [(const_int 0) (const_int 1)
5724 (const_int 2) (const_int 3)])))]
5725 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
5726 && reload_completed"
5729 rtx op1 = operands[1];
5731 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
5733 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
5734 emit_move_insn (operands[0], op1);
;; vec_extract_lo_<mode>_maskm: masked store of elements 0..3 of the V8FI
;; register operand 1 into the memory operand 0.  Operand 2 is the merge
;; source and is tied to the destination ("0"); operand 3 is the mask
;; register.  Emits vextract<shuffletype>64x4 with immediate 0.
;; The destination is always memory, so the "memory" attribute is stated
;; explicitly as "store", matching vec_extract_hi_<mode>_maskm and
;; avx512f_vextract<shuffletype>32x4_1_maskm.
5738 (define_insn "vec_extract_lo_<mode>_maskm"
5739 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
5740 (vec_merge:<ssehalfvecmode>
5741 (vec_select:<ssehalfvecmode>
5742 (match_operand:V8FI 1 "register_operand" "v")
5743 (parallel [(const_int 0) (const_int 1)
5744 (const_int 2) (const_int 3)]))
5745 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
5746 (match_operand:QI 3 "register_operand" "k")))]
5748 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
5749 [(set_attr "type" "sselog")
5750 (set_attr "prefix_extra" "1")
5751 (set_attr "length_immediate" "1")
(set_attr "memory" "store")
5752 (set_attr "prefix" "evex")
5753 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_lo_<mode><mask_name>: extracts elements 0..3 of the V8FI
;; operand 1 (register or memory) into operand 0, whose predicate and
;; constraint come from the store_mask substitution attributes.  The insn
;; condition forbids memory-to-memory.  The only template visible in this
;; chunk is vextract<shuffletype>64x4 with immediate 0; lines around it are
;; missing from view, so other output paths may exist.  The "memory"
;; attribute is "store" when operand 0 is a MEM and "none" otherwise.
5755 (define_insn "vec_extract_lo_<mode><mask_name>"
5756 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5757 (vec_select:<ssehalfvecmode>
5758 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
5759 (parallel [(const_int 0) (const_int 1)
5760 (const_int 2) (const_int 3)])))]
5761 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5764 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
5768 [(set_attr "type" "sselog")
5769 (set_attr "prefix_extra" "1")
5770 (set_attr "length_immediate" "1")
5771 (set (attr "memory")
5772 (if_then_else (match_test "MEM_P (operands[0])")
5773 (const_string "store")
5774 (const_string "none")))
5775 (set_attr "prefix" "evex")
5776 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_hi_<mode>_maskm: masked store of elements 4..7 of the V8FI
;; register operand 1 into the memory operand 0.  Operand 2 is the merge
;; source and is tied to the destination ("0"); operand 3 is the mask
;; register.  Emits vextract<shuffletype>64x4 with immediate 1.
5778 (define_insn "vec_extract_hi_<mode>_maskm"
5779 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
5780 (vec_merge:<ssehalfvecmode>
5781 (vec_select:<ssehalfvecmode>
5782 (match_operand:V8FI 1 "register_operand" "v")
5783 (parallel [(const_int 4) (const_int 5)
5784 (const_int 6) (const_int 7)]))
5785 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
5786 (match_operand:QI 3 "register_operand" "k")))]
5788 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
5789 [(set_attr "type" "sselog")
5790 (set_attr "prefix_extra" "1")
5791 (set_attr "length_immediate" "1")
5792 (set_attr "memory" "store")
5793 (set_attr "prefix" "evex")
5794 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_hi_<mode><mask_name>: extracts elements 4..7 of the V8FI
;; register operand 1 into operand 0, whose predicate and constraint come
;; from the store_mask substitution attributes.  Emits
;; vextract<shuffletype>64x4 with immediate 1.  The "memory" attribute is
;; "store" when operand 0 is a MEM and "none" otherwise.
5796 (define_insn "vec_extract_hi_<mode><mask_name>"
5797 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5798 (vec_select:<ssehalfvecmode>
5799 (match_operand:V8FI 1 "register_operand" "v")
5800 (parallel [(const_int 4) (const_int 5)
5801 (const_int 6) (const_int 7)])))]
5803 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
5804 [(set_attr "type" "sselog")
5805 (set_attr "prefix_extra" "1")
5806 (set_attr "length_immediate" "1")
5807 (set (attr "memory")
5808 (if_then_else (match_test "MEM_P (operands[0])")
5809 (const_string "store")
5810 (const_string "none")))
5811 (set_attr "prefix" "evex")
5812 (set_attr "mode" "<sseinsnmode>")])
;; avx_vextractf128<mode>: expander for extracting one half of a V_256
;; register (operand 1) into operand 0.  Operand 2 (0 or 1) selects between
;; gen_vec_extract_lo_<mode> and gen_vec_extract_hi_<mode>, which is then
;; called with operands 0 and 1.  The case labels are not visible in this
;; chunk; presumably 0 selects the low half and 1 the high half.
5814 (define_expand "avx_vextractf128<mode>"
5815 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5816 (match_operand:V_256 1 "register_operand")
5817 (match_operand:SI 2 "const_0_to_1_operand")]
5820 rtx (*insn)(rtx, rtx);
5822 switch (INTVAL (operands[2]))
5825 insn = gen_vec_extract_lo_<mode>;
5828 insn = gen_vec_extract_hi_<mode>;
5834 emit_insn (insn (operands[0], operands[1]));
;; vec_extract_lo_<mode> (V16FI): extracts elements 0..7, i.e. the low half.
;; The insn condition forbids memory-to-memory.  After reload the insn is
;; split into a plain move: the source is rewritten in <ssehalfvecmode>,
;; using gen_rtx_REG with the same REGNO or gen_lowpart, and emit_move_insn
;; is issued.  The test choosing between the two rewrites is not visible in
;; this chunk.
5838 (define_insn_and_split "vec_extract_lo_<mode>"
5839 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
5840 (vec_select:<ssehalfvecmode>
5841 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
5842 (parallel [(const_int 0) (const_int 1)
5843 (const_int 2) (const_int 3)
5844 (const_int 4) (const_int 5)
5845 (const_int 6) (const_int 7)])))]
5846 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5848 "&& reload_completed"
5851 rtx op1 = operands[1];
5853 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
5855 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
5856 emit_move_insn (operands[0], op1);
;; vec_extract_hi_<mode> (V16FI): extracts elements 8..15, i.e. the high
;; half, with vextracti64x4 and immediate 1.  The integer form of the
;; instruction is used for every mode of the iterator and the "mode"
;; attribute is XI.  Alternative 0 writes a register (memory "none"),
;; alternative 1 writes memory (memory "store").
;; NOTE(review): operand 1 uses predicate nonimmediate_operand although both
;; constraints are register-only ("v,v"); the 256-bit vec_extract_hi
;; patterns in this file use register_operand - confirm whether the wider
;; predicate is intentional.
5860 (define_insn "vec_extract_hi_<mode>"
5861 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
5862 (vec_select:<ssehalfvecmode>
5863 (match_operand:V16FI 1 "nonimmediate_operand" "v,v")
5864 (parallel [(const_int 8) (const_int 9)
5865 (const_int 10) (const_int 11)
5866 (const_int 12) (const_int 13)
5867 (const_int 14) (const_int 15)])))]
5869 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
5870 [(set_attr "type" "sselog")
5871 (set_attr "prefix_extra" "1")
5872 (set_attr "length_immediate" "1")
5873 (set_attr "memory" "none,store")
5874 (set_attr "prefix" "evex")
5875 (set_attr "mode" "XI")])
;; vec_extract_lo_<mode> (VI8F_256): extracts elements 0 and 1, i.e. the low
;; half.  The insn condition forbids memory-to-memory.  After reload the
;; insn is split into a plain move: a register source is re-expressed as a
;; <ssehalfvecmode> register with the same REGNO, any other source is
;; re-addressed in <ssehalfvecmode> at offset 0.
5877 (define_insn_and_split "vec_extract_lo_<mode>"
5878 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
5879 (vec_select:<ssehalfvecmode>
5880 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
5881 (parallel [(const_int 0) (const_int 1)])))]
5882 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5884 "&& reload_completed"
5885 [(set (match_dup 0) (match_dup 1))]
5887 if (REG_P (operands[1]))
5888 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
5890 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; vec_extract_hi_<mode> (VI8F_256): extracts elements 2 and 3, i.e. the
;; high half, with vextract<i128> and immediate 1.  Alternative 0 writes a
;; register (memory "none"), alternative 1 writes memory (memory "store").
5893 (define_insn "vec_extract_hi_<mode>"
5894 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
5895 (vec_select:<ssehalfvecmode>
5896 (match_operand:VI8F_256 1 "register_operand" "x,x")
5897 (parallel [(const_int 2) (const_int 3)])))]
5899 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
5900 [(set_attr "type" "sselog")
5901 (set_attr "prefix_extra" "1")
5902 (set_attr "length_immediate" "1")
5903 (set_attr "memory" "none,store")
5904 (set_attr "prefix" "vex")
5905 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_lo_<mode> (VI4F_256): extracts elements 0..3, i.e. the low
;; half.  The insn condition forbids memory-to-memory.  After reload the
;; insn is split into a plain move: a register source is re-expressed as a
;; <ssehalfvecmode> register with the same REGNO, any other source is
;; re-addressed in <ssehalfvecmode> at offset 0.
5907 (define_insn_and_split "vec_extract_lo_<mode>"
5908 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
5909 (vec_select:<ssehalfvecmode>
5910 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
5911 (parallel [(const_int 0) (const_int 1)
5912 (const_int 2) (const_int 3)])))]
5913 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5915 "&& reload_completed"
5916 [(set (match_dup 0) (match_dup 1))]
5918 if (REG_P (operands[1]))
5919 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
5921 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; vec_extract_hi_<mode> (VI4F_256): extracts elements 4..7, i.e. the high
;; half, with vextract<i128> and immediate 1.  Alternative 0 writes a
;; register (memory "none"), alternative 1 writes memory (memory "store").
5924 (define_insn "vec_extract_hi_<mode>"
5925 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
5926 (vec_select:<ssehalfvecmode>
5927 (match_operand:VI4F_256 1 "register_operand" "x,x")
5928 (parallel [(const_int 4) (const_int 5)
5929 (const_int 6) (const_int 7)])))]
5931 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
5932 [(set_attr "type" "sselog")
5933 (set_attr "prefix_extra" "1")
5934 (set_attr "length_immediate" "1")
5935 (set_attr "memory" "none,store")
5936 (set_attr "prefix" "vex")
5937 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_lo_v32hi: extracts elements 0..15 of a V32HI, i.e. the low
;; V16HI half.  The insn condition forbids memory-to-memory.  After reload
;; the insn is split into a plain move: a register source is re-expressed as
;; a V16HImode register with the same REGNO, any other source is
;; re-addressed as V16HImode at offset 0.
5939 (define_insn_and_split "vec_extract_lo_v32hi"
5940 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
5942 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
5943 (parallel [(const_int 0) (const_int 1)
5944 (const_int 2) (const_int 3)
5945 (const_int 4) (const_int 5)
5946 (const_int 6) (const_int 7)
5947 (const_int 8) (const_int 9)
5948 (const_int 10) (const_int 11)
5949 (const_int 12) (const_int 13)
5950 (const_int 14) (const_int 15)])))]
5951 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5953 "&& reload_completed"
5954 [(set (match_dup 0) (match_dup 1))]
5956 if (REG_P (operands[1]))
5957 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
5959 operands[1] = adjust_address (operands[1], V16HImode, 0);
;; vec_extract_hi_v32hi: extracts elements 16..31 of a V32HI, i.e. the high
;; V16HI half, with vextracti64x4 and immediate 1.  Alternative 0 writes a
;; register (memory "none"), alternative 1 writes memory (memory "store").
;; NOTE(review): operand 1 uses predicate nonimmediate_operand although both
;; constraints are register-only ("v,v"); vec_extract_hi_v16hi uses
;; register_operand - confirm whether the wider predicate is intentional.
5962 (define_insn "vec_extract_hi_v32hi"
5963 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
5965 (match_operand:V32HI 1 "nonimmediate_operand" "v,v")
5966 (parallel [(const_int 16) (const_int 17)
5967 (const_int 18) (const_int 19)
5968 (const_int 20) (const_int 21)
5969 (const_int 22) (const_int 23)
5970 (const_int 24) (const_int 25)
5971 (const_int 26) (const_int 27)
5972 (const_int 28) (const_int 29)
5973 (const_int 30) (const_int 31)])))]
5975 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
5976 [(set_attr "type" "sselog")
5977 (set_attr "prefix_extra" "1")
5978 (set_attr "length_immediate" "1")
5979 (set_attr "memory" "none,store")
5980 (set_attr "prefix" "evex")
5981 (set_attr "mode" "XI")])
;; vec_extract_lo_v16hi: extracts elements 0..7 of a V16HI, i.e. the low
;; V8HI half.  The insn condition forbids memory-to-memory.  After reload
;; the insn is split into a plain move: a register source is re-expressed as
;; a V8HImode register with the same REGNO, any other source is re-addressed
;; as V8HImode at offset 0.
5983 (define_insn_and_split "vec_extract_lo_v16hi"
5984 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
5986 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
5987 (parallel [(const_int 0) (const_int 1)
5988 (const_int 2) (const_int 3)
5989 (const_int 4) (const_int 5)
5990 (const_int 6) (const_int 7)])))]
5991 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5993 "&& reload_completed"
5994 [(set (match_dup 0) (match_dup 1))]
5996 if (REG_P (operands[1]))
5997 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
5999 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; vec_extract_hi_v16hi: extracts elements 8..15 of a V16HI register, i.e.
;; the high V8HI half, with vextract%~128 and immediate 1.  Alternative 0
;; writes a register (memory "none"), alternative 1 writes memory (memory
;; "store").  The "mode" attribute is OI.
6002 (define_insn "vec_extract_hi_v16hi"
6003 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6005 (match_operand:V16HI 1 "register_operand" "x,x")
6006 (parallel [(const_int 8) (const_int 9)
6007 (const_int 10) (const_int 11)
6008 (const_int 12) (const_int 13)
6009 (const_int 14) (const_int 15)])))]
6011 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6012 [(set_attr "type" "sselog")
6013 (set_attr "prefix_extra" "1")
6014 (set_attr "length_immediate" "1")
6015 (set_attr "memory" "none,store")
6016 (set_attr "prefix" "vex")
6017 (set_attr "mode" "OI")])
;; vec_extract_lo_v64qi: extracts elements 0..31 of a V64QI, i.e. the low
;; V32QI half.  The insn condition forbids memory-to-memory.  After reload
;; the insn is split into a plain move: a register source is re-expressed as
;; a V32QImode register with the same REGNO, any other source is
;; re-addressed as V32QImode at offset 0.
6019 (define_insn_and_split "vec_extract_lo_v64qi"
6020 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6022 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
6023 (parallel [(const_int 0) (const_int 1)
6024 (const_int 2) (const_int 3)
6025 (const_int 4) (const_int 5)
6026 (const_int 6) (const_int 7)
6027 (const_int 8) (const_int 9)
6028 (const_int 10) (const_int 11)
6029 (const_int 12) (const_int 13)
6030 (const_int 14) (const_int 15)
6031 (const_int 16) (const_int 17)
6032 (const_int 18) (const_int 19)
6033 (const_int 20) (const_int 21)
6034 (const_int 22) (const_int 23)
6035 (const_int 24) (const_int 25)
6036 (const_int 26) (const_int 27)
6037 (const_int 28) (const_int 29)
6038 (const_int 30) (const_int 31)])))]
6039 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6041 "&& reload_completed"
6042 [(set (match_dup 0) (match_dup 1))]
6044 if (REG_P (operands[1]))
6045 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
6047 operands[1] = adjust_address (operands[1], V32QImode, 0);
;; vec_extract_hi_v64qi: extracts elements 32..63 of a V64QI, i.e. the high
;; V32QI half, with vextracti64x4 and immediate 1.  Alternative 0 writes a
;; register (memory "none"), alternative 1 writes memory (memory "store").
;; NOTE(review): operand 1 uses predicate nonimmediate_operand although both
;; constraints are register-only ("v,v"); vec_extract_hi_v32qi uses
;; register_operand - confirm whether the wider predicate is intentional.
6050 (define_insn "vec_extract_hi_v64qi"
6051 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6053 (match_operand:V64QI 1 "nonimmediate_operand" "v,v")
6054 (parallel [(const_int 32) (const_int 33)
6055 (const_int 34) (const_int 35)
6056 (const_int 36) (const_int 37)
6057 (const_int 38) (const_int 39)
6058 (const_int 40) (const_int 41)
6059 (const_int 42) (const_int 43)
6060 (const_int 44) (const_int 45)
6061 (const_int 46) (const_int 47)
6062 (const_int 48) (const_int 49)
6063 (const_int 50) (const_int 51)
6064 (const_int 52) (const_int 53)
6065 (const_int 54) (const_int 55)
6066 (const_int 56) (const_int 57)
6067 (const_int 58) (const_int 59)
6068 (const_int 60) (const_int 61)
6069 (const_int 62) (const_int 63)])))]
6071 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6072 [(set_attr "type" "sselog")
6073 (set_attr "prefix_extra" "1")
6074 (set_attr "length_immediate" "1")
6075 (set_attr "memory" "none,store")
6076 (set_attr "prefix" "evex")
6077 (set_attr "mode" "XI")])
;; vec_extract_lo_v32qi: extracts elements 0..15 of a V32QI, i.e. the low
;; V16QI half.  The insn condition forbids memory-to-memory.  After reload
;; the insn is split into a plain move: a register source is re-expressed as
;; a V16QImode register with the same REGNO, any other source is
;; re-addressed as V16QImode at offset 0.
6079 (define_insn_and_split "vec_extract_lo_v32qi"
6080 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6082 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
6083 (parallel [(const_int 0) (const_int 1)
6084 (const_int 2) (const_int 3)
6085 (const_int 4) (const_int 5)
6086 (const_int 6) (const_int 7)
6087 (const_int 8) (const_int 9)
6088 (const_int 10) (const_int 11)
6089 (const_int 12) (const_int 13)
6090 (const_int 14) (const_int 15)])))]
6091 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6093 "&& reload_completed"
6094 [(set (match_dup 0) (match_dup 1))]
6096 if (REG_P (operands[1]))
6097 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
6099 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; vec_extract_hi_v32qi: extracts elements 16..31 of a V32QI register, i.e.
;; the high V16QI half, with vextract%~128 and immediate 1.  Alternative 0
;; writes a register (memory "none"), alternative 1 writes memory (memory
;; "store").  The "mode" attribute is OI.
6102 (define_insn "vec_extract_hi_v32qi"
6103 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6105 (match_operand:V32QI 1 "register_operand" "x,x")
6106 (parallel [(const_int 16) (const_int 17)
6107 (const_int 18) (const_int 19)
6108 (const_int 20) (const_int 21)
6109 (const_int 22) (const_int 23)
6110 (const_int 24) (const_int 25)
6111 (const_int 26) (const_int 27)
6112 (const_int 28) (const_int 29)
6113 (const_int 30) (const_int 31)])))]
6115 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6116 [(set_attr "type" "sselog")
6117 (set_attr "prefix_extra" "1")
6118 (set_attr "length_immediate" "1")
6119 (set_attr "memory" "none,store")
6120 (set_attr "prefix" "vex")
6121 (set_attr "mode" "OI")])
6123 ;; Modes handled by vec_extract patterns.
;; Each row lists one element type from widest to narrowest vector.  The
;; 128-bit modes are unconditional, the 256-bit modes require TARGET_AVX and
;; the 512-bit modes require TARGET_AVX512F.  No 512-bit QI or HI mode is
;; listed.
6124 (define_mode_iterator VEC_EXTRACT_MODE
6125 [(V32QI "TARGET_AVX") V16QI
6126 (V16HI "TARGET_AVX") V8HI
6127 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
6128 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
6129 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
6130 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; vec_extract<mode>: expander for every mode in VEC_EXTRACT_MODE.  Operand 0
;; is the scalar destination register, operand 1 the vector register and
;; operand 2 a const_int whose INTVAL is forwarded, together with false as
;; the first argument, to ix86_expand_vector_extract.
6132 (define_expand "vec_extract<mode>"
6133 [(match_operand:<ssescalarmode> 0 "register_operand")
6134 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
6135 (match_operand 2 "const_int_operand")]
6138 ix86_expand_vector_extract (false, operands[0], operands[1],
6139 INTVAL (operands[2]));
6143 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6145 ;; Parallel double-precision floating point element swizzling
6147 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; <mask_codefor>avx512f_unpckhpd512<mask_name>: 512-bit vunpckhpd.  The
;; selection picks indices 1,9, 3,11, 5,13, 7,15 from the two V8DF operands
;; 1 and 2, i.e. the odd-numbered element of each pair, alternating between
;; the first and second operand.
;; NOTE(review): operand 1 uses predicate nonimmediate_operand although its
;; only constraint is "v"; avx_unpckhpd256 uses register_operand - confirm
;; whether the wider predicate is intentional.
6149 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
6150 [(set (match_operand:V8DF 0 "register_operand" "=v")
6153 (match_operand:V8DF 1 "nonimmediate_operand" "v")
6154 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
6155 (parallel [(const_int 1) (const_int 9)
6156 (const_int 3) (const_int 11)
6157 (const_int 5) (const_int 13)
6158 (const_int 7) (const_int 15)])))]
6160 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6161 [(set_attr "type" "sselog")
6162 (set_attr "prefix" "evex")
6163 (set_attr "mode" "V8DF")])
6165 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_unpckhpd256: 256-bit vunpckhpd.  The selection picks indices 1,5,3,7
;; from the V4DF register operand 1 and the register-or-memory operand 2.
6166 (define_insn "avx_unpckhpd256"
6167 [(set (match_operand:V4DF 0 "register_operand" "=x")
6170 (match_operand:V4DF 1 "register_operand" "x")
6171 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6172 (parallel [(const_int 1) (const_int 5)
6173 (const_int 3) (const_int 7)])))]
6175 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
6176 [(set_attr "type" "sselog")
6177 (set_attr "prefix" "vex")
6178 (set_attr "mode" "V4DF")])
;; vec_interleave_highv4df: multi-step expansion.  The visible selections
;; are, in order, indices 0,4,2,6 and 1,5,3,7 (both applied to operands 1
;; and 2) followed by a final selection 2,3,6,7 whose result goes to
;; operand 0.  Operands 3 and 4 are fresh V4DF pseudo registers created in
;; the preparation code; presumably they hold the two intermediate results
;; (the lines naming them in the pattern are not visible in this chunk).
6180 (define_expand "vec_interleave_highv4df"
6184 (match_operand:V4DF 1 "register_operand" "x")
6185 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6186 (parallel [(const_int 0) (const_int 4)
6187 (const_int 2) (const_int 6)])))
6193 (parallel [(const_int 1) (const_int 5)
6194 (const_int 3) (const_int 7)])))
6195 (set (match_operand:V4DF 0 "register_operand")
6200 (parallel [(const_int 2) (const_int 3)
6201 (const_int 6) (const_int 7)])))]
6204 operands[3] = gen_reg_rtx (V4DFmode);
6205 operands[4] = gen_reg_rtx (V4DFmode);
;; vec_interleave_highv2df: expander whose selection starts at index 1 of
;; the two V2DF operands.  If ix86_vec_interleave_v2df_operator_ok
;; (operands, 1) rejects the operand combination, operand 2 is forced into a
;; V2DF register.
6209 (define_expand "vec_interleave_highv2df"
6210 [(set (match_operand:V2DF 0 "register_operand")
6213 (match_operand:V2DF 1 "nonimmediate_operand")
6214 (match_operand:V2DF 2 "nonimmediate_operand"))
6215 (parallel [(const_int 1)
6219 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
6220 operands[2] = force_reg (V2DFmode, operands[2]);
;; *vec_interleave_highv2df: matching insn for the expander of the same
;; name; valid only when ix86_vec_interleave_v2df_operator_ok (operands, 1)
;; holds.  Six alternatives, in template order:
;;   0: unpckhpd   (noavx, operand 1 tied to the destination)
;;   1: vunpckhpd  (avx, three-operand)
;;   2: %vmovddup  (sse3) of the high half of memory operand 1 (%H1);
;;      operand 2 is tied to operand 1
;;   3: movlpd     (noavx) from %H1, operand 2 tied to the destination
;;   4: vmovlpd    (avx) from %H1 with operand 2 in a register
;;   5: %vmovhpd   store of register operand 1 to the memory destination
6223 (define_insn "*vec_interleave_highv2df"
6224 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
6227 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
6228 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
6229 (parallel [(const_int 1)
6231 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
6233 unpckhpd\t{%2, %0|%0, %2}
6234 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
6235 %vmovddup\t{%H1, %0|%0, %H1}
6236 movlpd\t{%H1, %0|%0, %H1}
6237 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
6238 %vmovhpd\t{%1, %0|%q0, %1}"
6239 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6240 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6241 (set_attr "ssememalign" "64")
6242 (set_attr "prefix_data16" "*,*,*,1,*,1")
6243 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6244 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; avx512f_movddup512<mask_name>: expander with a single visible V8DF input
;; (operand 1, register or memory) and the selection 0,8, 2,10, 4,12, 6,14 -
;; the same index list that *avx512f_unpcklpd512 matches.
6246 (define_expand "avx512f_movddup512<mask_name>"
6247 [(set (match_operand:V8DF 0 "register_operand")
6250 (match_operand:V8DF 1 "nonimmediate_operand")
6252 (parallel [(const_int 0) (const_int 8)
6253 (const_int 2) (const_int 10)
6254 (const_int 4) (const_int 12)
6255 (const_int 6) (const_int 14)])))]
;; avx512f_unpcklpd512<mask_name>: expander taking a V8DF register
;; (operand 1) and a register-or-memory V8DF (operand 2) with the selection
;; 0,8, 2,10, 4,12, 6,14, i.e. the even-numbered element of each pair,
;; alternating between the two operands.
6258 (define_expand "avx512f_unpcklpd512<mask_name>"
6259 [(set (match_operand:V8DF 0 "register_operand")
6262 (match_operand:V8DF 1 "register_operand")
6263 (match_operand:V8DF 2 "nonimmediate_operand"))
6264 (parallel [(const_int 0) (const_int 8)
6265 (const_int 2) (const_int 10)
6266 (const_int 4) (const_int 12)
6267 (const_int 6) (const_int 14)])))]
;; *avx512f_unpcklpd512<mask_name>: matching insn for the two expanders
;; above (selection 0,8, 2,10, 4,12, 6,14).
;;   alternative 0: operand 2 is tied to operand 1 ("1"), which may be in
;;                  memory -> vmovddup of operand 1
;;   alternative 1: operand 1 in a register, operand 2 register or memory
;;                  -> vunpcklpd
6270 (define_insn "*avx512f_unpcklpd512<mask_name>"
6271 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
6274 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
6275 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
6276 (parallel [(const_int 0) (const_int 8)
6277 (const_int 2) (const_int 10)
6278 (const_int 4) (const_int 12)
6279 (const_int 6) (const_int 14)])))]
6282 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
6283 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6284 [(set_attr "type" "sselog")
6285 (set_attr "prefix" "evex")
6286 (set_attr "mode" "V8DF")])
6288 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_movddup256: expander with a single visible V4DF input (operand 1,
;; register or memory) and the selection 0,4,2,6 - the same index list that
;; *avx_unpcklpd256 matches.
6289 (define_expand "avx_movddup256"
6290 [(set (match_operand:V4DF 0 "register_operand")
6293 (match_operand:V4DF 1 "nonimmediate_operand")
6295 (parallel [(const_int 0) (const_int 4)
6296 (const_int 2) (const_int 6)])))]
;; avx_unpcklpd256: expander taking a V4DF register (operand 1) and a
;; register-or-memory V4DF (operand 2) with the selection 0,4,2,6.
6299 (define_expand "avx_unpcklpd256"
6300 [(set (match_operand:V4DF 0 "register_operand")
6303 (match_operand:V4DF 1 "register_operand")
6304 (match_operand:V4DF 2 "nonimmediate_operand"))
6305 (parallel [(const_int 0) (const_int 4)
6306 (const_int 2) (const_int 6)])))]
;; *avx_unpcklpd256: matching insn for the two expanders above (selection
;; 0,4,2,6).
;;   alternative 0: operand 1 in a register, operand 2 register or memory
;;                  -> vunpcklpd
;;   alternative 1: operand 1 in memory with operand 2 tied to it ("1")
;;                  -> vmovddup of operand 1
6309 (define_insn "*avx_unpcklpd256"
6310 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
6313 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
6314 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
6315 (parallel [(const_int 0) (const_int 4)
6316 (const_int 2) (const_int 6)])))]
6319 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6320 vmovddup\t{%1, %0|%0, %1}"
6321 [(set_attr "type" "sselog")
6322 (set_attr "prefix" "vex")
6323 (set_attr "mode" "V4DF")])
;; vec_interleave_lowv4df: multi-step expansion.  The visible selections
;; are, in order, indices 0,4,2,6 and 1,5,3,7 (both applied to operands 1
;; and 2) followed by a final selection 0,1,4,5 whose result goes to
;; operand 0.  Operands 3 and 4 are fresh V4DF pseudo registers created in
;; the preparation code; presumably they hold the two intermediate results
;; (the lines naming them in the pattern are not visible in this chunk).
6325 (define_expand "vec_interleave_lowv4df"
6329 (match_operand:V4DF 1 "register_operand" "x")
6330 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6331 (parallel [(const_int 0) (const_int 4)
6332 (const_int 2) (const_int 6)])))
6338 (parallel [(const_int 1) (const_int 5)
6339 (const_int 3) (const_int 7)])))
6340 (set (match_operand:V4DF 0 "register_operand")
6345 (parallel [(const_int 0) (const_int 1)
6346 (const_int 4) (const_int 5)])))]
6349 operands[3] = gen_reg_rtx (V4DFmode);
6350 operands[4] = gen_reg_rtx (V4DFmode);
;; vec_interleave_lowv2df: expander whose selection starts at index 0 of the
;; two V2DF operands.  If ix86_vec_interleave_v2df_operator_ok (operands, 0)
;; rejects the operand combination, operand 1 is forced into a V2DF
;; register (the high variant forces operand 2 instead).
6353 (define_expand "vec_interleave_lowv2df"
6354 [(set (match_operand:V2DF 0 "register_operand")
6357 (match_operand:V2DF 1 "nonimmediate_operand")
6358 (match_operand:V2DF 2 "nonimmediate_operand"))
6359 (parallel [(const_int 0)
6363 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
6364 operands[1] = force_reg (V2DFmode, operands[1]);
;; *vec_interleave_lowv2df: matching insn for the expander of the same name;
;; valid only when ix86_vec_interleave_v2df_operator_ok (operands, 0) holds.
;; Six alternatives, in template order:
;;   0: unpcklpd   (noavx, operand 1 tied to the destination)
;;   1: vunpcklpd  (avx, three-operand)
;;   2: %vmovddup  (sse3) of memory operand 1; operand 2 is tied to it
;;   3: movhpd     (noavx) with operand 2 in memory
;;   4: vmovhpd    (avx) with operand 2 in memory
;;   5: %vmovlpd   store of register operand 2 to the high half (%H0) of
;;                 the offsettable memory destination
6367 (define_insn "*vec_interleave_lowv2df"
6368 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
6371 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
6372 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
6373 (parallel [(const_int 0)
6375 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
6377 unpcklpd\t{%2, %0|%0, %2}
6378 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6379 %vmovddup\t{%1, %0|%0, %q1}
6380 movhpd\t{%2, %0|%0, %q2}
6381 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
6382 %vmovlpd\t{%2, %H0|%H0, %2}"
6383 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6384 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6385 (set_attr "ssememalign" "64")
6386 (set_attr "prefix_data16" "*,*,*,1,*,1")
6387 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6388 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Post-reload split (condition: TARGET_SSE3 && reload_completed) for a V2DF
;; memory destination fed from the V2DF register operand 1.  The low DF part
;; of the register (same REGNO, DFmode) is stored twice: at byte offset 0
;; and at byte offset 8 of the destination.
6391 [(set (match_operand:V2DF 0 "memory_operand")
6394 (match_operand:V2DF 1 "register_operand")
6396 (parallel [(const_int 0)
6398 "TARGET_SSE3 && reload_completed"
6401 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
6402 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
6403 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (condition: TARGET_SSE3) for a V2DF register destination fed from
;; the V2DF memory operand 1, where the two selector operands satisfy
;; operand 2 + 2 == operand 3 (operand 2 is 0 or 1).  The replacement is a
;; vec_duplicate of the single DF element found at byte offset
;; INTVAL (operands[2]) * 8 of the memory operand.
6408 [(set (match_operand:V2DF 0 "register_operand")
6411 (match_operand:V2DF 1 "memory_operand")
6413 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
6414 (match_operand:SI 3 "const_int_operand")])))]
6415 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
6416 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
6418 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; *avx512f_vmscalef<mode> (VF_128): scalar scalef on 128-bit vector
;; operands; operand 1 is a register, operand 2 a register or memory.  The
;; mnemonic suffix and the "mode" attribute come from the scalar mode
;; (<ssescalarmodesuffix>, <ssescalarmode>).
6421 (define_insn "*avx512f_vmscalef<mode>"
6422 [(set (match_operand:VF_128 0 "register_operand" "=v")
6425 [(match_operand:VF_128 1 "register_operand" "v")
6426 (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
6431 "%vscalef<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6432 [(set_attr "prefix" "evex")
6433 (set_attr "mode" "<ssescalarmode>")])
;; avx512f_scalef<mode><mask_name> (VF_512): packed scalef; operand 1 is a
;; register, operand 2 a register or memory.  The mask decoration, if any,
;; is printed through <mask_operand3>.
6435 (define_insn "avx512f_scalef<mode><mask_name>"
6436 [(set (match_operand:VF_512 0 "register_operand" "=v")
6438 [(match_operand:VF_512 1 "register_operand" "v")
6439 (match_operand:VF_512 2 "nonimmediate_operand" "vm")]
6442 "%vscalef<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6443 [(set_attr "prefix" "evex")
6444 (set_attr "mode" "<MODE>")])
;; vpternlog over the VI48_512 modes.  Operand 1 is tied to the
;; destination (constraint "0") and so is not printed; operand 2 is a
;; register, operand 3 a register or memory, and operand 4 an immediate
;; in the range 0..255.
6446 (define_insn "avx512f_vternlog<mode>"
6447 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6449 [(match_operand:VI48_512 1 "register_operand" "0")
6450 (match_operand:VI48_512 2 "register_operand" "v")
6451 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
6452 (match_operand:SI 4 "const_0_to_255_operand")]
6455 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6456 [(set_attr "type" "sselog")
6457 (set_attr "prefix" "evex")
6458 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpternlog over the VI48_512 modes.  Same operands as the
;; unmasked form, plus operand 5: a mask register (constraint "k") of mode
;; <avx512fmaskmode>, printed in braces right after the destination.
6460 (define_insn "avx512f_vternlog<mode>_mask"
6461 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6464 [(match_operand:VI48_512 1 "register_operand" "0")
6465 (match_operand:VI48_512 2 "register_operand" "v")
6466 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
6467 (match_operand:SI 4 "const_0_to_255_operand")]
6470 (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
6472 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
6473 [(set_attr "type" "sselog")
6474 (set_attr "prefix" "evex")
6475 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetexp over the VF_512 modes: one register-or-memory source
;; (operand 1) wrapped in an unspec, result in register operand 0.  The
;; <mask_name>/<mask_operand2> pieces are filled in by a substitution
;; defined elsewhere.
6477 (define_insn "avx512f_getexp<mode><mask_name>"
6478 [(set (match_operand:VF_512 0 "register_operand" "=v")
6479 (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
6482 "vgetexp<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
6483 [(set_attr "prefix" "evex")
6484 (set_attr "mode" "<MODE>")])
;; Scalar vgetexp over the VF_128 modes: register operand 1 and
;; register-or-memory operand 2 are both printed as sources, and the
;; mnemonic takes the <ssescalarmodesuffix> suffix.
6486 (define_insn "avx512f_sgetexp<mode>"
6487 [(set (match_operand:VF_128 0 "register_operand" "=v")
6490 [(match_operand:VF_128 1 "register_operand" "v")
6491 (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
6496 "vgetexp<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
6497 [(set_attr "prefix" "evex")
6498 (set_attr "mode" "<ssescalarmode>")])
;; valign over the VI48_512 modes: register operand 1, register-or-memory
;; operand 2 and an immediate operand 3 in the range 0..255, all wrapped in
;; an unspec.  The mask-related name and operand pieces come from a
;; substitution defined elsewhere.
6500 (define_insn "<mask_codefor>avx512f_align<mode><mask_name>"
6501 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6502 (unspec:VI48_512 [(match_operand:VI48_512 1 "register_operand" "v")
6503 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
6504 (match_operand:SI 3 "const_0_to_255_operand")]
6507 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
6508 [(set_attr "prefix" "evex")
6509 (set_attr "mode" "<sseinsnmode>")])
;; Expander that turns the 8-bit immediate in operand 3 into the sixteen
;; explicit element selectors taken by avx512f_shufps512_1_mask.  The four
;; 2-bit fields of the immediate are reused in four groups of four
;; selectors with offsets +0, +4, +8 and +12; within each group the third
;; and fourth selectors carry an extra bias of 16.  Operands 4 and 5 (a
;; V16SF register and an HImode register) are passed through unchanged.
6511 (define_expand "avx512f_shufps512_mask"
6512 [(match_operand:V16SF 0 "register_operand")
6513 (match_operand:V16SF 1 "register_operand")
6514 (match_operand:V16SF 2 "nonimmediate_operand")
6515 (match_operand:SI 3 "const_0_to_255_operand")
6516 (match_operand:V16SF 4 "register_operand")
6517 (match_operand:HI 5 "register_operand")]
6520 int mask = INTVAL (operands[3]);
6521 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
6522 GEN_INT ((mask >> 0) & 3),
6523 GEN_INT ((mask >> 2) & 3),
6524 GEN_INT (((mask >> 4) & 3) + 16),
6525 GEN_INT (((mask >> 6) & 3) + 16),
6526 GEN_INT (((mask >> 0) & 3) + 4),
6527 GEN_INT (((mask >> 2) & 3) + 4),
6528 GEN_INT (((mask >> 4) & 3) + 20),
6529 GEN_INT (((mask >> 6) & 3) + 20),
6530 GEN_INT (((mask >> 0) & 3) + 8),
6531 GEN_INT (((mask >> 2) & 3) + 8),
6532 GEN_INT (((mask >> 4) & 3) + 24),
6533 GEN_INT (((mask >> 6) & 3) + 24),
6534 GEN_INT (((mask >> 0) & 3) + 12),
6535 GEN_INT (((mask >> 2) & 3) + 12),
6536 GEN_INT (((mask >> 4) & 3) + 28),
6537 GEN_INT (((mask >> 6) & 3) + 28),
6538 operands[4], operands[5]));
;; Packed vfixupimm over the VF_512 modes.  Operand 1 is tied to the
;; destination (constraint "0"), operand 2 is a register, operand 3 is a
;; register or memory of the integer vector mode <sseintvecmode>, and
;; operand 4 is an immediate in the range 0..255.
6542 (define_insn "avx512f_fixupimm<mode>"
6543 [(set (match_operand:VF_512 0 "register_operand" "=v")
6545 [(match_operand:VF_512 1 "register_operand" "0")
6546 (match_operand:VF_512 2 "register_operand" "v")
6547 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "vm")
6548 (match_operand:SI 4 "const_0_to_255_operand")]
6551 "vfixupimm<ssemodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}";
6552 [(set_attr "prefix" "evex")
6553 (set_attr "mode" "<MODE>")])
;; Masked packed vfixupimm over the VF_512 modes.  Same operands as the
;; unmasked form, plus mask register operand 5 (constraint "k"), printed in
;; braces after the destination.
6555 (define_insn "avx512f_fixupimm<mode>_mask"
6556 [(set (match_operand:VF_512 0 "register_operand" "=v")
6559 [(match_operand:VF_512 1 "register_operand" "0")
6560 (match_operand:VF_512 2 "register_operand" "v")
6561 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "vm")
6562 (match_operand:SI 4 "const_0_to_255_operand")]
6565 (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
6567 "vfixupimm<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}";
6568 [(set_attr "prefix" "evex")
6569 (set_attr "mode" "<MODE>")])
;; Scalar vfixupimm over the VF_128 modes.  The operand layout matches the
;; packed form (operand 1 tied to the destination, immediate operand 4),
;; but the mnemonic takes the <ssescalarmodesuffix> suffix.
6571 (define_insn "avx512f_sfixupimm<mode>"
6572 [(set (match_operand:VF_128 0 "register_operand" "=v")
6575 [(match_operand:VF_128 1 "register_operand" "0")
6576 (match_operand:VF_128 2 "register_operand" "v")
6577 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "vm")
6578 (match_operand:SI 4 "const_0_to_255_operand")]
6583 "vfixupimm<ssescalarmodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}";
6584 [(set_attr "prefix" "evex")
6585 (set_attr "mode" "<ssescalarmode>")])
;; Masked scalar vfixupimm over the VF_128 modes: the scalar form plus
;; mask register operand 5 (constraint "k"), printed in braces after the
;; destination.
6587 (define_insn "avx512f_sfixupimm<mode>_mask"
6588 [(set (match_operand:VF_128 0 "register_operand" "=v")
6592 [(match_operand:VF_128 1 "register_operand" "0")
6593 (match_operand:VF_128 2 "register_operand" "v")
6594 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "vm")
6595 (match_operand:SI 4 "const_0_to_255_operand")]
6600 (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
6602 "vfixupimm<ssescalarmodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}";
6603 [(set_attr "prefix" "evex")
6604 (set_attr "mode" "<ssescalarmode>")])
;; Packed vrndscale over the VF_512 modes: register-or-memory source
;; operand 1 and an immediate operand 2 in the range 0..255.  The
;; length_immediate attribute is set to 1 for that immediate.
6606 (define_insn "avx512f_rndscale<mode><mask_name>"
6607 [(set (match_operand:VF_512 0 "register_operand" "=v")
6609 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")
6610 (match_operand:SI 2 "const_0_to_255_operand")]
6613 "vrndscale<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6614 [(set_attr "length_immediate" "1")
6615 (set_attr "prefix" "evex")
6616 (set_attr "mode" "<MODE>")])
;; Scalar vrndscale over the VF_128 modes: register operand 1,
;; register-or-memory operand 2 and an immediate operand 3 in the range
;; 0..255.  The mnemonic takes the <ssescalarmodesuffix> suffix.
6618 (define_insn "*avx512f_rndscale<mode>"
6619 [(set (match_operand:VF_128 0 "register_operand" "=v")
6622 [(match_operand:VF_128 1 "register_operand" "v")
6623 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
6624 (match_operand:SI 3 "const_0_to_255_operand")]
6629 "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
6630 [(set_attr "length_immediate" "1")
6631 (set_attr "prefix" "evex")
6632 (set_attr "mode" "<MODE>")])
6634 ;; One bit in mask selects 2 elements.
;; vshufps on V16SF with sixteen explicit selector operands (3..18).  The
;; predicates confine each selector to a fixed window of four indices.  The
;; insn condition additionally requires selectors 7..18 to repeat
;; selectors 3..6 shifted by 4, 8 and 12.  The output code packs operands
;; 3..6 back into one immediate (2 bits each, after removing the bias of
;; 16 from operands 5 and 6) and overwrites operands[3] with it before
;; printing.
6635 (define_insn "avx512f_shufps512_1<mask_name>"
6636 [(set (match_operand:V16SF 0 "register_operand" "=v")
6639 (match_operand:V16SF 1 "register_operand" "v")
6640 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6641 (parallel [(match_operand 3 "const_0_to_3_operand")
6642 (match_operand 4 "const_0_to_3_operand")
6643 (match_operand 5 "const_16_to_19_operand")
6644 (match_operand 6 "const_16_to_19_operand")
6645 (match_operand 7 "const_4_to_7_operand")
6646 (match_operand 8 "const_4_to_7_operand")
6647 (match_operand 9 "const_20_to_23_operand")
6648 (match_operand 10 "const_20_to_23_operand")
6649 (match_operand 11 "const_8_to_11_operand")
6650 (match_operand 12 "const_8_to_11_operand")
6651 (match_operand 13 "const_24_to_27_operand")
6652 (match_operand 14 "const_24_to_27_operand")
6653 (match_operand 15 "const_12_to_15_operand")
6654 (match_operand 16 "const_12_to_15_operand")
6655 (match_operand 17 "const_28_to_31_operand")
6656 (match_operand 18 "const_28_to_31_operand")])))]
6658 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6659 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6660 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6661 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
6662 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
6663 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
6664 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
6665 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
6666 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
6667 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
6668 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
6669 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
6672 mask = INTVAL (operands[3]);
6673 mask |= INTVAL (operands[4]) << 2;
6674 mask |= (INTVAL (operands[5]) - 16) << 4;
6675 mask |= (INTVAL (operands[6]) - 16) << 6;
6676 operands[3] = GEN_INT (mask);
6678 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
6680 [(set_attr "type" "sselog")
6681 (set_attr "length_immediate" "1")
6682 (set_attr "prefix" "evex")
6683 (set_attr "mode" "V16SF")])
;; Expander that turns the 8-bit immediate in operand 3 into explicit
;; element selectors for avx512f_shufpd512_1_mask.  In the visible
;; arguments, each of bits 1..7 of the immediate chooses between two
;; adjacent indices (9/8, 3/2, 11/10, 5/4, 13/12, 7/6, 15/14).  The
;; argument derived from bit 0 sits on a line not visible in this chunk.
;; Operands 4 and 5 (a V8DF register and a QImode register) are passed
;; through unchanged.
6685 (define_expand "avx512f_shufpd512_mask"
6686 [(match_operand:V8DF 0 "register_operand")
6687 (match_operand:V8DF 1 "register_operand")
6688 (match_operand:V8DF 2 "nonimmediate_operand")
6689 (match_operand:SI 3 "const_0_to_255_operand")
6690 (match_operand:V8DF 4 "register_operand")
6691 (match_operand:QI 5 "register_operand")]
6694 int mask = INTVAL (operands[3]);
6695 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
6697 GEN_INT (mask & 2 ? 9 : 8),
6698 GEN_INT (mask & 4 ? 3 : 2),
6699 GEN_INT (mask & 8 ? 11 : 10),
6700 GEN_INT (mask & 16 ? 5 : 4),
6701 GEN_INT (mask & 32 ? 13 : 12),
6702 GEN_INT (mask & 64 ? 7 : 6),
6703 GEN_INT (mask & 128 ? 15 : 14),
6704 operands[4], operands[5]));
;; vshufpd on V8DF with eight explicit selector operands (3..10), each
;; confined by its predicate to a pair of adjacent indices.  The output
;; code rebuilds the 8-bit immediate, one bit per selector: it subtracts
;; the lower index of each pair and shifts the result into bit positions
;; 0..7.  The immediate is stored into operands[3] before printing.
6708 (define_insn "avx512f_shufpd512_1<mask_name>"
6709 [(set (match_operand:V8DF 0 "register_operand" "=v")
6712 (match_operand:V8DF 1 "register_operand" "v")
6713 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
6714 (parallel [(match_operand 3 "const_0_to_1_operand")
6715 (match_operand 4 "const_8_to_9_operand")
6716 (match_operand 5 "const_2_to_3_operand")
6717 (match_operand 6 "const_10_to_11_operand")
6718 (match_operand 7 "const_4_to_5_operand")
6719 (match_operand 8 "const_12_to_13_operand")
6720 (match_operand 9 "const_6_to_7_operand")
6721 (match_operand 10 "const_14_to_15_operand")])))]
6725 mask = INTVAL (operands[3]);
6726 mask |= (INTVAL (operands[4]) - 8) << 1;
6727 mask |= (INTVAL (operands[5]) - 2) << 2;
6728 mask |= (INTVAL (operands[6]) - 10) << 3;
6729 mask |= (INTVAL (operands[7]) - 4) << 4;
6730 mask |= (INTVAL (operands[8]) - 12) << 5;
6731 mask |= (INTVAL (operands[9]) - 6) << 6;
6732 mask |= (INTVAL (operands[10]) - 14) << 7;
6733 operands[3] = GEN_INT (mask);
6735 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6737 [(set_attr "type" "sselog")
6738 (set_attr "length_immediate" "1")
6739 (set_attr "prefix" "evex")
6740 (set_attr "mode" "V8DF")])
;; Expander that turns the immediate in operand 3 into explicit element
;; selectors for avx_shufpd256_1.  In the visible arguments, bits 1..3 of
;; the immediate choose between indices 5/4, 3/2 and 7/6.  The argument
;; derived from bit 0 sits on a line not visible in this chunk.
6742 (define_expand "avx_shufpd256"
6743 [(match_operand:V4DF 0 "register_operand")
6744 (match_operand:V4DF 1 "register_operand")
6745 (match_operand:V4DF 2 "nonimmediate_operand")
6746 (match_operand:SI 3 "const_int_operand")]
6749 int mask = INTVAL (operands[3]);
6750 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
6752 GEN_INT (mask & 2 ? 5 : 4),
6753 GEN_INT (mask & 4 ? 3 : 2),
6754 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with four explicit selector operands (3..6).  The
;; output code packs them into a 4-bit immediate: bit 0 is operand 3, and
;; bits 1..3 are operands 4, 5 and 6 minus 4, 2 and 6 respectively.  The
;; immediate is stored into operands[3] before printing.
6758 (define_insn "avx_shufpd256_1"
6759 [(set (match_operand:V4DF 0 "register_operand" "=x")
6762 (match_operand:V4DF 1 "register_operand" "x")
6763 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6764 (parallel [(match_operand 3 "const_0_to_1_operand")
6765 (match_operand 4 "const_4_to_5_operand")
6766 (match_operand 5 "const_2_to_3_operand")
6767 (match_operand 6 "const_6_to_7_operand")])))]
6771 mask = INTVAL (operands[3]);
6772 mask |= (INTVAL (operands[4]) - 4) << 1;
6773 mask |= (INTVAL (operands[5]) - 2) << 2;
6774 mask |= (INTVAL (operands[6]) - 6) << 3;
6775 operands[3] = GEN_INT (mask);
6777 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6779 [(set_attr "type" "sseshuf")
6780 (set_attr "length_immediate" "1")
6781 (set_attr "prefix" "vex")
6782 (set_attr "mode" "V4DF")])
;; Expander that turns the immediate in operand 3 into explicit element
;; selectors for sse2_shufpd_v2df.  Bit 1 of the immediate chooses index 3
;; or 2.  The argument derived from bit 0 sits on a line not visible in
;; this chunk.
6784 (define_expand "sse2_shufpd"
6785 [(match_operand:V2DF 0 "register_operand")
6786 (match_operand:V2DF 1 "register_operand")
6787 (match_operand:V2DF 2 "nonimmediate_operand")
6788 (match_operand:SI 3 "const_int_operand")]
6791 int mask = INTVAL (operands[3]);
6792 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
6794 GEN_INT (mask & 2 ? 3 : 2)));
6798 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V4DI high interleave of register operand 1 with register-or-memory
;; operand 2, emitted as vpunpckhqdq (VEX prefix, OImode).  The selector
;; list starts at index 1; the rest of it is not visible in this chunk.
6799 (define_insn "avx2_interleave_highv4di"
6800 [(set (match_operand:V4DI 0 "register_operand" "=x")
6803 (match_operand:V4DI 1 "register_operand" "x")
6804 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
6805 (parallel [(const_int 1)
6810 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
6811 [(set_attr "type" "sselog")
6812 (set_attr "prefix" "vex")
6813 (set_attr "mode" "OI")])
;; V8DI high interleave emitted as vpunpckhqdq (EVEX prefix, XImode).  The
;; fixed selector list picks the odd indices 1, 3, 5, 7, each followed by
;; the same index plus 8.
6815 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
6816 [(set (match_operand:V8DI 0 "register_operand" "=v")
6819 (match_operand:V8DI 1 "register_operand" "v")
6820 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
6821 (parallel [(const_int 1) (const_int 9)
6822 (const_int 3) (const_int 11)
6823 (const_int 5) (const_int 13)
6824 (const_int 7) (const_int 15)])))]
6826 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6827 [(set_attr "type" "sselog")
6828 (set_attr "prefix" "evex")
6829 (set_attr "mode" "XI")])
;; V2DI high interleave with two alternatives selected by the "isa"
;; attribute: a two-operand punpckhqdq where operand 1 is tied to the
;; destination (noavx), and a three-operand vpunpckhqdq (avx).
6831 (define_insn "vec_interleave_highv2di"
6832 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6835 (match_operand:V2DI 1 "register_operand" "0,x")
6836 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
6837 (parallel [(const_int 1)
6841 punpckhqdq\t{%2, %0|%0, %2}
6842 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
6843 [(set_attr "isa" "noavx,avx")
6844 (set_attr "type" "sselog")
6845 (set_attr "prefix_data16" "1,*")
6846 (set_attr "prefix" "orig,vex")
6847 (set_attr "mode" "TI")])
;; V4DI low interleave of register operand 1 with register-or-memory
;; operand 2, emitted as vpunpcklqdq (VEX prefix, OImode).  The selector
;; list starts at index 0; the rest of it is not visible in this chunk.
6849 (define_insn "avx2_interleave_lowv4di"
6850 [(set (match_operand:V4DI 0 "register_operand" "=x")
6853 (match_operand:V4DI 1 "register_operand" "x")
6854 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
6855 (parallel [(const_int 0)
6860 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
6861 [(set_attr "type" "sselog")
6862 (set_attr "prefix" "vex")
6863 (set_attr "mode" "OI")])
;; V8DI low interleave emitted as vpunpcklqdq (EVEX prefix, XImode).  The
;; fixed selector list picks the even indices 0, 2, 4, 6, each followed by
;; the same index plus 8.
6865 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
6866 [(set (match_operand:V8DI 0 "register_operand" "=v")
6869 (match_operand:V8DI 1 "register_operand" "v")
6870 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
6871 (parallel [(const_int 0) (const_int 8)
6872 (const_int 2) (const_int 10)
6873 (const_int 4) (const_int 12)
6874 (const_int 6) (const_int 14)])))]
6876 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6877 [(set_attr "type" "sselog")
6878 (set_attr "prefix" "evex")
6879 (set_attr "mode" "XI")])
;; V2DI low interleave with two alternatives selected by the "isa"
;; attribute: a two-operand punpcklqdq where operand 1 is tied to the
;; destination (noavx), and a three-operand vpunpcklqdq (avx).
6881 (define_insn "vec_interleave_lowv2di"
6882 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6885 (match_operand:V2DI 1 "register_operand" "0,x")
6886 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
6887 (parallel [(const_int 0)
6891 punpcklqdq\t{%2, %0|%0, %2}
6892 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
6893 [(set_attr "isa" "noavx,avx")
6894 (set_attr "type" "sselog")
6895 (set_attr "prefix_data16" "1,*")
6896 (set_attr "prefix" "orig,vex")
6897 (set_attr "mode" "TI")])
;; shufpd over the VI8F_128 modes, written as a vec_select from the
;; vec_concat of operands 1 and 2.  Selector operand 3 is 0 or 1 and
;; selector operand 4 is 2 or 3.  The output code folds them into a 2-bit
;; immediate (operand 3 | (operand 4 - 2) << 1), stores it into
;; operands[3], and picks the shufpd or vshufpd template by
;; which_alternative.
6899 (define_insn "sse2_shufpd_<mode>"
6900 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
6901 (vec_select:VI8F_128
6902 (vec_concat:<ssedoublevecmode>
6903 (match_operand:VI8F_128 1 "register_operand" "0,x")
6904 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
6905 (parallel [(match_operand 3 "const_0_to_1_operand")
6906 (match_operand 4 "const_2_to_3_operand")])))]
6910 mask = INTVAL (operands[3]);
6911 mask |= (INTVAL (operands[4]) - 2) << 1;
6912 operands[3] = GEN_INT (mask);
6914 switch (which_alternative)
6917 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
6919 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6924 [(set_attr "isa" "noavx,avx")
6925 (set_attr "type" "sseshuf")
6926 (set_attr "length_immediate" "1")
6927 (set_attr "prefix" "orig,vex")
6928 (set_attr "mode" "V2DF")])
6930 ;; Avoid combining registers from different units in a single alternative,
6931 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Moves the element picked by selector (const_int 1) of V2DF operand 1
;; into DF operand 0.  There are six alternatives (memory, three SSE
;; register forms, x87 "*f" and general "r" destinations), and the
;; condition rejects memory-to-memory.  Only two of the output template
;; lines are visible in this chunk.
6932 (define_insn "sse2_storehpd"
6933 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
6935 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
6936 (parallel [(const_int 1)])))]
6937 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6939 %vmovhpd\t{%1, %0|%0, %1}
6941 vunpckhpd\t{%d1, %0|%0, %d1}
6945 [(set_attr "isa" "*,noavx,avx,*,*,*")
6946 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
6947 (set (attr "prefix_data16")
6949 (and (eq_attr "alternative" "0")
6950 (not (match_test "TARGET_AVX")))
6952 (const_string "*")))
6953 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
6954 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Split (its header line is not visible in this chunk): once reload has
;; completed, taking the selector-1 element from a V2DF memory operand
;; becomes a plain DF move from that memory at byte offset 8.
6957 [(set (match_operand:DF 0 "register_operand")
6959 (match_operand:V2DF 1 "memory_operand")
6960 (parallel [(const_int 1)])))]
6961 "TARGET_SSE2 && reload_completed"
6962 [(set (match_dup 0) (match_dup 1))]
6963 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; SSE-only (condition !TARGET_SSE2 && TARGET_SSE) variant that takes the
;; selector-1 element of V2DF operand 1, using movhps, movhlps or movlps
;; depending on the alternative.  The third alternative addresses the
;; source through the %H1 operand modifier.  Memory-to-memory is rejected.
6965 (define_insn "*vec_extractv2df_1_sse"
6966 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
6968 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
6969 (parallel [(const_int 1)])))]
6970 "!TARGET_SSE2 && TARGET_SSE
6971 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6973 movhps\t{%1, %0|%q0, %1}
6974 movhlps\t{%1, %0|%0, %1}
6975 movlps\t{%H1, %0|%0, %H1}"
6976 [(set_attr "type" "ssemov")
6977 (set_attr "ssememalign" "64")
6978 (set_attr "mode" "V2SF,V4SF,V2SF")])
6980 ;; Avoid combining registers from different units in a single alternative,
6981 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Moves the element picked by selector (const_int 0) of V2DF operand 1
;; into DF operand 0.  There are five alternatives (memory, two SSE
;; register forms, x87 "*f" and general "r" destinations), and the
;; condition rejects memory-to-memory.  Only the first output template
;; line is visible in this chunk.
6982 (define_insn "sse2_storelpd"
6983 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
6985 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
6986 (parallel [(const_int 0)])))]
6987 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6989 %vmovlpd\t{%1, %0|%0, %1}
6994 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
6995 (set_attr "prefix_data16" "1,*,*,*,*")
6996 (set_attr "prefix" "maybe_vex")
6997 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Split (its header line is not visible in this chunk): once reload has
;; completed, taking the selector-0 element of V2DF operand 1 becomes a
;; plain DF move.  A register source is re-expressed as a DFmode register
;; with the same register number.  The other visible assignment readdresses
;; operand 1 as DFmode at byte offset 0.
7000 [(set (match_operand:DF 0 "register_operand")
7002 (match_operand:V2DF 1 "nonimmediate_operand")
7003 (parallel [(const_int 0)])))]
7004 "TARGET_SSE2 && reload_completed"
7005 [(set (match_dup 0) (match_dup 1))]
7007 if (REG_P (operands[1]))
7008 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
7010 operands[1] = adjust_address (operands[1], DFmode, 0);
;; SSE-only (condition !TARGET_SSE2 && TARGET_SSE) variant that takes the
;; selector-0 element of V2DF operand 1, using movlps for the memory
;; alternatives and movaps for register-to-register.  Memory-to-memory is
;; rejected.
7013 (define_insn "*vec_extractv2df_0_sse"
7014 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
7016 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
7017 (parallel [(const_int 0)])))]
7018 "!TARGET_SSE2 && TARGET_SSE
7019 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7021 movlps\t{%1, %0|%0, %1}
7022 movaps\t{%1, %0|%0, %1}
7023 movlps\t{%1, %0|%0, %q1}"
7024 [(set_attr "type" "ssemov")
7025 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.  It calls
;; ix86_fixup_binary_operands to obtain a destination `dst`, emits
;; sse2_loadhpd into it, and copies `dst` back into operands[0] when the
;; helper returned a different rtx.
7027 (define_expand "sse2_loadhpd_exp"
7028 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7031 (match_operand:V2DF 1 "nonimmediate_operand")
7032 (parallel [(const_int 0)]))
7033 (match_operand:DF 2 "nonimmediate_operand")))]
7036 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7038 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
7040 /* Fix up the destination if needed. */
7041 if (dst != operands[0])
7042 emit_move_insn (operands[0], dst);
7047 ;; Avoid combining registers from different units in a single alternative,
7048 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Builds a V2DF value from the selector-0 element of V2DF operand 1 and
;; DF operand 2.  There are seven alternatives; the four visible templates
;; are movhpd/vmovhpd (operand 2 in memory) and unpcklpd/vunpcklpd
;; (operand 2 in an SSE register).  The constraint strings for operands 0
;; and 1 and the remaining template lines are not visible in this chunk.
7049 (define_insn "sse2_loadhpd"
7050 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7054 (match_operand:V2DF 1 "nonimmediate_operand"
7056 (parallel [(const_int 0)]))
7057 (match_operand:DF 2 "nonimmediate_operand"
7058 " m,m,x,x,x,*f,r")))]
7059 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7061 movhpd\t{%2, %0|%0, %2}
7062 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7063 unpcklpd\t{%2, %0|%0, %2}
7064 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7068 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7069 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
7070 (set_attr "ssememalign" "64")
7071 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
7072 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
7073 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Split (its header line is not visible in this chunk): the destination
;; is V2DF memory and the selector-0 element is taken from that same
;; memory (match_dup 0).  Once reload has completed, this reduces to a
;; single DF move of register operand 1 to byte offset 8 of the memory.
7076 [(set (match_operand:V2DF 0 "memory_operand")
7078 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
7079 (match_operand:DF 1 "register_operand")))]
7080 "TARGET_SSE2 && reload_completed"
7081 [(set (match_dup 0) (match_dup 1))]
7082 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  It calls
;; ix86_fixup_binary_operands to obtain a destination `dst`, emits
;; sse2_loadlpd into it, and copies `dst` back into operands[0] when the
;; helper returned a different rtx.
7084 (define_expand "sse2_loadlpd_exp"
7085 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7087 (match_operand:DF 2 "nonimmediate_operand")
7089 (match_operand:V2DF 1 "nonimmediate_operand")
7090 (parallel [(const_int 1)]))))]
7093 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7095 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
7097 /* Fix up the destination if needed. */
7098 if (dst != operands[0])
7099 emit_move_insn (operands[0], dst);
7104 ;; Avoid combining registers from different units in a single alternative,
7105 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Builds a V2DF value from DF operand 2 and the selector-1 element of
;; V2DF operand 1.  There are eleven alternatives.  Alternative 5 is the
;; shufpd form (type "sselog", one-byte immediate).  Alternatives 9 and 10
;; are typed "fmov" and "imov" for the "*f" and "r" sources.  The
;; remaining alternatives are "ssemov".  The templates for the last three
;; alternatives are not visible in this chunk.
7106 (define_insn "sse2_loadlpd"
7107 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7108 "=x,x,x,x,x,x,x,x,m,m ,m")
7110 (match_operand:DF 2 "nonimmediate_operand"
7111 " m,m,m,x,x,0,0,x,x,*f,r")
7113 (match_operand:V2DF 1 "vector_move_operand"
7114 " C,0,x,0,x,x,o,o,0,0 ,0")
7115 (parallel [(const_int 1)]))))]
7116 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7118 %vmovsd\t{%2, %0|%0, %2}
7119 movlpd\t{%2, %0|%0, %2}
7120 vmovlpd\t{%2, %1, %0|%0, %1, %2}
7121 movsd\t{%2, %0|%0, %2}
7122 vmovsd\t{%2, %1, %0|%0, %1, %2}
7123 shufpd\t{$2, %1, %0|%0, %1, 2}
7124 movhpd\t{%H1, %0|%0, %H1}
7125 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
7129 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
7131 (cond [(eq_attr "alternative" "5")
7132 (const_string "sselog")
7133 (eq_attr "alternative" "9")
7134 (const_string "fmov")
7135 (eq_attr "alternative" "10")
7136 (const_string "imov")
7138 (const_string "ssemov")))
7139 (set_attr "ssememalign" "64")
7140 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
7141 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
7142 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
7143 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Split (its header line is not visible in this chunk): the destination
;; is V2DF memory and the selector-1 element is taken from that same
;; memory (match_dup 0).  Once reload has completed, this reduces to a
;; single DF move of register operand 1 to byte offset 0 of the memory.
7146 [(set (match_operand:V2DF 0 "memory_operand")
7148 (match_operand:DF 1 "register_operand")
7149 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
7150 "TARGET_SSE2 && reload_completed"
7151 [(set (match_dup 0) (match_dup 1))]
7152 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Combines V2DF operands 2 and 1 into V2DF operand 0, with nine
;; alternatives covering register and memory placements.  The templates
;; use movsd/vmovsd, movlpd/vmovlpd, shufpd (alternative 5, typed
;; "sselog") and movhps/vmovhps.  The combining rtx code and the insn
;; condition are on lines not visible in this chunk.
7154 (define_insn "sse2_movsd"
7155 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
7157 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
7158 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
7162 movsd\t{%2, %0|%0, %2}
7163 vmovsd\t{%2, %1, %0|%0, %1, %2}
7164 movlpd\t{%2, %0|%0, %q2}
7165 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
7166 %vmovlpd\t{%2, %0|%q0, %2}
7167 shufpd\t{$2, %1, %0|%0, %1, 2}
7168 movhps\t{%H1, %0|%0, %H1}
7169 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
7170 %vmovhps\t{%1, %H0|%H0, %1}"
7171 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
7174 (eq_attr "alternative" "5")
7175 (const_string "sselog")
7176 (const_string "ssemov")))
7177 (set (attr "prefix_data16")
7179 (and (eq_attr "alternative" "2,4")
7180 (not (match_test "TARGET_AVX")))
7182 (const_string "*")))
7183 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
7184 (set_attr "ssememalign" "64")
7185 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
7186 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; V2DF result built from a single DF operand 1, with two alternatives: a
;; "noavx" form where operand 1 is tied to the destination (its template
;; line is not visible in this chunk), and an "sse3" form that uses
;; movddup on a register or memory source.
7188 (define_insn "vec_dupv2df"
7189 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
7191 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
7195 %vmovddup\t{%1, %0|%0, %1}"
7196 [(set_attr "isa" "noavx,sse3")
7197 (set_attr "type" "sselog1")
7198 (set_attr "prefix" "orig,maybe_vex")
7199 (set_attr "mode" "V2DF,DF")])
;; Forms a V2DF register from DF operands 1 and 2, with eight
;; alternatives.  Alternatives 0..2 (unpcklpd, vunpcklpd, movddup) are
;; typed "sselog" and the rest are "ssemov".  Alternative 2 requires
;; operand 2 to match operand 1 (constraint "1").  Alternative 5 accepts
;; the "C" constraint for operand 2 and only prints operand 1.  The last
;; two alternatives are "noavx" forms using movlhps and movhps.
7201 (define_insn "*vec_concatv2df"
7202 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
7204 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
7205 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
7208 unpcklpd\t{%2, %0|%0, %2}
7209 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7210 %vmovddup\t{%1, %0|%0, %1}
7211 movhpd\t{%2, %0|%0, %2}
7212 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7213 %vmovsd\t{%1, %0|%0, %1}
7214 movlhps\t{%2, %0|%0, %2}
7215 movhps\t{%2, %0|%0, %2}"
7216 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
7219 (eq_attr "alternative" "0,1,2")
7220 (const_string "sselog")
7221 (const_string "ssemov")))
7222 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
7223 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
7224 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
7226 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7228 ;; Parallel integer down-conversion operations
7230 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Iterator and attributes used by the vpmov patterns that follow.
;; PMOV_DST_MODE lists the destination modes.  pmov_src_mode and
;; pmov_src_lower map each destination mode to its source mode, in upper
;; and lower case.  pmov_suff gives the two-letter mnemonic suffix.
7232 (define_mode_iterator PMOV_DST_MODE [V16QI V16HI V8SI V8HI])
7233 (define_mode_attr pmov_src_mode
7234 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
7235 (define_mode_attr pmov_src_lower
7236 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
7237 (define_mode_attr pmov_suff
7238 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; vpmov truncation (any_truncate) from a <pmov_src_mode> register into a
;; PMOV_DST_MODE destination.  The destination is a register in the first
;; alternative and memory in the second, and the "memory" attribute is
;; "none"/"store" accordingly.
7240 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
7241 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7242 (any_truncate:PMOV_DST_MODE
7243 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
7245 "vpmov<trunsuffix><pmov_suff>\t{%1, %0|%0, %1}"
7246 [(set_attr "type" "ssemov")
7247 (set_attr "memory" "none,store")
7248 (set_attr "prefix" "evex")
7249 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpmov truncation: a vec_merge of the truncated source with
;; operand 2 under mask register operand 3 (constraint "k").  Operand 2 is
;; either tied to the destination or, for the register alternative only,
;; satisfies constraint "C".  The %N2 modifier is printed after the mask.
7251 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
7252 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7253 (vec_merge:PMOV_DST_MODE
7254 (any_truncate:PMOV_DST_MODE
7255 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
7256 (match_operand:PMOV_DST_MODE 2 "vector_move_operand" "0C,0")
7257 (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))]
7259 "vpmov<trunsuffix><pmov_suff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7260 [(set_attr "type" "ssemov")
7261 (set_attr "memory" "none,store")
7262 (set_attr "prefix" "evex")
7263 (set_attr "mode" "<sseinsnmode>")])
;; vpmov..qb from a V8DI register into a V16QI register.  The pattern
;; pairs the converted source with an all-zero V8QI constant vector.  The
;; rtx codes that wrap the two halves are on lines not visible in this
;; chunk.
7265 (define_insn "*avx512f_<code>v8div16qi2"
7266 [(set (match_operand:V16QI 0 "register_operand" "=v")
7269 (match_operand:V8DI 1 "register_operand" "v"))
7270 (const_vector:V8QI [(const_int 0) (const_int 0)
7271 (const_int 0) (const_int 0)
7272 (const_int 0) (const_int 0)
7273 (const_int 0) (const_int 0)])))]
7275 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
7276 [(set_attr "type" "ssemov")
7277 (set_attr "prefix" "evex")
7278 (set_attr "mode" "TI")])
;; Store form of vpmov..qb: V8DI register source, V16QI memory
;; destination ("memory" attribute "store").  The second half of the
;; pattern selects indices 8..15; the operand it selects from is on a line
;; not visible in this chunk.
7280 (define_insn "*avx512f_<code>v8div16qi2_store"
7281 [(set (match_operand:V16QI 0 "memory_operand" "=m")
7284 (match_operand:V8DI 1 "register_operand" "v"))
7287 (parallel [(const_int 8) (const_int 9)
7288 (const_int 10) (const_int 11)
7289 (const_int 12) (const_int 13)
7290 (const_int 14) (const_int 15)]))))]
7292 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
7293 [(set_attr "type" "ssemov")
7294 (set_attr "memory" "store")
7295 (set_attr "prefix" "evex")
7296 (set_attr "mode" "TI")])
;; Masked register form of vpmov..qb.  The converted V8DI source is
;; merged, under QImode mask register operand 3, with indices 0..7
;; selected from V16QI operand 2 (tied to the destination, or constraint
;; "C").  The result is paired with an all-zero V8QI constant vector.
7298 (define_insn "avx512f_<code>v8div16qi2_mask"
7299 [(set (match_operand:V16QI 0 "register_operand" "=v")
7303 (match_operand:V8DI 1 "register_operand" "v"))
7305 (match_operand:V16QI 2 "vector_move_operand" "0C")
7306 (parallel [(const_int 0) (const_int 1)
7307 (const_int 2) (const_int 3)
7308 (const_int 4) (const_int 5)
7309 (const_int 6) (const_int 7)]))
7310 (match_operand:QI 3 "register_operand" "k"))
7311 (const_vector:V8QI [(const_int 0) (const_int 0)
7312 (const_int 0) (const_int 0)
7313 (const_int 0) (const_int 0)
7314 (const_int 0) (const_int 0)])))]
7316 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7317 [(set_attr "type" "ssemov")
7318 (set_attr "prefix" "evex")
7319 (set_attr "mode" "TI")])
;; Masked store form of vpmov..qb: V8DI register source, V16QI memory
;; destination, QImode mask register operand 2 printed in braces after the
;; destination.  The pattern contains index selections 0..7 and 8..15; the
;; operands they select from are on lines not visible in this chunk.
7321 (define_insn "*avx512f_<code>v8div16qi2_store_mask"
7322 [(set (match_operand:V16QI 0 "memory_operand" "=m")
7326 (match_operand:V8DI 1 "register_operand" "v"))
7329 (parallel [(const_int 0) (const_int 1)
7330 (const_int 2) (const_int 3)
7331 (const_int 4) (const_int 5)
7332 (const_int 6) (const_int 7)]))
7333 (match_operand:QI 2 "register_operand" "k"))
7336 (parallel [(const_int 8) (const_int 9)
7337 (const_int 10) (const_int 11)
7338 (const_int 12) (const_int 13)
7339 (const_int 14) (const_int 15)]))))]
7341 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
7342 [(set_attr "type" "ssemov")
7343 (set_attr "memory" "store")
7344 (set_attr "prefix" "evex")
7345 (set_attr "mode" "TI")])
7347 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7349 ;; Parallel integral arithmetic
7351 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the VI_AVX2 modes.  The preparation statement
;; sets operands[2] to a register holding the mode's zero constant.  The
;; rtx that uses it is on lines not visible in this chunk.
7353 (define_expand "neg<mode>2"
7354 [(set (match_operand:VI_AVX2 0 "register_operand")
7357 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
7359 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Add/subtract expander for the VI_AVX2 modes.  It is enabled by
;; TARGET_SSE2 together with <mask_mode512bit_condition>, and it
;; canonicalizes the operands through ix86_fixup_binary_operands_no_copy
;; before the insn pattern is matched.
7361 (define_expand "<plusminus_insn><mode>3<mask_name>"
7362 [(set (match_operand:VI_AVX2 0 "register_operand")
7364 (match_operand:VI_AVX2 1 "nonimmediate_operand")
7365 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
7366 "TARGET_SSE2 && <mask_mode512bit_condition>"
7367 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Add/subtract insn for the VI_AVX2 modes, with two alternatives: a
;; two-operand legacy form where operand 1 is tied to the destination
;; (noavx), and a three-operand v-prefixed form (avx) that also carries
;; the <mask_operand3> decoration.  ix86_binary_operator_ok must accept
;; the operands.
7369 (define_insn "*<plusminus_insn><mode>3<mask_name>"
7370 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
7372 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
7373 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
7374 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
7376 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
7377 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7378 [(set_attr "isa" "noavx,avx")
7379 (set_attr "type" "sseiadd")
7380 (set_attr "prefix_data16" "1,*")
7381 (set_attr "prefix" "<mask_prefix3>")
7382 (set_attr "mode" "<sseinsnmode>")])
;; Saturating add/subtract (sat_plusminus) expander for the VI12_AVX2
;; modes.  It canonicalizes the operands through
;; ix86_fixup_binary_operands_no_copy.  The enabling condition is on a
;; line not visible in this chunk.
7384 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
7385 [(set (match_operand:VI12_AVX2 0 "register_operand")
7386 (sat_plusminus:VI12_AVX2
7387 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
7388 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
7390 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Saturating add/subtract insn for the VI12_AVX2 modes, with two
;; alternatives: a two-operand legacy form where operand 1 is tied to the
;; destination (noavx), and a three-operand v-prefixed form (avx).
;; ix86_binary_operator_ok must accept the operands.
7392 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
7393 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
7394 (sat_plusminus:VI12_AVX2
7395 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
7396 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
7397 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
7399 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
7400 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7401 [(set_attr "isa" "noavx,avx")
7402 (set_attr "type" "sseiadd")
7403 (set_attr "prefix_data16" "1,*")
7404 (set_attr "prefix" "orig,vex")
7405 (set_attr "mode" "TI")])
;; Multiply expander for the VI1_AVX2 modes.  Both inputs must be
;; registers, and the whole expansion is delegated to
;; ix86_expand_vecop_qihi with code MULT.
7407 (define_expand "mul<mode>3"
7408 [(set (match_operand:VI1_AVX2 0 "register_operand")
7409 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
7410 (match_operand:VI1_AVX2 2 "register_operand")))]
7413 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Multiply expander for the VI2_AVX2 modes.  It canonicalizes the
;; operands through ix86_fixup_binary_operands_no_copy so that the
;; *mul<mode>3 insn can match.  The enabling condition is on a line not
;; visible in this chunk.
7417 (define_expand "mul<mode>3"
7418 [(set (match_operand:VI2_AVX2 0 "register_operand")
7419 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
7420 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
7422 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for the VI2_AVX2 multiply: emits pmullw (alternative 0,
;; destination tied to operand 1) or vpmullw (alternative 1).  Operands 1
;; and 2 are marked commutative with "%".
7424 (define_insn "*mul<mode>3"
7425 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
7426 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
7427 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
7428 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
7430 pmullw\t{%2, %0|%0, %2}
7431 vpmullw\t{%2, %1, %0|%0, %1, %2}"
7432 [(set_attr "isa" "noavx,avx")
7433 (set_attr "type" "sseimul")
7434 (set_attr "prefix_data16" "1,*")
7435 (set_attr "prefix" "orig,vex")
7436 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander for VI2_AVX2 modes.  The visible template
;; extends both inputs (any_extend) to <ssedoublemode>, multiplies them and
;; applies a logical right shift; the shift amount and the outer narrowing
;; code are not visible in this excerpt.  Operands are fixed up with
;; ix86_fixup_binary_operands_no_copy first.
7438 (define_expand "<s>mul<mode>3_highpart"
7439 [(set (match_operand:VI2_AVX2 0 "register_operand")
7441 (lshiftrt:<ssedoublemode>
7442 (mult:<ssedoublemode>
7443 (any_extend:<ssedoublemode>
7444 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
7445 (any_extend:<ssedoublemode>
7446 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
7449 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for the high-part multiply: emits pmulh<u>w (alternative 0,
;; destination tied to operand 1) or vpmulh<u>w (alternative 1).  The "%"
;; on operand 1 marks operands 1 and 2 as commutative.
7451 (define_insn "*<s>mul<mode>3_highpart"
7452 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
7454 (lshiftrt:<ssedoublemode>
7455 (mult:<ssedoublemode>
7456 (any_extend:<ssedoublemode>
7457 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
7458 (any_extend:<ssedoublemode>
7459 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
7461 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
7463 pmulh<u>w\t{%2, %0|%0, %2}
7464 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
7465 [(set_attr "isa" "noavx,avx")
7466 (set_attr "type" "sseimul")
7467 (set_attr "prefix_data16" "1,*")
7468 (set_attr "prefix" "orig,vex")
7469 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the widening multiply of V16SI inputs into a V8DI result.
;; Each input is paired with the even element indices 0, 2, ..., 14.
;; Operands are fixed up with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL codes wrapping the operands are not visible in this
;; excerpt.
7471 (define_expand "vec_widen_umult_even_v16si<mask_name>"
7472 [(set (match_operand:V8DI 0 "register_operand")
7476 (match_operand:V16SI 1 "nonimmediate_operand")
7477 (parallel [(const_int 0) (const_int 2)
7478 (const_int 4) (const_int 6)
7479 (const_int 8) (const_int 10)
7480 (const_int 12) (const_int 14)])))
7483 (match_operand:V16SI 2 "nonimmediate_operand")
7484 (parallel [(const_int 0) (const_int 2)
7485 (const_int 4) (const_int 6)
7486 (const_int 8) (const_int 10)
7487 (const_int 12) (const_int 14)])))))]
7489 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; Matching insn for the V16SI -> V8DI even-element widening multiply.
;; Emits vpmuludq with the optional mask operand (<mask_operand3>).
;; Requires TARGET_AVX512F; operands 1 and 2 are commutative ("%").
7491 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
7492 [(set (match_operand:V8DI 0 "register_operand" "=v")
7496 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
7497 (parallel [(const_int 0) (const_int 2)
7498 (const_int 4) (const_int 6)
7499 (const_int 8) (const_int 10)
7500 (const_int 12) (const_int 14)])))
7503 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
7504 (parallel [(const_int 0) (const_int 2)
7505 (const_int 4) (const_int 6)
7506 (const_int 8) (const_int 10)
7507 (const_int 12) (const_int 14)])))))]
7508 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
7509 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7510 [(set_attr "isa" "avx512f")
7511 (set_attr "type" "sseimul")
7512 (set_attr "prefix_extra" "1")
7513 (set_attr "prefix" "evex")
7514 (set_attr "mode" "XI")])
;; Expander for the widening multiply of V8SI inputs into a V4DI result,
;; using the even element indices 0, 2, 4, 6 of each input.  Operands are
;; fixed up with ix86_fixup_binary_operands_no_copy.
7516 (define_expand "vec_widen_umult_even_v8si"
7517 [(set (match_operand:V4DI 0 "register_operand")
7521 (match_operand:V8SI 1 "nonimmediate_operand")
7522 (parallel [(const_int 0) (const_int 2)
7523 (const_int 4) (const_int 6)])))
7526 (match_operand:V8SI 2 "nonimmediate_operand")
7527 (parallel [(const_int 0) (const_int 2)
7528 (const_int 4) (const_int 6)])))))]
7530 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn for the V8SI -> V4DI even-element widening multiply.
;; Emits the three-operand vpmuludq; requires TARGET_AVX2.  Operands 1 and
;; 2 are commutative ("%").
7532 (define_insn "*vec_widen_umult_even_v8si"
7533 [(set (match_operand:V4DI 0 "register_operand" "=x")
7537 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
7538 (parallel [(const_int 0) (const_int 2)
7539 (const_int 4) (const_int 6)])))
7542 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
7543 (parallel [(const_int 0) (const_int 2)
7544 (const_int 4) (const_int 6)])))))]
7545 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
7546 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
7547 [(set_attr "type" "sseimul")
7548 (set_attr "prefix" "vex")
7549 (set_attr "mode" "OI")])
;; Expander for the widening multiply of V4SI inputs into a V2DI result,
;; using element indices 0 and 2 of each input.  Operands are fixed up with
;; ix86_fixup_binary_operands_no_copy.
7551 (define_expand "vec_widen_umult_even_v4si"
7552 [(set (match_operand:V2DI 0 "register_operand")
7556 (match_operand:V4SI 1 "nonimmediate_operand")
7557 (parallel [(const_int 0) (const_int 2)])))
7560 (match_operand:V4SI 2 "nonimmediate_operand")
7561 (parallel [(const_int 0) (const_int 2)])))))]
7563 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn for the V4SI -> V2DI even-element widening multiply.
;; Alternative 0 emits pmuludq with the destination tied to operand 1;
;; alternative 1 emits the three-operand vpmuludq.  Requires TARGET_SSE2.
7565 (define_insn "*vec_widen_umult_even_v4si"
7566 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7570 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
7571 (parallel [(const_int 0) (const_int 2)])))
7574 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7575 (parallel [(const_int 0) (const_int 2)])))))]
7576 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
7578 pmuludq\t{%2, %0|%0, %2}
7579 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
7580 [(set_attr "isa" "noavx,avx")
7581 (set_attr "type" "sseimul")
7582 (set_attr "prefix_data16" "1,*")
7583 (set_attr "prefix" "orig,vex")
7584 (set_attr "mode" "TI")])
;; Signed counterpart (per the pattern name) of the V16SI -> V8DI
;; even-element widening multiply expander.  Each input is paired with the
;; even indices 0, 2, ..., 14; operands are fixed up with
;; ix86_fixup_binary_operands_no_copy.
7586 (define_expand "vec_widen_smult_even_v16si<mask_name>"
7587 [(set (match_operand:V8DI 0 "register_operand")
7591 (match_operand:V16SI 1 "nonimmediate_operand")
7592 (parallel [(const_int 0) (const_int 2)
7593 (const_int 4) (const_int 6)
7594 (const_int 8) (const_int 10)
7595 (const_int 12) (const_int 14)])))
7598 (match_operand:V16SI 2 "nonimmediate_operand")
7599 (parallel [(const_int 0) (const_int 2)
7600 (const_int 4) (const_int 6)
7601 (const_int 8) (const_int 10)
7602 (const_int 12) (const_int 14)])))))]
7604 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; Matching insn for the V16SI -> V8DI even-element widening multiply in
;; its "smult" form.  Emits vpmuldq with the optional mask operand; requires
;; TARGET_AVX512F.  Operands 1 and 2 are commutative ("%").
7606 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
7607 [(set (match_operand:V8DI 0 "register_operand" "=v")
7611 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
7612 (parallel [(const_int 0) (const_int 2)
7613 (const_int 4) (const_int 6)
7614 (const_int 8) (const_int 10)
7615 (const_int 12) (const_int 14)])))
7618 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
7619 (parallel [(const_int 0) (const_int 2)
7620 (const_int 4) (const_int 6)
7621 (const_int 8) (const_int 10)
7622 (const_int 12) (const_int 14)])))))]
7623 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
7624 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7625 [(set_attr "isa" "avx512f")
7626 (set_attr "type" "sseimul")
7627 (set_attr "prefix_extra" "1")
7628 (set_attr "prefix" "evex")
7629 (set_attr "mode" "XI")])
;; "smult" form of the V8SI -> V4DI even-element widening multiply
;; expander (indices 0, 2, 4, 6 of each input).  Operands are fixed up with
;; ix86_fixup_binary_operands_no_copy.
7631 (define_expand "vec_widen_smult_even_v8si"
7632 [(set (match_operand:V4DI 0 "register_operand")
7636 (match_operand:V8SI 1 "nonimmediate_operand")
7637 (parallel [(const_int 0) (const_int 2)
7638 (const_int 4) (const_int 6)])))
7641 (match_operand:V8SI 2 "nonimmediate_operand")
7642 (parallel [(const_int 0) (const_int 2)
7643 (const_int 4) (const_int 6)])))))]
7645 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn for the V8SI -> V4DI even-element widening multiply in
;; its "smult" form.  Emits the three-operand vpmuldq; requires TARGET_AVX2.
;; Operand 1 carries the "%" commutative modifier, like the other widening
;; multiply insns in this file: the condition already accepts either
;; operand order through ix86_binary_operator_ok (MULT, ...), so the
;; register allocator may swap operands 1 and 2 to use a memory source
;; directly.
7647 (define_insn "*vec_widen_smult_even_v8si"
7648 [(set (match_operand:V4DI 0 "register_operand" "=x")
7652 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
7653 (parallel [(const_int 0) (const_int 2)
7654 (const_int 4) (const_int 6)])))
7657 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
7658 (parallel [(const_int 0) (const_int 2)
7659 (const_int 4) (const_int 6)])))))]
7660 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
7661 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
7662 [(set_attr "type" "sseimul")
7663 (set_attr "prefix_extra" "1")
7664 (set_attr "prefix" "vex")
7665 (set_attr "mode" "OI")])
;; Expander for the V4SI -> V2DI widening multiply of elements 0 and 2.
;; Operands are fixed up with ix86_fixup_binary_operands_no_copy.
7667 (define_expand "sse4_1_mulv2siv2di3"
7668 [(set (match_operand:V2DI 0 "register_operand")
7672 (match_operand:V4SI 1 "nonimmediate_operand")
7673 (parallel [(const_int 0) (const_int 2)])))
7676 (match_operand:V4SI 2 "nonimmediate_operand")
7677 (parallel [(const_int 0) (const_int 2)])))))]
7679 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn for sse4_1_mulv2siv2di3.  Alternative 0 emits pmuldq with
;; the destination tied to operand 1; alternative 1 emits the three-operand
;; vpmuldq.  Requires TARGET_SSE4_1.
7681 (define_insn "*sse4_1_mulv2siv2di3"
7682 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7686 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
7687 (parallel [(const_int 0) (const_int 2)])))
7690 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7691 (parallel [(const_int 0) (const_int 2)])))))]
7692 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
7694 pmuldq\t{%2, %0|%0, %2}
7695 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
7696 [(set_attr "isa" "noavx,avx")
7697 (set_attr "type" "sseimul")
7698 (set_attr "prefix_data16" "1,*")
7699 (set_attr "prefix_extra" "1")
7700 (set_attr "prefix" "orig,vex")
7701 (set_attr "mode" "TI")])
;; Expander for avx2_pmaddwd (V16HI inputs, V8SI result).  The template
;; pairs operands 1 and 2 first with the even indices 0..14 and then, via
;; match_dup, with the odd indices 1..15 as V8HI selections.  Operands are
;; fixed up with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the codes combining the selections are not visible in this
;; excerpt.
7703 (define_expand "avx2_pmaddwd"
7704 [(set (match_operand:V8SI 0 "register_operand")
7709 (match_operand:V16HI 1 "nonimmediate_operand")
7710 (parallel [(const_int 0) (const_int 2)
7711 (const_int 4) (const_int 6)
7712 (const_int 8) (const_int 10)
7713 (const_int 12) (const_int 14)])))
7716 (match_operand:V16HI 2 "nonimmediate_operand")
7717 (parallel [(const_int 0) (const_int 2)
7718 (const_int 4) (const_int 6)
7719 (const_int 8) (const_int 10)
7720 (const_int 12) (const_int 14)]))))
7723 (vec_select:V8HI (match_dup 1)
7724 (parallel [(const_int 1) (const_int 3)
7725 (const_int 5) (const_int 7)
7726 (const_int 9) (const_int 11)
7727 (const_int 13) (const_int 15)])))
7729 (vec_select:V8HI (match_dup 2)
7730 (parallel [(const_int 1) (const_int 3)
7731 (const_int 5) (const_int 7)
7732 (const_int 9) (const_int 11)
7733 (const_int 13) (const_int 15)]))))))]
7735 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Matching insn for avx2_pmaddwd: emits the three-operand vpmaddwd.
;; Requires TARGET_AVX2; operands 1 and 2 are commutative ("%").  The
;; template uses the same even/odd element selections as the expander.
7737 (define_insn "*avx2_pmaddwd"
7738 [(set (match_operand:V8SI 0 "register_operand" "=x")
7743 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
7744 (parallel [(const_int 0) (const_int 2)
7745 (const_int 4) (const_int 6)
7746 (const_int 8) (const_int 10)
7747 (const_int 12) (const_int 14)])))
7750 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
7751 (parallel [(const_int 0) (const_int 2)
7752 (const_int 4) (const_int 6)
7753 (const_int 8) (const_int 10)
7754 (const_int 12) (const_int 14)]))))
7757 (vec_select:V8HI (match_dup 1)
7758 (parallel [(const_int 1) (const_int 3)
7759 (const_int 5) (const_int 7)
7760 (const_int 9) (const_int 11)
7761 (const_int 13) (const_int 15)])))
7763 (vec_select:V8HI (match_dup 2)
7764 (parallel [(const_int 1) (const_int 3)
7765 (const_int 5) (const_int 7)
7766 (const_int 9) (const_int 11)
7767 (const_int 13) (const_int 15)]))))))]
7768 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
7769 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
7770 [(set_attr "type" "sseiadd")
7771 (set_attr "prefix" "vex")
7772 (set_attr "mode" "OI")])
;; Expander for sse2_pmaddwd (V8HI inputs, V4SI result).  The template
;; pairs operands 1 and 2 with the even indices 0, 2, 4, 6 and, via
;; match_dup, with the odd indices 1, 3, 5, 7 as V4HI selections.  Operands
;; are fixed up with ix86_fixup_binary_operands_no_copy.
7774 (define_expand "sse2_pmaddwd"
7775 [(set (match_operand:V4SI 0 "register_operand")
7780 (match_operand:V8HI 1 "nonimmediate_operand")
7781 (parallel [(const_int 0) (const_int 2)
7782 (const_int 4) (const_int 6)])))
7785 (match_operand:V8HI 2 "nonimmediate_operand")
7786 (parallel [(const_int 0) (const_int 2)
7787 (const_int 4) (const_int 6)]))))
7790 (vec_select:V4HI (match_dup 1)
7791 (parallel [(const_int 1) (const_int 3)
7792 (const_int 5) (const_int 7)])))
7794 (vec_select:V4HI (match_dup 2)
7795 (parallel [(const_int 1) (const_int 3)
7796 (const_int 5) (const_int 7)]))))))]
7798 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for sse2_pmaddwd.  Alternative 0 emits pmaddwd with the
;; destination tied to operand 1; alternative 1 emits the three-operand
;; vpmaddwd.  Requires TARGET_SSE2.
7800 (define_insn "*sse2_pmaddwd"
7801 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7806 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
7807 (parallel [(const_int 0) (const_int 2)
7808 (const_int 4) (const_int 6)])))
7811 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7812 (parallel [(const_int 0) (const_int 2)
7813 (const_int 4) (const_int 6)]))))
7816 (vec_select:V4HI (match_dup 1)
7817 (parallel [(const_int 1) (const_int 3)
7818 (const_int 5) (const_int 7)])))
7820 (vec_select:V4HI (match_dup 2)
7821 (parallel [(const_int 1) (const_int 3)
7822 (const_int 5) (const_int 7)]))))))]
7823 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
7825 pmaddwd\t{%2, %0|%0, %2}
7826 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
7827 [(set_attr "isa" "noavx,avx")
7828 (set_attr "type" "sseiadd")
7829 (set_attr "atom_unit" "simul")
7830 (set_attr "prefix_data16" "1,*")
7831 (set_attr "prefix" "orig,vex")
7832 (set_attr "mode" "TI")])
;; Multiply expander for the VI4_AVX512F modes (TARGET_SSE2, subject to
;; <mask_mode512bit_condition>).  The visible preparation code has two
;; pieces: one forces any operand that is not a nonimmediate_operand into a
;; register and then calls ix86_fixup_binary_operands_no_copy; the other
;; calls ix86_expand_sse2_mulv4si3 directly.
;; NOTE(review): the condition selecting between the two paths is not
;; visible in this excerpt.
7834 (define_expand "mul<mode>3<mask_name>"
7835 [(set (match_operand:VI4_AVX512F 0 "register_operand")
7837 (match_operand:VI4_AVX512F 1 "general_vector_operand")
7838 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
7839 "TARGET_SSE2 && <mask_mode512bit_condition>"
7843 if (!nonimmediate_operand (operands[1], <MODE>mode))
7844 operands[1] = force_reg (<MODE>mode, operands[1]);
7845 if (!nonimmediate_operand (operands[2], <MODE>mode))
7846 operands[2] = force_reg (<MODE>mode, operands[2]);
7847 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
7851 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; Matching insn for the VI4_AVX512F multiply.  Alternative 0 emits pmulld
;; with the destination tied to operand 1; alternative 1 emits vpmulld with
;; the optional mask operand.  Requires TARGET_SSE4_1.
7856 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
7857 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
7859 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
7860 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
7861 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
7863 pmulld\t{%2, %0|%0, %2}
7864 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7865 [(set_attr "isa" "noavx,avx")
7866 (set_attr "type" "sseimul")
7867 (set_attr "prefix_extra" "1")
7868 (set_attr "prefix" "<mask_prefix3>")
7869 (set_attr "btver2_decode" "vector,vector")
7870 (set_attr "mode" "<sseinsnmode>")])
;; Multiply expander for the VI8_AVX2_AVX512F modes; all operands must be
;; registers.  The visible preparation code delegates the expansion to
;; ix86_expand_sse2_mulvxdi3.
7872 (define_expand "mul<mode>3"
7873 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
7874 (mult:VI8_AVX2_AVX512F
7875 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
7876 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
7879 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Expander for vec_widen_<s>mult_hi_<mode> over the VI124_AVX2 modes; the
;; result is in <sseunpackmode>.  Expansion is delegated to
;; ix86_expand_mul_widen_hilo (its trailing arguments are not visible in
;; this excerpt).
7883 (define_expand "vec_widen_<s>mult_hi_<mode>"
7884 [(match_operand:<sseunpackmode> 0 "register_operand")
7885 (any_extend:<sseunpackmode>
7886 (match_operand:VI124_AVX2 1 "register_operand"))
7887 (match_operand:VI124_AVX2 2 "register_operand")]
7890 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Expander for vec_widen_<s>mult_lo_<mode> over the VI124_AVX2 modes; the
;; result is in <sseunpackmode>.  Expansion is delegated to
;; ix86_expand_mul_widen_hilo (its trailing arguments are not visible in
;; this excerpt).
7895 (define_expand "vec_widen_<s>mult_lo_<mode>"
7896 [(match_operand:<sseunpackmode> 0 "register_operand")
7897 (any_extend:<sseunpackmode>
7898 (match_operand:VI124_AVX2 1 "register_operand"))
7899 (match_operand:VI124_AVX2 2 "register_operand")]
7902 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
7907 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
7908 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Takes two V4SI inputs and produces a V2DI result by calling
;; ix86_expand_mul_widen_evenodd (its trailing arguments are not visible in
;; this excerpt).
7909 (define_expand "vec_widen_smult_even_v4si"
7910 [(match_operand:V2DI 0 "register_operand")
7911 (match_operand:V4SI 1 "nonimmediate_operand")
7912 (match_operand:V4SI 2 "nonimmediate_operand")]
7915 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Expander for vec_widen_<s>mult_odd_<mode> over the VI4_AVX512F modes;
;; the result is in <sseunpackmode>.  Expansion is delegated to
;; ix86_expand_mul_widen_evenodd (its trailing arguments are not visible in
;; this excerpt).
7920 (define_expand "vec_widen_<s>mult_odd_<mode>"
7921 [(match_operand:<sseunpackmode> 0 "register_operand")
7922 (any_extend:<sseunpackmode>
7923 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
7924 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
7927 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Dot-product expander for VI2_AVX2 inputs with an <sseunpackmode>
;; result and operand 3.  It emits a pmaddwd of operands 1 and 2 into a
;; fresh temporary t, then sets operand 0 to a PLUS in <sseunpackmode>.
;; NOTE(review): the PLUS arguments are not visible in this excerpt;
;; presumably they are operand 3 and t.
7932 (define_expand "sdot_prod<mode>"
7933 [(match_operand:<sseunpackmode> 0 "register_operand")
7934 (match_operand:VI2_AVX2 1 "register_operand")
7935 (match_operand:VI2_AVX2 2 "register_operand")
7936 (match_operand:<sseunpackmode> 3 "register_operand")]
7939 rtx t = gen_reg_rtx (<sseunpackmode>mode);
7940 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
7941 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
7942 gen_rtx_PLUS (<sseunpackmode>mode,
7947 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
7948 ;; back together when madd is available.
;; V4SI dot product with a V2DI result.  Two insns are emitted:
;; xop_pmacsdqh writes a fresh V2DI temporary t from operands 1, 2 and 3,
;; then xop_pmacsdql writes operand 0 from operands 1, 2 and t.
7949 (define_expand "sdot_prodv4si"
7950 [(match_operand:V2DI 0 "register_operand")
7951 (match_operand:V4SI 1 "register_operand")
7952 (match_operand:V4SI 2 "register_operand")
7953 (match_operand:V2DI 3 "register_operand")]
7956 rtx t = gen_reg_rtx (V2DImode);
7957 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
7958 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Shift insn for the VI24_AVX2 modes; emits psra<suffix> (destination
;; tied to operand 1) or the three-operand vpsra<suffix>.  The count is an
;; SImode operand with constraint "xN".  The length_immediate attribute is
;; selected by whether operand 2 is a const_int.
7962 (define_insn "ashr<mode>3"
7963 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
7965 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
7966 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
7969 psra<ssemodesuffix>\t{%2, %0|%0, %2}
7970 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7971 [(set_attr "isa" "noavx,avx")
7972 (set_attr "type" "sseishft")
7973 (set (attr "length_immediate")
7974 (if_then_else (match_operand 2 "const_int_operand")
7976 (const_string "0")))
7977 (set_attr "prefix_data16" "1,*")
7978 (set_attr "prefix" "orig,vex")
7979 (set_attr "mode" "<sseinsnmode>")])
;; Shift insn for the VI48_512 modes (TARGET_AVX512F); emits vpsra<suffix>
;; with the optional mask operand.  Alternative 0 takes a register source
;; and register count; alternative 1 allows a register-or-memory source
;; with an "N" constant count.
7981 (define_insn "ashr<mode>3<mask_name>"
7982 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
7984 (match_operand:VI48_512 1 "nonimmediate_operand" "v,vm")
7985 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
7986 "TARGET_AVX512F && <mask_mode512bit_condition>"
7987 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7988 [(set_attr "type" "sseishft")
7989 (set (attr "length_immediate")
7990 (if_then_else (match_operand 2 "const_int_operand")
7992 (const_string "0")))
7993 (set_attr "mode" "<sseinsnmode>")])
;; Shift insn (any_lshift codes) for the VI248_AVX2 modes; emits
;; p<vshift><suffix> (destination tied to operand 1) or the three-operand
;; vp<vshift><suffix>.  The count is an SImode operand with constraint
;; "xN"; length_immediate is selected by whether it is a const_int.
7995 (define_insn "<shift_insn><mode>3"
7996 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
7997 (any_lshift:VI248_AVX2
7998 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
7999 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
8002 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
8003 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8004 [(set_attr "isa" "noavx,avx")
8005 (set_attr "type" "sseishft")
8006 (set (attr "length_immediate")
8007 (if_then_else (match_operand 2 "const_int_operand")
8009 (const_string "0")))
8010 (set_attr "prefix_data16" "1,*")
8011 (set_attr "prefix" "orig,vex")
8012 (set_attr "mode" "<sseinsnmode>")])
;; Shift insn (any_lshift codes) for the VI48_512 modes (TARGET_AVX512F);
;; emits vp<vshift><suffix> with the optional mask operand.
;; Alternative 0: register source, register or "N" constant count.
;; Alternative 1: memory source with an "N" constant count.
;; Operand 1 uses nonimmediate_operand so that the memory alternative can
;; actually match, consistent with the 512-bit ashr<mode>3<mask_name>
;; pattern; with register_operand a memory source was rejected by the
;; predicate before constraints were considered.
8014 (define_insn "<shift_insn><mode>3<mask_name>"
8015 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
8016 (any_lshift:VI48_512
8017 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
8018 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
8019 "TARGET_AVX512F && <mask_mode512bit_condition>"
8020 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8021 [(set_attr "isa" "avx512f")
8022 (set_attr "type" "sseishft")
8023 (set (attr "length_immediate")
8024 (if_then_else (match_operand 2 "const_int_operand")
8026 (const_string "0")))
8027 (set_attr "prefix" "evex")
8028 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for the VI_128 modes.  The count must
;; satisfy const_0_to_255_mul_8_operand.  The preparation code views
;; operand 1 as V1TImode (gen_lowpart), allocates a V1TImode temporary as
;; operand 3, and makes operand 4 the <MODE>mode view of that temporary;
;; the second set then copies operand 4 into operand 0.
8031 (define_expand "vec_shl_<mode>"
8034 (match_operand:VI_128 1 "register_operand")
8035 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8036 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8039 operands[1] = gen_lowpart (V1TImode, operands[1]);
8040 operands[3] = gen_reg_rtx (V1TImode);
8041 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Shift insn for the VIMAX_AVX2 modes emitting pslldq / vpslldq.  Operand
;; 2 must satisfy const_0_to_255_mul_8_operand; the output code divides it
;; by 8 before printing, then picks the mnemonic by which_alternative.
8044 (define_insn "<sse2_avx2>_ashl<mode>3"
8045 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8047 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8048 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8051 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8053 switch (which_alternative)
8056 return "pslldq\t{%2, %0|%0, %2}";
8058 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
8063 [(set_attr "isa" "noavx,avx")
8064 (set_attr "type" "sseishft")
8065 (set_attr "length_immediate" "1")
8066 (set_attr "prefix_data16" "1,*")
8067 (set_attr "prefix" "orig,vex")
8068 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for the VI_128 modes (vec_shr).  The count
;; must satisfy const_0_to_255_mul_8_operand.  The preparation code views
;; operand 1 as V1TImode (gen_lowpart), allocates a V1TImode temporary as
;; operand 3, and makes operand 4 the <MODE>mode view of that temporary;
;; the second set then copies operand 4 into operand 0.
8070 (define_expand "vec_shr_<mode>"
8073 (match_operand:VI_128 1 "register_operand")
8074 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8075 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8078 operands[1] = gen_lowpart (V1TImode, operands[1]);
8079 operands[3] = gen_reg_rtx (V1TImode);
8080 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Logical right shift insn (lshiftrt) for the VIMAX_AVX2 modes emitting
;; psrldq / vpsrldq.  Operand 2 must satisfy const_0_to_255_mul_8_operand;
;; the output code divides it by 8 before printing, then picks the mnemonic
;; by which_alternative.
8083 (define_insn "<sse2_avx2>_lshr<mode>3"
8084 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8085 (lshiftrt:VIMAX_AVX2
8086 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8087 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8090 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8092 switch (which_alternative)
8095 return "psrldq\t{%2, %0|%0, %2}";
8097 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
8102 [(set_attr "isa" "noavx,avx")
8103 (set_attr "type" "sseishft")
8104 (set_attr "length_immediate" "1")
8105 (set_attr "atom_unit" "sishuf")
8106 (set_attr "prefix_data16" "1,*")
8107 (set_attr "prefix" "orig,vex")
8108 (set_attr "mode" "<sseinsnmode>")])
;; Rotate insn (any_rotate codes) for the VI48_512 modes where the rotate
;; count is itself a VI48_512 vector (register or memory).  Emits
;; vp<rotate>v<suffix> with the optional mask operand.
8110 (define_insn "avx512f_<rotate>v<mode><mask_name>"
8111 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8112 (any_rotate:VI48_512
8113 (match_operand:VI48_512 1 "register_operand" "v")
8114 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")))]
8116 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8117 [(set_attr "prefix" "evex")
8118 (set_attr "mode" "<sseinsnmode>")])
;; Rotate insn (any_rotate codes) for the VI48_512 modes where the rotate
;; count is an SImode constant accepted by const_0_to_255_operand.  The
;; source may be a register or memory.  Emits vp<rotate><suffix> with the
;; optional mask operand.
8120 (define_insn "avx512f_<rotate><mode><mask_name>"
8121 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8122 (any_rotate:VI48_512
8123 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")
8124 (match_operand:SI 2 "const_0_to_255_operand")))]
8126 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8127 [(set_attr "prefix" "evex")
8128 (set_attr "mode" "<sseinsnmode>")])
;; Min/max expander (maxmin codes) for the VI124_256_48_512 modes.
;; Requires TARGET_AVX2 and <mask_mode512bit_condition>; operands are fixed
;; up with ix86_fixup_binary_operands_no_copy.
8130 (define_expand "<code><mode>3<mask_name>"
8131 [(set (match_operand:VI124_256_48_512 0 "register_operand")
8132 (maxmin:VI124_256_48_512
8133 (match_operand:VI124_256_48_512 1 "nonimmediate_operand")
8134 (match_operand:VI124_256_48_512 2 "nonimmediate_operand")))]
8135 "TARGET_AVX2 && <mask_mode512bit_condition>"
8136 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for min/max on the VI124_256_48_512 modes; emits
;; vp<maxmin_int><suffix> with the optional mask operand.  Operands 1 and 2
;; are commutative ("%").
;; The "mode" attribute is taken from <sseinsnmode> instead of a fixed
;; "OI", because the iterator name and the <mask_mode512bit_condition> in
;; the condition indicate that 512-bit modes are covered as well.
;; NOTE(review): assumes <sseinsnmode> maps the 256-bit modes of this
;; iterator to OI, as the other insns in this file rely on - confirm.
8138 (define_insn "*avx2_<code><mode>3<mask_name>"
8139 [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v")
8140 (maxmin:VI124_256_48_512
8141 (match_operand:VI124_256_48_512 1 "nonimmediate_operand" "%v")
8142 (match_operand:VI124_256_48_512 2 "nonimmediate_operand" "vm")))]
8143 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
8144 && <mask_mode512bit_condition>"
8145 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8146 [(set_attr "type" "sseiadd")
8147 (set_attr "prefix_extra" "1")
8148 (set_attr "prefix" "maybe_evex")
8149 (set_attr "mode" "<sseinsnmode>")])
;; Min/max expander for the VI8_AVX2 modes, implemented as a vector
;; conditional through ix86_expand_int_vcond.  xops[0] is the destination.
;; For SMAX/UMAX xops[1]/xops[2] are operands 1/2; the other visible
;; assignment swaps them.  The comparison placed in xops[3] is GTU for
;; UMAX/UMIN and GT otherwise, comparing operands 1 and 2, which are also
;; stored in xops[4] and xops[5].
8151 (define_expand "<code><mode>3"
8152 [(set (match_operand:VI8_AVX2 0 "register_operand")
8154 (match_operand:VI8_AVX2 1 "register_operand")
8155 (match_operand:VI8_AVX2 2 "register_operand")))]
8162 xops[0] = operands[0];
8164 if (<CODE> == SMAX || <CODE> == UMAX)
8166 xops[1] = operands[1];
8167 xops[2] = operands[2];
8171 xops[1] = operands[2];
8172 xops[2] = operands[1];
8175 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
8177 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
8178 xops[4] = operands[1];
8179 xops[5] = operands[2];
8181 ok = ix86_expand_int_vcond (xops);
;; Min/max expander for the VI124_128 modes using a GT comparison.  With
;; TARGET_SSE4_1, or for V8HImode, the operands are only fixed up with
;; ix86_fixup_binary_operands_no_copy.  The remaining visible code forces
;; both inputs into registers, fills xops (destination, the two inputs in
;; either order, a GT comparison of operands 1 and 2, and the two inputs
;; again) and calls ix86_expand_int_vcond.
;; NOTE(review): the condition choosing the xops[1]/xops[2] order is not
;; visible in this excerpt.
8186 (define_expand "<code><mode>3"
8187 [(set (match_operand:VI124_128 0 "register_operand")
8189 (match_operand:VI124_128 1 "nonimmediate_operand")
8190 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8193 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
8194 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8200 xops[0] = operands[0];
8201 operands[1] = force_reg (<MODE>mode, operands[1]);
8202 operands[2] = force_reg (<MODE>mode, operands[2]);
8206 xops[1] = operands[1];
8207 xops[2] = operands[2];
8211 xops[1] = operands[2];
8212 xops[2] = operands[1];
8215 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
8216 xops[4] = operands[1];
8217 xops[5] = operands[2];
8219 ok = ix86_expand_int_vcond (xops);
;; Min/max insn for the VI14_128 modes; requires TARGET_SSE4_1.
;; Alternative 0 emits p<maxmin_int><suffix> with the destination tied to
;; operand 1; alternative 1 emits the three-operand v-prefixed form.
8225 (define_insn "*sse4_1_<code><mode>3"
8226 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
8228 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
8229 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
8230 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8232 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
8233 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8234 [(set_attr "isa" "noavx,avx")
8235 (set_attr "type" "sseiadd")
8236 (set_attr "prefix_extra" "1,*")
8237 (set_attr "prefix" "orig,vex")
8238 (set_attr "mode" "TI")])
;; V8HI min/max insn available with plain TARGET_SSE2.  Alternative 0
;; emits p<maxmin_int>w with the destination tied to operand 1; alternative
;; 1 emits the three-operand vp<maxmin_int>w.
8240 (define_insn "*<code>v8hi3"
8241 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8243 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
8244 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
8245 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
8247 p<maxmin_int>w\t{%2, %0|%0, %2}
8248 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
8249 [(set_attr "isa" "noavx,avx")
8250 (set_attr "type" "sseiadd")
8251 (set_attr "prefix_data16" "1,*")
8252 (set_attr "prefix_extra" "*,1")
8253 (set_attr "prefix" "orig,vex")
8254 (set_attr "mode" "TI")])
;; Min/max expander for the VI124_128 modes using a GTU comparison.
;; Three visible strategies:
;;  - TARGET_SSE4_1 or V16QImode: only fix up the operands with
;;    ix86_fixup_binary_operands_no_copy.
;;  - UMAX on V8HImode: emit sse2_ussubv8hi3 (op3 = operand 1 with operand
;;    2 subtracted) followed by addv8hi3 (op0 = op3 + op2).  op3 defaults
;;    to the destination, but a fresh register is used when the destination
;;    equals op2 so that op2 is not overwritten before the add reads it.
;;  - otherwise: force both inputs into registers, fill xops with a GTU
;;    comparison of operands 1 and 2 and call ix86_expand_int_vcond.
;; NOTE(review): the condition choosing the xops[1]/xops[2] order is not
;; visible in this excerpt.
8256 (define_expand "<code><mode>3"
8257 [(set (match_operand:VI124_128 0 "register_operand")
8259 (match_operand:VI124_128 1 "nonimmediate_operand")
8260 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8263 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
8264 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8265 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
8267 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
8268 operands[1] = force_reg (<MODE>mode, operands[1]);
8269 if (rtx_equal_p (op3, op2))
8270 op3 = gen_reg_rtx (V8HImode);
8271 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
8272 emit_insn (gen_addv8hi3 (op0, op3, op2));
8280 operands[1] = force_reg (<MODE>mode, operands[1]);
8281 operands[2] = force_reg (<MODE>mode, operands[2]);
8283 xops[0] = operands[0];
8287 xops[1] = operands[1];
8288 xops[2] = operands[2];
8292 xops[1] = operands[2];
8293 xops[2] = operands[1];
8296 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
8297 xops[4] = operands[1];
8298 xops[5] = operands[2];
8300 ok = ix86_expand_int_vcond (xops);
;; Min/max insn for the VI24_128 modes; requires TARGET_SSE4_1.
;; Alternative 0 emits p<maxmin_int><suffix> with the destination tied to
;; operand 1; alternative 1 emits the three-operand v-prefixed form.
8306 (define_insn "*sse4_1_<code><mode>3"
8307 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
8309 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
8310 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
8311 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8313 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
8314 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8315 [(set_attr "isa" "noavx,avx")
8316 (set_attr "type" "sseiadd")
8317 (set_attr "prefix_extra" "1,*")
8318 (set_attr "prefix" "orig,vex")
8319 (set_attr "mode" "TI")])
;; V16QI min/max insn available with plain TARGET_SSE2.  Alternative 0
;; emits p<maxmin_int>b with the destination tied to operand 1; alternative
;; 1 emits the three-operand vp<maxmin_int>b.
8321 (define_insn "*<code>v16qi3"
8322 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
8324 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
8325 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
8326 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
8328 p<maxmin_int>b\t{%2, %0|%0, %2}
8329 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
8330 [(set_attr "isa" "noavx,avx")
8331 (set_attr "type" "sseiadd")
8332 (set_attr "prefix_data16" "1,*")
8333 (set_attr "prefix_extra" "*,1")
8334 (set_attr "prefix" "orig,vex")
8335 (set_attr "mode" "TI")])
8337 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8339 ;; Parallel integral comparisons
8341 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Equality-comparison expander for the VI_256 modes.  Operands are fixed
;; up with ix86_fixup_binary_operands_no_copy using code EQ.
8343 (define_expand "avx2_eq<mode>3"
8344 [(set (match_operand:VI_256 0 "register_operand")
8346 (match_operand:VI_256 1 "nonimmediate_operand")
8347 (match_operand:VI_256 2 "nonimmediate_operand")))]
8349 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Matching insn for the VI_256 equality comparison; emits the
;; three-operand vpcmpeq<suffix>.  Requires TARGET_AVX2; operands 1 and 2
;; are commutative ("%").
8351 (define_insn "*avx2_eq<mode>3"
8352 [(set (match_operand:VI_256 0 "register_operand" "=x")
8354 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
8355 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
8356 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8357 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8358 [(set_attr "type" "ssecmp")
8359 (set_attr "prefix_extra" "1")
8360 (set_attr "prefix" "vex")
8361 (set_attr "mode" "OI")])
;; Equality-comparison expander for the VI48_512 modes.  The result is in
;; <avx512fmaskmode> and is expressed as an unspec of the two inputs.
;; Operands are fixed up with ix86_fixup_binary_operands_no_copy (EQ).
8363 (define_expand "avx512f_eq<mode>3"
8364 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
8365 (unspec:<avx512fmaskmode>
8366 [(match_operand:VI48_512 1 "register_operand")
8367 (match_operand:VI48_512 2 "nonimmediate_operand")]
8370 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Matching insn for the VI48_512 equality comparison; the result goes to
;; a "k" constraint register in <avx512fmaskmode>.  Emits
;; vpcmpeq<suffix>; requires TARGET_AVX512F.
8372 (define_insn "avx512f_eq<mode>3_1"
8373 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
8374 (unspec:<avx512fmaskmode>
8375 [(match_operand:VI48_512 1 "register_operand" "%v")
8376 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
8378 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8379 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8380 [(set_attr "type" "ssecmp")
8381 (set_attr "prefix_extra" "1")
8382 (set_attr "prefix" "evex")
8383 (set_attr "mode" "<sseinsnmode>")])
;; V2DI equality-comparison insn; requires TARGET_SSE4_1.  Alternative 0
;; emits pcmpeqq with the destination tied to operand 1; alternative 1
;; emits the three-operand vpcmpeqq.
8385 (define_insn "*sse4_1_eqv2di3"
8386 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8388 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
8389 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
8390 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
8392 pcmpeqq\t{%2, %0|%0, %2}
8393 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
8394 [(set_attr "isa" "noavx,avx")
8395 (set_attr "type" "ssecmp")
8396 (set_attr "prefix_extra" "1")
8397 (set_attr "prefix" "orig,vex")
8398 (set_attr "mode" "TI")])
;; Equality-comparison insn for the VI124_128 modes; enabled for
;; TARGET_SSE2 when TARGET_XOP is off.  Alternative 0 emits
;; pcmpeq<suffix> with the destination tied to operand 1; alternative 1
;; emits the three-operand vpcmpeq<suffix>.
8400 (define_insn "*sse2_eq<mode>3"
8401 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
8403 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
8404 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
8405 "TARGET_SSE2 && !TARGET_XOP
8406 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8408 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
8409 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8410 [(set_attr "isa" "noavx,avx")
8411 (set_attr "type" "ssecmp")
8412 (set_attr "prefix_data16" "1,*")
8413 (set_attr "prefix" "orig,vex")
8414 (set_attr "mode" "TI")])
;; Equality-comparison expander for the VI124_128 modes; enabled for
;; TARGET_SSE2 when TARGET_XOP is off.  Operands are fixed up with
;; ix86_fixup_binary_operands_no_copy (EQ).
8416 (define_expand "sse2_eq<mode>3"
8417 [(set (match_operand:VI124_128 0 "register_operand")
8419 (match_operand:VI124_128 1 "nonimmediate_operand")
8420 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8421 "TARGET_SSE2 && !TARGET_XOP "
8422 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; V2DI equality-comparison expander.  Operands are fixed up with
;; ix86_fixup_binary_operands_no_copy (EQ, V2DImode).
8424 (define_expand "sse4_1_eqv2di3"
8425 [(set (match_operand:V2DI 0 "register_operand")
8427 (match_operand:V2DI 1 "nonimmediate_operand")
8428 (match_operand:V2DI 2 "nonimmediate_operand")))]
8430 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI greater-than comparison insn.  Alternative 0 emits pcmpgtq with
;; the destination tied to operand 1; alternative 1 emits the three-operand
;; vpcmpgtq.  Operand 1 must be a register and carries no "%" modifier.
8432 (define_insn "sse4_2_gtv2di3"
8433 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8435 (match_operand:V2DI 1 "register_operand" "0,x")
8436 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
8439 pcmpgtq\t{%2, %0|%0, %2}
8440 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
8441 [(set_attr "isa" "noavx,avx")
8442 (set_attr "type" "ssecmp")
8443 (set_attr "prefix_extra" "1")
8444 (set_attr "prefix" "orig,vex")
8445 (set_attr "mode" "TI")])
;; Greater-than comparison insn for the VI_256 modes; emits the
;; three-operand vpcmpgt<suffix>.  Operand 1 must be a register; operand 2
;; may be a register or memory.
8447 (define_insn "avx2_gt<mode>3"
8448 [(set (match_operand:VI_256 0 "register_operand" "=x")
8450 (match_operand:VI_256 1 "register_operand" "x")
8451 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
8453 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8454 [(set_attr "type" "ssecmp")
8455 (set_attr "prefix_extra" "1")
8456 (set_attr "prefix" "vex")
8457 (set_attr "mode" "OI")])
;; Insn for the greater-than comparison on VI48_512 vectors.  Unlike the
;; narrower variants, the result goes to a mask register (constraint "k",
;; mode <avx512fmaskmode>), and the operation is modelled as an unspec
;; (UNSPEC_MASKED_GT) over the two vector operands.  It is EVEX-encoded.
8459 (define_insn "avx512f_gt<mode>3"
8460 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
8461 (unspec:<avx512fmaskmode>
8462 [(match_operand:VI48_512 1 "register_operand" "v")
8463 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
8465 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8466 [(set_attr "type" "ssecmp")
8467 (set_attr "prefix_extra" "1")
8468 (set_attr "prefix" "evex")
8469 (set_attr "mode" "<sseinsnmode>")])
;; Insn for the greater-than comparison on 128-bit integer vectors
;; (VI124_128).  It is enabled with SSE2 when XOP is off.  Operand 1 must
;; be a register.  Alternative 0 is the two-operand pcmpgt* (destination
;; tied to operand 1), alternative 1 the three-operand vpcmpgt*.
8471 (define_insn "sse2_gt<mode>3"
8472 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
8474 (match_operand:VI124_128 1 "register_operand" "0,x")
8475 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
8476 "TARGET_SSE2 && !TARGET_XOP"
8478 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
8479 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8480 [(set_attr "isa" "noavx,avx")
8481 (set_attr "type" "ssecmp")
8482 (set_attr "prefix_data16" "1,*")
8483 (set_attr "prefix" "orig,vex")
8484 (set_attr "mode" "TI")])
;; vcond expander for 512-bit data modes (V_512) with a 512-bit integer
;; comparison (VI_512).  Operand 3 is the comparison operator applied to
;; operands 4 and 5; operands 1 and 2 are the two data values; operand 0
;; is the destination.  The condition requires the data and comparison
;; modes to have the same number of elements.  The expansion itself is
;; delegated to ix86_expand_int_vcond, whose boolean result is stored
;; in `ok'.
8486 (define_expand "vcond<V_512:mode><VI_512:mode>"
8487 [(set (match_operand:V_512 0 "register_operand")
8489 (match_operator 3 ""
8490 [(match_operand:VI_512 4 "nonimmediate_operand")
8491 (match_operand:VI_512 5 "general_operand")])
8492 (match_operand:V_512 1)
8493 (match_operand:V_512 2)))]
8495 && (GET_MODE_NUNITS (<V_512:MODE>mode)
8496 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
8498 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander for 256-bit data modes (V_256) with a 256-bit integer
;; comparison (VI_256).  Operand 3 is the comparison operator applied to
;; operands 4 and 5; operands 1 and 2 are the two data values.  The data
;; and comparison modes must have equal element counts.  The expansion is
;; delegated to ix86_expand_int_vcond, whose boolean result is stored
;; in `ok'.
8503 (define_expand "vcond<V_256:mode><VI_256:mode>"
8504 [(set (match_operand:V_256 0 "register_operand")
8506 (match_operator 3 ""
8507 [(match_operand:VI_256 4 "nonimmediate_operand")
8508 (match_operand:VI_256 5 "general_operand")])
8509 (match_operand:V_256 1)
8510 (match_operand:V_256 2)))]
8512 && (GET_MODE_NUNITS (<V_256:MODE>mode)
8513 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
8515 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander for 128-bit data modes (V_128) with a comparison on
;; 8/16/32-bit-element integer vectors (VI124_128).  Operand 3 is the
;; comparison operator applied to operands 4 and 5; operands 1 and 2 are
;; the two data values.  The data and comparison modes must have equal
;; element counts.  The expansion is delegated to ix86_expand_int_vcond,
;; whose boolean result is stored in `ok'.
8520 (define_expand "vcond<V_128:mode><VI124_128:mode>"
8521 [(set (match_operand:V_128 0 "register_operand")
8523 (match_operator 3 ""
8524 [(match_operand:VI124_128 4 "nonimmediate_operand")
8525 (match_operand:VI124_128 5 "general_operand")])
8526 (match_operand:V_128 1)
8527 (match_operand:V_128 2)))]
8529 && (GET_MODE_NUNITS (<V_128:MODE>mode)
8530 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
8532 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander whose comparison is done in V2DI and whose data mode
;; comes from the VI8F_128 iterator.  The if_then_else selects between
;; operands 1 and 2 according to comparison operator 3 applied to
;; operands 4 and 5.  The expansion is delegated to ix86_expand_int_vcond,
;; whose boolean result is stored in `ok'.
8537 (define_expand "vcond<VI8F_128:mode>v2di"
8538 [(set (match_operand:VI8F_128 0 "register_operand")
8539 (if_then_else:VI8F_128
8540 (match_operator 3 ""
8541 [(match_operand:V2DI 4 "nonimmediate_operand")
8542 (match_operand:V2DI 5 "general_operand")])
8543 (match_operand:VI8F_128 1)
8544 (match_operand:VI8F_128 2)))]
8547 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander (the unsigned-comparison counterpart of vcond in GCC's
;; standard pattern names) for 512-bit data modes with a VI_512 comparison.
;; Here operand 5 is restricted to nonimmediate_operand and the data
;; operands 1 and 2 carry the general_operand predicate.  The data and
;; comparison modes must have equal element counts.  The expansion goes
;; through the same ix86_expand_int_vcond helper, whose boolean result is
;; stored in `ok'.
8552 (define_expand "vcondu<V_512:mode><VI_512:mode>"
8553 [(set (match_operand:V_512 0 "register_operand")
8555 (match_operator 3 ""
8556 [(match_operand:VI_512 4 "nonimmediate_operand")
8557 (match_operand:VI_512 5 "nonimmediate_operand")])
8558 (match_operand:V_512 1 "general_operand")
8559 (match_operand:V_512 2 "general_operand")))]
8561 && (GET_MODE_NUNITS (<V_512:MODE>mode)
8562 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
8564 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander for 256-bit data modes (V_256) with a VI_256
;; comparison.  Operand 3 is the comparison operator on operands 4 and 5
;; (both nonimmediate_operand); operands 1 and 2 are general_operand data
;; values.  The data and comparison modes must have equal element counts.
;; The expansion is delegated to ix86_expand_int_vcond, whose boolean
;; result is stored in `ok'.
8569 (define_expand "vcondu<V_256:mode><VI_256:mode>"
8570 [(set (match_operand:V_256 0 "register_operand")
8572 (match_operator 3 ""
8573 [(match_operand:VI_256 4 "nonimmediate_operand")
8574 (match_operand:VI_256 5 "nonimmediate_operand")])
8575 (match_operand:V_256 1 "general_operand")
8576 (match_operand:V_256 2 "general_operand")))]
8578 && (GET_MODE_NUNITS (<V_256:MODE>mode)
8579 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
8581 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander for 128-bit data modes (V_128) with a VI124_128
;; comparison.  Operand 3 is the comparison operator on operands 4 and 5
;; (both nonimmediate_operand); operands 1 and 2 are general_operand data
;; values.  The data and comparison modes must have equal element counts.
;; The expansion is delegated to ix86_expand_int_vcond, whose boolean
;; result is stored in `ok'.
8586 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
8587 [(set (match_operand:V_128 0 "register_operand")
8589 (match_operator 3 ""
8590 [(match_operand:VI124_128 4 "nonimmediate_operand")
8591 (match_operand:VI124_128 5 "nonimmediate_operand")])
8592 (match_operand:V_128 1 "general_operand")
8593 (match_operand:V_128 2 "general_operand")))]
8595 && (GET_MODE_NUNITS (<V_128:MODE>mode)
8596 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
8598 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander whose comparison is done in V2DI and whose data mode
;; comes from the VI8F_128 iterator.  The if_then_else selects between
;; operands 1 and 2 according to comparison operator 3 applied to
;; operands 4 and 5 (both nonimmediate_operand).  The expansion is
;; delegated to ix86_expand_int_vcond, whose boolean result is stored
;; in `ok'.
8603 (define_expand "vcondu<VI8F_128:mode>v2di"
8604 [(set (match_operand:VI8F_128 0 "register_operand")
8605 (if_then_else:VI8F_128
8606 (match_operator 3 ""
8607 [(match_operand:V2DI 4 "nonimmediate_operand")
8608 (match_operand:V2DI 5 "nonimmediate_operand")])
8609 (match_operand:VI8F_128 1 "general_operand")
8610 (match_operand:VI8F_128 2 "general_operand")))]
8613 bool ok = ix86_expand_int_vcond (operands);
;; Modes accepted by the variable-selector vec_perm<mode> expander.  The
;; six 128-bit modes are listed unconditionally, the 256-bit modes require
;; AVX2, and the 512-bit modes with 32/64-bit elements require AVX512F.
8618 (define_mode_iterator VEC_PERM_AVX2
8619 [V16QI V8HI V4SI V2DI V4SF V2DF
8620 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
8621 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
8622 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
8623 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
8624 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
;; Expander for a two-input vector permutation with a run-time selector.
;; Operand 0 is the destination, operands 1 and 2 are the source vectors,
;; and operand 3 is the selector in the matching integer vector mode
;; (<sseintvecmode>).  All four operands must be registers.  It is enabled
;; when any of SSSE3, AVX or XOP is available; the work is done by
;; ix86_expand_vec_perm.
8626 (define_expand "vec_perm<mode>"
8627 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
8628 (match_operand:VEC_PERM_AVX2 1 "register_operand")
8629 (match_operand:VEC_PERM_AVX2 2 "register_operand")
8630 (match_operand:<sseintvecmode> 3 "register_operand")]
8631 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
8633 ix86_expand_vec_perm (operands);
;; Modes accepted by the constant-selector vec_perm_const<mode> expander,
;; each gated on the ISA level named beside it: SSE for the 32/64-bit
;; element 128-bit modes, SSE2 for V16QI/V8HI, AVX or AVX2 for the 256-bit
;; modes, and AVX512F for the 512-bit modes.
8637 (define_mode_iterator VEC_PERM_CONST
8638 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
8639 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
8640 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
8641 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
8642 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
8643 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
8644 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
8645 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; Expander for a two-input vector permutation whose selector (operand 3)
;; has no predicate attached and is in <sseintvecmode>.  Operands 0-2 must
;; be registers.  The expansion is attempted by ix86_expand_vec_perm_const,
;; and the preparation code branches on that function's return value.
8647 (define_expand "vec_perm_const<mode>"
8648 [(match_operand:VEC_PERM_CONST 0 "register_operand")
8649 (match_operand:VEC_PERM_CONST 1 "register_operand")
8650 (match_operand:VEC_PERM_CONST 2 "register_operand")
8651 (match_operand:<sseintvecmode> 3)]
8654 if (ix86_expand_vec_perm_const (operands))
8660 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8662 ;; Parallel bitwise logical operations
8664 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the bitwise complement of an integer vector, expressed as
;; an XOR of operand 1 with an all-ones vector.  The preparation code
;; builds a CONST_VECTOR with constm1_rtx in each of the
;; GET_MODE_NUNITS (<MODE>mode) elements and forces it into a register as
;; operands[2].
8666 (define_expand "one_cmpl<mode>2"
8667 [(set (match_operand:VI 0 "register_operand")
8668 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
8672 int i, n = GET_MODE_NUNITS (<MODE>mode);
8673 rtvec v = rtvec_alloc (n);
8675 for (i = 0; i < n; ++i)
8676 RTVEC_ELT (v, i) = constm1_rtx;
8678 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named and-not expander for the VI_AVX2 modes.  Operand 1 (a register) is
;; complemented and then combined with operand 2, which may be a register
;; or memory.  It requires SSE2 plus the <mask_mode512bit_condition>
;; substitution.
8681 (define_expand "<sse2_avx2>_andnot<mode>3<mask_name>"
8682 [(set (match_operand:VI_AVX2 0 "register_operand")
8684 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
8685 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
8686 "TARGET_SSE2 && <mask_mode512bit_condition>")
;; Insn for the integer vector and-not (complement of operand 1 combined
;; with operand 2).  The output code picks a mnemonic stem into `tmp' by
;; switching on get_attr_mode (insn), asserting the ISA level each case
;; needs.  It then picks an operand layout by which_alternative: the
;; two-operand legacy form for alternative 0, and the "v"-prefixed
;; three-operand form with the optional mask operand for alternative 1.
;; snprintf formats the final template into the static buffer `buf'.
;; The prefix_data16 attribute is conditional on alternative 0 with mode
;; TI.  The trailing cond chooses the insn mode from
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, TARGET_AVX2, TARGET_AVX, the
;; vector size, TARGET_SSE2 and optimize-for-size.
8688 (define_insn "*andnot<mode>3<mask_name>"
8689 [(set (match_operand:VI 0 "register_operand" "=x,v")
8691 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
8692 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
8693 "TARGET_SSE && <mask_mode512bit_condition>"
8695 static char buf[64];
8699 switch (get_attr_mode (insn))
8702 gcc_assert (TARGET_AVX512F);
8704 tmp = "pandn<ssemodesuffix>";
8708 gcc_assert (TARGET_AVX2);
8710 gcc_assert (TARGET_SSE2);
8716 gcc_assert (TARGET_AVX);
8718 gcc_assert (TARGET_SSE);
8727 switch (which_alternative)
8730 ops = "%s\t{%%2, %%0|%%0, %%2}";
8733 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
8739 snprintf (buf, sizeof (buf), ops, tmp);
8742 [(set_attr "isa" "noavx,avx")
8743 (set_attr "type" "sselog")
8744 (set (attr "prefix_data16")
8746 (and (eq_attr "alternative" "0")
8747 (eq_attr "mode" "TI"))
8749 (const_string "*")))
8750 (set_attr "prefix" "<mask_prefix3>")
8752 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
8753 (const_string "<ssePSmode>")
8754 (match_test "TARGET_AVX2")
8755 (const_string "<sseinsnmode>")
8756 (match_test "TARGET_AVX")
8758 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
8759 (const_string "V8SF")
8760 (const_string "<sseinsnmode>"))
8761 (ior (not (match_test "TARGET_SSE2"))
8762 (match_test "optimize_function_for_size_p (cfun)"))
8763 (const_string "V4SF")
8765 (const_string "<sseinsnmode>")))])
;; Expander for the bitwise logical operations selected by the <code>
;; iterator on integer vector modes (VI).  Either source may be a register,
;; memory or a constant vector.  The expansion is handed to
;; ix86_expand_vector_logical_operator.
8767 (define_expand "<code><mode>3"
8768 [(set (match_operand:VI 0 "register_operand")
8770 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
8771 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
8774 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Insn for the bitwise logical operations selected by <code> on integer
;; vector modes.  Operand 1 is commutative ("%") and the pattern is gated on
;; ix86_binary_operator_ok.  The output code has the same structure as the
;; and-not insn.  It switches on get_attr_mode (insn) to choose the
;; mnemonic stem `tmp', asserting the required ISA level.  It then switches
;; on which_alternative to choose between the two-operand legacy layout and
;; the "v"-prefixed three-operand layout with the optional mask operand.
;; snprintf formats the result into the static buffer `buf'.
;; The trailing cond chooses the insn mode from
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, TARGET_AVX2, TARGET_AVX, the
;; vector size, TARGET_SSE2 and optimize-for-size.
8778 (define_insn "<mask_codefor><code><mode>3<mask_name>"
8779 [(set (match_operand:VI 0 "register_operand" "=x,v")
8781 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
8782 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
8783 "TARGET_SSE && <mask_mode512bit_condition>
8784 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8786 static char buf[64];
8790 switch (get_attr_mode (insn))
8793 gcc_assert (TARGET_AVX512F);
8794 tmp = "p<logic><ssemodesuffix>";
8798 gcc_assert (TARGET_AVX2);
8800 gcc_assert (TARGET_SSE2);
8806 gcc_assert (TARGET_AVX512F);
8808 gcc_assert (TARGET_AVX);
8810 gcc_assert (TARGET_SSE);
8819 switch (which_alternative)
8822 ops = "%s\t{%%2, %%0|%%0, %%2}";
8825 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
8831 snprintf (buf, sizeof (buf), ops, tmp);
8834 [(set_attr "isa" "noavx,avx")
8835 (set_attr "type" "sselog")
8836 (set (attr "prefix_data16")
8838 (and (eq_attr "alternative" "0")
8839 (eq_attr "mode" "TI"))
8841 (const_string "*")))
8842 (set_attr "prefix" "<mask_prefix3>")
8844 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
8845 (const_string "<ssePSmode>")
8846 (match_test "TARGET_AVX2")
8847 (const_string "<sseinsnmode>")
8848 (match_test "TARGET_AVX")
8850 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
8851 (const_string "V8SF")
8852 (const_string "<sseinsnmode>"))
8853 (ior (not (match_test "TARGET_SSE2"))
8854 (match_test "optimize_function_for_size_p (cfun)"))
8855 (const_string "V4SF")
8857 (const_string "<sseinsnmode>")))])
;; Insn emitting vptestm* on two VI48_512 operands (operand 2 may be
;; memory).  The result is written to a mask register (constraint "k",
;; mode <avx512fmaskmode>) and the operation is modelled as an unspec.
;; It is EVEX-encoded.
8859 (define_insn "avx512f_testm<mode>3"
8860 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
8861 (unspec:<avx512fmaskmode>
8862 [(match_operand:VI48_512 1 "register_operand" "v")
8863 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
8866 "vptestm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8867 [(set_attr "prefix" "evex")
8868 (set_attr "mode" "<sseinsnmode>")])
;; Insn emitting vptestnm* on two VI48_512 operands (operand 2 may be
;; memory).  The result is written to a mask register (constraint "k",
;; mode <avx512fmaskmode>) and the operation is modelled as an unspec.
;; The mnemonic is spelled with a literal "v", matching
;; avx512f_testm<mode>3: the insn is EVEX-only, so the conditional "%v"
;; AVX-prefix escape has nothing to choose between.
8870 (define_insn "avx512f_testnm<mode>3"
8871 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
8872 (unspec:<avx512fmaskmode>
8873 [(match_operand:VI48_512 1 "register_operand" "v")
8874 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
8877 "vptestnm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8878 [(set_attr "prefix" "evex")
8879 (set_attr "mode" "<sseinsnmode>")])
8881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8883 ;; Parallel integral element swizzling
8885 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander that packs two wide-element vectors (operands 1 and 2) into one
;; vector of the narrower <ssepackmode>.  Both inputs are reinterpreted in
;; <ssepackmode> with gen_lowpart and then passed to
;; ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the even-indexed
;; elements (the low halves of each wide element) -- confirm against the
;; helper's definition.
8887 (define_expand "vec_pack_trunc_<mode>"
8888 [(match_operand:<ssepackmode> 0 "register_operand")
8889 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
8890 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
8893 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
8894 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
8895 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Insn for packsswb / vpacksswb.  The result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2, each
;; given in <sseunpackmode>.  Alternative 0 is the legacy two-operand form
;; with the destination tied to operand 1; alternative 1 is the VEX
;; three-operand form.
8899 (define_insn "<sse2_avx2>_packsswb"
8900 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8901 (vec_concat:VI1_AVX2
8902 (ss_truncate:<ssehalfvecmode>
8903 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
8904 (ss_truncate:<ssehalfvecmode>
8905 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
8908 packsswb\t{%2, %0|%0, %2}
8909 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
8910 [(set_attr "isa" "noavx,avx")
8911 (set_attr "type" "sselog")
8912 (set_attr "prefix_data16" "1,*")
8913 (set_attr "prefix" "orig,vex")
8914 (set_attr "mode" "<sseinsnmode>")])
;; Insn for packssdw / vpackssdw.  The result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2, each
;; given in <sseunpackmode>.  Alternative 0 is the legacy two-operand form
;; with the destination tied to operand 1; alternative 1 is the VEX
;; three-operand form.
8916 (define_insn "<sse2_avx2>_packssdw"
8917 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
8918 (vec_concat:VI2_AVX2
8919 (ss_truncate:<ssehalfvecmode>
8920 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
8921 (ss_truncate:<ssehalfvecmode>
8922 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
8925 packssdw\t{%2, %0|%0, %2}
8926 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
8927 [(set_attr "isa" "noavx,avx")
8928 (set_attr "type" "sselog")
8929 (set_attr "prefix_data16" "1,*")
8930 (set_attr "prefix" "orig,vex")
8931 (set_attr "mode" "<sseinsnmode>")])
;; Insn for packuswb / vpackuswb.  The result is the concatenation of the
;; unsigned-saturating truncations (us_truncate) of operands 1 and 2, each
;; given in <sseunpackmode>.  Alternative 0 is the legacy two-operand form
;; with the destination tied to operand 1; alternative 1 is the VEX
;; three-operand form.
8933 (define_insn "<sse2_avx2>_packuswb"
8934 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8935 (vec_concat:VI1_AVX2
8936 (us_truncate:<ssehalfvecmode>
8937 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
8938 (us_truncate:<ssehalfvecmode>
8939 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
8942 packuswb\t{%2, %0|%0, %2}
8943 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
8944 [(set_attr "isa" "noavx,avx")
8945 (set_attr "type" "sselog")
8946 (set_attr "prefix_data16" "1,*")
8947 (set_attr "prefix" "orig,vex")
8948 (set_attr "mode" "<sseinsnmode>")])
;; Insn for vpunpckhbw on V32QI.  The selection list pairs element i with
;; element i+32 for i = 8..15 and again for i = 24..31, i.e. it works on
;; the upper eight bytes of each 16-byte half separately rather than on the
;; top half of the whole 256-bit vector.
8950 (define_insn "avx2_interleave_highv32qi"
8951 [(set (match_operand:V32QI 0 "register_operand" "=x")
8954 (match_operand:V32QI 1 "register_operand" "x")
8955 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
8956 (parallel [(const_int 8) (const_int 40)
8957 (const_int 9) (const_int 41)
8958 (const_int 10) (const_int 42)
8959 (const_int 11) (const_int 43)
8960 (const_int 12) (const_int 44)
8961 (const_int 13) (const_int 45)
8962 (const_int 14) (const_int 46)
8963 (const_int 15) (const_int 47)
8964 (const_int 24) (const_int 56)
8965 (const_int 25) (const_int 57)
8966 (const_int 26) (const_int 58)
8967 (const_int 27) (const_int 59)
8968 (const_int 28) (const_int 60)
8969 (const_int 29) (const_int 61)
8970 (const_int 30) (const_int 62)
8971 (const_int 31) (const_int 63)])))]
8973 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
8974 [(set_attr "type" "sselog")
8975 (set_attr "prefix" "vex")
8976 (set_attr "mode" "OI")])
;; Insn for punpckhbw / vpunpckhbw on V16QI.  The selection list pairs
;; element i with element i+16 for i = 8..15.  Alternative 0 is the legacy
;; two-operand form with the destination tied to operand 1; alternative 1
;; is the VEX three-operand form.
8978 (define_insn "vec_interleave_highv16qi"
8979 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
8982 (match_operand:V16QI 1 "register_operand" "0,x")
8983 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
8984 (parallel [(const_int 8) (const_int 24)
8985 (const_int 9) (const_int 25)
8986 (const_int 10) (const_int 26)
8987 (const_int 11) (const_int 27)
8988 (const_int 12) (const_int 28)
8989 (const_int 13) (const_int 29)
8990 (const_int 14) (const_int 30)
8991 (const_int 15) (const_int 31)])))]
8994 punpckhbw\t{%2, %0|%0, %2}
8995 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
8996 [(set_attr "isa" "noavx,avx")
8997 (set_attr "type" "sselog")
8998 (set_attr "prefix_data16" "1,*")
8999 (set_attr "prefix" "orig,vex")
9000 (set_attr "mode" "TI")])
;; Insn for vpunpcklbw on V32QI.  The selection list pairs element i with
;; element i+32 for i = 0..7 and again for i = 16..23, i.e. it works on the
;; lower eight bytes of each 16-byte half separately.
9002 (define_insn "avx2_interleave_lowv32qi"
9003 [(set (match_operand:V32QI 0 "register_operand" "=x")
9006 (match_operand:V32QI 1 "register_operand" "x")
9007 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
9008 (parallel [(const_int 0) (const_int 32)
9009 (const_int 1) (const_int 33)
9010 (const_int 2) (const_int 34)
9011 (const_int 3) (const_int 35)
9012 (const_int 4) (const_int 36)
9013 (const_int 5) (const_int 37)
9014 (const_int 6) (const_int 38)
9015 (const_int 7) (const_int 39)
9016 (const_int 16) (const_int 48)
9017 (const_int 17) (const_int 49)
9018 (const_int 18) (const_int 50)
9019 (const_int 19) (const_int 51)
9020 (const_int 20) (const_int 52)
9021 (const_int 21) (const_int 53)
9022 (const_int 22) (const_int 54)
9023 (const_int 23) (const_int 55)])))]
9025 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9026 [(set_attr "type" "sselog")
9027 (set_attr "prefix" "vex")
9028 (set_attr "mode" "OI")])
;; Insn for punpcklbw / vpunpcklbw on V16QI.  The selection list pairs
;; element i with element i+16 for i = 0..7.  Alternative 0 is the legacy
;; two-operand form with the destination tied to operand 1; alternative 1
;; is the VEX three-operand form.
9030 (define_insn "vec_interleave_lowv16qi"
9031 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9034 (match_operand:V16QI 1 "register_operand" "0,x")
9035 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
9036 (parallel [(const_int 0) (const_int 16)
9037 (const_int 1) (const_int 17)
9038 (const_int 2) (const_int 18)
9039 (const_int 3) (const_int 19)
9040 (const_int 4) (const_int 20)
9041 (const_int 5) (const_int 21)
9042 (const_int 6) (const_int 22)
9043 (const_int 7) (const_int 23)])))]
9046 punpcklbw\t{%2, %0|%0, %2}
9047 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9048 [(set_attr "isa" "noavx,avx")
9049 (set_attr "type" "sselog")
9050 (set_attr "prefix_data16" "1,*")
9051 (set_attr "prefix" "orig,vex")
9052 (set_attr "mode" "TI")])
;; Insn for vpunpckhwd on V16HI.  The selection list pairs element i with
;; element i+16 for i = 4..7 and again for i = 12..15, i.e. it works on the
;; upper four words of each 8-word half separately.
9054 (define_insn "avx2_interleave_highv16hi"
9055 [(set (match_operand:V16HI 0 "register_operand" "=x")
9058 (match_operand:V16HI 1 "register_operand" "x")
9059 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9060 (parallel [(const_int 4) (const_int 20)
9061 (const_int 5) (const_int 21)
9062 (const_int 6) (const_int 22)
9063 (const_int 7) (const_int 23)
9064 (const_int 12) (const_int 28)
9065 (const_int 13) (const_int 29)
9066 (const_int 14) (const_int 30)
9067 (const_int 15) (const_int 31)])))]
9069 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9070 [(set_attr "type" "sselog")
9071 (set_attr "prefix" "vex")
9072 (set_attr "mode" "OI")])
;; Insn for punpckhwd / vpunpckhwd on V8HI.  The selection list pairs
;; element i with element i+8 for i = 4..7.  Alternative 0 is the legacy
;; two-operand form with the destination tied to operand 1; alternative 1
;; is the VEX three-operand form.
9074 (define_insn "vec_interleave_highv8hi"
9075 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9078 (match_operand:V8HI 1 "register_operand" "0,x")
9079 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9080 (parallel [(const_int 4) (const_int 12)
9081 (const_int 5) (const_int 13)
9082 (const_int 6) (const_int 14)
9083 (const_int 7) (const_int 15)])))]
9086 punpckhwd\t{%2, %0|%0, %2}
9087 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9088 [(set_attr "isa" "noavx,avx")
9089 (set_attr "type" "sselog")
9090 (set_attr "prefix_data16" "1,*")
9091 (set_attr "prefix" "orig,vex")
9092 (set_attr "mode" "TI")])
;; Insn for vpunpcklwd on V16HI.  The selection list pairs element i with
;; element i+16 for i = 0..3 and again for i = 8..11, i.e. it works on the
;; lower four words of each 8-word half separately.
9094 (define_insn "avx2_interleave_lowv16hi"
9095 [(set (match_operand:V16HI 0 "register_operand" "=x")
9098 (match_operand:V16HI 1 "register_operand" "x")
9099 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9100 (parallel [(const_int 0) (const_int 16)
9101 (const_int 1) (const_int 17)
9102 (const_int 2) (const_int 18)
9103 (const_int 3) (const_int 19)
9104 (const_int 8) (const_int 24)
9105 (const_int 9) (const_int 25)
9106 (const_int 10) (const_int 26)
9107 (const_int 11) (const_int 27)])))]
9109 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9110 [(set_attr "type" "sselog")
9111 (set_attr "prefix" "vex")
9112 (set_attr "mode" "OI")])
;; Insn for punpcklwd / vpunpcklwd on V8HI.  The selection list pairs
;; element i with element i+8 for i = 0..3.  Alternative 0 is the legacy
;; two-operand form with the destination tied to operand 1; alternative 1
;; is the VEX three-operand form.
9114 (define_insn "vec_interleave_lowv8hi"
9115 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9118 (match_operand:V8HI 1 "register_operand" "0,x")
9119 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9120 (parallel [(const_int 0) (const_int 8)
9121 (const_int 1) (const_int 9)
9122 (const_int 2) (const_int 10)
9123 (const_int 3) (const_int 11)])))]
9126 punpcklwd\t{%2, %0|%0, %2}
9127 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9128 [(set_attr "isa" "noavx,avx")
9129 (set_attr "type" "sselog")
9130 (set_attr "prefix_data16" "1,*")
9131 (set_attr "prefix" "orig,vex")
9132 (set_attr "mode" "TI")])
;; Insn for vpunpckhdq on V8SI.  The selection list pairs element i with
;; element i+8 for i = 2,3 and again for i = 6,7, i.e. it works on the
;; upper two dwords of each 4-dword half separately.
9134 (define_insn "avx2_interleave_highv8si"
9135 [(set (match_operand:V8SI 0 "register_operand" "=x")
9138 (match_operand:V8SI 1 "register_operand" "x")
9139 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9140 (parallel [(const_int 2) (const_int 10)
9141 (const_int 3) (const_int 11)
9142 (const_int 6) (const_int 14)
9143 (const_int 7) (const_int 15)])))]
9145 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9146 [(set_attr "type" "sselog")
9147 (set_attr "prefix" "vex")
9148 (set_attr "mode" "OI")])
;; Insn for vpunpckhdq on V16SI, with optional masking supplied through the
;; <mask_name> substitution.  The selection list pairs element i with
;; element i+16 for i = 2,3 / 6,7 / 10,11 / 14,15, i.e. the upper two
;; dwords of each group of four.  It is EVEX-encoded with insn mode XI.
9150 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
9151 [(set (match_operand:V16SI 0 "register_operand" "=v")
9154 (match_operand:V16SI 1 "register_operand" "v")
9155 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9156 (parallel [(const_int 2) (const_int 18)
9157 (const_int 3) (const_int 19)
9158 (const_int 6) (const_int 22)
9159 (const_int 7) (const_int 23)
9160 (const_int 10) (const_int 26)
9161 (const_int 11) (const_int 27)
9162 (const_int 14) (const_int 30)
9163 (const_int 15) (const_int 31)])))]
9165 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9166 [(set_attr "type" "sselog")
9167 (set_attr "prefix" "evex")
9168 (set_attr "mode" "XI")])
;; Insn for punpckhdq / vpunpckhdq on V4SI.  The selection list pairs
;; element i with element i+4 for i = 2,3.  Alternative 0 is the legacy
;; two-operand form with the destination tied to operand 1; alternative 1
;; is the VEX three-operand form.
9171 (define_insn "vec_interleave_highv4si"
9172 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9175 (match_operand:V4SI 1 "register_operand" "0,x")
9176 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9177 (parallel [(const_int 2) (const_int 6)
9178 (const_int 3) (const_int 7)])))]
9181 punpckhdq\t{%2, %0|%0, %2}
9182 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9183 [(set_attr "isa" "noavx,avx")
9184 (set_attr "type" "sselog")
9185 (set_attr "prefix_data16" "1,*")
9186 (set_attr "prefix" "orig,vex")
9187 (set_attr "mode" "TI")])
;; Insn for vpunpckldq on V8SI.  The selection list pairs element i with
;; element i+8 for i = 0,1 and again for i = 4,5, i.e. it works on the
;; lower two dwords of each 4-dword half separately.
9189 (define_insn "avx2_interleave_lowv8si"
9190 [(set (match_operand:V8SI 0 "register_operand" "=x")
9193 (match_operand:V8SI 1 "register_operand" "x")
9194 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9195 (parallel [(const_int 0) (const_int 8)
9196 (const_int 1) (const_int 9)
9197 (const_int 4) (const_int 12)
9198 (const_int 5) (const_int 13)])))]
9200 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9201 [(set_attr "type" "sselog")
9202 (set_attr "prefix" "vex")
9203 (set_attr "mode" "OI")])
;; Insn for vpunpckldq on V16SI, with optional masking supplied through the
;; <mask_name> substitution.  The selection list pairs element i with
;; element i+16 for i = 0,1 / 4,5 / 8,9 / 12,13, i.e. the lower two dwords
;; of each group of four.  It is EVEX-encoded with insn mode XI.
9205 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
9206 [(set (match_operand:V16SI 0 "register_operand" "=v")
9209 (match_operand:V16SI 1 "register_operand" "v")
9210 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9211 (parallel [(const_int 0) (const_int 16)
9212 (const_int 1) (const_int 17)
9213 (const_int 4) (const_int 20)
9214 (const_int 5) (const_int 21)
9215 (const_int 8) (const_int 24)
9216 (const_int 9) (const_int 25)
9217 (const_int 12) (const_int 28)
9218 (const_int 13) (const_int 29)])))]
9220 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9221 [(set_attr "type" "sselog")
9222 (set_attr "prefix" "evex")
9223 (set_attr "mode" "XI")])
;; Insn for punpckldq / vpunpckldq on V4SI.  The selection list pairs
;; element i with element i+4 for i = 0,1.  Alternative 0 is the legacy
;; two-operand form with the destination tied to operand 1; alternative 1
;; is the VEX three-operand form.
9225 (define_insn "vec_interleave_lowv4si"
9226 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9229 (match_operand:V4SI 1 "register_operand" "0,x")
9230 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9231 (parallel [(const_int 0) (const_int 4)
9232 (const_int 1) (const_int 5)])))]
9235 punpckldq\t{%2, %0|%0, %2}
9236 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9237 [(set_attr "isa" "noavx,avx")
9238 (set_attr "type" "sselog")
9239 (set_attr "prefix_data16" "1,*")
9240 (set_attr "prefix" "orig,vex")
9241 (set_attr "mode" "TI")])
;; Expander for the whole-vector high interleave on 256-bit integer modes
;; (VI_256).  The AVX2 interleave insns operate on each 128-bit half
;; separately, so this emits both the per-half low interleave (t1) and the
;; per-half high interleave (t2) and then combines them with
;; avx2_permv2ti using the immediate 1 + (3 << 4).  The permute is done in
;; V4DImode via gen_lowpart, and the result is moved back into operand 0
;; in <MODE>mode.
;; Operand constraints are omitted: they have no effect in a define_expand,
;; and the other expanders in this file specify predicates only.
9243 (define_expand "vec_interleave_high<mode>"
9244 [(match_operand:VI_256 0 "register_operand")
9245 (match_operand:VI_256 1 "register_operand")
9246 (match_operand:VI_256 2 "nonimmediate_operand")]
9249 rtx t1 = gen_reg_rtx (<MODE>mode);
9250 rtx t2 = gen_reg_rtx (<MODE>mode);
9251 rtx t3 = gen_reg_rtx (V4DImode);
9252 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
9253 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
9254 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
9255 gen_lowpart (V4DImode, t2),
9256 GEN_INT (1 + (3 << 4))));
9257 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Expander for the whole-vector low interleave on 256-bit integer modes
;; (VI_256).  The AVX2 interleave insns operate on each 128-bit half
;; separately, so this emits both the per-half low interleave (t1) and the
;; per-half high interleave (t2) and then combines them with
;; avx2_permv2ti using the immediate 0 + (2 << 4).  The permute is done in
;; V4DImode via gen_lowpart, and the result is moved back into operand 0
;; in <MODE>mode.
;; Operand constraints are omitted: they have no effect in a define_expand,
;; and the other expanders in this file specify predicates only.
9261 (define_expand "vec_interleave_low<mode>"
9262 [(match_operand:VI_256 0 "register_operand")
9263 (match_operand:VI_256 1 "register_operand")
9264 (match_operand:VI_256 2 "nonimmediate_operand")]
9267 rtx t1 = gen_reg_rtx (<MODE>mode);
9268 rtx t2 = gen_reg_rtx (<MODE>mode);
9269 rtx t3 = gen_reg_rtx (V4DImode);
9270 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
9271 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
9272 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
9273 gen_lowpart (V4DImode, t2),
9274 GEN_INT (0 + (2 << 4))));
9275 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
9279 ;; Modes handled by pinsr patterns.
;; V8HI is listed unconditionally; V16QI and V4SI require SSE4.1, and V2DI
;; additionally requires a 64-bit target.
9280 (define_mode_iterator PINSR_MODE
9281 [(V16QI "TARGET_SSE4_1") V8HI
9282 (V4SI "TARGET_SSE4_1")
9283 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix for the pinsr insn, per mode: "sse2" for V8HI and
;; "sse4_1" for V16QI, V4SI and V2DI.
9285 (define_mode_attr sse2p4_1
9286 [(V16QI "sse4_1") (V8HI "sse2")
9287 (V4SI "sse4_1") (V2DI "sse4_1")])
9289 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insn that inserts a scalar (operand 2, in a general register or memory)
;; into one element of vector operand 1, modelled as a vec_merge of a
;; vec_duplicate.  Operand 3 is the vec_merge selector: the insn condition
;; requires exact_log2 of it to be a valid element index, and the output
;; code replaces operand 3 with that index before printing.
;; Alternatives 0/1 are the legacy forms (register / memory source) and
;; alternatives 2/3 the VEX forms.  For scalar modes narrower than SImode
;; the register source is printed with the %k modifier.
9290 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
9291 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
9292 (vec_merge:PINSR_MODE
9293 (vec_duplicate:PINSR_MODE
9294 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
9295 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
9296 (match_operand:SI 3 "const_int_operand")))]
9298 && ((unsigned) exact_log2 (INTVAL (operands[3]))
9299 < GET_MODE_NUNITS (<MODE>mode))"
9301 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
9303 switch (which_alternative)
9306 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
9307 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
9310 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
9312 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
9313 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
9316 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9321 [(set_attr "isa" "noavx,noavx,avx,avx")
9322 (set_attr "type" "sselog")
9323 (set (attr "prefix_rex")
9325 (and (not (match_test "TARGET_AVX"))
9326 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
9328 (const_string "*")))
9329 (set (attr "prefix_data16")
9331 (and (not (match_test "TARGET_AVX"))
9332 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
9334 (const_string "*")))
9335 (set (attr "prefix_extra")
9337 (and (not (match_test "TARGET_AVX"))
9338 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
9340 (const_string "1")))
9341 (set_attr "length_immediate" "1")
9342 (set_attr "prefix" "orig,orig,vex,vex")
9343 (set_attr "mode" "TI")])
;; Masked expander for inserting a quarter-width vector (operand 2) into a
;; 16-element vector (operand 1) at the position given by operand 3
;; (0..3).  Operand 4 and operand 5 (the mask register) are forwarded
;; unchanged.  The position is translated into one of four selector
;; constants -- 0xFFF, 0xF0FF, 0xFF0F, 0xFFF0 -- passed to the
;; ..._32x4_1_mask insn, which maps them back to an immediate.
;; NOTE(review): 0xFFF has only twelve bits set while the other three
;; selectors are 16-bit values with one cleared nibble; confirm that the
;; position-to-selector mapping is the intended one for the insn's RTL.
9345 (define_expand "avx512f_vinsert<shuffletype>32x4_mask"
9346 [(match_operand:V16FI 0 "register_operand")
9347 (match_operand:V16FI 1 "register_operand")
9348 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
9349 (match_operand:SI 3 "const_0_to_3_operand")
9350 (match_operand:V16FI 4 "register_operand")
9351 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9354 switch (INTVAL (operands[3]))
9357 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9358 operands[1], operands[2], GEN_INT (0xFFF), operands[4],
9362 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9363 operands[1], operands[2], GEN_INT (0xF0FF), operands[4],
9367 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9368 operands[1], operands[2], GEN_INT (0xFF0F), operands[4],
9372 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9373 operands[1], operands[2], GEN_INT (0xFFF0), operands[4],
;; Insn for vinsert<shuffletype>32x4, with optional masking through
;; <mask_name>.  Operand 2 (quarter-width, register or memory) is
;; duplicated and combined with operand 1 under the selector in operand 3.
;; The output code recognises the four selector values 0xFFF, 0xF0FF,
;; 0xFF0F and 0xFFF0, derives `mask' from them, and overwrites operand 3
;; with that value so it prints as the instruction's immediate.
;; NOTE(review): the selector values must stay in sync with the constants
;; generated by the avx512f_vinsert<shuffletype>32x4_mask expander.
9383 (define_insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>"
9384 [(set (match_operand:V16FI 0 "register_operand" "=v")
9386 (match_operand:V16FI 1 "register_operand" "v")
9387 (vec_duplicate:V16FI
9388 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
9389 (match_operand:SI 3 "const_int_operand" "n")))]
9393 if (INTVAL (operands[3]) == 0xFFF)
9395 else if ( INTVAL (operands[3]) == 0xF0FF)
9397 else if ( INTVAL (operands[3]) == 0xFF0F)
9399 else if ( INTVAL (operands[3]) == 0xFFF0)
9404 operands[3] = GEN_INT (mask);
9406 return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9408 [(set_attr "type" "sselog")
9409 (set_attr "length_immediate" "1")
9410 (set_attr "prefix" "evex")
9411 (set_attr "mode" "<sseinsnmode>")])
;; Masked expander for inserting a half-width vector (operand 2) into an
;; 8-element vector (operand 1).  Operand 3 (0 or 1) is read into `mask'.
;; Two generators are called with the same arguments:
;; gen_vec_set_lo_<mode>_mask and gen_vec_set_hi_<mode>_mask.  Operand 4
;; and operand 5 (the mask register) are forwarded unchanged.
9413 (define_expand "avx512f_vinsert<shuffletype>64x4_mask"
9414 [(match_operand:V8FI 0 "register_operand")
9415 (match_operand:V8FI 1 "register_operand")
9416 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
9417 (match_operand:SI 3 "const_0_to_1_operand")
9418 (match_operand:V8FI 4 "register_operand")
9419 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9422 int mask = INTVAL (operands[3]);
9424 emit_insn (gen_vec_set_lo_<mode>_mask
9425 (operands[0], operands[1], operands[2],
9426 operands[4], operands[5]));
9428 emit_insn (gen_vec_set_hi_<mode>_mask
9429 (operands[0], operands[1], operands[2],
9430 operands[4], operands[5]));
;; Insn that combines operand 2 (half-width, register or memory) with
;; elements 4..7 selected from operand 1.  It is printed as
;; vinsert<shuffletype>64x4 with immediate 0, with optional masking
;; through <mask_name>.
9434 (define_insn "vec_set_lo_<mode><mask_name>"
9435 [(set (match_operand:V8FI 0 "register_operand" "=v")
9437 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
9438 (vec_select:<ssehalfvecmode>
9439 (match_operand:V8FI 1 "register_operand" "v")
9440 (parallel [(const_int 4) (const_int 5)
9441 (const_int 6) (const_int 7)]))))]
9443 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
9444 [(set_attr "type" "sselog")
9445 (set_attr "length_immediate" "1")
9446 (set_attr "prefix" "evex")
9447 (set_attr "mode" "XI")])
;; Insn that combines operand 2 (half-width, register or memory) with
;; elements 0..3 selected from operand 1.  It is printed as
;; vinsert<shuffletype>64x4 with immediate 1, with optional masking
;; through <mask_name>.
9449 (define_insn "vec_set_hi_<mode><mask_name>"
9450 [(set (match_operand:V8FI 0 "register_operand" "=v")
9452 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
9453 (vec_select:<ssehalfvecmode>
9454 (match_operand:V8FI 1 "register_operand" "v")
9455 (parallel [(const_int 0) (const_int 1)
9456 (const_int 2) (const_int 3)]))))]
9458 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
9459 [(set_attr "type" "sselog")
9460 (set_attr "length_immediate" "1")
9461 (set_attr "prefix" "evex")
9462 (set_attr "mode" "XI")])
;; Masked expander for the 64x2 lane shuffle on 8-element vectors.  The
;; 8-bit immediate (operand 3) is split into four 2-bit fields.  Each field
;; f becomes a pair of consecutive element indices (2*f, 2*f+1).  The pairs
;; from bits 0-1 and 2-3 are used as-is (range 0..7); the pairs from bits
;; 4-5 and 6-7 are offset by 8 (range 8..15).  The eight indices, plus
;; operand 4 and the QImode mask in operand 5, are passed to the
;; ..._64x2_1_mask insn.
9464 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
9465 [(match_operand:V8FI 0 "register_operand")
9466 (match_operand:V8FI 1 "register_operand")
9467 (match_operand:V8FI 2 "nonimmediate_operand")
9468 (match_operand:SI 3 "const_0_to_255_operand")
9469 (match_operand:V8FI 4 "register_operand")
9470 (match_operand:QI 5 "register_operand")]
9473 int mask = INTVAL (operands[3]);
9474 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
9475 (operands[0], operands[1], operands[2],
9476 GEN_INT (((mask >> 0) & 3) * 2),
9477 GEN_INT (((mask >> 0) & 3) * 2 + 1),
9478 GEN_INT (((mask >> 2) & 3) * 2),
9479 GEN_INT (((mask >> 2) & 3) * 2 + 1),
9480 GEN_INT (((mask >> 4) & 3) * 2 + 8),
9481 GEN_INT (((mask >> 4) & 3) * 2 + 9),
9482 GEN_INT (((mask >> 6) & 3) * 2 + 8),
9483 GEN_INT (((mask >> 6) & 3) * 2 + 9),
9484 operands[4], operands[5]));
;; Insn for vshuf<shuffletype>64x2, with optional masking through
;; <mask_name>.  Operands 3..10 are element indices into the concatenation
;; of operands 1 and 2: operands 3..6 must lie in 0..7 and operands 7..10
;; in 8..15.  The condition also requires each odd-numbered index operand
;; to be exactly one less than the operand after it (3/4, 5/6, 7/8, 9/10),
;; so the indices form four consecutive pairs.  The output code folds the
;; first index of each pair back into an 8-bit immediate, two bits per
;; pair, and stores it in operand 3 for printing.
9488 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
9489 [(set (match_operand:V8FI 0 "register_operand" "=v")
9491 (vec_concat:<ssedoublemode>
9492 (match_operand:V8FI 1 "register_operand" "v")
9493 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
9494 (parallel [(match_operand 3 "const_0_to_7_operand")
9495 (match_operand 4 "const_0_to_7_operand")
9496 (match_operand 5 "const_0_to_7_operand")
9497 (match_operand 6 "const_0_to_7_operand")
9498 (match_operand 7 "const_8_to_15_operand")
9499 (match_operand 8 "const_8_to_15_operand")
9500 (match_operand 9 "const_8_to_15_operand")
9501 (match_operand 10 "const_8_to_15_operand")])))]
9503 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
9504 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
9505 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
9506 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
9509 mask = INTVAL (operands[3]) / 2;
9510 mask |= INTVAL (operands[5]) / 2 << 2;
9511 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
9512 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
9513 operands[3] = GEN_INT (mask);
9515 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9517 [(set_attr "type" "sselog")
9518 (set_attr "length_immediate" "1")
9519 (set_attr "prefix" "evex")
9520 (set_attr "mode" "<sseinsnmode>")])
;; Masked expander for the 32x4 lane shuffle on 16-element vectors.  The
;; 8-bit immediate (operand 3) is split into four 2-bit fields.  Each field
;; f becomes four consecutive element indices 4*f .. 4*f+3.  The groups
;; from bits 0-1 and 2-3 are used as-is (range 0..15); the groups from bits
;; 4-5 and 6-7 are offset by 16 (range 16..31).  The sixteen indices, plus
;; operand 4 and the HImode mask in operand 5, are passed to the
;; ..._32x4_1_mask insn.
9522 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
9523 [(match_operand:V16FI 0 "register_operand")
9524 (match_operand:V16FI 1 "register_operand")
9525 (match_operand:V16FI 2 "nonimmediate_operand")
9526 (match_operand:SI 3 "const_0_to_255_operand")
9527 (match_operand:V16FI 4 "register_operand")
9528 (match_operand:HI 5 "register_operand")]
9531 int mask = INTVAL (operands[3]);
9532 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
9533 (operands[0], operands[1], operands[2],
9534 GEN_INT (((mask >> 0) & 3) * 4),
9535 GEN_INT (((mask >> 0) & 3) * 4 + 1),
9536 GEN_INT (((mask >> 0) & 3) * 4 + 2),
9537 GEN_INT (((mask >> 0) & 3) * 4 + 3),
9538 GEN_INT (((mask >> 2) & 3) * 4),
9539 GEN_INT (((mask >> 2) & 3) * 4 + 1),
9540 GEN_INT (((mask >> 2) & 3) * 4 + 2),
9541 GEN_INT (((mask >> 2) & 3) * 4 + 3),
9542 GEN_INT (((mask >> 4) & 3) * 4 + 16),
9543 GEN_INT (((mask >> 4) & 3) * 4 + 17),
9544 GEN_INT (((mask >> 4) & 3) * 4 + 18),
9545 GEN_INT (((mask >> 4) & 3) * 4 + 19),
9546 GEN_INT (((mask >> 6) & 3) * 4 + 16),
9547 GEN_INT (((mask >> 6) & 3) * 4 + 17),
9548 GEN_INT (((mask >> 6) & 3) * 4 + 18),
9549 GEN_INT (((mask >> 6) & 3) * 4 + 19),
9550 operands[4], operands[5]));
;; Insn pattern that prints vshuf<shuffletype>32x4 for V16FI modes.
;; Operands 3-18 are explicit element selectors applied to the
;; vec_concat of operands 1 and 2: selectors 3-10 use
;; const_0_to_15_operand and selectors 11-18 use const_16_to_31_operand.
;; The insn condition requires each group of four selectors (3-6, 7-10,
;; 11-14, 15-18) to be consecutive, counted from the first of the group.
;; The output code packs the four group indices, two bits each, into
;; operands[3], which is then printed as the immediate.
9554 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
9555 [(set (match_operand:V16FI 0 "register_operand" "=v")
9557 (vec_concat:<ssedoublemode>
9558 (match_operand:V16FI 1 "register_operand" "v")
9559 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
9560 (parallel [(match_operand 3 "const_0_to_15_operand")
9561 (match_operand 4 "const_0_to_15_operand")
9562 (match_operand 5 "const_0_to_15_operand")
9563 (match_operand 6 "const_0_to_15_operand")
9564 (match_operand 7 "const_0_to_15_operand")
9565 (match_operand 8 "const_0_to_15_operand")
9566 (match_operand 9 "const_0_to_15_operand")
9567 (match_operand 10 "const_0_to_15_operand")
9568 (match_operand 11 "const_16_to_31_operand")
9569 (match_operand 12 "const_16_to_31_operand")
9570 (match_operand 13 "const_16_to_31_operand")
9571 (match_operand 14 "const_16_to_31_operand")
9572 (match_operand 15 "const_16_to_31_operand")
9573 (match_operand 16 "const_16_to_31_operand")
9574 (match_operand 17 "const_16_to_31_operand")
9575 (match_operand 18 "const_16_to_31_operand")])))]
9577 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
9578 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
9579 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
9580 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
9581 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
9582 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
9583 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
9584 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
9585 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
9586 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
9587 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
9588 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
9591 mask = INTVAL (operands[3]) / 4;
9592 mask |= INTVAL (operands[7]) / 4 << 2;
9593 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
9594 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
9595 operands[3] = GEN_INT (mask);
9597 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
9599 [(set_attr "type" "sselog")
9600 (set_attr "length_immediate" "1")
9601 (set_attr "prefix" "evex")
9602 (set_attr "mode" "<sseinsnmode>")])
;; Expander that turns the immediate in operand 2 (matched by
;; const_0_to_255_operand) into sixteen explicit element indices: the
;; four 2-bit fields of the immediate are emitted four times, with
;; offsets 0, 4, 8 and 12.  Operands 3 (V16SI register) and 4 (HImode
;; register) are forwarded unchanged to gen_avx512f_pshufd_1_mask.
9604 (define_expand "avx512f_pshufdv3_mask"
9605 [(match_operand:V16SI 0 "register_operand")
9606 (match_operand:V16SI 1 "nonimmediate_operand")
9607 (match_operand:SI 2 "const_0_to_255_operand")
9608 (match_operand:V16SI 3 "register_operand")
9609 (match_operand:HI 4 "register_operand")]
9612 int mask = INTVAL (operands[2]);
9613 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
9614 GEN_INT ((mask >> 0) & 3),
9615 GEN_INT ((mask >> 2) & 3),
9616 GEN_INT ((mask >> 4) & 3),
9617 GEN_INT ((mask >> 6) & 3),
9618 GEN_INT (((mask >> 0) & 3) + 4),
9619 GEN_INT (((mask >> 2) & 3) + 4),
9620 GEN_INT (((mask >> 4) & 3) + 4),
9621 GEN_INT (((mask >> 6) & 3) + 4),
9622 GEN_INT (((mask >> 0) & 3) + 8),
9623 GEN_INT (((mask >> 2) & 3) + 8),
9624 GEN_INT (((mask >> 4) & 3) + 8),
9625 GEN_INT (((mask >> 6) & 3) + 8),
9626 GEN_INT (((mask >> 0) & 3) + 12),
9627 GEN_INT (((mask >> 2) & 3) + 12),
9628 GEN_INT (((mask >> 4) & 3) + 12),
9629 GEN_INT (((mask >> 6) & 3) + 12),
9630 operands[3], operands[4]));
;; Insn pattern that prints vpshufd for V16SI.  Operands 2-5 are the
;; selectors for the first four elements (const_0_to_3_operand); the
;; insn condition requires operands 6-9, 10-13 and 14-17 to equal those
;; same selectors plus 4, 8 and 12 respectively.  The output code
;; rebuilds the immediate from operands 2-5 (two bits each) and stores
;; it in operands[2].
9634 (define_insn "avx512f_pshufd_1<mask_name>"
9635 [(set (match_operand:V16SI 0 "register_operand" "=v")
9637 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
9638 (parallel [(match_operand 2 "const_0_to_3_operand")
9639 (match_operand 3 "const_0_to_3_operand")
9640 (match_operand 4 "const_0_to_3_operand")
9641 (match_operand 5 "const_0_to_3_operand")
9642 (match_operand 6 "const_4_to_7_operand")
9643 (match_operand 7 "const_4_to_7_operand")
9644 (match_operand 8 "const_4_to_7_operand")
9645 (match_operand 9 "const_4_to_7_operand")
9646 (match_operand 10 "const_8_to_11_operand")
9647 (match_operand 11 "const_8_to_11_operand")
9648 (match_operand 12 "const_8_to_11_operand")
9649 (match_operand 13 "const_8_to_11_operand")
9650 (match_operand 14 "const_12_to_15_operand")
9651 (match_operand 15 "const_12_to_15_operand")
9652 (match_operand 16 "const_12_to_15_operand")
9653 (match_operand 17 "const_12_to_15_operand")])))]
9655 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
9656 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
9657 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
9658 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
9659 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
9660 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
9661 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
9662 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
9663 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
9664 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
9665 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
9666 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
9669 mask |= INTVAL (operands[2]) << 0;
9670 mask |= INTVAL (operands[3]) << 2;
9671 mask |= INTVAL (operands[4]) << 4;
9672 mask |= INTVAL (operands[5]) << 6;
9673 operands[2] = GEN_INT (mask);
9675 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
9677 [(set_attr "type" "sselog1")
9678 (set_attr "prefix" "evex")
9679 (set_attr "length_immediate" "1")
9680 (set_attr "mode" "XI")])
;; Expander that turns the immediate in operand 2 (matched by
;; const_0_to_255_operand) into eight explicit element indices for
;; gen_avx2_pshufd_1: the four 2-bit fields of the immediate, followed
;; by the same four values plus 4.
9682 (define_expand "avx2_pshufdv3"
9683 [(match_operand:V8SI 0 "register_operand")
9684 (match_operand:V8SI 1 "nonimmediate_operand")
9685 (match_operand:SI 2 "const_0_to_255_operand")]
9688 int mask = INTVAL (operands[2]);
9689 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
9690 GEN_INT ((mask >> 0) & 3),
9691 GEN_INT ((mask >> 2) & 3),
9692 GEN_INT ((mask >> 4) & 3),
9693 GEN_INT ((mask >> 6) & 3),
9694 GEN_INT (((mask >> 0) & 3) + 4),
9695 GEN_INT (((mask >> 2) & 3) + 4),
9696 GEN_INT (((mask >> 4) & 3) + 4),
9697 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn pattern that prints vpshufd for V8SI.  Operands 2-5 are the
;; selectors for the first four elements (const_0_to_3_operand); the
;; insn condition requires operands 6-9 to equal those selectors plus 4.
;; The output code rebuilds the immediate from operands 2-5 (two bits
;; each) and stores it in operands[2].
9701 (define_insn "avx2_pshufd_1"
9702 [(set (match_operand:V8SI 0 "register_operand" "=x")
9704 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
9705 (parallel [(match_operand 2 "const_0_to_3_operand")
9706 (match_operand 3 "const_0_to_3_operand")
9707 (match_operand 4 "const_0_to_3_operand")
9708 (match_operand 5 "const_0_to_3_operand")
9709 (match_operand 6 "const_4_to_7_operand")
9710 (match_operand 7 "const_4_to_7_operand")
9711 (match_operand 8 "const_4_to_7_operand")
9712 (match_operand 9 "const_4_to_7_operand")])))]
9714 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
9715 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
9716 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
9717 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
9720 mask |= INTVAL (operands[2]) << 0;
9721 mask |= INTVAL (operands[3]) << 2;
9722 mask |= INTVAL (operands[4]) << 4;
9723 mask |= INTVAL (operands[5]) << 6;
9724 operands[2] = GEN_INT (mask);
9726 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
9728 [(set_attr "type" "sselog1")
9729 (set_attr "prefix" "vex")
9730 (set_attr "length_immediate" "1")
9731 (set_attr "mode" "OI")])
;; Expander that splits the integer constant in operand 2 into its four
;; low 2-bit fields and passes them as separate element selectors to
;; gen_sse2_pshufd_1.  Bits above the low eight are ignored by the
;; "& 3" masking.
9733 (define_expand "sse2_pshufd"
9734 [(match_operand:V4SI 0 "register_operand")
9735 (match_operand:V4SI 1 "nonimmediate_operand")
9736 (match_operand:SI 2 "const_int_operand")]
9739 int mask = INTVAL (operands[2]);
9740 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
9741 GEN_INT ((mask >> 0) & 3),
9742 GEN_INT ((mask >> 2) & 3),
9743 GEN_INT ((mask >> 4) & 3),
9744 GEN_INT ((mask >> 6) & 3)));
;; Insn pattern that prints %vpshufd for V4SI.  Operands 2-5 are the
;; four element selectors (const_0_to_3_operand); the output code packs
;; them, two bits each, into operands[2], which is printed as the
;; immediate.
9748 (define_insn "sse2_pshufd_1"
9749 [(set (match_operand:V4SI 0 "register_operand" "=x")
9751 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9752 (parallel [(match_operand 2 "const_0_to_3_operand")
9753 (match_operand 3 "const_0_to_3_operand")
9754 (match_operand 4 "const_0_to_3_operand")
9755 (match_operand 5 "const_0_to_3_operand")])))]
9759 mask |= INTVAL (operands[2]) << 0;
9760 mask |= INTVAL (operands[3]) << 2;
9761 mask |= INTVAL (operands[4]) << 4;
9762 mask |= INTVAL (operands[5]) << 6;
9763 operands[2] = GEN_INT (mask);
9765 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
9767 [(set_attr "type" "sselog1")
9768 (set_attr "prefix_data16" "1")
9769 (set_attr "prefix" "maybe_vex")
9770 (set_attr "length_immediate" "1")
9771 (set_attr "mode" "TI")])
;; Expander that turns the immediate in operand 2 (matched by
;; const_0_to_255_operand) into eight explicit element indices for
;; gen_avx2_pshuflw_1: the four 2-bit fields of the immediate, followed
;; by the same four values plus 8.
9773 (define_expand "avx2_pshuflwv3"
9774 [(match_operand:V16HI 0 "register_operand")
9775 (match_operand:V16HI 1 "nonimmediate_operand")
9776 (match_operand:SI 2 "const_0_to_255_operand")]
9779 int mask = INTVAL (operands[2]);
9780 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
9781 GEN_INT ((mask >> 0) & 3),
9782 GEN_INT ((mask >> 2) & 3),
9783 GEN_INT ((mask >> 4) & 3),
9784 GEN_INT ((mask >> 6) & 3),
9785 GEN_INT (((mask >> 0) & 3) + 8),
9786 GEN_INT (((mask >> 2) & 3) + 8),
9787 GEN_INT (((mask >> 4) & 3) + 8),
9788 GEN_INT (((mask >> 6) & 3) + 8)));
;; Insn pattern that prints vpshuflw for V16HI.  Operands 2-5 are
;; selectors matched by const_0_to_3_operand and operands 6-9 by
;; const_8_to_11_operand; the insn condition requires operands 6-9 to
;; equal operands 2-5 plus 8.  The output code packs operands 2-5, two
;; bits each, into operands[2], which is printed as the immediate.
9792 (define_insn "avx2_pshuflw_1"
9793 [(set (match_operand:V16HI 0 "register_operand" "=x")
9795 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
9796 (parallel [(match_operand 2 "const_0_to_3_operand")
9797 (match_operand 3 "const_0_to_3_operand")
9798 (match_operand 4 "const_0_to_3_operand")
9799 (match_operand 5 "const_0_to_3_operand")
9804 (match_operand 6 "const_8_to_11_operand")
9805 (match_operand 7 "const_8_to_11_operand")
9806 (match_operand 8 "const_8_to_11_operand")
9807 (match_operand 9 "const_8_to_11_operand")
9813 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
9814 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
9815 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
9816 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
9819 mask |= INTVAL (operands[2]) << 0;
9820 mask |= INTVAL (operands[3]) << 2;
9821 mask |= INTVAL (operands[4]) << 4;
9822 mask |= INTVAL (operands[5]) << 6;
9823 operands[2] = GEN_INT (mask);
9825 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
9827 [(set_attr "type" "sselog")
9828 (set_attr "prefix" "vex")
9829 (set_attr "length_immediate" "1")
9830 (set_attr "mode" "OI")])
;; Expander that splits the integer constant in operand 2 into its four
;; low 2-bit fields and passes them as separate element selectors to
;; gen_sse2_pshuflw_1.
9832 (define_expand "sse2_pshuflw"
9833 [(match_operand:V8HI 0 "register_operand")
9834 (match_operand:V8HI 1 "nonimmediate_operand")
9835 (match_operand:SI 2 "const_int_operand")]
9838 int mask = INTVAL (operands[2]);
9839 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
9840 GEN_INT ((mask >> 0) & 3),
9841 GEN_INT ((mask >> 2) & 3),
9842 GEN_INT ((mask >> 4) & 3),
9843 GEN_INT ((mask >> 6) & 3)));
;; Insn pattern that prints %vpshuflw for V8HI.  Operands 2-5 are
;; selectors matched by const_0_to_3_operand; the output code packs
;; them, two bits each, into operands[2], which is printed as the
;; immediate.  The attributes set prefix_rep to 1 and prefix_data16
;; to 0.
9847 (define_insn "sse2_pshuflw_1"
9848 [(set (match_operand:V8HI 0 "register_operand" "=x")
9850 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9851 (parallel [(match_operand 2 "const_0_to_3_operand")
9852 (match_operand 3 "const_0_to_3_operand")
9853 (match_operand 4 "const_0_to_3_operand")
9854 (match_operand 5 "const_0_to_3_operand")
9862 mask |= INTVAL (operands[2]) << 0;
9863 mask |= INTVAL (operands[3]) << 2;
9864 mask |= INTVAL (operands[4]) << 4;
9865 mask |= INTVAL (operands[5]) << 6;
9866 operands[2] = GEN_INT (mask);
9868 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
9870 [(set_attr "type" "sselog")
9871 (set_attr "prefix_data16" "0")
9872 (set_attr "prefix_rep" "1")
9873 (set_attr "prefix" "maybe_vex")
9874 (set_attr "length_immediate" "1")
9875 (set_attr "mode" "TI")])
;; Expander that turns the immediate in operand 2 (matched by
;; const_0_to_255_operand) into eight explicit element indices for
;; gen_avx2_pshufhw_1: the four 2-bit fields of the immediate plus 4,
;; followed by the same four fields plus 12.
9877 (define_expand "avx2_pshufhwv3"
9878 [(match_operand:V16HI 0 "register_operand")
9879 (match_operand:V16HI 1 "nonimmediate_operand")
9880 (match_operand:SI 2 "const_0_to_255_operand")]
9883 int mask = INTVAL (operands[2]);
9884 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
9885 GEN_INT (((mask >> 0) & 3) + 4),
9886 GEN_INT (((mask >> 2) & 3) + 4),
9887 GEN_INT (((mask >> 4) & 3) + 4),
9888 GEN_INT (((mask >> 6) & 3) + 4),
9889 GEN_INT (((mask >> 0) & 3) + 12),
9890 GEN_INT (((mask >> 2) & 3) + 12),
9891 GEN_INT (((mask >> 4) & 3) + 12),
9892 GEN_INT (((mask >> 6) & 3) + 12)));
;; Insn pattern that prints vpshufhw for V16HI.  Operands 2-5 are
;; selectors matched by const_4_to_7_operand and operands 6-9 by
;; const_12_to_15_operand; the insn condition requires operands 6-9 to
;; equal operands 2-5 plus 8.  The output code subtracts 4 from each of
;; operands 2-5 and packs the results, two bits each, into operands[2],
;; which is printed as the immediate.
9896 (define_insn "avx2_pshufhw_1"
9897 [(set (match_operand:V16HI 0 "register_operand" "=x")
9899 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
9900 (parallel [(const_int 0)
9904 (match_operand 2 "const_4_to_7_operand")
9905 (match_operand 3 "const_4_to_7_operand")
9906 (match_operand 4 "const_4_to_7_operand")
9907 (match_operand 5 "const_4_to_7_operand")
9912 (match_operand 6 "const_12_to_15_operand")
9913 (match_operand 7 "const_12_to_15_operand")
9914 (match_operand 8 "const_12_to_15_operand")
9915 (match_operand 9 "const_12_to_15_operand")])))]
9917 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
9918 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
9919 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
9920 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
9923 mask |= (INTVAL (operands[2]) - 4) << 0;
9924 mask |= (INTVAL (operands[3]) - 4) << 2;
9925 mask |= (INTVAL (operands[4]) - 4) << 4;
9926 mask |= (INTVAL (operands[5]) - 4) << 6;
9927 operands[2] = GEN_INT (mask);
9929 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
9931 [(set_attr "type" "sselog")
9932 (set_attr "prefix" "vex")
9933 (set_attr "length_immediate" "1")
9934 (set_attr "mode" "OI")])
;; Expander that splits the integer constant in operand 2 into its four
;; low 2-bit fields, adds 4 to each, and passes them as separate element
;; selectors to gen_sse2_pshufhw_1.
9936 (define_expand "sse2_pshufhw"
9937 [(match_operand:V8HI 0 "register_operand")
9938 (match_operand:V8HI 1 "nonimmediate_operand")
9939 (match_operand:SI 2 "const_int_operand")]
9942 int mask = INTVAL (operands[2]);
9943 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
9944 GEN_INT (((mask >> 0) & 3) + 4),
9945 GEN_INT (((mask >> 2) & 3) + 4),
9946 GEN_INT (((mask >> 4) & 3) + 4),
9947 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn pattern that prints %vpshufhw for V8HI.  Operands 2-5 are
;; selectors matched by const_4_to_7_operand; the output code subtracts
;; 4 from each and packs the results, two bits each, into operands[2],
;; which is printed as the immediate.  The attributes set prefix_rep
;; to 1 and prefix_data16 to 0.
9951 (define_insn "sse2_pshufhw_1"
9952 [(set (match_operand:V8HI 0 "register_operand" "=x")
9954 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9955 (parallel [(const_int 0)
9959 (match_operand 2 "const_4_to_7_operand")
9960 (match_operand 3 "const_4_to_7_operand")
9961 (match_operand 4 "const_4_to_7_operand")
9962 (match_operand 5 "const_4_to_7_operand")])))]
9966 mask |= (INTVAL (operands[2]) - 4) << 0;
9967 mask |= (INTVAL (operands[3]) - 4) << 2;
9968 mask |= (INTVAL (operands[4]) - 4) << 4;
9969 mask |= (INTVAL (operands[5]) - 4) << 6;
9970 operands[2] = GEN_INT (mask);
9972 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
9974 [(set_attr "type" "sselog")
9975 (set_attr "prefix_rep" "1")
9976 (set_attr "prefix_data16" "0")
9977 (set_attr "prefix" "maybe_vex")
9978 (set_attr "length_immediate" "1")
9979 (set_attr "mode" "TI")])
;; Expander with a V4SI register destination (operand 0) and an SImode
;; source (operand 1).  The preparation statement sets operands[2] to
;; the V4SImode zero constant.
9981 (define_expand "sse2_loadd"
9982 [(set (match_operand:V4SI 0 "register_operand")
9985 (match_operand:SI 1 "nonimmediate_operand"))
9989 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn with five alternatives that combine the SImode operand 2 with
;; the V4SI operand 1 (a register or zero, per reg_or_0_operand).  The
;; first two alternatives print %vmovd, the next two movss, and the last
;; the three-operand vmovss; the "isa" attribute restricts them to
;; sse2, any, noavx, noavx and avx respectively.
9991 (define_insn "sse2_loadld"
9992 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
9995 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
9996 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
10000 %vmovd\t{%2, %0|%0, %2}
10001 %vmovd\t{%2, %0|%0, %2}
10002 movss\t{%2, %0|%0, %2}
10003 movss\t{%2, %0|%0, %2}
10004 vmovss\t{%2, %1, %0|%0, %1, %2}"
10005 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
10006 (set_attr "type" "ssemov")
10007 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
10008 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extracts one scalar element (index in operand 2) from a VI12_128
;; register into a general register (alternative 0) or memory
;; (alternative 1) using %vpextr<ssemodesuffix>.  Alternative 0 prints
;; the destination with the %k modifier.  The prefix_data16 and
;; prefix_extra attributes are computed from whether alternative 0 is
;; used in V8HImode.
10010 (define_insn "*vec_extract<mode>"
10011 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
10012 (vec_select:<ssescalarmode>
10013 (match_operand:VI12_128 1 "register_operand" "x,x")
10015 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
10018 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
10019 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10020 [(set_attr "type" "sselog1")
10021 (set (attr "prefix_data16")
10023 (and (eq_attr "alternative" "0")
10024 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10026 (const_string "*")))
10027 (set (attr "prefix_extra")
10029 (and (eq_attr "alternative" "0")
10030 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10032 (const_string "1")))
10033 (set_attr "length_immediate" "1")
10034 (set_attr "prefix" "maybe_vex")
10035 (set_attr "mode" "TI")])
;; HImode element extraction from a V8HI register into a general
;; register via pextrw, enabled only when TARGET_SSE2 is set and
;; TARGET_SSE4_1 is not.  The destination is printed with the %k
;; modifier.
10037 (define_insn "*vec_extractv8hi_sse2"
10038 [(set (match_operand:HI 0 "register_operand" "=r")
10040 (match_operand:V8HI 1 "register_operand" "x")
10042 [(match_operand:SI 2 "const_0_to_7_operand")])))]
10043 "TARGET_SSE2 && !TARGET_SSE4_1"
10044 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
10045 [(set_attr "type" "sselog1")
10046 (set_attr "prefix_data16" "1")
10047 (set_attr "length_immediate" "1")
10048 (set_attr "mode" "TI")])
;; Extracts one element (index 0..15 in operand 2) of a V16QI register
;; into an SWI48 general register using %vpextrb; the destination is
;; printed with the %k modifier.
;; NOTE(review): the "_zext" suffix suggests the result is
;; zero-extended -- confirm against the full RTL template.
10050 (define_insn "*vec_extractv16qi_zext"
10051 [(set (match_operand:SWI48 0 "register_operand" "=r")
10054 (match_operand:V16QI 1 "register_operand" "x")
10056 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
10058 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
10059 [(set_attr "type" "sselog1")
10060 (set_attr "prefix_extra" "1")
10061 (set_attr "length_immediate" "1")
10062 (set_attr "prefix" "maybe_vex")
10063 (set_attr "mode" "TI")])
;; Extracts one element (index 0..7 in operand 2) of a V8HI register
;; into an SWI48 general register using %vpextrw; the destination is
;; printed with the %k modifier.
;; NOTE(review): the "_zext" suffix suggests the result is
;; zero-extended -- confirm against the full RTL template.
10065 (define_insn "*vec_extractv8hi_zext"
10066 [(set (match_operand:SWI48 0 "register_operand" "=r")
10069 (match_operand:V8HI 1 "register_operand" "x")
10071 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
10073 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
10074 [(set_attr "type" "sselog1")
10075 (set_attr "prefix_data16" "1")
10076 (set_attr "length_immediate" "1")
10077 (set_attr "prefix" "maybe_vex")
10078 (set_attr "mode" "TI")])
;; Element extraction from a VI12_128 vector held in memory (constraint
;; "o") into a general register; the element index is operand 2.
10080 (define_insn "*vec_extract<mode>_mem"
10081 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
10082 (vec_select:<ssescalarmode>
10083 (match_operand:VI12_128 1 "memory_operand" "o")
10085 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extraction of element 0 of an <ssevecmode> vector into an SWI48
;; destination, with four alternatives.  The insn condition requires
;; TARGET_SSE and rejects the case where both operands are memory.  The
;; second alternative is restricted to the sse4 isa.
10089 (define_insn "*vec_extract<ssevecmodelower>_0"
10090 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
10092 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
10093 (parallel [(const_int 0)])))]
10094 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10096 [(set_attr "isa" "*,sse4,*,*")])
;; Extraction of element 0 of a V4SI register into a DImode general
;; register, enabled for TARGET_64BIT && TARGET_SSE2 &&
;; TARGET_INTER_UNIT_MOVES_FROM_VEC.  After reload it is split into a
;; zero_extend:DI of operand 1, which the preparation statement rewrites
;; as an SImode REG with the same register number.
10098 (define_insn_and_split "*vec_extractv4si_0_zext"
10099 [(set (match_operand:DI 0 "register_operand" "=r")
10102 (match_operand:V4SI 1 "register_operand" "x")
10103 (parallel [(const_int 0)]))))]
10104 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
10106 "&& reload_completed"
10107 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10108 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extraction of element 0 of a V2DI vector into a DImode SSE register
;; or memory.  Enabled only for TARGET_SSE without TARGET_64BIT, and
;; never when both operands are memory.
10110 (define_insn "*vec_extractv2di_0_sse"
10111 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
10113 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
10114 (parallel [(const_int 0)])))]
10115 "TARGET_SSE && !TARGET_64BIT
10116 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; Once reload has completed (and TARGET_SSE holds), extracting element
;; 0 from a vector register is replaced by a plain set from operand 1,
;; which the preparation statement rewrites as a <MODE>mode REG with the
;; same register number.
10120 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
10122 (match_operand:<ssevecmode> 1 "register_operand")
10123 (parallel [(const_int 0)])))]
10124 "TARGET_SSE && reload_completed"
10125 [(set (match_dup 0) (match_dup 1))]
10126 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; Extraction of one SImode element (index 0..3 in operand 2) from a
;; V4SI register, with three alternatives selected by
;; which_alternative: %vpextrd into a register or memory, or a byte
;; shift (psrldq without AVX, vpsrldq with AVX) into an SSE register.
;; For the shift forms the element index is multiplied by 4 to obtain
;; the shift count.
10128 (define_insn "*vec_extractv4si"
10129 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
10131 (match_operand:V4SI 1 "register_operand" "x,0,x")
10132 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
10135 switch (which_alternative)
10138 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
10141 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10142 return "psrldq\t{%2, %0|%0, %2}";
10145 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10146 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10149 gcc_unreachable ();
10152 [(set_attr "isa" "*,noavx,avx")
10153 (set_attr "type" "sselog1,sseishft1,sseishft1")
10154 (set_attr "prefix_extra" "1,*,*")
10155 (set_attr "length_immediate" "1")
10156 (set_attr "prefix" "maybe_vex,orig,vex")
10157 (set_attr "mode" "TI")])
;; Extraction of one element (index 0..3 in operand 2) of a V4SI
;; register into a DImode general register using %vpextrd, enabled for
;; TARGET_64BIT && TARGET_SSE4_1.  The destination is printed with the
;; %k modifier.
10159 (define_insn "*vec_extractv4si_zext"
10160 [(set (match_operand:DI 0 "register_operand" "=r")
10163 (match_operand:V4SI 1 "register_operand" "x")
10164 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10165 "TARGET_64BIT && TARGET_SSE4_1"
10166 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
10167 [(set_attr "type" "sselog1")
10168 (set_attr "prefix_extra" "1")
10169 (set_attr "length_immediate" "1")
10170 (set_attr "prefix" "maybe_vex")
10171 (set_attr "mode" "TI")])
;; Extraction of one SImode element (index 0..3 in operand 2) from a
;; V4SI vector held in memory (constraint "o") into either an SSE
;; register or a general register.
10173 (define_insn "*vec_extractv4si_mem"
10174 [(set (match_operand:SI 0 "register_operand" "=x,r")
10176 (match_operand:V4SI 1 "memory_operand" "o,o")
10177 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Extraction of one element of a V4SI vector held in memory into a
;; DImode register, enabled for TARGET_64BIT && TARGET_SSE.  After
;; reload it is split into a zero_extend:DI of operand 1, which the
;; preparation code re-addresses as an SImode memory reference at byte
;; offset (element index * 4).
10181 (define_insn_and_split "*vec_extractv4si_zext_mem"
10182 [(set (match_operand:DI 0 "register_operand" "=x,r")
10185 (match_operand:V4SI 1 "memory_operand" "o,o")
10186 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10187 "TARGET_64BIT && TARGET_SSE"
10189 "&& reload_completed"
10190 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10192 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extraction of element 1 of a V2DI vector into a DImode destination,
;; with seven alternatives.  The insn condition requires TARGET_SSE and
;; rejects the case where both operands are memory.  The templates use
;; %vpextrq, %vmovhps, psrldq/vpsrldq by 8 bytes, and movhlps; the
;; "isa" attribute gates each alternative (x64_sse4, any, sse2_noavx,
;; avx, noavx, any, x64).
10195 (define_insn "*vec_extractv2di_1"
10196 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
10198 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
10199 (parallel [(const_int 1)])))]
10200 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10202 %vpextrq\t{$1, %1, %0|%0, %1, 1}
10203 %vmovhps\t{%1, %0|%0, %1}
10204 psrldq\t{$8, %0|%0, 8}
10205 vpsrldq\t{$8, %1, %0|%0, %1, 8}
10206 movhlps\t{%1, %0|%0, %1}
10209 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
10210 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
10211 (set_attr "length_immediate" "1,*,1,1,*,*,*")
10212 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
10213 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
10214 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
10215 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; Once reload has completed (and TARGET_SSE holds), extracting an
;; element from a VI_128 vector held in memory is replaced by a plain
;; set from operand 1, re-addressed in the scalar mode at byte offset
;; (element index * GET_MODE_SIZE of the scalar mode).
10218 [(set (match_operand:<ssescalarmode> 0 "register_operand")
10219 (vec_select:<ssescalarmode>
10220 (match_operand:VI_128 1 "memory_operand")
10222 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
10223 "TARGET_SSE && reload_completed"
10224 [(set (match_dup 0) (match_dup 1))]
10226 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
10228 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Duplicates an SImode value across a V4SI register.  Three
;; alternatives: %vpshufd with immediate 0 from a register (isa sse2),
;; vbroadcastss from memory (isa avx), and shufps with immediate 0 on
;; the destination itself (isa noavx).
10231 (define_insn "*vec_dupv4si"
10232 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10233 (vec_duplicate:V4SI
10234 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
10237 %vpshufd\t{$0, %1, %0|%0, %1, 0}
10238 vbroadcastss\t{%1, %0|%0, %1}
10239 shufps\t{$0, %0, %0|%0, %0, 0}"
10240 [(set_attr "isa" "sse2,avx,noavx")
10241 (set_attr "type" "sselog1,ssemov,sselog1")
10242 (set_attr "length_immediate" "1,0,1")
10243 (set_attr "prefix_extra" "0,1,*")
10244 (set_attr "prefix" "maybe_vex,vex,orig")
10245 (set_attr "mode" "TI,V4SF,V4SF")])
;; Duplicates a DImode value across a V2DI register, with four
;; alternatives gated by the "isa" attribute (sse2_noavx, avx, sse3,
;; noavx).  The templates shown here use vpunpcklqdq for the avx
;; alternative and %vmovddup for the sse3 alternative.
10247 (define_insn "*vec_dupv2di"
10248 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
10249 (vec_duplicate:V2DI
10250 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
10254 vpunpcklqdq\t{%d1, %0|%0, %d1}
10255 %vmovddup\t{%1, %0|%0, %1}
10257 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
10258 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
10259 (set_attr "prefix" "orig,vex,maybe_vex,orig")
10260 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Builds a V2SI value from two SImode operands, with seven
;; alternatives: pinsrd/vpinsrd inserting operand 2 at position 1,
;; punpckldq/vpunpckldq, %vmovd of operand 1 when operand 2 matches
;; constraint "C", and two MMX-register alternatives (punpckldq and
;; movd).
10262 (define_insn "*vec_concatv2si_sse4_1"
10263 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
10265 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
10266 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
10269 pinsrd\t{$1, %2, %0|%0, %2, 1}
10270 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
10271 punpckldq\t{%2, %0|%0, %2}
10272 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
10273 %vmovd\t{%1, %0|%0, %1}
10274 punpckldq\t{%2, %0|%0, %2}
10275 movd\t{%1, %0|%0, %1}"
10276 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10277 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
10278 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
10279 (set_attr "length_immediate" "1,1,*,*,*,*,*")
10280 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
10281 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
10283 ;; ??? In theory we can match memory for the MMX alternative, but allowing
10284 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
10285 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SI value from two SImode operands when TARGET_SSE is set
;; and TARGET_SSE4_1 is not.  Seven alternatives: punpckldq, movd
;; (twice), unpcklps, movss, and two MMX-register alternatives
;; (punpckldq and movd).  The first three alternatives are restricted
;; to the sse2 isa.
10286 (define_insn "*vec_concatv2si"
10287 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
10289 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
10290 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
10291 "TARGET_SSE && !TARGET_SSE4_1"
10293 punpckldq\t{%2, %0|%0, %2}
10294 movd\t{%1, %0|%0, %1}
10295 movd\t{%1, %0|%0, %1}
10296 unpcklps\t{%2, %0|%0, %2}
10297 movss\t{%1, %0|%0, %1}
10298 punpckldq\t{%2, %0|%0, %2}
10299 movd\t{%1, %0|%0, %1}"
10300 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
10301 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
10302 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Builds a V4SI value from two V2SI operands.  Five alternatives:
;; punpcklqdq/vpunpcklqdq and movlhps when operand 2 is a register,
;; movhps/vmovhps when operand 2 is in memory (printed with the %q
;; modifier in Intel syntax).
10304 (define_insn "*vec_concatv4si"
10305 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
10307 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
10308 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
10311 punpcklqdq\t{%2, %0|%0, %2}
10312 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
10313 movlhps\t{%2, %0|%0, %2}
10314 movhps\t{%2, %0|%0, %q2}
10315 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
10316 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
10317 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
10318 (set_attr "prefix" "orig,vex,orig,orig,vex")
10319 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
10321 ;; movd instead of movq is required to handle broken assemblers.
;; Builds a V2DI value from two DImode operands, with ten alternatives.
;; Alternative 2 (general register source) prints %vmovq when
;; HAVE_AS_IX86_INTERUNIT_MOVQ is nonzero and %vmovd otherwise.
;; Alternatives 0, 1, 5 and 6 get type "sselog"; all others get
;; "ssemov".
10322 (define_insn "vec_concatv2di"
10323 [(set (match_operand:V2DI 0 "register_operand"
10324 "=x,x ,Yi,x ,!x,x,x,x,x,x")
10326 (match_operand:DI 1 "nonimmediate_operand"
10327 " 0,x ,r ,xm,*y,0,x,0,0,x")
10328 (match_operand:DI 2 "vector_move_operand"
10329 "rm,rm,C ,C ,C ,x,x,x,m,m")))]
10332 pinsrq\t{$1, %2, %0|%0, %2, 1}
10333 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
10334 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
10335 %vmovq\t{%1, %0|%0, %1}
10336 movq2dq\t{%1, %0|%0, %1}
10337 punpcklqdq\t{%2, %0|%0, %2}
10338 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
10339 movlhps\t{%2, %0|%0, %2}
10340 movhps\t{%2, %0|%0, %2}
10341 vmovhps\t{%2, %1, %0|%0, %1, %2}"
10342 [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
10345 (eq_attr "alternative" "0,1,5,6")
10346 (const_string "sselog")
10347 (const_string "ssemov")))
10348 (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
10349 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
10350 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
10351 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
10352 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander that delegates to ix86_expand_sse_unpack with both boolean
;; arguments false.
;; NOTE(review): by the pattern name this is the signed, low-half
;; unpack -- confirm the flag order against ix86_expand_sse_unpack.
10354 (define_expand "vec_unpacks_lo_<mode>"
10355 [(match_operand:<sseunpackmode> 0 "register_operand")
10356 (match_operand:VI124_AVX512F 1 "register_operand")]
10358 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Expander that delegates to ix86_expand_sse_unpack with the boolean
;; arguments (false, true).
;; NOTE(review): by the pattern name this is the signed, high-half
;; unpack -- confirm the flag order against ix86_expand_sse_unpack.
10360 (define_expand "vec_unpacks_hi_<mode>"
10361 [(match_operand:<sseunpackmode> 0 "register_operand")
10362 (match_operand:VI124_AVX512F 1 "register_operand")]
10364 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Expander that delegates to ix86_expand_sse_unpack with the boolean
;; arguments (true, false).
;; NOTE(review): by the pattern name this is the unsigned, low-half
;; unpack -- confirm the flag order against ix86_expand_sse_unpack.
10366 (define_expand "vec_unpacku_lo_<mode>"
10367 [(match_operand:<sseunpackmode> 0 "register_operand")
10368 (match_operand:VI124_AVX512F 1 "register_operand")]
10370 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Expander that delegates to ix86_expand_sse_unpack with both boolean
;; arguments true.
;; NOTE(review): by the pattern name this is the unsigned, high-half
;; unpack -- confirm the flag order against ix86_expand_sse_unpack.
10372 (define_expand "vec_unpacku_hi_<mode>"
10373 [(match_operand:<sseunpackmode> 0 "register_operand")
10374 (match_operand:VI124_AVX512F 1 "register_operand")]
10376 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
10378 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10382 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned-average pattern on VI12_AVX2 modes.  The
;; RTL is a truncate of a logical right shift of a sum of the two
;; zero-extended inputs, all computed in <ssedoublemode>.  The
;; preparation code sets operands[3] to the constant-one vector of
;; <MODE>mode and calls ix86_fixup_binary_operands_no_copy for PLUS.
10384 (define_expand "<sse2_avx2>_uavg<mode>3"
10385 [(set (match_operand:VI12_AVX2 0 "register_operand")
10386 (truncate:VI12_AVX2
10387 (lshiftrt:<ssedoublemode>
10388 (plus:<ssedoublemode>
10389 (plus:<ssedoublemode>
10390 (zero_extend:<ssedoublemode>
10391 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
10392 (zero_extend:<ssedoublemode>
10393 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
10398 operands[3] = CONST1_RTX(<MODE>mode);
10399 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Insn matching the unsigned-average RTL shape (sum of the two
;; zero-extended inputs plus operand 3, which must satisfy
;; const1_operand, shifted right and truncated).  Enabled for
;; TARGET_SSE2 when ix86_binary_operator_ok accepts the operands.
;; Prints pavg<ssemodesuffix> (noavx) or vpavg<ssemodesuffix> (avx);
;; operand 1 is marked commutative with "%".
10402 (define_insn "*<sse2_avx2>_uavg<mode>3"
10403 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
10404 (truncate:VI12_AVX2
10405 (lshiftrt:<ssedoublemode>
10406 (plus:<ssedoublemode>
10407 (plus:<ssedoublemode>
10408 (zero_extend:<ssedoublemode>
10409 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
10410 (zero_extend:<ssedoublemode>
10411 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
10412 (match_operand:VI12_AVX2 3 "const1_operand"))
10414 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
10416 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
10417 vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10418 [(set_attr "isa" "noavx,avx")
10419 (set_attr "type" "sseiadd")
10420 (set_attr "prefix_data16" "1,*")
10421 (set_attr "prefix" "orig,vex")
10422 (set_attr "mode" "<sseinsnmode>")])
10424 ;; The correct representation for this is absolutely enormous, and
10425 ;; surely not generally useful.
;; Insn printing psadbw (noavx) or vpsadbw (avx).  The two inputs are
;; in <ssebytemode> and the result is a VI8_AVX2 register; the
;; operation itself is wrapped in an unspec rather than spelled out in
;; RTL.
10426 (define_insn "<sse2_avx2>_psadbw"
10427 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
10429 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
10430 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
10434 psadbw\t{%2, %0|%0, %2}
10435 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
10436 [(set_attr "isa" "noavx,avx")
10437 (set_attr "type" "sseiadd")
10438 (set_attr "atom_unit" "simul")
10439 (set_attr "prefix_data16" "1,*")
10440 (set_attr "prefix" "orig,vex")
10441 (set_attr "mode" "<sseinsnmode>")])
;; Insn printing %vmovmsk<ssemodesuffix>: takes a VF_128_256 vector
;; register (operand 1) and writes an SImode general register
;; (operand 0).
10443 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
10444 [(set (match_operand:SI 0 "register_operand" "=r")
10446 [(match_operand:VF_128_256 1 "register_operand" "x")]
10449 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
10450 [(set_attr "type" "ssemov")
10451 (set_attr "prefix" "maybe_vex")
10452 (set_attr "mode" "<MODE>")])
;; Insn printing vpmovmskb: takes a V32QI vector register (operand 1)
;; and writes an SImode general register (operand 0).  Always
;; VEX-encoded (prefix "vex").
10454 (define_insn "avx2_pmovmskb"
10455 [(set (match_operand:SI 0 "register_operand" "=r")
10456 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
10459 "vpmovmskb\t{%1, %0|%0, %1}"
10460 [(set_attr "type" "ssemov")
10461 (set_attr "prefix" "vex")
10462 (set_attr "mode" "DI")])
;; Insn printing %vpmovmskb: takes a V16QI vector register (operand 1)
;; and writes an SImode general register (operand 0).
10464 (define_insn "sse2_pmovmskb"
10465 [(set (match_operand:SI 0 "register_operand" "=r")
10466 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
10469 "%vpmovmskb\t{%1, %0|%0, %1}"
10470 [(set_attr "type" "ssemov")
10471 (set_attr "prefix_data16" "1")
10472 (set_attr "prefix" "maybe_vex")
10473 (set_attr "mode" "SI")])
;; Expander whose destination is a V16QI memory operand (operand 0),
;; set from an unspec over two V16QI register operands (1 and 2).
10475 (define_expand "sse2_maskmovdqu"
10476 [(set (match_operand:V16QI 0 "memory_operand")
10477 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
10478 (match_operand:V16QI 2 "register_operand")
;; Matching insn for maskmovdqu.  Operand 0 is the destination address
;; (mode iterator P) and is constrained to register class "D"; it does not
;; appear in the output template, which prints only operands 1 and 2.
;; When Pmode != word_mode an "addr32" prefix is written directly to
;; asm_out_file ahead of the template, and length_address accounts for it.
10483 (define_insn "*sse2_maskmovdqu"
10484 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
10485 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
10486 (match_operand:V16QI 2 "register_operand" "x")
10487 (mem:V16QI (match_dup 0))]
10491 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
10492 that requires %v to be at the beginning of the opcode name. */
10493 if (Pmode != word_mode)
10494 fputs ("\taddr32", asm_out_file);
10495 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
10497 [(set_attr "type" "ssemov")
10498 (set_attr "prefix_data16" "1")
10499 (set (attr "length_address")
10500 (symbol_ref ("Pmode != word_mode")))
10501 ;; The implicit %rdi operand confuses default length_vex computation.
10502 (set (attr "length_vex")
10503 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
10504 (set_attr "prefix" "maybe_vex")
10505 (set_attr "mode" "TI")])
;; unspec_volatile taking an SImode memory operand.  The attributes mark
;; it as a memory load ("memory" "load") with atom_sse_attr "mxcsr".
10507 (define_insn "sse_ldmxcsr"
10508 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
10512 [(set_attr "type" "sse")
10513 (set_attr "atom_sse_attr" "mxcsr")
10514 (set_attr "prefix" "maybe_vex")
10515 (set_attr "memory" "load")])
;; Sets the SImode memory operand 0 from an unspec_volatile
;; (UNSPECV_STMXCSR) that has no real input operand.  The attributes mark
;; it as a memory store with atom_sse_attr "mxcsr".
10517 (define_insn "sse_stmxcsr"
10518 [(set (match_operand:SI 0 "memory_operand" "=m")
10519 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
10522 [(set_attr "type" "sse")
10523 (set_attr "atom_sse_attr" "mxcsr")
10524 (set_attr "prefix" "maybe_vex")
10525 (set_attr "memory" "store")])
;; unspec_volatile whose single operand is an address (address_operand,
;; constraint "p"), not a memory reference.  The "memory" attribute is
;; "unknown" and atom_sse_attr is "fence".
10527 (define_insn "sse2_clflush"
10528 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
10532 [(set_attr "type" "sse")
10533 (set_attr "atom_sse_attr" "fence")
10534 (set_attr "memory" "unknown")])
;; unspec_volatile with two SImode inputs pinned by constraint to the "a"
;; and "c" registers.  The insn length is fixed at 3 bytes.
10537 (define_insn "sse3_mwait"
10538 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
10539 (match_operand:SI 1 "register_operand" "c")]
10542 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
10543 ;; Since 32bit register operands are implicitly zero extended to 64bit,
10544 ;; we only need to set up 32bit registers.
10546 [(set_attr "length" "3")])
;; unspec_volatile with three inputs pinned by constraint: operand 0 (mode
;; iterator P) in "a", and SImode operands 1 and 2 in "c" and "d".
;; The length is 3 bytes, plus 1 when Pmode != word_mode.
;; NOTE(review): the extra byte is presumably an address-size prefix --
;; confirm against the output template.
10548 (define_insn "sse3_monitor_<mode>"
10549 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
10550 (match_operand:SI 1 "register_operand" "c")
10551 (match_operand:SI 2 "register_operand" "d")]
10554 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
10555 ;; RCX and RDX are used. Since 32bit register operands are implicitly
10556 ;; zero extended to 64bit, we only need to set up 32bit registers.
10558 [(set (attr "length")
10559 (symbol_ref ("(Pmode != word_mode) + 3")))])
10561 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10563 ;; SSSE3 instructions
10565 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; RTL codes iterated over by the ph<plusminus_mnemonic>w patterns below:
;; plain and signed-saturating (ss_*) addition and subtraction.
10567 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal add/subtract of 16-bit elements.  Each result
;; element applies ssse3_plusminus to one adjacent pair (0,1), (2,3), ...
;; (14,15) of operand 1 or of operand 2.  Three-operand VEX form only.
;; NOTE(review): the hardware instruction works within each 128-bit lane
;; (see the Intel SDM entry for VPHADDW), while the selections below list
;; every operand 1 pair before the operand 2 pairs -- confirm that the
;; element order of this pattern matches the instruction.
10569 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
10570 [(set (match_operand:V16HI 0 "register_operand" "=x")
10575 (ssse3_plusminus:HI
10577 (match_operand:V16HI 1 "register_operand" "x")
10578 (parallel [(const_int 0)]))
10579 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10580 (ssse3_plusminus:HI
10581 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10582 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10584 (ssse3_plusminus:HI
10585 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
10586 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
10587 (ssse3_plusminus:HI
10588 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
10589 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
10592 (ssse3_plusminus:HI
10593 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
10594 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
10595 (ssse3_plusminus:HI
10596 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
10597 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
10599 (ssse3_plusminus:HI
10600 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
10601 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
10602 (ssse3_plusminus:HI
10603 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
10604 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
10608 (ssse3_plusminus:HI
10610 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
10611 (parallel [(const_int 0)]))
10612 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10613 (ssse3_plusminus:HI
10614 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10615 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
10617 (ssse3_plusminus:HI
10618 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
10619 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
10620 (ssse3_plusminus:HI
10621 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
10622 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
10625 (ssse3_plusminus:HI
10626 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
10627 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
10628 (ssse3_plusminus:HI
10629 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
10630 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
10632 (ssse3_plusminus:HI
10633 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
10634 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
10635 (ssse3_plusminus:HI
10636 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
10637 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
10639 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
10640 [(set_attr "type" "sseiadd")
10641 (set_attr "prefix_extra" "1")
10642 (set_attr "prefix" "vex")
10643 (set_attr "mode" "OI")])
;; 128-bit horizontal add/subtract of 16-bit elements.  Each result
;; element applies ssse3_plusminus to one adjacent pair (0,1) ... (6,7),
;; first for operand 1 and then for operand 2.  Alternative 0 is the
;; two-operand SSE form (operand 1 tied to the destination), alternative 1
;; the three-operand VEX form.
10645 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
10646 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10650 (ssse3_plusminus:HI
10652 (match_operand:V8HI 1 "register_operand" "0,x")
10653 (parallel [(const_int 0)]))
10654 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10655 (ssse3_plusminus:HI
10656 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10657 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10659 (ssse3_plusminus:HI
10660 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
10661 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
10662 (ssse3_plusminus:HI
10663 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
10664 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
10667 (ssse3_plusminus:HI
10669 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
10670 (parallel [(const_int 0)]))
10671 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10672 (ssse3_plusminus:HI
10673 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10674 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
10676 (ssse3_plusminus:HI
10677 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
10678 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
10679 (ssse3_plusminus:HI
10680 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
10681 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
10684 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
10685 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
10686 [(set_attr "isa" "noavx,avx")
10687 (set_attr "type" "sseiadd")
10688 (set_attr "atom_unit" "complex")
10689 (set_attr "prefix_data16" "1,*")
10690 (set_attr "prefix_extra" "1")
10691 (set_attr "prefix" "orig,vex")
10692 (set_attr "mode" "TI")])
;; 64-bit horizontal add/subtract of 16-bit elements on "y"-class (MMX)
;; registers.  Pairs (0,1) and (2,3) of operand 1, then of operand 2.
;; Two-operand form only: operand 1 is tied to the destination.
10694 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
10695 [(set (match_operand:V4HI 0 "register_operand" "=y")
10698 (ssse3_plusminus:HI
10700 (match_operand:V4HI 1 "register_operand" "0")
10701 (parallel [(const_int 0)]))
10702 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10703 (ssse3_plusminus:HI
10704 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10705 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10707 (ssse3_plusminus:HI
10709 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
10710 (parallel [(const_int 0)]))
10711 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10712 (ssse3_plusminus:HI
10713 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10714 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
10716 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
10717 [(set_attr "type" "sseiadd")
10718 (set_attr "atom_unit" "complex")
10719 (set_attr "prefix_extra" "1")
10720 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
10721 (set_attr "mode" "DI")])
;; 256-bit horizontal add/subtract of 32-bit elements.  Adjacent pairs
;; (0,1) ... (6,7) of operand 1 and of operand 2 are selected.
;; Three-operand VEX form only.
;; NOTE(review): the hardware instruction works within each 128-bit lane
;; (see the Intel SDM entry for VPHADDD), while the selections below list
;; every operand 1 pair before the operand 2 pairs -- confirm that the
;; element order of this pattern matches the instruction.
10723 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
10724 [(set (match_operand:V8SI 0 "register_operand" "=x")
10730 (match_operand:V8SI 1 "register_operand" "x")
10731 (parallel [(const_int 0)]))
10732 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
10734 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
10735 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
10738 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
10739 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
10741 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
10742 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
10747 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
10748 (parallel [(const_int 0)]))
10749 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
10751 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
10752 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
10755 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
10756 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
10758 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
10759 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
10761 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
10762 [(set_attr "type" "sseiadd")
10763 (set_attr "prefix_extra" "1")
10764 (set_attr "prefix" "vex")
10765 (set_attr "mode" "OI")])
;; 128-bit horizontal add/subtract of 32-bit elements.  Pairs (0,1) and
;; (2,3) of operand 1, then of operand 2.  Alternative 0 is the
;; two-operand SSE form (operand 1 tied to the destination), alternative 1
;; the three-operand VEX form.
10767 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
10768 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10773 (match_operand:V4SI 1 "register_operand" "0,x")
10774 (parallel [(const_int 0)]))
10775 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
10777 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
10778 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
10782 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
10783 (parallel [(const_int 0)]))
10784 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
10786 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
10787 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
10790 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
10791 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
10792 [(set_attr "isa" "noavx,avx")
10793 (set_attr "type" "sseiadd")
10794 (set_attr "atom_unit" "complex")
10795 (set_attr "prefix_data16" "1,*")
10796 (set_attr "prefix_extra" "1")
10797 (set_attr "prefix" "orig,vex")
10798 (set_attr "mode" "TI")])
;; 64-bit horizontal add/subtract of 32-bit elements on "y"-class (MMX)
;; registers.  The pair (0,1) of operand 1, then of operand 2.
;; Two-operand form only: operand 1 is tied to the destination.
10800 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
10801 [(set (match_operand:V2SI 0 "register_operand" "=y")
10805 (match_operand:V2SI 1 "register_operand" "0")
10806 (parallel [(const_int 0)]))
10807 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
10810 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
10811 (parallel [(const_int 0)]))
10812 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
10814 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
10815 [(set_attr "type" "sseiadd")
10816 (set_attr "atom_unit" "complex")
10817 (set_attr "prefix_extra" "1")
10818 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
10819 (set_attr "mode" "DI")])
;; 256-bit vpmaddubsw: V32QI inputs, V16HI result.  The pattern selects
;; the even-indexed bytes (0, 2, ..., 30) and the odd-indexed bytes
;; (1, 3, ..., 31) of operands 1 and 2 as separate 16-element vectors.
;; Three-operand VEX form only.
10821 (define_insn "avx2_pmaddubsw256"
10822 [(set (match_operand:V16HI 0 "register_operand" "=x")
10827 (match_operand:V32QI 1 "register_operand" "x")
10828 (parallel [(const_int 0) (const_int 2)
10829 (const_int 4) (const_int 6)
10830 (const_int 8) (const_int 10)
10831 (const_int 12) (const_int 14)
10832 (const_int 16) (const_int 18)
10833 (const_int 20) (const_int 22)
10834 (const_int 24) (const_int 26)
10835 (const_int 28) (const_int 30)])))
10838 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
10839 (parallel [(const_int 0) (const_int 2)
10840 (const_int 4) (const_int 6)
10841 (const_int 8) (const_int 10)
10842 (const_int 12) (const_int 14)
10843 (const_int 16) (const_int 18)
10844 (const_int 20) (const_int 22)
10845 (const_int 24) (const_int 26)
10846 (const_int 28) (const_int 30)]))))
10849 (vec_select:V16QI (match_dup 1)
10850 (parallel [(const_int 1) (const_int 3)
10851 (const_int 5) (const_int 7)
10852 (const_int 9) (const_int 11)
10853 (const_int 13) (const_int 15)
10854 (const_int 17) (const_int 19)
10855 (const_int 21) (const_int 23)
10856 (const_int 25) (const_int 27)
10857 (const_int 29) (const_int 31)])))
10859 (vec_select:V16QI (match_dup 2)
10860 (parallel [(const_int 1) (const_int 3)
10861 (const_int 5) (const_int 7)
10862 (const_int 9) (const_int 11)
10863 (const_int 13) (const_int 15)
10864 (const_int 17) (const_int 19)
10865 (const_int 21) (const_int 23)
10866 (const_int 25) (const_int 27)
10867 (const_int 29) (const_int 31)]))))))]
10869 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
10870 [(set_attr "type" "sseiadd")
10871 (set_attr "prefix_extra" "1")
10872 (set_attr "prefix" "vex")
10873 (set_attr "mode" "OI")])
;; 128-bit pmaddubsw: V16QI inputs, V8HI result.  The pattern selects the
;; even-indexed bytes (0, 2, ..., 14) and the odd-indexed bytes
;; (1, 3, ..., 15) of operands 1 and 2 as separate 8-element vectors.
;; Alternative 0 is the two-operand SSE form (operand 1 tied to the
;; destination), alternative 1 the three-operand VEX form.
10875 (define_insn "ssse3_pmaddubsw128"
10876 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10881 (match_operand:V16QI 1 "register_operand" "0,x")
10882 (parallel [(const_int 0) (const_int 2)
10883 (const_int 4) (const_int 6)
10884 (const_int 8) (const_int 10)
10885 (const_int 12) (const_int 14)])))
10888 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
10889 (parallel [(const_int 0) (const_int 2)
10890 (const_int 4) (const_int 6)
10891 (const_int 8) (const_int 10)
10892 (const_int 12) (const_int 14)]))))
10895 (vec_select:V8QI (match_dup 1)
10896 (parallel [(const_int 1) (const_int 3)
10897 (const_int 5) (const_int 7)
10898 (const_int 9) (const_int 11)
10899 (const_int 13) (const_int 15)])))
10901 (vec_select:V8QI (match_dup 2)
10902 (parallel [(const_int 1) (const_int 3)
10903 (const_int 5) (const_int 7)
10904 (const_int 9) (const_int 11)
10905 (const_int 13) (const_int 15)]))))))]
10908 pmaddubsw\t{%2, %0|%0, %2}
10909 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
10910 [(set_attr "isa" "noavx,avx")
10911 (set_attr "type" "sseiadd")
10912 (set_attr "atom_unit" "simul")
10913 (set_attr "prefix_data16" "1,*")
10914 (set_attr "prefix_extra" "1")
10915 (set_attr "prefix" "orig,vex")
10916 (set_attr "mode" "TI")])
;; 64-bit pmaddubsw on "y"-class (MMX) registers: V8QI inputs, V4HI
;; result.  The pattern selects the even-indexed bytes (0, 2, 4, 6) and
;; the odd-indexed bytes (1, 3, 5, 7) of operands 1 and 2.
;; Two-operand form only: operand 1 is tied to the destination.
10918 (define_insn "ssse3_pmaddubsw"
10919 [(set (match_operand:V4HI 0 "register_operand" "=y")
10924 (match_operand:V8QI 1 "register_operand" "0")
10925 (parallel [(const_int 0) (const_int 2)
10926 (const_int 4) (const_int 6)])))
10929 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
10930 (parallel [(const_int 0) (const_int 2)
10931 (const_int 4) (const_int 6)]))))
10934 (vec_select:V4QI (match_dup 1)
10935 (parallel [(const_int 1) (const_int 3)
10936 (const_int 5) (const_int 7)])))
10938 (vec_select:V4QI (match_dup 2)
10939 (parallel [(const_int 1) (const_int 3)
10940 (const_int 5) (const_int 7)]))))))]
10942 "pmaddubsw\t{%2, %0|%0, %2}"
10943 [(set_attr "type" "sseiadd")
10944 (set_attr "atom_unit" "simul")
10945 (set_attr "prefix_extra" "1")
10946 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
10947 (set_attr "mode" "DI")])
;; Modes accepted by the pmulhrsw expander below: V4HI, V8HI, and V16HI
;; only when TARGET_AVX2.
10949 (define_mode_iterator PMULHRSW
10950 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Expander for pmulhrsw over the PMULHRSW modes.  The preparation code
;; sets operands[3] to CONST1_RTX (<MODE>mode) and then calls
;; ix86_fixup_binary_operands_no_copy for MULT on the operands.
10952 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
10953 [(set (match_operand:PMULHRSW 0 "register_operand")
10955 (lshiftrt:<ssedoublemode>
10956 (plus:<ssedoublemode>
10957 (lshiftrt:<ssedoublemode>
10958 (mult:<ssedoublemode>
10959 (sign_extend:<ssedoublemode>
10960 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
10961 (sign_extend:<ssedoublemode>
10962 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
10968 operands[3] = CONST1_RTX(<MODE>mode);
10969 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matching insn for pmulhrsw on VI2_AVX2 modes.  The pattern is built
;; from a multiply of the two sign-extended operands, an inner lshiftrt,
;; a plus involving operand 3 (which must satisfy const1_operand) and an
;; outer lshiftrt, all in <ssedoublemode>.  The "%" on operand 1 marks
;; operands 1 and 2 as commutative.  Alternative 0 is the two-operand SSE
;; form, alternative 1 the three-operand VEX form.
10972 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
10973 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
10975 (lshiftrt:<ssedoublemode>
10976 (plus:<ssedoublemode>
10977 (lshiftrt:<ssedoublemode>
10978 (mult:<ssedoublemode>
10979 (sign_extend:<ssedoublemode>
10980 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
10981 (sign_extend:<ssedoublemode>
10982 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
10984 (match_operand:VI2_AVX2 3 "const1_operand"))
10986 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
10988 pmulhrsw\t{%2, %0|%0, %2}
10989 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
10990 [(set_attr "isa" "noavx,avx")
10991 (set_attr "type" "sseimul")
10992 (set_attr "prefix_data16" "1,*")
10993 (set_attr "prefix_extra" "1")
10994 (set_attr "prefix" "orig,vex")
10995 (set_attr "mode" "<sseinsnmode>")])
;; V4HI pmulhrsw on "y"-class (MMX) registers.  Operand 3 must satisfy
;; const1_operand; "%" marks operands 1 and 2 as commutative.
;; Two-operand form only: operand 1 is tied to the destination.
10997 (define_insn "*ssse3_pmulhrswv4hi3"
10998 [(set (match_operand:V4HI 0 "register_operand" "=y")
11005 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
11007 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
11009 (match_operand:V4HI 3 "const1_operand"))
11011 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
11012 "pmulhrsw\t{%2, %0|%0, %2}"
11013 [(set_attr "type" "sseimul")
11014 (set_attr "prefix_extra" "1")
11015 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11016 (set_attr "mode" "DI")])
;; pshufb on VI1_AVX2 byte vectors: operand 1 is a register, operand 2 a
;; register or memory.  Alternative 0 is the two-operand SSE form
;; (operand 1 tied to the destination), alternative 1 the three-operand
;; VEX form.
11018 (define_insn "<ssse3_avx2>_pshufb<mode>3"
11019 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11021 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11022 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
11026 pshufb\t{%2, %0|%0, %2}
11027 vpshufb\t{%2, %1, %0|%0, %1, %2}"
11028 [(set_attr "isa" "noavx,avx")
11029 (set_attr "type" "sselog1")
11030 (set_attr "prefix_data16" "1,*")
11031 (set_attr "prefix_extra" "1")
11032 (set_attr "prefix" "orig,vex")
11033 (set_attr "btver2_decode" "vector,vector")
11034 (set_attr "mode" "<sseinsnmode>")])
;; V8QI pshufb on "y"-class (MMX) registers.  Two-operand form only:
;; operand 1 is tied to the destination, operand 2 is a register or memory.
;; The output template is a plain string; no ";" follows it, because in a
;; machine description ";" only starts a comment.
11036 (define_insn "ssse3_pshufbv8qi3"
11037 [(set (match_operand:V8QI 0 "register_operand" "=y")
11038 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
11039 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
11042 "pshufb\t{%2, %0|%0, %2}"
11043 [(set_attr "type" "sselog1")
11044 (set_attr "prefix_extra" "1")
11045 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11046 (set_attr "mode" "DI")])
;; psign<ssemodesuffix> on VI124_AVX2 integer vectors.  Alternative 0 is
;; the two-operand SSE form (operand 1 tied to the destination),
;; alternative 1 the three-operand VEX form.
11048 (define_insn "<ssse3_avx2>_psign<mode>3"
11049 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
11051 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
11052 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
11056 psign<ssemodesuffix>\t{%2, %0|%0, %2}
11057 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11058 [(set_attr "isa" "noavx,avx")
11059 (set_attr "type" "sselog1")
11060 (set_attr "prefix_data16" "1,*")
11061 (set_attr "prefix_extra" "1")
11062 (set_attr "prefix" "orig,vex")
11063 (set_attr "mode" "<sseinsnmode>")])
;; psign<mmxvecsize> on MMXMODEI vectors in "y"-class (MMX) registers.
;; Two-operand form only: operand 1 is tied to the destination.
;; The output template is a plain string; no ";" follows it, because in a
;; machine description ";" only starts a comment.
11065 (define_insn "ssse3_psign<mode>3"
11066 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11068 [(match_operand:MMXMODEI 1 "register_operand" "0")
11069 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
11072 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
11073 [(set_attr "type" "sselog1")
11074 (set_attr "prefix_extra" "1")
11075 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11076 (set_attr "mode" "DI")])
;; palignr on SSESCALARMODE operands.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so the emitted immediate is operand 3 / 8.  The alternative
;; chosen selects between the two-operand SSE template and the
;; three-operand VEX template.
11078 (define_insn "<ssse3_avx2>_palignr<mode>"
11079 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
11080 (unspec:SSESCALARMODE
11081 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
11082 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
11083 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
11087 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11089 switch (which_alternative)
11092 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11094 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11096 gcc_unreachable ();
11099 [(set_attr "isa" "noavx,avx")
11100 (set_attr "type" "sseishft")
11101 (set_attr "atom_unit" "sishuf")
11102 (set_attr "prefix_data16" "1,*")
11103 (set_attr "prefix_extra" "1")
11104 (set_attr "length_immediate" "1")
11105 (set_attr "prefix" "orig,vex")
11106 (set_attr "mode" "<sseinsnmode>")])
;; DImode palignr on "y"-class (MMX) registers.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing.  Two-operand form only: operand 1 is tied to the destination.
11108 (define_insn "ssse3_palignrdi"
11109 [(set (match_operand:DI 0 "register_operand" "=y")
11110 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
11111 (match_operand:DI 2 "nonimmediate_operand" "ym")
11112 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
11116 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11117 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11119 [(set_attr "type" "sseishft")
11120 (set_attr "atom_unit" "sishuf")
11121 (set_attr "prefix_extra" "1")
11122 (set_attr "length_immediate" "1")
11123 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11124 (set_attr "mode" "DI")])
;; Vector integer absolute value (RTL abs) over VI124_AVX2_48_AVX512F,
;; emitted as %vpabs<ssemodesuffix>.  Enabled when TARGET_SSSE3 and
;; <mask_mode512bit_condition> hold; <mask_operand2> is appended to the
;; destination in the template.
11126 (define_insn "<mask_codefor>abs<mode>2<mask_name>"
11127 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
11128 (abs:VI124_AVX2_48_AVX512F
11129 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
11130 "TARGET_SSSE3 && <mask_mode512bit_condition>"
11131 "%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11132 [(set_attr "type" "sselog1")
11133 (set_attr "prefix_data16" "1")
11134 (set_attr "prefix_extra" "1")
11135 (set_attr "prefix" "maybe_vex")
11136 (set_attr "mode" "<sseinsnmode>")])
;; Named expander for vector abs over VI124_AVX2_48_AVX512F.  Its
;; preparation code can call ix86_expand_sse2_abs (operands[0],
;; operands[1]).
;; NOTE(review): presumably that call is the fallback used when the pabs
;; insn is unavailable -- confirm against the guarding condition.
11138 (define_expand "abs<mode>2"
11139 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
11140 (abs:VI124_AVX2_48_AVX512F
11141 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand")))]
11146 ix86_expand_sse2_abs (operands[0], operands[1]);
;; pabs<mmxvecsize> on MMXMODEI vectors: destination in a "y"-class (MMX)
;; register, source in a register or memory.
;; The output template is a plain string; no ";" follows it, because in a
;; machine description ";" only starts a comment.
11151 (define_insn "abs<mode>2"
11152 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11154 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
11156 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
11157 [(set_attr "type" "sselog1")
11158 (set_attr "prefix_rep" "0")
11159 (set_attr "prefix_extra" "1")
11160 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11161 (set_attr "mode" "DI")])
11163 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11165 ;; AMD SSE4A instructions
11167 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnt<ssemodesuffix>: stores a scalar MODEF value held in an "x"-class
;; register (operand 1) to the memory operand 0.
11169 (define_insn "sse4a_movnt<mode>"
11170 [(set (match_operand:MODEF 0 "memory_operand" "=m")
11172 [(match_operand:MODEF 1 "register_operand" "x")]
11175 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
11176 [(set_attr "type" "ssemov")
11177 (set_attr "mode" "<MODE>")])
;; Vector-register variant of the movnt store: element 0 of the VF_128
;; register operand 1 is selected and stored to the <ssescalarmode>
;; memory operand 0.
11179 (define_insn "sse4a_vmmovnt<mode>"
11180 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
11181 (unspec:<ssescalarmode>
11182 [(vec_select:<ssescalarmode>
11183 (match_operand:VF_128 1 "register_operand" "x")
11184 (parallel [(const_int 0)]))]
11187 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11188 [(set_attr "type" "ssemov")
11189 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq.  Operand 1 is tied to the destination;
;; operands 2 and 3 must satisfy const_0_to_255_operand and are printed
;; as two immediates (length_immediate "2").
11191 (define_insn "sse4a_extrqi"
11192 [(set (match_operand:V2DI 0 "register_operand" "=x")
11193 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11194 (match_operand 2 "const_0_to_255_operand")
11195 (match_operand 3 "const_0_to_255_operand")]
11198 "extrq\t{%3, %2, %0|%0, %2, %3}"
11199 [(set_attr "type" "sse")
11200 (set_attr "prefix_data16" "1")
11201 (set_attr "length_immediate" "2")
11202 (set_attr "mode" "TI")])
;; Register form of extrq.  Operand 1 is tied to the destination; the
;; second input is the V16QI register operand 2.
11204 (define_insn "sse4a_extrq"
11205 [(set (match_operand:V2DI 0 "register_operand" "=x")
11206 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11207 (match_operand:V16QI 2 "register_operand" "x")]
11210 "extrq\t{%2, %0|%0, %2}"
11211 [(set_attr "type" "sse")
11212 (set_attr "prefix_data16" "1")
11213 (set_attr "mode" "TI")])
;; Immediate form of insertq.  Operand 1 is tied to the destination,
;; operand 2 is a V2DI register, and operands 3 and 4 must satisfy
;; const_0_to_255_operand; they are printed as two immediates
;; (length_immediate "2").
11215 (define_insn "sse4a_insertqi"
11216 [(set (match_operand:V2DI 0 "register_operand" "=x")
11217 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11218 (match_operand:V2DI 2 "register_operand" "x")
11219 (match_operand 3 "const_0_to_255_operand")
11220 (match_operand 4 "const_0_to_255_operand")]
11223 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
11224 [(set_attr "type" "sseins")
11225 (set_attr "prefix_data16" "0")
11226 (set_attr "prefix_rep" "1")
11227 (set_attr "length_immediate" "2")
11228 (set_attr "mode" "TI")])
;; Register form of insertq.  Operand 1 is tied to the destination; the
;; second input is the V2DI register operand 2.
11230 (define_insn "sse4a_insertq"
11231 [(set (match_operand:V2DI 0 "register_operand" "=x")
11232 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11233 (match_operand:V2DI 2 "register_operand" "x")]
11236 "insertq\t{%2, %0|%0, %2}"
11237 [(set_attr "type" "sseins")
11238 (set_attr "prefix_data16" "0")
11239 (set_attr "prefix_rep" "1")
11240 (set_attr "mode" "TI")])
11242 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11244 ;; Intel SSE4.1 instructions
11246 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate-controlled blend of VF_128_256 vectors, expressed as a
;; vec_merge of operand 2 and operand 1 under the constant mask operand 3
;; (predicate const_0_to_<blendbits>_operand).  Alternative 0 is the SSE
;; form (operand 1 tied to the destination), alternative 1 the VEX form.
11248 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
11249 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11250 (vec_merge:VF_128_256
11251 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11252 (match_operand:VF_128_256 1 "register_operand" "0,x")
11253 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
11256 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11257 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11258 [(set_attr "isa" "noavx,avx")
11259 (set_attr "type" "ssemov")
11260 (set_attr "length_immediate" "1")
11261 (set_attr "prefix_data16" "1,*")
11262 (set_attr "prefix_extra" "1")
11263 (set_attr "prefix" "orig,vex")
11264 (set_attr "mode" "<MODE>")])
;; Register-controlled blend of VF_128_256 vectors: operand 3 is a vector
;; register selector.  In the SSE alternative operand 1 is tied to the
;; destination and operand 3 is constrained to "Yz"; the VEX alternative
;; takes any "x" register for both.
;; NOTE(review): length_immediate is "1" for both alternatives here,
;; whereas <sse4_1_avx2>_pblendvb uses "*,1" -- confirm whether the SSE
;; alternative should really count an immediate byte.
11266 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
11267 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11269 [(match_operand:VF_128_256 1 "register_operand" "0,x")
11270 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11271 (match_operand:VF_128_256 3 "register_operand" "Yz,x")]
11275 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11276 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11277 [(set_attr "isa" "noavx,avx")
11278 (set_attr "type" "ssemov")
11279 (set_attr "length_immediate" "1")
11280 (set_attr "prefix_data16" "1,*")
11281 (set_attr "prefix_extra" "1")
11282 (set_attr "prefix" "orig,vex")
11283 (set_attr "btver2_decode" "vector,vector")
11284 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> on VF_128_256 vectors with an 8-bit immediate
;; (operand 3, const_0_to_255_operand).  "%" marks operands 1 and 2 as
;; commutative.  Alternative 0 is the SSE form (operand 1 tied to the
;; destination), alternative 1 the VEX form.
11286 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
11287 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11289 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x")
11290 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11291 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11295 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11296 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11297 [(set_attr "isa" "noavx,avx")
11298 (set_attr "type" "ssemul")
11299 (set_attr "length_immediate" "1")
11300 (set_attr "prefix_data16" "1,*")
11301 (set_attr "prefix_extra" "1")
11302 (set_attr "prefix" "orig,vex")
11303 (set_attr "btver2_decode" "vector,vector")
11304 (set_attr "mode" "<MODE>")])
;; %vmovntdqa: loads a VI8_AVX2 vector from the memory operand 1 into the
;; register operand 0.  The source must be memory (memory_operand, "m").
11306 (define_insn "<sse4_1_avx2>_movntdqa"
11307 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
11308 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
11311 "%vmovntdqa\t{%1, %0|%0, %1}"
11312 [(set_attr "type" "ssemov")
11313 (set_attr "prefix_extra" "1")
11314 (set_attr "prefix" "maybe_vex")
11315 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw on VI1_AVX2 byte vectors with an 8-bit immediate (operand 3,
;; const_0_to_255_operand).  Alternative 0 is the SSE form (operand 1
;; tied to the destination), alternative 1 the VEX form.
11317 (define_insn "<sse4_1_avx2>_mpsadbw"
11318 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11320 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11321 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
11322 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11326 mpsadbw\t{%3, %2, %0|%0, %2, %3}
11327 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11328 [(set_attr "isa" "noavx,avx")
11329 (set_attr "type" "sselog1")
11330 (set_attr "length_immediate" "1")
11331 (set_attr "prefix_extra" "1")
11332 (set_attr "prefix" "orig,vex")
11333 (set_attr "btver2_decode" "vector,vector")
11334 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit vpackusdw: two V8SI inputs produce one V16HI result.
;; Three-operand VEX form only.
11336 (define_insn "avx2_packusdw"
11337 [(set (match_operand:V16HI 0 "register_operand" "=x")
11340 (match_operand:V8SI 1 "register_operand" "x"))
11342 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
11344 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
11345 [(set_attr "type" "sselog")
11346 (set_attr "prefix_extra" "1")
11347 (set_attr "prefix" "vex")
11348 (set_attr "mode" "OI")])
;; 128-bit packusdw: two V4SI inputs produce one V8HI result.
;; Alternative 0 is the SSE form (operand 1 tied to the destination),
;; alternative 1 the VEX form.
11350 (define_insn "sse4_1_packusdw"
11351 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11354 (match_operand:V4SI 1 "register_operand" "0,x"))
11356 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
11359 packusdw\t{%2, %0|%0, %2}
11360 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
11361 [(set_attr "isa" "noavx,avx")
11362 (set_attr "type" "sselog")
11363 (set_attr "prefix_extra" "1")
11364 (set_attr "prefix" "orig,vex")
11365 (set_attr "mode" "TI")])
;; Register-controlled byte blend on VI1_AVX2 vectors: operand 3 is a
;; vector register selector.  In the SSE alternative operand 1 is tied to
;; the destination and operand 3 is constrained to "Yz"; the VEX
;; alternative takes any "x" register.  Only the VEX alternative counts
;; an immediate byte (length_immediate "*,1").
11367 (define_insn "<sse4_1_avx2>_pblendvb"
11368 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11370 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11371 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
11372 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
11376 pblendvb\t{%3, %2, %0|%0, %2, %3}
11377 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11378 [(set_attr "isa" "noavx,avx")
11379 (set_attr "type" "ssemov")
11380 (set_attr "prefix_extra" "1")
11381 (set_attr "length_immediate" "*,1")
11382 (set_attr "prefix" "orig,vex")
11383 (set_attr "btver2_decode" "vector,vector")
11384 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit pblendw on V8HI vectors with an 8-bit immediate mask (operand
;; 3, const_0_to_255_operand).  Operand 2 is listed before operand 1 in
;; the pattern.  Alternative 0 is the SSE form (operand 1 tied to the
;; destination), alternative 1 the VEX form.
11386 (define_insn "sse4_1_pblendw"
11387 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11389 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
11390 (match_operand:V8HI 1 "register_operand" "0,x")
11391 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
11394 pblendw\t{%3, %2, %0|%0, %2, %3}
11395 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11396 [(set_attr "isa" "noavx,avx")
11397 (set_attr "type" "ssemov")
11398 (set_attr "prefix_extra" "1")
11399 (set_attr "length_immediate" "1")
11400 (set_attr "prefix" "orig,vex")
11401 (set_attr "mode" "TI")])
11403 ;; The builtin uses an 8-bit immediate. Expand that.
;; The preparation code keeps the low 8 bits of operand 3 and replicates
;; them into bits 8-15 (val << 8 | val), giving one mask bit for each of
;; the 16 V16HI elements.
11404 (define_expand "avx2_pblendw"
11405 [(set (match_operand:V16HI 0 "register_operand")
11407 (match_operand:V16HI 2 "nonimmediate_operand")
11408 (match_operand:V16HI 1 "register_operand")
11409 (match_operand:SI 3 "const_0_to_255_operand")))]
11412 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
11413 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn for 256-bit vpblendw.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code masks it back to its low 8 bits
;; before printing the immediate.
11416 (define_insn "*avx2_pblendw"
11417 [(set (match_operand:V16HI 0 "register_operand" "=x")
11419 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
11420 (match_operand:V16HI 1 "register_operand" "x")
11421 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
11424 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
11425 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11427 [(set_attr "type" "ssemov")
11428 (set_attr "prefix_extra" "1")
11429 (set_attr "length_immediate" "1")
11430 (set_attr "prefix" "vex")
11431 (set_attr "mode" "OI")])
;; vpblendd on VI4_AVX2 vectors: a vec_merge of operand 2 and operand 1
;; under the 8-bit immediate mask operand 3.  Three-operand VEX form only.
11433 (define_insn "avx2_pblendd<mode>"
11434 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
11435 (vec_merge:VI4_AVX2
11436 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
11437 (match_operand:VI4_AVX2 1 "register_operand" "x")
11438 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
11440 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11441 [(set_attr "type" "ssemov")
11442 (set_attr "prefix_extra" "1")
11443 (set_attr "length_immediate" "1")
11444 (set_attr "prefix" "vex")
11445 (set_attr "mode" "<sseinsnmode>")])
;; %vphminposuw: single-input operation on a V8HI vector, kept opaque as
;; UNSPEC_PHMINPOSUW.  The source may be a register or memory.
11447 (define_insn "sse4_1_phminposuw"
11448 [(set (match_operand:V8HI 0 "register_operand" "=x")
11449 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
11450 UNSPEC_PHMINPOSUW))]
11452 "%vphminposuw\t{%1, %0|%0, %1}"
11453 [(set_attr "type" "sselog1")
11454 (set_attr "prefix_extra" "1")
11455 (set_attr "prefix" "maybe_vex")
11456 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bw: converts a whole V16QI source (register or memory)
;; into a V16HI register.  <code> and <extsuffix> are substituted by an
;; iterator defined outside this pattern (presumably sign/zero extension).
11458 (define_insn "avx2_<code>v16qiv16hi2"
11459 [(set (match_operand:V16HI 0 "register_operand" "=x")
11461 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
11463 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
11464 [(set_attr "type" "ssemov")
11465 (set_attr "prefix_extra" "1")
11466 (set_attr "prefix" "vex")
11467 (set_attr "mode" "OI")])
;; vpmov<extsuffix>bw on elements 0-7 of a V16QI operand, producing V8HI.
;; Only eight bytes are consumed, so the Intel-syntax template prints the
;; source with the %q modifier and ssememalign is 64.
11469 (define_insn "sse4_1_<code>v8qiv8hi2"
11470 [(set (match_operand:V8HI 0 "register_operand" "=x")
11473 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11474 (parallel [(const_int 0) (const_int 1)
11475 (const_int 2) (const_int 3)
11476 (const_int 4) (const_int 5)
11477 (const_int 6) (const_int 7)]))))]
11479 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
11480 [(set_attr "type" "ssemov")
11481 (set_attr "ssememalign" "64")
11482 (set_attr "prefix_extra" "1")
11483 (set_attr "prefix" "maybe_vex")
11484 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bd: converts a whole V16QI source (register or memory)
;; into a V16SI register, optionally masked via <mask_name>.  All sixteen
;; bytes of operand 1 are read, so the Intel-syntax template prints the
;; operand without a size-override modifier (%1), exactly as
;; avx2_<code>v16qiv16hi2 does for its full V16QI source; a %q modifier
;; would describe a 64-bit memory operand.
11486 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
11487 [(set (match_operand:V16SI 0 "register_operand" "=v")
11489 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
11491 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11492 [(set_attr "type" "ssemov")
11493 (set_attr "prefix" "evex")
11494 (set_attr "mode" "XI")])
;; vpmov<extsuffix>bd on elements 0-7 of a V16QI operand, producing V8SI.
;; Eight bytes are consumed; the Intel-syntax template uses %q1.
11496 (define_insn "avx2_<code>v8qiv8si2"
11497 [(set (match_operand:V8SI 0 "register_operand" "=x")
11500 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11501 (parallel [(const_int 0) (const_int 1)
11502 (const_int 2) (const_int 3)
11503 (const_int 4) (const_int 5)
11504 (const_int 6) (const_int 7)]))))]
11506 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
11507 [(set_attr "type" "ssemov")
11508 (set_attr "prefix_extra" "1")
11509 (set_attr "prefix" "vex")
11510 (set_attr "mode" "OI")])
;; vpmov<extsuffix>bd on elements 0-3 of a V16QI operand, producing V4SI.
;; Four bytes are consumed; the Intel-syntax template uses %k1 and
;; ssememalign is 32.
11512 (define_insn "sse4_1_<code>v4qiv4si2"
11513 [(set (match_operand:V4SI 0 "register_operand" "=x")
11516 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11517 (parallel [(const_int 0) (const_int 1)
11518 (const_int 2) (const_int 3)]))))]
11520 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
11521 [(set_attr "type" "ssemov")
11522 (set_attr "ssememalign" "32")
11523 (set_attr "prefix_extra" "1")
11524 (set_attr "prefix" "maybe_vex")
11525 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wd: converts a whole V16HI source (register or memory)
;; into a V16SI register, optionally masked via <mask_name>.
11527 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
11528 [(set (match_operand:V16SI 0 "register_operand" "=v")
11530 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
11532 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11533 [(set_attr "type" "ssemov")
11534 (set_attr "prefix" "evex")
11535 (set_attr "mode" "XI")])
;; vpmov<extsuffix>wd: converts a whole V8HI source (register or memory)
;; into a V8SI register.
11537 (define_insn "avx2_<code>v8hiv8si2"
11538 [(set (match_operand:V8SI 0 "register_operand" "=x")
11540 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
11542 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
11543 [(set_attr "type" "ssemov")
11544 (set_attr "prefix_extra" "1")
11545 (set_attr "prefix" "vex")
11546 (set_attr "mode" "OI")])
;; vpmov<extsuffix>wd on elements 0-3 of a V8HI operand, producing V4SI.
;; Eight bytes are consumed; the Intel-syntax template uses %q1 and
;; ssememalign is 64.
11548 (define_insn "sse4_1_<code>v4hiv4si2"
11549 [(set (match_operand:V4SI 0 "register_operand" "=x")
11552 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11553 (parallel [(const_int 0) (const_int 1)
11554 (const_int 2) (const_int 3)]))))]
11556 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
11557 [(set_attr "type" "ssemov")
11558 (set_attr "ssememalign" "64")
11559 (set_attr "prefix_extra" "1")
11560 (set_attr "prefix" "maybe_vex")
11561 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bq on elements 0-7 of a V16QI operand, producing V8DI,
;; optionally masked via <mask_name>.  Eight bytes are consumed, so the
;; Intel-syntax template prints the source with the 64-bit %q modifier,
;; the same choice made by sse4_1_<code>v8qiv8hi2 and avx2_<code>v8qiv8si2
;; for their eight-byte sources (%k would describe only four bytes).
11563 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
11564 [(set (match_operand:V8DI 0 "register_operand" "=v")
11567 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
11568 (parallel [(const_int 0) (const_int 1)
11569 (const_int 2) (const_int 3)
11570 (const_int 4) (const_int 5)
11571 (const_int 6) (const_int 7)]))))]
11573 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
11574 [(set_attr "type" "ssemov")
11575 (set_attr "prefix" "evex")
11576 (set_attr "mode" "XI")])
;; vpmov<extsuffix>bq on elements 0-3 of a V16QI operand, producing V4DI.
;; Four bytes are consumed; the Intel-syntax template uses %k1.
11578 (define_insn "avx2_<code>v4qiv4di2"
11579 [(set (match_operand:V4DI 0 "register_operand" "=x")
11582 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11583 (parallel [(const_int 0) (const_int 1)
11584 (const_int 2) (const_int 3)]))))]
11586 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
11587 [(set_attr "type" "ssemov")
11588 (set_attr "prefix_extra" "1")
11589 (set_attr "prefix" "vex")
11590 (set_attr "mode" "OI")])
;; vpmov<extsuffix>bq on elements 0-1 of a V16QI operand, producing V2DI.
;; Two bytes are consumed; the Intel-syntax template uses %w1 and
;; ssememalign is 16.
11592 (define_insn "sse4_1_<code>v2qiv2di2"
11593 [(set (match_operand:V2DI 0 "register_operand" "=x")
11596 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11597 (parallel [(const_int 0) (const_int 1)]))))]
11599 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
11600 [(set_attr "type" "ssemov")
11601 (set_attr "ssememalign" "16")
11602 (set_attr "prefix_extra" "1")
11603 (set_attr "prefix" "maybe_vex")
11604 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wq: converts a whole V8HI source (register or memory)
;; into a V8DI register, optionally masked via <mask_name>.  All 16 bytes
;; of operand 1 are read, so the Intel-syntax template prints the operand
;; without a size-override modifier (%1), as avx2_<code>v8hiv8si2 does for
;; its full V8HI source; a %q modifier would describe a 64-bit operand.
11606 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
11607 [(set (match_operand:V8DI 0 "register_operand" "=v")
11609 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
11611 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11612 [(set_attr "type" "ssemov")
11613 (set_attr "prefix" "evex")
11614 (set_attr "mode" "XI")])
;; vpmov<extsuffix>wq on elements 0-3 of a V8HI operand, producing V4DI.
;; Eight bytes are consumed; the Intel-syntax template uses %q1.
11616 (define_insn "avx2_<code>v4hiv4di2"
11617 [(set (match_operand:V4DI 0 "register_operand" "=x")
11620 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11621 (parallel [(const_int 0) (const_int 1)
11622 (const_int 2) (const_int 3)]))))]
11624 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
11625 [(set_attr "type" "ssemov")
11626 (set_attr "prefix_extra" "1")
11627 (set_attr "prefix" "vex")
11628 (set_attr "mode" "OI")])
;; vpmov<extsuffix>wq on elements 0-1 of a V8HI operand, producing V2DI.
;; Four bytes are consumed; the Intel-syntax template uses %k1 and
;; ssememalign is 32.
11630 (define_insn "sse4_1_<code>v2hiv2di2"
11631 [(set (match_operand:V2DI 0 "register_operand" "=x")
11634 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11635 (parallel [(const_int 0) (const_int 1)]))))]
11637 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
11638 [(set_attr "type" "ssemov")
11639 (set_attr "ssememalign" "32")
11640 (set_attr "prefix_extra" "1")
11641 (set_attr "prefix" "maybe_vex")
11642 (set_attr "mode" "TI")])
;; vpmov<extsuffix>dq: converts a whole V8SI source (register or memory)
;; into a V8DI register, optionally masked via <mask_name>.
11644 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
11645 [(set (match_operand:V8DI 0 "register_operand" "=v")
11647 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
11649 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11650 [(set_attr "type" "ssemov")
11651 (set_attr "prefix" "evex")
11652 (set_attr "mode" "XI")])
;; vpmov<extsuffix>dq: converts a whole V4SI source (register or memory)
;; into a V4DI register.  The prefix attribute is stated explicitly as
;; "vex", matching every other avx2_<code>* extension pattern in this
;; group, instead of being left to the attribute's default.
11654 (define_insn "avx2_<code>v4siv4di2"
11655 [(set (match_operand:V4DI 0 "register_operand" "=x")
11657 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
11659 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
11660 [(set_attr "type" "ssemov")
11661 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
11662 (set_attr "mode" "OI")])
;; vpmov<extsuffix>dq on elements 0-1 of a V4SI operand, producing V2DI.
;; Eight bytes are consumed; the Intel-syntax template uses %q1 and
;; ssememalign is 64.
11664 (define_insn "sse4_1_<code>v2siv2di2"
11665 [(set (match_operand:V2DI 0 "register_operand" "=x")
11668 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11669 (parallel [(const_int 0) (const_int 1)]))))]
11671 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
11672 [(set_attr "type" "ssemov")
11673 (set_attr "ssememalign" "64")
11674 (set_attr "prefix_extra" "1")
11675 (set_attr "prefix" "maybe_vex")
11676 (set_attr "mode" "TI")])
11678 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
11679 ;; setting FLAGS_REG. But they are not really compare instructions.
;; vtest<ssemodesuffix> for the VF_128_256 modes: sets FLAGS_REG (CC mode)
;; from an unspec of operand 0 (register) and operand 1 (register or
;; memory).  No vector result is written.
11680 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
11681 [(set (reg:CC FLAGS_REG)
11682 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
11683 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
11686 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
11687 [(set_attr "type" "ssecomi")
11688 (set_attr "prefix_extra" "1")
11689 (set_attr "prefix" "vex")
11690 (set_attr "mode" "<MODE>")])
11692 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
11693 ;; But it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (CC mode) from an unspec of two V4DI
;; operands, the first a register and the second a register or memory.
11694 (define_insn "avx_ptest256"
11695 [(set (reg:CC FLAGS_REG)
11696 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
11697 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
11700 "vptest\t{%1, %0|%0, %1}"
11701 [(set_attr "type" "ssecomi")
11702 (set_attr "prefix_extra" "1")
11703 (set_attr "prefix" "vex")
11704 (set_attr "btver2_decode" "vector")
11705 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG (CC mode) from an unspec of two V2DI
;; operands.  The template begins with %v and prefix is maybe_vex.
11707 (define_insn "sse4_1_ptest"
11708 [(set (reg:CC FLAGS_REG)
11709 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
11710 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11713 "%vptest\t{%1, %0|%0, %1}"
11714 [(set_attr "type" "ssecomi")
11715 (set_attr "prefix_extra" "1")
11716 (set_attr "prefix" "maybe_vex")
11717 (set_attr "mode" "TI")])
;; Packed round<ssemodesuffix> for the VF_128_256 modes.  Operand 1 is the
;; source (register or memory) and operand 2 the immediate accepted by
;; const_0_to_15_operand.  The prefix_data16 attribute is computed from a
;; TARGET_AVX test rather than being a fixed string.
11719 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
11720 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
11722 [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
11723 (match_operand:SI 2 "const_0_to_15_operand" "n")]
11726 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11727 [(set_attr "type" "ssecvt")
11728 (set (attr "prefix_data16")
11730 (match_test "TARGET_AVX")
11732 (const_string "1")))
11733 (set_attr "prefix_extra" "1")
11734 (set_attr "length_immediate" "1")
11735 (set_attr "prefix" "maybe_vex")
11736 (set_attr "mode" "<MODE>")])
;; Round-then-convert expander for the VF1_128_256 modes: operand 1 is
;; rounded into a fresh pseudo with the packed round pattern, and that
;; pseudo is then converted into the integer vector operand 0 through
;; the fix_trunc pattern.
11738 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
11739 [(match_operand:<sseintvecmode> 0 "register_operand")
11740 (match_operand:VF1_128_256 1 "nonimmediate_operand")
11741 (match_operand:SI 2 "const_0_to_15_operand")]
11744 rtx tmp = gen_reg_rtx (<MODE>mode);
11747 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
11750 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; 512-bit roundpd expander: passes operands 0-2 unchanged to the
;; avx512f_rndscalev8df pattern.
11754 (define_expand "avx512f_roundpd512"
11755 [(match_operand:V8DF 0 "register_operand")
11756 (match_operand:V8DF 1 "nonimmediate_operand")
11757 (match_operand:SI 2 "const_0_to_15_operand")]
11760 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Round two VF2 inputs with rounding immediate operand 3 and pack the
;; truncated results into operand 0.  For V2DF with TARGET_AVX and without
;; TARGET_PREFER_AVX128 the inputs are concatenated into one V4DF value,
;; rounded once with avx_roundpd256 and converted with fix_truncv4dfv4si2.
;; The remaining code rounds each input into its own temporary and
;; combines them with vec_pack_sfix_trunc_<mode>.
11764 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
11765 [(match_operand:<ssepackfltmode> 0 "register_operand")
11766 (match_operand:VF2 1 "nonimmediate_operand")
11767 (match_operand:VF2 2 "nonimmediate_operand")
11768 (match_operand:SI 3 "const_0_to_15_operand")]
11773 if (<MODE>mode == V2DFmode
11774 && TARGET_AVX && !TARGET_PREFER_AVX128)
11776 rtx tmp2 = gen_reg_rtx (V4DFmode);
11778 tmp0 = gen_reg_rtx (V4DFmode);
11779 tmp1 = force_reg (V2DFmode, operands[1]);
11781 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
11782 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
11783 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
11787 tmp0 = gen_reg_rtx (<MODE>mode);
11788 tmp1 = gen_reg_rtx (<MODE>mode);
11791 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
11794 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
11797 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round for the VF_128 modes, with two alternatives selected by
;; the isa attribute: the non-AVX form ties operand 1 to the destination
;; (constraint "0") and prints two source operands, while the AVX form
;; prints operand 1 as a separate register.  Operand 3 is the immediate
;; accepted by const_0_to_15_operand.
11802 (define_insn "sse4_1_round<ssescalarmodesuffix>"
11803 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
11806 [(match_operand:VF_128 2 "register_operand" "x,x")
11807 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
11809 (match_operand:VF_128 1 "register_operand" "0,x")
11813 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
11814 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11815 [(set_attr "isa" "noavx,avx")
11816 (set_attr "type" "ssecvt")
11817 (set_attr "length_immediate" "1")
11818 (set_attr "prefix_data16" "1,*")
11819 (set_attr "prefix_extra" "1")
11820 (set_attr "prefix" "orig,vex")
11821 (set_attr "mode" "<MODE>")])
;; Vector round expander for the VF modes, enabled only when TARGET_ROUND
;; holds and flag_trapping_math is off.  The preparation code builds a
;; constant vector whose elements are nextafter (0.5, 0.0), i.e.
;; 0.5 - 2**(-p-1) for the scalar format's precision p, copies the sign of
;; operand 1 onto it (result in operands[3]), allocates operands[4] for
;; the intermediate value and sets operands[5] to ROUND_TRUNC, the
;; rounding immediate used by the second set of the pattern.
11823 (define_expand "round<mode>2"
11824 [(set (match_dup 4)
11826 (match_operand:VF 1 "register_operand")
11828 (set (match_operand:VF 0 "register_operand")
11830 [(match_dup 4) (match_dup 5)]
11832 "TARGET_ROUND && !flag_trapping_math"
11834 enum machine_mode scalar_mode;
11835 const struct real_format *fmt;
11836 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
11837 rtx half, vec_half;
11839 scalar_mode = GET_MODE_INNER (<MODE>mode);
11841 /* load nextafter (0.5, 0.0) */
11842 fmt = REAL_MODE_FORMAT (scalar_mode);
11843 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
11844 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
11845 half = const_double_from_real_value (pred_half, scalar_mode);
11847 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
11848 vec_half = force_reg (<MODE>mode, vec_half);
11850 operands[3] = gen_reg_rtx (<MODE>mode);
11851 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
11853 operands[4] = gen_reg_rtx (<MODE>mode);
11854 operands[5] = GEN_INT (ROUND_TRUNC);
;; round followed by conversion to integer for the VF1_128_256 modes:
;; operand 1 is rounded into a fresh pseudo via round<mode>2 and the
;; pseudo is converted into operand 0 with the fix_trunc pattern.
;; Enabled under the same condition as round<mode>2.
11857 (define_expand "round<mode>2_sfix"
11858 [(match_operand:<sseintvecmode> 0 "register_operand")
11859 (match_operand:VF1_128_256 1 "register_operand")]
11860 "TARGET_ROUND && !flag_trapping_math"
11862 rtx tmp = gen_reg_rtx (<MODE>mode);
11864 emit_insn (gen_round<mode>2 (tmp, operands[1]));
11867 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two VF2 registers and pack the truncated results into operand 0.
;; For V2DF with TARGET_AVX and without TARGET_PREFER_AVX128 the inputs
;; are concatenated into a V4DF value, rounded once with roundv4df2 and
;; converted with fix_truncv4dfv4si2.  The remaining code rounds each
;; input separately with round<mode>2 and combines the two temporaries
;; with vec_pack_sfix_trunc_<mode>.
11871 (define_expand "round<mode>2_vec_pack_sfix"
11872 [(match_operand:<ssepackfltmode> 0 "register_operand")
11873 (match_operand:VF2 1 "register_operand")
11874 (match_operand:VF2 2 "register_operand")]
11875 "TARGET_ROUND && !flag_trapping_math"
11879 if (<MODE>mode == V2DFmode
11880 && TARGET_AVX && !TARGET_PREFER_AVX128)
11882 rtx tmp2 = gen_reg_rtx (V4DFmode);
11884 tmp0 = gen_reg_rtx (V4DFmode);
11885 tmp1 = force_reg (V2DFmode, operands[1]);
11887 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
11888 emit_insn (gen_roundv4df2 (tmp2, tmp0));
11889 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
11893 tmp0 = gen_reg_rtx (<MODE>mode);
11894 tmp1 = gen_reg_rtx (<MODE>mode);
11896 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
11897 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
11900 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
11905 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11907 ;; Intel SSE4.2 string/text processing instructions
11909 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI value in operand 0
;; (constraint c), a V16QI value in operand 1 (constraint Yz) and
;; FLAGS_REG.  It is split while pseudos can still be created.  The split
;; code checks curr_insn for REG_UNUSED notes on operand 0, operand 1 and
;; FLAGS_REG, then emits the pcmpestri and/or pcmpestrm forms as needed,
;; the flags-only form when only the flags are live, or just a
;; NOTE_INSN_DELETED when none of the three results is live.
11911 (define_insn_and_split "sse4_2_pcmpestr"
11912 [(set (match_operand:SI 0 "register_operand" "=c,c")
11914 [(match_operand:V16QI 2 "register_operand" "x,x")
11915 (match_operand:SI 3 "register_operand" "a,a")
11916 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
11917 (match_operand:SI 5 "register_operand" "d,d")
11918 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
11920 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
11928 (set (reg:CC FLAGS_REG)
11937 && can_create_pseudo_p ()"
11942 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
11943 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
11944 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
11947 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
11948 operands[3], operands[4],
11949 operands[5], operands[6]));
11951 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
11952 operands[3], operands[4],
11953 operands[5], operands[6]));
11954 if (flags && !(ecx || xmm0))
11955 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
11956 operands[2], operands[3],
11957 operands[4], operands[5],
11959 if (!(flags || ecx || xmm0))
11960 emit_note (NOTE_INSN_DELETED);
11964 [(set_attr "type" "sselog")
11965 (set_attr "prefix_data16" "1")
11966 (set_attr "prefix_extra" "1")
11967 (set_attr "ssememalign" "8")
11968 (set_attr "length_immediate" "1")
11969 (set_attr "memory" "none,load")
11970 (set_attr "mode" "TI")])
;; Variant of the combined pcmpestr pattern whose second string operand is
;; a memory operand wrapped in UNSPEC_LOADU (operand 4, constraint m).
;; The split code is the same as in sse4_2_pcmpestr: it tests the
;; REG_UNUSED notes for operand 0, operand 1 and FLAGS_REG and emits the
;; pcmpestri/pcmpestrm/flags-only forms or a NOTE_INSN_DELETED, passing
;; operand 4 itself (the memory operand) to the generated insns.
11972 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
11973 [(set (match_operand:SI 0 "register_operand" "=c")
11975 [(match_operand:V16QI 2 "register_operand" "x")
11976 (match_operand:SI 3 "register_operand" "a")
11978 [(match_operand:V16QI 4 "memory_operand" "m")]
11980 (match_operand:SI 5 "register_operand" "d")
11981 (match_operand:SI 6 "const_0_to_255_operand" "n")]
11983 (set (match_operand:V16QI 1 "register_operand" "=Yz")
11987 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
11991 (set (reg:CC FLAGS_REG)
11995 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
12000 && can_create_pseudo_p ()"
12005 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12006 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12007 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12010 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
12011 operands[3], operands[4],
12012 operands[5], operands[6]));
12014 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
12015 operands[3], operands[4],
12016 operands[5], operands[6]));
12017 if (flags && !(ecx || xmm0))
12018 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
12019 operands[2], operands[3],
12020 operands[4], operands[5],
12022 if (!(flags || ecx || xmm0))
12023 emit_note (NOTE_INSN_DELETED);
12027 [(set_attr "type" "sselog")
12028 (set_attr "prefix_data16" "1")
12029 (set_attr "prefix_extra" "1")
12030 (set_attr "ssememalign" "8")
12031 (set_attr "length_immediate" "1")
12032 (set_attr "memory" "load")
12033 (set_attr "mode" "TI")])
;; pcmpestri: writes the SI result to operand 0 (constraint c) and also
;; sets FLAGS_REG.  Operands 2 and 4 are SI registers constrained to a
;; and d; operand 3 may be a register or memory (second alternative),
;; which the memory attribute records as "none,load".
12035 (define_insn "sse4_2_pcmpestri"
12036 [(set (match_operand:SI 0 "register_operand" "=c,c")
12038 [(match_operand:V16QI 1 "register_operand" "x,x")
12039 (match_operand:SI 2 "register_operand" "a,a")
12040 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12041 (match_operand:SI 4 "register_operand" "d,d")
12042 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12044 (set (reg:CC FLAGS_REG)
12053 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
12054 [(set_attr "type" "sselog")
12055 (set_attr "prefix_data16" "1")
12056 (set_attr "prefix_extra" "1")
12057 (set_attr "prefix" "maybe_vex")
12058 (set_attr "ssememalign" "8")
12059 (set_attr "length_immediate" "1")
12060 (set_attr "btver2_decode" "vector")
12061 (set_attr "memory" "none,load")
12062 (set_attr "mode" "TI")])
;; pcmpestrm: writes the V16QI result to operand 0 (constraint Yz) and
;; also sets FLAGS_REG.  Input operands and alternatives mirror
;; sse4_2_pcmpestri.
12064 (define_insn "sse4_2_pcmpestrm"
12065 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12067 [(match_operand:V16QI 1 "register_operand" "x,x")
12068 (match_operand:SI 2 "register_operand" "a,a")
12069 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12070 (match_operand:SI 4 "register_operand" "d,d")
12071 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12073 (set (reg:CC FLAGS_REG)
12082 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
12083 [(set_attr "type" "sselog")
12084 (set_attr "prefix_data16" "1")
12085 (set_attr "prefix_extra" "1")
12086 (set_attr "ssememalign" "8")
12087 (set_attr "length_immediate" "1")
12088 (set_attr "prefix" "maybe_vex")
12089 (set_attr "btver2_decode" "vector")
12090 (set_attr "memory" "none,load")
12091 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: only FLAGS_REG is set; operands 0 and 1 are
;; clobbered scratches.  Alternatives 0-1 clobber a Yz vector scratch and
;; print pcmpestrm; alternatives 2-3 clobber a c-constrained SI scratch
;; and print pcmpestri.  Within each pair, the second alternative takes
;; operand 4 from memory.
12093 (define_insn "sse4_2_pcmpestr_cconly"
12094 [(set (reg:CC FLAGS_REG)
12096 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12097 (match_operand:SI 3 "register_operand" "a,a,a,a")
12098 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
12099 (match_operand:SI 5 "register_operand" "d,d,d,d")
12100 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
12102 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12103 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12106 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12107 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12108 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
12109 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
12110 [(set_attr "type" "sselog")
12111 (set_attr "prefix_data16" "1")
12112 (set_attr "prefix_extra" "1")
12113 (set_attr "ssememalign" "8")
12114 (set_attr "length_immediate" "1")
12115 (set_attr "memory" "none,load,none,load")
12116 (set_attr "btver2_decode" "vector,vector,vector,vector")
12117 (set_attr "prefix" "maybe_vex")
12118 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI value in operand 0
;; (constraint c), a V16QI value in operand 1 (constraint Yz) and
;; FLAGS_REG.  It is split while pseudos can still be created.  The split
;; code checks curr_insn for REG_UNUSED notes on operand 0, operand 1 and
;; FLAGS_REG, then emits the pcmpistri and/or pcmpistrm forms as needed,
;; the flags-only form when only the flags are live, or just a
;; NOTE_INSN_DELETED when none of the three results is live.
12120 (define_insn_and_split "sse4_2_pcmpistr"
12121 [(set (match_operand:SI 0 "register_operand" "=c,c")
12123 [(match_operand:V16QI 2 "register_operand" "x,x")
12124 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12125 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
12127 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
12133 (set (reg:CC FLAGS_REG)
12140 && can_create_pseudo_p ()"
12145 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12146 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12147 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12150 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12151 operands[3], operands[4]));
12153 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12154 operands[3], operands[4]));
12155 if (flags && !(ecx || xmm0))
12156 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12157 operands[2], operands[3],
12159 if (!(flags || ecx || xmm0))
12160 emit_note (NOTE_INSN_DELETED);
12164 [(set_attr "type" "sselog")
12165 (set_attr "prefix_data16" "1")
12166 (set_attr "prefix_extra" "1")
12167 (set_attr "ssememalign" "8")
12168 (set_attr "length_immediate" "1")
12169 (set_attr "memory" "none,load")
12170 (set_attr "mode" "TI")])
;; Variant of the combined pcmpistr pattern whose second string operand is
;; a memory operand wrapped in UNSPEC_LOADU (operand 3, constraint m).
;; The split code matches sse4_2_pcmpistr: it tests the REG_UNUSED notes
;; for operand 0, operand 1 and FLAGS_REG and emits the
;; pcmpistri/pcmpistrm/flags-only forms or a NOTE_INSN_DELETED, passing
;; operand 3 itself (the memory operand) to the generated insns.
12172 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
12173 [(set (match_operand:SI 0 "register_operand" "=c")
12175 [(match_operand:V16QI 2 "register_operand" "x")
12177 [(match_operand:V16QI 3 "memory_operand" "m")]
12179 (match_operand:SI 4 "const_0_to_255_operand" "n")]
12181 (set (match_operand:V16QI 1 "register_operand" "=Yz")
12184 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12187 (set (reg:CC FLAGS_REG)
12190 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12194 && can_create_pseudo_p ()"
12199 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12200 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12201 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12204 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12205 operands[3], operands[4]));
12207 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12208 operands[3], operands[4]));
12209 if (flags && !(ecx || xmm0))
12210 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12211 operands[2], operands[3],
12213 if (!(flags || ecx || xmm0))
12214 emit_note (NOTE_INSN_DELETED);
12218 [(set_attr "type" "sselog")
12219 (set_attr "prefix_data16" "1")
12220 (set_attr "prefix_extra" "1")
12221 (set_attr "ssememalign" "8")
12222 (set_attr "length_immediate" "1")
12223 (set_attr "memory" "load")
12224 (set_attr "mode" "TI")])
;; pcmpistri: writes the SI result to operand 0 (constraint c) and also
;; sets FLAGS_REG.  Operand 2 may be a register or memory (second
;; alternative), recorded by the memory attribute as "none,load".
12226 (define_insn "sse4_2_pcmpistri"
12227 [(set (match_operand:SI 0 "register_operand" "=c,c")
12229 [(match_operand:V16QI 1 "register_operand" "x,x")
12230 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12231 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12233 (set (reg:CC FLAGS_REG)
12240 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
12241 [(set_attr "type" "sselog")
12242 (set_attr "prefix_data16" "1")
12243 (set_attr "prefix_extra" "1")
12244 (set_attr "ssememalign" "8")
12245 (set_attr "length_immediate" "1")
12246 (set_attr "prefix" "maybe_vex")
12247 (set_attr "memory" "none,load")
12248 (set_attr "btver2_decode" "vector")
12249 (set_attr "mode" "TI")])
;; pcmpistrm: writes the V16QI result to operand 0 (constraint Yz) and
;; also sets FLAGS_REG.  Input operands and alternatives mirror
;; sse4_2_pcmpistri.
12251 (define_insn "sse4_2_pcmpistrm"
12252 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12254 [(match_operand:V16QI 1 "register_operand" "x,x")
12255 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12256 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12258 (set (reg:CC FLAGS_REG)
12265 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
12266 [(set_attr "type" "sselog")
12267 (set_attr "prefix_data16" "1")
12268 (set_attr "prefix_extra" "1")
12269 (set_attr "ssememalign" "8")
12270 (set_attr "length_immediate" "1")
12271 (set_attr "prefix" "maybe_vex")
12272 (set_attr "memory" "none,load")
12273 (set_attr "btver2_decode" "vector")
12274 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: only FLAGS_REG is set; operands 0 and 1 are
;; clobbered scratches.  Alternatives 0-1 clobber a Yz vector scratch and
;; print pcmpistrm; alternatives 2-3 clobber a c-constrained SI scratch
;; and print pcmpistri.  Within each pair, the second alternative takes
;; operand 3 from memory.
12276 (define_insn "sse4_2_pcmpistr_cconly"
12277 [(set (reg:CC FLAGS_REG)
12279 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12280 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
12281 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
12283 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12284 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12287 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
12288 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
12289 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
12290 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
12291 [(set_attr "type" "sselog")
12292 (set_attr "prefix_data16" "1")
12293 (set_attr "prefix_extra" "1")
12294 (set_attr "ssememalign" "8")
12295 (set_attr "length_immediate" "1")
12296 (set_attr "memory" "none,load,none,load")
12297 (set_attr "prefix" "maybe_vex")
12298 (set_attr "btver2_decode" "vector,vector,vector,vector")
12299 (set_attr "mode" "TI")])
;; Gather-prefetch expander (UNSPEC_GATHER_PREFETCH) for the VI48_512
;; modes.  Operand 0 is a mask register or constant -1
;; (register_or_constm1_operand) and operand 4 is restricted by
;; const_0_to_1_operand.  The preparation code wraps operands 2, 1 and 3
;; (presumably base address, index vector and scale) in a Pmode
;; UNSPEC_VSIBADDR expression.
12301 (define_expand "avx512pf_gatherpf<mode>"
12303 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12304 (mem:<ssescalarmode>
12306 [(match_operand 2 "vsib_address_operand")
12307 (match_operand:VI48_512 1 "register_operand")
12308 (match_operand:SI 3 "const1248_operand")]))
12309 (match_operand:SI 4 "const_0_to_1_operand")]
12310 UNSPEC_GATHER_PREFETCH)]
12314 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12315 operands[3]), UNSPEC_VSIBADDR);
;; Masked gather prefetch.  Operand 0 is the mask register (constraint k),
;; printed inside braces after the VSIB memory operand 5.  The output code
;; switches on the value of operand 4 to choose between the vgatherpf0 and
;; vgatherpf1 mnemonics; any other value hits gcc_unreachable.
12318 (define_insn "*avx512pf_gatherpf<mode>_mask"
12320 [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
12321 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
12323 [(match_operand:P 2 "vsib_address_operand" "Tv")
12324 (match_operand:VI48_512 1 "register_operand" "v")
12325 (match_operand:SI 3 "const1248_operand" "n")]
12327 (match_operand:SI 4 "const_0_to_1_operand" "n")]
12328 UNSPEC_GATHER_PREFETCH)]
12331 switch (INTVAL (operands[4]))
12334 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12336 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12338 gcc_unreachable ();
12341 [(set_attr "type" "sse")
12342 (set_attr "prefix" "evex")
12343 (set_attr "mode" "XI")])
;; Gather prefetch without a mask register operand; the operand numbers
;; are shifted down by one relative to the masked form.  The output code
;; switches on operand 3 to choose vgatherpf0 or vgatherpf1 and prints
;; only the VSIB memory operand 4.
12345 (define_insn "*avx512pf_gatherpf<mode>"
12348 (match_operator:<ssescalarmode> 4 "vsib_mem_operator"
12350 [(match_operand:P 1 "vsib_address_operand" "Tv")
12351 (match_operand:VI48_512 0 "register_operand" "v")
12352 (match_operand:SI 2 "const1248_operand" "n")]
12354 (match_operand:SI 3 "const_0_to_1_operand" "n")]
12355 UNSPEC_GATHER_PREFETCH)]
12358 switch (INTVAL (operands[3]))
12361 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
12363 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
12365 gcc_unreachable ();
12368 [(set_attr "type" "sse")
12369 (set_attr "prefix" "evex")
12370 (set_attr "mode" "XI")])
;; Scatter-prefetch expander (UNSPEC_SCATTER_PREFETCH) for the VI48_512
;; modes.  Operand 0 is a mask register or constant -1
;; (register_or_constm1_operand) and operand 4 is restricted by
;; const_0_to_1_operand.  The preparation code wraps operands 2, 1 and 3
;; (presumably base address, index vector and scale) in a Pmode
;; UNSPEC_VSIBADDR expression.
12372 (define_expand "avx512pf_scatterpf<mode>"
12374 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12375 (mem:<ssescalarmode>
12377 [(match_operand 2 "vsib_address_operand")
12378 (match_operand:VI48_512 1 "register_operand")
12379 (match_operand:SI 3 "const1248_operand")]))
12380 (match_operand:SI 4 "const_0_to_1_operand")]
12381 UNSPEC_SCATTER_PREFETCH)]
12385 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12386 operands[3]), UNSPEC_VSIBADDR);
;; Masked scatter prefetch.  Operand 0 is the mask register (constraint
;; k), printed inside braces after the VSIB memory operand 5.  The output
;; code switches on the value of operand 4 to choose between the
;; vscatterpf0 and vscatterpf1 mnemonics; any other value hits
;; gcc_unreachable.
12389 (define_insn "*avx512pf_scatterpf<mode>_mask"
12391 [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
12392 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
12394 [(match_operand:P 2 "vsib_address_operand" "Tv")
12395 (match_operand:VI48_512 1 "register_operand" "v")
12396 (match_operand:SI 3 "const1248_operand" "n")]
12398 (match_operand:SI 4 "const_0_to_1_operand" "n")]
12399 UNSPEC_SCATTER_PREFETCH)]
12402 switch (INTVAL (operands[4]))
12405 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12407 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12409 gcc_unreachable ();
12412 [(set_attr "type" "sse")
12413 (set_attr "prefix" "evex")
12414 (set_attr "mode" "XI")])
;; Scatter prefetch without a mask register operand; the operand numbers
;; are shifted down by one relative to the masked form.  The output code
;; switches on operand 3 to choose vscatterpf0 or vscatterpf1 and prints
;; only the VSIB memory operand 4.
12416 (define_insn "*avx512pf_scatterpf<mode>"
12419 (match_operator:<ssescalarmode> 4 "vsib_mem_operator"
12421 [(match_operand:P 1 "vsib_address_operand" "Tv")
12422 (match_operand:VI48_512 0 "register_operand" "v")
12423 (match_operand:SI 2 "const1248_operand" "n")]
12425 (match_operand:SI 3 "const_0_to_1_operand" "n")]
12426 UNSPEC_SCATTER_PREFETCH)]
12429 switch (INTVAL (operands[3]))
12432 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
12434 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
12436 gcc_unreachable ();
12439 [(set_attr "type" "sse")
12440 (set_attr "prefix" "evex")
12441 (set_attr "mode" "XI")])
;; vexp2<ssemodesuffix> for the VF_512 modes, with an optional mask via
;; <mask_name>.  The source is a register or memory operand.  Only the
;; prefix and mode attributes are set here.
12443 (define_insn "avx512er_exp2<mode><mask_name>"
12444 [(set (match_operand:VF_512 0 "register_operand" "=v")
12446 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
12449 "vexp2<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12450 [(set_attr "prefix" "evex")
12451 (set_attr "mode" "<MODE>")])
;; vrcp28<ssemodesuffix> for the VF_512 modes, with an optional mask via
;; <mask_name>.  The source is a register or memory operand.  Only the
;; prefix and mode attributes are set here.
12453 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name>"
12454 [(set (match_operand:VF_512 0 "register_operand" "=v")
12456 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
12459 "vrcp28<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12460 [(set_attr "prefix" "evex")
12461 (set_attr "mode" "<MODE>")])
;; vrsqrt28<ssemodesuffix> for the VF_512 modes, with an optional mask via
;; <mask_name>.  The source is a register or memory operand.  Only the
;; prefix and mode attributes are set here.
12463 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name>"
12464 [(set (match_operand:VF_512 0 "register_operand" "=v")
12466 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
12469 "vrsqrt28<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12470 [(set_attr "prefix" "evex")
12471 (set_attr "mode" "<MODE>")])
12473 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12475 ;; XOP instructions
12477 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xop_plus iterates over the plus and ss_plus rtx codes.  The macs and
;; madcs attributes map each of those codes to the mnemonic fragment used
;; in the XOP pattern names and templates below ("macs"/"macss" and
;; "madcs"/"madcss").
12479 (define_code_iterator xop_plus [plus ss_plus])
12481 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
12482 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
12484 ;; XOP parallel integer multiply/add instructions.
;; vp<macs> for the VI24_128 modes, with all four operands in the same
;; mode.  Operands 1 and 2 form a commutative pair (constraint "%x"),
;; operand 2 may come from memory, and operand 3 is a register.
12486 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
12487 [(set (match_operand:VI24_128 0 "register_operand" "=x")
12490 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
12491 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
12492 (match_operand:VI24_128 3 "register_operand" "x")))]
12494 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12495 [(set_attr "type" "ssemuladd")
12496 (set_attr "mode" "TI")])
;; vp<macs>dql: uses elements 0 and 2 of the V4SI operands 1 and 2
;; together with the V2DI register operand 3, producing a V2DI result.
12498 (define_insn "xop_p<macs>dql"
12499 [(set (match_operand:V2DI 0 "register_operand" "=x")
12504 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
12505 (parallel [(const_int 0) (const_int 2)])))
12508 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12509 (parallel [(const_int 0) (const_int 2)]))))
12510 (match_operand:V2DI 3 "register_operand" "x")))]
12512 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12513 [(set_attr "type" "ssemuladd")
12514 (set_attr "mode" "TI")])
;; vp<macs>dqh: uses elements 1 and 3 of the V4SI operands 1 and 2
;; together with the V2DI register operand 3, producing a V2DI result.
12516 (define_insn "xop_p<macs>dqh"
12517 [(set (match_operand:V2DI 0 "register_operand" "=x")
12522 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
12523 (parallel [(const_int 1) (const_int 3)])))
12526 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12527 (parallel [(const_int 1) (const_int 3)]))))
12528 (match_operand:V2DI 3 "register_operand" "x")))]
12530 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12531 [(set_attr "type" "ssemuladd")
12532 (set_attr "mode" "TI")])
12534 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vp<macs>wd: uses the odd elements (1, 3, 5, 7) of the V8HI operands 1
;; and 2 together with the V4SI register operand 3, producing V4SI.
12535 (define_insn "xop_p<macs>wd"
12536 [(set (match_operand:V4SI 0 "register_operand" "=x")
12541 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
12542 (parallel [(const_int 1) (const_int 3)
12543 (const_int 5) (const_int 7)])))
12546 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12547 (parallel [(const_int 1) (const_int 3)
12548 (const_int 5) (const_int 7)]))))
12549 (match_operand:V4SI 3 "register_operand" "x")))]
12551 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12552 [(set_attr "type" "ssemuladd")
12553 (set_attr "mode" "TI")])
;; vp<madcs>wd: the pattern selects both the even elements (0, 2, 4, 6)
;; and the odd elements (1, 3, 5, 7) of the V8HI inputs and combines them
;; with the V4SI register operand 3, producing V4SI.
12555 (define_insn "xop_p<madcs>wd"
12556 [(set (match_operand:V4SI 0 "register_operand" "=x")
12562 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
12563 (parallel [(const_int 0) (const_int 2)
12564 (const_int 4) (const_int 6)])))
12567 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12568 (parallel [(const_int 0) (const_int 2)
12569 (const_int 4) (const_int 6)]))))
12574 (parallel [(const_int 1) (const_int 3)
12575 (const_int 5) (const_int 7)])))
12579 (parallel [(const_int 1) (const_int 3)
12580 (const_int 5) (const_int 7)])))))
12581 (match_operand:V4SI 3 "register_operand" "x")))]
12583 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12584 [(set_attr "type" "ssemuladd")
12585 (set_attr "mode" "TI")])
12587 ;; XOP parallel XMM conditional moves
;; Emits "vpcmov" for every mode of the V iterator.  Operand 3 appears
;; first inside the pattern, followed by operands 1 and 2.  The two
;; alternatives allow a memory reference in operand 2 or in operand 3,
;; never in both; operand 1 is always a register.
;; NOTE(review): the RTL code enclosing the three operands and the insn
;; condition are not visible in this excerpt.
12588 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
12589 [(set (match_operand:V 0 "register_operand" "=x,x")
12591 (match_operand:V 3 "nonimmediate_operand" "x,m")
12592 (match_operand:V 1 "register_operand" "x,x")
12593 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
12595 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12596 [(set_attr "type" "sse4arg")])
12598 ;; XOP horizontal add/subtract instructions
;; Emits "vphadd<u>bw": a one-source instruction taking the V16QI
;; operand 1 (register or memory) and producing a V8HI result.  The
;; pattern selects the even-numbered bytes (0..14) and the odd-numbered
;; bytes (1..15) separately.
;; NOTE(review): the extension/arithmetic codes around the selections
;; and the insn condition are not visible in this excerpt.
12599 (define_insn "xop_phadd<u>bw"
12600 [(set (match_operand:V8HI 0 "register_operand" "=x")
12604 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12605 (parallel [(const_int 0) (const_int 2)
12606 (const_int 4) (const_int 6)
12607 (const_int 8) (const_int 10)
12608 (const_int 12) (const_int 14)])))
12612 (parallel [(const_int 1) (const_int 3)
12613 (const_int 5) (const_int 7)
12614 (const_int 9) (const_int 11)
12615 (const_int 13) (const_int 15)])))))]
12617 "vphadd<u>bw\t{%1, %0|%0, %1}"
12618 [(set_attr "type" "sseiadd1")])
;; Emits "vphadd<u>bd": V16QI operand 1 in, V4SI result out.  Four
;; selections are made from the source, each taking one byte out of
;; every group of four: {0,4,8,12}, {1,5,9,13}, {2,6,10,14}, {3,7,11,15}.
;; NOTE(review): the extension/arithmetic codes around the selections
;; and the insn condition are not visible in this excerpt.
12620 (define_insn "xop_phadd<u>bd"
12621 [(set (match_operand:V4SI 0 "register_operand" "=x")
12626 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12627 (parallel [(const_int 0) (const_int 4)
12628 (const_int 8) (const_int 12)])))
12632 (parallel [(const_int 1) (const_int 5)
12633 (const_int 9) (const_int 13)]))))
12638 (parallel [(const_int 2) (const_int 6)
12639 (const_int 10) (const_int 14)])))
12643 (parallel [(const_int 3) (const_int 7)
12644 (const_int 11) (const_int 15)]))))))]
12646 "vphadd<u>bd\t{%1, %0|%0, %1}"
12647 [(set_attr "type" "sseiadd1")])
;; Emits "vphadd<u>bq": V16QI operand 1 in, V2DI result out.  Eight
;; two-element selections are made from the source: {0,8}, {1,9},
;; {2,10}, {3,11}, {4,12}, {5,13}, {6,14} and {7,15}.
;; NOTE(review): the extension/arithmetic codes around the selections
;; and the insn condition are not visible in this excerpt.
12649 (define_insn "xop_phadd<u>bq"
12650 [(set (match_operand:V2DI 0 "register_operand" "=x")
12656 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12657 (parallel [(const_int 0) (const_int 8)])))
12661 (parallel [(const_int 1) (const_int 9)]))))
12666 (parallel [(const_int 2) (const_int 10)])))
12670 (parallel [(const_int 3) (const_int 11)])))))
12676 (parallel [(const_int 4) (const_int 12)])))
12680 (parallel [(const_int 5) (const_int 13)]))))
12685 (parallel [(const_int 6) (const_int 14)])))
12689 (parallel [(const_int 7) (const_int 15)])))))))]
12691 "vphadd<u>bq\t{%1, %0|%0, %1}"
12692 [(set_attr "type" "sseiadd1")])
;; Emits "vphadd<u>wd": V8HI operand 1 in, V4SI result out.  The even
;; elements (0, 2, 4, 6) and the odd elements (1, 3, 5, 7) of the source
;; are selected separately.
;; NOTE(review): the extension/arithmetic codes around the selections
;; and the insn condition are not visible in this excerpt.
12694 (define_insn "xop_phadd<u>wd"
12695 [(set (match_operand:V4SI 0 "register_operand" "=x")
12699 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
12700 (parallel [(const_int 0) (const_int 2)
12701 (const_int 4) (const_int 6)])))
12705 (parallel [(const_int 1) (const_int 3)
12706 (const_int 5) (const_int 7)])))))]
12708 "vphadd<u>wd\t{%1, %0|%0, %1}"
12709 [(set_attr "type" "sseiadd1")])
;; Emits "vphadd<u>wq": V8HI operand 1 in, V2DI result out.  Four
;; two-element selections are made from the source: {0,4}, {1,5},
;; {2,6} and {3,7}.
;; NOTE(review): the extension/arithmetic codes around the selections
;; and the insn condition are not visible in this excerpt.
12711 (define_insn "xop_phadd<u>wq"
12712 [(set (match_operand:V2DI 0 "register_operand" "=x")
12717 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
12718 (parallel [(const_int 0) (const_int 4)])))
12722 (parallel [(const_int 1) (const_int 5)]))))
12727 (parallel [(const_int 2) (const_int 6)])))
12731 (parallel [(const_int 3) (const_int 7)]))))))]
12733 "vphadd<u>wq\t{%1, %0|%0, %1}"
12734 [(set_attr "type" "sseiadd1")])
;; Emits "vphadd<u>dq": V4SI operand 1 in, V2DI result out.  Elements
;; {0,2} and {1,3} of the source are selected separately.
;; NOTE(review): the extension/arithmetic codes around the selections
;; and the insn condition are not visible in this excerpt.
12736 (define_insn "xop_phadd<u>dq"
12737 [(set (match_operand:V2DI 0 "register_operand" "=x")
12741 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
12742 (parallel [(const_int 0) (const_int 2)])))
12746 (parallel [(const_int 1) (const_int 3)])))))]
12748 "vphadd<u>dq\t{%1, %0|%0, %1}"
12749 [(set_attr "type" "sseiadd1")])
;; Emits "vphsubbw": V16QI operand 1 in, V8HI result out.  The even
;; bytes (0..14) and the odd bytes (1..15) of the source are selected
;; separately.
;; NOTE(review): the extension/arithmetic codes around the selections
;; (and therefore which selection is subtracted from which) and the
;; insn condition are not visible in this excerpt.
12751 (define_insn "xop_phsubbw"
12752 [(set (match_operand:V8HI 0 "register_operand" "=x")
12756 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12757 (parallel [(const_int 0) (const_int 2)
12758 (const_int 4) (const_int 6)
12759 (const_int 8) (const_int 10)
12760 (const_int 12) (const_int 14)])))
12764 (parallel [(const_int 1) (const_int 3)
12765 (const_int 5) (const_int 7)
12766 (const_int 9) (const_int 11)
12767 (const_int 13) (const_int 15)])))))]
12769 "vphsubbw\t{%1, %0|%0, %1}"
12770 [(set_attr "type" "sseiadd1")])
;; Emits "vphsubwd": V8HI operand 1 in, V4SI result out.  The even
;; elements (0, 2, 4, 6) and the odd elements (1, 3, 5, 7) of the source
;; are selected separately.
;; NOTE(review): the extension/arithmetic codes around the selections
;; and the insn condition are not visible in this excerpt.
12772 (define_insn "xop_phsubwd"
12773 [(set (match_operand:V4SI 0 "register_operand" "=x")
12777 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
12778 (parallel [(const_int 0) (const_int 2)
12779 (const_int 4) (const_int 6)])))
12783 (parallel [(const_int 1) (const_int 3)
12784 (const_int 5) (const_int 7)])))))]
12786 "vphsubwd\t{%1, %0|%0, %1}"
12787 [(set_attr "type" "sseiadd1")])
;; Emits "vphsubdq": V4SI operand 1 in, V2DI result out.  Elements {0,2}
;; and {1,3} of the source are selected separately.
;; NOTE(review): the extension/arithmetic codes around the selections
;; and the insn condition are not visible in this excerpt.
12789 (define_insn "xop_phsubdq"
12790 [(set (match_operand:V2DI 0 "register_operand" "=x")
12794 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
12795 (parallel [(const_int 0) (const_int 2)])))
12799 (parallel [(const_int 1) (const_int 3)])))))]
12801 "vphsubdq\t{%1, %0|%0, %1}"
12802 [(set_attr "type" "sseiadd1")])
12804 ;; XOP permute instructions
;; Emits "vpperm" on three V16QI inputs, modelled as an
;; UNSPEC_XOP_PERMUTE.  Operand 1 must be a register; either operand 2
;; or operand 3 may be a memory reference, and the insn condition
;; rejects the case where both are.
12805 (define_insn "xop_pperm"
12806 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
12808 [(match_operand:V16QI 1 "register_operand" "x,x")
12809 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12810 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
12811 UNSPEC_XOP_PERMUTE))]
12812 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
12813 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12814 [(set_attr "type" "sse4arg")
12815 (set_attr "mode" "TI")])
12817 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Emits "vpperm" to produce a V4SI result from the two V2DI operands 1
;; and 2.  Operand 3 is a V16QI value that the pattern only records via
;; a (use ...); it is passed to the instruction as the first source in
;; AT&T order.  At most one of operands 2 and 3 may be in memory.
;; NOTE(review): the RTL codes applied to operands 1 and 2 are not
;; visible in this excerpt.
12818 (define_insn "xop_pperm_pack_v2di_v4si"
12819 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
12822 (match_operand:V2DI 1 "register_operand" "x,x"))
12824 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
12825 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
12826 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
12827 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12828 [(set_attr "type" "sse4arg")
12829 (set_attr "mode" "TI")])
;; Emits "vpperm" to produce a V8HI result from the two V4SI operands 1
;; and 2.  Operand 3 is a V16QI value recorded via a (use ...).  At most
;; one of operands 2 and 3 may be in memory.
;; NOTE(review): the RTL codes applied to operands 1 and 2 are not
;; visible in this excerpt.
12831 (define_insn "xop_pperm_pack_v4si_v8hi"
12832 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
12835 (match_operand:V4SI 1 "register_operand" "x,x"))
12837 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
12838 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
12839 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
12840 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12841 [(set_attr "type" "sse4arg")
12842 (set_attr "mode" "TI")])
;; Emits "vpperm" to produce a V16QI result from the two V8HI operands 1
;; and 2.  Operand 3 is a V16QI value recorded via a (use ...).  At most
;; one of operands 2 and 3 may be in memory.
;; NOTE(review): the RTL codes applied to operands 1 and 2 are not
;; visible in this excerpt.
12844 (define_insn "xop_pperm_pack_v8hi_v16qi"
12845 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
12848 (match_operand:V8HI 1 "register_operand" "x,x"))
12850 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
12851 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
12852 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
12853 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12854 [(set_attr "type" "sse4arg")
12855 (set_attr "mode" "TI")])
12857 ;; XOP packed rotate instructions
;; Expander for a VI_128 rotate with an SImode count (operand 2).  When
;; the count does not satisfy const_0_to_<sserotatemax>_operand, the
;; visible code converts it to <ssescalarmode> if necessary, builds a
;; PARALLEL holding <ssescalarnum> copies of it, initialises a fresh
;; vector register from that with vec_init<mode>, and emits
;; xop_vrotl<mode>3 with that vector as the per-element count.
;; NOTE(review): the RTL code of the pattern, the expander condition and
;; the statements following the emit are not visible in this excerpt.
12858 (define_expand "rotl<mode>3"
12859 [(set (match_operand:VI_128 0 "register_operand")
12861 (match_operand:VI_128 1 "nonimmediate_operand")
12862 (match_operand:SI 2 "general_operand")))]
12865 /* If we were given a scalar, convert it to parallel */
12866 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
12868 rtvec vs = rtvec_alloc (<ssescalarnum>);
12869 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
12870 rtx reg = gen_reg_rtx (<MODE>mode);
12871 rtx op2 = operands[2];
12874 if (GET_MODE (op2) != <ssescalarmode>mode)
12876 op2 = gen_reg_rtx (<ssescalarmode>mode);
12877 convert_move (op2, operands[2], false);
12880 for (i = 0; i < <ssescalarnum>; i++)
12881 RTVEC_ELT (vs, i) = op2;
12883 emit_insn (gen_vec_init<mode> (reg, par));
12884 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a VI_128 rotate with an SImode count (operand 2); the
;; counterpart of rotl<mode>3.  For a count that does not satisfy
;; const_0_to_<sserotatemax>_operand, the visible code broadcasts the
;; count into a vector register exactly as rotl<mode>3 does, negates
;; that vector with neg<mode>2, and emits xop_vrotl<mode>3 with the
;; negated counts.
;; NOTE(review): the RTL code of the pattern, the expander condition and
;; the statements following the emit are not visible in this excerpt.
12889 (define_expand "rotr<mode>3"
12890 [(set (match_operand:VI_128 0 "register_operand")
12892 (match_operand:VI_128 1 "nonimmediate_operand")
12893 (match_operand:SI 2 "general_operand")))]
12896 /* If we were given a scalar, convert it to parallel */
12897 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
12899 rtvec vs = rtvec_alloc (<ssescalarnum>);
12900 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
12901 rtx neg = gen_reg_rtx (<MODE>mode);
12902 rtx reg = gen_reg_rtx (<MODE>mode);
12903 rtx op2 = operands[2];
12906 if (GET_MODE (op2) != <ssescalarmode>mode)
12908 op2 = gen_reg_rtx (<ssescalarmode>mode);
12909 convert_move (op2, operands[2], false);
12912 for (i = 0; i < <ssescalarnum>; i++)
12913 RTVEC_ELT (vs, i) = op2;
12915 emit_insn (gen_vec_init<mode> (reg, par));
12916 emit_insn (gen_neg<mode>2 (neg, reg));
12917 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Emits "vprot<ssemodesuffix>" with an immediate count: operand 2 is an
;; SImode constant accepted by const_0_to_<sserotatemax>_operand, and
;; operand 1 may be a register or memory.  One immediate byte is
;; accounted for via length_immediate.
;; NOTE(review): the RTL code of the pattern and the insn condition are
;; not visible in this excerpt.
12922 (define_insn "xop_rotl<mode>3"
12923 [(set (match_operand:VI_128 0 "register_operand" "=x")
12925 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
12926 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
12928 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12929 [(set_attr "type" "sseishft")
12930 (set_attr "length_immediate" "1")
12931 (set_attr "mode" "TI")])
;; Immediate-count counterpart of xop_rotl<mode>3.  The output code
;; computes (bit size of <ssescalarmode>) - operand 2 and prints
;; "vprot<ssemodesuffix>" with that value as %3, so the same instruction
;; is used with the complementary count.
;; NOTE(review): the left-hand side of the GEN_INT assignment is not
;; visible in this excerpt (presumably operands[3], given the %3 in the
;; template); the pattern's RTL code and condition are also not visible.
12933 (define_insn "xop_rotr<mode>3"
12934 [(set (match_operand:VI_128 0 "register_operand" "=x")
12936 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
12937 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
12941 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
12942 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
12944 [(set_attr "type" "sseishft")
12945 (set_attr "length_immediate" "1")
12946 (set_attr "mode" "TI")])
;; Expander taking three VI_128 register operands, with per-element
;; counts in operand 2.  It negates operand 2 into a fresh register with
;; neg<mode>2 and emits xop_vrotl<mode>3 using the negated counts.
;; NOTE(review): the expander condition and the closing statements are
;; not visible in this excerpt.
12948 (define_expand "vrotr<mode>3"
12949 [(match_operand:VI_128 0 "register_operand")
12950 (match_operand:VI_128 1 "register_operand")
12951 (match_operand:VI_128 2 "register_operand")]
12954 rtx reg = gen_reg_rtx (<MODE>mode);
12955 emit_insn (gen_neg<mode>2 (reg, operands[2]));
12956 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander taking three VI_128 register operands; it forwards them
;; unchanged to xop_vrotl<mode>3.
;; NOTE(review): the expander condition and the closing statements are
;; not visible in this excerpt.
12960 (define_expand "vrotl<mode>3"
12961 [(match_operand:VI_128 0 "register_operand")
12962 (match_operand:VI_128 1 "register_operand")
12963 (match_operand:VI_128 2 "register_operand")]
12966 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Emits "vprot<ssemodesuffix>" with per-element counts held in the
;; vector operand 2.  The pattern is an if_then_else over operand 2 in
;; which one arm uses the negation of operand 2.  Either operand 1 or
;; operand 2 may be in memory, but the condition rejects both at once.
;; NOTE(review): the comparison and the rotate codes inside the
;; if_then_else are not visible in this excerpt.
12970 (define_insn "xop_vrotl<mode>3"
12971 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
12972 (if_then_else:VI_128
12974 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
12977 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
12981 (neg:VI_128 (match_dup 2)))))]
12982 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12983 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12984 [(set_attr "type" "sseishft")
12985 (set_attr "prefix_data16" "0")
12986 (set_attr "prefix_extra" "2")
12987 (set_attr "mode" "TI")])
12989 ;; XOP packed shift instructions.
;; vlshr<mode>3 for the VI12_128 modes.  The expander negates the count
;; vector (operand 2) into a fresh register with neg<mode>2 and emits
;; xop_shl<mode>3 with the negated counts.
;; NOTE(review): the RTL code of the pattern, the expander condition and
;; the closing statements are not visible in this excerpt.
12990 (define_expand "vlshr<mode>3"
12991 [(set (match_operand:VI12_128 0 "register_operand")
12993 (match_operand:VI12_128 1 "register_operand")
12994 (match_operand:VI12_128 2 "nonimmediate_operand")))]
12997 rtx neg = gen_reg_rtx (<MODE>mode);
12998 emit_insn (gen_neg<mode>2 (neg, operands[2]));
12999 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for the VI48_128 modes, enabled for TARGET_AVX2 or
;; TARGET_XOP.  The visible preparation code negates the count vector
;; (operand 2) and emits xop_shl<mode>3 with the negated counts.
;; NOTE(review): the lines around the preparation code are not visible
;; in this excerpt, so it cannot be told from here under which target
;; the XOP sequence is chosen -- confirm against the full file.
13003 (define_expand "vlshr<mode>3"
13004 [(set (match_operand:VI48_128 0 "register_operand")
13006 (match_operand:VI48_128 1 "register_operand")
13007 (match_operand:VI48_128 2 "nonimmediate_operand")))]
13008 "TARGET_AVX2 || TARGET_XOP"
13012 rtx neg = gen_reg_rtx (<MODE>mode);
13013 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13014 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for the VI48_512 modes: register source in operand 1,
;; register-or-memory count vector in operand 2.
;; NOTE(review): the RTL code and the expander condition are not
;; visible in this excerpt.
13019 (define_expand "vlshr<mode>3"
13020 [(set (match_operand:VI48_512 0 "register_operand")
13022 (match_operand:VI48_512 1 "register_operand")
13023 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vlshr<mode>3 for the VI48_256 modes: register source in operand 1,
;; register-or-memory count vector in operand 2.
;; NOTE(review): the RTL code and the expander condition are not
;; visible in this excerpt.
13026 (define_expand "vlshr<mode>3"
13027 [(set (match_operand:VI48_256 0 "register_operand")
13029 (match_operand:VI48_256 1 "register_operand")
13030 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Per-element arithmetic right shift (ashiftrt) for the VI128_128
;; modes.  The expander negates the count vector (operand 2) into a
;; fresh register and emits xop_sha<mode>3 with the negated counts.
;; NOTE(review): the expander condition and the closing statements are
;; not visible in this excerpt.
13033 (define_expand "vashr<mode>3"
13034 [(set (match_operand:VI128_128 0 "register_operand")
13035 (ashiftrt:VI128_128
13036 (match_operand:VI128_128 1 "register_operand")
13037 (match_operand:VI128_128 2 "nonimmediate_operand")))]
13040 rtx neg = gen_reg_rtx (<MODE>mode);
13041 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13042 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; Per-element arithmetic right shift of V4SI, enabled for TARGET_AVX2
;; or TARGET_XOP.  The visible preparation code negates the count vector
;; (operand 2) and emits xop_shav4si3 with the negated counts.
;; NOTE(review): the lines around the preparation code are not visible
;; in this excerpt, so it cannot be told from here under which target
;; the XOP sequence is chosen -- confirm against the full file.
13046 (define_expand "vashrv4si3"
13047 [(set (match_operand:V4SI 0 "register_operand")
13048 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
13049 (match_operand:V4SI 2 "nonimmediate_operand")))]
13050 "TARGET_AVX2 || TARGET_XOP"
13054 rtx neg = gen_reg_rtx (V4SImode);
13055 emit_insn (gen_negv4si2 (neg, operands[2]));
13056 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Per-element arithmetic right shift of V16SI by the count vector in
;; operand 2 (register or memory).
;; NOTE(review): the expander condition is not visible in this excerpt.
13061 (define_expand "vashrv16si3"
13062 [(set (match_operand:V16SI 0 "register_operand")
13063 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
13064 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; Per-element arithmetic right shift of V8SI by the count vector in
;; operand 2 (register or memory).
;; NOTE(review): the expander condition is not visible in this excerpt.
13067 (define_expand "vashrv8si3"
13068 [(set (match_operand:V8SI 0 "register_operand")
13069 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
13070 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; vashl<mode>3 for the VI12_128 modes.  The expander passes its
;; operands straight to xop_sha<mode>3, without negating the counts.
;; NOTE(review): the RTL code of the pattern, the expander condition and
;; the closing statements are not visible in this excerpt.
13073 (define_expand "vashl<mode>3"
13074 [(set (match_operand:VI12_128 0 "register_operand")
13076 (match_operand:VI12_128 1 "register_operand")
13077 (match_operand:VI12_128 2 "nonimmediate_operand")))]
13080 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for the VI48_128 modes, enabled for TARGET_AVX2 or
;; TARGET_XOP.  The visible preparation code forces the count vector
;; (operand 2) into a register and emits xop_sha<mode>3.
;; NOTE(review): the lines around the preparation code are not visible
;; in this excerpt, so it cannot be told from here under which target
;; the XOP sequence is chosen -- confirm against the full file.
13084 (define_expand "vashl<mode>3"
13085 [(set (match_operand:VI48_128 0 "register_operand")
13087 (match_operand:VI48_128 1 "register_operand")
13088 (match_operand:VI48_128 2 "nonimmediate_operand")))]
13089 "TARGET_AVX2 || TARGET_XOP"
13093 operands[2] = force_reg (<MODE>mode, operands[2]);
13094 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for the VI48_512 modes: register source in operand 1,
;; register-or-memory count vector in operand 2.
;; NOTE(review): the RTL code and the expander condition are not
;; visible in this excerpt.
13099 (define_expand "vashl<mode>3"
13100 [(set (match_operand:VI48_512 0 "register_operand")
13102 (match_operand:VI48_512 1 "register_operand")
13103 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vashl<mode>3 for the VI48_256 modes: register source in operand 1,
;; register-or-memory count vector in operand 2.
;; NOTE(review): the RTL code and the expander condition are not
;; visible in this excerpt.
13106 (define_expand "vashl<mode>3"
13107 [(set (match_operand:VI48_256 0 "register_operand")
13109 (match_operand:VI48_256 1 "register_operand")
13110 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Emits "vpsha<ssemodesuffix>" with per-element counts held in the
;; vector operand 2.  The pattern is an if_then_else over operand 2 in
;; which one arm uses the negation of operand 2.  Either operand 1 or
;; operand 2 may be in memory, but the condition rejects both at once.
;; NOTE(review): the comparison and the shift codes inside the
;; if_then_else are not visible in this excerpt.
13113 (define_insn "xop_sha<mode>3"
13114 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13115 (if_then_else:VI_128
13117 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13120 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13124 (neg:VI_128 (match_dup 2)))))]
13125 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13126 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13127 [(set_attr "type" "sseishft")
13128 (set_attr "prefix_data16" "0")
13129 (set_attr "prefix_extra" "2")
13130 (set_attr "mode" "TI")])
;; Emits "vpshl<ssemodesuffix>" with per-element counts held in the
;; vector operand 2.  Structured like xop_sha<mode>3: an if_then_else
;; over operand 2 with the negated count in one arm, and a condition
;; that rejects operands 1 and 2 both being in memory.
;; NOTE(review): the comparison and the shift codes inside the
;; if_then_else are not visible in this excerpt.
13132 (define_insn "xop_shl<mode>3"
13133 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13134 (if_then_else:VI_128
13136 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13139 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13143 (neg:VI_128 (match_dup 2)))))]
13144 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13145 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13146 [(set_attr "type" "sseishft")
13147 (set_attr "prefix_data16" "0")
13148 (set_attr "prefix_extra" "2")
13149 (set_attr "mode" "TI")])
;; Shift expander for the VI1_AVX2 modes with a scalar SImode count.
;; With TARGET_XOP and V16QImode the visible code:
;;   - for codes other than ASHIFT, negates a CONST_INT count directly;
;;   - broadcasts the count into a V16QI register via a 16-element
;;     PARALLEL and vec_initv16qi;
;;   - contains a negv16qi2 of that register (its guard is not visible;
;;     presumably the "negate" flag for non-constant counts -- confirm);
;;   - emits xop_shlv16qi3 for LSHIFTRT and xop_shav16qi3 otherwise.
;; The other visible path calls ix86_expand_vecop_qihi.
;; NOTE(review): braces, else-branches and the expander condition are
;; not visible in this excerpt.
13151 (define_expand "<shift_insn><mode>3"
13152 [(set (match_operand:VI1_AVX2 0 "register_operand")
13153 (any_shift:VI1_AVX2
13154 (match_operand:VI1_AVX2 1 "register_operand")
13155 (match_operand:SI 2 "nonmemory_operand")))]
13158 if (TARGET_XOP && <MODE>mode == V16QImode)
13160 bool negate = false;
13161 rtx (*gen) (rtx, rtx, rtx);
13165 if (<CODE> != ASHIFT)
13167 if (CONST_INT_P (operands[2]))
13168 operands[2] = GEN_INT (-INTVAL (operands[2]));
13172 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
13173 for (i = 0; i < 16; i++)
13174 XVECEXP (par, 0, i) = operands[2];
13176 tmp = gen_reg_rtx (V16QImode);
13177 emit_insn (gen_vec_initv16qi (tmp, par));
13180 emit_insn (gen_negv16qi2 (tmp, tmp));
13182 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
13183 emit_insn (gen (operands[0], operands[1], tmp));
13186 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; V2DI shift expander with a scalar DImode count.  The visible code
;; negates a CONST_INT count directly, broadcasts the count into a V2DI
;; register via a 2-element PARALLEL and vec_initv2di, contains a
;; negv2di2 of that register (its guard is not visible; presumably the
;; "negate" flag for non-constant counts -- confirm), and finally emits
;; xop_shav2di3.
;; NOTE(review): the RTL code of the pattern, the expander condition,
;; braces and else-branches are not visible in this excerpt.
13190 (define_expand "ashrv2di3"
13191 [(set (match_operand:V2DI 0 "register_operand")
13193 (match_operand:V2DI 1 "register_operand")
13194 (match_operand:DI 2 "nonmemory_operand")))]
13197 rtx reg = gen_reg_rtx (V2DImode);
13199 bool negate = false;
13202 if (CONST_INT_P (operands[2]))
13203 operands[2] = GEN_INT (-INTVAL (operands[2]));
13207 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
13208 for (i = 0; i < 2; i++)
13209 XVECEXP (par, 0, i) = operands[2];
13211 emit_insn (gen_vec_initv2di (reg, par));
13214 emit_insn (gen_negv2di2 (reg, reg));
13216 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
13220 ;; XOP FRCZ support
;; Emits "vfrcz<ssemodesuffix>" for the FMAMODE modes: one
;; register-or-memory source (operand 1), register destination.
;; NOTE(review): the unspec name and the insn condition are not visible
;; in this excerpt.
13221 (define_insn "xop_frcz<mode>2"
13222 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
13224 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
13227 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
13228 [(set_attr "type" "ssecvt1")
13229 (set_attr "mode" "<MODE>")])
;; Expander for the VF_128 modes whose preparation statement sets
;; operands[3] to the all-zero constant of <MODE>mode.
;; NOTE(review): how operand 3 is used inside the pattern, and the
;; expander condition, are not visible in this excerpt.
13231 (define_expand "xop_vmfrcz<mode>2"
13232 [(set (match_operand:VF_128 0 "register_operand")
13235 [(match_operand:VF_128 1 "nonimmediate_operand")]
13240 "operands[3] = CONST0_RTX (<MODE>mode);")
;; Emits "vfrcz<ssescalarmodesuffix>" for the VF_128 modes.  Operand 1
;; is the register-or-memory source (printed with %<iptr> in Intel
;; syntax); operand 2 must satisfy const0_operand.
;; NOTE(review): the RTL codes combining the operands and the insn
;; condition are not visible in this excerpt.
13242 (define_insn "*xop_vmfrcz<mode>2"
13243 [(set (match_operand:VF_128 0 "register_operand" "=x")
13246 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
13248 (match_operand:VF_128 2 "const0_operand")
13251 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
13252 [(set_attr "type" "ssecvt1")
13253 (set_attr "mode" "<MODE>")])
;; Emits "vpcom<cond><ssemodesuffix>" for a VI_128 comparison matched by
;; ix86_comparison_int_operator (operand 1); the condition name is
;; printed through %Y1.  Operand 2 is a register, operand 3 a register
;; or memory reference.
;; NOTE(review): the insn condition is not visible in this excerpt.
13255 (define_insn "xop_maskcmp<mode>3"
13256 [(set (match_operand:VI_128 0 "register_operand" "=x")
13257 (match_operator:VI_128 1 "ix86_comparison_int_operator"
13258 [(match_operand:VI_128 2 "register_operand" "x")
13259 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
13261 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13262 [(set_attr "type" "sse4arg")
13263 (set_attr "prefix_data16" "0")
13264 (set_attr "prefix_rep" "0")
13265 (set_attr "prefix_extra" "2")
13266 (set_attr "length_immediate" "1")
13267 (set_attr "mode" "TI")])
;; Emits "vpcom<cond>u<ssemodesuffix>" for a VI_128 comparison matched
;; by ix86_comparison_uns_operator (operand 1); the condition name is
;; printed through %Y1.  Operand 2 is a register, operand 3 a register
;; or memory reference.
;; NOTE(review): the insn condition is not visible in this excerpt.
13269 (define_insn "xop_maskcmp_uns<mode>3"
13270 [(set (match_operand:VI_128 0 "register_operand" "=x")
13271 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
13272 [(match_operand:VI_128 2 "register_operand" "x")
13273 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
13275 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13276 [(set_attr "type" "ssecmp")
13277 (set_attr "prefix_data16" "0")
13278 (set_attr "prefix_rep" "0")
13279 (set_attr "prefix_extra" "2")
13280 (set_attr "length_immediate" "1")
13281 (set_attr "mode" "TI")])
13283 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
13284 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
13285 ;; the exact instruction generated for the intrinsic.
;; Same instruction template as xop_maskcmp_uns<mode>3, but the
;; comparison operator is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
;; NOTE(review): the insn condition is not visible in this excerpt.
13286 (define_insn "xop_maskcmp_uns2<mode>3"
13287 [(set (match_operand:VI_128 0 "register_operand" "=x")
13289 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
13290 [(match_operand:VI_128 2 "register_operand" "x")
13291 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
13292 UNSPEC_XOP_UNSIGNED_CMP))]
13294 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13295 [(set_attr "type" "ssecmp")
13296 (set_attr "prefix_data16" "0")
13297 (set_attr "prefix_extra" "2")
13298 (set_attr "length_immediate" "1")
13299 (set_attr "mode" "TI")])
13301 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
13302 ;; being added here to be complete.
;; UNSPEC_XOP_TRUEFALSE pattern over two VI_128 inputs and an integer
;; constant (operand 3).  The output code prints
;; "vpcomtrue<ssemodesuffix>" when operand 3 is non-zero and
;; "vpcomfalse<ssemodesuffix>" when it is zero.
;; NOTE(review): the insn condition is not visible in this excerpt.
13303 (define_insn "xop_pcom_tf<mode>3"
13304 [(set (match_operand:VI_128 0 "register_operand" "=x")
13306 [(match_operand:VI_128 1 "register_operand" "x")
13307 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
13308 (match_operand:SI 3 "const_int_operand" "n")]
13309 UNSPEC_XOP_TRUEFALSE))]
13312 return ((INTVAL (operands[3]) != 0)
13313 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13314 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
13316 [(set_attr "type" "ssecmp")
13317 (set_attr "prefix_data16" "0")
13318 (set_attr "prefix_extra" "2")
13319 (set_attr "length_immediate" "1")
13320 (set_attr "mode" "TI")])
;; Emits "vpermil2<ssemodesuffix>" for the VF_128_256 modes.  Operands 1
;; and 2 are the floating-point sources, operand 3 is a vector of mode
;; <sseintvecmode> (register or memory), and operand 4 is an immediate
;; in the range 0..3.
;; NOTE(review): the unspec name and the insn condition are not visible
;; in this excerpt.
13322 (define_insn "xop_vpermil2<mode>3"
13323 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
13325 [(match_operand:VF_128_256 1 "register_operand" "x")
13326 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
13327 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
13328 (match_operand:SI 4 "const_0_to_3_operand" "n")]
13331 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
13332 [(set_attr "type" "sse4arg")
13333 (set_attr "length_immediate" "1")
13334 (set_attr "mode" "<MODE>")])
13336 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DI unspec with two alternatives selected by the "isa" attribute:
;; the non-AVX form "aesenc", where the destination is tied to operand 1
;; (constraint "0"), and the three-operand AVX form "vaesenc".
;; Operand 2 may be a register or memory in both.
;; NOTE(review): the unspec name and the insn condition are not visible
;; in this excerpt.
13338 (define_insn "aesenc"
13339 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13340 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13341 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13345 aesenc\t{%2, %0|%0, %2}
13346 vaesenc\t{%2, %1, %0|%0, %1, %2}"
13347 [(set_attr "isa" "noavx,avx")
13348 (set_attr "type" "sselog1")
13349 (set_attr "prefix_extra" "1")
13350 (set_attr "prefix" "orig,vex")
13351 (set_attr "btver2_decode" "double,double")
13352 (set_attr "mode" "TI")])
;; UNSPEC_AESENCLAST on V2DI with two alternatives selected by the "isa"
;; attribute: the non-AVX form "aesenclast", where the destination is
;; tied to operand 1, and the three-operand AVX form "vaesenclast".
;; NOTE(review): the insn condition is not visible in this excerpt.
13354 (define_insn "aesenclast"
13355 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13356 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13357 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13358 UNSPEC_AESENCLAST))]
13361 aesenclast\t{%2, %0|%0, %2}
13362 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
13363 [(set_attr "isa" "noavx,avx")
13364 (set_attr "type" "sselog1")
13365 (set_attr "prefix_extra" "1")
13366 (set_attr "prefix" "orig,vex")
13367 (set_attr "btver2_decode" "double,double")
13368 (set_attr "mode" "TI")])
;; V2DI unspec with two alternatives selected by the "isa" attribute:
;; the non-AVX form "aesdec", where the destination is tied to operand 1
;; (constraint "0"), and the three-operand AVX form "vaesdec".
;; NOTE(review): the unspec name and the insn condition are not visible
;; in this excerpt.
13370 (define_insn "aesdec"
13371 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13372 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13373 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13377 aesdec\t{%2, %0|%0, %2}
13378 vaesdec\t{%2, %1, %0|%0, %1, %2}"
13379 [(set_attr "isa" "noavx,avx")
13380 (set_attr "type" "sselog1")
13381 (set_attr "prefix_extra" "1")
13382 (set_attr "prefix" "orig,vex")
13383 (set_attr "btver2_decode" "double,double")
13384 (set_attr "mode" "TI")])
;; UNSPEC_AESDECLAST on V2DI with two alternatives selected by the "isa"
;; attribute: the non-AVX form "aesdeclast", where the destination is
;; tied to operand 1, and the three-operand AVX form "vaesdeclast".
;; NOTE(review): the insn condition is not visible in this excerpt.
13386 (define_insn "aesdeclast"
13387 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13388 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13389 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13390 UNSPEC_AESDECLAST))]
13393 aesdeclast\t{%2, %0|%0, %2}
13394 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
13395 [(set_attr "isa" "noavx,avx")
13396 (set_attr "type" "sselog1")
13397 (set_attr "prefix_extra" "1")
13398 (set_attr "prefix" "orig,vex")
13399 (set_attr "btver2_decode" "double,double")
13400 (set_attr "mode" "TI")])
;; Single-source V2DI unspec printed as "%vaesimc"; the "prefix"
;; attribute is "maybe_vex".  Operand 1 may be a register or memory.
;; NOTE(review): the unspec name and the insn condition are not visible
;; in this excerpt.
13402 (define_insn "aesimc"
13403 [(set (match_operand:V2DI 0 "register_operand" "=x")
13404 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
13407 "%vaesimc\t{%1, %0|%0, %1}"
13408 [(set_attr "type" "sselog1")
13409 (set_attr "prefix_extra" "1")
13410 (set_attr "prefix" "maybe_vex")
13411 (set_attr "mode" "TI")])
;; UNSPEC_AESKEYGENASSIST on V2DI, printed as "%vaeskeygenassist".
;; Operand 1 is the register-or-memory source and operand 2 an
;; immediate in the range 0..255 (one immediate byte).
;; NOTE(review): the insn condition is not visible in this excerpt.
13413 (define_insn "aeskeygenassist"
13414 [(set (match_operand:V2DI 0 "register_operand" "=x")
13415 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
13416 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13417 UNSPEC_AESKEYGENASSIST))]
13419 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
13420 [(set_attr "type" "sselog1")
13421 (set_attr "prefix_extra" "1")
13422 (set_attr "length_immediate" "1")
13423 (set_attr "prefix" "maybe_vex")
13424 (set_attr "mode" "TI")])
;; V2DI unspec with an immediate selector (operand 3, range 0..255) and
;; two alternatives selected by the "isa" attribute: the non-AVX form
;; "pclmulqdq", where the destination is tied to operand 1, and the
;; AVX form "vpclmulqdq" with a separate destination.
;; NOTE(review): the unspec name and the insn condition are not visible
;; in this excerpt.
13426 (define_insn "pclmulqdq"
13427 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13428 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13429 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
13430 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
13434 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
13435 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13436 [(set_attr "isa" "noavx,avx")
13437 (set_attr "type" "sselog1")
13438 (set_attr "prefix_extra" "1")
13439 (set_attr "length_immediate" "1")
13440 (set_attr "prefix" "orig,vex")
13441 (set_attr "mode" "TI")])
;; Expander that builds operand 0 by hand as a PARALLEL of nregs + 1
;; elements, where nregs is 16 for TARGET_64BIT and 8 otherwise.
;; Element 0 is an UNSPEC_VOLATILE; elements 1..nregs each SET one SSE
;; register, viewed in V8SImode, to the zero vector.
;; NOTE(review): the unspec_volatile code argument and the expander
;; condition are not visible in this excerpt.
13443 (define_expand "avx_vzeroall"
13444 [(match_par_dup 0 [(const_int 0)])]
13447 int nregs = TARGET_64BIT ? 16 : 8;
13450 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
13452 XVECEXP (operands[0], 0, 0)
13453 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
13456 for (regno = 0; regno < nregs; regno++)
13457 XVECEXP (operands[0], 0, regno + 1)
13458 = gen_rtx_SET (VOIDmode,
13459 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
13460 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  The insn is
;; described as having no modrm byte and no memory access.
;; NOTE(review): the insn condition and the output template are not
;; visible in this excerpt.
13463 (define_insn "*avx_vzeroall"
13464 [(match_parallel 0 "vzeroall_operation"
13465 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
13468 [(set_attr "type" "sse")
13469 (set_attr "modrm" "0")
13470 (set_attr "memory" "none")
13471 (set_attr "prefix" "vex")
13472 (set_attr "btver2_decode" "vector")
13473 (set_attr "mode" "OI")])
13475 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
13476 ;; if the upper 128bits are unused.
;; Modelled as a bare UNSPECV_VZEROUPPER unspec_volatile with no
;; operands; described as having no modrm byte and no memory access.
;; NOTE(review): the insn condition and the output template are not
;; visible in this excerpt.
13477 (define_insn "avx_vzeroupper"
13478 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
13481 [(set_attr "type" "sse")
13482 (set_attr "modrm" "0")
13483 (set_attr "memory" "none")
13484 (set_attr "prefix" "vex")
13485 (set_attr "btver2_decode" "vector")
13486 (set_attr "mode" "OI")])
;; Emits "vpbroadcast<ssemodesuffix>" for the VI modes.  The source is
;; element 0 of operand 1, which has mode <ssexmmmode> and may be a
;; register or memory (printed with %<iptr> in Intel syntax).
;; NOTE(review): the code wrapping the vec_select and the insn condition
;; are not visible in this excerpt.
13488 (define_insn "avx2_pbroadcast<mode>"
13489 [(set (match_operand:VI 0 "register_operand" "=x")
13491 (vec_select:<ssescalarmode>
13492 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
13493 (parallel [(const_int 0)]))))]
13495 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
13496 [(set_attr "type" "ssemov")
13497 (set_attr "prefix_extra" "1")
13498 (set_attr "prefix" "vex")
13499 (set_attr "mode" "<sseinsnmode>")])
;; Duplicates element 0 of the VI_256 operand 1 across a VI_256 result
;; with "vpbroadcast<ssemodesuffix>".  Alternative 0 takes the source
;; from memory; alternative 1 takes it from a register, printed with
;; the %x modifier.
;; NOTE(review): the insn condition is not visible in this excerpt.
13501 (define_insn "avx2_pbroadcast<mode>_1"
13502 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
13503 (vec_duplicate:VI_256
13504 (vec_select:<ssescalarmode>
13505 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
13506 (parallel [(const_int 0)]))))]
13509 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
13510 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
13511 [(set_attr "type" "ssemov")
13512 (set_attr "prefix_extra" "1")
13513 (set_attr "prefix" "vex")
13514 (set_attr "mode" "<sseinsnmode>")])
;; Emits "vperm<ssemodesuffix>" for the VI48F_256_512 modes.  Operand 1
;; is the register-or-memory data source and operand 2 a register of
;; mode <sseintvecmode>; the template also prints <mask_operand3>.
;; Requires TARGET_AVX2 together with <mask_mode512bit_condition>.
;; NOTE(review): the unspec name is not visible in this excerpt.
13516 (define_insn "<avx2_avx512f>_permvar<mode><mask_name>"
13517 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
13518 (unspec:VI48F_256_512
13519 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
13520 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
13522 "TARGET_AVX2 && <mask_mode512bit_condition>"
13523 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
13524 [(set_attr "type" "sselog")
13525 (set_attr "prefix" "<mask_prefix2>")
13526 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the VI8F_256_512 modes taking an 8-bit immediate
;; (operand 2).  It splits the immediate into four 2-bit fields (bits
;; 1:0, 3:2, 5:4 and 7:6) and passes them as separate constants to
;; <avx2_avx512f>_perm<mode>_1.
;; NOTE(review): the expander condition and the closing statements are
;; not visible in this excerpt.
13528 (define_expand "<avx2_avx512f>_perm<mode>"
13529 [(match_operand:VI8F_256_512 0 "register_operand")
13530 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
13531 (match_operand:SI 2 "const_0_to_255_operand")]
13534 int mask = INTVAL (operands[2]);
13535 emit_insn (gen_<avx2_avx512f>_perm<mode>_1 (operands[0], operands[1],
13536 GEN_INT ((mask >> 0) & 3),
13537 GEN_INT ((mask >> 2) & 3),
13538 GEN_INT ((mask >> 4) & 3),
13539 GEN_INT ((mask >> 6) & 3)));
;; Masked variant for the V8FI modes.  Like the unmasked expander it
;; splits the 8-bit immediate (operand 2) into four 2-bit fields, and
;; additionally forwards operand 3 (a vector_move_operand) and operand 4
;; (a register of mode <avx512fmaskmode>) to
;; <avx2_avx512f>_perm<mode>_1_mask.
;; NOTE(review): the expander condition and the closing statements are
;; not visible in this excerpt.
13543 (define_expand "avx512f_perm<mode>_mask"
13544 [(match_operand:V8FI 0 "register_operand")
13545 (match_operand:V8FI 1 "nonimmediate_operand")
13546 (match_operand:SI 2 "const_0_to_255_operand")
13547 (match_operand:V8FI 3 "vector_move_operand")
13548 (match_operand:<avx512fmaskmode> 4 "register_operand")]
13551 int mask = INTVAL (operands[2]);
13552 emit_insn (gen_<avx2_avx512f>_perm<mode>_1_mask (operands[0], operands[1],
13553 GEN_INT ((mask >> 0) & 3),
13554 GEN_INT ((mask >> 2) & 3),
13555 GEN_INT ((mask >> 4) & 3),
13556 GEN_INT ((mask >> 6) & 3),
13557 operands[3], operands[4]));
;; vec_select of the VI8F_256_512 operand 1 with four selectors
;; (operands 2..5), each in the range 0..3.  The output code packs the
;; selectors back into one immediate (2 bits each, operand 2 lowest),
;; stores it in operands[2] and prints "vperm<ssemodesuffix>" with it.
;; Requires TARGET_AVX2 together with <mask_mode512bit_condition>.
;; NOTE(review): the declaration/initialisation of "mask" is not
;; visible in this excerpt.
13561 (define_insn "<avx2_avx512f>_perm<mode>_1<mask_name>"
13562 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
13563 (vec_select:VI8F_256_512
13564 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
13565 (parallel [(match_operand 2 "const_0_to_3_operand")
13566 (match_operand 3 "const_0_to_3_operand")
13567 (match_operand 4 "const_0_to_3_operand")
13568 (match_operand 5 "const_0_to_3_operand")])))]
13569 "TARGET_AVX2 && <mask_mode512bit_condition>"
13572 mask |= INTVAL (operands[2]) << 0;
13573 mask |= INTVAL (operands[3]) << 2;
13574 mask |= INTVAL (operands[4]) << 4;
13575 mask |= INTVAL (operands[5]) << 6;
13576 operands[2] = GEN_INT (mask);
13577 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13579 [(set_attr "type" "sselog")
13580 (set_attr "prefix" "<mask_prefix2>")
13581 (set_attr "mode" "<sseinsnmode>")])
;; Emits "vperm2i128" on two V4DI sources (operand 2 may be in memory)
;; with an immediate selector in the range 0..255 (operand 3).
;; NOTE(review): the unspec name and the insn condition are not visible
;; in this excerpt.
13583 (define_insn "avx2_permv2ti"
13584 [(set (match_operand:V4DI 0 "register_operand" "=x")
13586 [(match_operand:V4DI 1 "register_operand" "x")
13587 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
13588 (match_operand:SI 3 "const_0_to_255_operand" "n")]
13591 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13592 [(set_attr "type" "sselog")
13593 (set_attr "prefix" "vex")
13594 (set_attr "mode" "OI")])
;; Emits "vbroadcastsd": duplicates element 0 of the V2DF register
;; operand 1 across a V4DF result.
;; NOTE(review): the vec_select line and the insn condition are not
;; visible in this excerpt.
13596 (define_insn "avx2_vec_dupv4df"
13597 [(set (match_operand:V4DF 0 "register_operand" "=x")
13598 (vec_duplicate:V4DF
13600 (match_operand:V2DF 1 "register_operand" "x")
13601 (parallel [(const_int 0)]))))]
13603 "vbroadcastsd\t{%1, %0|%0, %1}"
13604 [(set_attr "type" "sselog1")
13605 (set_attr "prefix" "vex")
13606 (set_attr "mode" "V4DF")])
13608 ;; Modes handled by AVX vec_dup patterns.
;; Iterates over the four 256-bit modes V8SI, V8SF, V4DI and V4DF.
13609 (define_mode_iterator AVX_VEC_DUP_MODE
13610 [V8SI V8SF V4DI V4DF])
;; vec_duplicate of a scalar into an AVX_VEC_DUP_MODE vector, with three
;; alternatives: memory source (any ISA), register source restricted to
;; "avx2" by the isa attribute, and register source restricted to
;; "noavx2".  The first two print "vbroadcast<ssescalarmodesuffix>"; the
;; register form uses the %x modifier on the source.
;; NOTE(review): the template for the third alternative and the insn
;; condition are not visible in this excerpt.
13612 (define_insn "vec_dup<mode>"
13613 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
13614 (vec_duplicate:AVX_VEC_DUP_MODE
13615 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
13618 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
13619 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
13621 [(set_attr "type" "ssemov")
13622 (set_attr "prefix_extra" "1")
13623 (set_attr "prefix" "vex")
13624 (set_attr "isa" "*,avx2,noavx2")
13625 (set_attr "mode" "V8SF")])
;; Duplicates element 0 of operand 1 (mode <ssexmmmode>, register or
;; memory) across a VI48F_512 result, printed as
;; "v<sseintprefix>broadcast<bcstscalarsuff>" with <mask_operand2>.
;; NOTE(review): the insn condition is not visible in this excerpt.
13627 (define_insn "<mask_codefor>avx512f_vec_dup<mode><mask_name>"
13628 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13629 (vec_duplicate:VI48F_512
13630 (vec_select:<ssescalarmode>
13631 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
13632 (parallel [(const_int 0)]))))]
13634 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13635 [(set_attr "type" "ssemov")
13636 (set_attr "prefix" "evex")
13637 (set_attr "mode" "<sseinsnmode>")])
;; vec_duplicate of a <ssexmmmode> operand into a V16FI result.  With a
;; register source the insn prints "vshuf<shuffletype>32x4" with
;; immediate 0 and the source (via %g1) used for both inputs; with a
;; memory source it prints "vbroadcast<shuffletype>32x4".
;; NOTE(review): the insn condition is not visible in this excerpt.
13639 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
13640 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
13641 (vec_duplicate:V16FI
13642 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
13645 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
13646 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13647 [(set_attr "type" "ssemov")
13648 (set_attr "prefix" "evex")
13649 (set_attr "mode" "<sseinsnmode>")])
;; vec_duplicate of a <ssehalfvecmode> operand into a V8FI result.  With
;; a register source the insn prints "vshuf<shuffletype>64x2" with
;; immediate 0x44 and the source (via %g1) used for both inputs; with a
;; memory source it prints "vbroadcast<shuffletype>64x4".
;; NOTE(review): the insn condition is not visible in this excerpt.
13651 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
13652 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
13653 (vec_duplicate:V8FI
13654 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
13657 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
13658 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13659 [(set_attr "type" "ssemov")
13660 (set_attr "prefix" "evex")
13661 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a scalar held in a general-purpose register ("r") into every
;; element of a VI48_512 destination with vpbroadcast<bcstscalarsuff>.
;; The condition requires TARGET_AVX512F, and additionally TARGET_64BIT
;; when the mode is V8DI.
13663 (define_insn "<mask_codefor>avx512f_vec_dup_gpr<mode><mask_name>"
13664 [(set (match_operand:VI48_512 0 "register_operand" "=v")
13665 (vec_duplicate:VI48_512
13666 (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
13667 "TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)"
13668 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13669 [(set_attr "type" "ssemov")
13670 (set_attr "prefix" "evex")
13671 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a scalar operand (vector register or memory, "vm") into
;; every element of a VI48F_512 destination, using
;; v<sseintprefix>broadcast<bcstscalarsuff>.
;; NOTE(review): the insn condition is on a line not visible here.
13673 (define_insn "<mask_codefor>avx512f_vec_dup_mem<mode><mask_name>"
13674 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13675 (vec_duplicate:VI48F_512
13676 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
13678 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13679 [(set_attr "type" "ssemov")
13680 (set_attr "prefix" "evex")
13681 (set_attr "mode" "<sseinsnmode>")])
;; Emit vbroadcasti128 from a <ssehalfvecmode> memory operand into a
;; VI_256 register.
;; NOTE(review): the RTL that wraps operand 1 and the insn condition are
;; on lines not visible in this excerpt.
13683 (define_insn "avx2_vbroadcasti128_<mode>"
13684 [(set (match_operand:VI_256 0 "register_operand" "=x")
13686 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
13689 "vbroadcasti128\t{%1, %0|%0, %1}"
13690 [(set_attr "type" "ssemov")
13691 (set_attr "prefix_extra" "1")
13692 (set_attr "prefix" "vex")
13693 (set_attr "mode" "OI")])
;; Split of a register-source broadcast, active when TARGET_AVX is set,
;; TARGET_AVX2 is not, and reload has completed.  The replacement is two
;; sets: first duplicate the scalar into operand 2, then vec_concat
;; operand 2 with itself.  Operand 2 is created by the preparation
;; statement as the destination's hard register (same REGNO) viewed in
;; <ssehalfvecmode>.
;; NOTE(review): the opening line of this definition and the destination
;; of the second set are on lines not visible here.
13696 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
13697 (vec_duplicate:AVX_VEC_DUP_MODE
13698 (match_operand:<ssescalarmode> 1 "register_operand")))]
13699 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
13700 [(set (match_dup 2)
13701 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
13703 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
13704 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Fill a V_256 register from a <ssehalfvecmode> operand.  Alternatives:
;;   0: memory source            -> vbroadcast<i128>
;;   1: source tied to dest ("0") -> vinsert<i128> with immediate 1
;;   2: other register ("?x")     -> vperm2<i128> with immediate 0, the
;;      source printed with %t for both inputs
;; The "type" and "length_immediate" attributes are listed per
;; alternative in the same order.
;; NOTE(review): the RTL that wraps operand 1 and the insn condition are
;; on lines not visible here.
13706 (define_insn "avx_vbroadcastf128_<mode>"
13707 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
13709 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
13713 vbroadcast<i128>\t{%1, %0|%0, %1}
13714 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
13715 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
13716 [(set_attr "type" "ssemov,sselog1,sselog1")
13717 (set_attr "prefix_extra" "1")
13718 (set_attr "length_immediate" "0,1,1")
13719 (set_attr "prefix" "vex")
13720 (set_attr "mode" "<sseinsnmode>")])
;; Emit vpbroadcastmb2q: a V8DI destination built by vec_duplicate from a
;; QImode operand held in a mask register ("k").
;; NOTE(review): the operation applied to operand 1 before duplication
;; and the insn condition are on lines not visible here.
13722 (define_insn "avx512cd_maskb_vec_dupv8di"
13723 [(set (match_operand:V8DI 0 "register_operand" "=v")
13724 (vec_duplicate:V8DI
13726 (match_operand:QI 1 "register_operand" "k"))))]
13728 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
13729 [(set_attr "type" "mskmov")
13730 (set_attr "prefix" "evex")
13731 (set_attr "mode" "XI")])
;; Emit vpbroadcastmw2d: a V16SI destination built by vec_duplicate from
;; an HImode operand held in a mask register ("k").
;; NOTE(review): the operation applied to operand 1 before duplication
;; and the insn condition are on lines not visible here.
13733 (define_insn "avx512cd_maskw_vec_dupv16si"
13734 [(set (match_operand:V16SI 0 "register_operand" "=v")
13735 (vec_duplicate:V16SI
13737 (match_operand:HI 1 "register_operand" "k"))))]
13739 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
13740 [(set_attr "type" "mskmov")
13741 (set_attr "prefix" "evex")
13742 (set_attr "mode" "XI")])
13744 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
13745 ;; If it so happens that the input is in memory, use vbroadcast.
13746 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;;
;; V4SF case.  Operand 3 is the first entry of the selection parallel
;; (operand 2, checked by avx_vbroadcast_operand); its value is read as
;; "elt" in the output code.  Of the output code visible here:
;;   - one path re-addresses operand 1 to SFmode at byte offset elt * 4
;;     and emits vbroadcastss;
;;   - another path emits vpermilps with immediate elt * 0x55, i.e. the
;;     value of elt repeated in each of the four 2-bit immediate fields.
;; NOTE(review): the case labels, the insn condition and the enclosing
;; vec_select line are not visible in this excerpt.
13747 (define_insn "*avx_vperm_broadcast_v4sf"
13748 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
13750 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
13751 (match_parallel 2 "avx_vbroadcast_operand"
13752 [(match_operand 3 "const_int_operand" "C,n,n")])))]
13755 int elt = INTVAL (operands[3]);
13756 switch (which_alternative)
13760 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
13761 return "vbroadcastss\t{%1, %0|%0, %k1}";
13763 operands[2] = GEN_INT (elt * 0x55);
13764 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
13766 gcc_unreachable ();
13769 [(set_attr "type" "ssemov,ssemov,sselog1")
13770 (set_attr "prefix_extra" "1")
13771 (set_attr "length_immediate" "0,0,1")
13772 (set_attr "prefix" "vex")
13773 (set_attr "mode" "SF,SF,V4SF")])
;; VF_256 counterpart of the broadcast-by-vec_select pattern, implemented
;; as an insn-and-split.  The split runs after reload and is skipped for
;; V4DF when TARGET_AVX2 is set.  Pieces of the preparation code visible
;; in this excerpt:
;;   - with TARGET_AVX2 and elt == 0, gen_vec_dup<mode> is emitted on the
;;     <ssescalarmode> lowpart of an operand;
;;   - otherwise gen_avx_vpermil<mode> first replicates the wanted element
;;     within its 128-bit lane (mask 15 or 0 for V4DF depending on
;;     elt & 1; (elt & 3) * 0x55 for the other mode), then
;;     gen_avx_vperm2f128<mode>3 copies the lane containing elt into both
;;     lanes using mask (elt / (<ssescalarnum> / 2)) * 0x11;
;;   - operand 1 may instead be re-addressed to the selected scalar with
;;     adjust_address, leaving the vec_duplicate replacement pattern to
;;     be emitted.
;; NOTE(review): the conditions selecting between these paths, the insn
;; condition and several braces are on lines not visible here.
13775 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
13776 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
13778 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
13779 (match_parallel 2 "avx_vbroadcast_operand"
13780 [(match_operand 3 "const_int_operand" "C,n,n")])))]
13783 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
13784 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
13786 rtx op0 = operands[0], op1 = operands[1];
13787 int elt = INTVAL (operands[3]);
13793 if (TARGET_AVX2 && elt == 0)
13795 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
13800 /* Shuffle element we care about into all elements of the 128-bit lane.
13801 The other lane gets shuffled too, but we don't care. */
13802 if (<MODE>mode == V4DFmode)
13803 mask = (elt & 1 ? 15 : 0);
13805 mask = (elt & 3) * 0x55;
13806 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
13808 /* Shuffle the lane we care about into both lanes of the dest. */
13809 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
13810 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
13814 operands[1] = adjust_address (op1, <ssescalarmode>mode,
13815 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the immediate form of vpermil on VF2 modes.  The 8-bit
;; immediate (operand 2) is turned into an explicit selection vector:
;; elements are processed in pairs starting at index i; mask bit i picks
;; the source (i or i+1) for result element i, and mask bit i+1 picks the
;; source (again i or i+1) for result element i+1.  The resulting
;; PARALLEL has <ssescalarnum> entries.
;; NOTE(review): the vec_select wrapper and the assignment target of the
;; PARALLEL are on lines not visible here.
13818 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
13819 [(set (match_operand:VF2 0 "register_operand")
13821 (match_operand:VF2 1 "nonimmediate_operand")
13822 (match_operand:SI 2 "const_0_to_255_operand")))]
13823 "TARGET_AVX && <mask_mode512bit_condition>"
13825 int mask = INTVAL (operands[2]);
13826 rtx perm[<ssescalarnum>];
13829 for (i = 0; i < <ssescalarnum>; i = i + 2)
13831 perm[i] = GEN_INT (((mask >> i) & 1) + i);
13832 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
13836 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the immediate form of vpermil on VF1 modes.  The 8-bit
;; immediate (operand 2) is turned into an explicit selection vector:
;; elements are processed in groups of four starting at index i, and the
;; four 2-bit fields of the immediate (bits 1:0, 3:2, 5:4, 7:6) select
;; the source within that group for result elements i .. i+3.  The same
;; four fields are reused for every group.
;; NOTE(review): the vec_select wrapper and the assignment target of the
;; PARALLEL are on lines not visible here.
13839 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
13840 [(set (match_operand:VF1 0 "register_operand")
13842 (match_operand:VF1 1 "nonimmediate_operand")
13843 (match_operand:SI 2 "const_0_to_255_operand")))]
13844 "TARGET_AVX && <mask_mode512bit_condition>"
13846 int mask = INTVAL (operands[2]);
13847 rtx perm[<ssescalarnum>];
13850 for (i = 0; i < <ssescalarnum>; i = i + 4)
13852 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
13853 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
13854 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
13855 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
13859 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matching insn for the immediate form of vpermil.  The selection
;; parallel (operand 2) has an empty predicate; acceptance is decided in
;; the insn condition by avx_vpermilp_parallel.  The output code calls
;; the same helper again and uses its result minus one as the immediate,
;; so a zero return both rejects the pattern and is never encoded.
13862 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
13863 [(set (match_operand:VF 0 "register_operand" "=v")
13865 (match_operand:VF 1 "nonimmediate_operand" "vm")
13866 (match_parallel 2 ""
13867 [(match_operand 3 "const_int_operand")])))]
13868 "TARGET_AVX && <mask_mode512bit_condition>
13869 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
13871 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
13872 operands[2] = GEN_INT (mask);
13873 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
13875 [(set_attr "type" "sselog")
13876 (set_attr "prefix_extra" "1")
13877 (set_attr "length_immediate" "1")
13878 (set_attr "prefix" "<mask_prefix>")
13879 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control form of vpermil: operand 1 is the data register and
;; operand 2 is an integer vector (<sseintvecmode>, register or memory)
;; supplying the control.
;; NOTE(review): the unspec wrapper and its code are on lines not visible
;; here.
13881 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
13882 [(set (match_operand:VF 0 "register_operand" "=v")
13884 [(match_operand:VF 1 "register_operand" "v")
13885 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
13887 "TARGET_AVX && <mask_mode512bit_condition>"
13888 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13889 [(set_attr "type" "sselog")
13890 (set_attr "prefix_extra" "1")
13891 (set_attr "btver2_decode" "vector")
13892 (set_attr "prefix" "<mask_prefix>")
13893 (set_attr "mode" "<sseinsnmode>")])
;; vpermi2<ssemodesuffix>: operand 2, the <sseintvecmode> vector, is tied
;; to the destination register ("0"); operand 1 is a register and
;; operand 3 a register or memory operand.  Operand 2 therefore does not
;; appear in the template -- it is the same register as %0.
;; NOTE(review): the unspec wrapper and the insn condition are on lines
;; not visible here.
13895 (define_insn "avx512f_vpermi2var<mode>3"
13896 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13898 [(match_operand:VI48F_512 1 "register_operand" "v")
13899 (match_operand:<sseintvecmode> 2 "register_operand" "0")
13900 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
13903 "vpermi2<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}"
13904 [(set_attr "type" "sselog")
13905 (set_attr "prefix" "evex")
13906 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermi2<ssemodesuffix>: the UNSPEC_VPERMI2_MASK result is
;; combined through vec_merge under mask register operand 4 ("k"), which
;; the template prints in braces after the destination.  As in the
;; unmasked form, operand 2 is tied to the destination.
;; NOTE(review): the second vec_merge input and the insn condition are on
;; lines not visible here.
13908 (define_insn "avx512f_vpermi2var<mode>3_mask"
13909 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13910 (vec_merge:VI48F_512
13912 [(match_operand:VI48F_512 1 "register_operand" "v")
13913 (match_operand:<sseintvecmode> 2 "register_operand" "0")
13914 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
13915 UNSPEC_VPERMI2_MASK)
13917 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
13919 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
13920 [(set_attr "type" "sselog")
13921 (set_attr "prefix" "evex")
13922 (set_attr "mode" "<sseinsnmode>")])
;; vpermt2<ssemodesuffix>: here operand 2, a VI48F_512 data vector, is
;; the one tied to the destination ("0"), while operand 1 is the
;; <sseintvecmode> vector and operand 3 a register or memory operand.
;; NOTE(review): the unspec wrapper and the insn condition are on lines
;; not visible here.
13924 (define_insn "avx512f_vpermt2var<mode>3"
13925 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13927 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
13928 (match_operand:VI48F_512 2 "register_operand" "0")
13929 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
13932 "vpermt2<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}"
13933 [(set_attr "type" "sselog")
13934 (set_attr "prefix" "evex")
13935 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermt2<ssemodesuffix>: the permute result is combined through
;; vec_merge under mask register operand 4 ("k"), which the template
;; prints in braces after the destination.  Operand 2 is tied to the
;; destination, as in the unmasked form.
;; NOTE(review): the unspec code, the second vec_merge input and the insn
;; condition are on lines not visible here.
13937 (define_insn "avx512f_vpermt2var<mode>3_mask"
13938 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13939 (vec_merge:VI48F_512
13941 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
13942 (match_operand:VI48F_512 2 "register_operand" "0")
13943 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
13946 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
13948 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
13949 [(set_attr "type" "sselog")
13950 (set_attr "prefix" "evex")
13951 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vperm2f128 with an 8-bit immediate (operand 3).  When
;; neither bit 3 nor bit 7 of the immediate is set ((mask & 0x88) == 0),
;; the operation is rebuilt as a vec_select from the vec_concat of
;; operands 1 and 2:
;;   - immediate bits 1:0 choose which half-vector (in units of nelt/2
;;     elements of the concatenation) feeds the low half of the result;
;;   - immediate bits 5:4 choose the half-vector for the high half.
;; The resulting SET replaces the UNSPEC_VPERMIL2F128 form shown in the
;; pattern.
;; NOTE(review): the statements that emit the SET and finish the expander,
;; and the expander condition, are on lines not visible here.
13953 (define_expand "avx_vperm2f128<mode>3"
13954 [(set (match_operand:AVX256MODE2P 0 "register_operand")
13955 (unspec:AVX256MODE2P
13956 [(match_operand:AVX256MODE2P 1 "register_operand")
13957 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
13958 (match_operand:SI 3 "const_0_to_255_operand")]
13959 UNSPEC_VPERMIL2F128))]
13962 int mask = INTVAL (operands[3]);
13963 if ((mask & 0x88) == 0)
13965 rtx perm[<ssescalarnum>], t1, t2;
13966 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
13968 base = (mask & 3) * nelt2;
13969 for (i = 0; i < nelt2; ++i)
13970 perm[i] = GEN_INT (base + i);
13972 base = ((mask >> 4) & 3) * nelt2;
13973 for (i = 0; i < nelt2; ++i)
13974 perm[i + nelt2] = GEN_INT (base + i);
13976 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
13977 operands[1], operands[2]);
13978 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
13979 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
13980 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
13986 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
13987 ;; means that in order to represent this properly in rtl we'd have to
13988 ;; nest *another* vec_concat with a zero operand and do the select from
13989 ;; a 4x wide vector. That doesn't seem very nice.
;;
;; Hence this unspec form: it matches UNSPEC_VPERMIL2F128 with any 8-bit
;; immediate and emits vperm2<i128> with the immediate passed through
;; unchanged.
13990 (define_insn "*avx_vperm2f128<mode>_full"
13991 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
13992 (unspec:AVX256MODE2P
13993 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
13994 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
13995 (match_operand:SI 3 "const_0_to_255_operand" "n")]
13996 UNSPEC_VPERMIL2F128))]
13998 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13999 [(set_attr "type" "sselog")
14000 (set_attr "prefix_extra" "1")
14001 (set_attr "length_immediate" "1")
14002 (set_attr "prefix" "vex")
14003 (set_attr "mode" "<sseinsnmode>")])
;; Matches the vec_select-of-vec_concat form of vperm2f128.  The
;; selection parallel (operand 3) has an empty predicate; acceptance is
;; decided in the insn condition by avx_vperm2f128_parallel, whose result
;; minus one is the immediate.  The output code has three returns: two
;; emit vinsert<i128> with immediate 0 or 1 (operand 2 printed with %x),
;; and the fall-through emits vperm2<i128> with the computed immediate.
;; NOTE(review): the tests guarding the two vinsert returns and the first
;; line of the insn condition are on lines not visible here.
14005 (define_insn "*avx_vperm2f128<mode>_nozero"
14006 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
14007 (vec_select:AVX256MODE2P
14008 (vec_concat:<ssedoublevecmode>
14009 (match_operand:AVX256MODE2P 1 "register_operand" "x")
14010 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
14011 (match_parallel 3 ""
14012 [(match_operand 4 "const_int_operand")])))]
14014 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
14016 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
14018 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
14020 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
14021 operands[3] = GEN_INT (mask);
14022 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14024 [(set_attr "type" "sselog")
14025 (set_attr "prefix_extra" "1")
14026 (set_attr "length_immediate" "1")
14027 (set_attr "prefix" "vex")
14028 (set_attr "mode" "<sseinsnmode>")])
;; Expander taking a V_256 destination (operand 0), a V_256 source
;; (operand 1), a <ssehalfvecmode> value (operand 2) and a 0/1 selector
;; (operand 3).  It switches on the selector to pick either
;; gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode> and emits that insn on
;; operands 0..2; any other selector value is unreachable.
;; NOTE(review): the case labels (which value maps to lo/hi) and the
;; expander condition are on lines not visible here.
14030 (define_expand "avx_vinsertf128<mode>"
14031 [(match_operand:V_256 0 "register_operand")
14032 (match_operand:V_256 1 "register_operand")
14033 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
14034 (match_operand:SI 3 "const_0_to_1_operand")]
14037 rtx (*insn)(rtx, rtx, rtx);
14039 switch (INTVAL (operands[3]))
14042 insn = gen_vec_set_lo_<mode>;
14045 insn = gen_vec_set_hi_<mode>;
14048 gcc_unreachable ();
14051 emit_insn (insn (operands[0], operands[1], operands[2]));
;; V4DI "set low half" via vinserti128 with immediate 0x0: operand 2
;; (V2DI, register or memory) is inserted, and elements 2 and 3 of
;; operand 1 are the part selected from the original vector.
;; NOTE(review): the vec_concat/vec_select wrapper lines and the insn
;; condition are not visible here.
14055 (define_insn "avx2_vec_set_lo_v4di"
14056 [(set (match_operand:V4DI 0 "register_operand" "=x")
14058 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
14060 (match_operand:V4DI 1 "register_operand" "x")
14061 (parallel [(const_int 2) (const_int 3)]))))]
14063 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14064 [(set_attr "type" "sselog")
14065 (set_attr "prefix_extra" "1")
14066 (set_attr "length_immediate" "1")
14067 (set_attr "prefix" "vex")
14068 (set_attr "mode" "OI")])
;; V4DI "set high half" via vinserti128 with immediate 0x1: elements 0
;; and 1 of operand 1 are the part selected from the original vector, and
;; operand 2 (V2DI, register or memory) is inserted.
;; NOTE(review): the vec_concat/vec_select wrapper lines and the insn
;; condition are not visible here.
14070 (define_insn "avx2_vec_set_hi_v4di"
14071 [(set (match_operand:V4DI 0 "register_operand" "=x")
14074 (match_operand:V4DI 1 "register_operand" "x")
14075 (parallel [(const_int 0) (const_int 1)]))
14076 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
14078 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14079 [(set_attr "type" "sselog")
14080 (set_attr "prefix_extra" "1")
14081 (set_attr "length_immediate" "1")
14082 (set_attr "prefix" "vex")
14083 (set_attr "mode" "OI")])
;; Replace the low half of a VI8F_256 vector: the result is the
;; vec_concat of operand 2 (new low half) with elements 2 and 3 of
;; operand 1 (the preserved high half).  Emitted as vinsert<i128> with
;; immediate 0x0.
14085 (define_insn "vec_set_lo_<mode>"
14086 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
14087 (vec_concat:VI8F_256
14088 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
14089 (vec_select:<ssehalfvecmode>
14090 (match_operand:VI8F_256 1 "register_operand" "x")
14091 (parallel [(const_int 2) (const_int 3)]))))]
14093 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14094 [(set_attr "type" "sselog")
14095 (set_attr "prefix_extra" "1")
14096 (set_attr "length_immediate" "1")
14097 (set_attr "prefix" "vex")
14098 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a VI8F_256 vector: the result is the
;; vec_concat of elements 0 and 1 of operand 1 (the preserved low half)
;; with operand 2 (new high half).  Emitted as vinsert<i128> with
;; immediate 0x1.
14100 (define_insn "vec_set_hi_<mode>"
14101 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
14102 (vec_concat:VI8F_256
14103 (vec_select:<ssehalfvecmode>
14104 (match_operand:VI8F_256 1 "register_operand" "x")
14105 (parallel [(const_int 0) (const_int 1)]))
14106 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
14108 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14109 [(set_attr "type" "sselog")
14110 (set_attr "prefix_extra" "1")
14111 (set_attr "length_immediate" "1")
14112 (set_attr "prefix" "vex")
14113 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of a VI4F_256 vector: the result is the
;; vec_concat of operand 2 (new low half) with elements 4..7 of
;; operand 1 (the preserved high half).  Emitted as vinsert<i128> with
;; immediate 0x0.
14115 (define_insn "vec_set_lo_<mode>"
14116 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
14117 (vec_concat:VI4F_256
14118 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
14119 (vec_select:<ssehalfvecmode>
14120 (match_operand:VI4F_256 1 "register_operand" "x")
14121 (parallel [(const_int 4) (const_int 5)
14122 (const_int 6) (const_int 7)]))))]
14124 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14125 [(set_attr "type" "sselog")
14126 (set_attr "prefix_extra" "1")
14127 (set_attr "length_immediate" "1")
14128 (set_attr "prefix" "vex")
14129 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a VI4F_256 vector: the result is the
;; vec_concat of elements 0..3 of operand 1 (the preserved low half) with
;; operand 2 (new high half).  Emitted as vinsert<i128> with
;; immediate 0x1.
14131 (define_insn "vec_set_hi_<mode>"
14132 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
14133 (vec_concat:VI4F_256
14134 (vec_select:<ssehalfvecmode>
14135 (match_operand:VI4F_256 1 "register_operand" "x")
14136 (parallel [(const_int 0) (const_int 1)
14137 (const_int 2) (const_int 3)]))
14138 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
14140 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14141 [(set_attr "type" "sselog")
14142 (set_attr "prefix_extra" "1")
14143 (set_attr "length_immediate" "1")
14144 (set_attr "prefix" "vex")
14145 (set_attr "mode" "<sseinsnmode>")])
;; V16HI "set low half": operand 2 (V8HI) is inserted and elements 8..15
;; of operand 1 are the part selected from the original vector.  Emitted
;; as vinsert%~128 with immediate 0x0.
;; NOTE(review): "%~" is an output-template punctuation code resolved by
;; the backend's operand printer (presumably choosing the "i" or "f"
;; mnemonic form) -- confirm there.  The vec_concat/vec_select wrapper
;; lines and the insn condition are not visible here.
14147 (define_insn "vec_set_lo_v16hi"
14148 [(set (match_operand:V16HI 0 "register_operand" "=x")
14150 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
14152 (match_operand:V16HI 1 "register_operand" "x")
14153 (parallel [(const_int 8) (const_int 9)
14154 (const_int 10) (const_int 11)
14155 (const_int 12) (const_int 13)
14156 (const_int 14) (const_int 15)]))))]
14158 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14159 [(set_attr "type" "sselog")
14160 (set_attr "prefix_extra" "1")
14161 (set_attr "length_immediate" "1")
14162 (set_attr "prefix" "vex")
14163 (set_attr "mode" "OI")])
;; V16HI "set high half": elements 0..7 of operand 1 are the part
;; selected from the original vector and operand 2 (V8HI) is inserted.
;; Emitted as vinsert%~128 with immediate 0x1.
;; NOTE(review): "%~" is resolved by the backend's operand printer
;; (presumably choosing the "i" or "f" mnemonic form) -- confirm there.
;; The vec_concat/vec_select wrapper lines and the insn condition are not
;; visible here.
14165 (define_insn "vec_set_hi_v16hi"
14166 [(set (match_operand:V16HI 0 "register_operand" "=x")
14169 (match_operand:V16HI 1 "register_operand" "x")
14170 (parallel [(const_int 0) (const_int 1)
14171 (const_int 2) (const_int 3)
14172 (const_int 4) (const_int 5)
14173 (const_int 6) (const_int 7)]))
14174 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
14176 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14177 [(set_attr "type" "sselog")
14178 (set_attr "prefix_extra" "1")
14179 (set_attr "length_immediate" "1")
14180 (set_attr "prefix" "vex")
14181 (set_attr "mode" "OI")])
;; V32QI "set low half": operand 2 (V16QI) is inserted and elements
;; 16..31 of operand 1 are the part selected from the original vector.
;; Emitted as vinsert%~128 with immediate 0x0.
;; NOTE(review): "%~" is resolved by the backend's operand printer
;; (presumably choosing the "i" or "f" mnemonic form) -- confirm there.
;; The vec_concat/vec_select wrapper lines and the insn condition are not
;; visible here.
14183 (define_insn "vec_set_lo_v32qi"
14184 [(set (match_operand:V32QI 0 "register_operand" "=x")
14186 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
14188 (match_operand:V32QI 1 "register_operand" "x")
14189 (parallel [(const_int 16) (const_int 17)
14190 (const_int 18) (const_int 19)
14191 (const_int 20) (const_int 21)
14192 (const_int 22) (const_int 23)
14193 (const_int 24) (const_int 25)
14194 (const_int 26) (const_int 27)
14195 (const_int 28) (const_int 29)
14196 (const_int 30) (const_int 31)]))))]
14198 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14199 [(set_attr "type" "sselog")
14200 (set_attr "prefix_extra" "1")
14201 (set_attr "length_immediate" "1")
14202 (set_attr "prefix" "vex")
14203 (set_attr "mode" "OI")])
;; V32QI "set high half": elements 0..15 of operand 1 are the part
;; selected from the original vector and operand 2 (V16QI) is inserted.
;; Emitted as vinsert%~128 with immediate 0x1.
;; NOTE(review): "%~" is resolved by the backend's operand printer
;; (presumably choosing the "i" or "f" mnemonic form) -- confirm there.
;; The vec_concat/vec_select wrapper lines and the insn condition are not
;; visible here.
14205 (define_insn "vec_set_hi_v32qi"
14206 [(set (match_operand:V32QI 0 "register_operand" "=x")
14209 (match_operand:V32QI 1 "register_operand" "x")
14210 (parallel [(const_int 0) (const_int 1)
14211 (const_int 2) (const_int 3)
14212 (const_int 4) (const_int 5)
14213 (const_int 6) (const_int 7)
14214 (const_int 8) (const_int 9)
14215 (const_int 10) (const_int 11)
14216 (const_int 12) (const_int 13)
14217 (const_int 14) (const_int 15)]))
14218 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
14220 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14221 [(set_attr "type" "sselog")
14222 (set_attr "prefix_extra" "1")
14223 (set_attr "length_immediate" "1")
14224 (set_attr "prefix" "vex")
14225 (set_attr "mode" "OI")])
;; Load form of v<sseintprefix>maskmov<ssemodesuffix>: operand 1 is the
;; memory source, operand 2 an integer-vector register
;; (<sseintvecmode>), operand 0 the register destination.
;; NOTE(review): operand 2 is presumably the element mask -- confirm.
;; The unspec wrapper and the insn condition are on lines not visible
;; here.
14227 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
14228 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
14230 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
14231 (match_operand:V48_AVX2 1 "memory_operand" "m")]
14234 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
14235 [(set_attr "type" "sselog1")
14236 (set_attr "prefix_extra" "1")
14237 (set_attr "prefix" "vex")
14238 (set_attr "btver2_decode" "vector")
14239 (set_attr "mode" "<sseinsnmode>")])
;; Store form of v<sseintprefix>maskmov<ssemodesuffix>: operand 0 is a
;; memory destination declared read-write ("+m"), operand 1 an
;; integer-vector register (<sseintvecmode>) and operand 2 the data
;; register.
;; NOTE(review): operand 1 is presumably the element mask -- confirm.
;; The unspec wrapper, its remaining element(s) and the insn condition
;; are on lines not visible here.
14241 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
14242 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
14244 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
14245 (match_operand:V48_AVX2 2 "register_operand" "x")
14249 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14250 [(set_attr "type" "sselog1")
14251 (set_attr "prefix_extra" "1")
14252 (set_attr "prefix" "vex")
14253 (set_attr "btver2_decode" "vector")
14254 (set_attr "mode" "<sseinsnmode>")])
;; Named expander "maskload<mode>" for V48_AVX2 modes.  Operand numbering
;; matches the maskload insn: 0 = register destination, 1 = memory
;; source, 2 = integer-vector register.
;; NOTE(review): the unspec wrapper and the expander condition are on
;; lines not visible here.
14256 (define_expand "maskload<mode>"
14257 [(set (match_operand:V48_AVX2 0 "register_operand")
14259 [(match_operand:<sseintvecmode> 2 "register_operand")
14260 (match_operand:V48_AVX2 1 "memory_operand")]
;; Named expander "maskstore<mode>" for V48_AVX2 modes.  Note the operand
;; numbering: here the integer-vector register is operand 2 and the data
;; register is operand 1, the reverse of the numbering used by the
;; <avx_avx2>_maskstore insn; their positions inside the vector are the
;; same.
;; NOTE(review): the unspec wrapper, its remaining element(s) and the
;; expander condition are on lines not visible here.
14264 (define_expand "maskstore<mode>"
14265 [(set (match_operand:V48_AVX2 0 "memory_operand")
14267 [(match_operand:<sseintvecmode> 2 "register_operand")
14268 (match_operand:V48_AVX2 1 "register_operand")
;; Cast from a <ssehalfvecmode> value to the full AVX256MODE2P mode,
;; kept as an unspec until reload completes and then split into a plain
;; move (emit_move_insn).  Before the move, the visible code re-types one
;; side so both operands have the same mode: either operand 0 becomes a
;; <ssehalfvecmode> register, or operand 1 becomes a <MODE>mode register,
;; in both cases reusing the same REGNO.
;; NOTE(review): the tests choosing which operand is re-typed, the unspec
;; code and the insn condition are on lines not visible here.
14273 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
14274 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
14275 (unspec:AVX256MODE2P
14276 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
14280 "&& reload_completed"
14283 rtx op0 = operands[0];
14284 rtx op1 = operands[1];
14286 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
14288 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
14289 emit_move_insn (op0, op1);
;; Named expander "vec_init<mode>" for V_256 modes; all work is delegated
;; to ix86_expand_vector_init with a first argument of false.
;; NOTE(review): operand 1's declaration and the expander condition are
;; on lines not visible here.
14293 (define_expand "vec_init<mode>"
14294 [(match_operand:V_256 0 "register_operand")
14298 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Named expander "vec_init<mode>" for VI48F_512 modes; all work is
;; delegated to ix86_expand_vector_init with a first argument of false.
;; NOTE(review): operand 1's declaration and the expander condition are
;; on lines not visible here.
14302 (define_expand "vec_init<mode>"
14303 [(match_operand:VI48F_512 0 "register_operand")
14307 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander taking a V2DI destination (operand 0), a V4DI source
;; (operand 1) and a 0/1 selector (operand 2).  It switches on the
;; selector to pick gen_vec_extract_lo_v4di or gen_vec_extract_hi_v4di
;; and emits that insn on operands 0 and 1; any other selector value is
;; unreachable.
;; NOTE(review): the case labels and the expander condition are on lines
;; not visible here.
14311 (define_expand "avx2_extracti128"
14312 [(match_operand:V2DI 0 "nonimmediate_operand")
14313 (match_operand:V4DI 1 "register_operand")
14314 (match_operand:SI 2 "const_0_to_1_operand")]
14317 rtx (*insn)(rtx, rtx);
14319 switch (INTVAL (operands[2]))
14322 insn = gen_vec_extract_lo_v4di;
14325 insn = gen_vec_extract_hi_v4di;
14328 gcc_unreachable ();
14331 emit_insn (insn (operands[0], operands[1]));
;; Expander taking a V4DI destination (operand 0), a V4DI source
;; (operand 1), a V2DI value (operand 2) and a 0/1 selector (operand 3).
;; It switches on the selector to pick gen_avx2_vec_set_lo_v4di or
;; gen_avx2_vec_set_hi_v4di and emits that insn on operands 0..2; any
;; other selector value is unreachable.
;; NOTE(review): the case labels and the expander condition are on lines
;; not visible here.
14335 (define_expand "avx2_inserti128"
14336 [(match_operand:V4DI 0 "register_operand")
14337 (match_operand:V4DI 1 "register_operand")
14338 (match_operand:V2DI 2 "nonimmediate_operand")
14339 (match_operand:SI 3 "const_0_to_1_operand")]
14342 rtx (*insn)(rtx, rtx, rtx);
14344 switch (INTVAL (operands[3]))
14347 insn = gen_avx2_vec_set_lo_v4di;
14350 insn = gen_avx2_vec_set_hi_v4di;
14353 gcc_unreachable ();
14356 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Per-element arithmetic right shift (ashiftrt) where the shift counts
;; come from a vector operand (operand 2, register or memory).  Emitted
;; as vpsrav<ssemodesuffix>; enabled when TARGET_AVX2 holds together with
;; <mask_mode512bit_condition>.
14360 (define_insn "<avx2_avx512f>_ashrv<mode><mask_name>"
14361 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
14362 (ashiftrt:VI48_AVX512F
14363 (match_operand:VI48_AVX512F 1 "register_operand" "v")
14364 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
14365 "TARGET_AVX2 && <mask_mode512bit_condition>"
14366 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14367 [(set_attr "type" "sseishft")
14368 (set_attr "prefix" "maybe_evex")
14369 (set_attr "mode" "<sseinsnmode>")])
;; Per-element shifts for every code in the any_lshift iterator, with the
;; shift counts taken from a vector operand (operand 2, register or
;; memory).  Emitted as vp<vshift>v<ssemodesuffix>; enabled when
;; TARGET_AVX2 holds together with <mask_mode512bit_condition>.
14371 (define_insn "<avx2_avx512f>_<shift_insn>v<mode><mask_name>"
14372 [(set (match_operand:VI48_AVX2_48_AVX512F 0 "register_operand" "=v")
14373 (any_lshift:VI48_AVX2_48_AVX512F
14374 (match_operand:VI48_AVX2_48_AVX512F 1 "register_operand" "v")
14375 (match_operand:VI48_AVX2_48_AVX512F 2 "nonimmediate_operand" "vm")))]
14376 "TARGET_AVX2 && <mask_mode512bit_condition>"
14377 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14378 [(set_attr "type" "sseishft")
14379 (set_attr "prefix" "maybe_evex")
14380 (set_attr "mode" "<sseinsnmode>")])
14382 ;; For avx_vec_concat<mode> insn pattern
;; Maps each 256-bit mode to the letter "t" and each 512-bit mode to the
;; letter "g".  The letter is spliced into the output template as the
;; operand print modifier for operand 1 (%t1 / %g1).
14383 (define_mode_attr concat_tg_mode
14384 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
14385 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
;; Concatenate two <ssehalfvecmode> values into a V_256_512 register.
;;   alternative 0 (operand 2 in register or memory): vinsert<i128> with
;;     immediate 0x1, operand 1 printed with the <concat_tg_mode> modifier;
;;   alternative 1 (operand 2 matches constraint "C"): a single move of
;;     operand 1 into the destination printed with %t or %x; the mnemonic
;;     (vmovaps / vmovapd / vmovdqa) is chosen by get_attr_mode.
;; NOTE(review): the case labels tying each move to an insn mode, and the
;; insn condition, are on lines not visible here.
14387 (define_insn "avx_vec_concat<mode>"
14388 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
14389 (vec_concat:V_256_512
14390 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
14391 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
14394 switch (which_alternative)
14397 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
14399 switch (get_attr_mode (insn))
14402 return "vmovaps\t{%1, %t0|%t0, %1}";
14404 return "vmovapd\t{%1, %t0|%t0, %1}";
14406 return "vmovaps\t{%1, %x0|%x0, %1}";
14408 return "vmovapd\t{%1, %x0|%x0, %1}";
14410 return "vmovdqa\t{%1, %t0|%t0, %1}";
14412 return "vmovdqa\t{%1, %x0|%x0, %1}";
14414 gcc_unreachable ();
14417 gcc_unreachable ();
14420 [(set_attr "type" "sselog,ssemov")
14421 (set_attr "prefix_extra" "1,*")
14422 (set_attr "length_immediate" "1,*")
14423 (set_attr "prefix" "maybe_evex")
14424 (set_attr "mode" "<sseinsnmode>")])
;; Register form of vcvtph2ps with a V4SF result: the pattern applies a
;; V8SF unspec to a V8HI register and keeps elements 0..3 of it.
;; NOTE(review): the vec_select wrapper, the unspec code and the insn
;; condition are on lines not visible here.
14426 (define_insn "vcvtph2ps"
14427 [(set (match_operand:V4SF 0 "register_operand" "=x")
14429 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
14431 (parallel [(const_int 0) (const_int 1)
14432 (const_int 2) (const_int 3)])))]
14434 "vcvtph2ps\t{%1, %0|%0, %1}"
14435 [(set_attr "type" "ssecvt")
14436 (set_attr "prefix" "vex")
14437 (set_attr "mode" "V4SF")])
;; Memory-source form of vcvtph2ps with a V4SF result: the source is a
;; V4HI memory operand, so no element selection is needed.
;; NOTE(review): the "mode" attribute here is V8SF although the result is
;; V4SF and the register form uses V4SF -- confirm whether intentional.
;; The insn condition is on a line not visible here.
14439 (define_insn "*vcvtph2ps_load"
14440 [(set (match_operand:V4SF 0 "register_operand" "=x")
14441 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
14442 UNSPEC_VCVTPH2PS))]
14444 "vcvtph2ps\t{%1, %0|%0, %1}"
14445 [(set_attr "type" "ssecvt")
14446 (set_attr "prefix" "vex")
14447 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: V8HI source (register or memory) to V8SF result,
;; expressed as UNSPEC_VCVTPH2PS.
;; NOTE(review): the insn condition is on a line not visible here.
14449 (define_insn "vcvtph2ps256"
14450 [(set (match_operand:V8SF 0 "register_operand" "=x")
14451 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
14452 UNSPEC_VCVTPH2PS))]
14454 "vcvtph2ps\t{%1, %0|%0, %1}"
14455 [(set_attr "type" "ssecvt")
14456 (set_attr "prefix" "vex")
14457 (set_attr "btver2_decode" "double")
14458 (set_attr "mode" "V8SF")])
;; 512-bit vcvtph2ps: V16HI source (register or memory) to V16SF result,
;; expressed as UNSPEC_VCVTPH2PS, EVEX-encoded.
;; NOTE(review): the unspec opening line and the insn condition are not
;; visible here.
14460 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name>"
14461 [(set (match_operand:V16SF 0 "register_operand" "=v")
14463 [(match_operand:V16HI 1 "nonimmediate_operand" "vm")]
14464 UNSPEC_VCVTPH2PS))]
14466 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14467 [(set_attr "type" "ssecvt")
14468 (set_attr "prefix" "evex")
14469 (set_attr "mode" "V16SF")])
;; Expander for the 128-bit vcvtps2ph: a V4HI unspec of a V4SF register
;; and an 8-bit immediate produces part of a V8HI result.  The
;; preparation statement sets operand 3 to the V4HI zero constant.
;; NOTE(review): operand 3 presumably fills the other half of the V8HI
;; result via a vec_concat -- the wrapper, the unspec code and the
;; expander condition are on lines not visible here.
14471 (define_expand "vcvtps2ph"
14472 [(set (match_operand:V8HI 0 "register_operand")
14474 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
14475 (match_operand:SI 2 "const_0_to_255_operand")]
14479 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination form of the 128-bit vcvtps2ph.  Operand 3 must be
;; a V4HI zero constant (const0_operand) and does not appear in the
;; template; operand 2 is the 8-bit immediate.
;; NOTE(review): the wrapper combining the unspec with operand 3, the
;; unspec code and the insn condition are on lines not visible here.
14481 (define_insn "*vcvtps2ph"
14482 [(set (match_operand:V8HI 0 "register_operand" "=x")
14484 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
14485 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14487 (match_operand:V4HI 3 "const0_operand")))]
14489 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
14490 [(set_attr "type" "ssecvt")
14491 (set_attr "prefix" "vex")
14492 (set_attr "mode" "V4SF")])
;; Memory-destination form of the 128-bit vcvtps2ph: the V4HI result of
;; UNSPEC_VCVTPS2PH is stored directly, so no zero half is involved.
;; NOTE(review): the insn condition is on a line not visible here.
14494 (define_insn "*vcvtps2ph_store"
14495 [(set (match_operand:V4HI 0 "memory_operand" "=m")
14496 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
14497 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14498 UNSPEC_VCVTPS2PH))]
14500 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
14501 [(set_attr "type" "ssecvt")
14502 (set_attr "prefix" "vex")
14503 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: V8SF register source and 8-bit immediate to a V8HI
;; destination that may be a register or memory ("=xm").
;; NOTE(review): the insn condition is on a line not visible here.
14505 (define_insn "vcvtps2ph256"
14506 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
14507 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
14508 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14509 UNSPEC_VCVTPS2PH))]
14511 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
14512 [(set_attr "type" "ssecvt")
14513 (set_attr "prefix" "vex")
14514 (set_attr "btver2_decode" "vector")
14515 (set_attr "mode" "V8SF")])
;; 512-bit vcvtps2ph: V16SF register source and 8-bit immediate to a
;; V16HI destination that may be a register or memory ("=vm"),
;; EVEX-encoded.
;; NOTE(review): the unspec opening line and the insn condition are not
;; visible here.
14517 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
14518 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
14520 [(match_operand:V16SF 1 "register_operand" "v")
14521 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14522 UNSPEC_VCVTPS2PH))]
14524 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14525 [(set_attr "type" "ssecvt")
14526 (set_attr "prefix" "evex")
14527 (set_attr "mode" "V16SF")])
14529 ;; For gather* insn patterns
;; Data modes iterated over by the avx2_gather* patterns: 128-bit and
;; 256-bit vectors with 32-bit or 64-bit elements.
14530 (define_mode_iterator VEC_GATHER_MODE
14531 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Maps a gather data mode to the SI-element vector mode used for operand
;; 3/4 (the index vector) of the gathersi patterns.  Note that V2DI/V2DF
;; and V4DI/V4DF all map to V4SI, while modes with 32-bit elements map to
;; the SI vector with the same element count.
14532 (define_mode_attr VEC_GATHER_IDXSI
14533 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
14534 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
14535 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
14536 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; Maps a gather data mode to the DI-element vector mode used for the
;; index vector of the gatherdi patterns.  Modes with 64-bit elements map
;; to the DI vector with the same element count; modes with 32-bit
;; elements map to a DI vector of the same total size (half as many
;; elements, e.g. V8SI -> V4DI).
14538 (define_mode_attr VEC_GATHER_IDXDI
14539 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
14540 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
14541 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
14542 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Maps a gather data mode to the mode of the source and mask-position
;; register operands of the gatherdi patterns.  It is the identity except
;; for the wider 32-bit-element modes, which map to the vector of half
;; the size (V8SI -> V4SI, V16SI -> V8SI, V8SF -> V4SF, V16SF -> V8SF).
14544 (define_mode_attr VEC_GATHER_SRCDI
14545 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
14546 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
14547 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
14548 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; Expander for the AVX2 gather with SI-element indices.  Operands:
;;   0: destination register (VEC_GATHER_MODE)
;;   1: register of the same mode, first element of the gather unspec
;;   2: address accepted by vsib_address_operand
;;   3: index vector register (<VEC_GATHER_IDXSI>)
;;   4: register of the same mode as the destination (presumably the
;;      mask -- confirm against the builtin expansion)
;;   5: SImode constant accepted by const1248_operand
;;   6: scratch register clobbered by the instruction
;; The preparation code packs operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR that becomes the address of the gathered memory.
;; The predicate name for operand 5 previously carried a trailing space
;; ("const1248_operand "), which matches no defined predicate; it is now
;; spelled exactly as in the matching *avx2_gathersi<mode> insns.
;; NOTE(review): the match_par_dup line, the unspec code, the expander
;; condition and the assignment target of the UNSPEC are on lines not
;; visible in this excerpt.
14550 (define_expand "avx2_gathersi<mode>"
14551 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
14552 (unspec:VEC_GATHER_MODE
14553 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
14554 (mem:<ssescalarmode>
14556 [(match_operand 2 "vsib_address_operand")
14557 (match_operand:<VEC_GATHER_IDXSI>
14558 3 "register_operand")
14559 (match_operand:SI 5 "const1248_operand")]))
14560 (mem:BLK (scratch))
14561 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
14563 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
14567 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
14568 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn for the SI-index gather.  Register tying:
;;   - operand 0 (destination) is earlyclobber ("=&x");
;;   - operand 2 is tied to the destination ("0");
;;   - operand 5 is tied to operand 1 ("1"), the earlyclobber scratch that
;;     the pattern clobbers.
;; Operand 7 is the memory reference (vsib_mem_operator) whose address is
;; built from operands 3, 4 and 6.  The template prints operand 1, the
;; memory operand and the destination.
;; NOTE(review): the unspec codes and the insn condition are on lines not
;; visible here.
14571 (define_insn "*avx2_gathersi<mode>"
14572 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
14573 (unspec:VEC_GATHER_MODE
14574 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
14575 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
14577 [(match_operand:P 3 "vsib_address_operand" "Tv")
14578 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
14579 (match_operand:SI 6 "const1248_operand" "n")]
14581 (mem:BLK (scratch))
14582 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
14584 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
14586 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
14587 [(set_attr "type" "ssemov")
14588 (set_attr "prefix" "vex")
14589 (set_attr "mode" "<sseinsnmode>")])
;; Second SI-index gather form.  Unlike the first, no visible register
;; operand is tied to the destination, so the operand numbers shift down
;; by one: address parts are operands 2, 3 and 5, the memory reference is
;; operand 6, and operand 4 is tied to the clobbered scratch operand 1.
;; NOTE(review): the first element of the unspec vector, the unspec codes
;; and the insn condition are on lines not visible here.
14591 (define_insn "*avx2_gathersi<mode>_2"
14592 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
14593 (unspec:VEC_GATHER_MODE
14595 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14597 [(match_operand:P 2 "vsib_address_operand" "Tv")
14598 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
14599 (match_operand:SI 5 "const1248_operand" "n")]
14601 (mem:BLK (scratch))
14602 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
14604 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
14606 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
14607 [(set_attr "type" "ssemov")
14608 (set_attr "prefix" "vex")
14609 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX2 gather with DI-element indices.  Operands:
;;   0: destination register (VEC_GATHER_MODE)
;;   1: register of mode <VEC_GATHER_SRCDI>, first element of the unspec
;;   2: address accepted by vsib_address_operand
;;   3: index vector register (<VEC_GATHER_IDXDI>)
;;   4: register of mode <VEC_GATHER_SRCDI> (presumably the mask --
;;      confirm against the builtin expansion)
;;   5: SImode constant accepted by const1248_operand
;;   6: scratch register clobbered by the instruction
;; The preparation code packs operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR that becomes the address of the gathered memory.
;; The predicate name for operand 5 previously carried a trailing space
;; ("const1248_operand "), which matches no defined predicate; it is now
;; spelled exactly as in the matching *avx2_gatherdi<mode> insns.
;; NOTE(review): the match_par_dup line, the unspec code, the expander
;; condition and the assignment target of the UNSPEC are on lines not
;; visible in this excerpt.
14611 (define_expand "avx2_gatherdi<mode>"
14612 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
14613 (unspec:VEC_GATHER_MODE
14614 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
14615 (mem:<ssescalarmode>
14617 [(match_operand 2 "vsib_address_operand")
14618 (match_operand:<VEC_GATHER_IDXDI>
14619 3 "register_operand")
14620 (match_operand:SI 5 "const1248_operand")]))
14621 (mem:BLK (scratch))
14622 (match_operand:<VEC_GATHER_SRCDI>
14623 4 "register_operand")]
14625 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
14629 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
14630 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn for the DI-index gather.  Register tying:
;;   - operand 0 (destination) is earlyclobber ("=&x");
;;   - operand 2 (<VEC_GATHER_SRCDI>) is tied to the destination ("0");
;;   - operand 5 (<VEC_GATHER_SRCDI>) is tied to operand 1 ("1"), the
;;     earlyclobber scratch that the pattern clobbers.
;; The template prints operands 5, 7 and 2 rather than 1, 7 and 0; these
;; are the same hard registers because of the ties, but printed in mode
;; <VEC_GATHER_SRCDI>, which can be narrower than the destination mode.
;; NOTE(review): the unspec codes and the insn condition are on lines not
;; visible here.
14633 (define_insn "*avx2_gatherdi<mode>"
14634 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
14635 (unspec:VEC_GATHER_MODE
14636 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
14637 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
14639 [(match_operand:P 3 "vsib_address_operand" "Tv")
14640 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
14641 (match_operand:SI 6 "const1248_operand" "n")]
14643 (mem:BLK (scratch))
14644 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
14646 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
14648 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
14649 [(set_attr "type" "ssemov")
14650 (set_attr "prefix" "vex")
14651 (set_attr "mode" "<sseinsnmode>")])
;; AVX2 gather, "gatherq" mnemonic, second matching variant.
;; Operands visible in this pattern:
;;   0 - destination vector register (early-clobber, "=&x").
;;   1 - scratch register clobbered by the insn.
;;   2 - address operand ("Tv").
;;   3 - index vector register (<VEC_GATHER_IDXDI>).
;;   4 - <VEC_GATHER_SRCDI> input tied to scratch 1 by the "1" constraint.
;;   5 - SImode immediate accepted by const1248_operand.
;;   6 - memory reference built from 2/3/5 (vsib_mem_operator).
;; The output code picks between two templates: when <MODE> differs from
;; <VEC_GATHER_SRCDI> the destination is printed with the "x" operand
;; modifier (%x0), otherwise it is printed plainly (%0).
;; NOTE(review): %x0 presumably forces the XMM-sized register name --
;; confirm in the i386 print-operand code.
14653 (define_insn "*avx2_gatherdi<mode>_2"
14654 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
14655 (unspec:VEC_GATHER_MODE
14657 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14659 [(match_operand:P 2 "vsib_address_operand" "Tv")
14660 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
14661 (match_operand:SI 5 "const1248_operand" "n")]
14663 (mem:BLK (scratch))
14664 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
14666 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
14669 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
14670 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
14671 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
14673 [(set_attr "type" "ssemov")
14674 (set_attr "prefix" "vex")
14675 (set_attr "mode" "<sseinsnmode>")])
;; AVX2 "gatherq" variant whose result is a vec_select of elements 0..3
;; of the gather, delivered in a <VEC_GATHER_SRCDI> register.
;; Operands:
;;   0 - destination register of mode <VEC_GATHER_SRCDI> ("=&x").
;;   1 - VI4F_256 scratch register clobbered by the insn.
;;   2 - <VEC_GATHER_SRCDI> input tied to operand 0 by the "0" constraint.
;;   3 - address operand ("Tv").
;;   4 - index vector register (<VEC_GATHER_IDXDI>).
;;   5 - <VEC_GATHER_SRCDI> input tied to scratch 1 by the "1" constraint.
;;   6 - SImode immediate accepted by const1248_operand.
;;   7 - memory reference built from 3/4/6 (vsib_mem_operator).
;; Because operand 0 already has mode <VEC_GATHER_SRCDI>, the template
;; prints it directly as %0.
14677 (define_insn "*avx2_gatherdi<mode>_3"
14678 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
14679 (vec_select:<VEC_GATHER_SRCDI>
14681 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
14682 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
14684 [(match_operand:P 3 "vsib_address_operand" "Tv")
14685 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
14686 (match_operand:SI 6 "const1248_operand" "n")]
14688 (mem:BLK (scratch))
14689 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
14691 (parallel [(const_int 0) (const_int 1)
14692 (const_int 2) (const_int 3)])))
14693 (clobber (match_scratch:VI4F_256 1 "=&x"))]
14695 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
14696 [(set_attr "type" "ssemov")
14697 (set_attr "prefix" "vex")
14698 (set_attr "mode" "<sseinsnmode>")])
;; AVX2 "gatherq" variant whose result is a vec_select of elements 0..3
;; of the gather; this form has no visible input tied to operand 0.
;; Operands visible in this pattern:
;;   0 - destination register of mode <VEC_GATHER_SRCDI> ("=&x").
;;   1 - VI4F_256 scratch register clobbered by the insn.
;;   2 - address operand ("Tv").
;;   3 - index vector register (<VEC_GATHER_IDXDI>).
;;   4 - <VEC_GATHER_SRCDI> input tied to scratch 1 by the "1" constraint.
;;   5 - SImode immediate accepted by const1248_operand.
;;   6 - memory reference built from 2/3/5 (vsib_mem_operator).
14700 (define_insn "*avx2_gatherdi<mode>_4"
14701 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
14702 (vec_select:<VEC_GATHER_SRCDI>
14705 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14707 [(match_operand:P 2 "vsib_address_operand" "Tv")
14708 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
14709 (match_operand:SI 5 "const1248_operand" "n")]
14711 (mem:BLK (scratch))
14712 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
14714 (parallel [(const_int 0) (const_int 1)
14715 (const_int 2) (const_int 3)])))
14716 (clobber (match_scratch:VI4F_256 1 "=&x"))]
14718 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
14719 [(set_attr "type" "ssemov")
14720 (set_attr "prefix" "vex")
14721 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512F gather that takes an index vector of mode
;; <VEC_GATHER_IDXSI>.
;; Operands:
;;   0 - destination register (VI48F_512).
;;   1 - register input of the same mode as the destination.
;;   2 - address operand (vsib_address_operand).
;;   3 - index vector register (<VEC_GATHER_IDXSI>).
;;   4 - register of mode <avx512fmaskmode>.
;;   5 - SImode immediate accepted by const1248_operand.
;;   7 - <avx512fmaskmode> scratch clobbered by the generated insn.
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR.
14723 (define_expand "avx512f_gathersi<mode>"
14724 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
14726 [(match_operand:VI48F_512 1 "register_operand")
14727 (match_operand:<avx512fmaskmode> 4 "register_operand")
14728 (mem:<ssescalarmode>
14730 [(match_operand 2 "vsib_address_operand")
14731 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
14732 (match_operand:SI 5 "const1248_operand")]))]
14734 (clobber (match_scratch:<avx512fmaskmode> 7))])]
14738 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
14739 operands[5]), UNSPEC_VSIBADDR);
;; AVX-512F gather, "gatherd" mnemonic, variant with a source operand
;; merged into the destination.
;; Operands:
;;   0 - destination vector register (early-clobber, "=&v").
;;   1 - input of the same mode tied to operand 0 by the "0" constraint.
;;   2 - <avx512fmaskmode> scratch in a "k" register, clobbered.
;;   3 - index vector register (<VEC_GATHER_IDXSI>).
;;   4 - address operand ("Tv").
;;   5 - SImode immediate accepted by const1248_operand.
;;   6 - memory reference built from 4/3/5 under UNSPEC_VSIBADDR.
;;   7 - mask-mode input tied to scratch 2 by the "2" constraint.
;; The template prints the memory operand and then the destination
;; followed by operand 2 inside literal braces (%{ ... %}); the Intel
;; alternative prints the memory operand with the "g" modifier.
14742 (define_insn "*avx512f_gathersi<mode>"
14743 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
14745 [(match_operand:VI48F_512 1 "register_operand" "0")
14746 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
14747 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14749 [(match_operand:P 4 "vsib_address_operand" "Tv")
14750 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
14751 (match_operand:SI 5 "const1248_operand" "n")]
14752 UNSPEC_VSIBADDR)])]
14754 (clobber (match_scratch:<avx512fmaskmode> 2 "=&k"))]
14756 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
14757 [(set_attr "type" "ssemov")
14758 (set_attr "prefix" "evex")
14759 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512F gather, "gatherd" mnemonic, second matching variant.
;; Operands visible in this pattern:
;;   0 - destination vector register (early-clobber, "=&v").
;;   1 - <avx512fmaskmode> scratch in a "k" register, clobbered.
;;   2 - index vector register (<VEC_GATHER_IDXSI>).
;;   3 - address operand ("Tv").
;;   4 - SImode immediate accepted by const1248_operand.
;;   5 - memory reference built from 3/2/4 under UNSPEC_VSIBADDR.
;;   6 - mask-mode input tied to scratch 1 by the "1" constraint.
;; The template prints the memory operand, then the destination followed
;; by operand 1 inside literal braces.
14761 (define_insn "*avx512f_gathersi<mode>_2"
14762 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
14765 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
14766 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
14768 [(match_operand:P 3 "vsib_address_operand" "Tv")
14769 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
14770 (match_operand:SI 4 "const1248_operand" "n")]
14771 UNSPEC_VSIBADDR)])]
14773 (clobber (match_scratch:<avx512fmaskmode> 1 "=&k"))]
14775 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
14776 [(set_attr "type" "ssemov")
14777 (set_attr "prefix" "evex")
14778 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512F gather that takes an index vector of mode
;; <VEC_GATHER_IDXDI>.
;; Operands:
;;   0 - destination register (VI48F_512).
;;   1 - register input of mode <VEC_GATHER_SRCDI>.
;;   2 - address operand (vsib_address_operand).
;;   3 - index vector register (<VEC_GATHER_IDXDI>).
;;   4 - QImode register (the mask mode is fixed to QI in this pattern).
;;   5 - SImode immediate accepted by const1248_operand.
;;   7 - QImode scratch clobbered by the generated insn.
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR.
14781 (define_expand "avx512f_gatherdi<mode>"
14782 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
14784 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
14785 (match_operand:QI 4 "register_operand")
14786 (mem:<ssescalarmode>
14788 [(match_operand 2 "vsib_address_operand")
14789 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
14790 (match_operand:SI 5 "const1248_operand")]))]
14792 (clobber (match_scratch:QI 7))])]
14796 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
14797 operands[5]), UNSPEC_VSIBADDR);
;; AVX-512F gather, "gatherq" mnemonic, variant with a source operand
;; merged into the destination.
;; Operands:
;;   0 - destination vector register (early-clobber, "=&v").
;;   1 - <VEC_GATHER_SRCDI> input tied to operand 0 by the "0" constraint.
;;   2 - QImode scratch in a "k" register, clobbered.
;;   3 - index vector register (<VEC_GATHER_IDXDI>).
;;   4 - address operand ("Tv").
;;   5 - SImode immediate accepted by const1248_operand.
;;   6 - memory reference built from 4/3/5 under UNSPEC_VSIBADDR.
;;   7 - QImode input tied to scratch 2 by the "2" constraint.
;; The template names the destination through operand 1 rather than
;; operand 0; both share a hard register because of the tie.
;; NOTE(review): presumably this makes the register print with the width
;; of <VEC_GATHER_SRCDI> -- confirm.
14800 (define_insn "*avx512f_gatherdi<mode>"
14801 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
14803 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
14804 (match_operand:QI 7 "register_operand" "2")
14805 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14807 [(match_operand:P 4 "vsib_address_operand" "Tv")
14808 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
14809 (match_operand:SI 5 "const1248_operand" "n")]
14810 UNSPEC_VSIBADDR)])]
14812 (clobber (match_scratch:QI 2 "=&k"))]
14814 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
14815 [(set_attr "type" "ssemov")
14816 (set_attr "prefix" "evex")
14817 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512F gather, "gatherq" mnemonic, second matching variant.
;; Operands visible in this pattern:
;;   0 - destination vector register (early-clobber, "=&v").
;;   1 - QImode scratch in a "k" register, clobbered.
;;   2 - index vector register (<VEC_GATHER_IDXDI>).
;;   3 - address operand ("Tv").
;;   4 - SImode immediate accepted by const1248_operand.
;;   5 - memory reference built from 3/2/4 under UNSPEC_VSIBADDR.
;;   6 - QImode input tied to scratch 1 by the "1" constraint.
;; The output code picks between two templates: when <MODE> differs from
;; <VEC_GATHER_SRCDI> the destination is printed with the "t" operand
;; modifier (%t0), otherwise it is printed plainly (%0).
;; NOTE(review): %t0 presumably selects the 256-bit register name --
;; confirm in the i386 print-operand code.
14819 (define_insn "*avx512f_gatherdi<mode>_2"
14820 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
14823 (match_operand:QI 6 "register_operand" "1")
14824 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
14826 [(match_operand:P 3 "vsib_address_operand" "Tv")
14827 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
14828 (match_operand:SI 4 "const1248_operand" "n")]
14829 UNSPEC_VSIBADDR)])]
14831 (clobber (match_scratch:QI 1 "=&k"))]
14834 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
14835 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
14836 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
14838 [(set_attr "type" "ssemov")
14839 (set_attr "prefix" "evex")
14840 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512F scatter that takes an index vector of mode
;; <VEC_GATHER_IDXSI>.  The destination of the set is a VI48F_512 memory
;; reference.
;; Operands:
;;   0 - address operand (vsib_address_operand).
;;   1 - register of mode <avx512fmaskmode>.
;;   2 - index vector register (<VEC_GATHER_IDXSI>).
;;   3 - VI48F_512 register holding the data to store.
;;   4 - SImode immediate accepted by const1248_operand.
;;   6 - <avx512fmaskmode> scratch clobbered by the generated insn.
;; The preparation code wraps operands 0, 2 and 4 into a Pmode
;; UNSPEC_VSIBADDR.
14842 (define_expand "avx512f_scattersi<mode>"
14843 [(parallel [(set (mem:VI48F_512
14845 [(match_operand 0 "vsib_address_operand")
14846 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
14847 (match_operand:SI 4 "const1248_operand")]))
14849 [(match_operand:<avx512fmaskmode> 1 "register_operand")
14850 (match_operand:VI48F_512 3 "register_operand")]
14852 (clobber (match_scratch:<avx512fmaskmode> 6))])]
14856 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
14857 operands[4]), UNSPEC_VSIBADDR);
;; AVX-512F scatter, "scatterd" mnemonic.
;; Operands:
;;   0 - address operand ("Tv").
;;   1 - <avx512fmaskmode> scratch in a "k" register, clobbered.
;;   2 - index vector register (<VEC_GATHER_IDXSI>).
;;   3 - VI48F_512 register holding the data to store ("v").
;;   4 - SImode immediate accepted by const1248_operand.
;;   5 - destination memory reference built from 0/2/4 (vsib_mem_operator).
;;   6 - mask-mode input tied to scratch 1 by the "1" constraint.
;; The template prints the data register, then the memory operand
;; followed by operand 1 inside literal braces.
14860 (define_insn "*avx512f_scattersi<mode>"
14861 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
14863 [(match_operand:P 0 "vsib_address_operand" "Tv")
14864 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
14865 (match_operand:SI 4 "const1248_operand" "n")]
14868 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
14869 (match_operand:VI48F_512 3 "register_operand" "v")]
14871 (clobber (match_scratch:<avx512fmaskmode> 1 "=&k"))]
14873 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
14874 [(set_attr "type" "ssemov")
14875 (set_attr "prefix" "evex")
14876 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512F scatter that takes a V8DI index vector.
;; The destination of the set is a VI48F_512 memory reference.
;; Operands:
;;   0 - address operand (vsib_address_operand).
;;   1 - QImode register.
;;   2 - V8DI index vector register.
;;   3 - register of mode <VEC_GATHER_SRCDI> holding the data to store.
;;   4 - SImode immediate accepted by const1248_operand.
;;   6 - QImode scratch clobbered by the generated insn.
;; The preparation code wraps operands 0, 2 and 4 into a Pmode
;; UNSPEC_VSIBADDR.
14878 (define_expand "avx512f_scatterdi<mode>"
14879 [(parallel [(set (mem:VI48F_512
14881 [(match_operand 0 "vsib_address_operand")
14882 (match_operand:V8DI 2 "register_operand")
14883 (match_operand:SI 4 "const1248_operand")]))
14885 [(match_operand:QI 1 "register_operand")
14886 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
14888 (clobber (match_scratch:QI 6))])]
14892 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
14893 operands[4]), UNSPEC_VSIBADDR);
;; AVX-512F scatter, "scatterq" mnemonic.
;; Operands:
;;   0 - address operand ("Tv").
;;   1 - QImode scratch in a "k" register, clobbered.
;;   2 - V8DI index vector register ("v").
;;   3 - <VEC_GATHER_SRCDI> register holding the data to store ("v").
;;   4 - SImode immediate accepted by const1248_operand.
;;   5 - destination memory reference built from 0/2/4 (vsib_mem_operator).
;;   6 - QImode input tied to scratch 1 by the "1" constraint.
;; The template prints the data register, then the memory operand
;; followed by operand 1 inside literal braces.
14896 (define_insn "*avx512f_scatterdi<mode>"
14897 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
14899 [(match_operand:P 0 "vsib_address_operand" "Tv")
14900 (match_operand:V8DI 2 "register_operand" "v")
14901 (match_operand:SI 4 "const1248_operand" "n")]
14904 [(match_operand:QI 6 "register_operand" "1")
14905 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
14907 (clobber (match_scratch:QI 1 "=&k"))]
14909 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
14910 [(set_attr "type" "ssemov")
14911 (set_attr "prefix" "evex")
14912 (set_attr "mode" "<sseinsnmode>")])
;; Masked register-to-register compress for 512-bit VI48F modes.
;; Operands:
;;   0 - destination vector register ("=v").
;;   1 - source vector register ("v").
;;   2 - vector_move_operand with constraint "0C": either tied to the
;;       destination or a constant matching "C".
;;   3 - mask register of mode <avx512fmaskmode> ("k").
;; The template prints the mask in literal braces after the destination,
;; followed by the %N2 modifier applied to operand 2.
;; NOTE(review): %N2 presumably emits the zero-masking suffix when
;; operand 2 is the constant alternative -- confirm in the i386
;; print-operand code.
14914 (define_insn "avx512f_compress<mode>_mask"
14915 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14917 [(match_operand:VI48F_512 1 "register_operand" "v")
14918 (match_operand:VI48F_512 2 "vector_move_operand" "0C")
14919 (match_operand:<avx512fmaskmode> 3 "register_operand" "k")]
14922 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14923 [(set_attr "type" "ssemov")
14924 (set_attr "prefix" "evex")
14925 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress with a memory destination (UNSPEC_COMPRESS_STORE).
;; Operands:
;;   0 - destination memory operand ("=m").
;;   1 - source vector register ("x").
;;   2 - mask register of mode <avx512fmaskmode> ("k").
;; The "memory" attribute is set to "store" to match the memory
;; destination.
;; NOTE(review): operand 1 uses the "x" constraint while the other
;; EVEX-prefixed patterns nearby use "v" for vector registers -- confirm
;; whether the narrower register class is intentional.
14927 (define_insn "avx512f_compressstore<mode>_mask"
14928 [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
14930 [(match_operand:VI48F_512 1 "register_operand" "x")
14932 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")]
14933 UNSPEC_COMPRESS_STORE))]
14935 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
14936 [(set_attr "type" "ssemov")
14937 (set_attr "prefix" "evex")
14938 (set_attr "memory" "store")
14939 (set_attr "mode" "<sseinsnmode>")])
;; Masked expand for 512-bit VI48F modes, with two constraint
;; alternatives: register source ("v") and memory source ("m").
;; Operands:
;;   0 - destination vector register ("=v" in both alternatives).
;;   1 - source, register or memory (nonimmediate_operand).
;;   2 - vector_move_operand with constraint "0C": either tied to the
;;       destination or a constant matching "C".
;;   3 - mask register of mode <avx512fmaskmode> ("k").
;; The "memory" attribute lists "none,load" so that it follows the two
;; alternatives in order.
14941 (define_insn "avx512f_expand<mode>_mask"
14942 [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
14944 [(match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
14945 (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
14946 (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")]
14949 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14950 [(set_attr "type" "ssemov")
14951 (set_attr "prefix" "evex")
14952 (set_attr "memory" "none,load")
14953 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetmant for 512-bit floating-point modes (VF_512), with the
;; optional masking supplied by the <mask_name> substitution.
;; Operands:
;;   0 - destination vector register ("=v").
;;   1 - source, register or memory ("vm").
;;   2 - SImode immediate accepted by const_0_to_15_operand.
;; <mask_operand3> is appended to the destination in the template.
;; The output template is written without a trailing ';' after the
;; closing quote, matching the other patterns in this file.
14955 (define_insn "avx512f_getmant<mode><mask_name>"
14956 [(set (match_operand:VF_512 0 "register_operand" "=v")
14958 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")
14959 (match_operand:SI 2 "const_0_to_15_operand")]
14962 "vgetmant<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14963 [(set_attr "prefix" "evex")
14964 (set_attr "mode" "<MODE>")])
;; vgetmant for the VF_128 modes; the mnemonic suffix comes from
;; <ssescalarmodesuffix> and the "mode" attribute is <ssescalarmode>.
;; Operands:
;;   0 - destination vector register ("=v").
;;   1 - first source vector register ("v").
;;   2 - second source, register or memory ("vm").
;;   3 - SImode immediate accepted by const_0_to_15_operand.
;; The output template is written without a trailing ';' after the
;; closing quote, matching the other patterns in this file.
14966 (define_insn "avx512f_getmant<mode>"
14967 [(set (match_operand:VF_128 0 "register_operand" "=v")
14970 [(match_operand:VF_128 1 "register_operand" "v")
14971 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
14972 (match_operand:SI 3 "const_0_to_15_operand")]
14977 "vgetmant<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14978 [(set_attr "prefix" "evex")
14979 (set_attr "mode" "<ssescalarmode>")])
;; Named pattern clz<mode>2 for the 512-bit VI48 integer vector modes,
;; emitted as vplzcnt, with optional masking from <mask_name>.
;; Operands:
;;   0 - destination vector register ("=v").
;;   1 - source, register or memory ("vm").
;; <mask_operand2> is appended to the destination in the template.
14981 (define_insn "clz<mode>2<mask_name>"
14982 [(set (match_operand:VI48_512 0 "register_operand" "=v")
14984 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
14986 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14987 [(set_attr "type" "sse")
14988 (set_attr "prefix" "evex")
14989 (set_attr "mode" "<sseinsnmode>")])
;; vpconflict for the 512-bit VI48 integer vector modes, with optional
;; masking from <mask_name>; the pattern name is prefixed by
;; <mask_codefor>.
;; Operands:
;;   0 - destination vector register ("=v").
;;   1 - source, register or memory ("vm").
;; <mask_operand2> is appended to the destination in the template.
14991 (define_insn "<mask_codefor>conflict<mode><mask_name>"
14992 [(set (match_operand:VI48_512 0 "register_operand" "=v")
14994 [(match_operand:VI48_512 1 "nonimmediate_operand" "vm")]
14997 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14998 [(set_attr "type" "sse")
14999 (set_attr "prefix" "evex")
15000 (set_attr "mode" "<sseinsnmode>")])