1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2019 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
56 UNSPEC_XOP_UNSIGNED_CMP
67 UNSPEC_AESKEYGENASSIST
88 ;; For AVX512F support
90 UNSPEC_UNSIGNED_FIX_NOTRUNC
105 UNSPEC_COMPRESS_STORE
115 ;; For embed. rounding feature
116 UNSPEC_EMBEDDED_ROUNDING
118 ;; For AVX512PF support
119 UNSPEC_GATHER_PREFETCH
120 UNSPEC_SCATTER_PREFETCH
122 ;; For AVX512ER support
136 ;; For AVX512BW support
144 ;; For AVX512DQ support
149 ;; For AVX512IFMA support
153 ;; For AVX512VBMI support
156 ;; For AVX5124FMAPS/AVX5124VNNIW support
163 UNSPEC_GF2P8AFFINEINV
167 ;; For AVX512VBMI2 support
173 ;; For AVX512VNNI support
174 UNSPEC_VPMADDUBSWACCD
175 UNSPEC_VPMADDUBSWACCSSD
177 UNSPEC_VPMADDWDACCSSD
185 ;; For VPCLMULQDQ support
188 ;; For AVX512BITALG support
191 ;; For AVX512BF16 support
192 UNSPEC_VCVTNE2PS2BF16
197 (define_c_enum "unspecv" [
207 ;; All vector modes including V?TImode, used in move patterns.
208 (define_mode_iterator VMOVE
209 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
210 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
211 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
212 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
213 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
214 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
215 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
217 ;; All AVX-512{F,VL} vector modes. Supposed TARGET_AVX512F baseline.
218 (define_mode_iterator V48_AVX512VL
219 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
220 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
221 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
222 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
224 ;; 1,2 byte AVX-512{BW,VL} vector modes. Supposed TARGET_AVX512BW baseline.
225 (define_mode_iterator VI12_AVX512VL
226 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
227 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
229 ;; Same iterator, but without supposed TARGET_AVX512BW
230 (define_mode_iterator VI12_AVX512VLBW
231 [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
232 (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
233 (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; All 1-byte (QImode element) AVX-512 vector modes.  V64QI is
;; unconditional, so TARGET_AVX512BW is presumably the supposed baseline
;; (cf. VI12_AVX512VL above); the narrower widths require AVX512VL.
235 (define_mode_iterator VI1_AVX512VL
236 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
239 (define_mode_iterator V
240 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
241 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
242 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
243 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
244 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
245 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
247 ;; All 128bit vector modes
248 (define_mode_iterator V_128
249 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
251 ;; All 256bit vector modes
252 (define_mode_iterator V_256
253 [V32QI V16HI V8SI V4DI V8SF V4DF])
255 ;; All 128bit and 256bit vector modes
256 (define_mode_iterator V_128_256
257 [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
259 ;; All 512bit vector modes
260 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
262 ;; All 256bit and 512bit vector modes
263 (define_mode_iterator V_256_512
264 [V32QI V16HI V8SI V4DI V8SF V4DF
265 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
266 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
268 ;; All vector float modes
269 (define_mode_iterator VF
270 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
271 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
273 ;; 128- and 256-bit float vector modes
274 (define_mode_iterator VF_128_256
275 [(V8SF "TARGET_AVX") V4SF
276 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
278 ;; All SFmode vector float modes
279 (define_mode_iterator VF1
280 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
282 ;; 128- and 256-bit SF vector modes
283 (define_mode_iterator VF1_128_256
284 [(V8SF "TARGET_AVX") V4SF])
;; Like VF1_128_256 above, but with V8SF as the unconditional baseline
;; and V4SF additionally requiring AVX512VL.
286 (define_mode_iterator VF1_128_256VL
287 [V8SF (V4SF "TARGET_AVX512VL")])
289 ;; All DFmode vector float modes
290 (define_mode_iterator VF2
291 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
293 ;; 128- and 256-bit DF vector modes
294 (define_mode_iterator VF2_128_256
295 [(V4DF "TARGET_AVX") V2DF])
;; 256- and 512-bit DF vector modes
297 (define_mode_iterator VF2_512_256
298 [(V8DF "TARGET_AVX512F") V4DF])
;; Like VF2_512_256, but with V8DF as the unconditional baseline and
;; V4DF additionally requiring AVX512VL.
300 (define_mode_iterator VF2_512_256VL
301 [V8DF (V4DF "TARGET_AVX512VL")])
303 ;; All 128bit vector float modes
304 (define_mode_iterator VF_128
305 [V4SF (V2DF "TARGET_SSE2")])
307 ;; All 256bit vector float modes
308 (define_mode_iterator VF_256
311 ;; All 512bit vector float modes
312 (define_mode_iterator VF_512
;; 4- and 8-byte integer AVX-512{F,VL} vector modes.
;; Supposed TARGET_AVX512F baseline.
315 (define_mode_iterator VI48_AVX512VL
316 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
317 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; All float AVX-512{F,VL} vector modes.  Supposed TARGET_AVX512F baseline.
319 (define_mode_iterator VF_AVX512VL
320 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
321 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; DF AVX-512{F,VL} vector modes.  Supposed TARGET_AVX512F baseline.
323 (define_mode_iterator VF2_AVX512VL
324 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; SF AVX-512{F,VL} vector modes.  Supposed TARGET_AVX512F baseline.
326 (define_mode_iterator VF1_AVX512VL
327 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
329 ;; All vector integer modes
330 (define_mode_iterator VI
331 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
332 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
333 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
334 (V8SI "TARGET_AVX") V4SI
335 (V4DI "TARGET_AVX") V2DI])
337 (define_mode_iterator VI_AVX2
338 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
339 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
340 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
341 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
343 ;; All QImode vector integer modes
344 (define_mode_iterator VI1
345 [(V32QI "TARGET_AVX") V16QI])
347 ;; All 128bit and 256bit vector integer and float modes; the 256-bit
;; forms require AVX.  (The DImode-only iterator is VI8 below.)
348 (define_mode_iterator V_AVX
349 [V16QI V8HI V4SI V2DI V4SF V2DF
350 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
351 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
352 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")])
354 (define_mode_iterator VI48_AVX
356 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
;; All DImode vector integer modes
358 (define_mode_iterator VI8
359 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; DImode vector modes with V4DI unconditional; the 512-bit form needs
;; AVX512F and the 128-bit form needs AVX512VL.
361 (define_mode_iterator VI8_FVL
362 [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
;; DImode AVX-512{F,VL} vector modes.  Supposed TARGET_AVX512F baseline.
364 (define_mode_iterator VI8_AVX512VL
365 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; 256- and 512-bit DImode vector modes; V4DI requires AVX512VL.
367 (define_mode_iterator VI8_256_512
368 [V8DI (V4DI "TARGET_AVX512VL")])
;; QImode vector modes; the 256-bit form requires AVX2.
370 (define_mode_iterator VI1_AVX2
371 [(V32QI "TARGET_AVX2") V16QI])
;; QImode vector modes up to 512 bits; V64QI requires AVX512BW.
373 (define_mode_iterator VI1_AVX512
374 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
;; Like VI1_AVX512, but guarded by AVX512F/AVX instead of AVX512BW/AVX2.
376 (define_mode_iterator VI1_AVX512F
377 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
;; HImode vector modes; V32HI requires AVX512BW, V16HI requires AVX2.
379 (define_mode_iterator VI2_AVX2
380 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; Like VI2_AVX2, but V32HI only requires AVX512F.
382 (define_mode_iterator VI2_AVX512F
383 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
;; SImode vector modes; the 256-bit form requires AVX.
385 (define_mode_iterator VI4_AVX
386 [(V8SI "TARGET_AVX") V4SI])
;; SImode vector modes; the 256-bit form requires AVX2.
388 (define_mode_iterator VI4_AVX2
389 [(V8SI "TARGET_AVX2") V4SI])
;; SImode vector modes up to 512 bits; V16SI requires AVX512F.
391 (define_mode_iterator VI4_AVX512F
392 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; SImode AVX-512{F,VL} vector modes.  Supposed TARGET_AVX512F baseline.
394 (define_mode_iterator VI4_AVX512VL
395 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
;; SImode/DImode vector modes: 128/256-bit SI unconditional, 512-bit SI
;; under AVX512F; DI forms need AVX512VL (128/256 bit) or AVX512F (512 bit).
397 (define_mode_iterator VI48_AVX512F_AVX512VL
398 [V4SI V8SI (V16SI "TARGET_AVX512F")
399 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
;; HImode AVX-512 vector modes; the narrower widths require AVX512VL.
401 (define_mode_iterator VI2_AVX512VL
402 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
;; QImode vector modes: V32QI unconditional, V16QI under AVX512VL,
;; V64QI under AVX512F.
404 (define_mode_iterator VI1_AVX512VL_F
405 [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
;; DImode vector modes; V8DI requires AVX512BW, V4DI requires AVX2.
407 (define_mode_iterator VI8_AVX2_AVX512BW
408 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
;; DImode vector modes; the 256-bit form requires AVX2.
410 (define_mode_iterator VI8_AVX2
411 [(V4DI "TARGET_AVX2") V2DI])
;; DImode vector modes up to 512 bits; V8DI requires AVX512F.
413 (define_mode_iterator VI8_AVX2_AVX512F
414 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; 256- and 512-bit DImode vector modes (no 128-bit form).
416 (define_mode_iterator VI8_AVX_AVX512F
417 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
419 (define_mode_iterator VI4_128_8_256
423 (define_mode_iterator V8FI
427 (define_mode_iterator V16FI
430 ;; ??? We should probably use TImode instead.
431 (define_mode_iterator VIMAX_AVX2_AVX512BW
432 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
434 ;; Suppose TARGET_AVX512BW as baseline
435 (define_mode_iterator VIMAX_AVX512VL
436 [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
438 (define_mode_iterator VIMAX_AVX2
439 [(V2TI "TARGET_AVX2") V1TI])
441 ;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
442 (define_mode_iterator SSESCALARMODE
443 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
445 (define_mode_iterator VI12_AVX2
446 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
447 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
449 (define_mode_iterator VI24_AVX2
450 [(V16HI "TARGET_AVX2") V8HI
451 (V8SI "TARGET_AVX2") V4SI])
453 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
454 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
455 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
456 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
458 (define_mode_iterator VI124_AVX2
459 [(V32QI "TARGET_AVX2") V16QI
460 (V16HI "TARGET_AVX2") V8HI
461 (V8SI "TARGET_AVX2") V4SI])
463 (define_mode_iterator VI2_AVX2_AVX512BW
464 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
466 (define_mode_iterator VI248_AVX512VL
468 (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
469 (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
470 (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
472 (define_mode_iterator VI48_AVX2
473 [(V8SI "TARGET_AVX2") V4SI
474 (V4DI "TARGET_AVX2") V2DI])
476 (define_mode_iterator VI248_AVX2
477 [(V16HI "TARGET_AVX2") V8HI
478 (V8SI "TARGET_AVX2") V4SI
479 (V4DI "TARGET_AVX2") V2DI])
481 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
482 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
483 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
484 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
486 (define_mode_iterator VI248_AVX512BW
487 [(V32HI "TARGET_AVX512BW") V16SI V8DI])
489 (define_mode_iterator VI248_AVX512BW_AVX512VL
490 [(V32HI "TARGET_AVX512BW")
491 (V4DI "TARGET_AVX512VL") V16SI V8DI])
493 ;; Suppose TARGET_AVX512VL as baseline
494 (define_mode_iterator VI248_AVX512BW_1
495 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
499 (define_mode_iterator VI248_AVX512BW_2
500 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
504 (define_mode_iterator VI48_AVX512F
505 [(V16SI "TARGET_AVX512F") V8SI V4SI
506 (V8DI "TARGET_AVX512F") V4DI V2DI])
508 (define_mode_iterator VI48_AVX_AVX512F
509 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
510 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
512 (define_mode_iterator VI12_AVX_AVX512F
513 [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
514 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
516 (define_mode_iterator V48_AVX2
519 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
520 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
522 (define_mode_iterator VI1_AVX512VLBW
523 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL")
524 (V16QI "TARGET_AVX512VL")])
526 (define_mode_attr avx512
527 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
528 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
529 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
530 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
531 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
532 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
534 (define_mode_attr sse2_avx_avx512f
535 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
536 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
537 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
538 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
539 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
540 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
542 (define_mode_attr sse2_avx2
543 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
544 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
545 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
546 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
547 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
549 (define_mode_attr ssse3_avx2
550 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
551 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
552 (V4SI "ssse3") (V8SI "avx2")
553 (V2DI "ssse3") (V4DI "avx2")
554 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
556 (define_mode_attr sse4_1_avx2
557 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
558 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
559 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
560 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
562 (define_mode_attr avx_avx2
563 [(V4SF "avx") (V2DF "avx")
564 (V8SF "avx") (V4DF "avx")
565 (V4SI "avx2") (V2DI "avx2")
566 (V8SI "avx2") (V4DI "avx2")])
568 (define_mode_attr vec_avx2
569 [(V16QI "vec") (V32QI "avx2")
570 (V8HI "vec") (V16HI "avx2")
571 (V4SI "vec") (V8SI "avx2")
572 (V2DI "vec") (V4DI "avx2")])
574 (define_mode_attr avx2_avx512
575 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
576 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
577 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
578 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
579 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
581 (define_mode_attr shuffletype
582 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
583 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
584 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
585 (V32HI "i") (V16HI "i") (V8HI "i")
586 (V64QI "i") (V32QI "i") (V16QI "i")
587 (V4TI "i") (V2TI "i") (V1TI "i")])
;; Mapping of a 512-bit vector mode to the vector mode with a quarter of
;; its elements.
589 (define_mode_attr ssequartermode
590 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
;; Like ssequartermode, but the integer quarters map to TImode
;; (presumably the insn mode, per the attr name — confirm at use sites).
592 (define_mode_attr ssequarterinsnmode
593 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")])
595 (define_mode_attr ssedoublemodelower
596 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
597 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
598 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
600 (define_mode_attr ssedoublemode
601 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
602 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
603 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
604 (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
605 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
606 (V4DI "V8DI") (V8DI "V16DI")])
608 (define_mode_attr ssebytemode
609 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
610 (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
612 ;; All 128bit vector integer modes
613 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
615 ;; All 256bit vector integer modes
616 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
618 ;; Various 128bit vector integer mode combinations
619 (define_mode_iterator VI12_128 [V16QI V8HI])
620 (define_mode_iterator VI14_128 [V16QI V4SI])
621 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
622 (define_mode_iterator VI24_128 [V8HI V4SI])
623 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
624 (define_mode_iterator VI48_128 [V4SI V2DI])
626 ;; Various 256bit and 512 vector integer mode combinations
627 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
628 (define_mode_iterator VI124_256_AVX512F_AVX512BW
630 (V64QI "TARGET_AVX512BW")
631 (V32HI "TARGET_AVX512BW")
632 (V16SI "TARGET_AVX512F")])
633 (define_mode_iterator VI48_256 [V8SI V4DI])
634 (define_mode_iterator VI48_512 [V16SI V8DI])
635 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
636 (define_mode_iterator VI_AVX512BW
637 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
639 ;; Int-float size matches
640 (define_mode_iterator VI4F_128 [V4SI V4SF])
641 (define_mode_iterator VI8F_128 [V2DI V2DF])
642 (define_mode_iterator VI4F_256 [V8SI V8SF])
643 (define_mode_iterator VI8F_256 [V4DI V4DF])
644 (define_mode_iterator VI4F_256_512
646 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
647 (define_mode_iterator VI48F_256_512
649 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
650 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
651 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
652 (define_mode_iterator VF48_I1248
653 [V16SI V16SF V8DI V8DF V32HI V64QI])
654 (define_mode_iterator VI48F
655 [V16SI V16SF V8DI V8DF
656 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
657 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
658 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
659 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
660 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
662 (define_mode_iterator VF_AVX512
663 [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
664 (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
667 (define_mode_attr avx512bcst
668 [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
669 (V8SI "%{1to8%}") (V4DI "%{1to4%}")
670 (V16SI "%{1to16%}") (V8DI "%{1to8%}")
671 (V4SF "%{1to4%}") (V2DF "%{1to2%}")
672 (V8SF "%{1to8%}") (V4DF "%{1to4%}")
673 (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
675 ;; Mapping from float mode to required SSE level
676 (define_mode_attr sse
677 [(SF "sse") (DF "sse2")
678 (V4SF "sse") (V2DF "sse2")
679 (V16SF "avx512f") (V8SF "avx")
680 (V8DF "avx512f") (V4DF "avx")])
682 (define_mode_attr sse2
683 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
684 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
686 (define_mode_attr sse3
687 [(V16QI "sse3") (V32QI "avx")])
689 (define_mode_attr sse4_1
690 [(V4SF "sse4_1") (V2DF "sse4_1")
691 (V8SF "avx") (V4DF "avx")
693 (V4DI "avx") (V2DI "sse4_1")
694 (V8SI "avx") (V4SI "sse4_1")
695 (V16QI "sse4_1") (V32QI "avx")
696 (V8HI "sse4_1") (V16HI "avx")])
698 (define_mode_attr avxsizesuffix
699 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
700 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
701 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
702 (V16SF "512") (V8DF "512")
703 (V8SF "256") (V4DF "256")
704 (V4SF "") (V2DF "")])
706 ;; SSE instruction mode
707 (define_mode_attr sseinsnmode
708 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
709 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
710 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
711 (V16SF "V16SF") (V8DF "V8DF")
712 (V8SF "V8SF") (V4DF "V4DF")
713 (V4SF "V4SF") (V2DF "V2DF")
716 ;; Mapping of vector modes to the corresponding mask mode
717 (define_mode_attr avx512fmaskmode
718 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
719 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
720 (V16SI "HI") (V8SI "QI") (V4SI "QI")
721 (V8DI "QI") (V4DI "QI") (V2DI "QI")
722 (V16SF "HI") (V8SF "QI") (V4SF "QI")
723 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
725 ;; Same mapping as avx512fmaskmode, but in lower case
726 (define_mode_attr avx512fmaskmodelower
727 [(V64QI "di") (V32QI "si") (V16QI "hi")
728 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
729 (V16SI "hi") (V8SI "qi") (V4SI "qi")
730 (V8DI "qi") (V4DI "qi") (V2DI "qi")
731 (V16SF "hi") (V8SF "qi") (V4SF "qi")
732 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
734 ;; Mapping of vector modes to corresponding mask half size
735 (define_mode_attr avx512fmaskhalfmode
736 [(V64QI "SI") (V32QI "HI") (V16QI "QI")
737 (V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
738 (V16SI "QI") (V8SI "QI") (V4SI "QI")
739 (V8DI "QI") (V4DI "QI") (V2DI "QI")
740 (V16SF "QI") (V8SF "QI") (V4SF "QI")
741 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
743 ;; Mapping of vector float modes to an integer mode of the same size
744 (define_mode_attr sseintvecmode
745 [(V16SF "V16SI") (V8DF "V8DI")
746 (V8SF "V8SI") (V4DF "V4DI")
747 (V4SF "V4SI") (V2DF "V2DI")
748 (V16SI "V16SI") (V8DI "V8DI")
749 (V8SI "V8SI") (V4DI "V4DI")
750 (V4SI "V4SI") (V2DI "V2DI")
751 (V16HI "V16HI") (V8HI "V8HI")
752 (V32HI "V32HI") (V64QI "V64QI")
753 (V32QI "V32QI") (V16QI "V16QI")])
755 (define_mode_attr sseintvecmode2
756 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
757 (V8SF "OI") (V4SF "TI")])
759 (define_mode_attr sseintvecmodelower
760 [(V16SF "v16si") (V8DF "v8di")
761 (V8SF "v8si") (V4DF "v4di")
762 (V4SF "v4si") (V2DF "v2di")
763 (V8SI "v8si") (V4DI "v4di")
764 (V4SI "v4si") (V2DI "v2di")
765 (V16HI "v16hi") (V8HI "v8hi")
766 (V32QI "v32qi") (V16QI "v16qi")])
768 ;; Mapping of vector modes to a vector mode of double size
769 (define_mode_attr ssedoublevecmode
770 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
771 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
772 (V8SF "V16SF") (V4DF "V8DF")
773 (V4SF "V8SF") (V2DF "V4DF")])
775 ;; Mapping of vector modes to a vector mode of half size
776 (define_mode_attr ssehalfvecmode
777 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
778 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
779 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
780 (V16SF "V8SF") (V8DF "V4DF")
781 (V8SF "V4SF") (V4DF "V2DF")
784 (define_mode_attr ssehalfvecmodelower
785 [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
786 (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
787 (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
788 (V16SF "v8sf") (V8DF "v4df")
789 (V8SF "v4sf") (V4DF "v2df")
792 ;; Mapping of vector modes to the packed single mode of the same size
793 (define_mode_attr ssePSmode
794 [(V16SI "V16SF") (V8DF "V16SF")
795 (V16SF "V16SF") (V8DI "V16SF")
796 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
797 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
798 (V8SI "V8SF") (V4SI "V4SF")
799 (V4DI "V8SF") (V2DI "V4SF")
800 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
801 (V8SF "V8SF") (V4SF "V4SF")
802 (V4DF "V8SF") (V2DF "V4SF")])
804 (define_mode_attr ssePSmode2
805 [(V8DI "V8SF") (V4DI "V4SF")])
807 ;; Mapping of vector modes back to the scalar modes
808 (define_mode_attr ssescalarmode
809 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
810 (V32HI "HI") (V16HI "HI") (V8HI "HI")
811 (V16SI "SI") (V8SI "SI") (V4SI "SI")
812 (V8DI "DI") (V4DI "DI") (V2DI "DI")
813 (V16SF "SF") (V8SF "SF") (V4SF "SF")
814 (V8DF "DF") (V4DF "DF") (V2DF "DF")
815 (V4TI "TI") (V2TI "TI")])
817 ;; Same mapping as ssescalarmode, but in lower case
818 (define_mode_attr ssescalarmodelower
819 [(V64QI "qi") (V32QI "qi") (V16QI "qi")
820 (V32HI "hi") (V16HI "hi") (V8HI "hi")
821 (V16SI "si") (V8SI "si") (V4SI "si")
822 (V8DI "di") (V4DI "di") (V2DI "di")
823 (V16SF "sf") (V8SF "sf") (V4SF "sf")
824 (V8DF "df") (V4DF "df") (V2DF "df")
825 (V4TI "ti") (V2TI "ti")])
827 ;; Mapping of vector modes to the 128bit modes
828 (define_mode_attr ssexmmmode
829 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
830 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
831 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
832 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
833 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
834 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
836 ;; Pointer size override for scalar modes (Intel asm dialect)
837 (define_mode_attr iptr
838 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
839 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
840 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
841 (V16SF "k") (V8DF "q")
842 (V8SF "k") (V4DF "q")
843 (V4SF "k") (V2DF "q")
846 ;; Number of scalar elements in each vector type
847 (define_mode_attr ssescalarnum
848 [(V64QI "64") (V16SI "16") (V8DI "8")
849 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
850 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
851 (V16SF "16") (V8DF "8")
852 (V8SF "8") (V4DF "4")
853 (V4SF "4") (V2DF "2")])
855 ;; Mask of scalar elements in each vector type
856 (define_mode_attr ssescalarnummask
857 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
858 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
859 (V8SF "7") (V4DF "3")
860 (V4SF "3") (V2DF "1")])
862 (define_mode_attr ssescalarsize
863 [(V4TI "64") (V2TI "64") (V1TI "64")
864 (V8DI "64") (V4DI "64") (V2DI "64")
865 (V64QI "8") (V32QI "8") (V16QI "8")
866 (V32HI "16") (V16HI "16") (V8HI "16")
867 (V16SI "32") (V8SI "32") (V4SI "32")
868 (V16SF "32") (V8SF "32") (V4SF "32")
869 (V8DF "64") (V4DF "64") (V2DF "64")])
871 ;; SSE prefix for integer vector modes
872 (define_mode_attr sseintprefix
873 [(V2DI "p") (V2DF "")
878 (V16SI "p") (V16SF "")
879 (V16QI "p") (V8HI "p")
880 (V32QI "p") (V16HI "p")
881 (V64QI "p") (V32HI "p")])
883 ;; SSE scalar suffix for vector modes
884 (define_mode_attr ssescalarmodesuffix
886 (V16SF "ss") (V8DF "sd")
887 (V8SF "ss") (V4DF "sd")
888 (V4SF "ss") (V2DF "sd")
889 (V16SI "d") (V8DI "q")
890 (V8SI "d") (V4DI "q")
891 (V4SI "d") (V2DI "q")])
893 ;; Pack/unpack vector modes
894 (define_mode_attr sseunpackmode
895 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
896 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
897 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
899 (define_mode_attr ssepackmode
900 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
901 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
902 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
904 ;; Mapping of the max integer size for xop rotate immediate constraint
905 (define_mode_attr sserotatemax
906 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
908 ;; Mapping of mode to cast intrinsic name
909 (define_mode_attr castmode
910 [(V8SI "si") (V8SF "ps") (V4DF "pd")
911 (V16SI "si") (V16SF "ps") (V8DF "pd")])
913 ;; Instruction suffix for sign and zero extensions.
914 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
916 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
917 ;; i64x4 or f64x4 for 512bit modes.
918 (define_mode_attr i128
919 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
920 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
921 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
923 ;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
924 ;; i32x4, f32x4, i64x2 or f64x2 suffixes.
925 (define_mode_attr i128vldq
926 [(V8SF "f32x4") (V4DF "f64x2")
927 (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
;; SI/SF/DF subsets of the 256-bit and 512-bit vector modes.
930 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
931 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
933 ;; Mapping for dbpsadbw modes
934 (define_mode_attr dbpsadbwmode
935 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
937 ;; Mapping suffixes for broadcast
938 (define_mode_attr bcstscalarsuff
939 [(V64QI "b") (V32QI "b") (V16QI "b")
940 (V32HI "w") (V16HI "w") (V8HI "w")
941 (V16SI "d") (V8SI "d") (V4SI "d")
942 (V8DI "q") (V4DI "q") (V2DI "q")
943 (V16SF "ss") (V8SF "ss") (V4SF "ss")
944 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
946 ;; Tie mode of assembler operand to mode iterator
947 (define_mode_attr xtg_mode
948 [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
949 (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
950 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
952 ;; Half mask mode for unpacks
953 (define_mode_attr HALFMASKMODE
954 [(DI "SI") (SI "HI")])
956 ;; Double mask mode for packs
957 (define_mode_attr DOUBLEMASKMODE
958 [(HI "SI") (SI "DI")])
961 ;; Include define_subst patterns for instructions with mask
964 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
966 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
970 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
972 ;; All of these patterns are enabled for SSE1 as well as SSE2.
973 ;; This is essential for maintaining stable calling conventions.
975 (define_expand "mov<mode>"
976 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
977 (match_operand:VMOVE 1 "nonimmediate_operand"))]
980 ix86_expand_vector_move (<MODE>mode, operands);
984 (define_insn "mov<mode>_internal"
985 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
987 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
990 && (register_operand (operands[0], <MODE>mode)
991 || register_operand (operands[1], <MODE>mode))"
993 switch (get_attr_type (insn))
996 return standard_sse_constant_opcode (insn, operands);
999 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
1000 in avx512f, so we need to use workarounds, to access sse registers
1001 16-31, which are evex-only. In avx512vl we don't need workarounds. */
1002 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
1003 && (EXT_REX_SSE_REG_P (operands[0])
1004 || EXT_REX_SSE_REG_P (operands[1])))
1006 if (memory_operand (operands[0], <MODE>mode))
1008 if (<MODE_SIZE> == 32)
1009 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
1010 else if (<MODE_SIZE> == 16)
1011 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
1015 else if (memory_operand (operands[1], <MODE>mode))
1017 if (<MODE_SIZE> == 32)
1018 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
1019 else if (<MODE_SIZE> == 16)
1020 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
1025 /* Reg -> reg move is always aligned. Just use wider move. */
1026 switch (get_attr_mode (insn))
1030 return "vmovaps\t{%g1, %g0|%g0, %g1}";
1033 return "vmovapd\t{%g1, %g0|%g0, %g1}";
1036 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
1042 switch (get_attr_mode (insn))
1047 if (misaligned_operand (operands[0], <MODE>mode)
1048 || misaligned_operand (operands[1], <MODE>mode))
1049 return "%vmovups\t{%1, %0|%0, %1}";
1051 return "%vmovaps\t{%1, %0|%0, %1}";
1056 if (misaligned_operand (operands[0], <MODE>mode)
1057 || misaligned_operand (operands[1], <MODE>mode))
1058 return "%vmovupd\t{%1, %0|%0, %1}";
1060 return "%vmovapd\t{%1, %0|%0, %1}";
1064 if (misaligned_operand (operands[0], <MODE>mode)
1065 || misaligned_operand (operands[1], <MODE>mode))
1066 return TARGET_AVX512VL
1067 && (<MODE>mode == V4SImode
1068 || <MODE>mode == V2DImode
1069 || <MODE>mode == V8SImode
1070 || <MODE>mode == V4DImode
1072 ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1073 : "%vmovdqu\t{%1, %0|%0, %1}";
1075 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
1076 : "%vmovdqa\t{%1, %0|%0, %1}";
1078 if (misaligned_operand (operands[0], <MODE>mode)
1079 || misaligned_operand (operands[1], <MODE>mode))
1080 return (<MODE>mode == V16SImode
1081 || <MODE>mode == V8DImode
1083 ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1084 : "vmovdqu64\t{%1, %0|%0, %1}";
1086 return "vmovdqa64\t{%1, %0|%0, %1}";
1096 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1097 (set_attr "prefix" "maybe_vex")
1099 (cond [(and (eq_attr "alternative" "1")
1100 (match_test "TARGET_AVX512VL"))
1101 (const_string "<sseinsnmode>")
1102 (and (match_test "<MODE_SIZE> == 16")
1103 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1104 (and (eq_attr "alternative" "3")
1105 (match_test "TARGET_SSE_TYPELESS_STORES"))))
1106 (const_string "<ssePSmode>")
1107 (match_test "TARGET_AVX")
1108 (const_string "<sseinsnmode>")
1109 (ior (not (match_test "TARGET_SSE2"))
1110 (match_test "optimize_function_for_size_p (cfun)"))
1111 (const_string "V4SF")
1112 (and (eq_attr "alternative" "0")
1113 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1116 (const_string "<sseinsnmode>")))
1117 (set (attr "enabled")
1118 (cond [(and (match_test "<MODE_SIZE> == 16")
1119 (eq_attr "alternative" "1"))
1120 (symbol_ref "TARGET_SSE2")
1121 (and (match_test "<MODE_SIZE> == 32")
1122 (eq_attr "alternative" "1"))
1123 (symbol_ref "TARGET_AVX2")
1125 (symbol_ref "true")))])
;; Masked vector load/move for modes with 32-/64-bit elements
;; (V48_AVX512VL): op0 = op1 (register or memory) merged with op2 under
;; mask op3.  The "0C" constraint on op2 allows merge-masking (op2 ties
;; to op0) or zero-masking (op2 is const0).  FP element modes choose
;; vmovu/vmova<ssemodesuffix> by source (mis)alignment; integer element
;; modes choose vmovdqu/vmovdqa<ssescalarsize> the same way.
1127 (define_insn "<avx512>_load<mode>_mask"
1128 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1129 (vec_merge:V48_AVX512VL
1130 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
1131 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
1132 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1135 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1137 if (misaligned_operand (operands[1], <MODE>mode))
1138 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1140 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1144 if (misaligned_operand (operands[1], <MODE>mode))
1145 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1147 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1150 [(set_attr "type" "ssemov")
1151 (set_attr "prefix" "evex")
1152 (set_attr "memory" "none,load")
1153 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector load/move for 8-/16-bit element integer modes
;; (VI12_AVX512VL).  Unlike the 32/64-bit variant above, this always
;; emits the unaligned vmovdqu<ssescalarsize> form regardless of
;; operand alignment.
1155 (define_insn "<avx512>_load<mode>_mask"
1156 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1157 (vec_merge:VI12_AVX512VL
1158 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
1159 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
1160 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1162 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1163 [(set_attr "type" "ssemov")
1164 (set_attr "prefix" "evex")
1165 (set_attr "memory" "none,load")
1166 (set_attr "mode" "<sseinsnmode>")])
1168 (define_insn "avx512f_mov<ssescalarmodelower>_mask"
1169 [(set (match_operand:VF_128 0 "register_operand" "=v")
1172 (match_operand:VF_128 2 "register_operand" "v")
1173 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1174 (match_operand:QI 4 "register_operand" "Yk"))
1175 (match_operand:VF_128 1 "register_operand" "v")
1178 "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
1179 [(set_attr "type" "ssemov")
1180 (set_attr "prefix" "evex")
1181 (set_attr "mode" "<ssescalarmode>")])
;; Expander for masked scalar (SF/DF) loads: broadcast the scalar memory
;; operand, merge with op2 under QImode mask op3, then merge against a
;; zero vector so only element 0 survives.  The preparation statement
;; fills in operands[4] with that zero vector.
1183 (define_expand "avx512f_load<mode>_mask"
1184 [(set (match_operand:<ssevecmode> 0 "register_operand")
1185 (vec_merge:<ssevecmode>
1186 (vec_merge:<ssevecmode>
1187 (vec_duplicate:<ssevecmode>
1188 (match_operand:MODEF 1 "memory_operand"))
1189 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
1190 (match_operand:QI 3 "register_operand"))
1194 "operands[4] = CONST0_RTX (<ssevecmode>mode);")
;; Masked scalar (SF/DF) load: op0's element 0 is loaded from memory op1
;; under bit 0 of mask op3 (merge with op2 per %N2 masking), upper
;; elements forced to zero by the outer merge with const0 (op4).
;; Fix: the Intel-syntax half of the template wrote "%{3%}" — a literal
;; '3' — instead of the operand-3 mask escape "%{%3%}", which would emit
;; an invalid mask annotation; the AT&T half and all sibling masked-load
;; templates use "%{%3%}".
1196 (define_insn "*avx512f_load<mode>_mask"
1197 [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
1198 (vec_merge:<ssevecmode>
1199 (vec_merge:<ssevecmode>
1200 (vec_duplicate:<ssevecmode>
1201 (match_operand:MODEF 1 "memory_operand" "m"))
1202 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
1203 (match_operand:QI 3 "register_operand" "Yk"))
1204 (match_operand:<ssevecmode> 4 "const0_operand" "C")
1207 "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1208 [(set_attr "type" "ssemov")
1209 (set_attr "prefix" "evex")
1210 (set_attr "memory" "load")
1211 (set_attr "mode" "<MODE>")])
;; Masked scalar (SF/DF) store: element 0 of vector op1 is written to
;; memory op0 under the low bit of mask op2 (note the and:QI on the
;; mask), emitted as vmovss/vmovsd with a %{k%} mask annotation.
1213 (define_insn "avx512f_store<mode>_mask"
1214 [(set (match_operand:MODEF 0 "memory_operand" "=m")
1216 (and:QI (match_operand:QI 2 "register_operand" "Yk")
1219 (match_operand:<ssevecmode> 1 "register_operand" "v")
1220 (parallel [(const_int 0)]))
1223 "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1224 [(set_attr "type" "ssemov")
1225 (set_attr "prefix" "evex")
1226 (set_attr "memory" "store")
1227 (set_attr "mode" "<MODE>")])
;; Mask blend for 32-/64-bit element modes: op0 = op3 ? op2 : op1.
;; <sseintprefix> selects vblendm (FP) vs. vpblendm (integer) spellings.
1229 (define_insn "<avx512>_blendm<mode>"
1230 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1231 (vec_merge:V48_AVX512VL
1232 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1233 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1234 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1236 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1237 [(set_attr "type" "ssemov")
1238 (set_attr "prefix" "evex")
1239 (set_attr "mode" "<sseinsnmode>")])
;; Mask blend for 8-/16-bit element integer modes: op0 = op3 ? op2 : op1,
;; emitted as vpblendmb/vpblendmw via <ssemodesuffix>.
1241 (define_insn "<avx512>_blendm<mode>"
1242 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1243 (vec_merge:VI12_AVX512VL
1244 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1245 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1246 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1248 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1249 [(set_attr "type" "ssemov")
1250 (set_attr "prefix" "evex")
1251 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store for 32-/64-bit element modes: store op1 to memory
;; op0 under mask op2.  FP element modes choose vmovu/vmova<ssemodesuffix>
;; by destination (mis)alignment; integer element modes choose
;; vmovdqu/vmovdqa<ssescalarsize>.
1253 (define_insn "<avx512>_store<mode>_mask"
1254 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1255 (vec_merge:V48_AVX512VL
1256 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1258 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1261 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1263 if (misaligned_operand (operands[0], <MODE>mode))
1264 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1266 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1270 if (misaligned_operand (operands[0], <MODE>mode))
1271 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1273 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1276 [(set_attr "type" "ssemov")
1277 (set_attr "prefix" "evex")
1278 (set_attr "memory" "store")
1279 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store for 8-/16-bit element integer modes; always the
;; unaligned vmovdqu<ssescalarsize> form.
1281 (define_insn "<avx512>_store<mode>_mask"
1282 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1283 (vec_merge:VI12_AVX512VL
1284 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1286 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1288 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1289 [(set_attr "type" "ssemov")
1290 (set_attr "prefix" "evex")
1291 (set_attr "memory" "store")
1292 (set_attr "mode" "<sseinsnmode>")])
;; movq: move the low DImode element of op1 into op0, emitted as
;; (%v)movq (%q1 prints the 64-bit form of a memory operand in Intel
;; syntax).  NOTE(review): the upper-half-zeroing part of the pattern is
;; assumed from movq semantics — confirm against the full vec_concat
;; pattern in the unabridged source.
1294 (define_insn "sse2_movq128"
1295 [(set (match_operand:V2DI 0 "register_operand" "=v")
1298 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1299 (parallel [(const_int 0)]))
1302 "%vmovq\t{%1, %0|%0, %q1}"
1303 [(set_attr "type" "ssemov")
1304 (set_attr "prefix" "maybe_vex")
1305 (set_attr "mode" "TI")])
1307 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1308 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1309 ;; from memory, we'd prefer to load the memory directly into the %xmm
1310 ;; register. To facilitate this happy circumstance, this pattern won't
1311 ;; split until after register allocation. If the 64-bit value didn't
1312 ;; come from memory, this is the best we can do. This is much better
1313 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; Post-reload splitter with three strategies for DImode -> xmm on
;; 32-bit targets: (alt 0, SSE4.1) load the low word with movd then
;; insert the high word with pinsrd; (fallback, needs scratch op2) build
;; low and high halves in separate regs and punpckldq them together;
;; (memory source) a single vec_concat load of the whole DImode.
1316 (define_insn_and_split "movdi_to_sse"
1317 [(set (match_operand:V4SI 0 "register_operand" "=x,x,?x")
1318 (unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m,r")]
1319 UNSPEC_MOVDI_TO_SSE))
1320 (clobber (match_scratch:V4SI 2 "=X,X,&x"))]
1321 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1323 "&& reload_completed"
1326 if (register_operand (operands[1], DImode))
1328 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1329 Assemble the 64-bit DImode value in an xmm register. */
1330 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1331 gen_lowpart (SImode, operands[1])));
1333 emit_insn (gen_sse4_1_pinsrd (operands[0], operands[0],
1334 gen_highpart (SImode, operands[1]),
1338 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1339 gen_highpart (SImode, operands[1])));
1340 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1344 else if (memory_operand (operands[1], DImode))
1345 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
1346 operands[1], const0_rtx));
1351 [(set_attr "isa" "sse4,*,*")])
1354 [(set (match_operand:V4SF 0 "register_operand")
1355 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1356 "TARGET_SSE && reload_completed"
1359 (vec_duplicate:V4SF (match_dup 1))
1363 operands[1] = gen_lowpart (SFmode, operands[1]);
1364 operands[2] = CONST0_RTX (V4SFmode);
1368 [(set (match_operand:V2DF 0 "register_operand")
1369 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1370 "TARGET_SSE2 && reload_completed"
1371 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1373 operands[1] = gen_lowpart (DFmode, operands[1]);
1374 operands[2] = CONST0_RTX (DFmode);
1377 (define_expand "movmisalign<mode>"
1378 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1379 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1382 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1386 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
1388 [(set (match_operand:V2DF 0 "sse_reg_operand")
1389 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1390 (match_operand:DF 4 "const0_operand")))
1391 (set (match_operand:V2DF 2 "sse_reg_operand")
1392 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1393 (parallel [(const_int 0)]))
1394 (match_operand:DF 3 "memory_operand")))]
1395 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1396 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1397 [(set (match_dup 2) (match_dup 5))]
1398 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1401 [(set (match_operand:DF 0 "sse_reg_operand")
1402 (match_operand:DF 1 "memory_operand"))
1403 (set (match_operand:V2DF 2 "sse_reg_operand")
1404 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1405 (match_operand:DF 3 "memory_operand")))]
1406 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1407 && REGNO (operands[4]) == REGNO (operands[2])
1408 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1409 [(set (match_dup 2) (match_dup 5))]
1410 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1412 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
1414 [(set (match_operand:DF 0 "memory_operand")
1415 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1416 (parallel [(const_int 0)])))
1417 (set (match_operand:DF 2 "memory_operand")
1418 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1419 (parallel [(const_int 1)])))]
1420 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1421 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1422 [(set (match_dup 4) (match_dup 1))]
1423 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
;; lddqu: unaligned load of byte vectors, kept as an unspec so it is not
;; interchanged with ordinary unaligned moves; emitted as (%v)lddqu.
;; The prefix_data16/prefix_rep attrs describe the legacy (non-VEX)
;; F2-prefixed encoding; under AVX the VEX prefix subsumes them.
1425 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1426 [(set (match_operand:VI1 0 "register_operand" "=x")
1427 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1430 "%vlddqu\t{%1, %0|%0, %1}"
1431 [(set_attr "type" "ssemov")
1432 (set_attr "movu" "1")
1433 (set (attr "prefix_data16")
1435 (match_test "TARGET_AVX")
1437 (const_string "0")))
1438 (set (attr "prefix_rep")
1440 (match_test "TARGET_AVX")
1442 (const_string "1")))
1443 (set_attr "prefix" "maybe_vex")
1444 (set_attr "mode" "<sseinsnmode>")])
;; movnti: non-temporal store of a scalar integer register (SImode, and
;; DImode on 64-bit targets via SWI48) to memory.
1446 (define_insn "sse2_movnti<mode>"
1447 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1448 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1451 "movnti\t{%1, %0|%0, %1}"
1452 [(set_attr "type" "ssemov")
1453 (set_attr "prefix_data16" "0")
1454 (set_attr "mode" "<MODE>")])
1456 (define_insn "<sse>_movnt<mode>"
1457 [(set (match_operand:VF 0 "memory_operand" "=m")
1459 [(match_operand:VF 1 "register_operand" "v")]
1462 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1463 [(set_attr "type" "ssemov")
1464 (set_attr "prefix" "maybe_vex")
1465 (set_attr "mode" "<MODE>")])
;; Non-temporal store of 64-bit-element integer vectors, emitted as
;; (%v)movntdq.  (The "ssecvt" type classification for movntdq matches
;; long-standing upstream usage; it is not a typo for "ssemov".)
1467 (define_insn "<sse2>_movnt<mode>"
1468 [(set (match_operand:VI8 0 "memory_operand" "=m")
1469 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1472 "%vmovntdq\t{%1, %0|%0, %1}"
1473 [(set_attr "type" "ssecvt")
1474 (set (attr "prefix_data16")
1476 (match_test "TARGET_AVX")
1478 (const_string "1")))
1479 (set_attr "prefix" "maybe_vex")
1480 (set_attr "mode" "<sseinsnmode>")])
1482 ; Expand patterns for non-temporal stores. At the moment, only those
1483 ; that directly map to insns are defined; it would be possible to
1484 ; define patterns for other modes that would expand to several insns.
1486 ;; Modes handled by storent patterns.
1487 (define_mode_iterator STORENT_MODE
1488 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1489 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1490 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1491 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1492 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1494 (define_expand "storent<mode>"
1495 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1496 (unspec:STORENT_MODE
1497 [(match_operand:STORENT_MODE 1 "register_operand")]
1501 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1505 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1507 ;; All integer modes with AVX512BW/DQ.
1508 (define_mode_iterator SWI1248_AVX512BWDQ
1509 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1511 ;; All integer modes with AVX512BW, where HImode operation
1512 ;; can be used instead of QImode.
1513 (define_mode_iterator SWI1248_AVX512BW
1514 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1516 ;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
1517 (define_mode_iterator SWI1248_AVX512BWDQ2
1518 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1519 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1521 (define_expand "kmov<mskmodesuffix>"
1522 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1523 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1525 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
;; Mask-register bitwise logic (any_logic: and/ior/xor) on k-registers.
;; The UNSPEC_MASKOP parallel tags this as a dedicated mask operation.
;; QImode without AVX512DQ has no byte-sized k-insn, so the attr "mode"
;; cond widens it and the word form k<logic>w is emitted instead of
;; k<logic>b.
1527 (define_insn "k<code><mode>"
1528 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1529 (any_logic:SWI1248_AVX512BW
1530 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1531 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1532 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1535 if (get_attr_mode (insn) == MODE_HI)
1536 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1538 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1540 [(set_attr "type" "msklog")
1541 (set_attr "prefix" "vex")
1543 (cond [(and (match_test "<MODE>mode == QImode")
1544 (not (match_test "TARGET_AVX512DQ")))
1547 (const_string "<MODE>")))])
1549 (define_insn "kandn<mode>"
1550 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1551 (and:SWI1248_AVX512BW
1552 (not:SWI1248_AVX512BW
1553 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1554 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1555 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1558 if (get_attr_mode (insn) == MODE_HI)
1559 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1561 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1563 [(set_attr "type" "msklog")
1564 (set_attr "prefix" "vex")
1566 (cond [(and (match_test "<MODE>mode == QImode")
1567 (not (match_test "TARGET_AVX512DQ")))
1570 (const_string "<MODE>")))])
;; kxnor: op0 = ~(op1 ^ op2) on mask registers.  As with the other mask
;; logic insns, QImode without AVX512DQ falls back to the word form
;; (kxnorw) because no byte-sized encoding exists.
1572 (define_insn "kxnor<mode>"
1573 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1574 (not:SWI1248_AVX512BW
1575 (xor:SWI1248_AVX512BW
1576 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1577 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1578 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1581 if (get_attr_mode (insn) == MODE_HI)
1582 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1584 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1586 [(set_attr "type" "msklog")
1587 (set_attr "prefix" "vex")
1589 (cond [(and (match_test "<MODE>mode == QImode")
1590 (not (match_test "TARGET_AVX512DQ")))
1593 (const_string "<MODE>")))])
1595 (define_insn "knot<mode>"
1596 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1597 (not:SWI1248_AVX512BW
1598 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1599 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1602 if (get_attr_mode (insn) == MODE_HI)
1603 return "knotw\t{%1, %0|%0, %1}";
1605 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1607 [(set_attr "type" "msklog")
1608 (set_attr "prefix" "vex")
1610 (cond [(and (match_test "<MODE>mode == QImode")
1611 (not (match_test "TARGET_AVX512DQ")))
1614 (const_string "<MODE>")))])
1616 (define_insn "kadd<mode>"
1617 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1618 (plus:SWI1248_AVX512BWDQ2
1619 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1620 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1621 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1623 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1624 [(set_attr "type" "msklog")
1625 (set_attr "prefix" "vex")
1626 (set_attr "mode" "<MODE>")])
1628 ;; Mask variant shift mnemonics
1629 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
1631 (define_insn "k<code><mode>"
1632 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1633 (any_lshift:SWI1248_AVX512BWDQ
1634 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1635 (match_operand:QI 2 "immediate_operand" "n")))
1636 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1638 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1639 [(set_attr "type" "msklog")
1640 (set_attr "prefix" "vex")
1641 (set_attr "mode" "<MODE>")])
1643 (define_insn "ktest<mode>"
1644 [(set (reg:CC FLAGS_REG)
1646 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1647 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1650 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1651 [(set_attr "mode" "<MODE>")
1652 (set_attr "type" "msklog")
1653 (set_attr "prefix" "vex")])
1655 (define_insn "kortest<mode>"
1656 [(set (reg:CC FLAGS_REG)
1658 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1659 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1662 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1663 [(set_attr "mode" "<MODE>")
1664 (set_attr "type" "msklog")
1665 (set_attr "prefix" "vex")])
;; kunpckbw: concatenate two QImode mask registers into one HImode mask
;; (zero-extended halves combined; op1 forms the high byte per the
;; shift in the elided pattern lines — consistent with kunpckwd/kunpckdq
;; below).
1667 (define_insn "kunpckhi"
1668 [(set (match_operand:HI 0 "register_operand" "=k")
1671 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1673 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1675 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1676 [(set_attr "mode" "HI")
1677 (set_attr "type" "msklog")
1678 (set_attr "prefix" "vex")])
1680 (define_insn "kunpcksi"
1681 [(set (match_operand:SI 0 "register_operand" "=k")
1684 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1686 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1688 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1689 [(set_attr "mode" "SI")])
1691 (define_insn "kunpckdi"
1692 [(set (match_operand:DI 0 "register_operand" "=k")
1695 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1697 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1699 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1700 [(set_attr "mode" "DI")])
1703 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1705 ;; Parallel floating point arithmetic
1707 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1709 (define_expand "<code><mode>2"
1710 [(set (match_operand:VF 0 "register_operand")
1712 (match_operand:VF 1 "register_operand")))]
1714 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1716 (define_insn_and_split "*absneg<mode>2"
1717 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1718 (match_operator:VF 3 "absneg_operator"
1719 [(match_operand:VF 1 "vector_operand" "0, xBm,v, m")]))
1720 (use (match_operand:VF 2 "vector_operand" "xBm,0, vm,v"))]
1723 "&& reload_completed"
1726 enum rtx_code absneg_op;
1732 if (MEM_P (operands[1]))
1733 op1 = operands[2], op2 = operands[1];
1735 op1 = operands[1], op2 = operands[2];
1740 if (rtx_equal_p (operands[0], operands[1]))
1746 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1747 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1748 t = gen_rtx_SET (operands[0], t);
1752 [(set_attr "isa" "noavx,noavx,avx,avx")])
1754 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1755 [(set (match_operand:VF 0 "register_operand")
1757 (match_operand:VF 1 "<round_nimm_predicate>")
1758 (match_operand:VF 2 "<round_nimm_predicate>")))]
1759 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1760 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector FP add/sub.  Alternative 0 is the destructive two-operand SSE
;; form (op1 ties to op0; <comm> allows commutation for addition);
;; alternative 1 is the three-operand AVX/EVEX form with optional
;; masking (<mask_operand3>) and embedded rounding (<round_mask_op3>).
1762 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1763 [(set (match_operand:VF 0 "register_operand" "=x,v")
1765 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1766 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1767 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1768 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1770 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1771 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1772 [(set_attr "isa" "noavx,avx")
1773 (set_attr "type" "sseadd")
1774 (set_attr "prefix" "<mask_prefix3>")
1775 (set_attr "mode" "<MODE>")])
;; FP subtract with an embedded-broadcast memory operand:
;; op0 = op1 - broadcast(op2), where op2 is a scalar in memory expanded
;; with the {1toN} EVEX broadcast printed by <avx512bcst>.
1777 (define_insn "*sub<mode>3<mask_name>_bcst"
1778 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1780 (match_operand:VF_AVX512 1 "register_operand" "v")
1781 (vec_duplicate:VF_AVX512
1782 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
1784 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
1785 && <mask_mode512bit_condition>"
1786 "vsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<avx512bcst>}"
1787 [(set_attr "prefix" "evex")
1788 (set_attr "type" "sseadd")
1789 (set_attr "mode" "<MODE>")])
1791 (define_insn "*add<mode>3<mask_name>_bcst"
1792 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1794 (vec_duplicate:VF_AVX512
1795 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
1796 (match_operand:VF_AVX512 2 "register_operand" "v")))]
1798 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
1799 && <mask_mode512bit_condition>"
1800 "vadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
1801 [(set_attr "prefix" "evex")
1802 (set_attr "type" "sseadd")
1803 (set_attr "mode" "<MODE>")])
1805 (define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
1806 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1809 (match_operand:VF_128 1 "register_operand" "0,v")
1810 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1815 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1816 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1817 [(set_attr "isa" "noavx,avx")
1818 (set_attr "type" "sseadd")
1819 (set_attr "prefix" "<round_scalar_prefix>")
1820 (set_attr "mode" "<ssescalarmode>")])
1822 (define_expand "mul<mode>3<mask_name><round_name>"
1823 [(set (match_operand:VF 0 "register_operand")
1825 (match_operand:VF 1 "<round_nimm_predicate>")
1826 (match_operand:VF 2 "<round_nimm_predicate>")))]
1827 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1828 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Vector FP multiply.  "%0" on op1 marks the operand pair commutative
;; for the destructive SSE alternative; alternative 1 is the
;; three-operand AVX/EVEX form with optional masking and rounding.
1830 (define_insn "*mul<mode>3<mask_name><round_name>"
1831 [(set (match_operand:VF 0 "register_operand" "=x,v")
1833 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1834 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1836 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
1837 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1839 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1840 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1841 [(set_attr "isa" "noavx,avx")
1842 (set_attr "type" "ssemul")
1843 (set_attr "prefix" "<mask_prefix3>")
1844 (set_attr "btver2_decode" "direct,double")
1845 (set_attr "mode" "<MODE>")])
;; FP multiply with an embedded-broadcast memory operand:
;; op0 = broadcast(op1) * op2, using the EVEX {1toN} broadcast printed
;; by <avx512bcst>.
;; Fix: the Intel-syntax half of the template wrote "%1<<avx512bcst>>"
;; (doubled angle brackets); only the inner token is an attribute
;; substitution, so the extra brackets would appear verbatim in the
;; emitted assembly.  Use "%1<avx512bcst>" as on the AT&T side and in
;; the sibling vsub/vadd _bcst patterns.
1847 (define_insn "*mul<mode>3<mask_name>_bcst"
1848 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1850 (vec_duplicate:VF_AVX512
1851 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
1852 (match_operand:VF_AVX512 2 "register_operand" "v")))]
1853 "TARGET_AVX512F && <mask_mode512bit_condition>"
1854 "vmul<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
1855 [(set_attr "prefix" "evex")
1856 (set_attr "type" "ssemul")
1857 (set_attr "mode" "<MODE>")])
1859 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1860 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1863 (match_operand:VF_128 1 "register_operand" "0,v")
1864 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1869 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1870 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1871 [(set_attr "isa" "noavx,avx")
1872 (set_attr "type" "sse<multdiv_mnemonic>")
1873 (set_attr "prefix" "<round_scalar_prefix>")
1874 (set_attr "btver2_decode" "direct,double")
1875 (set_attr "mode" "<ssescalarmode>")])
1877 (define_expand "div<mode>3"
1878 [(set (match_operand:VF2 0 "register_operand")
1879 (div:VF2 (match_operand:VF2 1 "register_operand")
1880 (match_operand:VF2 2 "vector_operand")))]
1882 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1884 (define_expand "div<mode>3"
1885 [(set (match_operand:VF1 0 "register_operand")
1886 (div:VF1 (match_operand:VF1 1 "register_operand")
1887 (match_operand:VF1 2 "vector_operand")))]
1890 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1893 && TARGET_RECIP_VEC_DIV
1894 && !optimize_insn_for_size_p ()
1895 && flag_finite_math_only && !flag_trapping_math
1896 && flag_unsafe_math_optimizations)
1898 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Vector FP divide.  Non-commutative, so the destructive SSE
;; alternative requires op1 tied to op0 ("0"); alternative 1 is the
;; three-operand AVX/EVEX form with optional masking and rounding.
1903 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1904 [(set (match_operand:VF 0 "register_operand" "=x,v")
1906 (match_operand:VF 1 "register_operand" "0,v")
1907 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1908 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1910 div<ssemodesuffix>\t{%2, %0|%0, %2}
1911 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1912 [(set_attr "isa" "noavx,avx")
1913 (set_attr "type" "ssediv")
1914 (set_attr "prefix" "<mask_prefix3>")
1915 (set_attr "mode" "<MODE>")])
;; FP divide with an embedded-broadcast memory operand:
;; op0 = op1 / broadcast(op2), using the EVEX {1toN} broadcast printed
;; by <avx512bcst>.
;; Fix: the Intel-syntax half of the template wrote "%2<<avx512bcst>>"
;; (doubled angle brackets), which would leave stray '<'/'>' characters
;; in the emitted assembly after attribute substitution.  Use
;; "%2<avx512bcst>" as on the AT&T side and in *sub<mode>3_bcst.
1917 (define_insn "*<avx512>_div<mode>3<mask_name>_bcst"
1918 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1920 (match_operand:VF_AVX512 1 "register_operand" "v")
1921 (vec_duplicate:VF_AVX512
1922 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
1923 "TARGET_AVX512F && <mask_mode512bit_condition>"
1924 "vdiv<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<avx512bcst>}"
1925 [(set_attr "prefix" "evex")
1926 (set_attr "type" "ssediv")
1927 (set_attr "mode" "<MODE>")])
1929 (define_insn "<sse>_rcp<mode>2"
1930 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1932 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
1934 "%vrcpps\t{%1, %0|%0, %1}"
1935 [(set_attr "type" "sse")
1936 (set_attr "atom_sse_attr" "rcp")
1937 (set_attr "btver2_sse_attr" "rcp")
1938 (set_attr "prefix" "maybe_vex")
1939 (set_attr "mode" "<MODE>")])
1941 (define_insn "sse_vmrcpv4sf2"
1942 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1944 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1946 (match_operand:V4SF 2 "register_operand" "0,x")
1950 rcpss\t{%1, %0|%0, %k1}
1951 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1952 [(set_attr "isa" "noavx,avx")
1953 (set_attr "type" "sse")
1954 (set_attr "atom_sse_attr" "rcp")
1955 (set_attr "btver2_sse_attr" "rcp")
1956 (set_attr "prefix" "orig,vex")
1957 (set_attr "mode" "SF")])
1959 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1960 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1962 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1965 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1966 [(set_attr "type" "sse")
1967 (set_attr "prefix" "evex")
1968 (set_attr "mode" "<MODE>")])
1970 (define_insn "srcp14<mode>"
1971 [(set (match_operand:VF_128 0 "register_operand" "=v")
1974 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1976 (match_operand:VF_128 2 "register_operand" "v")
1979 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1980 [(set_attr "type" "sse")
1981 (set_attr "prefix" "evex")
1982 (set_attr "mode" "<MODE>")])
1984 (define_insn "srcp14<mode>_mask"
1985 [(set (match_operand:VF_128 0 "register_operand" "=v")
1989 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1991 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1992 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1993 (match_operand:VF_128 2 "register_operand" "v")
1996 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1997 [(set_attr "type" "sse")
1998 (set_attr "prefix" "evex")
1999 (set_attr "mode" "<MODE>")])
2001 (define_expand "sqrt<mode>2"
2002 [(set (match_operand:VF2 0 "register_operand")
2003 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
2006 (define_expand "sqrt<mode>2"
2007 [(set (match_operand:VF1 0 "register_operand")
2008 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
2012 && TARGET_RECIP_VEC_SQRT
2013 && !optimize_insn_for_size_p ()
2014 && flag_finite_math_only && !flag_trapping_math
2015 && flag_unsafe_math_optimizations)
2017 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
2022 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
2023 [(set (match_operand:VF 0 "register_operand" "=x,v")
2024 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
2025 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2027 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
2028 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
2029 [(set_attr "isa" "noavx,avx")
2030 (set_attr "type" "sse")
2031 (set_attr "atom_sse_attr" "sqrt")
2032 (set_attr "btver2_sse_attr" "sqrt")
2033 (set_attr "prefix" "maybe_vex")
2034 (set_attr "mode" "<MODE>")])
2036 (define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2037 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2040 (match_operand:VF_128 1 "vector_operand" "xBm,<round_scalar_constraint>"))
2041 (match_operand:VF_128 2 "register_operand" "0,v")
2045 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
2046 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
2047 [(set_attr "isa" "noavx,avx")
2048 (set_attr "type" "sse")
2049 (set_attr "atom_sse_attr" "sqrt")
2050 (set_attr "prefix" "<round_scalar_prefix>")
2051 (set_attr "btver2_sse_attr" "sqrt")
2052 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal square root patterns.  The expanders below do not emit a
;; bare rsqrt instruction; they call ix86_emit_swsqrtsf, which emits a
;; software Newton-Raphson refinement sequence (final argument 'true'
;; selects the reciprocal-sqrt variant).
2054 (define_expand "rsqrt<mode>2"
2055 [(set (match_operand:VF1_128_256 0 "register_operand")
2057 [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
2058 "TARGET_SSE && TARGET_SSE_MATH"
2060 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; 512-bit variant, gated on AVX512ER (which provides the higher
;; precision approximation instructions used by the expansion).
2064 (define_expand "rsqrtv16sf2"
2065 [(set (match_operand:V16SF 0 "register_operand")
2067 [(match_operand:V16SF 1 "vector_operand")]
2069 "TARGET_AVX512ER && TARGET_SSE_MATH"
2071 ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
;; The raw SSE/AVX rsqrtps approximation instruction (~12-bit
;; precision); %v emits the 'v' prefix when AVX is enabled.
2075 (define_insn "<sse>_rsqrt<mode>2"
2076 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2078 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
2080 "%vrsqrtps\t{%1, %0|%0, %1}"
2081 [(set_attr "type" "sse")
2082 (set_attr "prefix" "maybe_vex")
2083 (set_attr "mode" "<MODE>")])
;; AVX512 vrsqrt14ps/pd: 14-bit-precision packed approximation, with
;; optional masking via <mask_name>/<mask_operand2>.
2085 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
2086 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2088 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2091 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2092 [(set_attr "type" "sse")
2093 (set_attr "prefix" "evex")
2094 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt14ss/sd: approximation of element 0 of operand 1,
;; upper elements merged from operand 2.
2096 (define_insn "rsqrt14<mode>"
2097 [(set (match_operand:VF_128 0 "register_operand" "=v")
2100 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2102 (match_operand:VF_128 2 "register_operand" "v")
2105 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2106 [(set_attr "type" "sse")
2107 (set_attr "prefix" "evex")
2108 (set_attr "mode" "<MODE>")])
;; Masked scalar form: operand 4 is the mask register (Yk), operand 3
;; the merge source ("0C": either the destination or zero), emitted
;; via the %{%4%}%N3 template as {%k4} and optional {z}.
2110 (define_insn "rsqrt14_<mode>_mask"
2111 [(set (match_operand:VF_128 0 "register_operand" "=v")
2115 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2117 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2118 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2119 (match_operand:VF_128 2 "register_operand" "v")
2122 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2123 [(set_attr "type" "sse")
2124 (set_attr "prefix" "evex")
2125 (set_attr "mode" "<MODE>")])
;; Legacy scalar rsqrtss (SSE1); %k1 prints the scalar (SFmode) view
;; of operand 1 in Intel syntax.
2127 (define_insn "sse_vmrsqrtv4sf2"
2128 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2130 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2132 (match_operand:V4SF 2 "register_operand" "0,x")
2136 rsqrtss\t{%1, %0|%0, %k1}
2137 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
2138 [(set_attr "isa" "noavx,avx")
2139 (set_attr "type" "sse")
2140 (set_attr "prefix" "orig,vex")
2141 (set_attr "mode" "SF")])
;; smin/smax expander.  When -0.0 or NaN must be honored
;; (!flag_finite_math_only || flag_signed_zeros) it dispatches to the
;; non-commutative ieee_* pattern below, which pins operand order to
;; match the hardware minps/maxps semantics; otherwise it falls
;; through to the commutative insn via the generated pattern.
2143 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
2144 [(set (match_operand:VF 0 "register_operand")
2146 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
2147 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
2148 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2150 if (!flag_finite_math_only || flag_signed_zeros)
2152 operands[1] = force_reg (<MODE>mode, operands[1]);
2153 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2154 (operands[0], operands[1], operands[2]
2155 <mask_operand_arg34>
2156 <round_saeonly_mask_arg3>));
2160 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
2163 ;; These versions of the min/max patterns are intentionally ignorant of
2164 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
2165 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
2166 ;; are undefined in this condition, we're certain this is correct.
;; Commutative variant ("%0,v" on operand 1 marks commutativity).
2168 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2169 [(set (match_operand:VF 0 "register_operand" "=x,v")
2171 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
2172 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2174 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2175 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2177 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2178 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2179 [(set_attr "isa" "noavx,avx")
2180 (set_attr "type" "sseadd")
2181 (set_attr "btver2_sse_attr" "maxmin")
2182 (set_attr "prefix" "<mask_prefix3>")
2183 (set_attr "mode" "<MODE>")])
2185 ;; These versions of the min/max patterns implement exactly the operations
2186 ;;   min = (op1 < op2 ? op1 : op2)
2187 ;;   max = (!(op1 < op2) ? op1 : op2)
2188 ;; Their operands are not commutative, and thus they may be used in the
2189 ;; presence of -0.0 and NaN.
;; Non-commutative IEEE-order variant: operand 1 must stay in a
;; register (no "%" commutativity mark) so operand order is preserved.
2191 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2192 [(set (match_operand:VF 0 "register_operand" "=x,v")
2194 [(match_operand:VF 1 "register_operand" "0,v")
2195 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2198 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2200 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2201 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2202 [(set_attr "isa" "noavx,avx")
2203 (set_attr "type" "sseadd")
2204 (set_attr "btver2_sse_attr" "maxmin")
2205 (set_attr "prefix" "<mask_prefix3>")
2206 (set_attr "mode" "<MODE>")])
;; Scalar min/max (minss/maxss etc.): element 0 from the min/max of
;; operands 1 and 2, upper elements merged from operand 1.
2208 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2209 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2212 (match_operand:VF_128 1 "register_operand" "0,v")
2213 (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_scalar_constraint>"))
2218 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2219 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2220 [(set_attr "isa" "noavx,avx")
2221 (set_attr "type" "sse")
2222 (set_attr "btver2_sse_attr" "maxmin")
2223 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2224 (set_attr "mode" "<ssescalarmode>")])
;; SSE3/AVX ADDSUB patterns: a vec_merge of (minus op1 op2) with
;; (plus op1 op2).  Per the addsubpd/addsubps ISA definition the
;; even-numbered elements take the difference and the odd-numbered
;; elements the sum; the vec_merge selector encoding that is elided
;; from this listing — TODO confirm against the full source.
2226 (define_insn "avx_addsubv4df3"
2227 [(set (match_operand:V4DF 0 "register_operand" "=x")
2230 (match_operand:V4DF 1 "register_operand" "x")
2231 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2232 (plus:V4DF (match_dup 1) (match_dup 2))
2235 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2236 [(set_attr "type" "sseadd")
2237 (set_attr "prefix" "vex")
2238 (set_attr "mode" "V4DF")])
;; 128-bit addsubpd; alternative 0 is legacy SSE3 (dest == op1),
;; alternative 1 the AVX three-operand form.
2240 (define_insn "sse3_addsubv2df3"
2241 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2244 (match_operand:V2DF 1 "register_operand" "0,x")
2245 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2246 (plus:V2DF (match_dup 1) (match_dup 2))
2250 addsubpd\t{%2, %0|%0, %2}
2251 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2252 [(set_attr "isa" "noavx,avx")
2253 (set_attr "type" "sseadd")
2254 (set_attr "atom_unit" "complex")
2255 (set_attr "prefix" "orig,vex")
2256 (set_attr "mode" "V2DF")])
;; 256-bit single-precision variant.
2258 (define_insn "avx_addsubv8sf3"
2259 [(set (match_operand:V8SF 0 "register_operand" "=x")
2262 (match_operand:V8SF 1 "register_operand" "x")
2263 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2264 (plus:V8SF (match_dup 1) (match_dup 2))
2267 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2268 [(set_attr "type" "sseadd")
2269 (set_attr "prefix" "vex")
2270 (set_attr "mode" "V8SF")])
;; 128-bit single-precision variant.
2272 (define_insn "sse3_addsubv4sf3"
2273 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2276 (match_operand:V4SF 1 "register_operand" "0,x")
2277 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2278 (plus:V4SF (match_dup 1) (match_dup 2))
2282 addsubps\t{%2, %0|%0, %2}
2283 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2284 [(set_attr "isa" "noavx,avx")
2285 (set_attr "type" "sseadd")
2286 (set_attr "prefix" "orig,vex")
2287 (set_attr "prefix_rep" "1,*")
2288 (set_attr "mode" "V4SF")])
;; Splitters that canonicalize various minus/plus merge forms into the
;; addsub shape matched by the insns above.
;; NOTE(review): the "(define_split" opener lines (and the closing
;; template lines) are elided from this listing; only the bodies are
;; visible — confirm structure against the full source.
;;
;; Variant 1: vec_merge via addsub_vm_operator with an integer merge
;; mask (operand 5); operands (1,2) must equal (3,4) possibly swapped.
2291 [(set (match_operand:VF_128_256 0 "register_operand")
2292 (match_operator:VF_128_256 6 "addsub_vm_operator"
2294 (match_operand:VF_128_256 1 "register_operand")
2295 (match_operand:VF_128_256 2 "vector_operand"))
2297 (match_operand:VF_128_256 3 "vector_operand")
2298 (match_operand:VF_128_256 4 "vector_operand"))
2299 (match_operand 5 "const_int_operand")]))]
2301 && can_create_pseudo_p ()
2302 && ((rtx_equal_p (operands[1], operands[3])
2303 && rtx_equal_p (operands[2], operands[4]))
2304 || (rtx_equal_p (operands[1], operands[4])
2305 && rtx_equal_p (operands[2], operands[3])))"
2307 (vec_merge:VF_128_256
2308 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2309 (plus:VF_128_256 (match_dup 1) (match_dup 2))
;; Variant 2: same recognition with PLUS and MINUS operand pairs
;; swapped; the preparation statement inverts the merge-mask bits
;; (masked to the element count) to compensate.
2313 [(set (match_operand:VF_128_256 0 "register_operand")
2314 (match_operator:VF_128_256 6 "addsub_vm_operator"
2316 (match_operand:VF_128_256 1 "vector_operand")
2317 (match_operand:VF_128_256 2 "vector_operand"))
2319 (match_operand:VF_128_256 3 "register_operand")
2320 (match_operand:VF_128_256 4 "vector_operand"))
2321 (match_operand 5 "const_int_operand")]))]
2323 && can_create_pseudo_p ()
2324 && ((rtx_equal_p (operands[1], operands[3])
2325 && rtx_equal_p (operands[2], operands[4]))
2326 || (rtx_equal_p (operands[1], operands[4])
2327 && rtx_equal_p (operands[2], operands[3])))"
2329 (vec_merge:VF_128_256
2330 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2331 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2334 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes.  */
2336 = GEN_INT (~INTVAL (operands[5])
2337 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
;; Variant 3: vec_select form (addsub_vs_operator over a vec_concat of
;; the minus and plus results, with a selection parallel).  The
;; preparation statement rebuilds the merge mask: selector indices
;; below NUNITS pick the minus half, so their bits are set.
2341 [(set (match_operand:VF_128_256 0 "register_operand")
2342 (match_operator:VF_128_256 7 "addsub_vs_operator"
2343 [(vec_concat:<ssedoublemode>
2345 (match_operand:VF_128_256 1 "register_operand")
2346 (match_operand:VF_128_256 2 "vector_operand"))
2348 (match_operand:VF_128_256 3 "vector_operand")
2349 (match_operand:VF_128_256 4 "vector_operand")))
2350 (match_parallel 5 "addsub_vs_parallel"
2351 [(match_operand 6 "const_int_operand")])]))]
2353 && can_create_pseudo_p ()
2354 && ((rtx_equal_p (operands[1], operands[3])
2355 && rtx_equal_p (operands[2], operands[4]))
2356 || (rtx_equal_p (operands[1], operands[4])
2357 && rtx_equal_p (operands[2], operands[3])))"
2359 (vec_merge:VF_128_256
2360 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2361 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2364 int i, nelt = XVECLEN (operands[5], 0);
2365 HOST_WIDE_INT ival = 0;
2367 for (i = 0; i < nelt; i++)
2368 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2369 ival |= HOST_WIDE_INT_1 << i;
2371 operands[5] = GEN_INT (ival);
;; Variant 4: vec_select form with minus/plus halves swapped in the
;; concat; mask bits come from indices >= NUNITS instead.
2375 [(set (match_operand:VF_128_256 0 "register_operand")
2376 (match_operator:VF_128_256 7 "addsub_vs_operator"
2377 [(vec_concat:<ssedoublemode>
2379 (match_operand:VF_128_256 1 "vector_operand")
2380 (match_operand:VF_128_256 2 "vector_operand"))
2382 (match_operand:VF_128_256 3 "register_operand")
2383 (match_operand:VF_128_256 4 "vector_operand")))
2384 (match_parallel 5 "addsub_vs_parallel"
2385 [(match_operand 6 "const_int_operand")])]))]
2387 && can_create_pseudo_p ()
2388 && ((rtx_equal_p (operands[1], operands[3])
2389 && rtx_equal_p (operands[2], operands[4]))
2390 || (rtx_equal_p (operands[1], operands[4])
2391 && rtx_equal_p (operands[2], operands[3])))"
2393 (vec_merge:VF_128_256
2394 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2395 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2398 int i, nelt = XVECLEN (operands[5], 0);
2399 HOST_WIDE_INT ival = 0;
2401 for (i = 0; i < nelt; i++)
2402 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2403 ival |= HOST_WIDE_INT_1 << i;
2405 operands[5] = GEN_INT (ival);
;; Horizontal add/subtract patterns (haddpd/hsubpd/haddps/hsubps and
;; their VEX forms).  The 256-bit AVX variants operate within each
;; 128-bit lane, visible here in the vec_select element indices:
;; lane 0 combines elements 0/1 of each source, lane 1 elements 2/3.
2408 (define_insn "avx_h<plusminus_insn>v4df3"
2409 [(set (match_operand:V4DF 0 "register_operand" "=x")
2414 (match_operand:V4DF 1 "register_operand" "x")
2415 (parallel [(const_int 0)]))
2416 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2419 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2420 (parallel [(const_int 0)]))
2421 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2424 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2425 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2427 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2428 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2430 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2431 [(set_attr "type" "sseadd")
2432 (set_attr "prefix" "vex")
2433 (set_attr "mode" "V4DF")])
;; Expander for haddpd with canonical element order (0 + 1 of each
;; operand); the *insn below accepts either order.
2435 (define_expand "sse3_haddv2df3"
2436 [(set (match_operand:V2DF 0 "register_operand")
2440 (match_operand:V2DF 1 "register_operand")
2441 (parallel [(const_int 0)]))
2442 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2445 (match_operand:V2DF 2 "vector_operand")
2446 (parallel [(const_int 0)]))
2447 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; Recognizer: because addition is commutative, any element order is
;; accepted as long as each pair uses both elements (indices differ).
2450 (define_insn "*sse3_haddv2df3"
2451 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2455 (match_operand:V2DF 1 "register_operand" "0,x")
2456 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2459 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2462 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2463 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2466 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2468 && INTVAL (operands[3]) != INTVAL (operands[4])
2469 && INTVAL (operands[5]) != INTVAL (operands[6])"
2471 haddpd\t{%2, %0|%0, %2}
2472 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2473 [(set_attr "isa" "noavx,avx")
2474 (set_attr "type" "sseadd")
2475 (set_attr "prefix" "orig,vex")
2476 (set_attr "mode" "V2DF")])
;; hsubpd: subtraction is not commutative, so the element order is
;; fixed at (element 0 - element 1).
2478 (define_insn "sse3_hsubv2df3"
2479 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2483 (match_operand:V2DF 1 "register_operand" "0,x")
2484 (parallel [(const_int 0)]))
2485 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2488 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2489 (parallel [(const_int 0)]))
2490 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2493 hsubpd\t{%2, %0|%0, %2}
2494 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2495 [(set_attr "isa" "noavx,avx")
2496 (set_attr "type" "sseadd")
2497 (set_attr "prefix" "orig,vex")
2498 (set_attr "mode" "V2DF")])
;; DFmode result: the sum of the two elements of a V2DF register,
;; implemented as haddpd of the register with itself (only the low
;; element of the result is used).
2500 (define_insn "*sse3_haddv2df3_low"
2501 [(set (match_operand:DF 0 "register_operand" "=x,x")
2504 (match_operand:V2DF 1 "register_operand" "0,x")
2505 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2508 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2510 && INTVAL (operands[2]) != INTVAL (operands[3])"
2512 haddpd\t{%0, %0|%0, %0}
2513 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2514 [(set_attr "isa" "noavx,avx")
2515 (set_attr "type" "sseadd1")
2516 (set_attr "prefix" "orig,vex")
2517 (set_attr "mode" "V2DF")])
;; DFmode result: element 0 minus element 1, via hsubpd with itself.
2519 (define_insn "*sse3_hsubv2df3_low"
2520 [(set (match_operand:DF 0 "register_operand" "=x,x")
2523 (match_operand:V2DF 1 "register_operand" "0,x")
2524 (parallel [(const_int 0)]))
2527 (parallel [(const_int 1)]))))]
2530 hsubpd\t{%0, %0|%0, %0}
2531 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2532 [(set_attr "isa" "noavx,avx")
2533 (set_attr "type" "sseadd1")
2534 (set_attr "prefix" "orig,vex")
2535 (set_attr "mode" "V2DF")])
;; 256-bit vhaddps/vhsubps: per-lane, pairing (0,1)(2,3) of each
;; source in the low lane and (4,5)(6,7) in the high lane.
2537 (define_insn "avx_h<plusminus_insn>v8sf3"
2538 [(set (match_operand:V8SF 0 "register_operand" "=x")
2544 (match_operand:V8SF 1 "register_operand" "x")
2545 (parallel [(const_int 0)]))
2546 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2548 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2549 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2553 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2554 (parallel [(const_int 0)]))
2555 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2557 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2558 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2562 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2563 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2565 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2566 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2569 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2570 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2572 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2573 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2575 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2576 [(set_attr "type" "sseadd")
2577 (set_attr "prefix" "vex")
2578 (set_attr "mode" "V8SF")])
;; 128-bit haddps/hsubps.
2580 (define_insn "sse3_h<plusminus_insn>v4sf3"
2581 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2586 (match_operand:V4SF 1 "register_operand" "0,x")
2587 (parallel [(const_int 0)]))
2588 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2590 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2591 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2595 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2596 (parallel [(const_int 0)]))
2597 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2599 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2600 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2603 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2604 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2605 [(set_attr "isa" "noavx,avx")
2606 (set_attr "type" "sseadd")
2607 (set_attr "atom_unit" "complex")
2608 (set_attr "prefix" "orig,vex")
2609 (set_attr "prefix_rep" "1,*")
2610 (set_attr "mode" "V4SF")])
;; Vector-to-scalar reduction expanders.  The 128-bit variants call
;; ix86_expand_reduc to build a shuffle/op tree and extract element 0;
;; the wider variants split the vector in half (vec_extract_hi +
;; full-width binop on the halves) and recurse into the half-width
;; reduc_* expander.
2612 (define_mode_iterator REDUC_SSE_PLUS_MODE
2613 [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")])
2615 (define_expand "reduc_plus_scal_<mode>"
2616 [(plus:REDUC_SSE_PLUS_MODE
2617 (match_operand:<ssescalarmode> 0 "register_operand")
2618 (match_operand:REDUC_SSE_PLUS_MODE 1 "register_operand"))]
2621 rtx tmp = gen_reg_rtx (<MODE>mode);
2622 ix86_expand_reduc (gen_add<mode>3, tmp, operands[1]);
2623 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
;; 256/512-bit plus reductions: halve and recurse.
2628 (define_mode_iterator REDUC_PLUS_MODE
2629 [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
2630 (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
2632 (define_expand "reduc_plus_scal_<mode>"
2633 [(plus:REDUC_PLUS_MODE
2634 (match_operand:<ssescalarmode> 0 "register_operand")
2635 (match_operand:REDUC_PLUS_MODE 1 "register_operand"))]
2638 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2639 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2640 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2641 emit_insn (gen_add<ssehalfvecmodelower>3
2642 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2643 emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2647 ;; Modes handled by reduc_sm{in,ax}* patterns.
2648 (define_mode_iterator REDUC_SSE_SMINMAX_MODE
2649 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
2650 (V2DI "TARGET_SSE") (V4SI "TARGET_SSE") (V8HI "TARGET_SSE")
2651 (V16QI "TARGET_SSE")])
;; 128-bit signed min/max reductions.
2653 (define_expand "reduc_<code>_scal_<mode>"
2654 [(smaxmin:REDUC_SSE_SMINMAX_MODE
2655 (match_operand:<ssescalarmode> 0 "register_operand")
2656 (match_operand:REDUC_SSE_SMINMAX_MODE 1 "register_operand"))]
2659 rtx tmp = gen_reg_rtx (<MODE>mode);
2660 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2661 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2666 (define_mode_iterator REDUC_SMINMAX_MODE
2667 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2668 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2669 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2670 (V64QI "TARGET_AVX512BW")
2671 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2672 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2673 (V8DF "TARGET_AVX512F")])
;; Wide signed min/max reductions: halve and recurse.
2675 (define_expand "reduc_<code>_scal_<mode>"
2676 [(smaxmin:REDUC_SMINMAX_MODE
2677 (match_operand:<ssescalarmode> 0 "register_operand")
2678 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2681 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2682 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2683 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2684 emit_insn (gen_<code><ssehalfvecmodelower>3
2685 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2686 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
;; Unsigned min/max reductions for the 512-bit AVX512BW modes.
2690 (define_expand "reduc_<code>_scal_<mode>"
2691 [(umaxmin:VI_AVX512BW
2692 (match_operand:<ssescalarmode> 0 "register_operand")
2693 (match_operand:VI_AVX512BW 1 "register_operand"))]
2696 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2697 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2698 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2699 emit_insn (gen_<code><ssehalfvecmodelower>3
2700 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2701 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
;; 256-bit integer reductions: halve once, then finish with
;; ix86_expand_reduc on the 128-bit half and extract element 0.
2705 (define_expand "reduc_<code>_scal_<mode>"
2707 (match_operand:<ssescalarmode> 0 "register_operand")
2708 (match_operand:VI_256 1 "register_operand"))]
2711 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2712 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2713 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2714 emit_insn (gen_<code><ssehalfvecmodelower>3
2715 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2716 rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
2717 ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2);
2718 emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower>
2719 (operands[0], tmp3, const0_rtx));
;; Special case: unsigned-min reduction of V8HI.
2723 (define_expand "reduc_umin_scal_v8hi"
2725 (match_operand:HI 0 "register_operand")
2726 (match_operand:V8HI 1 "register_operand"))]
2729 rtx tmp = gen_reg_rtx (V8HImode);
2730 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2731 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
;; AVX512DQ vreduceps/pd: packed form with an 8-bit immediate control
;; (operand 2) and optional masking.
2735 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2736 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2738 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2739 (match_operand:SI 2 "const_0_to_255_operand")]
2742 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2743 [(set_attr "type" "sse")
2744 (set_attr "prefix" "evex")
2745 (set_attr "mode" "<MODE>")])
;; Scalar vreducess/sd: element 0 from operand 2 with immediate
;; control in operand 3; upper elements merged from operand 1.
2747 (define_insn "reduces<mode><mask_scalar_name>"
2748 [(set (match_operand:VF_128 0 "register_operand" "=v")
2751 [(match_operand:VF_128 1 "register_operand" "v")
2752 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2753 (match_operand:SI 3 "const_0_to_255_operand")]
2758 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2, %3}"
2759 [(set_attr "type" "sse")
2760 (set_attr "prefix" "evex")
2761 (set_attr "mode" "<MODE>")])
2763 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2765 ;; Parallel floating point comparisons
2767 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vcmpps/pd with the full 5-bit predicate immediate (0..31).
2769 (define_insn "avx_cmp<mode>3"
2770 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2772 [(match_operand:VF_128_256 1 "register_operand" "x")
2773 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2774 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2777 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2778 [(set_attr "type" "ssecmp")
2779 (set_attr "length_immediate" "1")
2780 (set_attr "prefix" "vex")
2781 (set_attr "mode" "<MODE>")])
;; Scalar AVX vcmpss/sd: compare result in element 0, upper elements
;; merged from operand 1.
2783 (define_insn "avx_vmcmp<mode>3"
2784 [(set (match_operand:VF_128 0 "register_operand" "=x")
2787 [(match_operand:VF_128 1 "register_operand" "x")
2788 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2789 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2794 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2795 [(set_attr "type" "ssecmp")
2796 (set_attr "length_immediate" "1")
2797 (set_attr "prefix" "vex")
2798 (set_attr "mode" "<ssescalarmode>")])
;; Legacy cmpps/pd via match_operator; this variant is restricted to
;; commutative comparison codes so operand 1 may carry the "%" mark.
;; %D3 prints the comparison predicate name (eq/lt/le/...).
2800 (define_insn "*<sse>_maskcmp<mode>3_comm"
2801 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2802 (match_operator:VF_128_256 3 "sse_comparison_operator"
2803 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2804 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2806 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2808 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2809 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2810 [(set_attr "isa" "noavx,avx")
2811 (set_attr "type" "ssecmp")
2812 (set_attr "length_immediate" "1")
2813 (set_attr "prefix" "orig,vex")
2814 (set_attr "mode" "<MODE>")])
;; Non-commutative variant (fixed operand order).
2816 (define_insn "<sse>_maskcmp<mode>3"
2817 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2818 (match_operator:VF_128_256 3 "sse_comparison_operator"
2819 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2820 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2823 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2824 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2825 [(set_attr "isa" "noavx,avx")
2826 (set_attr "type" "ssecmp")
2827 (set_attr "length_immediate" "1")
2828 (set_attr "prefix" "orig,vex")
2829 (set_attr "mode" "<MODE>")])
;; Scalar cmpss/sd with merge of upper elements.
2831 (define_insn "<sse>_vmmaskcmp<mode>3"
2832 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2834 (match_operator:VF_128 3 "sse_comparison_operator"
2835 [(match_operand:VF_128 1 "register_operand" "0,x")
2836 (match_operand:VF_128 2 "vector_operand" "xBm,xm")])
2841 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2842 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2843 [(set_attr "isa" "noavx,avx")
2844 (set_attr "type" "ssecmp")
2845 (set_attr "length_immediate" "1,*")
2846 (set_attr "prefix" "orig,vex")
2847 (set_attr "mode" "<ssescalarmode>")])
;; Immediate-range predicate per mode: FP compares take the 5-bit
;; vcmp predicate (0..31), integer vpcmp compares the 3-bit one (0..7).
2849 (define_mode_attr cmp_imm_predicate
2850 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2851 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2852 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2853 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2854 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2855 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2856 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2857 (V16HI "const_0_to_7_operand") (V8HI "const_0_to_7_operand")
2858 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
;; AVX512 compares producing a mask register (k) result.
;; vcmpps/pd / vpcmpd/q for the 32/64-bit element modes, with optional
;; SAE (suppress-all-exceptions) via the round_saeonly_* subst.
2860 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2861 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2862 (unspec:<avx512fmaskmode>
2863 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2864 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2865 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2867 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2868 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2869 [(set_attr "type" "ssecmp")
2870 (set_attr "length_immediate" "1")
2871 (set_attr "prefix" "evex")
2872 (set_attr "mode" "<sseinsnmode>")])
;; Signed vpcmpb/w for the 8/16-bit element modes (AVX512BW).
2874 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2875 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2876 (unspec:<avx512fmaskmode>
2877 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2878 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2879 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2882 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2883 [(set_attr "type" "ssecmp")
2884 (set_attr "length_immediate" "1")
2885 (set_attr "prefix" "evex")
2886 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned vpcmpub/uw (8/16-bit elements).
2888 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2889 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2890 (unspec:<avx512fmaskmode>
2891 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2892 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2893 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2894 UNSPEC_UNSIGNED_PCMP))]
2896 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2897 [(set_attr "type" "ssecmp")
2898 (set_attr "length_immediate" "1")
2899 (set_attr "prefix" "evex")
2900 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned vpcmpud/uq (32/64-bit elements).
2902 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2903 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2904 (unspec:<avx512fmaskmode>
2905 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2906 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2907 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2908 UNSPEC_UNSIGNED_PCMP))]
2910 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2911 [(set_attr "type" "ssecmp")
2912 (set_attr "length_immediate" "1")
2913 (set_attr "prefix" "evex")
2914 (set_attr "mode" "<sseinsnmode>")])
;; Scalar vcmpss/sd into a mask register; the AND with a constant
;; (elided from this listing) restricts the result to bit 0.
2916 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2917 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2918 (and:<avx512fmaskmode>
2919 (unspec:<avx512fmaskmode>
2920 [(match_operand:VF_128 1 "register_operand" "v")
2921 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2922 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2926 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
2927 [(set_attr "type" "ssecmp")
2928 (set_attr "length_immediate" "1")
2929 (set_attr "prefix" "evex")
2930 (set_attr "mode" "<ssescalarmode>")])
;; Masked scalar compare: operand 4 is a zeroing/write mask applied
;; to bit 0 of the result, printed via %{%4%}.
2932 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2933 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2934 (and:<avx512fmaskmode>
2935 (unspec:<avx512fmaskmode>
2936 [(match_operand:VF_128 1 "register_operand" "v")
2937 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2938 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2940 (and:<avx512fmaskmode>
2941 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2944 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
2945 [(set_attr "type" "ssecmp")
2946 (set_attr "length_immediate" "1")
2947 (set_attr "prefix" "evex")
2948 (set_attr "mode" "<ssescalarmode>")])
;; Packed compare via match_operator (predicate printed by %D3),
;; mask-register destination.
2950 (define_insn "avx512f_maskcmp<mode>3"
2951 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2952 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2953 [(match_operand:VF 1 "register_operand" "v")
2954 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2956 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2957 [(set_attr "type" "ssecmp")
2958 (set_attr "length_immediate" "1")
2959 (set_attr "prefix" "evex")
2960 (set_attr "mode" "<sseinsnmode>")])
;; comiss/ucomiss (and sd variants): compare element 0 of the two
;; vector operands and set the FP condition flags in EFLAGS.
;; <unord> selects the quiet (ucomi) vs signaling (comi) form.
2962 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
2963 [(set (reg:CCFP FLAGS_REG)
2966 (match_operand:<ssevecmode> 0 "register_operand" "v")
2967 (parallel [(const_int 0)]))
2969 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2970 (parallel [(const_int 0)]))))]
2971 "SSE_FLOAT_MODE_P (<MODE>mode)"
2972 "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2973 [(set_attr "type" "ssecomi")
2974 (set_attr "prefix" "maybe_vex")
2975 (set_attr "prefix_rep" "0")
2976 (set (attr "prefix_data16")
2977 (if_then_else (eq_attr "mode" "DF")
2979 (const_string "0")))
2980 (set_attr "mode" "<MODE>")])
;; vec_cmp / vec_cmpu expanders.  All of these delegate to a helper in
;; i386-expand.c: ix86_expand_mask_vec_cmp for AVX512 mask-register
;; results, ix86_expand_int_vec_cmp / ix86_expand_fp_vec_cmp for
;; vector-register results.  The gcc_assert (ok) / DONE lines are
;; elided from this listing.
;;
;; Signed compare, 32/64-bit element AVX512 modes -> mask register.
2982 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2983 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2984 (match_operator:<avx512fmaskmode> 1 ""
2985 [(match_operand:V48_AVX512VL 2 "register_operand")
2986 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
2989 bool ok = ix86_expand_mask_vec_cmp (operands);
;; Signed compare, 8/16-bit element AVX512 modes -> mask register.
2994 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2995 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2996 (match_operator:<avx512fmaskmode> 1 ""
2997 [(match_operand:VI12_AVX512VL 2 "register_operand")
2998 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3001 bool ok = ix86_expand_mask_vec_cmp (operands);
;; Signed compare, 256-bit integer modes -> integer vector.
3006 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3007 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3008 (match_operator:<sseintvecmode> 1 ""
3009 [(match_operand:VI_256 2 "register_operand")
3010 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3013 bool ok = ix86_expand_int_vec_cmp (operands);
;; Signed compare, 128-bit 8/16/32-bit element modes.
3018 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3019 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3020 (match_operator:<sseintvecmode> 1 ""
3021 [(match_operand:VI124_128 2 "register_operand")
3022 (match_operand:VI124_128 3 "vector_operand")]))]
3025 bool ok = ix86_expand_int_vec_cmp (operands);
;; Signed compare, V2DI (separate because 64-bit element compares
;; have their own ISA requirements).
3030 (define_expand "vec_cmpv2div2di"
3031 [(set (match_operand:V2DI 0 "register_operand")
3032 (match_operator:V2DI 1 ""
3033 [(match_operand:V2DI 2 "register_operand")
3034 (match_operand:V2DI 3 "vector_operand")]))]
3037 bool ok = ix86_expand_int_vec_cmp (operands);
;; FP compare, 256-bit modes -> integer vector mask.
3042 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3043 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3044 (match_operator:<sseintvecmode> 1 ""
3045 [(match_operand:VF_256 2 "register_operand")
3046 (match_operand:VF_256 3 "nonimmediate_operand")]))]
3049 bool ok = ix86_expand_fp_vec_cmp (operands);
;; FP compare, 128-bit modes.
3054 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3055 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3056 (match_operator:<sseintvecmode> 1 ""
3057 [(match_operand:VF_128 2 "register_operand")
3058 (match_operand:VF_128 3 "vector_operand")]))]
3061 bool ok = ix86_expand_fp_vec_cmp (operands);
;; Unsigned compares: same structure as the signed expanders above.
3066 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3067 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3068 (match_operator:<avx512fmaskmode> 1 ""
3069 [(match_operand:VI48_AVX512VL 2 "register_operand")
3070 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
3073 bool ok = ix86_expand_mask_vec_cmp (operands);
3078 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3079 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3080 (match_operator:<avx512fmaskmode> 1 ""
3081 [(match_operand:VI12_AVX512VL 2 "register_operand")
3082 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3085 bool ok = ix86_expand_mask_vec_cmp (operands);
3090 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3091 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3092 (match_operator:<sseintvecmode> 1 ""
3093 [(match_operand:VI_256 2 "register_operand")
3094 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3097 bool ok = ix86_expand_int_vec_cmp (operands);
3102 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3103 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3104 (match_operator:<sseintvecmode> 1 ""
3105 [(match_operand:VI124_128 2 "register_operand")
3106 (match_operand:VI124_128 3 "vector_operand")]))]
3109 bool ok = ix86_expand_int_vec_cmp (operands);
3114 (define_expand "vec_cmpuv2div2di"
3115 [(set (match_operand:V2DI 0 "register_operand")
3116 (match_operator:V2DI 1 ""
3117 [(match_operand:V2DI 2 "register_operand")
3118 (match_operand:V2DI 3 "vector_operand")]))]
3121 bool ok = ix86_expand_int_vec_cmp (operands);
3126 (define_expand "vec_cmpeqv2div2di"
3127 [(set (match_operand:V2DI 0 "register_operand")
3128 (match_operator:V2DI 1 ""
3129 [(match_operand:V2DI 2 "register_operand")
3130 (match_operand:V2DI 3 "vector_operand")]))]
3133 bool ok = ix86_expand_int_vec_cmp (operands);
;; vcond standard-name expanders: select between operands 1 and 2 based
;; on an FP comparison (operator 3) of operands 4 and 5.  The data mode
;; (V_*) and the comparison mode (VF_*) are iterated independently, so
;; the condition requires that both have the same number of elements.
;; Expansion is done by ix86_expand_fp_vcond.
3138 (define_expand "vcond<V_512:mode><VF_512:mode>"
3139 [(set (match_operand:V_512 0 "register_operand")
3141 (match_operator 3 ""
3142 [(match_operand:VF_512 4 "nonimmediate_operand")
3143 (match_operand:VF_512 5 "nonimmediate_operand")])
3144 (match_operand:V_512 1 "general_operand")
3145 (match_operand:V_512 2 "general_operand")))]
3147 && (GET_MODE_NUNITS (<V_512:MODE>mode)
3148 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3150 bool ok = ix86_expand_fp_vcond (operands);
;; 256-bit variant of the above.
3155 (define_expand "vcond<V_256:mode><VF_256:mode>"
3156 [(set (match_operand:V_256 0 "register_operand")
3158 (match_operator 3 ""
3159 [(match_operand:VF_256 4 "nonimmediate_operand")
3160 (match_operand:VF_256 5 "nonimmediate_operand")])
3161 (match_operand:V_256 1 "general_operand")
3162 (match_operand:V_256 2 "general_operand")))]
3164 && (GET_MODE_NUNITS (<V_256:MODE>mode)
3165 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3167 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit variant; comparison operands may be any vector_operand here.
3172 (define_expand "vcond<V_128:mode><VF_128:mode>"
3173 [(set (match_operand:V_128 0 "register_operand")
3175 (match_operator 3 ""
3176 [(match_operand:VF_128 4 "vector_operand")
3177 (match_operand:VF_128 5 "vector_operand")])
3178 (match_operand:V_128 1 "general_operand")
3179 (match_operand:V_128 2 "general_operand")))]
3181 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3182 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3184 bool ok = ix86_expand_fp_vcond (operands);
;; vcond_mask standard names: blend operand 1 (where the mask bit is
;; set) with operand 2 (where it is clear) under the mask in operand 3.
;; The AVX512VL forms keep the vec_merge RTL directly (the mask lives in
;; a mask register); the SSE/AVX forms below call ix86_expand_sse_movcc
;; to emit a blend/logic sequence using an integer-vector mask.
3189 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3190 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3191 (vec_merge:V48_AVX512VL
3192 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3193 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
3194 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
;; 8/16-bit element AVX512 variant.
3197 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3198 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3199 (vec_merge:VI12_AVX512VL
3200 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3201 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
3202 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
;; 256-bit integer blend via ix86_expand_sse_movcc (mask first, then
;; the true/false arms).
3205 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3206 [(set (match_operand:VI_256 0 "register_operand")
3208 (match_operand:VI_256 1 "nonimmediate_operand")
3209 (match_operand:VI_256 2 "nonimm_or_0_operand")
3210 (match_operand:<sseintvecmode> 3 "register_operand")))]
3213 ix86_expand_sse_movcc (operands[0], operands[3],
3214 operands[1], operands[2]);
;; 128-bit integer blend for 8/16/32-bit elements.
3218 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3219 [(set (match_operand:VI124_128 0 "register_operand")
3220 (vec_merge:VI124_128
3221 (match_operand:VI124_128 1 "vector_operand")
3222 (match_operand:VI124_128 2 "nonimm_or_0_operand")
3223 (match_operand:<sseintvecmode> 3 "register_operand")))]
3226 ix86_expand_sse_movcc (operands[0], operands[3],
3227 operands[1], operands[2]);
;; V2DI blend handled separately (64-bit elements).
3231 (define_expand "vcond_mask_v2div2di"
3232 [(set (match_operand:V2DI 0 "register_operand")
3234 (match_operand:V2DI 1 "vector_operand")
3235 (match_operand:V2DI 2 "nonimm_or_0_operand")
3236 (match_operand:V2DI 3 "register_operand")))]
3239 ix86_expand_sse_movcc (operands[0], operands[3],
3240 operands[1], operands[2]);
;; Floating-point 256-bit and 128-bit blends; the mask is still an
;; integer vector of matching layout.
3244 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3245 [(set (match_operand:VF_256 0 "register_operand")
3247 (match_operand:VF_256 1 "nonimmediate_operand")
3248 (match_operand:VF_256 2 "nonimm_or_0_operand")
3249 (match_operand:<sseintvecmode> 3 "register_operand")))]
3252 ix86_expand_sse_movcc (operands[0], operands[3],
3253 operands[1], operands[2]);
3257 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3258 [(set (match_operand:VF_128 0 "register_operand")
3260 (match_operand:VF_128 1 "vector_operand")
3261 (match_operand:VF_128 2 "nonimm_or_0_operand")
3262 (match_operand:<sseintvecmode> 3 "register_operand")))]
3265 ix86_expand_sse_movcc (operands[0], operands[3],
3266 operands[1], operands[2]);
3270 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3272 ;; Parallel floating point logical operations
3274 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ANDNOT of packed floats: (~op1) & op2, with optional AVX512 masking.
;; Four alternatives: legacy SSE two-operand, AVX three-operand VEX,
;; AVX512DQ EVEX, and plain AVX512F EVEX.  The C output fragment picks
;; the mnemonic/suffix at output time: when AVX512DQ is unavailable the
;; FP form vandnp[sd] does not exist, so the integer vpandn[qd] is
;; emitted instead (see the comment at line 3310).  The mode attribute
;; cond mirrors that choice so the right EVEX encoding is selected.
3276 (define_insn "<sse>_andnot<mode>3<mask_name>"
3277 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3280 (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3281 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3282 "TARGET_SSE && <mask_avx512vl_condition>"
3288 switch (which_alternative)
3291 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3296 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3302 switch (get_attr_mode (insn))
3310 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3311 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3312 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3315 suffix = "<ssemodesuffix>";
3318 snprintf (buf, sizeof (buf), ops, suffix);
3319 output_asm_insn (buf, operands);
3322 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3323 (set_attr "type" "sselog")
3324 (set_attr "prefix" "orig,maybe_vex,evex,evex")
3326 (cond [(and (match_test "<mask_applied>")
3327 (and (eq_attr "alternative" "1")
3328 (match_test "!TARGET_AVX512DQ")))
3329 (const_string "<sseintvecmode2>")
3330 (eq_attr "alternative" "3")
3331 (const_string "<sseintvecmode2>")
3332 (and (match_test "<MODE_SIZE> == 16")
3333 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3334 (const_string "<ssePSmode>")
3335 (match_test "TARGET_AVX")
3336 (const_string "<MODE>")
3337 (match_test "optimize_function_for_size_p (cfun)")
3338 (const_string "V4SF")
3340 (const_string "<MODE>")))])
;; 512-bit ANDNOT.  Same vandnp[sd]-vs-vpandn[qd] fallback as above;
;; without AVX512DQ the insn mode is forced to XI (512-bit integer).
3343 (define_insn "<sse>_andnot<mode>3<mask_name>"
3344 [(set (match_operand:VF_512 0 "register_operand" "=v")
3347 (match_operand:VF_512 1 "register_operand" "v"))
3348 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3355 suffix = "<ssemodesuffix>";
3358 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3359 if (!TARGET_AVX512DQ)
3361 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3365 snprintf (buf, sizeof (buf),
3366 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3368 output_asm_insn (buf, operands);
3371 [(set_attr "type" "sselog")
3372 (set_attr "prefix" "evex")
3374 (if_then_else (match_test "TARGET_AVX512DQ")
3375 (const_string "<sseinsnmode>")
3376 (const_string "XI")))])
;; Packed-FP logic (and/ior/xor via the any_logic code iterator).
;; The expanders legitimize operands (at most one memory operand,
;; commutative canonicalization) through
;; ix86_fixup_binary_operands_no_copy; the insns below do the output.
3378 (define_expand "<code><mode>3<mask_name>"
3379 [(set (match_operand:VF_128_256 0 "register_operand")
3380 (any_logic:VF_128_256
3381 (match_operand:VF_128_256 1 "vector_operand")
3382 (match_operand:VF_128_256 2 "vector_operand")))]
3383 "TARGET_SSE && <mask_avx512vl_condition>"
3384 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 512-bit expander.
3386 (define_expand "<code><mode>3<mask_name>"
3387 [(set (match_operand:VF_512 0 "register_operand")
3389 (match_operand:VF_512 1 "nonimmediate_operand")
3390 (match_operand:VF_512 2 "nonimmediate_operand")))]
3392 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 128/256-bit logic insn.  Alternatives and the AVX512DQ fallback
;; (v<logic>p[sd] -> vp<logic>[qd]) parallel the andnot pattern above.
3394 (define_insn "*<code><mode>3<mask_name>"
3395 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3396 (any_logic:VF_128_256
3397 (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3398 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3399 "TARGET_SSE && <mask_avx512vl_condition>
3400 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3406 switch (which_alternative)
3409 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3414 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3420 switch (get_attr_mode (insn))
3428 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
3429 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3430 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3433 suffix = "<ssemodesuffix>";
3436 snprintf (buf, sizeof (buf), ops, suffix);
3437 output_asm_insn (buf, operands);
3440 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3441 (set_attr "type" "sselog")
3442 (set_attr "prefix" "orig,maybe_evex,evex,evex")
3444 (cond [(and (match_test "<mask_applied>")
3445 (and (eq_attr "alternative" "1")
3446 (match_test "!TARGET_AVX512DQ")))
3447 (const_string "<sseintvecmode2>")
3448 (eq_attr "alternative" "3")
3449 (const_string "<sseintvecmode2>")
3450 (and (match_test "<MODE_SIZE> == 16")
3451 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3452 (const_string "<ssePSmode>")
3453 (match_test "TARGET_AVX")
3454 (const_string "<MODE>")
3455 (match_test "optimize_function_for_size_p (cfun)")
3456 (const_string "V4SF")
3458 (const_string "<MODE>")))])
;; 512-bit logic insn with the same DQ fallback; mode forced to XI when
;; only the integer form is encodable.
3460 (define_insn "*<code><mode>3<mask_name>"
3461 [(set (match_operand:VF_512 0 "register_operand" "=v")
3463 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3464 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3465 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3471 suffix = "<ssemodesuffix>";
3474 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3475 if (!TARGET_AVX512DQ)
3477 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3481 snprintf (buf, sizeof (buf),
3482 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3484 output_asm_insn (buf, operands);
3487 [(set_attr "type" "sselog")
3488 (set_attr "prefix" "evex")
3490 (if_then_else (match_test "TARGET_AVX512DQ")
3491 (const_string "<sseinsnmode>")
3492 (const_string "XI")))])
;; copysign<mode>3: operand 3 is a sign-bit mask built by
;; ix86_build_signbit_mask; result = (op1 & ~mask) | (op2 & mask),
;; i.e. magnitude of op1 with the sign of op2, via temporaries 4 and 5.
3494 (define_expand "copysign<mode>3"
3497 (not:VF (match_dup 3))
3498 (match_operand:VF 1 "vector_operand")))
3500 (and:VF (match_dup 3)
3501 (match_operand:VF 2 "vector_operand")))
3502 (set (match_operand:VF 0 "register_operand")
3503 (ior:VF (match_dup 4) (match_dup 5)))]
3506 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3508 operands[4] = gen_reg_rtx (<MODE>mode);
3509 operands[5] = gen_reg_rtx (<MODE>mode);
;; xorsign<mode>3: result = op1 ^ (op2 & signmask) — flips op1's sign
;; wherever op2 is negative.  Cheaper than copysign (one AND, one XOR).
3512 (define_expand "xorsign<mode>3"
3514 (and:VF (match_dup 3)
3515 (match_operand:VF 2 "vector_operand")))
3516 (set (match_operand:VF 0 "register_operand")
3517 (xor:VF (match_dup 4)
3518 (match_operand:VF 1 "vector_operand")))]
3521 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3523 operands[4] = gen_reg_rtx (<MODE>mode);
3526 ;; Also define scalar versions. These are used for abs, neg, and
3527 ;; conditional move. Using subregs into vector modes causes register
3528 ;; allocation lossage. These patterns do not allow memory operands
3529 ;; because the native instructions read the full 128-bits.
;; Scalar (SF/DF) ANDNOT performed on the full vector register; all
;; operands must be registers because the hardware instruction reads
;; 128 bits (see the block comment above).  Alternatives 2/3 are the
;; EVEX forms: with AVX512DQ the FP vandn[ps|pd] is used, otherwise the
;; integer vpandn[d|q]; alternative 3 additionally widens to the
;; 512-bit register view via the %g operand modifiers.
3531 (define_insn "*andnot<mode>3"
3532 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3535 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
3536 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3537 "SSE_FLOAT_MODE_P (<MODE>mode)"
3542 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3544 switch (which_alternative)
3547 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3550 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3553 if (TARGET_AVX512DQ)
3554 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3557 suffix = <MODE>mode == DFmode ? "q" : "d";
3558 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3562 if (TARGET_AVX512DQ)
3563 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3566 suffix = <MODE>mode == DFmode ? "q" : "d";
3567 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3574 snprintf (buf, sizeof (buf), ops, suffix);
3575 output_asm_insn (buf, operands);
3578 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3579 (set_attr "type" "sselog")
3580 (set_attr "prefix" "orig,vex,evex,evex")
3582 (cond [(eq_attr "alternative" "2")
3583 (if_then_else (match_test "TARGET_AVX512DQ")
3584 (const_string "<ssevecmode>")
3585 (const_string "TI"))
3586 (eq_attr "alternative" "3")
3587 (if_then_else (match_test "TARGET_AVX512DQ")
3588 (const_string "<avx512fvecmode>")
3589 (const_string "XI"))
3590 (and (match_test "<MODE_SIZE> == 16")
3591 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3592 (const_string "V4SF")
3593 (match_test "TARGET_AVX")
3594 (const_string "<ssevecmode>")
3595 (match_test "optimize_function_for_size_p (cfun)")
3596 (const_string "V4SF")
3598 (const_string "<ssevecmode>")))])
;; TFmode ANDNOT: always uses the integer pandn family (pandnq for the
;; EVEX alternatives, andnps when the insn mode collapses to V4SF).
3600 (define_insn "*andnottf3"
3601 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3603 (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
3604 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3610 = (which_alternative >= 2 ? "pandnq"
3611 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
3613 switch (which_alternative)
3616 ops = "%s\t{%%2, %%0|%%0, %%2}";
3620 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3623 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3629 snprintf (buf, sizeof (buf), ops, tmp);
3630 output_asm_insn (buf, operands);
3633 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3634 (set_attr "type" "sselog")
3635 (set (attr "prefix_data16")
3637 (and (eq_attr "alternative" "0")
3638 (eq_attr "mode" "TI"))
3640 (const_string "*")))
3641 (set_attr "prefix" "orig,vex,evex,evex")
3643 (cond [(eq_attr "alternative" "2")
3645 (eq_attr "alternative" "3")
3647 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3648 (const_string "V4SF")
3649 (match_test "TARGET_AVX")
3651 (ior (not (match_test "TARGET_SSE2"))
3652 (match_test "optimize_function_for_size_p (cfun)"))
3653 (const_string "V4SF")
3655 (const_string "TI")))])
;; Scalar (SF/DF) and/ior/xor on the full vector register, register
;; operands only — same rationale and alternative layout as the scalar
;; ANDNOT pattern above, including the AVX512DQ FP-vs-integer mnemonic
;; selection and the %g 512-bit register view in alternative 3.
3657 (define_insn "*<code><mode>3"
3658 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3660 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
3661 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3662 "SSE_FLOAT_MODE_P (<MODE>mode)"
3667 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3669 switch (which_alternative)
3672 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3675 if (!TARGET_AVX512DQ)
3677 suffix = <MODE>mode == DFmode ? "q" : "d";
3678 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3683 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3686 if (TARGET_AVX512DQ)
3687 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3690 suffix = <MODE>mode == DFmode ? "q" : "d";
3691 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3698 snprintf (buf, sizeof (buf), ops, suffix);
3699 output_asm_insn (buf, operands);
3702 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3703 (set_attr "type" "sselog")
3704 (set_attr "prefix" "orig,vex,evex,evex")
3706 (cond [(eq_attr "alternative" "2")
3707 (if_then_else (match_test "TARGET_AVX512DQ")
3708 (const_string "<ssevecmode>")
3709 (const_string "TI"))
3710 (eq_attr "alternative" "3")
3711 (if_then_else (match_test "TARGET_AVX512DQ")
3712 (const_string "<avx512fvecmode>")
3713 (const_string "XI"))
3714 (and (match_test "<MODE_SIZE> == 16")
3715 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3716 (const_string "V4SF")
3717 (match_test "TARGET_AVX")
3718 (const_string "<ssevecmode>")
3719 (match_test "optimize_function_for_size_p (cfun)")
3720 (const_string "V4SF")
3722 (const_string "<ssevecmode>")))])
;; TFmode logic expander: canonicalizes operands before the insn below.
3724 (define_expand "<code>tf3"
3725 [(set (match_operand:TF 0 "register_operand")
3727 (match_operand:TF 1 "vector_operand")
3728 (match_operand:TF 2 "vector_operand")))]
3730 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode logic insn: integer p<logic>q family for the EVEX
;; alternatives, <logic>ps when the insn mode is V4SF.
3732 (define_insn "*<code>tf3"
3733 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3735 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
3736 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3737 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3742 = (which_alternative >= 2 ? "p<logic>q"
3743 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
3745 switch (which_alternative)
3748 ops = "%s\t{%%2, %%0|%%0, %%2}";
3752 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3755 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3761 snprintf (buf, sizeof (buf), ops, tmp);
3762 output_asm_insn (buf, operands);
3765 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3766 (set_attr "type" "sselog")
3767 (set (attr "prefix_data16")
3769 (and (eq_attr "alternative" "0")
3770 (eq_attr "mode" "TI"))
3772 (const_string "*")))
3773 (set_attr "prefix" "orig,vex,evex,evex")
3775 (cond [(eq_attr "alternative" "2")
3777 (eq_attr "alternative" "3")
3779 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3780 (const_string "V4SF")
3781 (match_test "TARGET_AVX")
3783 (ior (not (match_test "TARGET_SSE2"))
3784 (match_test "optimize_function_for_size_p (cfun)"))
3785 (const_string "V4SF")
3787 (const_string "TI")))])
3789 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3791 ;; FMA floating point multiply/accumulate instructions. These include
3792 ;; scalar versions of the instructions as well as vector versions.
3794 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3796 ;; The standard names for scalar FMA are only available with SSE math enabled.
3797 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3798 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3799 ;; and TARGET_FMA4 are both false.
3800 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3801 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3802 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3803 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the fma/fms/fnma/fnms standard names.  Scalar modes also
;; require SSE math; vector 128/256-bit modes require FMA, FMA4 or
;; AVX512VL; 512-bit modes require AVX512F (see rationale above).
3804 (define_mode_iterator FMAMODEM
3805 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3806 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3807 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3808 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3809 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3810 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3811 (V16SF "TARGET_AVX512F")
3812 (V8DF "TARGET_AVX512F")])
;; Standard-name expanders: fma = a*b+c, fms = a*b-c, fnma = -a*b+c,
;; fnms = -a*b-c.  They only establish the RTL shape; matching insns
;; do the work.
3814 (define_expand "fma<mode>4"
3815 [(set (match_operand:FMAMODEM 0 "register_operand")
3817 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3818 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3819 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3821 (define_expand "fms<mode>4"
3822 [(set (match_operand:FMAMODEM 0 "register_operand")
3824 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3825 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3826 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3828 (define_expand "fnma<mode>4"
3829 [(set (match_operand:FMAMODEM 0 "register_operand")
3831 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3832 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3833 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3835 (define_expand "fnms<mode>4"
3836 [(set (match_operand:FMAMODEM 0 "register_operand")
3838 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3839 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3840 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3842 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM but without the TARGET_SSE_MATH
;; requirement on the scalar modes — used by the intrinsic expanders.
3843 (define_mode_iterator FMAMODE_AVX512
3844 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3845 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3846 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3847 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3848 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3849 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3850 (V16SF "TARGET_AVX512F")
3851 (V8DF "TARGET_AVX512F")])
;; Unconditional FMA mode list (scalar + 128/256-bit vectors).
3853 (define_mode_iterator FMAMODE
3854 [SF DF V4SF V2DF V8SF V4DF])
;; fma4i_* expanders used by the intrinsic builtins; shapes mirror
;; fma/fms/fnma/fnms but iterate over FMAMODE_AVX512.
3856 (define_expand "fma4i_fmadd_<mode>"
3857 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3859 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3860 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3861 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3863 (define_expand "fma4i_fmsub_<mode>"
3864 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3866 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3867 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3869 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
3871 (define_expand "fma4i_fnmadd_<mode>"
3872 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3875 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
3876 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3877 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3879 (define_expand "fma4i_fnmsub_<mode>"
3880 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3883 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
3884 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3886 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
;; Zero-masked FMA expander: forwards to the masked insn with a zero
;; vector as the merge source.
3888 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3889 [(match_operand:VF_AVX512VL 0 "register_operand")
3890 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3891 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3892 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3893 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3894 "TARGET_AVX512F && <round_mode512bit_condition>"
3896 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3897 operands[0], operands[1], operands[2], operands[3],
3898 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; FMA/FMA4 fused multiply-add insn.  The 132/213/231 forms differ in
;; which source operand is overwritten; alternatives 3-4 are the
;; four-operand FMA4 encoding.
3902 (define_insn "*fma_fmadd_<mode>"
3903 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3905 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3906 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3907 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3908 "TARGET_FMA || TARGET_FMA4"
3910 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3911 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3912 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3913 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3914 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3915 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3916 (set_attr "type" "ssemuladd")
3917 (set_attr "mode" "<MODE>")])
3919 ;; Suppose AVX-512F as baseline
;; Scalar + 512-bit modes unconditionally, 128/256-bit under AVX512VL.
3920 (define_mode_iterator VF_SF_AVX512VL
3921 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3922 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; EVEX fmadd with optional zero-masking and embedded rounding.
3924 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3925 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3927 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3928 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3929 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3930 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3932 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3933 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3934 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3935 [(set_attr "type" "ssemuladd")
3936 (set_attr "mode" "<MODE>")])
;; Broadcast forms: one multiplicand/addend is a scalar memory operand
;; duplicated across the vector (EVEX embedded broadcast, <avx512bcst>).
;; _bcst_1/2/3 differ in which of the three fma inputs is broadcast.
3938 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1"
3939 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
3941 (match_operand:VF_AVX512 1 "register_operand" "0,v")
3942 (match_operand:VF_AVX512 2 "register_operand" "v,0")
3943 (vec_duplicate:VF_AVX512
3944 (match_operand:<ssescalarmode> 3 "memory_operand" "m,m"))))]
3945 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3946 "vfmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
3947 [(set_attr "type" "ssemuladd")
3948 (set_attr "mode" "<MODE>")])
3950 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2"
3951 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
3953 (vec_duplicate:VF_AVX512
3954 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
3955 (match_operand:VF_AVX512 2 "register_operand" "0,v")
3956 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
3957 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3959 vfmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
3960 vfmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
3961 [(set_attr "type" "ssemuladd")
3962 (set_attr "mode" "<MODE>")])
3964 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3"
3965 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
3967 (match_operand:VF_AVX512 1 "register_operand" "0,v")
3968 (vec_duplicate:VF_AVX512
3969 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
3970 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
3971 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3973 vfmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
3974 vfmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
3975 [(set_attr "type" "ssemuladd")
3976 (set_attr "mode" "<MODE>")])
;; Merge-masked fmadd: lanes where the mask (operand 4) is clear keep
;; operand 1's value, so only the 132/213 forms (which overwrite op 1)
;; are usable here.
3978 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3979 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3980 (vec_merge:VF_AVX512VL
3982 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3983 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
3984 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3986 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3987 "TARGET_AVX512F && <round_mode512bit_condition>"
3989 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3990 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3991 [(set_attr "type" "ssemuladd")
3992 (set_attr "mode" "<MODE>")])
;; mask3 variant: masked-out lanes keep operand 3 (the addend), which
;; is what the 231 form overwrites.
3994 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3995 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3996 (vec_merge:VF_AVX512VL
3998 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
3999 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4000 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4002 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4004 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4005 [(set_attr "type" "ssemuladd")
4006 (set_attr "mode" "<MODE>")])
;; FMA/FMA4 fused multiply-subtract (a*b - c); structure parallels
;; *fma_fmadd_<mode> above.
4008 (define_insn "*fma_fmsub_<mode>"
4009 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4011 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4012 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4014 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4015 "TARGET_FMA || TARGET_FMA4"
4017 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4018 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4019 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4020 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4021 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4022 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4023 (set_attr "type" "ssemuladd")
4024 (set_attr "mode" "<MODE>")])
4026 (define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
4027 [(match_operand:VF_AVX512VL 0 "register_operand")
4028 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4029 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4030 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4031 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4032 "TARGET_AVX512F && <round_mode512bit_condition>"
4034 emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
4035 operands[0], operands[1], operands[2], operands[3],
4036 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4040 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
4041 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4043 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4044 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4046 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
4047 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4049 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4050 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4051 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4052 [(set_attr "type" "ssemuladd")
4053 (set_attr "mode" "<MODE>")])
4055 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1"
4056 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4058 (match_operand:VF_AVX512 1 "register_operand" "0,v")
4059 (match_operand:VF_AVX512 2 "register_operand" "v,0")
4061 (vec_duplicate:VF_AVX512
4062 (match_operand:<ssescalarmode> 3 "memory_operand" "m,m")))))]
4063 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4064 "vfmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4065 [(set_attr "type" "ssemuladd")
4066 (set_attr "mode" "<MODE>")])
;; fmsub with the first multiplicand (operand 1) broadcast from memory;
;; 132 vs 231 form chosen by which register operand is tied to the dest.
4068 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2"
4069 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4071 (vec_duplicate:VF_AVX512
4072 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
4073 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4075 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4076 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4078 vfmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4079 vfmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4080 [(set_attr "type" "ssemuladd")
4081 (set_attr "mode" "<MODE>")])
;; fmsub with the second multiplicand (operand 2) broadcast from memory;
;; 132 vs 231 form chosen by which register operand is tied to the dest.
;; Fix: operand 3 used "nonimmediate_operand" although its constraints
;; ("v,0") are register-only and every sibling _bcst pattern uses
;; "register_operand" — make the predicate agree with the constraints.
4083 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3"
4084 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4086 (match_operand:VF_AVX512 1 "register_operand" "0,v")
4087 (vec_duplicate:VF_AVX512
4088 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4090 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4091 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4093 vfmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4094 vfmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4095 [(set_attr "type" "ssemuladd")
4096 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsub: result merged with operand 1 (tied to the dest)
;; under mask operand 4; only the 132/213 forms keep op1 in the dest slot.
4098 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
4099 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4100 (vec_merge:VF_AVX512VL
4102 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4103 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4105 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4107 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4110 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4111 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4112 [(set_attr "type" "ssemuladd")
4113 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsub merging with operand 3 (the subtrahend, tied to the
;; dest), so the 231 form is the only encoding.
4115 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
4116 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4117 (vec_merge:VF_AVX512VL
4119 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4120 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4122 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4124 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4125 "TARGET_AVX512F && <round_mode512bit_condition>"
4126 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4127 [(set_attr "type" "ssemuladd")
4128 (set_attr "mode" "<MODE>")])
;; fnmadd for FMA3 and FMA4: the first three alternatives emit FMA3
;; 132/213/231 forms (one input tied to the dest); the last two emit the
;; 4-operand FMA4 encoding.
4130 (define_insn "*fma_fnmadd_<mode>"
4131 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4134 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4135 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4136 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4137 "TARGET_FMA || TARGET_FMA4"
4139 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4140 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4141 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4142 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4143 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4144 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4145 (set_attr "type" "ssemuladd")
4146 (set_attr "mode" "<MODE>")])
;; Zero-masking expander for fnmadd: lowers to the maskz_1 insn with a
;; CONST0 merge operand (masked-off lanes become zero).
4148 (define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>"
4149 [(match_operand:VF_AVX512VL 0 "register_operand")
4150 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4151 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4152 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4153 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4154 "TARGET_AVX512F && <round_mode512bit_condition>"
4156 emit_insn (gen_fma_fnmadd_<mode>_maskz_1<round_expand_name> (
4157 operands[0], operands[1], operands[2], operands[3],
4158 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Vector fnmadd (AVX512/FMA3 encodings) with optional zero-masking and
;; embedded rounding; 132/213/231 form chosen by the tied operand.
4162 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
4163 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4166 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
4167 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4168 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
4169 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4171 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4172 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4173 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4174 [(set_attr "type" "ssemuladd")
4175 (set_attr "mode" "<MODE>")])
;; fnmadd with the addend (operand 3) broadcast from memory; 213 form only.
4177 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_1"
4178 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4181 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4182 (match_operand:VF_AVX512 2 "register_operand" "v,0")
4183 (vec_duplicate:VF_AVX512
4184 (match_operand:<ssescalarmode> 3 "memory_operand" "m,m"))))]
4185 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4186 "vfnmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4187 [(set_attr "type" "ssemuladd")
4188 (set_attr "mode" "<MODE>")])
;; fnmadd with the (negated) first multiplicand broadcast from memory;
;; 132 vs 231 form by which register operand is tied to the destination.
4190 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_2"
4191 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4194 (vec_duplicate:VF_AVX512
4195 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
4196 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4197 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4198 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4200 vfnmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4201 vfnmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4202 [(set_attr "type" "ssemuladd")
4203 (set_attr "mode" "<MODE>")])
;; fnmadd with the second multiplicand (operand 2) broadcast from memory.
4205 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_3"
4206 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4209 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4210 (vec_duplicate:VF_AVX512
4211 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4212 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4213 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4215 vfnmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4216 vfnmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4217 [(set_attr "type" "ssemuladd")
4218 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmadd merging with operand 1 (tied to the destination).
4220 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
4221 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4222 (vec_merge:VF_AVX512VL
4225 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4226 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4227 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4229 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4230 "TARGET_AVX512F && <round_mode512bit_condition>"
4232 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4233 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4234 [(set_attr "type" "ssemuladd")
4235 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmadd merging with operand 3 (addend tied to the dest);
;; 231 form only.
4237 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
4238 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4239 (vec_merge:VF_AVX512VL
4242 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4243 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4244 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4246 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4247 "TARGET_AVX512F && <round_mode512bit_condition>"
4248 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4249 [(set_attr "type" "ssemuladd")
4250 (set_attr "mode" "<MODE>")])
;; fnmsub for FMA3 and FMA4 (condition is TARGET_FMA || TARGET_FMA4 —
;; no masking or embedded rounding applies to this pattern).
;; Fix: the three FMA3 templates wrongly used the <round_sd_mask_op4> /
;; <sd_mask_op4> subst attributes; mentioning subst attributes triggers the
;; subst machinery and is inconsistent with the sibling *fma_fnmadd_<mode>
;; pattern, which uses the plain operand forms. Use the plain forms here.
4252 (define_insn "*fma_fnmsub_<mode>"
4253 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4256 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4257 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4259 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4260 "TARGET_FMA || TARGET_FMA4"
4262 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4263 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4264 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4265 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4266 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4267 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4268 (set_attr "type" "ssemuladd")
4269 (set_attr "mode" "<MODE>")])
;; Zero-masking expander for fnmsub: lowers to the maskz_1 insn with a
;; CONST0 merge operand.
4271 (define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
4272 [(match_operand:VF_AVX512VL 0 "register_operand")
4273 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4274 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4275 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4276 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4277 "TARGET_AVX512F && <round_mode512bit_condition>"
4279 emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
4280 operands[0], operands[1], operands[2], operands[3],
4281 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Vector fnmsub (AVX512/FMA3 encodings) with optional zero-masking and
;; embedded rounding; 132/213/231 form chosen by the tied operand.
4285 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
4286 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4289 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
4290 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4292 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
4293 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4295 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4296 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4297 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4298 [(set_attr "type" "ssemuladd")
4299 (set_attr "mode" "<MODE>")])
;; fnmsub with the subtrahend (operand 3) broadcast from memory; 213 form.
4301 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1"
4302 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4305 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4306 (match_operand:VF_AVX512 2 "register_operand" "v,0")
4308 (vec_duplicate:VF_AVX512
4309 (match_operand:<ssescalarmode> 3 "memory_operand" "m,m")))))]
4310 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4311 "vfnmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4312 [(set_attr "type" "ssemuladd")
4313 (set_attr "mode" "<MODE>")])
;; fnmsub with the (negated) first multiplicand broadcast from memory.
4315 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2"
4316 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4319 (vec_duplicate:VF_AVX512
4320 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
4321 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4323 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4324 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4326 vfnmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4327 vfnmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4328 [(set_attr "type" "ssemuladd")
4329 (set_attr "mode" "<MODE>")])
;; fnmsub with the second multiplicand (operand 2) broadcast from memory.
4331 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3"
4332 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4335 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4336 (vec_duplicate:VF_AVX512
4337 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4339 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4340 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4342 vfnmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4343 vfnmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4344 [(set_attr "type" "ssemuladd")
4345 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmsub merging with operand 1 (tied to the destination).
4347 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
4348 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4349 (vec_merge:VF_AVX512VL
4352 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4353 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4355 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4357 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4358 "TARGET_AVX512F && <round_mode512bit_condition>"
4360 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4361 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4362 [(set_attr "type" "ssemuladd")
4363 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmsub merging with operand 3 (tied to the dest); 231 form.
4365 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
4366 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4367 (vec_merge:VF_AVX512VL
4370 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4371 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4373 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4375 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4377 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4378 [(set_attr "type" "ssemuladd")
4379 (set_attr "mode" "<MODE>")])
4381 ;; FMA parallel floating point multiply addsub and subadd operations.
4383 ;; It would be possible to represent these without the UNSPEC as
;;
;; (vec_merge
4386 ;;   (fma op1 op2 op3)
4387 ;;   (fma op1 op2 (neg op3))
;;   (merge-const))
;;
4390 ;; But this doesn't seem useful in practice.
;; Alternating add/sub FMA (vfmaddsub*), represented with an UNSPEC.
4392 (define_expand "fmaddsub_<mode>"
4393 [(set (match_operand:VF 0 "register_operand")
4395 [(match_operand:VF 1 "nonimmediate_operand")
4396 (match_operand:VF 2 "nonimmediate_operand")
4397 (match_operand:VF 3 "nonimmediate_operand")]
4399 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; Zero-masking expander for fmaddsub: lowers to the maskz_1 insn with a
;; CONST0 merge operand.
4401 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
4402 [(match_operand:VF_AVX512VL 0 "register_operand")
4403 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4404 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4405 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4406 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4409 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
4410 operands[0], operands[1], operands[2], operands[3],
4411 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; fmaddsub for FMA3 (alternatives 1-3, 132/213/231 forms) and FMA4
;; (alternatives 4-5, 4-operand form); 128/256-bit vectors only.
4415 (define_insn "*fma_fmaddsub_<mode>"
4416 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4418 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4419 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4420 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
4422 "TARGET_FMA || TARGET_FMA4"
4424 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4425 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4426 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4427 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4428 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4429 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4430 (set_attr "type" "ssemuladd")
4431 (set_attr "mode" "<MODE>")])
;; Vector fmaddsub (AVX512/FMA3 encodings) with optional zero-masking and
;; embedded rounding.
4433 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
4434 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4435 (unspec:VF_SF_AVX512VL
4436 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4437 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4438 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
4440 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4442 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4443 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4444 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4445 [(set_attr "type" "ssemuladd")
4446 (set_attr "mode" "<MODE>")])
;; Merge-masked fmaddsub merging with operand 1 (tied to the destination).
4448 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4449 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4450 (vec_merge:VF_AVX512VL
4452 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4453 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4454 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")]
4457 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4460 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4461 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4462 [(set_attr "type" "ssemuladd")
4463 (set_attr "mode" "<MODE>")])
;; Merge-masked fmaddsub merging with operand 3 (tied to the dest); 231 form.
4465 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4466 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4467 (vec_merge:VF_AVX512VL
4469 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4470 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4471 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4474 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4476 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4477 [(set_attr "type" "ssemuladd")
4478 (set_attr "mode" "<MODE>")])
;; fmsubadd for FMA3/FMA4 (alternating sub/add variant of fmaddsub).
4480 (define_insn "*fma_fmsubadd_<mode>"
4481 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4483 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4484 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4486 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4488 "TARGET_FMA || TARGET_FMA4"
4490 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4491 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4492 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4493 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4494 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4495 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4496 (set_attr "type" "ssemuladd")
4497 (set_attr "mode" "<MODE>")])
;; Vector fmsubadd (AVX512/FMA3 encodings) with optional zero-masking and
;; embedded rounding.
4499 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4500 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4501 (unspec:VF_SF_AVX512VL
4502 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4503 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4505 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4507 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4509 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4510 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4511 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4512 [(set_attr "type" "ssemuladd")
4513 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsubadd merging with operand 1 (tied to the destination).
4515 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4516 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4517 (vec_merge:VF_AVX512VL
4519 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4520 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4522 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
4525 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4528 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4529 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4530 [(set_attr "type" "ssemuladd")
4531 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsubadd merging with operand 3 (tied to the dest); 231 form.
4533 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4534 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4535 (vec_merge:VF_AVX512VL
4537 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4538 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4540 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4543 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4545 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4546 [(set_attr "type" "ssemuladd")
4547 (set_attr "mode" "<MODE>")])
4549 ;; FMA3 floating point scalar intrinsics. These merge result with
4550 ;; high-order elements from the destination register.
;; Scalar-intrinsic fmadd: operates on element 0, merging the upper
;; elements from operand 1.  NOTE(review): the tail of this expander
;; (the vec_merge wrapper and condition) is missing from this listing.
4552 (define_expand "fmai_vmfmadd_<mode><round_name>"
4553 [(set (match_operand:VF_128 0 "register_operand")
4556 (match_operand:VF_128 1 "register_operand")
4557 (match_operand:VF_128 2 "<round_nimm_predicate>")
4558 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; Scalar-intrinsic fmsub; upper elements taken from operand 1.
4563 (define_expand "fmai_vmfmsub_<mode><round_name>"
4564 [(set (match_operand:VF_128 0 "register_operand")
4567 (match_operand:VF_128 1 "register_operand")
4568 (match_operand:VF_128 2 "<round_nimm_predicate>")
4570 (match_operand:VF_128 3 "<round_nimm_predicate>")))
;; Scalar-intrinsic fnmadd; upper elements taken from operand 1.
4575 (define_expand "fmai_vmfnmadd_<mode><round_name>"
4576 [(set (match_operand:VF_128 0 "register_operand")
4580 (match_operand:VF_128 2 "<round_nimm_predicate>"))
4581 (match_operand:VF_128 1 "register_operand")
4582 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; Scalar-intrinsic fnmsub; upper elements taken from operand 1.
4587 (define_expand "fmai_vmfnmsub_<mode><round_name>"
4588 [(set (match_operand:VF_128 0 "register_operand")
4592 (match_operand:VF_128 2 "<round_nimm_predicate>"))
4593 (match_operand:VF_128 1 "register_operand")
4595 (match_operand:VF_128 3 "<round_nimm_predicate>")))
;; Scalar fmadd insn: result element 0 only, high elements from operand 1
;; (which is tied to the destination), so only 132/213 forms are usable.
4600 (define_insn "*fmai_fmadd_<mode>"
4601 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4604 (match_operand:VF_128 1 "register_operand" "0,0")
4605 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
4606 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4609 "TARGET_FMA || TARGET_AVX512F"
4611 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4612 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4613 [(set_attr "type" "ssemuladd")
4614 (set_attr "mode" "<MODE>")])
;; Scalar fmsub insn; operand 1 tied to the destination (132/213 forms).
4616 (define_insn "*fmai_fmsub_<mode>"
4617 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4620 (match_operand:VF_128 1 "register_operand" "0,0")
4621 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4623 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4626 "TARGET_FMA || TARGET_AVX512F"
4628 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4629 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4630 [(set_attr "type" "ssemuladd")
4631 (set_attr "mode" "<MODE>")])
;; Scalar fnmadd insn; operand 1 tied to the destination (132/213 forms).
4633 (define_insn "*fmai_fnmadd_<mode><round_name>"
4634 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4638 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4639 (match_operand:VF_128 1 "register_operand" "0,0")
4640 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4643 "TARGET_FMA || TARGET_AVX512F"
4645 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4646 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4647 [(set_attr "type" "ssemuladd")
4648 (set_attr "mode" "<MODE>")])
;; Scalar fnmsub insn; operand 1 tied to the destination (132/213 forms).
4650 (define_insn "*fmai_fnmsub_<mode><round_name>"
4651 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4655 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4656 (match_operand:VF_128 1 "register_operand" "0,0")
4658 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4661 "TARGET_FMA || TARGET_AVX512F"
4663 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4664 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4665 [(set_attr "type" "ssemuladd")
4666 (set_attr "mode" "<MODE>")])
;; Masked scalar fmadd merging with operand 1 (tied dest); mask in op 4.
4668 (define_insn "avx512f_vmfmadd_<mode>_mask<round_name>"
4669 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4673 (match_operand:VF_128 1 "register_operand" "0,0")
4674 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4675 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4677 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4682 vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4683 vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4684 [(set_attr "type" "ssemuladd")
4685 (set_attr "mode" "<MODE>")])
;; Masked scalar fmadd merging with operand 3 (tied dest); 231 form only.
;; Fix: the Intel-syntax side printed "%<iptr>3, %<iptr>2" while the AT&T
;; side prints "%2, %1, %0"; Intel order is the reverse of AT&T, so the
;; middle source must be operand 1, not operand 3 (which would print the
;; destination register and misname the multiplicand with -masm=intel).
4687 (define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>"
4688 [(set (match_operand:VF_128 0 "register_operand" "=v")
4692 (match_operand:VF_128 1 "<round_nimm_predicate>" "%v")
4693 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")
4694 (match_operand:VF_128 3 "register_operand" "0"))
4696 (match_operand:QI 4 "register_operand" "Yk"))
4700 "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
4701 [(set_attr "type" "ssemuladd")
4702 (set_attr "mode" "<MODE>")])
;; Zero-masking expander for the scalar fmadd intrinsic: lowers to the
;; maskz_1 insn with a CONST0 merge operand.
4704 (define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>"
4705 [(match_operand:VF_128 0 "register_operand")
4706 (match_operand:VF_128 1 "<round_expand_nimm_predicate>")
4707 (match_operand:VF_128 2 "<round_expand_nimm_predicate>")
4708 (match_operand:VF_128 3 "<round_expand_nimm_predicate>")
4709 (match_operand:QI 4 "register_operand")]
4712 emit_insn (gen_avx512f_vmfmadd_<mode>_maskz_1<round_expand_name> (
4713 operands[0], operands[1], operands[2], operands[3],
4714 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Zero-masked scalar fmadd insn: masked-off element 0 becomes zero
;; (operand 4 is the CONST0 merge value, operand 5 the mask).
4718 (define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>"
4719 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4723 (match_operand:VF_128 1 "register_operand" "0,0")
4724 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4725 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4726 (match_operand:VF_128 4 "const0_operand" "C,C")
4727 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4732 vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4733 vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4734 [(set_attr "type" "ssemuladd")
4735 (set_attr "mode" "<MODE>")])
;; Masked scalar fmsub merging with operand 1 (tied dest).
4737 (define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>"
4738 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4742 (match_operand:VF_128 1 "register_operand" "0,0")
4743 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4745 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4747 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4752 vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4753 vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4754 [(set_attr "type" "ssemuladd")
4755 (set_attr "mode" "<MODE>")])
;; Masked scalar fmsub merging with operand 3 (tied dest); 231 form only.
;; Fix: Intel-syntax side printed "%<iptr>3, %<iptr>2"; Intel order is the
;; reverse of the AT&T "%2, %1, %0", so it must be "%<iptr>1, %<iptr>2".
4757 (define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>"
4758 [(set (match_operand:VF_128 0 "register_operand" "=v")
4762 (match_operand:VF_128 1 "<round_nimm_predicate>" "%v")
4763 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")
4765 (match_operand:VF_128 3 "register_operand" "0")))
4767 (match_operand:QI 4 "register_operand" "Yk"))
4771 "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
4772 [(set_attr "type" "ssemuladd")
4773 (set_attr "mode" "<MODE>")])
;; Zero-masked scalar fmsub insn (CONST0 merge in op 4, mask in op 5).
4775 (define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>"
4776 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4780 (match_operand:VF_128 1 "register_operand" "0,0")
4781 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4783 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4784 (match_operand:VF_128 4 "const0_operand" "C,C")
4785 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4790 vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4791 vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4792 [(set_attr "type" "ssemuladd")
4793 (set_attr "mode" "<MODE>")])
;; Masked scalar fnmadd merging with operand 1 (tied dest).
4795 (define_insn "*avx512f_vmfnmadd_<mode>_mask<round_name>"
4796 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4801 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4802 (match_operand:VF_128 1 "register_operand" "0,0")
4803 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4805 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4810 vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4811 vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4812 [(set_attr "type" "ssemuladd")
4813 (set_attr "mode" "<MODE>")])
;; Masked scalar fnmadd merging with operand 3 (tied dest); 231 form only.
;; Fix: Intel-syntax side printed "%<iptr>3, %<iptr>2"; Intel order is the
;; reverse of the AT&T "%2, %1, %0", so it must be "%<iptr>1, %<iptr>2".
4815 (define_insn "*avx512f_vmfnmadd_<mode>_mask3<round_name>"
4816 [(set (match_operand:VF_128 0 "register_operand" "=v")
4821 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>"))
4822 (match_operand:VF_128 1 "<round_nimm_predicate>" "%v")
4823 (match_operand:VF_128 3 "register_operand" "0"))
4825 (match_operand:QI 4 "register_operand" "Yk"))
4829 "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
4830 [(set_attr "type" "ssemuladd")
4831 (set_attr "mode" "<MODE>")])
;; Zero-masked scalar fnmadd insn (CONST0 merge in op 4, mask in op 5).
4833 (define_insn "*avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
4834 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4839 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4840 (match_operand:VF_128 1 "register_operand" "0,0")
4841 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4842 (match_operand:VF_128 4 "const0_operand" "C,C")
4843 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4848 vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4849 vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4850 [(set_attr "type" "ssemuladd")
4851 (set_attr "mode" "<MODE>")])
;; Masked scalar fnmsub merging with operand 1 (tied dest).
4853 (define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>"
4854 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4859 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4860 (match_operand:VF_128 1 "register_operand" "0,0")
4862 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4864 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4869 vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4870 vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4871 [(set_attr "type" "ssemuladd")
4872 (set_attr "mode" "<MODE>")])
;; Scalar FNMSUB, merge-masked into operand 3 (tied to the destination
;; via constraint "0"): the vfnmsub231 form.
;; Intel-syntax operands are the AT&T order reversed: AT&T "%2, %1, %0"
;; => Intel "%0, %1, %2".  %3 would re-print the destination register
;; (it is tied to %0), so %1 is the correct middle source here.
4874 (define_insn "*avx512f_vmfnmsub_<mode>_mask3<round_name>"
4875 [(set (match_operand:VF_128 0 "register_operand" "=v")
4880 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>"))
4881 (match_operand:VF_128 1 "<round_nimm_predicate>" "%v")
4883 (match_operand:VF_128 3 "register_operand" "0")))
4885 (match_operand:QI 4 "register_operand" "Yk"))
4889 "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
4890 [(set_attr "type" "ssemuladd")
4891 (set_attr "mode" "<MODE>")])
;; Scalar FNMSUB with zero-masking: operand 4 is the const-0 vector
;; selected under mask operand 5 ({z} in the emitted mnemonic).
4893 (define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>"
4894 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4899 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4900 (match_operand:VF_128 1 "register_operand" "0,0")
4902 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4903 (match_operand:VF_128 4 "const0_operand" "C,C")
4904 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4909 vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4910 vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4911 [(set_attr "type" "ssemuladd")
4912 (set_attr "mode" "<MODE>")])
4914 ;; FMA4 floating point scalar intrinsics. These write the
4915 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the FMA4 scalar fused-multiply-add intrinsic; supplies
;; operand 4, the zero vector that fills the non-scalar elements.
4917 (define_expand "fma4i_vmfmadd_<mode>"
4918 [(set (match_operand:VF_128 0 "register_operand")
4921 (match_operand:VF_128 1 "nonimmediate_operand")
4922 (match_operand:VF_128 2 "nonimmediate_operand")
4923 (match_operand:VF_128 3 "nonimmediate_operand"))
4927 "operands[4] = CONST0_RTX (<MODE>mode);")
;; FMA4 scalar vfmadd: dst = op1*op2 + op3, upper elements zeroed
;; (operand 4 is the const-0 vector).  FMA4 is 4-operand, so no source
;; needs to be tied to the destination.
4929 (define_insn "*fma4i_vmfmadd_<mode>"
4930 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4933 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4934 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4935 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4936 (match_operand:VF_128 4 "const0_operand")
4939 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4940 [(set_attr "type" "ssemuladd")
4941 (set_attr "mode" "<MODE>")])
;; FMA4 scalar vfmsub: dst = op1*op2 - op3 (operand 3 negated).
4943 (define_insn "*fma4i_vmfmsub_<mode>"
4944 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4947 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4948 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4950 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4951 (match_operand:VF_128 4 "const0_operand")
4954 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4955 [(set_attr "type" "ssemuladd")
4956 (set_attr "mode" "<MODE>")])
;; FMA4 scalar vfnmadd: dst = -(op1*op2) + op3 (operand 1 negated).
4958 (define_insn "*fma4i_vmfnmadd_<mode>"
4959 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4963 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4964 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4965 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4966 (match_operand:VF_128 4 "const0_operand")
4969 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4970 [(set_attr "type" "ssemuladd")
4971 (set_attr "mode" "<MODE>")])
;; FMA4 scalar vfnmsub: dst = -(op1*op2) - op3 (both negations present).
4973 (define_insn "*fma4i_vmfnmsub_<mode>"
4974 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4978 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4979 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4981 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4982 (match_operand:VF_128 4 "const0_operand")
4985 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4986 [(set_attr "type" "ssemuladd")
4987 (set_attr "mode" "<MODE>")])
4989 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4991 ;; Parallel single-precision floating point conversion operations
4993 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Convert packed V2SI (MMX) to the low two SF elements of a V4SF.
;; Native MMX alternative emits cvtpi2ps; the TARGET_MMX_WITH_SSE
;; alternatives are split after reload into an SSE2 cvtdq2ps on the
;; scratch (operand 3) followed by shuffles that merge the two
;; converted lanes into operand 0, preserving the upper lanes of
;; operand 1.
4995 (define_insn_and_split "sse_cvtpi2ps"
4996 [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
4999 (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")))
5000 (match_operand:V4SF 1 "register_operand" "0,0,Yv")
5002 (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
5003 "TARGET_SSE || TARGET_MMX_WITH_SSE"
5005 cvtpi2ps\t{%2, %0|%0, %2}
5008 "TARGET_MMX_WITH_SSE && reload_completed"
5011 rtx op2 = lowpart_subreg (V4SImode, operands[2],
5012 GET_MODE (operands[2]));
5013 /* Generate SSE2 cvtdq2ps. */
5014 rtx insn = gen_floatv4siv4sf2 (operands[3], op2);
5017 /* Merge operands[3] with operands[0]. */
5021 mask = gen_rtx_PARALLEL (VOIDmode,
5022 gen_rtvec (4, GEN_INT (0), GEN_INT (1),
5023 GEN_INT (6), GEN_INT (7)));
5024 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
5025 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5026 insn = gen_rtx_SET (operands[0], op2);
5030 /* NB: SSE can only concatenate OP0 and OP3 to OP0. */
5031 mask = gen_rtx_PARALLEL (VOIDmode,
5032 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5033 GEN_INT (4), GEN_INT (5)));
5034 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
5035 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5036 insn = gen_rtx_SET (operands[0], op2);
5039 /* Swap bits 0:63 with bits 64:127. */
5040 mask = gen_rtx_PARALLEL (VOIDmode,
5041 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5042 GEN_INT (0), GEN_INT (1)));
5043 rtx dest = lowpart_subreg (V4SImode, operands[0],
5044 GET_MODE (operands[0]));
5045 op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
5046 insn = gen_rtx_SET (dest, op1);
5051 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
5052 (set_attr "type" "ssecvt")
5053 (set_attr "mode" "V4SF")])
;; Convert the low two SF lanes of a V4SF to V2SI with rounding
;; (UNSPEC_FIX_NOTRUNC-style cvtps2pi); the SSE alternative uses
;; cvtps2dq on the whole vector and takes the low half.
5055 (define_insn "sse_cvtps2pi"
5056 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5058 (unspec:V4SI [(match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm")]
5060 (parallel [(const_int 0) (const_int 1)])))]
5061 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5063 cvtps2pi\t{%1, %0|%0, %q1}
5064 %vcvtps2dq\t{%1, %0|%0, %1}"
5065 [(set_attr "mmx_isa" "native,x64")
5066 (set_attr "type" "ssecvt")
5067 (set_attr "unit" "mmx,*")
5068 (set_attr "mode" "DI")])
;; Same as above, but with truncation (fix:): cvttps2pi / cvttps2dq.
5070 (define_insn "sse_cvttps2pi"
5071 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5073 (fix:V4SI (match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm"))
5074 (parallel [(const_int 0) (const_int 1)])))]
5075 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5077 cvttps2pi\t{%1, %0|%0, %q1}
5078 %vcvttps2dq\t{%1, %0|%0, %1}"
5079 [(set_attr "mmx_isa" "native,x64")
5080 (set_attr "type" "ssecvt")
5081 (set_attr "unit" "mmx,*")
5082 (set_attr "prefix_rep" "0")
5083 (set_attr "mode" "SF")])
;; Convert a 32/64-bit integer (SWI48) to SF and insert it as the low
;; element of operand 1's V4SF.  Alternatives: SSE reg source, SSE mem
;; source, and the AVX/EVEX 3-operand form with optional rounding.
5085 (define_insn "sse_cvtsi2ss<rex64namesuffix><round_name>"
5086 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5089 (float:SF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5090 (match_operand:V4SF 1 "register_operand" "0,0,v")
5094 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5095 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5096 vcvtsi2ss<rex64suffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5097 [(set_attr "isa" "noavx,noavx,avx")
5098 (set_attr "type" "sseicvt")
5099 (set_attr "athlon_decode" "vector,double,*")
5100 (set_attr "amdfam10_decode" "vector,double,*")
5101 (set_attr "bdver1_decode" "double,direct,*")
5102 (set_attr "btver2_decode" "double,double,double")
5103 (set_attr "znver1_decode" "double,double,double")
5104 (set (attr "length_vex")
5106 (and (match_test "<MODE>mode == DImode")
5107 (eq_attr "alternative" "2"))
5109 (const_string "*")))
5110 (set (attr "prefix_rex")
5112 (and (match_test "<MODE>mode == DImode")
5113 (eq_attr "alternative" "0,1"))
5115 (const_string "*")))
5116 (set_attr "prefix" "orig,orig,maybe_evex")
5117 (set_attr "mode" "SF")])
;; Low SF element of a V4SF -> 32/64-bit integer, rounding per MXCSR
;; (UNSPEC_FIX_NOTRUNC), with optional embedded-rounding operand.
5119 (define_insn "sse_cvtss2si<rex64namesuffix><round_name>"
5120 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5123 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5124 (parallel [(const_int 0)]))]
5125 UNSPEC_FIX_NOTRUNC))]
5127 "%vcvtss2si<rex64suffix>\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5128 [(set_attr "type" "sseicvt")
5129 (set_attr "athlon_decode" "double,vector")
5130 (set_attr "bdver1_decode" "double,double")
5131 (set_attr "prefix_rep" "1")
5132 (set_attr "prefix" "maybe_vex")
5133 (set_attr "mode" "<MODE>")])
;; Variant taking a bare SF operand (possibly in memory) rather than a
;; vec_select from a V4SF.
5135 (define_insn "sse_cvtss2si<rex64namesuffix>_2"
5136 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5137 (unspec:SWI48 [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
5138 UNSPEC_FIX_NOTRUNC))]
5140 "%vcvtss2si<rex64suffix>\t{%1, %0|%0, %k1}"
5141 [(set_attr "type" "sseicvt")
5142 (set_attr "athlon_decode" "double,vector")
5143 (set_attr "amdfam10_decode" "double,double")
5144 (set_attr "bdver1_decode" "double,double")
5145 (set_attr "prefix_rep" "1")
5146 (set_attr "prefix" "maybe_vex")
5147 (set_attr "mode" "<MODE>")])
;; Truncating form (cvttss2si), with optional suppress-all-exceptions
;; (SAE) round operand.
5149 (define_insn "sse_cvttss2si<rex64namesuffix><round_saeonly_name>"
5150 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5153 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
5154 (parallel [(const_int 0)]))))]
5156 "%vcvttss2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5157 [(set_attr "type" "sseicvt")
5158 (set_attr "athlon_decode" "double,vector")
5159 (set_attr "amdfam10_decode" "double,double")
5160 (set_attr "bdver1_decode" "double,double")
5161 (set_attr "prefix_rep" "1")
5162 (set_attr "prefix" "maybe_vex")
5163 (set_attr "mode" "<MODE>")])
;; AVX-512F unsigned 32-bit integer -> scalar float, inserted into the
;; low element of operand 1 ({l} operand-size suffix).
5165 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
5166 [(set (match_operand:VF_128 0 "register_operand" "=v")
5168 (vec_duplicate:VF_128
5169 (unsigned_float:<ssescalarmode>
5170 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
5171 (match_operand:VF_128 1 "register_operand" "v")
5173 "TARGET_AVX512F && <round_modev4sf_condition>"
5174 "vcvtusi2<ssescalarmodesuffix>{l}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5175 [(set_attr "type" "sseicvt")
5176 (set_attr "prefix" "evex")
5177 (set_attr "mode" "<ssescalarmode>")])
;; 64-bit-source variant ({q} suffix); requires TARGET_64BIT.
5179 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
5180 [(set (match_operand:VF_128 0 "register_operand" "=v")
5182 (vec_duplicate:VF_128
5183 (unsigned_float:<ssescalarmode>
5184 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
5185 (match_operand:VF_128 1 "register_operand" "v")
5187 "TARGET_AVX512F && TARGET_64BIT"
5188 "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5189 [(set_attr "type" "sseicvt")
5190 (set_attr "prefix" "evex")
5191 (set_attr "mode" "<ssescalarmode>")])
;; Packed signed int -> float (cvtdq2ps / vcvtdq2ps), with optional
;; masking and embedded rounding in the AVX alternative.
5193 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
5194 [(set (match_operand:VF1 0 "register_operand" "=x,v")
5196 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
5197 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
5199 cvtdq2ps\t{%1, %0|%0, %1}
5200 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5201 [(set_attr "isa" "noavx,avx")
5202 (set_attr "type" "ssecvt")
5203 (set_attr "prefix" "maybe_vex")
5204 (set_attr "mode" "<sseinsnmode>")])
;; Packed unsigned int -> float (vcvtudq2ps), AVX-512 only.
5206 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
5207 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
5208 (unsigned_float:VF1_AVX512VL
5209 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5211 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5212 [(set_attr "type" "ssecvt")
5213 (set_attr "prefix" "evex")
5214 (set_attr "mode" "<MODE>")])
;; Expander: unsigned int -> float.  Uses the native vcvtudq2ps when
;; AVX-512 (F or VL) provides it; otherwise falls back to the
;; software sequence in ix86_expand_vector_convert_uns_vsivsf.
5216 (define_expand "floatuns<sseintvecmodelower><mode>2"
5217 [(match_operand:VF1 0 "register_operand")
5218 (match_operand:<sseintvecmode> 1 "register_operand")]
5219 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
5221 if (<MODE>mode == V16SFmode)
5222 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
5224 if (TARGET_AVX512VL)
5226 if (<MODE>mode == V4SFmode)
5227 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
5229 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
5232 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
5238 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps an integer vector mode to the lower-case name of the
;; same-width SF vector mode it is converted from.
5239 (define_mode_attr sf2simodelower
5240 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; Packed SF -> signed int with MXCSR rounding (cvtps2dq), maskable
;; on AVX-512.
5242 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
5243 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
5245 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
5246 UNSPEC_FIX_NOTRUNC))]
5247 "TARGET_SSE2 && <mask_mode512bit_condition>"
5248 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5249 [(set_attr "type" "ssecvt")
5250 (set (attr "prefix_data16")
5252 (match_test "TARGET_AVX")
5254 (const_string "1")))
5255 (set_attr "prefix" "maybe_vex")
5256 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit SF -> signed int (vcvtps2dq) with mask and embedded rounding.
5258 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
5259 [(set (match_operand:V16SI 0 "register_operand" "=v")
5261 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
5262 UNSPEC_FIX_NOTRUNC))]
5264 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5265 [(set_attr "type" "ssecvt")
5266 (set_attr "prefix" "evex")
5267 (set_attr "mode" "XI")])
;; Packed SF -> unsigned int (vcvtps2udq), AVX-512, maskable, optional
;; embedded rounding.
5269 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
5270 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
5271 (unspec:VI4_AVX512VL
5272 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
5273 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5275 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5276 [(set_attr "type" "ssecvt")
5277 (set_attr "prefix" "evex")
5278 (set_attr "mode" "<sseinsnmode>")])
;; AVX512DQ packed SF -> signed 64-bit int (vcvtps2qq), 256/512-bit.
5280 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
5281 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5282 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5283 UNSPEC_FIX_NOTRUNC))]
5284 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5285 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5286 [(set_attr "type" "ssecvt")
5287 (set_attr "prefix" "evex")
5288 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit variant: low two SF lanes -> V2DI (needs AVX512VL).
5290 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
5291 [(set (match_operand:V2DI 0 "register_operand" "=v")
5294 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5295 (parallel [(const_int 0) (const_int 1)]))]
5296 UNSPEC_FIX_NOTRUNC))]
5297 "TARGET_AVX512DQ && TARGET_AVX512VL"
5298 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5299 [(set_attr "type" "ssecvt")
5300 (set_attr "prefix" "evex")
5301 (set_attr "mode" "TI")])
;; Unsigned counterpart: packed SF -> unsigned 64-bit int (vcvtps2uqq).
5303 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
5304 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5305 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5306 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5307 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5308 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5309 [(set_attr "type" "ssecvt")
5310 (set_attr "prefix" "evex")
5311 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit unsigned variant: low two SF lanes -> V2DI.
5313 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
5314 [(set (match_operand:V2DI 0 "register_operand" "=v")
5317 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5318 (parallel [(const_int 0) (const_int 1)]))]
5319 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5320 "TARGET_AVX512DQ && TARGET_AVX512VL"
5321 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5322 [(set_attr "type" "ssecvt")
5323 (set_attr "prefix" "evex")
5324 (set_attr "mode" "TI")])
;; Truncating packed SF -> (un)signed int, 512-bit, maskable with SAE
;; rounding control (vcvttps2dq / vcvttps2udq via <fixsuffix>).
5326 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
5327 [(set (match_operand:V16SI 0 "register_operand" "=v")
5329 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5331 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5332 [(set_attr "type" "ssecvt")
5333 (set_attr "prefix" "evex")
5334 (set_attr "mode" "XI")])
;; 256-bit truncating SF -> signed int (vcvttps2dq).
5336 (define_insn "fix_truncv8sfv8si2<mask_name>"
5337 [(set (match_operand:V8SI 0 "register_operand" "=v")
5338 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
5339 "TARGET_AVX && <mask_avx512vl_condition>"
5340 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5341 [(set_attr "type" "ssecvt")
5342 (set_attr "prefix" "<mask_prefix>")
5343 (set_attr "mode" "OI")])
;; 128-bit truncating SF -> signed int (cvttps2dq / vcvttps2dq).
5345 (define_insn "fix_truncv4sfv4si2<mask_name>"
5346 [(set (match_operand:V4SI 0 "register_operand" "=v")
5347 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
5348 "TARGET_SSE2 && <mask_avx512vl_condition>"
5349 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5350 [(set_attr "type" "ssecvt")
5351 (set (attr "prefix_rep")
5353 (match_test "TARGET_AVX")
5355 (const_string "1")))
5356 (set (attr "prefix_data16")
5358 (match_test "TARGET_AVX")
5360 (const_string "0")))
5361 (set_attr "prefix_data16" "0")
5362 (set_attr "prefix" "<mask_prefix2>")
5363 (set_attr "mode" "TI")])
;; Expander: truncating SF -> unsigned int.  V16SF uses the native
;; AVX-512 instruction; smaller modes adjust the input so a signed
;; fix_trunc plus an XOR of the adjustment mask yields the unsigned
;; result.
5365 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
5366 [(match_operand:<sseintvecmode> 0 "register_operand")
5367 (match_operand:VF1 1 "register_operand")]
5370 if (<MODE>mode == V16SFmode)
5371 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
5376 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5377 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
5378 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
5379 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
5384 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5386 ;; Parallel double-precision floating point conversion operations
5388 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2SI (MMX) -> V2DF: SSE alternative uses %vcvtdq2pd, native MMX uses
;; cvtpi2pd.
5390 (define_insn "sse2_cvtpi2pd"
5391 [(set (match_operand:V2DF 0 "register_operand" "=v,x")
5392 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "vBm,?!y")))]
5395 %vcvtdq2pd\t{%1, %0|%0, %1}
5396 cvtpi2pd\t{%1, %0|%0, %1}"
5397 [(set_attr "mmx_isa" "*,native")
5398 (set_attr "type" "ssecvt")
5399 (set_attr "unit" "*,mmx")
5400 (set_attr "prefix_data16" "*,1")
5401 (set_attr "prefix" "maybe_vex,*")
5402 (set_attr "mode" "V2DF")])
;; V2DF -> V2SI with MXCSR rounding; the AVX form needs the {x} suffix
;; to disambiguate the 128-bit source size.
5404 (define_insn "sse2_cvtpd2pi"
5405 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5406 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vBm,xm")]
5407 UNSPEC_FIX_NOTRUNC))]
5410 * return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvtpd2dq\t{%1, %0|%0, %1}\";
5411 cvtpd2pi\t{%1, %0|%0, %1}"
5412 [(set_attr "mmx_isa" "*,native")
5413 (set_attr "type" "ssecvt")
5414 (set_attr "unit" "*,mmx")
5415 (set_attr "amdfam10_decode" "double")
5416 (set_attr "athlon_decode" "vector")
5417 (set_attr "bdver1_decode" "double")
5418 (set_attr "prefix_data16" "*,1")
5419 (set_attr "prefix" "maybe_vex,*")
5420 (set_attr "mode" "TI")])
;; Truncating variant of the above (cvttpd2dq / cvttpd2pi).
5422 (define_insn "sse2_cvttpd2pi"
5423 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5424 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vBm,xm")))]
5427 * return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvttpd2dq\t{%1, %0|%0, %1}\";
5428 cvttpd2pi\t{%1, %0|%0, %1}"
5429 [(set_attr "mmx_isa" "*,native")
5430 (set_attr "type" "ssecvt")
5431 (set_attr "unit" "*,mmx")
5432 (set_attr "amdfam10_decode" "double")
5433 (set_attr "athlon_decode" "vector")
5434 (set_attr "bdver1_decode" "double")
5435 (set_attr "prefix_data16" "*,1")
5436 (set_attr "prefix" "maybe_vex,*")
5437 (set_attr "mode" "TI")])
;; 32-bit integer -> DF, inserted as the low element of operand 1's
;; V2DF ({l} suffix selects the 32-bit source form).
5439 (define_insn "sse2_cvtsi2sd"
5440 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5443 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
5444 (match_operand:V2DF 1 "register_operand" "0,0,v")
5448 cvtsi2sd{l}\t{%2, %0|%0, %2}
5449 cvtsi2sd{l}\t{%2, %0|%0, %2}
5450 vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
5451 [(set_attr "isa" "noavx,noavx,avx")
5452 (set_attr "type" "sseicvt")
5453 (set_attr "athlon_decode" "double,direct,*")
5454 (set_attr "amdfam10_decode" "vector,double,*")
5455 (set_attr "bdver1_decode" "double,direct,*")
5456 (set_attr "btver2_decode" "double,double,double")
5457 (set_attr "znver1_decode" "double,double,double")
5458 (set_attr "prefix" "orig,orig,maybe_evex")
5459 (set_attr "mode" "DF")])
;; 64-bit-source variant ({q} suffix), with optional embedded rounding
;; on the AVX/EVEX alternative; requires TARGET_64BIT.
5461 (define_insn "sse2_cvtsi2sdq<round_name>"
5462 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5465 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5466 (match_operand:V2DF 1 "register_operand" "0,0,v")
5468 "TARGET_SSE2 && TARGET_64BIT"
5470 cvtsi2sd{q}\t{%2, %0|%0, %2}
5471 cvtsi2sd{q}\t{%2, %0|%0, %2}
5472 vcvtsi2sd{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5473 [(set_attr "isa" "noavx,noavx,avx")
5474 (set_attr "type" "sseicvt")
5475 (set_attr "athlon_decode" "double,direct,*")
5476 (set_attr "amdfam10_decode" "vector,double,*")
5477 (set_attr "bdver1_decode" "double,direct,*")
5478 (set_attr "length_vex" "*,*,4")
5479 (set_attr "prefix_rex" "1,1,*")
5480 (set_attr "prefix" "orig,orig,maybe_evex")
5481 (set_attr "mode" "DF")])
;; AVX-512F scalar SF -> unsigned 32/64-bit int, MXCSR rounding.
5483 (define_insn "avx512f_vcvtss2usi<rex64namesuffix><round_name>"
5484 [(set (match_operand:SWI48 0 "register_operand" "=r")
5487 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
5488 (parallel [(const_int 0)]))]
5489 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5491 "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5492 [(set_attr "type" "sseicvt")
5493 (set_attr "prefix" "evex")
5494 (set_attr "mode" "<MODE>")])
;; Truncating variant (vcvttss2usi) with SAE rounding control.
5496 (define_insn "avx512f_vcvttss2usi<rex64namesuffix><round_saeonly_name>"
5497 [(set (match_operand:SWI48 0 "register_operand" "=r")
5500 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5501 (parallel [(const_int 0)]))))]
5503 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5504 [(set_attr "type" "sseicvt")
5505 (set_attr "prefix" "evex")
5506 (set_attr "mode" "<MODE>")])
;; Scalar DF -> unsigned 32/64-bit int, MXCSR rounding.
5508 (define_insn "avx512f_vcvtsd2usi<rex64namesuffix><round_name>"
5509 [(set (match_operand:SWI48 0 "register_operand" "=r")
5512 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
5513 (parallel [(const_int 0)]))]
5514 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5516 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5517 [(set_attr "type" "sseicvt")
5518 (set_attr "prefix" "evex")
5519 (set_attr "mode" "<MODE>")])
;; Truncating variant (vcvttsd2usi) with SAE rounding control.
5521 (define_insn "avx512f_vcvttsd2usi<rex64namesuffix><round_saeonly_name>"
5522 [(set (match_operand:SWI48 0 "register_operand" "=r")
5525 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5526 (parallel [(const_int 0)]))))]
5528 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5529 [(set_attr "type" "sseicvt")
5530 (set_attr "prefix" "evex")
5531 (set_attr "mode" "<MODE>")])
;; Low DF element of a V2DF -> 32/64-bit signed int, MXCSR rounding.
5533 (define_insn "sse2_cvtsd2si<rex64namesuffix><round_name>"
5534 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5537 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5538 (parallel [(const_int 0)]))]
5539 UNSPEC_FIX_NOTRUNC))]
5541 "%vcvtsd2si<rex64suffix>\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5542 [(set_attr "type" "sseicvt")
5543 (set_attr "athlon_decode" "double,vector")
5544 (set_attr "bdver1_decode" "double,double")
5545 (set_attr "btver2_decode" "double,double")
5546 (set_attr "prefix_rep" "1")
5547 (set_attr "prefix" "maybe_vex")
5548 (set_attr "mode" "<MODE>")])
;; Variant taking a bare DF operand (possibly in memory).
5550 (define_insn "sse2_cvtsd2si<rex64namesuffix>_2"
5551 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5552 (unspec:SWI48 [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
5553 UNSPEC_FIX_NOTRUNC))]
5555 "%vcvtsd2si<rex64suffix>\t{%1, %0|%0, %q1}"
5556 [(set_attr "type" "sseicvt")
5557 (set_attr "athlon_decode" "double,vector")
5558 (set_attr "amdfam10_decode" "double,double")
5559 (set_attr "bdver1_decode" "double,double")
5560 (set_attr "prefix_rep" "1")
5561 (set_attr "prefix" "maybe_vex")
5562 (set_attr "mode" "<MODE>")])
;; Truncating form (cvttsd2si) with SAE rounding control.
5564 (define_insn "sse2_cvttsd2si<rex64namesuffix><round_saeonly_name>"
5565 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5568 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
5569 (parallel [(const_int 0)]))))]
5571 "%vcvttsd2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5572 [(set_attr "type" "sseicvt")
5573 (set_attr "athlon_decode" "double,vector")
5574 (set_attr "amdfam10_decode" "double,double")
5575 (set_attr "bdver1_decode" "double,double")
5576 (set_attr "btver2_decode" "double,double")
5577 (set_attr "prefix_rep" "1")
5578 (set_attr "prefix" "maybe_vex")
5579 (set_attr "mode" "<MODE>")])
5581 ;; For float<si2dfmode><mode>2 insn pattern
;; Maps a DF vector mode to the half-width SI vector mode (and its
;; lower-case name) that converts into it.
5582 (define_mode_attr si2dfmode
5583 [(V8DF "V8SI") (V4DF "V4SI")])
5584 (define_mode_attr si2dfmodelower
5585 [(V8DF "v8si") (V4DF "v4si")])
;; Packed signed int -> double (vcvtdq2pd), 256/512-bit, maskable.
5587 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
5588 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5589 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5590 "TARGET_AVX && <mask_mode512bit_condition>"
5591 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5592 [(set_attr "type" "ssecvt")
5593 (set_attr "prefix" "maybe_vex")
5594 (set_attr "mode" "<MODE>")])
;; Packed (un)signed 64-bit int -> double (vcvtqq2pd / vcvtuqq2pd),
;; AVX-512, maskable, optional embedded rounding.
5596 (define_insn "float<floatunssuffix><sseintvecmodelower><mode>2<mask_name><round_name>"
5597 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
5598 (any_float:VF2_AVX512VL
5599 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5601 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5602 [(set_attr "type" "ssecvt")
5603 (set_attr "prefix" "evex")
5604 (set_attr "mode" "<MODE>")])
5606 ;; For float<floatunssuffix><sselondveclower><mode> insn patterns
;; Mnemonic suffix, source-mode, and insn-mode attribute maps used by
;; the 64-bit-int -> SF conversion patterns below.
5607 (define_mode_attr qq2pssuff
5608 [(V8SF "") (V4SF "{y}")])
5610 (define_mode_attr sselongvecmode
5611 [(V8SF "V8DI") (V4SF "V4DI")])
5613 (define_mode_attr sselongvecmodelower
5614 [(V8SF "v8di") (V4SF "v4di")])
5616 (define_mode_attr sseintvecmode3
5617 [(V8SF "XI") (V4SF "OI")
5618 (V8DF "OI") (V4DF "TI")])
;; Packed (un)signed 64-bit int -> SF (vcvtqq2ps / vcvtuqq2ps);
;; AVX512DQ required.
5620 (define_insn "float<floatunssuffix><sselongvecmodelower><mode>2<mask_name><round_name>"
5621 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
5622 (any_float:VF1_128_256VL
5623 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5624 "TARGET_AVX512DQ && <round_modev8sf_condition>"
5625 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5626 [(set_attr "type" "ssecvt")
5627 (set_attr "prefix" "evex")
5628 (set_attr "mode" "<MODE>")])
;; Expander: V2DI -> V2SF placed in the low half of a V4SF; operand 2
;; supplies the zero vector for the upper half.
5630 (define_expand "float<floatunssuffix>v2div2sf2"
5631 [(set (match_operand:V4SF 0 "register_operand" "=v")
5633 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5635 "TARGET_AVX512DQ && TARGET_AVX512VL"
5636 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn: vcvt(u)qq2ps{x} (the {x} marks the 128-bit source).
5638 (define_insn "*float<floatunssuffix>v2div2sf2"
5639 [(set (match_operand:V4SF 0 "register_operand" "=v")
5641 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5642 (match_operand:V2SF 2 "const0_operand" "C")))]
5643 "TARGET_AVX512DQ && TARGET_AVX512VL"
5644 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
5645 [(set_attr "type" "ssecvt")
5646 (set_attr "prefix" "evex")
5647 (set_attr "mode" "V4SF")])
;; Helper mode maps for vec_pack<floatprefix>_float_<mode>: the concat
;; generator name, the temporary SF vector mode, and the per-half
;; conversion insn name.
5649 (define_mode_attr vpckfloat_concat_mode
5650 [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
5651 (define_mode_attr vpckfloat_temp_mode
5652 [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
5653 (define_mode_attr vpckfloat_op_mode
5654 [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])
;; Convert two 64-bit-int vectors and pack the SF results into one
;; vector: each half is converted separately, then combined with
;; movlhps (V2DI case) or a vector concat.
5656 (define_expand "vec_pack<floatprefix>_float_<mode>"
5657 [(match_operand:<ssePSmode> 0 "register_operand")
5658 (any_float:<ssePSmode>
5659 (match_operand:VI8_AVX512VL 1 "register_operand"))
5660 (match_operand:VI8_AVX512VL 2 "register_operand")]
5663 rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5664 rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5665 rtx (*gen) (rtx, rtx) = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
5666 emit_insn (gen (r1, operands[1]));
5667 emit_insn (gen (r2, operands[2]));
5668 if (<MODE>mode == V2DImode)
5669 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
5671 emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],
;; Masked V2DI -> V2SF expander: merge-masking against the low half of
;; operand 2 under mask operand 3; operand 4 zeroes the upper half.
5676 (define_expand "float<floatunssuffix>v2div2sf2_mask"
5677 [(set (match_operand:V4SF 0 "register_operand" "=v")
5680 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5682 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5683 (parallel [(const_int 0) (const_int 1)]))
5684 (match_operand:QI 3 "register_operand" "Yk"))
5686 "TARGET_AVX512DQ && TARGET_AVX512VL"
5687 "operands[4] = CONST0_RTX (V2SFmode);")
;; Matching merge-masked insn; %N2 selects the {z} marker when
;; operand 2 is the zero constant.
5689 (define_insn "*float<floatunssuffix>v2div2sf2_mask"
5690 [(set (match_operand:V4SF 0 "register_operand" "=v")
5693 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5695 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5696 (parallel [(const_int 0) (const_int 1)]))
5697 (match_operand:QI 3 "register_operand" "Yk"))
5698 (match_operand:V2SF 4 "const0_operand" "C")))]
5699 "TARGET_AVX512DQ && TARGET_AVX512VL"
5700 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5701 [(set_attr "type" "ssecvt")
5702 (set_attr "prefix" "evex")
5703 (set_attr "mode" "V4SF")])
;; Zero-masked form: masked-off lanes come from const 0 (operand 3),
;; emitted with the explicit {z} marker.
5705 (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
5706 [(set (match_operand:V4SF 0 "register_operand" "=v")
5709 (any_float:V2SF (match_operand:V2DI 1
5710 "nonimmediate_operand" "vm"))
5711 (match_operand:V2SF 3 "const0_operand" "C")
5712 (match_operand:QI 2 "register_operand" "Yk"))
5713 (match_operand:V2SF 4 "const0_operand" "C")))]
5714 "TARGET_AVX512DQ && TARGET_AVX512VL"
5715 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
5716 [(set_attr "type" "ssecvt")
5717 (set_attr "prefix" "evex")
5718 (set_attr "mode" "V4SF")])
;; Packed unsigned int -> double (vcvtudq2pd), 256/512-bit, maskable.
5720 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
5721 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
5722 (unsigned_float:VF2_512_256VL
5723 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5725 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5726 [(set_attr "type" "ssecvt")
5727 (set_attr "prefix" "evex")
5728 (set_attr "mode" "<MODE>")])
;; 128-bit variant: low two lanes of a V4SI -> V2DF.
5730 (define_insn "ufloatv2siv2df2<mask_name>"
5731 [(set (match_operand:V2DF 0 "register_operand" "=v")
5732 (unsigned_float:V2DF
5734 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5735 (parallel [(const_int 0) (const_int 1)]))))]
5737 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5738 [(set_attr "type" "ssecvt")
5739 (set_attr "prefix" "evex")
5740 (set_attr "mode" "V2DF")])
;; Low eight lanes of a V16SI -> V8DF (vcvtdq2pd; %t1 prints the
;; 256-bit register name for the source).
5742 (define_insn "avx512f_cvtdq2pd512_2"
5743 [(set (match_operand:V8DF 0 "register_operand" "=v")
5746 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5747 (parallel [(const_int 0) (const_int 1)
5748 (const_int 2) (const_int 3)
5749 (const_int 4) (const_int 5)
5750 (const_int 6) (const_int 7)]))))]
5752 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5753 [(set_attr "type" "ssecvt")
5754 (set_attr "prefix" "evex")
5755 (set_attr "mode" "V8DF")])
5757 (define_insn "avx_cvtdq2pd256_2"
5758 [(set (match_operand:V4DF 0 "register_operand" "=v")
5761 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
5762 (parallel [(const_int 0) (const_int 1)
5763 (const_int 2) (const_int 3)]))))]
5765 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5766 [(set_attr "type" "ssecvt")
5767 (set_attr "prefix" "maybe_evex")
5768 (set_attr "mode" "V4DF")])
5770 (define_insn "sse2_cvtdq2pd<mask_name>"
5771 [(set (match_operand:V2DF 0 "register_operand" "=v")
5774 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5775 (parallel [(const_int 0) (const_int 1)]))))]
5776 "TARGET_SSE2 && <mask_avx512vl_condition>"
5777 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5778 [(set_attr "type" "ssecvt")
5779 (set_attr "prefix" "maybe_vex")
5780 (set_attr "mode" "V2DF")])
;; Double -> signed int conversions that round (do NOT truncate),
;; expressed as UNSPEC_FIX_NOTRUNC: VCVTPD2DQ at 512/256/128 bits.
;; NOTE(review): embedded numbering gaps show some original lines
;; (conditions, unspec wrappers) were dropped from this copy — verify
;; against upstream sse.md before relying on exact pattern shape.
5782 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
5783 [(set (match_operand:V8SI 0 "register_operand" "=v")
5785 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5786 UNSPEC_FIX_NOTRUNC))]
5788 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5789 [(set_attr "type" "ssecvt")
5790 (set_attr "prefix" "evex")
5791 (set_attr "mode" "OI")])
;; 256-bit V4DF -> V4SI; the {y} suffix disambiguates operand size in
;; AT&T syntax when the source is in memory.
5793 (define_insn "avx_cvtpd2dq256<mask_name>"
5794 [(set (match_operand:V4SI 0 "register_operand" "=v")
5795 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5796 UNSPEC_FIX_NOTRUNC))]
5797 "TARGET_AVX && <mask_avx512vl_condition>"
5798 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5799 [(set_attr "type" "ssecvt")
5800 (set_attr "prefix" "<mask_prefix>")
5801 (set_attr "mode" "OI")])
;; Expander pairing the conversion with a zeroed upper half (V8SI
;; destination); operand 2 is materialized as the V4SI zero vector.
5803 (define_expand "avx_cvtpd2dq256_2"
5804 [(set (match_operand:V8SI 0 "register_operand")
5806 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
5810 "operands[2] = CONST0_RTX (V4SImode);")
;; Matching insn: %x0 prints the 128-bit view of the 256-bit destination.
5812 (define_insn "*avx_cvtpd2dq256_2"
5813 [(set (match_operand:V8SI 0 "register_operand" "=v")
5815 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5817 (match_operand:V4SI 2 "const0_operand")))]
5819 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5820 [(set_attr "type" "ssecvt")
5821 (set_attr "btver2_decode" "vector")
5822 (set_attr "mode" "OI")])
;; 128-bit form: result occupies the low V2SI half, upper half zeroed.
;; C-body template selects the VEX/EVEX spelling vs. legacy cvtpd2dq.
5825 (define_insn "sse2_cvtpd2dq<mask_name>"
5826 [(set (match_operand:V4SI 0 "register_operand" "=v")
5828 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
5830 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5831 "TARGET_SSE2 && <mask_avx512vl_condition>"
5834 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5836 return "cvtpd2dq\t{%1, %0|%0, %1}";
5838 [(set_attr "type" "ssecvt")
5839 (set_attr "prefix_rep" "1")
5840 (set_attr "prefix_data16" "0")
5841 (set_attr "prefix" "maybe_vex")
5842 (set_attr "mode" "TI")
5843 (set_attr "amdfam10_decode" "double")
5844 (set_attr "athlon_decode" "vector")
5845 (set_attr "bdver1_decode" "double")])
5847 ;; For ufix_notrunc* insn patterns
;; Mnemonic suffix selector: no suffix for the 512-bit form, {y} for
;; the 256-bit (V4DF) form.
5848 (define_mode_attr pd2udqsuff
5849 [(V8DF "") (V4DF "{y}")])
;; Double -> unsigned int, rounding (not truncating): VCVTPD2UDQ.
;; NOTE(review): numbering gaps indicate dropped condition/unspec lines
;; in this copy — verify against upstream sse.md.
5851 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
5852 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
5854 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
5855 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5857 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5858 [(set_attr "type" "ssecvt")
5859 (set_attr "prefix" "evex")
5860 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit form: low V2SI result with the upper half zeroed.
5862 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
5863 [(set (match_operand:V4SI 0 "register_operand" "=v")
5866 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5867 UNSPEC_UNSIGNED_FIX_NOTRUNC)
5868 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5870 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5871 [(set_attr "type" "ssecvt")
5872 (set_attr "prefix" "evex")
5873 (set_attr "mode" "TI")])
;; Truncating double -> (un)signed int: VCVTTPD2DQ / VCVTTPD2UDQ.
;; any_fix/fixunssuffix iterate the signed and unsigned variants.
5875 (define_insn "fix<fixunssuffix>_truncv8dfv8si2<mask_name><round_saeonly_name>"
5876 [(set (match_operand:V8SI 0 "register_operand" "=v")
5878 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5880 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5881 [(set_attr "type" "ssecvt")
5882 (set_attr "prefix" "evex")
5883 (set_attr "mode" "OI")])
;; 128-bit unsigned truncating form, upper V2SI half zeroed.
5885 (define_insn "ufix_truncv2dfv2si2<mask_name>"
5886 [(set (match_operand:V4SI 0 "register_operand" "=v")
5888 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5889 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5891 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5892 [(set_attr "type" "ssecvt")
5893 (set_attr "prefix" "evex")
5894 (set_attr "mode" "TI")])
;; 256-bit signed truncating form; also usable masked under AVX512VL.
5896 (define_insn "fix_truncv4dfv4si2<mask_name>"
5897 [(set (match_operand:V4SI 0 "register_operand" "=v")
5898 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5899 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
5900 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5901 [(set_attr "type" "ssecvt")
5902 (set_attr "prefix" "maybe_evex")
5903 (set_attr "mode" "OI")])
;; 256-bit unsigned truncating form (AVX512VL-only instruction).
5905 (define_insn "ufix_truncv4dfv4si2<mask_name>"
5906 [(set (match_operand:V4SI 0 "register_operand" "=v")
5907 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5908 "TARGET_AVX512VL && TARGET_AVX512F"
5909 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5910 [(set_attr "type" "ssecvt")
5911 (set_attr "prefix" "maybe_evex")
5912 (set_attr "mode" "OI")])
;; Truncating double -> 64-bit int: VCVTTPD2QQ / VCVTTPD2UQQ (AVX512DQ).
5914 (define_insn "fix<fixunssuffix>_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
5915 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5916 (any_fix:<sseintvecmode>
5917 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5918 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
5919 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5920 [(set_attr "type" "ssecvt")
5921 (set_attr "prefix" "evex")
5922 (set_attr "mode" "<sseintvecmode2>")])
;; Rounding (non-truncating) double -> 64-bit int: VCVTPD2QQ (signed)
;; and VCVTPD2UQQ (unsigned), both AVX512DQ.
5924 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5925 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5926 (unspec:<sseintvecmode>
5927 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5928 UNSPEC_FIX_NOTRUNC))]
5929 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5930 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5931 [(set_attr "type" "ssecvt")
5932 (set_attr "prefix" "evex")
5933 (set_attr "mode" "<sseintvecmode2>")])
5935 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5936 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5937 (unspec:<sseintvecmode>
5938 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
5939 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5940 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5941 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5942 [(set_attr "type" "ssecvt")
5943 (set_attr "prefix" "evex")
5944 (set_attr "mode" "<sseintvecmode2>")])
;; Truncating single -> 64-bit int (widening): VCVTTPS2QQ/VCVTTPS2UQQ.
5946 (define_insn "fix<fixunssuffix>_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
5947 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
5948 (any_fix:<sselongvecmode>
5949 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5950 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
5951 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5952 [(set_attr "type" "ssecvt")
5953 (set_attr "prefix" "evex")
5954 (set_attr "mode" "<sseintvecmode3>")])
;; 128-bit variant: low two SF elements -> V2DI; %q1 prints the 64-bit
;; memory form of the source.
5956 (define_insn "fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
5957 [(set (match_operand:V2DI 0 "register_operand" "=v")
5960 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5961 (parallel [(const_int 0) (const_int 1)]))))]
5962 "TARGET_AVX512DQ && TARGET_AVX512VL"
5963 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5964 [(set_attr "type" "ssecvt")
5965 (set_attr "prefix" "evex")
5966 (set_attr "mode" "TI")])
;; Helper mode attributes for the vec_unpack fix-trunc expanders below:
;; result DI-vector mode (and its lowercase spelling), plus the mode
;; used when extracting the source half.
5968 (define_mode_attr vunpckfixt_mode
5969 [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
5970 (define_mode_attr vunpckfixt_model
5971 [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
5972 (define_mode_attr vunpckfixt_extract_mode
5973 [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])
;; Unpack-and-truncate float -> DImode-int expanders.  Low half: for
;; modes wider than V4SF, first extract the low half into a temp, then
;; emit the matching fix_trunc pattern.
;; NOTE(review): numbering gaps show some lines (conditions, closing
;; braces, extract arguments) were dropped from this copy — verify.
5975 (define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
5976 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
5977 (any_fix:<vunpckfixt_mode>
5978 (match_operand:VF1_AVX512VL 1 "register_operand"))]
5981 rtx tem = operands[1];
5982 if (<MODE>mode != V4SFmode)
5984 tem = gen_reg_rtx (<ssehalfvecmode>mode);
5985 emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
5988 rtx (*gen) (rtx, rtx)
5989 = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
5990 emit_insn (gen (operands[0], tem));
;; High half: wider modes extract the upper half; the V4SF case instead
;; moves the high elements down with a VPERMILPS shuffle (imm 0x4e).
5994 (define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
5995 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
5996 (any_fix:<vunpckfixt_mode>
5997 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6001 if (<MODE>mode != V4SFmode)
6003 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6004 emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
6009 tem = gen_reg_rtx (V4SFmode);
6010 emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
6012 rtx (*gen) (rtx, rtx)
6013 = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6014 emit_insn (gen (operands[0], tem));
;; Truncating single -> unsigned int: VCVTTPS2UDQ (128/256-bit forms).
6018 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
6019 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6020 (unsigned_fix:<sseintvecmode>
6021 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
6023 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6024 [(set_attr "type" "ssecvt")
6025 (set_attr "prefix" "evex")
6026 (set_attr "mode" "<sseintvecmode2>")])
;; Expander: truncating V4DF -> V4SI result placed in the low half of a
;; V8SI destination, with operand 2 supplying the zero upper half.
6028 (define_expand "avx_cvttpd2dq256_2"
6029 [(set (match_operand:V8SI 0 "register_operand")
6031 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
6034 "operands[2] = CONST0_RTX (V4SImode);")
;; 128-bit truncating V2DF -> V2SI, upper half zeroed; C-body template
;; picks the VEX/EVEX vs. legacy encoding.
6036 (define_insn "sse2_cvttpd2dq<mask_name>"
6037 [(set (match_operand:V4SI 0 "register_operand" "=v")
6039 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
6040 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6041 "TARGET_SSE2 && <mask_avx512vl_condition>"
6044 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
6046 return "cvttpd2dq\t{%1, %0|%0, %1}";
6048 [(set_attr "type" "ssecvt")
6049 (set_attr "amdfam10_decode" "double")
6050 (set_attr "athlon_decode" "vector")
6051 (set_attr "bdver1_decode" "double")
6052 (set_attr "prefix" "maybe_vex")
6053 (set_attr "mode" "TI")])
;; Scalar double <-> single conversions operating on the low element of
;; a vector register.  Three alternatives each: legacy SSE reg, legacy
;; SSE mem, and the 3-operand AVX/EVEX form.
;; NOTE(review): numbering gaps indicate dropped lines (vec_merge
;; wrappers, conditions) in this copy — verify against upstream.
6055 (define_insn "sse2_cvtsd2ss<round_name>"
6056 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6059 (float_truncate:V2SF
6060 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
6061 (match_operand:V4SF 1 "register_operand" "0,0,v")
6065 cvtsd2ss\t{%2, %0|%0, %2}
6066 cvtsd2ss\t{%2, %0|%0, %q2}
6067 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
6068 [(set_attr "isa" "noavx,noavx,avx")
6069 (set_attr "type" "ssecvt")
6070 (set_attr "athlon_decode" "vector,double,*")
6071 (set_attr "amdfam10_decode" "vector,double,*")
6072 (set_attr "bdver1_decode" "direct,direct,*")
6073 (set_attr "btver2_decode" "double,double,double")
6074 (set_attr "prefix" "orig,orig,<round_prefix>")
6075 (set_attr "mode" "SF")])
;; Variant matching a scalar DF source (float_truncate:SF of a DFmode
;; operand) rather than a V2DF low element.
6077 (define_insn "*sse2_vd_cvtsd2ss"
6078 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6081 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
6082 (match_operand:V4SF 1 "register_operand" "0,0,v")
6086 cvtsd2ss\t{%2, %0|%0, %2}
6087 cvtsd2ss\t{%2, %0|%0, %2}
6088 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
6089 [(set_attr "isa" "noavx,noavx,avx")
6090 (set_attr "type" "ssecvt")
6091 (set_attr "athlon_decode" "vector,double,*")
6092 (set_attr "amdfam10_decode" "vector,double,*")
6093 (set_attr "bdver1_decode" "direct,direct,*")
6094 (set_attr "btver2_decode" "double,double,double")
6095 (set_attr "prefix" "orig,orig,vex")
6096 (set_attr "mode" "SF")])
;; Scalar single -> double; %k2 prints the 32-bit memory form of the
;; source.  round_saeonly: SAE (suppress-all-exceptions) but no
;; rounding-mode override, since the result is exact.
6098 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
6099 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6103 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
6104 (parallel [(const_int 0) (const_int 1)])))
6105 (match_operand:V2DF 1 "register_operand" "0,0,v")
6109 cvtss2sd\t{%2, %0|%0, %2}
6110 cvtss2sd\t{%2, %0|%0, %k2}
6111 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
6112 [(set_attr "isa" "noavx,noavx,avx")
6113 (set_attr "type" "ssecvt")
6114 (set_attr "amdfam10_decode" "vector,double,*")
6115 (set_attr "athlon_decode" "direct,direct,*")
6116 (set_attr "bdver1_decode" "direct,direct,*")
6117 (set_attr "btver2_decode" "double,double,double")
6118 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
6119 (set_attr "mode" "DF")])
;; Variant matching a scalar SF source (float_extend:DF of SFmode).
6121 (define_insn "*sse2_vd_cvtss2sd"
6122 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6125 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
6126 (match_operand:V2DF 1 "register_operand" "0,0,v")
6130 cvtss2sd\t{%2, %0|%0, %2}
6131 cvtss2sd\t{%2, %0|%0, %2}
6132 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
6133 [(set_attr "isa" "noavx,noavx,avx")
6134 (set_attr "type" "ssecvt")
6135 (set_attr "amdfam10_decode" "vector,double,*")
6136 (set_attr "athlon_decode" "direct,direct,*")
6137 (set_attr "bdver1_decode" "direct,direct,*")
6138 (set_attr "btver2_decode" "double,double,double")
6139 (set_attr "prefix" "orig,orig,vex")
6140 (set_attr "mode" "DF")])
;; Packed double -> single narrowing conversions (VCVTPD2PS).
;; 512-bit form supports masking and embedded rounding.
6142 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
6143 [(set (match_operand:V8SF 0 "register_operand" "=v")
6144 (float_truncate:V8SF
6145 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
6147 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6148 [(set_attr "type" "ssecvt")
6149 (set_attr "prefix" "evex")
6150 (set_attr "mode" "V8SF")])
;; 256-bit V4DF -> V4SF; {y} disambiguates the memory-source size.
6152 (define_insn "avx_cvtpd2ps256<mask_name>"
6153 [(set (match_operand:V4SF 0 "register_operand" "=v")
6154 (float_truncate:V4SF
6155 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6156 "TARGET_AVX && <mask_avx512vl_condition>"
6157 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6158 [(set_attr "type" "ssecvt")
6159 (set_attr "prefix" "maybe_evex")
6160 (set_attr "btver2_decode" "vector")
6161 (set_attr "mode" "V4SF")])
;; Expander: V2DF -> low V2SF half of a V4SF destination; operand 2 is
;; the zero vector filling the upper half.
6163 (define_expand "sse2_cvtpd2ps"
6164 [(set (match_operand:V4SF 0 "register_operand")
6166 (float_truncate:V2SF
6167 (match_operand:V2DF 1 "vector_operand"))
6170 "operands[2] = CONST0_RTX (V2SFmode);")
;; Masked expander variant: operand 2 = merge source, operand 3 = mask
;; register; operand 4 supplies the V2SF zero.
6172 (define_expand "sse2_cvtpd2ps_mask"
6173 [(set (match_operand:V4SF 0 "register_operand")
6176 (float_truncate:V2SF
6177 (match_operand:V2DF 1 "vector_operand"))
6179 (match_operand:V4SF 2 "register_operand")
6180 (match_operand:QI 3 "register_operand")))]
6182 "operands[4] = CONST0_RTX (V2SFmode);")
;; Matching insn; C-body template selects VEX/EVEX vs. legacy encoding.
6184 (define_insn "*sse2_cvtpd2ps<mask_name>"
6185 [(set (match_operand:V4SF 0 "register_operand" "=v")
6187 (float_truncate:V2SF
6188 (match_operand:V2DF 1 "vector_operand" "vBm"))
6189 (match_operand:V2SF 2 "const0_operand")))]
6190 "TARGET_SSE2 && <mask_avx512vl_condition>"
6193 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
6195 return "cvtpd2ps\t{%1, %0|%0, %1}";
6197 [(set_attr "type" "ssecvt")
6198 (set_attr "amdfam10_decode" "double")
6199 (set_attr "athlon_decode" "vector")
6200 (set_attr "bdver1_decode" "double")
6201 (set_attr "prefix_data16" "1")
6202 (set_attr "prefix" "maybe_vex")
6203 (set_attr "mode" "V4SF")])
6205 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Source single-precision mode matching each double-precision result.
6206 (define_mode_attr sf2dfmode
6207 [(V8DF "V8SF") (V4DF "V4SF")])
;; Packed single -> double widening (VCVTPS2PD), 256/512-bit, with
;; optional masking and SAE.
6209 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
6210 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
6211 (float_extend:VF2_512_256
6212 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6213 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
6214 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6215 [(set_attr "type" "ssecvt")
6216 (set_attr "prefix" "maybe_vex")
6217 (set_attr "mode" "<MODE>")])
;; Low 4 of 8 SF elements -> V4DF; %x1 prints the 128-bit source form.
;; NOTE(review): numbering gaps show dropped vec_select/condition lines
;; in this copy — verify against upstream sse.md.
6219 (define_insn "*avx_cvtps2pd256_2"
6220 [(set (match_operand:V4DF 0 "register_operand" "=v")
6223 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6224 (parallel [(const_int 0) (const_int 1)
6225 (const_int 2) (const_int 3)]))))]
6227 "vcvtps2pd\t{%x1, %0|%0, %x1}"
6228 [(set_attr "type" "ssecvt")
6229 (set_attr "prefix" "vex")
6230 (set_attr "mode" "V4DF")])
;; Low 8 of 16 SF elements -> V8DF; %t1 prints the 256-bit source form.
6232 (define_insn "vec_unpacks_lo_v16sf"
6233 [(set (match_operand:V8DF 0 "register_operand" "=v")
6236 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6237 (parallel [(const_int 0) (const_int 1)
6238 (const_int 2) (const_int 3)
6239 (const_int 4) (const_int 5)
6240 (const_int 6) (const_int 7)]))))]
6242 "vcvtps2pd\t{%t1, %0|%0, %t1}"
6243 [(set_attr "type" "ssecvt")
6244 (set_attr "prefix" "evex")
6245 (set_attr "mode" "V8DF")])
;; Integer-vector <-> mask-register conversions.  VPMOV*2M moves each
;; element's sign bit into a mask register; VPMOVM2* broadcasts each
;; mask bit to an all-ones/all-zeros element.  Patterns are duplicated
;; for the byte/word (VI12, AVX512BW) and dword/qword (VI48, AVX512DQ)
;; element-size families.
6247 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6248 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6249 (unspec:<avx512fmaskmode>
6250 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
6251 UNSPEC_CVTINT2MASK))]
6253 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6254 [(set_attr "prefix" "evex")
6255 (set_attr "mode" "<sseinsnmode>")])
6257 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6258 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6259 (unspec:<avx512fmaskmode>
6260 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
6261 UNSPEC_CVTINT2MASK))]
6263 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6264 [(set_attr "prefix" "evex")
6265 (set_attr "mode" "<sseinsnmode>")])
;; mask -> vector expander: materializes the all-ones (operand 2) and
;; zero (operand 3) vectors merged under the mask.
6267 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6268 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
6269 (vec_merge:VI12_AVX512VL
6272 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6275 operands[2] = CONSTM1_RTX (<MODE>mode);
6276 operands[3] = CONST0_RTX (<MODE>mode);
;; Matching insn: vec_merge of all-ones and zero under mask == VPMOVM2*.
6279 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6280 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
6281 (vec_merge:VI12_AVX512VL
6282 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
6283 (match_operand:VI12_AVX512VL 3 "const0_operand")
6284 (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
6286 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
6287 [(set_attr "prefix" "evex")
6288 (set_attr "mode" "<sseinsnmode>")])
;; Same expander/insn pair for the dword/qword element family.
6290 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6291 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
6292 (vec_merge:VI48_AVX512VL
6295 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6298 operands[2] = CONSTM1_RTX (<MODE>mode);
6299 operands[3] = CONST0_RTX (<MODE>mode);
6302 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6303 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
6304 (vec_merge:VI48_AVX512VL
6305 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
6306 (match_operand:VI48_AVX512VL 3 "const0_operand")
6307 (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
6309 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
6310 [(set_attr "prefix" "evex")
6311 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit single -> double: low 2 of 4 SF elements; %q1 prints the
;; 64-bit memory form of the source.
6313 (define_insn "sse2_cvtps2pd<mask_name>"
6314 [(set (match_operand:V2DF 0 "register_operand" "=v")
6317 (match_operand:V4SF 1 "vector_operand" "vm")
6318 (parallel [(const_int 0) (const_int 1)]))))]
6319 "TARGET_SSE2 && <mask_avx512vl_condition>"
6320 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6321 [(set_attr "type" "ssecvt")
6322 (set_attr "amdfam10_decode" "direct")
6323 (set_attr "athlon_decode" "double")
6324 (set_attr "bdver1_decode" "double")
6325 (set_attr "prefix_data16" "0")
6326 (set_attr "prefix" "maybe_vex")
6327 (set_attr "mode" "V2DF")])
;; vec_unpacks_hi_*: shuffle the high SF elements into the low
;; positions of a temp (operand 2), then convert the low half.
;; NOTE(review): numbering gaps indicate dropped lines in these
;; expanders (shuffle set, conditions) — verify against upstream.
6329 (define_expand "vec_unpacks_hi_v4sf"
6334 (match_operand:V4SF 1 "vector_operand"))
6335 (parallel [(const_int 6) (const_int 7)
6336 (const_int 2) (const_int 3)])))
6337 (set (match_operand:V2DF 0 "register_operand")
6341 (parallel [(const_int 0) (const_int 1)]))))]
6343 "operands[2] = gen_reg_rtx (V4SFmode);")
6345 (define_expand "vec_unpacks_hi_v8sf"
6348 (match_operand:V8SF 1 "register_operand")
6349 (parallel [(const_int 4) (const_int 5)
6350 (const_int 6) (const_int 7)])))
6351 (set (match_operand:V4DF 0 "register_operand")
6355 "operands[2] = gen_reg_rtx (V4SFmode);")
6357 (define_expand "vec_unpacks_hi_v16sf"
6360 (match_operand:V16SF 1 "register_operand")
6361 (parallel [(const_int 8) (const_int 9)
6362 (const_int 10) (const_int 11)
6363 (const_int 12) (const_int 13)
6364 (const_int 14) (const_int 15)])))
6365 (set (match_operand:V8DF 0 "register_operand")
6369 "operands[2] = gen_reg_rtx (V8SFmode);")
;; vec_unpacks_lo_*: directly select and widen the low half.
6371 (define_expand "vec_unpacks_lo_v4sf"
6372 [(set (match_operand:V2DF 0 "register_operand")
6375 (match_operand:V4SF 1 "vector_operand")
6376 (parallel [(const_int 0) (const_int 1)]))))]
6379 (define_expand "vec_unpacks_lo_v8sf"
6380 [(set (match_operand:V4DF 0 "register_operand")
6383 (match_operand:V8SF 1 "nonimmediate_operand")
6384 (parallel [(const_int 0) (const_int 1)
6385 (const_int 2) (const_int 3)]))))]
;; Float-vector mode produced when float-converting half of each
;; integer-vector mode below.
6388 (define_mode_attr sseunpackfltmode
6389 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
6390 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; unpack-then-float expanders for HImode-element vectors: widen via the
;; integer vec_unpack{s,u}_{hi,lo} pattern into a temp, then emit a
;; FLOAT of the temp into the destination.
6392 (define_expand "vec_unpacks_float_hi_<mode>"
6393 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6394 (match_operand:VI2_AVX512F 1 "register_operand")]
6397 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6399 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
6400 emit_insn (gen_rtx_SET (operands[0],
6401 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6405 (define_expand "vec_unpacks_float_lo_<mode>"
6406 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6407 (match_operand:VI2_AVX512F 1 "register_operand")]
6410 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6412 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
6413 emit_insn (gen_rtx_SET (operands[0],
6414 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned flavors: identical shape, but widen with vec_unpacku_*
;; (zero-extension) before the FLOAT.
6418 (define_expand "vec_unpacku_float_hi_<mode>"
6419 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6420 (match_operand:VI2_AVX512F 1 "register_operand")]
6423 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6425 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
6426 emit_insn (gen_rtx_SET (operands[0],
6427 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6431 (define_expand "vec_unpacku_float_lo_<mode>"
6432 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6433 (match_operand:VI2_AVX512F 1 "register_operand")]
6436 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6438 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
6439 emit_insn (gen_rtx_SET (operands[0],
6440 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; SImode-element versions implemented directly in RTL: hi variants
;; shuffle the high elements into a temp (operand 2), then convert; lo
;; variants select and convert the low half in one pattern.
;; NOTE(review): numbering gaps indicate dropped lines in these
;; patterns — verify against upstream sse.md.
6444 (define_expand "vec_unpacks_float_hi_v4si"
6447 (match_operand:V4SI 1 "vector_operand")
6448 (parallel [(const_int 2) (const_int 3)
6449 (const_int 2) (const_int 3)])))
6450 (set (match_operand:V2DF 0 "register_operand")
6454 (parallel [(const_int 0) (const_int 1)]))))]
6456 "operands[2] = gen_reg_rtx (V4SImode);")
6458 (define_expand "vec_unpacks_float_lo_v4si"
6459 [(set (match_operand:V2DF 0 "register_operand")
6462 (match_operand:V4SI 1 "vector_operand")
6463 (parallel [(const_int 0) (const_int 1)]))))]
6466 (define_expand "vec_unpacks_float_hi_v8si"
6469 (match_operand:V8SI 1 "vector_operand")
6470 (parallel [(const_int 4) (const_int 5)
6471 (const_int 6) (const_int 7)])))
6472 (set (match_operand:V4DF 0 "register_operand")
6476 "operands[2] = gen_reg_rtx (V4SImode);")
6478 (define_expand "vec_unpacks_float_lo_v8si"
6479 [(set (match_operand:V4DF 0 "register_operand")
6482 (match_operand:V8SI 1 "nonimmediate_operand")
6483 (parallel [(const_int 0) (const_int 1)
6484 (const_int 2) (const_int 3)]))))]
6487 (define_expand "vec_unpacks_float_hi_v16si"
6490 (match_operand:V16SI 1 "nonimmediate_operand")
6491 (parallel [(const_int 8) (const_int 9)
6492 (const_int 10) (const_int 11)
6493 (const_int 12) (const_int 13)
6494 (const_int 14) (const_int 15)])))
6495 (set (match_operand:V8DF 0 "register_operand")
6499 "operands[2] = gen_reg_rtx (V8SImode);")
6501 (define_expand "vec_unpacks_float_lo_v16si"
6502 [(set (match_operand:V8DF 0 "register_operand")
6505 (match_operand:V16SI 1 "nonimmediate_operand")
6506 (parallel [(const_int 0) (const_int 1)
6507 (const_int 2) (const_int 3)
6508 (const_int 4) (const_int 5)
6509 (const_int 6) (const_int 7)]))))]
;; Unsigned SI -> double unpack-and-float.  Strategy: convert as SIGNED
;; (vcvtdq2pd), then fix up negatives by conditionally adding 2^32 —
;; result = signed(x) + (signed(x) < 0 ? 2^32 : 0).  AVX512 variants use
;; a mask register and a masked add instead of the compare/and/add trio.
;; NOTE(review): numbering gaps indicate dropped lines (shuffle sets,
;; conditions, braces) in these expanders — verify against upstream.
6512 (define_expand "vec_unpacku_float_hi_v4si"
6515 (match_operand:V4SI 1 "vector_operand")
6516 (parallel [(const_int 2) (const_int 3)
6517 (const_int 2) (const_int 3)])))
6522 (parallel [(const_int 0) (const_int 1)]))))
6524 (lt:V2DF (match_dup 6) (match_dup 3)))
6526 (and:V2DF (match_dup 7) (match_dup 4)))
6527 (set (match_operand:V2DF 0 "register_operand")
6528 (plus:V2DF (match_dup 6) (match_dup 8)))]
6531 REAL_VALUE_TYPE TWO32r;
6535 real_ldexp (&TWO32r, &dconst1, 32);
6536 x = const_double_from_real_value (TWO32r, DFmode);
6538 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6539 operands[4] = force_reg (V2DFmode,
6540 ix86_build_const_vector (V2DFmode, 1, x));
6542 operands[5] = gen_reg_rtx (V4SImode);
6544 for (i = 6; i < 9; i++)
6545 operands[i] = gen_reg_rtx (V2DFmode);
;; Low-half variant: no shuffle needed, so one fewer temp.
6548 (define_expand "vec_unpacku_float_lo_v4si"
6552 (match_operand:V4SI 1 "vector_operand")
6553 (parallel [(const_int 0) (const_int 1)]))))
6555 (lt:V2DF (match_dup 5) (match_dup 3)))
6557 (and:V2DF (match_dup 6) (match_dup 4)))
6558 (set (match_operand:V2DF 0 "register_operand")
6559 (plus:V2DF (match_dup 5) (match_dup 7)))]
6562 REAL_VALUE_TYPE TWO32r;
6566 real_ldexp (&TWO32r, &dconst1, 32);
6567 x = const_double_from_real_value (TWO32r, DFmode);
6569 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6570 operands[4] = force_reg (V2DFmode,
6571 ix86_build_const_vector (V2DFmode, 1, x));
6573 for (i = 5; i < 8; i++)
6574 operands[i] = gen_reg_rtx (V2DFmode);
;; 256-bit high half, built entirely in C: extract hi, signed convert,
;; compare < 0, mask the 2^32 vector, add.
6577 (define_expand "vec_unpacku_float_hi_v8si"
6578 [(match_operand:V4DF 0 "register_operand")
6579 (match_operand:V8SI 1 "register_operand")]
6582 REAL_VALUE_TYPE TWO32r;
6586 real_ldexp (&TWO32r, &dconst1, 32);
6587 x = const_double_from_real_value (TWO32r, DFmode);
6589 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
6590 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
6591 tmp[5] = gen_reg_rtx (V4SImode);
6593 for (i = 2; i < 5; i++)
6594 tmp[i] = gen_reg_rtx (V4DFmode);
6595 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
6596 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
6597 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
6598 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
6599 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; 512-bit high half: the LT produces a QImode mask register k, and the
;; fixup is a single masked add (add 2^32 only where the mask is set).
6603 (define_expand "vec_unpacku_float_hi_v16si"
6604 [(match_operand:V8DF 0 "register_operand")
6605 (match_operand:V16SI 1 "register_operand")]
6608 REAL_VALUE_TYPE TWO32r;
6611 real_ldexp (&TWO32r, &dconst1, 32);
6612 x = const_double_from_real_value (TWO32r, DFmode);
6614 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
6615 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
6616 tmp[2] = gen_reg_rtx (V8DFmode);
6617 tmp[3] = gen_reg_rtx (V8SImode);
6618 k = gen_reg_rtx (QImode);
6620 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
6621 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
6622 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
6623 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
6624 emit_move_insn (operands[0], tmp[2]);
;; 256-bit low half: convert directly via avx_cvtdq2pd256_2 (no extract).
6628 (define_expand "vec_unpacku_float_lo_v8si"
6629 [(match_operand:V4DF 0 "register_operand")
6630 (match_operand:V8SI 1 "nonimmediate_operand")]
6633 REAL_VALUE_TYPE TWO32r;
6637 real_ldexp (&TWO32r, &dconst1, 32);
6638 x = const_double_from_real_value (TWO32r, DFmode);
6640 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
6641 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
6643 for (i = 2; i < 5; i++)
6644 tmp[i] = gen_reg_rtx (V4DFmode);
6645 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
6646 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
6647 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
6648 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; 512-bit low half: same masked-add scheme as the hi_v16si variant.
6652 (define_expand "vec_unpacku_float_lo_v16si"
6653 [(match_operand:V8DF 0 "register_operand")
6654 (match_operand:V16SI 1 "nonimmediate_operand")]
6657 REAL_VALUE_TYPE TWO32r;
6660 real_ldexp (&TWO32r, &dconst1, 32);
6661 x = const_double_from_real_value (TWO32r, DFmode);
6663 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
6664 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
6665 tmp[2] = gen_reg_rtx (V8DFmode);
6666 k = gen_reg_rtx (QImode);
6668 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
6669 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
6670 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
6671 emit_move_insn (operands[0], tmp[2]);
;; Pack two double-precision vectors into one single-precision vector:
;; truncate each half into a temp, then concatenate.
;; NOTE(review): numbering gaps indicate dropped lines (temps in the
;; RTL template, conditions) in this copy — verify against upstream.
6675 (define_expand "vec_pack_trunc_<mode>"
6677 (float_truncate:<sf2dfmode>
6678 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
6680 (float_truncate:<sf2dfmode>
6681 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
6682 (set (match_operand:<ssePSmode> 0 "register_operand")
6683 (vec_concat:<ssePSmode>
6688 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
6689 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; V2DF case: with AVX (and no 128-bit preference) concat to V4DF and
;; convert once; otherwise convert each half and join with MOVLHPS.
6692 (define_expand "vec_pack_trunc_v2df"
6693 [(match_operand:V4SF 0 "register_operand")
6694 (match_operand:V2DF 1 "vector_operand")
6695 (match_operand:V2DF 2 "vector_operand")]
6700 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6702 tmp0 = gen_reg_rtx (V4DFmode);
6703 tmp1 = force_reg (V2DFmode, operands[1]);
6705 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6706 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
6710 tmp0 = gen_reg_rtx (V4SFmode);
6711 tmp1 = gen_reg_rtx (V4SFmode);
6713 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
6714 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
6715 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Pack two double vectors into one signed-int vector via truncating
;; conversion of each half, then concatenation.
6720 (define_expand "vec_pack_sfix_trunc_v8df"
6721 [(match_operand:V16SI 0 "register_operand")
6722 (match_operand:V8DF 1 "nonimmediate_operand")
6723 (match_operand:V8DF 2 "nonimmediate_operand")]
6728 r1 = gen_reg_rtx (V8SImode);
6729 r2 = gen_reg_rtx (V8SImode);
6731 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
6732 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
6733 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6737 (define_expand "vec_pack_sfix_trunc_v4df"
6738 [(match_operand:V8SI 0 "register_operand")
6739 (match_operand:V4DF 1 "nonimmediate_operand")
6740 (match_operand:V4DF 2 "nonimmediate_operand")]
6745 r1 = gen_reg_rtx (V4SImode);
6746 r2 = gen_reg_rtx (V4SImode);
6748 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
6749 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
6750 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; V2DF case: AVX path concats to V4DF first; SSE2 path converts each
;; half (results land in low V2SI, upper zeroed) and interleaves the
;; low DI words to form the packed V4SI result.
6754 (define_expand "vec_pack_sfix_trunc_v2df"
6755 [(match_operand:V4SI 0 "register_operand")
6756 (match_operand:V2DF 1 "vector_operand")
6757 (match_operand:V2DF 2 "vector_operand")]
6760 rtx tmp0, tmp1, tmp2;
6762 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6764 tmp0 = gen_reg_rtx (V4DFmode);
6765 tmp1 = force_reg (V2DFmode, operands[1]);
6767 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6768 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
6772 tmp0 = gen_reg_rtx (V4SImode);
6773 tmp1 = gen_reg_rtx (V4SImode);
6774 tmp2 = gen_reg_rtx (V2DImode);
6776 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
6777 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
6778 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6779 gen_lowpart (V2DImode, tmp0),
6780 gen_lowpart (V2DImode, tmp1)));
6781 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Maps a DF vector mode to the SI vector mode holding the packed result
;; of converting two such vectors (twice the element count).
6786 (define_mode_attr ssepackfltmode
6787 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Unsigned truncating pack.  For V8DF a direct unsigned convert exists
;; (fixuns_truncv8dfv8si2) so each half is converted and concatenated.
;; Otherwise the inputs are pre-adjusted into a signed-convertible range
;; (ix86_expand_adjust_ufix_to_sfix_si also yields per-element correction
;; masks in tmp[2]/tmp[3]), packed via the signed expander above, the two
;; correction masks are compressed with an extract-even/odd (through an
;; V8SF lowpart view when no AVX2 integer 256-bit shuffle is available),
;; and the final result is fixed up by XORing pack and mask.
6789 (define_expand "vec_pack_ufix_trunc_<mode>"
6790 [(match_operand:<ssepackfltmode> 0 "register_operand")
6791 (match_operand:VF2 1 "register_operand")
6792 (match_operand:VF2 2 "register_operand")]
6795 if (<MODE>mode == V8DFmode)
6799 r1 = gen_reg_rtx (V8SImode);
6800 r2 = gen_reg_rtx (V8SImode);
6802 emit_insn (gen_fixuns_truncv8dfv8si2 (r1, operands[1]));
6803 emit_insn (gen_fixuns_truncv8dfv8si2 (r2, operands[2]));
6804 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6809 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
6810 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
6811 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
6812 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
6813 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
6815 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
6816 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
6820 tmp[5] = gen_reg_rtx (V8SFmode);
6821 ix86_expand_vec_extract_even_odd (tmp[5],
6822 gen_lowpart (V8SFmode, tmp[2]),
6823 gen_lowpart (V8SFmode, tmp[3]), 0);
6824 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
6826 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
6827 operands[0], 0, OPTAB_DIRECT);
6828 if (tmp[6] != operands[0])
6829 emit_move_insn (operands[0], tmp[6]);
;; Rounding (non-truncating) signed packs: like the _trunc expanders
;; above, but using cvtpd2dq (round to nearest per MXCSR) rather than
;; cvttpd2dq, then concatenating the two integer halves.
6835 (define_expand "avx512f_vec_pack_sfix_v8df"
6836 [(match_operand:V16SI 0 "register_operand")
6837 (match_operand:V8DF 1 "nonimmediate_operand")
6838 (match_operand:V8DF 2 "nonimmediate_operand")]
6843 r1 = gen_reg_rtx (V8SImode);
6844 r2 = gen_reg_rtx (V8SImode);
6846 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
6847 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
6848 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; 256-bit rounding pack, same structure with the AVX 256-bit convert.
6852 (define_expand "vec_pack_sfix_v4df"
6853 [(match_operand:V8SI 0 "register_operand")
6854 (match_operand:V4DF 1 "nonimmediate_operand")
6855 (match_operand:V4DF 2 "nonimmediate_operand")]
6860 r1 = gen_reg_rtx (V4SImode);
6861 r2 = gen_reg_rtx (V4SImode);
6863 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]))
6864 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
6865 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; 128-bit rounding pack; mirrors vec_pack_sfix_trunc_v2df but with the
;; rounding converts (cvtpd2dq).  Fast AVX path concatenates to V4DF and
;; converts once; fallback converts each half and interleaves the low
;; V2DI quadwords.
6869 (define_expand "vec_pack_sfix_v2df"
6870 [(match_operand:V4SI 0 "register_operand")
6871 (match_operand:V2DF 1 "vector_operand")
6872 (match_operand:V2DF 2 "vector_operand")]
6875 rtx tmp0, tmp1, tmp2;
6877 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6879 tmp0 = gen_reg_rtx (V4DFmode);
6880 tmp1 = force_reg (V2DFmode, operands[1]);
6882 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6883 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
6887 tmp0 = gen_reg_rtx (V4SImode);
6888 tmp1 = gen_reg_rtx (V4SImode);
6889 tmp2 = gen_reg_rtx (V2DImode);
6891 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
6892 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
6893 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6894 gen_lowpart (V2DImode, tmp0),
6895 gen_lowpart (V2DImode, tmp1)));
6896 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6901 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6903 ;; Parallel single-precision floating point element swizzling
6905 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movhlps expander: canonicalize operands for the binary pattern
;; (ix86_fixup_binary_operands may substitute a fresh destination),
;; emit the insn, then copy back if a temporary destination was used.
6907 (define_expand "sse_movhlps_exp"
6908 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6911 (match_operand:V4SF 1 "nonimmediate_operand")
6912 (match_operand:V4SF 2 "nonimmediate_operand"))
6913 (parallel [(const_int 6)
6919 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6921 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
6923 /* Fix up the destination if needed. */
6924 if (dst != operands[0])
6925 emit_move_insn (operands[0], dst);
;; movhlps insn.  Alternatives: SSE reg/reg, AVX 3-operand, loading the
;; high half from memory via movlps (%H2 addresses the high 8 bytes),
;; its AVX form, and storing to memory with movhps.  At most one memory
;; operand is allowed (enforced by the condition).
6930 (define_insn "sse_movhlps"
6931 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
6934 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6935 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
6936 (parallel [(const_int 6)
6940 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6942 movhlps\t{%2, %0|%0, %2}
6943 vmovhlps\t{%2, %1, %0|%0, %1, %2}
6944 movlps\t{%H2, %0|%0, %H2}
6945 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
6946 %vmovhps\t{%2, %0|%q0, %2}"
6947 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6948 (set_attr "type" "ssemov")
6949 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6950 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movlhps expander: same fixup-then-emit shape as sse_movhlps_exp.
6952 (define_expand "sse_movlhps_exp"
6953 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6956 (match_operand:V4SF 1 "nonimmediate_operand")
6957 (match_operand:V4SF 2 "nonimmediate_operand"))
6958 (parallel [(const_int 0)
6964 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6966 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
6968 /* Fix up the destination if needed. */
6969 if (dst != operands[0])
6970 emit_move_insn (operands[0], dst);
;; movlhps insn.  Alternatives mirror sse_movhlps: reg forms, a memory
;; source loaded into the high half via movhps (%q2 = low 8 bytes), and
;; a store of the low half to the high 8 bytes of memory (%H0).
6975 (define_insn "sse_movlhps"
6976 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
6979 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6980 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
6981 (parallel [(const_int 0)
6985 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
6987 movlhps\t{%2, %0|%0, %2}
6988 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6989 movhps\t{%2, %0|%0, %q2}
6990 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6991 %vmovlps\t{%2, %H0|%H0, %2}"
6992 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6993 (set_attr "type" "ssemov")
6994 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6995 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; 512-bit vunpckhps, optionally masked (<mask_name>/<mask_operand3>):
;; the vec_select parallel interleaves the high pair of each 128-bit
;; lane from operands 1 and 2.
6997 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
6998 [(set (match_operand:V16SF 0 "register_operand" "=v")
7001 (match_operand:V16SF 1 "register_operand" "v")
7002 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7003 (parallel [(const_int 2) (const_int 18)
7004 (const_int 3) (const_int 19)
7005 (const_int 6) (const_int 22)
7006 (const_int 7) (const_int 23)
7007 (const_int 10) (const_int 26)
7008 (const_int 11) (const_int 27)
7009 (const_int 14) (const_int 30)
7010 (const_int 15) (const_int 31)])))]
7012 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7013 [(set_attr "type" "sselog")
7014 (set_attr "prefix" "evex")
7015 (set_attr "mode" "V16SF")])
7017 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps: per-lane high interleave of the two V8SF inputs.
7018 (define_insn "avx_unpckhps256<mask_name>"
7019 [(set (match_operand:V8SF 0 "register_operand" "=v")
7022 (match_operand:V8SF 1 "register_operand" "v")
7023 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7024 (parallel [(const_int 2) (const_int 10)
7025 (const_int 3) (const_int 11)
7026 (const_int 6) (const_int 14)
7027 (const_int 7) (const_int 15)])))]
7028 "TARGET_AVX && <mask_avx512vl_condition>"
7029 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7030 [(set_attr "type" "sselog")
7031 (set_attr "prefix" "vex")
7032 (set_attr "mode" "V8SF")])
;; Cross-lane high interleave for V8SF, built from two in-lane unpcks
;; into fresh temporaries (operands 3 and 4, created in the preparation
;; code) followed by a vec_select gathering the high 128-bit halves.
7034 (define_expand "vec_interleave_highv8sf"
7038 (match_operand:V8SF 1 "register_operand")
7039 (match_operand:V8SF 2 "nonimmediate_operand"))
7040 (parallel [(const_int 0) (const_int 8)
7041 (const_int 1) (const_int 9)
7042 (const_int 4) (const_int 12)
7043 (const_int 5) (const_int 13)])))
7049 (parallel [(const_int 2) (const_int 10)
7050 (const_int 3) (const_int 11)
7051 (const_int 6) (const_int 14)
7052 (const_int 7) (const_int 15)])))
7053 (set (match_operand:V8SF 0 "register_operand")
7058 (parallel [(const_int 4) (const_int 5)
7059 (const_int 6) (const_int 7)
7060 (const_int 12) (const_int 13)
7061 (const_int 14) (const_int 15)])))]
7064 operands[3] = gen_reg_rtx (V8SFmode);
7065 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit unpckhps (elements 2,6,3,7 of the concatenation), SSE and
;; AVX/masked alternatives.
7068 (define_insn "vec_interleave_highv4sf<mask_name>"
7069 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7072 (match_operand:V4SF 1 "register_operand" "0,v")
7073 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7074 (parallel [(const_int 2) (const_int 6)
7075 (const_int 3) (const_int 7)])))]
7076 "TARGET_SSE && <mask_avx512vl_condition>"
7078 unpckhps\t{%2, %0|%0, %2}
7079 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7080 [(set_attr "isa" "noavx,avx")
7081 (set_attr "type" "sselog")
7082 (set_attr "prefix" "orig,vex")
7083 (set_attr "mode" "V4SF")])
;; 512-bit vunpcklps, optionally masked: interleaves the low pair of
;; each 128-bit lane of operands 1 and 2.
7085 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
7086 [(set (match_operand:V16SF 0 "register_operand" "=v")
7089 (match_operand:V16SF 1 "register_operand" "v")
7090 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7091 (parallel [(const_int 0) (const_int 16)
7092 (const_int 1) (const_int 17)
7093 (const_int 4) (const_int 20)
7094 (const_int 5) (const_int 21)
7095 (const_int 8) (const_int 24)
7096 (const_int 9) (const_int 25)
7097 (const_int 12) (const_int 28)
7098 (const_int 13) (const_int 29)])))]
7100 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7101 [(set_attr "type" "sselog")
7102 (set_attr "prefix" "evex")
7103 (set_attr "mode" "V16SF")])
7105 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps: per-lane low interleave.
7106 (define_insn "avx_unpcklps256<mask_name>"
7107 [(set (match_operand:V8SF 0 "register_operand" "=v")
7110 (match_operand:V8SF 1 "register_operand" "v")
7111 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7112 (parallel [(const_int 0) (const_int 8)
7113 (const_int 1) (const_int 9)
7114 (const_int 4) (const_int 12)
7115 (const_int 5) (const_int 13)])))]
7116 "TARGET_AVX && <mask_avx512vl_condition>"
7117 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7118 [(set_attr "type" "sselog")
7119 (set_attr "prefix" "vex")
7120 (set_attr "mode" "V8SF")])
;; Explicitly-masked 128-bit vunpcklps: vec_merge under mask operand 4,
;; with operand 3 supplying the merge source (register or zero, "0C").
7122 (define_insn "unpcklps128_mask"
7123 [(set (match_operand:V4SF 0 "register_operand" "=v")
7127 (match_operand:V4SF 1 "register_operand" "v")
7128 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7129 (parallel [(const_int 0) (const_int 4)
7130 (const_int 1) (const_int 5)]))
7131 (match_operand:V4SF 3 "nonimm_or_0_operand" "0C")
7132 (match_operand:QI 4 "register_operand" "Yk")))]
7134 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7135 [(set_attr "type" "sselog")
7136 (set_attr "prefix" "evex")
7137 (set_attr "mode" "V4SF")])
;; Cross-lane low interleave for V8SF: two in-lane unpcks into fresh
;; temporaries (operands 3/4 created in the preparation code), then a
;; vec_select gathering the low 128-bit halves.
7139 (define_expand "vec_interleave_lowv8sf"
7143 (match_operand:V8SF 1 "register_operand")
7144 (match_operand:V8SF 2 "nonimmediate_operand"))
7145 (parallel [(const_int 0) (const_int 8)
7146 (const_int 1) (const_int 9)
7147 (const_int 4) (const_int 12)
7148 (const_int 5) (const_int 13)])))
7154 (parallel [(const_int 2) (const_int 10)
7155 (const_int 3) (const_int 11)
7156 (const_int 6) (const_int 14)
7157 (const_int 7) (const_int 15)])))
7158 (set (match_operand:V8SF 0 "register_operand")
7163 (parallel [(const_int 0) (const_int 1)
7164 (const_int 2) (const_int 3)
7165 (const_int 8) (const_int 9)
7166 (const_int 10) (const_int 11)])))]
7169 operands[3] = gen_reg_rtx (V8SFmode);
7170 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit unpcklps (elements 0,4,1,5 of the concatenation).
7173 (define_insn "vec_interleave_lowv4sf"
7174 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7177 (match_operand:V4SF 1 "register_operand" "0,v")
7178 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7179 (parallel [(const_int 0) (const_int 4)
7180 (const_int 1) (const_int 5)])))]
7183 unpcklps\t{%2, %0|%0, %2}
7184 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
7185 [(set_attr "isa" "noavx,avx")
7186 (set_attr "type" "sselog")
7187 (set_attr "prefix" "orig,maybe_evex")
7188 (set_attr "mode" "V4SF")])
7190 ;; These are modeled with the same vec_concat as the others so that we
7191 ;; capture users of shufps that can use the new instructions
;; vmovshdup, 256-bit: duplicate each odd-indexed element into the
;; even/odd pair (selection indices 1,1,3,3,5,5,7,7).
7192 (define_insn "avx_movshdup256<mask_name>"
7193 [(set (match_operand:V8SF 0 "register_operand" "=v")
7196 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7198 (parallel [(const_int 1) (const_int 1)
7199 (const_int 3) (const_int 3)
7200 (const_int 5) (const_int 5)
7201 (const_int 7) (const_int 7)])))]
7202 "TARGET_AVX && <mask_avx512vl_condition>"
7203 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7204 [(set_attr "type" "sse")
7205 (set_attr "prefix" "vex")
7206 (set_attr "mode" "V8SF")])
;; movshdup, 128-bit (SSE3); %v emits the v-prefix under AVX.
7208 (define_insn "sse3_movshdup<mask_name>"
7209 [(set (match_operand:V4SF 0 "register_operand" "=v")
7212 (match_operand:V4SF 1 "vector_operand" "vBm")
7214 (parallel [(const_int 1)
7218 "TARGET_SSE3 && <mask_avx512vl_condition>"
7219 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7220 [(set_attr "type" "sse")
7221 (set_attr "prefix_rep" "1")
7222 (set_attr "prefix" "maybe_vex")
7223 (set_attr "mode" "V4SF")])
;; vmovshdup, 512-bit (AVX512F), optionally masked.
7225 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
7226 [(set (match_operand:V16SF 0 "register_operand" "=v")
7229 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7231 (parallel [(const_int 1) (const_int 1)
7232 (const_int 3) (const_int 3)
7233 (const_int 5) (const_int 5)
7234 (const_int 7) (const_int 7)
7235 (const_int 9) (const_int 9)
7236 (const_int 11) (const_int 11)
7237 (const_int 13) (const_int 13)
7238 (const_int 15) (const_int 15)])))]
7240 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7241 [(set_attr "type" "sse")
7242 (set_attr "prefix" "evex")
7243 (set_attr "mode" "V16SF")])
;; vmovsldup, 256-bit: duplicate each even-indexed element into the
;; even/odd pair (selection indices 0,0,2,2,4,4,6,6).
7245 (define_insn "avx_movsldup256<mask_name>"
7246 [(set (match_operand:V8SF 0 "register_operand" "=v")
7249 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7251 (parallel [(const_int 0) (const_int 0)
7252 (const_int 2) (const_int 2)
7253 (const_int 4) (const_int 4)
7254 (const_int 6) (const_int 6)])))]
7255 "TARGET_AVX && <mask_avx512vl_condition>"
7256 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7257 [(set_attr "type" "sse")
7258 (set_attr "prefix" "vex")
7259 (set_attr "mode" "V8SF")])
;; movsldup, 128-bit (SSE3).
7261 (define_insn "sse3_movsldup<mask_name>"
7262 [(set (match_operand:V4SF 0 "register_operand" "=v")
7265 (match_operand:V4SF 1 "vector_operand" "vBm")
7267 (parallel [(const_int 0)
7271 "TARGET_SSE3 && <mask_avx512vl_condition>"
7272 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7273 [(set_attr "type" "sse")
7274 (set_attr "prefix_rep" "1")
7275 (set_attr "prefix" "maybe_vex")
7276 (set_attr "mode" "V4SF")])
;; vmovsldup, 512-bit (AVX512F), optionally masked.
7278 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
7279 [(set (match_operand:V16SF 0 "register_operand" "=v")
7282 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7284 (parallel [(const_int 0) (const_int 0)
7285 (const_int 2) (const_int 2)
7286 (const_int 4) (const_int 4)
7287 (const_int 6) (const_int 6)
7288 (const_int 8) (const_int 8)
7289 (const_int 10) (const_int 10)
7290 (const_int 12) (const_int 12)
7291 (const_int 14) (const_int 14)])))]
7293 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7294 [(set_attr "type" "sse")
7295 (set_attr "prefix" "evex")
7296 (set_attr "mode" "V16SF")])
;; vshufps 256-bit expander: decompose the 8-bit immediate into eight
;; per-element selection indices.  Each 2-bit field is replicated for
;; both 128-bit lanes, offset so low-lane picks come from elements
;; 0-7 and high-lane picks from 8-15 of the concatenated input.
7298 (define_expand "avx_shufps256<mask_expand4_name>"
7299 [(match_operand:V8SF 0 "register_operand")
7300 (match_operand:V8SF 1 "register_operand")
7301 (match_operand:V8SF 2 "nonimmediate_operand")
7302 (match_operand:SI 3 "const_int_operand")]
7305 int mask = INTVAL (operands[3]);
7306 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
7309 GEN_INT ((mask >> 0) & 3),
7310 GEN_INT ((mask >> 2) & 3),
7311 GEN_INT (((mask >> 4) & 3) + 8),
7312 GEN_INT (((mask >> 6) & 3) + 8),
7313 GEN_INT (((mask >> 0) & 3) + 4),
7314 GEN_INT (((mask >> 2) & 3) + 4),
7315 GEN_INT (((mask >> 4) & 3) + 12),
7316 GEN_INT (((mask >> 6) & 3) + 12)
7317 <mask_expand4_args>));
7321 ;; One bit in mask selects 2 elements.
;; vshufps 256-bit insn: accepts only index combinations where the high
;; lane repeats the low lane's selection (checked in the condition),
;; then re-encodes operands 3-6 back into the single immediate.
7322 (define_insn "avx_shufps256_1<mask_name>"
7323 [(set (match_operand:V8SF 0 "register_operand" "=v")
7326 (match_operand:V8SF 1 "register_operand" "v")
7327 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7328 (parallel [(match_operand 3 "const_0_to_3_operand" )
7329 (match_operand 4 "const_0_to_3_operand" )
7330 (match_operand 5 "const_8_to_11_operand" )
7331 (match_operand 6 "const_8_to_11_operand" )
7332 (match_operand 7 "const_4_to_7_operand" )
7333 (match_operand 8 "const_4_to_7_operand" )
7334 (match_operand 9 "const_12_to_15_operand")
7335 (match_operand 10 "const_12_to_15_operand")])))]
7337 && <mask_avx512vl_condition>
7338 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7339 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7340 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7341 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
7344 mask = INTVAL (operands[3]);
7345 mask |= INTVAL (operands[4]) << 2;
7346 mask |= (INTVAL (operands[5]) - 8) << 4;
7347 mask |= (INTVAL (operands[6]) - 8) << 6;
7348 operands[3] = GEN_INT (mask);
7350 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7352 [(set_attr "type" "sseshuf")
7353 (set_attr "length_immediate" "1")
7354 (set_attr "prefix" "<mask_prefix>")
7355 (set_attr "mode" "V8SF")])
;; shufps 128-bit expander: split the 8-bit immediate into four 2-bit
;; indices; the high two select from operand 2, hence the +4 offset
;; into the concatenated 8-element input.
7357 (define_expand "sse_shufps<mask_expand4_name>"
7358 [(match_operand:V4SF 0 "register_operand")
7359 (match_operand:V4SF 1 "register_operand")
7360 (match_operand:V4SF 2 "vector_operand")
7361 (match_operand:SI 3 "const_int_operand")]
7364 int mask = INTVAL (operands[3]);
7365 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
7368 GEN_INT ((mask >> 0) & 3),
7369 GEN_INT ((mask >> 2) & 3),
7370 GEN_INT (((mask >> 4) & 3) + 4),
7371 GEN_INT (((mask >> 6) & 3) + 4)
7372 <mask_expand4_args>));
;; Masked vshufps: vec_merge of the shuffle with operand 7 under mask
;; operand 8; the four index operands are re-encoded into the
;; immediate before emitting.
7376 (define_insn "sse_shufps_v4sf_mask"
7377 [(set (match_operand:V4SF 0 "register_operand" "=v")
7381 (match_operand:V4SF 1 "register_operand" "v")
7382 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7383 (parallel [(match_operand 3 "const_0_to_3_operand")
7384 (match_operand 4 "const_0_to_3_operand")
7385 (match_operand 5 "const_4_to_7_operand")
7386 (match_operand 6 "const_4_to_7_operand")]))
7387 (match_operand:V4SF 7 "nonimm_or_0_operand" "0C")
7388 (match_operand:QI 8 "register_operand" "Yk")))]
7392 mask |= INTVAL (operands[3]) << 0;
7393 mask |= INTVAL (operands[4]) << 2;
7394 mask |= (INTVAL (operands[5]) - 4) << 4;
7395 mask |= (INTVAL (operands[6]) - 4) << 6;
7396 operands[3] = GEN_INT (mask);
7398 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
7400 [(set_attr "type" "sseshuf")
7401 (set_attr "length_immediate" "1")
7402 (set_attr "prefix" "evex")
7403 (set_attr "mode" "V4SF")])
;; shufps/vshufps over 32-bit-element 128-bit modes (VI4F_128): rebuild
;; the immediate from the four index operands, then pick the SSE
;; two-operand or AVX three-operand form by alternative.
7405 (define_insn "sse_shufps_<mode>"
7406 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
7407 (vec_select:VI4F_128
7408 (vec_concat:<ssedoublevecmode>
7409 (match_operand:VI4F_128 1 "register_operand" "0,v")
7410 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
7411 (parallel [(match_operand 3 "const_0_to_3_operand")
7412 (match_operand 4 "const_0_to_3_operand")
7413 (match_operand 5 "const_4_to_7_operand")
7414 (match_operand 6 "const_4_to_7_operand")])))]
7418 mask |= INTVAL (operands[3]) << 0;
7419 mask |= INTVAL (operands[4]) << 2;
7420 mask |= (INTVAL (operands[5]) - 4) << 4;
7421 mask |= (INTVAL (operands[6]) - 4) << 6;
7422 operands[3] = GEN_INT (mask);
7424 switch (which_alternative)
7427 return "shufps\t{%3, %2, %0|%0, %2, %3}";
7429 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7434 [(set_attr "isa" "noavx,avx")
7435 (set_attr "type" "sseshuf")
7436 (set_attr "length_immediate" "1")
7437 (set_attr "prefix" "orig,maybe_evex")
7438 (set_attr "mode" "V4SF")])
;; Extract the high two floats of a V4SF (indices 2,3): store to memory
;; with movhps, reg-to-reg with movhlps, or load from a memory source's
;; high half with movlps (%H1).
7440 (define_insn "sse_storehps"
7441 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
7443 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
7444 (parallel [(const_int 2) (const_int 3)])))]
7445 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7447 %vmovhps\t{%1, %0|%q0, %1}
7448 %vmovhlps\t{%1, %d0|%d0, %1}
7449 %vmovlps\t{%H1, %d0|%d0, %H1}"
7450 [(set_attr "type" "ssemov")
7451 (set_attr "prefix" "maybe_vex")
7452 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; loadhps expander: fixup-then-emit, same shape as sse_movhlps_exp.
7454 (define_expand "sse_loadhps_exp"
7455 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7458 (match_operand:V4SF 1 "nonimmediate_operand")
7459 (parallel [(const_int 0) (const_int 1)]))
7460 (match_operand:V2SF 2 "nonimmediate_operand")))]
7463 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7465 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
7467 /* Fix up the destination if needed. */
7468 if (dst != operands[0])
7469 emit_move_insn (operands[0], dst);
;; Replace the high two floats of operand 1 with the V2SF operand 2:
;; memory source via movhps, register source via movlhps, or store the
;; low half of operand 2 to the high 8 bytes of a memory destination.
7474 (define_insn "sse_loadhps"
7475 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
7478 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7479 (parallel [(const_int 0) (const_int 1)]))
7480 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
7483 movhps\t{%2, %0|%0, %q2}
7484 vmovhps\t{%2, %1, %0|%0, %1, %q2}
7485 movlhps\t{%2, %0|%0, %2}
7486 vmovlhps\t{%2, %1, %0|%0, %1, %2}
7487 %vmovlps\t{%2, %H0|%H0, %2}"
7488 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7489 (set_attr "type" "ssemov")
7490 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7491 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract the low two floats of a V4SF (indices 0,1): movlps to
;; memory, movaps for reg-to-reg, or a movlps load from memory.
7493 (define_insn "sse_storelps"
7494 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
7496 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
7497 (parallel [(const_int 0) (const_int 1)])))]
7498 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7500 %vmovlps\t{%1, %0|%q0, %1}
7501 %vmovaps\t{%1, %0|%0, %1}
7502 %vmovlps\t{%1, %d0|%d0, %q1}"
7503 [(set_attr "type" "ssemov")
7504 (set_attr "prefix" "maybe_vex")
7505 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; loadlps expander: fixup-then-emit, same shape as sse_loadhps_exp.
7507 (define_expand "sse_loadlps_exp"
7508 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7510 (match_operand:V2SF 2 "nonimmediate_operand")
7512 (match_operand:V4SF 1 "nonimmediate_operand")
7513 (parallel [(const_int 2) (const_int 3)]))))]
7516 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7518 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
7520 /* Fix up the destination if needed. */
7521 if (dst != operands[0])
7522 emit_move_insn (operands[0], dst);
;; Replace the low two floats of operand 1 with the V2SF operand 2.
;; Register source uses shufps with immediate 0xe4 (identity pattern
;; picking 2's low half and 1's high half); memory source uses movlps;
;; a memory destination stores operand 2's low half with movlps.
7527 (define_insn "sse_loadlps"
7528 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
7530 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
7532 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
7533 (parallel [(const_int 2) (const_int 3)]))))]
7536 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
7537 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
7538 movlps\t{%2, %0|%0, %q2}
7539 vmovlps\t{%2, %1, %0|%0, %1, %q2}
7540 %vmovlps\t{%2, %0|%q0, %2}
7541 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7542 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
7543 (set (attr "length_immediate")
7544 (if_then_else (eq_attr "alternative" "0,1")
7546 (const_string "*")))
7547 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7548 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movss/vmovss: merge the lowest element of operand 2 into operand 1.
7550 (define_insn "sse_movss"
7551 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7553 (match_operand:V4SF 2 "register_operand" " x,v")
7554 (match_operand:V4SF 1 "register_operand" " 0,v")
7558 movss\t{%2, %0|%0, %2}
7559 vmovss\t{%2, %1, %0|%0, %1, %2}"
7560 [(set_attr "isa" "noavx,avx")
7561 (set_attr "type" "ssemov")
7562 (set_attr "prefix" "orig,maybe_evex")
7563 (set_attr "mode" "SF")])
;; vbroadcastss from a register: duplicate element 0 of a V4SF source
;; across a 128- or 256-bit single-float destination.
7565 (define_insn "avx2_vec_dup<mode>"
7566 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
7567 (vec_duplicate:VF1_128_256
7569 (match_operand:V4SF 1 "register_operand" "v")
7570 (parallel [(const_int 0)]))))]
7572 "vbroadcastss\t{%1, %0|%0, %1}"
7573 [(set_attr "type" "sselog1")
7574 (set_attr "prefix" "maybe_evex")
7575 (set_attr "mode" "<MODE>")])
;; Same but the source is V8SF; %x1 names its low 128-bit part.
7577 (define_insn "avx2_vec_dupv8sf_1"
7578 [(set (match_operand:V8SF 0 "register_operand" "=v")
7581 (match_operand:V8SF 1 "register_operand" "v")
7582 (parallel [(const_int 0)]))))]
7584 "vbroadcastss\t{%x1, %0|%0, %x1}"
7585 [(set_attr "type" "sselog1")
7586 (set_attr "prefix" "maybe_evex")
7587 (set_attr "mode" "V8SF")])
;; 512-bit broadcast of element 0; <bcstscalarsuff> picks ss/sd.
7589 (define_insn "avx512f_vec_dup<mode>_1"
7590 [(set (match_operand:VF_512 0 "register_operand" "=v")
7591 (vec_duplicate:VF_512
7592 (vec_select:<ssescalarmode>
7593 (match_operand:VF_512 1 "register_operand" "v")
7594 (parallel [(const_int 0)]))))]
7596 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
7597 [(set_attr "type" "sselog1")
7598 (set_attr "prefix" "evex")
7599 (set_attr "mode" "<MODE>")])
7601 ;; Although insertps takes register source, we prefer
7602 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF scalars (SSE4.1).  Alternatives cover
;; unpcklps (reg), insertps imm 0x10 (memory second operand, placing it
;; in element 1), movss when operand 2 is zero, and MMX punpckldq/movd
;; forms; attribute conds below classify isa/type/prefix per alternative.
7603 (define_insn "*vec_concatv2sf_sse4_1"
7604 [(set (match_operand:V2SF 0 "register_operand"
7605 "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
7607 (match_operand:SF 1 "nonimmediate_operand"
7608 " 0, 0,Yv, 0,0, v,m, 0 , m")
7609 (match_operand:SF 2 "nonimm_or_0_operand"
7610 " Yr,*x,Yv, m,m, m,C,*ym, C")))]
7611 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7613 unpcklps\t{%2, %0|%0, %2}
7614 unpcklps\t{%2, %0|%0, %2}
7615 vunpcklps\t{%2, %1, %0|%0, %1, %2}
7616 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
7617 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
7618 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
7619 %vmovss\t{%1, %0|%0, %1}
7620 punpckldq\t{%2, %0|%0, %2}
7621 movd\t{%1, %0|%0, %1}"
7623 (cond [(eq_attr "alternative" "0,1,3,4")
7624 (const_string "noavx")
7625 (eq_attr "alternative" "2,5")
7626 (const_string "avx")
7628 (const_string "*")))
7630 (cond [(eq_attr "alternative" "6")
7631 (const_string "ssemov")
7632 (eq_attr "alternative" "7")
7633 (const_string "mmxcvt")
7634 (eq_attr "alternative" "8")
7635 (const_string "mmxmov")
7637 (const_string "sselog")))
7638 (set (attr "mmx_isa")
7639 (if_then_else (eq_attr "alternative" "7,8")
7640 (const_string "native")
7641 (const_string "*")))
7642 (set (attr "prefix_data16")
7643 (if_then_else (eq_attr "alternative" "3,4")
7645 (const_string "*")))
7646 (set (attr "prefix_extra")
7647 (if_then_else (eq_attr "alternative" "3,4,5")
7649 (const_string "*")))
7650 (set (attr "length_immediate")
7651 (if_then_else (eq_attr "alternative" "3,4,5")
7653 (const_string "*")))
7654 (set (attr "prefix")
7655 (cond [(eq_attr "alternative" "2,5")
7656 (const_string "maybe_evex")
7657 (eq_attr "alternative" "6")
7658 (const_string "maybe_vex")
7660 (const_string "orig")))
7661 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
7663 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7664 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
7665 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 V2SF concat: unpcklps for two registers, movss when
;; operand 2 is zero, plus the MMX punpckldq/movd forms.
7666 (define_insn "*vec_concatv2sf_sse"
7667 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
7669 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
7670 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
7673 unpcklps\t{%2, %0|%0, %2}
7674 movss\t{%1, %0|%0, %1}
7675 punpckldq\t{%2, %0|%0, %2}
7676 movd\t{%1, %0|%0, %1}"
7677 [(set_attr "mmx_isa" "*,*,native,native")
7678 (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7679 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Concatenate two V2SF halves into a V4SF: movlhps for a register
;; second half, movhps for a memory second half.
7681 (define_insn "*vec_concatv4sf"
7682 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
7684 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
7685 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
7688 movlhps\t{%2, %0|%0, %2}
7689 vmovlhps\t{%2, %1, %0|%0, %1, %2}
7690 movhps\t{%2, %0|%0, %q2}
7691 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
7692 [(set_attr "isa" "noavx,avx,noavx,avx")
7693 (set_attr "type" "ssemov")
7694 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
7695 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Concat with a zero high half: a single movq zero-extends the low
;; 8 bytes.
7697 (define_insn "*vec_concatv4sf_0"
7698 [(set (match_operand:V4SF 0 "register_operand" "=v")
7700 (match_operand:V2SF 1 "nonimmediate_operand" "xm")
7701 (match_operand:V2SF 2 "const0_operand" " C")))]
7703 "%vmovq\t{%1, %0|%0, %1}"
7704 [(set_attr "type" "ssemov")
7705 (set_attr "prefix" "maybe_vex")
7706 (set_attr "mode" "DF")])
7708 ;; Avoid combining registers from different units in a single alternative,
7709 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a 128-bit 32-bit-element vector (vec_merge of a
;; vec_duplicate of the scalar into operand 1).  Fourteen alternatives:
;; insertps imm 0xe (zeroing the rest), scalar moves, movss/vmovss
;; merges, pinsrd forms, and plain stores for memory destinations;
;; the attribute conds below classify each alternative.
7710 (define_insn "vec_set<mode>_0"
7711 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
7712 "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x ,m ,m ,m")
7714 (vec_duplicate:VI4F_128
7715 (match_operand:<ssescalarmode> 2 "general_operand"
7716 " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
7717 (match_operand:VI4F_128 1 "nonimm_or_0_operand"
7718 " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
7722 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
7723 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
7724 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
7725 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
7726 %vmovd\t{%2, %0|%0, %2}
7727 movss\t{%2, %0|%0, %2}
7728 movss\t{%2, %0|%0, %2}
7729 vmovss\t{%2, %1, %0|%0, %1, %2}
7730 pinsrd\t{$0, %2, %0|%0, %2, 0}
7731 pinsrd\t{$0, %2, %0|%0, %2, 0}
7732 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
7737 (cond [(eq_attr "alternative" "0,1,8,9")
7738 (const_string "sse4_noavx")
7739 (eq_attr "alternative" "2,7,10")
7740 (const_string "avx")
7741 (eq_attr "alternative" "3,4")
7742 (const_string "sse2")
7743 (eq_attr "alternative" "5,6")
7744 (const_string "noavx")
7746 (const_string "*")))
7748 (cond [(eq_attr "alternative" "0,1,2,8,9,10")
7749 (const_string "sselog")
7750 (eq_attr "alternative" "12")
7751 (const_string "imov")
7752 (eq_attr "alternative" "13")
7753 (const_string "fmov")
7755 (const_string "ssemov")))
7756 (set (attr "prefix_extra")
7757 (if_then_else (eq_attr "alternative" "8,9,10")
7759 (const_string "*")))
7760 (set (attr "length_immediate")
7761 (if_then_else (eq_attr "alternative" "8,9,10")
7763 (const_string "*")))
7764 (set (attr "prefix")
7765 (cond [(eq_attr "alternative" "0,1,5,6,8,9")
7766 (const_string "orig")
7767 (eq_attr "alternative" "2")
7768 (const_string "maybe_evex")
7769 (eq_attr "alternative" "3,4")
7770 (const_string "maybe_vex")
7771 (eq_attr "alternative" "7,10")
7772 (const_string "vex")
7774 (const_string "*")))
7775 (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
7776 (set (attr "preferred_for_speed")
7777 (cond [(eq_attr "alternative" "4")
7778 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
7780 (symbol_ref "true")))])
7782 ;; A subset is vec_setv4sf.
;; insertps-based V4SF element set: operand 3 is a one-hot element mask
;; (condition requires it to be a power of two below 4); its log2,
;; shifted into the insertps count-destination field (bits 4-5), forms
;; the immediate.
7783 (define_insn "*vec_setv4sf_sse4_1"
7784 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7787 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
7788 (match_operand:V4SF 1 "register_operand" "0,0,v")
7789 (match_operand:SI 3 "const_int_operand")))]
7791 && ((unsigned) exact_log2 (INTVAL (operands[3]))
7792 < GET_MODE_NUNITS (V4SFmode))"
7794 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
7795 switch (which_alternative)
7799 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7801 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7806 [(set_attr "isa" "noavx,noavx,avx")
7807 (set_attr "type" "sselog")
7808 (set_attr "prefix_data16" "1,1,*")
7809 (set_attr "prefix_extra" "1")
7810 (set_attr "length_immediate" "1")
7811 (set_attr "prefix" "orig,orig,maybe_evex")
7812 (set_attr "mode" "V4SF")])
7814 ;; All of vinsertps, vmovss, vmovd clear also the higher bits.
;; Set element 0 of a 256/512-bit vector whose remaining elements are
;; zero (operand 1 is const0): the three alternatives rely on
;; vinsertps/vmov*/vmovd implicitly zeroing everything above the scalar
;; (%x0/%x2 name the low 128-bit parts).
7815 (define_insn "vec_set<mode>_0"
7816 [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,v")
7817 (vec_merge:VI4F_256_512
7818 (vec_duplicate:VI4F_256_512
7819 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "v,m,r"))
7820 (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
7824 vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
7825 vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
7826 vmovd\t{%2, %x0|%x0, %2}"
7828 (if_then_else (eq_attr "alternative" "0")
7829 (const_string "sselog")
7830 (const_string "ssemov")))
7831 (set_attr "prefix" "maybe_evex")
7832 (set_attr "mode" "SF,<ssescalarmode>,SI")
7833 (set (attr "preferred_for_speed")
7834 (cond [(eq_attr "alternative" "2")
7835 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
7837 (symbol_ref "true")))])
7839 (define_insn "sse4_1_insertps"
7840 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7841 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
7842 (match_operand:V4SF 1 "register_operand" "0,0,v")
7843 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
7847 if (MEM_P (operands[2]))
7849 unsigned count_s = INTVAL (operands[3]) >> 6;
7851 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
7852 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
7854 switch (which_alternative)
7858 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7860 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7865 [(set_attr "isa" "noavx,noavx,avx")
7866 (set_attr "type" "sselog")
7867 (set_attr "prefix_data16" "1,1,*")
7868 (set_attr "prefix_extra" "1")
7869 (set_attr "length_immediate" "1")
7870 (set_attr "prefix" "orig,orig,maybe_evex")
7871 (set_attr "mode" "V4SF")])
7874 [(set (match_operand:VI4F_128 0 "memory_operand")
7876 (vec_duplicate:VI4F_128
7877 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
7880 "TARGET_SSE && reload_completed"
7881 [(set (match_dup 0) (match_dup 1))]
7882 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
7884 (define_expand "vec_set<mode>"
7885 [(match_operand:V 0 "register_operand")
7886 (match_operand:<ssescalarmode> 1 "register_operand")
7887 (match_operand 2 "const_int_operand")]
7890 ix86_expand_vector_set (false, operands[0], operands[1],
7891 INTVAL (operands[2]));
;; Extract element 0 of a V4SF: after reload it is just a lowpart move.
7895 (define_insn_and_split "*vec_extractv4sf_0"
7896 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
7898 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
7899 (parallel [(const_int 0)])))]
7900 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7902 "&& reload_completed"
7903 [(set (match_dup 0) (match_dup 1))]
7904 "operands[1] = gen_lowpart (SFmode, operands[1]);")
;; extractps to GPR/memory; when the destination turns out to be an SSE
;; register, split into shufps / unpckhps on the V4SF view instead.
7906 (define_insn_and_split "*sse4_1_extractps"
7907 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
7909 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
7910 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
7913 extractps\t{%2, %1, %0|%0, %1, %2}
7914 extractps\t{%2, %1, %0|%0, %1, %2}
7915 vextractps\t{%2, %1, %0|%0, %1, %2}
7918 "&& reload_completed && SSE_REG_P (operands[0])"
7921 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
7922 switch (INTVAL (operands[2]))
7926 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
7927 operands[2], operands[2],
7928 GEN_INT (INTVAL (operands[2]) + 4),
7929 GEN_INT (INTVAL (operands[2]) + 4)));
7932 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
7935 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
7940 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
7941 (set_attr "type" "sselog,sselog,sselog,*,*")
7942 (set_attr "prefix_data16" "1,1,1,*,*")
7943 (set_attr "prefix_extra" "1,1,1,*,*")
7944 (set_attr "length_immediate" "1,1,1,*,*")
7945 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
7946 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
;; Extract any element from a V4SF in memory: fold the index into the
;; address (element i lives at byte offset i*4).
7948 (define_insn_and_split "*vec_extractv4sf_mem"
7949 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
7951 (match_operand:V4SF 1 "memory_operand" "o,o,o")
7952 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
7955 "&& reload_completed"
7956 [(set (match_dup 0) (match_dup 1))]
7958 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; Mode attrs used by the 512-bit -> 128-bit extract patterns below.
7961 (define_mode_attr extract_type
7962 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
7964 (define_mode_attr extract_suf
7965 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
7967 (define_mode_iterator AVX512_VEC
7968 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
;; Masked extract of a 128-bit quarter from a 512-bit vector.  A memory
;; destination that differs from the merge source (operand 3) is staged
;; through a scratch register, since the maskm insns require dest == src3.
7970 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
7971 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
7972 (match_operand:AVX512_VEC 1 "register_operand")
7973 (match_operand:SI 2 "const_0_to_3_operand")
7974 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
7975 (match_operand:QI 4 "register_operand")]
7979 mask = INTVAL (operands[2]);
7980 rtx dest = operands[0];
7982 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
7983 dest = gen_reg_rtx (<ssequartermode>mode);
7985 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
7986 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
7987 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
7988 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
7991 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
7992 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
7994 if (dest != operands[0])
7995 emit_move_insn (operands[0], dest);
;; Masked 64x2 extract with memory destination merged under a k-mask;
;; the element pair must be aligned (even first index, consecutive).
7999 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
8000 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
8001 (vec_merge:<ssequartermode>
8002 (vec_select:<ssequartermode>
8003 (match_operand:V8FI 1 "register_operand" "v")
8004 (parallel [(match_operand 2 "const_0_to_7_operand")
8005 (match_operand 3 "const_0_to_7_operand")]))
8006 (match_operand:<ssequartermode> 4 "memory_operand" "0")
8007 (match_operand:QI 5 "register_operand" "Yk")))]
8009 && INTVAL (operands[2]) % 2 == 0
8010 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8011 && rtx_equal_p (operands[4], operands[0])"
8013 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
8014 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
8016 [(set_attr "type" "sselog")
8017 (set_attr "prefix_extra" "1")
8018 (set_attr "length_immediate" "1")
8019 (set_attr "memory" "store")
8020 (set_attr "prefix" "evex")
8021 (set_attr "mode" "<sseinsnmode>")])
;; Same as above for 32x4 extracts: four consecutive indices starting at a
;; multiple of 4; the element index is scaled down to the chunk immediate.
8023 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
8024 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
8025 (vec_merge:<ssequartermode>
8026 (vec_select:<ssequartermode>
8027 (match_operand:V16FI 1 "register_operand" "v")
8028 (parallel [(match_operand 2 "const_0_to_15_operand")
8029 (match_operand 3 "const_0_to_15_operand")
8030 (match_operand 4 "const_0_to_15_operand")
8031 (match_operand 5 "const_0_to_15_operand")]))
8032 (match_operand:<ssequartermode> 6 "memory_operand" "0")
8033 (match_operand:QI 7 "register_operand" "Yk")))]
8035 && INTVAL (operands[2]) % 4 == 0
8036 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8037 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8038 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
8039 && rtx_equal_p (operands[6], operands[0])"
8041 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8042 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
8044 [(set_attr "type" "sselog")
8045 (set_attr "prefix_extra" "1")
8046 (set_attr "length_immediate" "1")
8047 (set_attr "memory" "store")
8048 (set_attr "prefix" "evex")
8049 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked / zero-masked 64x2 extract (mask handled via <mask_name> subst).
8051 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
8052 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
8053 (vec_select:<ssequartermode>
8054 (match_operand:V8FI 1 "register_operand" "v")
8055 (parallel [(match_operand 2 "const_0_to_7_operand")
8056 (match_operand 3 "const_0_to_7_operand")])))]
8058 && INTVAL (operands[2]) % 2 == 0
8059 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
8061 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8062 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
8064 [(set_attr "type" "sselog1")
8065 (set_attr "prefix_extra" "1")
8066 (set_attr "length_immediate" "1")
8067 (set_attr "prefix" "evex")
8068 (set_attr "mode" "<sseinsnmode>")])
;; Split extraction of the low 128 bits of a V8FI vector into a lowpart
;; move.  Without AVX512VL, a 128-bit lowpart of an ext (xmm16+) register
;; is not directly addressable, so the move is rewritten on the full mode.
;; NOTE(review): the "(define_split" opener (original line 8070) is absent
;; from this listing.
8071 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8072 (vec_select:<ssequartermode>
8073 (match_operand:V8FI 1 "register_operand")
8074 (parallel [(const_int 0) (const_int 1)])))]
8078 || REG_P (operands[0])
8079 || !EXT_REX_SSE_REG_P (operands[1]))"
8080 [(set (match_dup 0) (match_dup 1))]
8082 if (!TARGET_AVX512VL
8083 && REG_P (operands[0])
8084 && EXT_REX_SSE_REG_P (operands[1]))
8086 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8088 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
;; Unmasked / zero-masked 32x4 extract from a 512-bit vector.
8091 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
8092 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
8093 (vec_select:<ssequartermode>
8094 (match_operand:V16FI 1 "register_operand" "v")
8095 (parallel [(match_operand 2 "const_0_to_15_operand")
8096 (match_operand 3 "const_0_to_15_operand")
8097 (match_operand 4 "const_0_to_15_operand")
8098 (match_operand 5 "const_0_to_15_operand")])))]
8100 && INTVAL (operands[2]) % 4 == 0
8101 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8102 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8103 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
8105 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8106 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
8108 [(set_attr "type" "sselog1")
8109 (set_attr "prefix_extra" "1")
8110 (set_attr "length_immediate" "1")
8111 (set_attr "prefix" "evex")
8112 (set_attr "mode" "<sseinsnmode>")])
;; Same low-quarter split for V16FI (indices 0..3).  Opener line absent
;; from this listing as above.
8115 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8116 (vec_select:<ssequartermode>
8117 (match_operand:V16FI 1 "register_operand")
8118 (parallel [(const_int 0) (const_int 1)
8119 (const_int 2) (const_int 3)])))]
8123 || REG_P (operands[0])
8124 || !EXT_REX_SSE_REG_P (operands[1]))"
8125 [(set (match_dup 0) (match_dup 1))]
8127 if (!TARGET_AVX512VL
8128 && REG_P (operands[0])
8129 && EXT_REX_SSE_REG_P (operands[1]))
8131 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8133 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
;; Attrs/iterator for the 256-bit-half (32x8 / 64x4) extract patterns.
8136 (define_mode_attr extract_type_2
8137 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
8139 (define_mode_attr extract_suf_2
8140 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
8142 (define_mode_iterator AVX512_VEC_2
8143 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
;; Masked extract of the low/high 256-bit half of a 512-bit vector; memory
;; destinations not equal to the merge source are staged through a temp.
8145 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
8146 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8147 (match_operand:AVX512_VEC_2 1 "register_operand")
8148 (match_operand:SI 2 "const_0_to_1_operand")
8149 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
8150 (match_operand:QI 4 "register_operand")]
8153 rtx (*insn)(rtx, rtx, rtx, rtx);
8154 rtx dest = operands[0];
8156 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
8157 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8159 switch (INTVAL (operands[2]))
8162 insn = gen_vec_extract_lo_<mode>_mask;
8165 insn = gen_vec_extract_hi_<mode>_mask;
8171 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8172 if (dest != operands[0])
8173 emit_move_insn (operands[0], dest);
;; Split low-half extraction of a V8FI into a lowpart move after reload.
;; Opener line (original 8177) absent from this listing.
8178 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8179 (vec_select:<ssehalfvecmode>
8180 (match_operand:V8FI 1 "nonimmediate_operand")
8181 (parallel [(const_int 0) (const_int 1)
8182 (const_int 2) (const_int 3)])))]
8183 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8186 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
8187 [(set (match_dup 0) (match_dup 1))]
8188 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Masked store of the low 256-bit half of a V8FI vector (merge under mask
;; into memory; dest must equal the merge source).
8190 (define_insn "vec_extract_lo_<mode>_maskm"
8191 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8192 (vec_merge:<ssehalfvecmode>
8193 (vec_select:<ssehalfvecmode>
8194 (match_operand:V8FI 1 "register_operand" "v")
8195 (parallel [(const_int 0) (const_int 1)
8196 (const_int 2) (const_int 3)]))
8197 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8198 (match_operand:QI 3 "register_operand" "Yk")))]
8200 && rtx_equal_p (operands[2], operands[0])"
8201 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
8202 [(set_attr "type" "sselog1")
8203 (set_attr "prefix_extra" "1")
8204 (set_attr "length_immediate" "1")
8205 (set_attr "prefix" "evex")
8206 (set_attr "mode" "<sseinsnmode>")])
;; Low-half V8FI extract; emits vextract only when a plain move cannot be
;; used (mask applied, or ext register lowpart not addressable pre-VL).
8208 (define_insn "vec_extract_lo_<mode><mask_name>"
8209 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>,v")
8210 (vec_select:<ssehalfvecmode>
8211 (match_operand:V8FI 1 "<store_mask_predicate>" "v,v,<store_mask_constraint>")
8212 (parallel [(const_int 0) (const_int 1)
8213 (const_int 2) (const_int 3)])))]
8215 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8217 if (<mask_applied> || (!TARGET_AVX512VL && !MEM_P (operands[1])))
8218 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
8222 [(set_attr "type" "sselog1")
8223 (set_attr "prefix_extra" "1")
8224 (set_attr "length_immediate" "1")
8225 (set_attr "memory" "none,store,load")
8226 (set_attr "prefix" "evex")
8227 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high 256-bit half of a V8FI vector.
8229 (define_insn "vec_extract_hi_<mode>_maskm"
8230 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8231 (vec_merge:<ssehalfvecmode>
8232 (vec_select:<ssehalfvecmode>
8233 (match_operand:V8FI 1 "register_operand" "v")
8234 (parallel [(const_int 4) (const_int 5)
8235 (const_int 6) (const_int 7)]))
8236 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8237 (match_operand:QI 3 "register_operand" "Yk")))]
8239 && rtx_equal_p (operands[2], operands[0])"
8240 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
;; NOTE(review): sibling maskm store patterns here use type "sselog1";
;; this one says "sselog" — possibly a transcription slip, scheduling-only.
8241 [(set_attr "type" "sselog")
8242 (set_attr "prefix_extra" "1")
8243 (set_attr "length_immediate" "1")
8244 (set_attr "memory" "store")
8245 (set_attr "prefix" "evex")
8246 (set_attr "mode" "<sseinsnmode>")])
;; High-half V8FI extract (register or store destination).
8248 (define_insn "vec_extract_hi_<mode><mask_name>"
8249 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
8250 (vec_select:<ssehalfvecmode>
8251 (match_operand:V8FI 1 "register_operand" "v")
8252 (parallel [(const_int 4) (const_int 5)
8253 (const_int 6) (const_int 7)])))]
8255 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
8256 [(set_attr "type" "sselog1")
8257 (set_attr "prefix_extra" "1")
8258 (set_attr "length_immediate" "1")
8259 (set_attr "prefix" "evex")
8260 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high 256-bit half of a V16FI vector (32x8 form).
8262 (define_insn "vec_extract_hi_<mode>_maskm"
8263 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8264 (vec_merge:<ssehalfvecmode>
8265 (vec_select:<ssehalfvecmode>
8266 (match_operand:V16FI 1 "register_operand" "v")
8267 (parallel [(const_int 8) (const_int 9)
8268 (const_int 10) (const_int 11)
8269 (const_int 12) (const_int 13)
8270 (const_int 14) (const_int 15)]))
8271 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8272 (match_operand:QI 3 "register_operand" "Yk")))]
8274 && rtx_equal_p (operands[2], operands[0])"
8275 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
8276 [(set_attr "type" "sselog1")
8277 (set_attr "prefix_extra" "1")
8278 (set_attr "length_immediate" "1")
8279 (set_attr "prefix" "evex")
8280 (set_attr "mode" "<sseinsnmode>")])
;; High-half V16FI extract: 32x8 needs AVX512DQ, else fall back to
;; vextracti64x4 (alternative 2, unmasked only).
8282 (define_insn "vec_extract_hi_<mode><mask_name>"
8283 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
8284 (vec_select:<ssehalfvecmode>
8285 (match_operand:V16FI 1 "register_operand" "v,v")
8286 (parallel [(const_int 8) (const_int 9)
8287 (const_int 10) (const_int 11)
8288 (const_int 12) (const_int 13)
8289 (const_int 14) (const_int 15)])))]
8290 "TARGET_AVX512F && <mask_avx512dq_condition>"
8292 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
8293 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8294 [(set_attr "type" "sselog1")
8295 (set_attr "prefix_extra" "1")
8296 (set_attr "isa" "avx512dq,noavx512dq")
8297 (set_attr "length_immediate" "1")
8298 (set_attr "prefix" "evex")
8299 (set_attr "mode" "<sseinsnmode>")])
;; Masked 128-bit extract from a 256-bit vector (AVX512VL+DQ builtin).
;; The long condition decides when a scratch register is needed because the
;; underlying maskm insn constraints (dest == merge source, hi vs lo, =m/0C)
;; cannot be met directly.
8301 (define_expand "avx512vl_vextractf128<mode>"
8302 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8303 (match_operand:VI48F_256 1 "register_operand")
8304 (match_operand:SI 2 "const_0_to_1_operand")
8305 (match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand")
8306 (match_operand:QI 4 "register_operand")]
8307 "TARGET_AVX512DQ && TARGET_AVX512VL"
8309 rtx (*insn)(rtx, rtx, rtx, rtx);
8310 rtx dest = operands[0];
8313 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
8314 /* For V8S[IF]mode there are maskm insns with =m and 0
8316 ? !rtx_equal_p (dest, operands[3])
8317 /* For V4D[IF]mode, hi insns don't allow memory, and
8318 lo insns have =m and 0C constraints. */
8319 : (operands[2] != const0_rtx
8320 || (!rtx_equal_p (dest, operands[3])
8321 && GET_CODE (operands[3]) != CONST_VECTOR))))
8322 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8323 switch (INTVAL (operands[2]))
8326 insn = gen_vec_extract_lo_<mode>_mask;
8329 insn = gen_vec_extract_hi_<mode>_mask;
8335 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8336 if (dest != operands[0])
8337 emit_move_insn (operands[0], dest);
;; Plain AVX 128-bit extract builtin: dispatch on the lo/hi immediate.
8341 (define_expand "avx_vextractf128<mode>"
8342 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8343 (match_operand:V_256 1 "register_operand")
8344 (match_operand:SI 2 "const_0_to_1_operand")]
8347 rtx (*insn)(rtx, rtx);
8349 switch (INTVAL (operands[2]))
8352 insn = gen_vec_extract_lo_<mode>;
8355 insn = gen_vec_extract_hi_<mode>;
8361 emit_insn (insn (operands[0], operands[1]));
;; Low-half V16FI extract: plain move unless masked or the source is an
;; ext (xmm16+) register whose 256-bit lowpart is unaddressable pre-VL.
8365 (define_insn "vec_extract_lo_<mode><mask_name>"
8366 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
8367 (vec_select:<ssehalfvecmode>
8368 (match_operand:V16FI 1 "<store_mask_predicate>"
8369 "v,<store_mask_constraint>,v")
8370 (parallel [(const_int 0) (const_int 1)
8371 (const_int 2) (const_int 3)
8372 (const_int 4) (const_int 5)
8373 (const_int 6) (const_int 7)])))]
8375 && <mask_mode512bit_condition>
8376 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8379 || (!TARGET_AVX512VL
8380 && !REG_P (operands[0])
8381 && EXT_REX_SSE_REG_P (operands[1])))
8382 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
8386 [(set_attr "type" "sselog1")
8387 (set_attr "prefix_extra" "1")
8388 (set_attr "length_immediate" "1")
8389 (set_attr "memory" "none,load,store")
8390 (set_attr "prefix" "evex")
8391 (set_attr "mode" "<sseinsnmode>")])
;; Split V16FI low-half extraction into a lowpart move after reload.
;; Opener "(define_split" line absent from this listing.
8394 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8395 (vec_select:<ssehalfvecmode>
8396 (match_operand:V16FI 1 "nonimmediate_operand")
8397 (parallel [(const_int 0) (const_int 1)
8398 (const_int 2) (const_int 3)
8399 (const_int 4) (const_int 5)
8400 (const_int 6) (const_int 7)])))]
8401 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8404 || REG_P (operands[0])
8405 || !EXT_REX_SSE_REG_P (operands[1]))"
8406 [(set (match_dup 0) (match_dup 1))]
8408 if (!TARGET_AVX512VL
8409 && REG_P (operands[0])
8410 && EXT_REX_SSE_REG_P (operands[1]))
8412 = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
8414 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
;; Low 128 bits of a 256-bit vector of 8-byte elements.
8417 (define_insn "vec_extract_lo_<mode><mask_name>"
8418 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,v,m")
8419 (vec_select:<ssehalfvecmode>
8420 (match_operand:VI8F_256 1 "<store_mask_predicate>"
8421 "v,<store_mask_constraint>,v")
8422 (parallel [(const_int 0) (const_int 1)])))]
8424 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
8425 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8428 return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
8432 [(set_attr "type" "sselog1")
8433 (set_attr "prefix_extra" "1")
8434 (set_attr "length_immediate" "1")
8435 (set_attr "memory" "none,load,store")
8436 (set_attr "prefix" "evex")
8437 (set_attr "mode" "XI")])
;; Split VI8F_256 low-half extraction into a lowpart move after reload.
;; Opener "(define_split" line absent from this listing.
8440 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8441 (vec_select:<ssehalfvecmode>
8442 (match_operand:VI8F_256 1 "nonimmediate_operand")
8443 (parallel [(const_int 0) (const_int 1)])))]
8444 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8445 && reload_completed"
8446 [(set (match_dup 0) (match_dup 1))]
8447 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; High 128 bits of a 256-bit vector of 8-byte elements; instruction
;; selection depends on available AVX512VL/DQ.
8449 (define_insn "vec_extract_hi_<mode><mask_name>"
8450 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
8451 (vec_select:<ssehalfvecmode>
8452 (match_operand:VI8F_256 1 "register_operand" "v,v")
8453 (parallel [(const_int 2) (const_int 3)])))]
8454 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
8456 if (TARGET_AVX512VL)
8458 if (TARGET_AVX512DQ)
8459 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
8461 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
8464 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
8466 [(set_attr "type" "sselog1")
8467 (set_attr "prefix_extra" "1")
8468 (set_attr "length_immediate" "1")
8469 (set_attr "prefix" "vex")
8470 (set_attr "mode" "<sseinsnmode>")])
;; Split VI4F_256 low-half extraction into a lowpart move after reload.
;; Opener "(define_split" line absent from this listing.
8473 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8474 (vec_select:<ssehalfvecmode>
8475 (match_operand:VI4F_256 1 "nonimmediate_operand")
8476 (parallel [(const_int 0) (const_int 1)
8477 (const_int 2) (const_int 3)])))]
8478 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8479 && reload_completed"
8480 [(set (match_dup 0) (match_dup 1))]
8481 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Low 128 bits of a 256-bit vector of 4-byte elements.
8483 (define_insn "vec_extract_lo_<mode><mask_name>"
8484 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
8485 "=<store_mask_constraint>,v")
8486 (vec_select:<ssehalfvecmode>
8487 (match_operand:VI4F_256 1 "<store_mask_predicate>"
8488 "v,<store_mask_constraint>")
8489 (parallel [(const_int 0) (const_int 1)
8490 (const_int 2) (const_int 3)])))]
8492 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
8493 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8496 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
8500 [(set_attr "type" "sselog1")
8501 (set_attr "prefix_extra" "1")
8502 (set_attr "length_immediate" "1")
8503 (set_attr "prefix" "evex")
8504 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the low half of a VI4F_256; dest must equal the merge
;; source (operand 2).  The k-mask (operand 3) is a QImode mask register.
8506 (define_insn "vec_extract_lo_<mode>_maskm"
8507 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8508 (vec_merge:<ssehalfvecmode>
8509 (vec_select:<ssehalfvecmode>
8510 (match_operand:VI4F_256 1 "register_operand" "v")
8511 (parallel [(const_int 0) (const_int 1)
8512 (const_int 2) (const_int 3)]))
8513 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8514 (match_operand:QI 3 "register_operand" "Yk")))]
8515 "TARGET_AVX512VL && TARGET_AVX512F
8516 && rtx_equal_p (operands[2], operands[0])"
8517 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
8518 [(set_attr "type" "sselog1")
8519 (set_attr "prefix_extra" "1")
8520 (set_attr "length_immediate" "1")
8521 (set_attr "prefix" "evex")
8522 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high 128 bits of a VI4F_256 vector: merge under a
;; k-mask into memory; the destination must equal the merge source.
;; FIX(review): operand 3 is the write-mask register ("Yk") and must be
;; QImode, matching vec_extract_lo_<mode>_maskm above and every other
;; maskm pattern in this file; it was wrongly declared with
;; <ssehalfvecmode> (a V4SF/V4SI vector mode), which can never match a
;; mask register.
8524 (define_insn "vec_extract_hi_<mode>_maskm"
8525 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8526 (vec_merge:<ssehalfvecmode>
8527 (vec_select:<ssehalfvecmode>
8528 (match_operand:VI4F_256 1 "register_operand" "v")
8529 (parallel [(const_int 4) (const_int 5)
8530 (const_int 6) (const_int 7)]))
8531 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8532 (match_operand:QI 3 "register_operand" "Yk")))]
8533 "TARGET_AVX512F && TARGET_AVX512VL
8534 && rtx_equal_p (operands[2], operands[0])"
8535 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
8536 [(set_attr "type" "sselog1")
8537 (set_attr "length_immediate" "1")
8538 (set_attr "prefix" "evex")
8539 (set_attr "mode" "<sseinsnmode>")])
;; Masked (merge- or zero-masked via %N2) extract of the high 128 bits of
;; a VI4F_256 vector into a register.
;; CONSISTENCY(review): declare the write-mask operand as QImode like every
;; sibling extract pattern in this file (e.g. the lo/hi maskm patterns);
;; for VI4F_256 (8 elements) <avx512fmaskmode> resolves to QI anyway, so
;; this is semantically identical.
8541 (define_insn "vec_extract_hi_<mode>_mask"
8542 [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
8543 (vec_merge:<ssehalfvecmode>
8544 (vec_select:<ssehalfvecmode>
8545 (match_operand:VI4F_256 1 "register_operand" "v")
8546 (parallel [(const_int 4) (const_int 5)
8547 (const_int 6) (const_int 7)]))
8548 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C")
8549 (match_operand:QI 3 "register_operand" "Yk")))]
8551 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8552 [(set_attr "type" "sselog1")
8553 (set_attr "length_immediate" "1")
8554 (set_attr "prefix" "evex")
8555 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked high-128-bit extract of a VI4F_256: AVX form, or the EVEX
;; 32x4 form when AVX512VL is available (enables xmm16+ / memory dest).
8557 (define_insn "vec_extract_hi_<mode>"
8558 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
8559 (vec_select:<ssehalfvecmode>
8560 (match_operand:VI4F_256 1 "register_operand" "x, v")
8561 (parallel [(const_int 4) (const_int 5)
8562 (const_int 6) (const_int 7)])))]
8565 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
8566 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8567 [(set_attr "isa" "*, avx512vl")
8568 (set_attr "prefix" "vex, evex")
8569 (set_attr "type" "sselog1")
8570 (set_attr "length_immediate" "1")
8571 (set_attr "mode" "<sseinsnmode>")])
;; Low half of a V32HI: emit vextracti64x4 only when a plain move cannot
;; express it (ext register source without AVX512VL); otherwise split to a
;; lowpart move after reload.
8573 (define_insn_and_split "vec_extract_lo_v32hi"
8574 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,v,m")
8576 (match_operand:V32HI 1 "nonimmediate_operand" "v,m,v")
8577 (parallel [(const_int 0) (const_int 1)
8578 (const_int 2) (const_int 3)
8579 (const_int 4) (const_int 5)
8580 (const_int 6) (const_int 7)
8581 (const_int 8) (const_int 9)
8582 (const_int 10) (const_int 11)
8583 (const_int 12) (const_int 13)
8584 (const_int 14) (const_int 15)])))]
8585 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8588 || REG_P (operands[0])
8589 || !EXT_REX_SSE_REG_P (operands[1]))
8592 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8594 "&& reload_completed
8596 || REG_P (operands[0])
8597 || !EXT_REX_SSE_REG_P (operands[1]))"
8598 [(set (match_dup 0) (match_dup 1))]
8600 if (!TARGET_AVX512VL
8601 && REG_P (operands[0])
8602 && EXT_REX_SSE_REG_P (operands[1]))
8603 operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
8605 operands[1] = gen_lowpart (V16HImode, operands[1]);
8607 [(set_attr "type" "sselog1")
8608 (set_attr "prefix_extra" "1")
8609 (set_attr "length_immediate" "1")
8610 (set_attr "memory" "none,load,store")
8611 (set_attr "prefix" "evex")
8612 (set_attr "mode" "XI")])
;; High half of a V32HI (elements 16..31) via vextracti64x4.
8614 (define_insn "vec_extract_hi_v32hi"
8615 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
8617 (match_operand:V32HI 1 "register_operand" "v")
8618 (parallel [(const_int 16) (const_int 17)
8619 (const_int 18) (const_int 19)
8620 (const_int 20) (const_int 21)
8621 (const_int 22) (const_int 23)
8622 (const_int 24) (const_int 25)
8623 (const_int 26) (const_int 27)
8624 (const_int 28) (const_int 29)
8625 (const_int 30) (const_int 31)])))]
8627 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8628 [(set_attr "type" "sselog1")
8629 (set_attr "prefix_extra" "1")
8630 (set_attr "length_immediate" "1")
8631 (set_attr "prefix" "evex")
8632 (set_attr "mode" "XI")])
;; Low half of a V16HI: always a lowpart move after reload.
8634 (define_insn_and_split "vec_extract_lo_v16hi"
8635 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
8637 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
8638 (parallel [(const_int 0) (const_int 1)
8639 (const_int 2) (const_int 3)
8640 (const_int 4) (const_int 5)
8641 (const_int 6) (const_int 7)])))]
8642 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8644 "&& reload_completed"
8645 [(set (match_dup 0) (match_dup 1))]
8646 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
;; High half of a V16HI; third alternative handles an ext register source
;; by operating on its 512-bit view (%g1) with vextracti32x4.
8648 (define_insn "vec_extract_hi_v16hi"
8649 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm,vm,vm")
8651 (match_operand:V16HI 1 "register_operand" "x,v,v")
8652 (parallel [(const_int 8) (const_int 9)
8653 (const_int 10) (const_int 11)
8654 (const_int 12) (const_int 13)
8655 (const_int 14) (const_int 15)])))]
8658 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
8659 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
8660 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
8661 [(set_attr "type" "sselog1")
8662 (set_attr "prefix_extra" "1")
8663 (set_attr "length_immediate" "1")
8664 (set_attr "isa" "*,avx512dq,avx512f")
8665 (set_attr "prefix" "vex,evex,evex")
8666 (set_attr "mode" "OI")])
;; Low half of a V64QI; same structure as vec_extract_lo_v32hi above.
8668 (define_insn_and_split "vec_extract_lo_v64qi"
8669 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
8671 (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
8672 (parallel [(const_int 0) (const_int 1)
8673 (const_int 2) (const_int 3)
8674 (const_int 4) (const_int 5)
8675 (const_int 6) (const_int 7)
8676 (const_int 8) (const_int 9)
8677 (const_int 10) (const_int 11)
8678 (const_int 12) (const_int 13)
8679 (const_int 14) (const_int 15)
8680 (const_int 16) (const_int 17)
8681 (const_int 18) (const_int 19)
8682 (const_int 20) (const_int 21)
8683 (const_int 22) (const_int 23)
8684 (const_int 24) (const_int 25)
8685 (const_int 26) (const_int 27)
8686 (const_int 28) (const_int 29)
8687 (const_int 30) (const_int 31)])))]
8688 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8691 || REG_P (operands[0])
8692 || !EXT_REX_SSE_REG_P (operands[1]))
8695 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8697 "&& reload_completed
8699 || REG_P (operands[0])
8700 || !EXT_REX_SSE_REG_P (operands[1]))"
8701 [(set (match_dup 0) (match_dup 1))]
8703 if (!TARGET_AVX512VL
8704 && REG_P (operands[0])
8705 && EXT_REX_SSE_REG_P (operands[1]))
8706 operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
8708 operands[1] = gen_lowpart (V32QImode, operands[1]);
8710 [(set_attr "type" "sselog1")
8711 (set_attr "prefix_extra" "1")
8712 (set_attr "length_immediate" "1")
8713 (set_attr "memory" "none,load,store")
8714 (set_attr "prefix" "evex")
8715 (set_attr "mode" "XI")])
;; High half of a V64QI (bytes 32..63) via vextracti64x4.
8717 (define_insn "vec_extract_hi_v64qi"
8718 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
8720 (match_operand:V64QI 1 "register_operand" "v")
8721 (parallel [(const_int 32) (const_int 33)
8722 (const_int 34) (const_int 35)
8723 (const_int 36) (const_int 37)
8724 (const_int 38) (const_int 39)
8725 (const_int 40) (const_int 41)
8726 (const_int 42) (const_int 43)
8727 (const_int 44) (const_int 45)
8728 (const_int 46) (const_int 47)
8729 (const_int 48) (const_int 49)
8730 (const_int 50) (const_int 51)
8731 (const_int 52) (const_int 53)
8732 (const_int 54) (const_int 55)
8733 (const_int 56) (const_int 57)
8734 (const_int 58) (const_int 59)
8735 (const_int 60) (const_int 61)
8736 (const_int 62) (const_int 63)])))]
8738 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8739 [(set_attr "type" "sselog1")
8740 (set_attr "prefix_extra" "1")
8741 (set_attr "length_immediate" "1")
8742 (set_attr "prefix" "evex")
8743 (set_attr "mode" "XI")])
;; Low half of a V32QI: lowpart move after reload.
8745 (define_insn_and_split "vec_extract_lo_v32qi"
8746 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
8748 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
8749 (parallel [(const_int 0) (const_int 1)
8750 (const_int 2) (const_int 3)
8751 (const_int 4) (const_int 5)
8752 (const_int 6) (const_int 7)
8753 (const_int 8) (const_int 9)
8754 (const_int 10) (const_int 11)
8755 (const_int 12) (const_int 13)
8756 (const_int 14) (const_int 15)])))]
8757 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8759 "&& reload_completed"
8760 [(set (match_dup 0) (match_dup 1))]
8761 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
;; High half of a V32QI; mirrors vec_extract_hi_v16hi (ext register
;; source handled via the 512-bit view %g1).
8763 (define_insn "vec_extract_hi_v32qi"
8764 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
8766 (match_operand:V32QI 1 "register_operand" "x,v,v")
8767 (parallel [(const_int 16) (const_int 17)
8768 (const_int 18) (const_int 19)
8769 (const_int 20) (const_int 21)
8770 (const_int 22) (const_int 23)
8771 (const_int 24) (const_int 25)
8772 (const_int 26) (const_int 27)
8773 (const_int 28) (const_int 29)
8774 (const_int 30) (const_int 31)])))]
8777 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
8778 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
8779 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
8780 [(set_attr "type" "sselog1")
8781 (set_attr "prefix_extra" "1")
8782 (set_attr "length_immediate" "1")
8783 (set_attr "isa" "*,avx512dq,avx512f")
8784 (set_attr "prefix" "vex,evex,evex")
8785 (set_attr "mode" "OI")])
8787 ;; Modes handled by vec_extract patterns.
8788 (define_mode_iterator VEC_EXTRACT_MODE
8789 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
8790 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
8791 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
8792 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
8793 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
8794 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
8795 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
;; Extract the scalar element at a constant index; all the work is
;; delegated to ix86_expand_vector_extract.
8797 (define_expand "vec_extract<mode><ssescalarmodelower>"
8798 [(match_operand:<ssescalarmode> 0 "register_operand")
8799 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
8800 (match_operand 2 "const_int_operand")]
8803 ix86_expand_vector_extract (false, operands[0], operands[1],
8804 INTVAL (operands[2]));
;; Extract the low (operand 2 == 0) or high (operand 2 == 1) half of a
;; 256/512-bit vector by dispatching to the vec_extract_lo/hi patterns.
8808 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
8809 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8810 (match_operand:V_256_512 1 "register_operand")
8811 (match_operand 2 "const_0_to_1_operand")]
8814 if (INTVAL (operands[2]))
8815 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
8817 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
8821 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8823 ;; Parallel double-precision floating point element swizzling
8825 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 512-bit vunpckhpd: interleave the odd-indexed doubles of operands 1
;; and 2 within each 128-bit lane (selected indices 1,9 3,11 5,13 7,15).
8827 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
8828 [(set (match_operand:V8DF 0 "register_operand" "=v")
8831 (match_operand:V8DF 1 "register_operand" "v")
8832 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8833 (parallel [(const_int 1) (const_int 9)
8834 (const_int 3) (const_int 11)
8835 (const_int 5) (const_int 13)
8836 (const_int 7) (const_int 15)])))]
8838 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8839 [(set_attr "type" "sselog")
8840 (set_attr "prefix" "evex")
8841 (set_attr "mode" "V8DF")])
8843 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit in-lane vunpckhpd (indices 1,5 3,7); optionally masked under
;; AVX512VL via the <mask_name> subst.
8844 (define_insn "avx_unpckhpd256<mask_name>"
8845 [(set (match_operand:V4DF 0 "register_operand" "=v")
8848 (match_operand:V4DF 1 "register_operand" "v")
8849 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8850 (parallel [(const_int 1) (const_int 5)
8851 (const_int 3) (const_int 7)])))]
8852 "TARGET_AVX && <mask_avx512vl_condition>"
8853 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8854 [(set_attr "type" "sselog")
8855 (set_attr "prefix" "vex")
8856 (set_attr "mode" "V4DF")])
;; True (cross-lane) high interleave for V4DF.  Since the 256-bit unpck
;; insns only work within 128-bit lanes, the result is composed from two
;; in-lane unpck operations into fresh temporaries (operands 3 and 4)
;; followed by a lane-combining vec_select.
8858 (define_expand "vec_interleave_highv4df"
8862 (match_operand:V4DF 1 "register_operand")
8863 (match_operand:V4DF 2 "nonimmediate_operand"))
8864 (parallel [(const_int 0) (const_int 4)
8865 (const_int 2) (const_int 6)])))
8871 (parallel [(const_int 1) (const_int 5)
8872 (const_int 3) (const_int 7)])))
8873 (set (match_operand:V4DF 0 "register_operand")
8878 (parallel [(const_int 2) (const_int 3)
8879 (const_int 6) (const_int 7)])))]
8882 operands[3] = gen_reg_rtx (V4DFmode);
8883 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpckhpd: merge the unpck result with operand 3
;; (register or zero, "0C") under mask register operand 4.
8887 (define_insn "avx512vl_unpckhpd128_mask"
8888 [(set (match_operand:V2DF 0 "register_operand" "=v")
8892 (match_operand:V2DF 1 "register_operand" "v")
8893 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8894 (parallel [(const_int 1) (const_int 3)]))
8895 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
8896 (match_operand:QI 4 "register_operand" "Yk")))]
8898 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8899 [(set_attr "type" "sselog")
8900 (set_attr "prefix" "evex")
8901 (set_attr "mode" "V2DF")])
;; V2DF high interleave.  If the operand combination cannot be matched
;; by *vec_interleave_highv2df directly, force operand 2 into a register.
8903 (define_expand "vec_interleave_highv2df"
8904 [(set (match_operand:V2DF 0 "register_operand")
8907 (match_operand:V2DF 1 "nonimmediate_operand")
8908 (match_operand:V2DF 2 "nonimmediate_operand"))
8909 (parallel [(const_int 1)
8913 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
8914 operands[2] = force_reg (V2DFmode, operands[2]);
;; Six alternatives: SSE2 unpckhpd, AVX vunpckhpd, SSE3 movddup of the
;; high half of a memory operand (%H1), and plain high-half moves
;; (movlpd/vmovlpd/movhpd) when one side involves memory.
8917 (define_insn "*vec_interleave_highv2df"
8918 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
8921 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
8922 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
8923 (parallel [(const_int 1)
8925 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
8927 unpckhpd\t{%2, %0|%0, %2}
8928 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
8929 %vmovddup\t{%H1, %0|%0, %H1}
8930 movlpd\t{%H1, %0|%0, %H1}
8931 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
8932 %vmovhpd\t{%1, %0|%q0, %1}"
8933 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8934 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8935 (set (attr "prefix_data16")
8936 (if_then_else (eq_attr "alternative" "3,5")
8938 (const_string "*")))
8939 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8940 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; 512-bit vmovddup: duplicate the even-indexed doubles of operand 1
;; (selection 0,8 2,10 4,12 6,14 of the operand concatenated with itself).
8942 (define_expand "avx512f_movddup512<mask_name>"
8943 [(set (match_operand:V8DF 0 "register_operand")
8946 (match_operand:V8DF 1 "nonimmediate_operand")
8948 (parallel [(const_int 0) (const_int 8)
8949 (const_int 2) (const_int 10)
8950 (const_int 4) (const_int 12)
8951 (const_int 6) (const_int 14)])))]
;; 512-bit vunpcklpd: interleave the even-indexed doubles of operands 1
;; and 2 within each 128-bit lane.
8954 (define_expand "avx512f_unpcklpd512<mask_name>"
8955 [(set (match_operand:V8DF 0 "register_operand")
8958 (match_operand:V8DF 1 "register_operand")
8959 (match_operand:V8DF 2 "nonimmediate_operand"))
8960 (parallel [(const_int 0) (const_int 8)
8961 (const_int 2) (const_int 10)
8962 (const_int 4) (const_int 12)
8963 (const_int 6) (const_int 14)])))]
;; Matcher for both expanders above: when operand 2 equals operand 1
;; (constraint "1"), emit vmovddup; otherwise the two-operand vunpcklpd.
8966 (define_insn "*avx512f_unpcklpd512<mask_name>"
8967 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
8970 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
8971 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
8972 (parallel [(const_int 0) (const_int 8)
8973 (const_int 2) (const_int 10)
8974 (const_int 4) (const_int 12)
8975 (const_int 6) (const_int 14)])))]
8978 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
8979 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8980 [(set_attr "type" "sselog")
8981 (set_attr "prefix" "evex")
8982 (set_attr "mode" "V8DF")])
8984 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vmovddup: duplicate even-indexed doubles (selection 0,4 2,6).
8985 (define_expand "avx_movddup256<mask_name>"
8986 [(set (match_operand:V4DF 0 "register_operand")
8989 (match_operand:V4DF 1 "nonimmediate_operand")
8991 (parallel [(const_int 0) (const_int 4)
8992 (const_int 2) (const_int 6)])))]
8993 "TARGET_AVX && <mask_avx512vl_condition>")
;; 256-bit in-lane vunpcklpd (indices 0,4 2,6).
8995 (define_expand "avx_unpcklpd256<mask_name>"
8996 [(set (match_operand:V4DF 0 "register_operand")
8999 (match_operand:V4DF 1 "register_operand")
9000 (match_operand:V4DF 2 "nonimmediate_operand"))
9001 (parallel [(const_int 0) (const_int 4)
9002 (const_int 2) (const_int 6)])))]
9003 "TARGET_AVX && <mask_avx512vl_condition>")
;; Matcher for both expanders above: vmovddup when operand 2 matches
;; operand 1 (constraint "1"), vunpcklpd otherwise.
9005 (define_insn "*avx_unpcklpd256<mask_name>"
9006 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
9009 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
9010 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
9011 (parallel [(const_int 0) (const_int 4)
9012 (const_int 2) (const_int 6)])))]
9013 "TARGET_AVX && <mask_avx512vl_condition>"
9015 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
9016 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
9017 [(set_attr "type" "sselog")
9018 (set_attr "prefix" "vex")
9019 (set_attr "mode" "V4DF")])
;; True (cross-lane) low interleave for V4DF, mirroring
;; vec_interleave_highv4df: two in-lane unpck operations into fresh
;; temporaries (operands 3 and 4) followed by a lane-combining vec_select.
9021 (define_expand "vec_interleave_lowv4df"
9025 (match_operand:V4DF 1 "register_operand")
9026 (match_operand:V4DF 2 "nonimmediate_operand"))
9027 (parallel [(const_int 0) (const_int 4)
9028 (const_int 2) (const_int 6)])))
9034 (parallel [(const_int 1) (const_int 5)
9035 (const_int 3) (const_int 7)])))
9036 (set (match_operand:V4DF 0 "register_operand")
9041 (parallel [(const_int 0) (const_int 1)
9042 (const_int 4) (const_int 5)])))]
9045 operands[3] = gen_reg_rtx (V4DFmode);
9046 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpcklpd: merge with operand 3 under mask operand 4.
9049 (define_insn "avx512vl_unpcklpd128_mask"
9050 [(set (match_operand:V2DF 0 "register_operand" "=v")
9054 (match_operand:V2DF 1 "register_operand" "v")
9055 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9056 (parallel [(const_int 0) (const_int 2)]))
9057 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9058 (match_operand:QI 4 "register_operand" "Yk")))]
9060 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9061 [(set_attr "type" "sselog")
9062 (set_attr "prefix" "evex")
9063 (set_attr "mode" "V2DF")])
;; V2DF low interleave.  If the operand combination is not directly
;; encodable, force operand 1 into a register (note: operand 1 here,
;; versus operand 2 in the high-interleave expander).
9065 (define_expand "vec_interleave_lowv2df"
9066 [(set (match_operand:V2DF 0 "register_operand")
9069 (match_operand:V2DF 1 "nonimmediate_operand")
9070 (match_operand:V2DF 2 "nonimmediate_operand"))
9071 (parallel [(const_int 0)
9075 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
9076 operands[1] = force_reg (V2DFmode, operands[1]);
;; Six alternatives: SSE2 unpcklpd, AVX vunpcklpd, SSE3 movddup from
;; memory, and low/high-half moves (movhpd/vmovhpd/movlpd) when one
;; side involves memory.
9079 (define_insn "*vec_interleave_lowv2df"
9080 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
9083 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
9084 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
9085 (parallel [(const_int 0)
9087 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
9089 unpcklpd\t{%2, %0|%0, %2}
9090 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9091 %vmovddup\t{%1, %0|%0, %q1}
9092 movhpd\t{%2, %0|%0, %q2}
9093 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
9094 %vmovlpd\t{%2, %H0|%H0, %2}"
9095 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9096 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9097 (set (attr "prefix_data16")
9098 (if_then_else (eq_attr "alternative" "3,5")
9100 (const_string "*")))
9101 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9102 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Split: storing a duplicated low element to memory becomes two plain
;; DF stores of the low part (offsets 0 and 8).  Runs after reload.
9105 [(set (match_operand:V2DF 0 "memory_operand")
9108 (match_operand:V2DF 1 "register_operand")
9110 (parallel [(const_int 0)
9112 "TARGET_SSE3 && reload_completed"
9115 rtx low = gen_lowpart (DFmode, operands[1]);
9117 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
9118 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split: a from-memory shuffle selecting element i twice (operand 3 ==
;; operand 2 + 2) becomes a vec_duplicate of the single DF memory word
;; at byte offset i*8, i.e. a movddup load.
9123 [(set (match_operand:V2DF 0 "register_operand")
9126 (match_operand:V2DF 1 "memory_operand")
9128 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
9129 (match_operand:SI 3 "const_int_operand")])))]
9130 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
9131 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
9133 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Scalar VSCALEFSS/SD (operand 1 scaled by 2^floor(operand 2), upper
;; elements taken from operand 1), with optional masking and embedded
;; rounding via the scalar subst attributes.
9136 (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
9137 [(set (match_operand:VF_128 0 "register_operand" "=v")
9140 [(match_operand:VF_128 1 "register_operand" "v")
9141 (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
9146 "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
9147 [(set_attr "prefix" "evex")
9148 (set_attr "mode" "<ssescalarmode>")])
;; Packed VSCALEFPS/PD for all AVX512VL vector float modes.
9150 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
9151 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9153 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
9154 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
9157 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
9158 [(set_attr "prefix" "evex")
9159 (set_attr "mode" "<MODE>")])
;; Zero-masked VPTERNLOG: forward to the _maskz_1 pattern with a zero
;; vector as the merge operand.
9161 (define_expand "<avx512>_vternlog<mode>_maskz"
9162 [(match_operand:VI48_AVX512VL 0 "register_operand")
9163 (match_operand:VI48_AVX512VL 1 "register_operand")
9164 (match_operand:VI48_AVX512VL 2 "register_operand")
9165 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
9166 (match_operand:SI 4 "const_0_to_255_operand")
9167 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9170 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
9171 operands[0], operands[1], operands[2], operands[3],
9172 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
;; VPTERNLOGD/Q: bitwise ternary logic of operands 1-3 selected by the
;; 8-bit truth-table immediate (operand 4); operand 1 is also the
;; destination ("0" constraint).
9176 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
9177 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9178 (unspec:VI48_AVX512VL
9179 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9180 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9181 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9182 (match_operand:SI 4 "const_0_to_255_operand")]
9185 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
9186 [(set_attr "type" "sselog")
9187 (set_attr "prefix" "evex")
9188 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked VPTERNLOG: elements with a zero mask bit keep the value
;; of operand 1 (which is both destination and merge source).
9190 (define_insn "<avx512>_vternlog<mode>_mask"
9191 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9192 (vec_merge:VI48_AVX512VL
9193 (unspec:VI48_AVX512VL
9194 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9195 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9196 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9197 (match_operand:SI 4 "const_0_to_255_operand")]
9200 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9202 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
9203 [(set_attr "type" "sselog")
9204 (set_attr "prefix" "evex")
9205 (set_attr "mode" "<sseinsnmode>")])
;; Packed VGETEXPPS/PD: extract the biased exponents as FP values.
;; NOTE(review): the "; " after the output template starts an md comment
;; and is redundant — presumably a leftover; harmless to the md reader.
9207 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
9208 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9209 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
9212 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
9213 [(set_attr "prefix" "evex")
9214 (set_attr "mode" "<MODE>")])
;; Scalar VGETEXPSS/SD: exponent of operand 2's low element, upper
;; elements taken from operand 1.
9216 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
9217 [(set (match_operand:VF_128 0 "register_operand" "=v")
9220 [(match_operand:VF_128 1 "register_operand" "v")
9221 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
9226 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}";
9227 [(set_attr "prefix" "evex")
9228 (set_attr "mode" "<ssescalarmode>")])
;; VALIGND/Q: concatenate operands 1 and 2 and shift right by operand 3
;; elements, for 32/64-bit element vectors under AVX512VL.
9230 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
9231 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9232 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
9233 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
9234 (match_operand:SI 3 "const_0_to_255_operand")]
9237 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9238 [(set_attr "prefix" "evex")
9239 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit shufps builtin expander.  Decomposes the 8-bit shufps
;; selector (operand 3) into the 16 explicit per-element indices the
;; avx512f_shufps512_1_mask pattern expects: the same 2-bit fields are
;; reused in every 128-bit lane, with +16 offsets for elements taken
;; from operand 2 and +4/+8/+12 for successive lanes.
9241 (define_expand "avx512f_shufps512_mask"
9242 [(match_operand:V16SF 0 "register_operand")
9243 (match_operand:V16SF 1 "register_operand")
9244 (match_operand:V16SF 2 "nonimmediate_operand")
9245 (match_operand:SI 3 "const_0_to_255_operand")
9246 (match_operand:V16SF 4 "register_operand")
9247 (match_operand:HI 5 "register_operand")]
9250 int mask = INTVAL (operands[3]);
9251 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
9252 GEN_INT ((mask >> 0) & 3),
9253 GEN_INT ((mask >> 2) & 3),
9254 GEN_INT (((mask >> 4) & 3) + 16),
9255 GEN_INT (((mask >> 6) & 3) + 16),
9256 GEN_INT (((mask >> 0) & 3) + 4),
9257 GEN_INT (((mask >> 2) & 3) + 4),
9258 GEN_INT (((mask >> 4) & 3) + 20),
9259 GEN_INT (((mask >> 6) & 3) + 20),
9260 GEN_INT (((mask >> 0) & 3) + 8),
9261 GEN_INT (((mask >> 2) & 3) + 8),
9262 GEN_INT (((mask >> 4) & 3) + 24),
9263 GEN_INT (((mask >> 6) & 3) + 24),
9264 GEN_INT (((mask >> 0) & 3) + 12),
9265 GEN_INT (((mask >> 2) & 3) + 12),
9266 GEN_INT (((mask >> 4) & 3) + 28),
9267 GEN_INT (((mask >> 6) & 3) + 28),
9268 operands[4], operands[5]));
;; Zero-masked VFIXUPIMM: forward to the _maskz_1 pattern with a zero
;; vector as the merge operand.
9273 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
9274 [(match_operand:VF_AVX512VL 0 "register_operand")
9275 (match_operand:VF_AVX512VL 1 "register_operand")
9276 (match_operand:VF_AVX512VL 2 "register_operand")
9277 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9278 (match_operand:SI 4 "const_0_to_255_operand")
9279 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9282 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9283 operands[0], operands[1], operands[2], operands[3],
9284 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9285 <round_saeonly_expand_operand6>));
;; Packed VFIXUPIMMPS/PD: fix up special values of operand 2 using the
;; integer table in operand 3 and immediate operand 4; operand 1 is the
;; passthrough/destination.
9289 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
9290 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9292 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9293 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9294 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9295 (match_operand:SI 4 "const_0_to_255_operand")]
9298 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
9299 [(set_attr "prefix" "evex")
9300 (set_attr "mode" "<MODE>")])
;; Merge-masked variant: unmasked elements keep operand 1's value.
9302 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
9303 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9304 (vec_merge:VF_AVX512VL
9306 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9307 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9308 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9309 (match_operand:SI 4 "const_0_to_255_operand")]
9312 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9314 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
9315 [(set_attr "prefix" "evex")
9316 (set_attr "mode" "<MODE>")])
;; Scalar counterparts of the fixupimm patterns above (VFIXUPIMMSS/SD).
;; Zero-masked expander: forwards with a zero merge vector.
9318 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
9319 [(match_operand:VF_128 0 "register_operand")
9320 (match_operand:VF_128 1 "register_operand")
9321 (match_operand:VF_128 2 "register_operand")
9322 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9323 (match_operand:SI 4 "const_0_to_255_operand")
9324 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9327 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9328 operands[0], operands[1], operands[2], operands[3],
9329 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9330 <round_saeonly_expand_operand6>));
;; Scalar VFIXUPIMM insn: fixes up the low element, upper elements come
;; from operand 1 (also the destination).
9334 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
9335 [(set (match_operand:VF_128 0 "register_operand" "=v")
9338 [(match_operand:VF_128 1 "register_operand" "0")
9339 (match_operand:VF_128 2 "register_operand" "v")
9340 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9341 (match_operand:SI 4 "const_0_to_255_operand")]
9346 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %<iptr>3<round_saeonly_sd_mask_op5>, %4}";
9347 [(set_attr "prefix" "evex")
9348 (set_attr "mode" "<ssescalarmode>")])
;; Merge-masked scalar variant under mask register operand 5.
9350 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
9351 [(set (match_operand:VF_128 0 "register_operand" "=v")
9355 [(match_operand:VF_128 1 "register_operand" "0")
9356 (match_operand:VF_128 2 "register_operand" "v")
9357 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9358 (match_operand:SI 4 "const_0_to_255_operand")]
9363 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9365 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %<iptr>3<round_saeonly_op6>, %4}";
9366 [(set_attr "prefix" "evex")
9367 (set_attr "mode" "<ssescalarmode>")])
;; Packed VRNDSCALEPS/PD: round operand 1 according to the immediate
;; rounding control in operand 2, with optional masking/SAE.
9369 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
9370 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9372 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
9373 (match_operand:SI 2 "const_0_to_255_operand")]
9376 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
9377 [(set_attr "length_immediate" "1")
9378 (set_attr "prefix" "evex")
9379 (set_attr "mode" "<MODE>")])
;; Scalar VRNDSCALESS/SD: round the low element of operand 2, upper
;; elements taken from operand 1.
9381 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
9382 [(set (match_operand:VF_128 0 "register_operand" "=v")
9385 [(match_operand:VF_128 1 "register_operand" "v")
9386 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9387 (match_operand:SI 3 "const_0_to_255_operand")]
9392 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
9393 [(set_attr "length_immediate" "1")
9394 (set_attr "prefix" "evex")
9395 (set_attr "mode" "<MODE>")])
9397 ;; One bit in mask selects 2 elements.
;; 512-bit vshufps matcher.  Operands 3-18 are the 16 explicit element
;; indices; the insn condition checks that each 128-bit lane repeats the
;; lane-0 selection (+4/+8/+12), so a single 8-bit immediate can encode
;; the shuffle.  The output code rebuilds that imm8 into operands[3].
9398 (define_insn "avx512f_shufps512_1<mask_name>"
9399 [(set (match_operand:V16SF 0 "register_operand" "=v")
9402 (match_operand:V16SF 1 "register_operand" "v")
9403 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
9404 (parallel [(match_operand 3 "const_0_to_3_operand")
9405 (match_operand 4 "const_0_to_3_operand")
9406 (match_operand 5 "const_16_to_19_operand")
9407 (match_operand 6 "const_16_to_19_operand")
9408 (match_operand 7 "const_4_to_7_operand")
9409 (match_operand 8 "const_4_to_7_operand")
9410 (match_operand 9 "const_20_to_23_operand")
9411 (match_operand 10 "const_20_to_23_operand")
9412 (match_operand 11 "const_8_to_11_operand")
9413 (match_operand 12 "const_8_to_11_operand")
9414 (match_operand 13 "const_24_to_27_operand")
9415 (match_operand 14 "const_24_to_27_operand")
9416 (match_operand 15 "const_12_to_15_operand")
9417 (match_operand 16 "const_12_to_15_operand")
9418 (match_operand 17 "const_28_to_31_operand")
9419 (match_operand 18 "const_28_to_31_operand")])))]
9421 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
9422 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
9423 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
9424 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
9425 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
9426 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
9427 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
9428 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
9429 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
9430 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
9431 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
9432 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
9435 mask = INTVAL (operands[3]);
9436 mask |= INTVAL (operands[4]) << 2;
9437 mask |= (INTVAL (operands[5]) - 16) << 4;
9438 mask |= (INTVAL (operands[6]) - 16) << 6;
9439 operands[3] = GEN_INT (mask);
9441 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
9443 [(set_attr "type" "sselog")
9444 (set_attr "length_immediate" "1")
9445 (set_attr "prefix" "evex")
9446 (set_attr "mode" "V16SF")])
;; Masked 512-bit shufpd builtin expander: one selector bit per element,
;; expanded into explicit per-element indices (even bits pick from
;; operand 1, odd bits from operand 2, offsets per 128-bit lane).
9448 (define_expand "avx512f_shufpd512_mask"
9449 [(match_operand:V8DF 0 "register_operand")
9450 (match_operand:V8DF 1 "register_operand")
9451 (match_operand:V8DF 2 "nonimmediate_operand")
9452 (match_operand:SI 3 "const_0_to_255_operand")
9453 (match_operand:V8DF 4 "register_operand")
9454 (match_operand:QI 5 "register_operand")]
9457 int mask = INTVAL (operands[3]);
9458 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
9460 GEN_INT (mask & 2 ? 9 : 8),
9461 GEN_INT (mask & 4 ? 3 : 2),
9462 GEN_INT (mask & 8 ? 11 : 10),
9463 GEN_INT (mask & 16 ? 5 : 4),
9464 GEN_INT (mask & 32 ? 13 : 12),
9465 GEN_INT (mask & 64 ? 7 : 6),
9466 GEN_INT (mask & 128 ? 15 : 14),
9467 operands[4], operands[5]));
;; 512-bit vshufpd matcher: reconstructs the 8-bit immediate from the
;; eight explicit per-element indices (operands 3-10).
9471 (define_insn "avx512f_shufpd512_1<mask_name>"
9472 [(set (match_operand:V8DF 0 "register_operand" "=v")
9475 (match_operand:V8DF 1 "register_operand" "v")
9476 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9477 (parallel [(match_operand 3 "const_0_to_1_operand")
9478 (match_operand 4 "const_8_to_9_operand")
9479 (match_operand 5 "const_2_to_3_operand")
9480 (match_operand 6 "const_10_to_11_operand")
9481 (match_operand 7 "const_4_to_5_operand")
9482 (match_operand 8 "const_12_to_13_operand")
9483 (match_operand 9 "const_6_to_7_operand")
9484 (match_operand 10 "const_14_to_15_operand")])))]
9488 mask = INTVAL (operands[3]);
9489 mask |= (INTVAL (operands[4]) - 8) << 1;
9490 mask |= (INTVAL (operands[5]) - 2) << 2;
9491 mask |= (INTVAL (operands[6]) - 10) << 3;
9492 mask |= (INTVAL (operands[7]) - 4) << 4;
9493 mask |= (INTVAL (operands[8]) - 12) << 5;
9494 mask |= (INTVAL (operands[9]) - 6) << 6;
9495 mask |= (INTVAL (operands[10]) - 14) << 7;
9496 operands[3] = GEN_INT (mask);
9498 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9500 [(set_attr "type" "sselog")
9501 (set_attr "length_immediate" "1")
9502 (set_attr "prefix" "evex")
9503 (set_attr "mode" "V8DF")])
;; 256-bit shufpd builtin expander: translate the 4-bit selector into
;; explicit per-element indices for avx_shufpd256_1.
9505 (define_expand "avx_shufpd256<mask_expand4_name>"
9506 [(match_operand:V4DF 0 "register_operand")
9507 (match_operand:V4DF 1 "register_operand")
9508 (match_operand:V4DF 2 "nonimmediate_operand")
9509 (match_operand:SI 3 "const_int_operand")]
9512 int mask = INTVAL (operands[3]);
9513 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
9517 GEN_INT (mask & 2 ? 5 : 4),
9518 GEN_INT (mask & 4 ? 3 : 2),
9519 GEN_INT (mask & 8 ? 7 : 6)
9520 <mask_expand4_args>));
;; 256-bit vshufpd matcher: rebuilds the 4-bit immediate from the four
;; explicit indices (operands 3-6).
9524 (define_insn "avx_shufpd256_1<mask_name>"
9525 [(set (match_operand:V4DF 0 "register_operand" "=v")
9528 (match_operand:V4DF 1 "register_operand" "v")
9529 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9530 (parallel [(match_operand 3 "const_0_to_1_operand")
9531 (match_operand 4 "const_4_to_5_operand")
9532 (match_operand 5 "const_2_to_3_operand")
9533 (match_operand 6 "const_6_to_7_operand")])))]
9534 "TARGET_AVX && <mask_avx512vl_condition>"
9537 mask = INTVAL (operands[3]);
9538 mask |= (INTVAL (operands[4]) - 4) << 1;
9539 mask |= (INTVAL (operands[5]) - 2) << 2;
9540 mask |= (INTVAL (operands[6]) - 6) << 3;
9541 operands[3] = GEN_INT (mask);
9543 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
9545 [(set_attr "type" "sseshuf")
9546 (set_attr "length_immediate" "1")
9547 (set_attr "prefix" "vex")
9548 (set_attr "mode" "V4DF")])
;; 128-bit shufpd builtin expander: translate the 2-bit selector into
;; explicit indices for sse2_shufpd_v2df.
9550 (define_expand "sse2_shufpd<mask_expand4_name>"
9551 [(match_operand:V2DF 0 "register_operand")
9552 (match_operand:V2DF 1 "register_operand")
9553 (match_operand:V2DF 2 "vector_operand")
9554 (match_operand:SI 3 "const_int_operand")]
9557 int mask = INTVAL (operands[3]);
9558 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
9559 operands[2], GEN_INT (mask & 1),
9560 GEN_INT (mask & 2 ? 3 : 2)
9561 <mask_expand4_args>));
;; Merge-masked 128-bit vshufpd: the shuffle result is merged with
;; operand 5 (register or zero) under mask register operand 6.
9565 (define_insn "sse2_shufpd_v2df_mask"
9566 [(set (match_operand:V2DF 0 "register_operand" "=v")
9570 (match_operand:V2DF 1 "register_operand" "v")
9571 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9572 (parallel [(match_operand 3 "const_0_to_1_operand")
9573 (match_operand 4 "const_2_to_3_operand")]))
9574 (match_operand:V2DF 5 "nonimm_or_0_operand" "0C")
9575 (match_operand:QI 6 "register_operand" "Yk")))]
9579 mask = INTVAL (operands[3]);
9580 mask |= (INTVAL (operands[4]) - 2) << 1;
9581 operands[3] = GEN_INT (mask);
9583 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
9585 [(set_attr "type" "sseshuf")
9586 (set_attr "length_immediate" "1")
9587 (set_attr "prefix" "evex")
9588 (set_attr "mode" "V2DF")])
9590 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit in-lane high interleave of 64-bit integers (vpunpckhqdq).
9591 (define_insn "avx2_interleave_highv4di<mask_name>"
9592 [(set (match_operand:V4DI 0 "register_operand" "=v")
9595 (match_operand:V4DI 1 "register_operand" "v")
9596 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
9597 (parallel [(const_int 1)
9601 "TARGET_AVX2 && <mask_avx512vl_condition>"
9602 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9603 [(set_attr "type" "sselog")
9604 (set_attr "prefix" "vex")
9605 (set_attr "mode" "OI")])
;; 512-bit in-lane high interleave of 64-bit integers.
9607 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
9608 [(set (match_operand:V8DI 0 "register_operand" "=v")
9611 (match_operand:V8DI 1 "register_operand" "v")
9612 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
9613 (parallel [(const_int 1) (const_int 9)
9614 (const_int 3) (const_int 11)
9615 (const_int 5) (const_int 13)
9616 (const_int 7) (const_int 15)])))]
9618 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9619 [(set_attr "type" "sselog")
9620 (set_attr "prefix" "evex")
9621 (set_attr "mode" "XI")])
;; 128-bit high interleave: SSE2 punpckhqdq or AVX vpunpckhqdq.
9623 (define_insn "vec_interleave_highv2di<mask_name>"
9624 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9627 (match_operand:V2DI 1 "register_operand" "0,v")
9628 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
9629 (parallel [(const_int 1)
9631 "TARGET_SSE2 && <mask_avx512vl_condition>"
9633 punpckhqdq\t{%2, %0|%0, %2}
9634 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9635 [(set_attr "isa" "noavx,avx")
9636 (set_attr "type" "sselog")
9637 (set_attr "prefix_data16" "1,*")
9638 (set_attr "prefix" "orig,<mask_prefix>")
9639 (set_attr "mode" "TI")])
;; 256-bit in-lane low interleave of 64-bit integers (vpunpcklqdq).
9641 (define_insn "avx2_interleave_lowv4di<mask_name>"
9642 [(set (match_operand:V4DI 0 "register_operand" "=v")
9645 (match_operand:V4DI 1 "register_operand" "v")
9646 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
9647 (parallel [(const_int 0)
9651 "TARGET_AVX2 && <mask_avx512vl_condition>"
9652 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9653 [(set_attr "type" "sselog")
9654 (set_attr "prefix" "vex")
9655 (set_attr "mode" "OI")])
;; 512-bit in-lane low interleave of 64-bit integers.
9657 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
9658 [(set (match_operand:V8DI 0 "register_operand" "=v")
9661 (match_operand:V8DI 1 "register_operand" "v")
9662 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
9663 (parallel [(const_int 0) (const_int 8)
9664 (const_int 2) (const_int 10)
9665 (const_int 4) (const_int 12)
9666 (const_int 6) (const_int 14)])))]
9668 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9669 [(set_attr "type" "sselog")
9670 (set_attr "prefix" "evex")
9671 (set_attr "mode" "XI")])
;; 128-bit low interleave: SSE2 punpcklqdq or AVX vpunpcklqdq.
9673 (define_insn "vec_interleave_lowv2di<mask_name>"
9674 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9677 (match_operand:V2DI 1 "register_operand" "0,v")
9678 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
9679 (parallel [(const_int 0)
9681 "TARGET_SSE2 && <mask_avx512vl_condition>"
9683 punpcklqdq\t{%2, %0|%0, %2}
9684 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9685 [(set_attr "isa" "noavx,avx")
9686 (set_attr "type" "sselog")
9687 (set_attr "prefix_data16" "1,*")
9688 (set_attr "prefix" "orig,vex")
9689 (set_attr "mode" "TI")])
;; 128-bit shufpd/vshufpd for both V2DF and V2DI (VI8F_128); rebuilds
;; the 2-bit immediate from the explicit indices in operands 3 and 4.
9691 (define_insn "sse2_shufpd_<mode>"
9692 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
9693 (vec_select:VI8F_128
9694 (vec_concat:<ssedoublevecmode>
9695 (match_operand:VI8F_128 1 "register_operand" "0,v")
9696 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
9697 (parallel [(match_operand 3 "const_0_to_1_operand")
9698 (match_operand 4 "const_2_to_3_operand")])))]
9702 mask = INTVAL (operands[3]);
9703 mask |= (INTVAL (operands[4]) - 2) << 1;
9704 operands[3] = GEN_INT (mask);
9706 switch (which_alternative)
9709 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
9711 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9716 [(set_attr "isa" "noavx,avx")
9717 (set_attr "type" "sseshuf")
9718 (set_attr "length_immediate" "1")
9719 (set_attr "prefix" "orig,maybe_evex")
9720 (set_attr "mode" "V2DF")])
9722 ;; Avoid combining registers from different units in a single alternative,
9723 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract the high (index 1) DF element of a V2DF.  Alternatives cover
;; store-to-memory (movhpd), SSE register shuffles (vunpckhpd), and
;; loads into x87/integer regs from a memory source.
;; NOTE(review): some interior lines of this pattern are elided in this excerpt.
9724 (define_insn "sse2_storehpd"
9725 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
9727 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
9728 (parallel [(const_int 1)])))]
9729 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9731 %vmovhpd\t{%1, %0|%0, %1}
9733 vunpckhpd\t{%d1, %0|%0, %d1}
9737 [(set_attr "isa" "*,noavx,avx,*,*,*")
9738 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
9739 (set (attr "prefix_data16")
9741 (and (eq_attr "alternative" "0")
9742 (not (match_test "TARGET_AVX")))
9744 (const_string "*")))
9745 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
9746 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Splitter (its define_split header is not visible in this excerpt):
;; after reload, extracting element 1 of an in-memory V2DF becomes a
;; plain DF load from the source address advanced by 8 bytes.
9749 [(set (match_operand:DF 0 "register_operand")
9751 (match_operand:V2DF 1 "memory_operand")
9752 (parallel [(const_int 1)])))]
9753 "TARGET_SSE2 && reload_completed"
9754 [(set (match_dup 0) (match_dup 1))]
9755 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; SSE1-only fallback (!TARGET_SSE2) for extracting the high DF element,
;; implemented with single-precision moves: movhps to memory, movhlps
;; between registers, or movlps from the high half of a memory source.
;; NOTE(review): some interior lines of this pattern are elided in this excerpt.
9757 (define_insn "*vec_extractv2df_1_sse"
9758 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
9760 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
9761 (parallel [(const_int 1)])))]
9762 "!TARGET_SSE2 && TARGET_SSE
9763 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9765 movhps\t{%1, %0|%q0, %1}
9766 movhlps\t{%1, %0|%0, %1}
9767 movlps\t{%H1, %0|%0, %H1}"
9768 [(set_attr "type" "ssemov")
9769 (set_attr "mode" "V2SF,V4SF,V2SF")])
9771 ;; Avoid combining registers from different units in a single alternative,
9772 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract the low (index 0) DF element of a V2DF: movlpd to memory,
;; register-to-register moves, or loads into x87/integer registers.
;; NOTE(review): some interior lines of this pattern are elided in this excerpt.
9773 (define_insn "sse2_storelpd"
9774 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
9776 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
9777 (parallel [(const_int 0)])))]
9778 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9780 %vmovlpd\t{%1, %0|%0, %1}
9785 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
9786 (set (attr "prefix_data16")
9787 (if_then_else (eq_attr "alternative" "0")
9789 (const_string "*")))
9790 (set_attr "prefix" "maybe_vex")
9791 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Splitter (its define_split header is not visible in this excerpt):
;; after reload, element 0 of a V2DF is just the low DF subreg/lowpart.
9794 [(set (match_operand:DF 0 "register_operand")
9796 (match_operand:V2DF 1 "nonimmediate_operand")
9797 (parallel [(const_int 0)])))]
9798 "TARGET_SSE2 && reload_completed"
9799 [(set (match_dup 0) (match_dup 1))]
9800 "operands[1] = gen_lowpart (DFmode, operands[1]);")
;; SSE1-only fallback (!TARGET_SSE2) for extracting the low DF element,
;; via single-precision moves (movlps/movaps).
;; NOTE(review): some interior lines of this pattern are elided in this excerpt.
9802 (define_insn "*vec_extractv2df_0_sse"
9803 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
9805 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
9806 (parallel [(const_int 0)])))]
9807 "!TARGET_SSE2 && TARGET_SSE
9808 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9810 movlps\t{%1, %0|%0, %1}
9811 movaps\t{%1, %0|%0, %1}
9812 movlps\t{%1, %0|%0, %q1}"
9813 [(set_attr "type" "ssemov")
9814 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper: fix up operands to match the insn's constraints,
;; emit sse2_loadhpd, then copy back if a temporary destination was used.
;; NOTE(review): some interior lines of these patterns are elided in this excerpt.
9816 (define_expand "sse2_loadhpd_exp"
9817 [(set (match_operand:V2DF 0 "nonimmediate_operand")
9820 (match_operand:V2DF 1 "nonimmediate_operand")
9821 (parallel [(const_int 0)]))
9822 (match_operand:DF 2 "nonimmediate_operand")))]
9825 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9827 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
9829 /* Fix up the destination if needed. */
9830 if (dst != operands[0])
9831 emit_move_insn (operands[0], dst);
9836 ;; Avoid combining registers from different units in a single alternative,
9837 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the high DF element of a V2DF with operand 2, keeping the
;; low element of operand 1: movhpd/vmovhpd for memory sources,
;; unpcklpd/vunpcklpd for register sources, plus split-store forms.
9838 (define_insn "sse2_loadhpd"
9839 [(set (match_operand:V2DF 0 "nonimmediate_operand"
9843 (match_operand:V2DF 1 "nonimmediate_operand"
9845 (parallel [(const_int 0)]))
9846 (match_operand:DF 2 "nonimmediate_operand"
9847 " m,m,x,Yv,x,*f,r")))]
9848 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9850 movhpd\t{%2, %0|%0, %2}
9851 vmovhpd\t{%2, %1, %0|%0, %1, %2}
9852 unpcklpd\t{%2, %0|%0, %2}
9853 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9857 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
9858 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
9859 (set (attr "prefix_data16")
9860 (if_then_else (eq_attr "alternative" "0")
9862 (const_string "*")))
9863 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
9864 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Splitter (its define_split header is not visible in this excerpt):
;; writing the high half of an in-memory V2DF is just a DF store at
;; offset 8 of the same address.
9867 [(set (match_operand:V2DF 0 "memory_operand")
9869 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
9870 (match_operand:DF 1 "register_operand")))]
9871 "TARGET_SSE2 && reload_completed"
9872 [(set (match_dup 0) (match_dup 1))]
9873 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper: fix up operands, emit sse2_loadlpd, then copy back
;; if a temporary destination was substituted.
;; NOTE(review): some interior lines of these patterns are elided in this excerpt.
9875 (define_expand "sse2_loadlpd_exp"
9876 [(set (match_operand:V2DF 0 "nonimmediate_operand")
9878 (match_operand:DF 2 "nonimmediate_operand")
9880 (match_operand:V2DF 1 "nonimmediate_operand")
9881 (parallel [(const_int 1)]))))]
9884 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9886 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
9888 /* Fix up the destination if needed. */
9889 if (dst != operands[0])
9890 emit_move_insn (operands[0], dst);
9895 ;; Avoid combining registers from different units in a single alternative,
9896 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the low DF element of a V2DF with operand 2, keeping the
;; high element of operand 1.  Alternative 0 (operand 1 == const 0)
;; degenerates to a zero-extending movq; others use movlpd/movsd/
;; shufpd/movhpd depending on operand placement, plus split-store forms.
9897 (define_insn "sse2_loadlpd"
9898 [(set (match_operand:V2DF 0 "nonimmediate_operand"
9899 "=v,x,v,x,v,x,x,v,m,m ,m")
9901 (match_operand:DF 2 "nonimmediate_operand"
9902 "vm,m,m,x,v,0,0,v,x,*f,r")
9904 (match_operand:V2DF 1 "nonimm_or_0_operand"
9905 " C,0,v,0,v,x,o,o,0,0 ,0")
9906 (parallel [(const_int 1)]))))]
9907 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9909 %vmovq\t{%2, %0|%0, %2}
9910 movlpd\t{%2, %0|%0, %2}
9911 vmovlpd\t{%2, %1, %0|%0, %1, %2}
9912 movsd\t{%2, %0|%0, %2}
9913 vmovsd\t{%2, %1, %0|%0, %1, %2}
9914 shufpd\t{$2, %1, %0|%0, %1, 2}
9915 movhpd\t{%H1, %0|%0, %H1}
9916 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
9920 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
9922 (cond [(eq_attr "alternative" "5")
9923 (const_string "sselog")
9924 (eq_attr "alternative" "9")
9925 (const_string "fmov")
9926 (eq_attr "alternative" "10")
9927 (const_string "imov")
9929 (const_string "ssemov")))
9930 (set (attr "prefix_data16")
9931 (if_then_else (eq_attr "alternative" "1,6")
9933 (const_string "*")))
9934 (set (attr "length_immediate")
9935 (if_then_else (eq_attr "alternative" "5")
9937 (const_string "*")))
9938 (set (attr "prefix")
9939 (cond [(eq_attr "alternative" "0")
9940 (const_string "maybe_vex")
9941 (eq_attr "alternative" "1,3,5,6")
9942 (const_string "orig")
9943 (eq_attr "alternative" "2,4,7")
9944 (const_string "maybe_evex")
9946 (const_string "*")))
9947 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Splitter (its define_split header is not visible in this excerpt):
;; writing the low half of an in-memory V2DF is a DF store at offset 0.
9950 [(set (match_operand:V2DF 0 "memory_operand")
9952 (match_operand:DF 1 "register_operand")
9953 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
9954 "TARGET_SSE2 && reload_completed"
9955 [(set (match_dup 0) (match_dup 1))]
9956 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Merge the low DF element of operand 2 into operand 1 (movsd
;; semantics).  Alternatives cover reg/reg (movsd/vmovsd), memory low
;; halves (movlpd/vmovlpd), shufpd, and high-half moves (movhps forms).
;; NOTE(review): some interior lines of this pattern are elided in this excerpt.
9958 (define_insn "sse2_movsd"
9959 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
9961 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
9962 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
9966 movsd\t{%2, %0|%0, %2}
9967 vmovsd\t{%2, %1, %0|%0, %1, %2}
9968 movlpd\t{%2, %0|%0, %q2}
9969 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
9970 %vmovlpd\t{%2, %0|%q0, %2}
9971 shufpd\t{$2, %1, %0|%0, %1, 2}
9972 movhps\t{%H1, %0|%0, %H1}
9973 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
9974 %vmovhps\t{%1, %H0|%H0, %1}"
9975 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
9978 (eq_attr "alternative" "5")
9979 (const_string "sselog")
9980 (const_string "ssemov")))
9981 (set (attr "prefix_data16")
9983 (and (eq_attr "alternative" "2,4")
9984 (not (match_test "TARGET_AVX")))
9986 (const_string "*")))
9987 (set (attr "length_immediate")
9988 (if_then_else (eq_attr "alternative" "5")
9990 (const_string "*")))
9991 (set (attr "prefix")
9992 (cond [(eq_attr "alternative" "1,3,7")
9993 (const_string "maybe_evex")
9994 (eq_attr "alternative" "4,8")
9995 (const_string "maybe_vex")
9997 (const_string "orig")))
9998 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Broadcast a DF scalar to both lanes of a V2DF.  Alternatives:
;; pre-SSE3 (no movddup available in alt 0), SSE3 movddup, and
;; EVEX-encoded AVX512VL vmovddup with optional masking.
;; NOTE(review): some interior lines of this pattern are elided in this excerpt.
10000 (define_insn "vec_dupv2df<mask_name>"
10001 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
10002 (vec_duplicate:V2DF
10003 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
10004 "TARGET_SSE2 && <mask_avx512vl_condition>"
10007 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
10008 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10009 [(set_attr "isa" "noavx,sse3,avx512vl")
10010 (set_attr "type" "sselog1")
10011 (set_attr "prefix" "orig,maybe_vex,evex")
10012 (set_attr "mode" "V2DF,DF,DF")])
;; Build a V2DF from two DF scalars {op1, op2}.  Alternatives include
;; unpcklpd/vunpcklpd (reg/reg), movddup when both elements are equal
;; (operand 2 constrained "1"), movhpd to set the high half from memory,
;; zero-extending movq when op2 is zero ("C"), and SSE1 movlhps/movhps.
;; NOTE(review): some interior lines of this pattern are elided in this excerpt.
10014 (define_insn "vec_concatv2df"
10015 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
10017 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,vm,0,0")
10018 (match_operand:DF 2 "nonimm_or_0_operand" " x,x,v,1,1,m,m, C,x,m")))]
10020 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
10021 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
10023 unpcklpd\t{%2, %0|%0, %2}
10024 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10025 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10026 %vmovddup\t{%1, %0|%0, %1}
10027 vmovddup\t{%1, %0|%0, %1}
10028 movhpd\t{%2, %0|%0, %2}
10029 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10030 %vmovq\t{%1, %0|%0, %1}
10031 movlhps\t{%2, %0|%0, %2}
10032 movhps\t{%2, %0|%0, %2}"
10034 (cond [(eq_attr "alternative" "0,5")
10035 (const_string "sse2_noavx")
10036 (eq_attr "alternative" "1,6")
10037 (const_string "avx")
10038 (eq_attr "alternative" "2,4")
10039 (const_string "avx512vl")
10040 (eq_attr "alternative" "3")
10041 (const_string "sse3")
10042 (eq_attr "alternative" "7")
10043 (const_string "sse2")
10045 (const_string "noavx")))
10048 (eq_attr "alternative" "0,1,2,3,4")
10049 (const_string "sselog")
10050 (const_string "ssemov")))
10051 (set (attr "prefix_data16")
10052 (if_then_else (eq_attr "alternative" "5")
10054 (const_string "*")))
10055 (set (attr "prefix")
10056 (cond [(eq_attr "alternative" "1,6")
10057 (const_string "vex")
10058 (eq_attr "alternative" "2,4")
10059 (const_string "evex")
10060 (eq_attr "alternative" "3,7")
10061 (const_string "maybe_vex")
10063 (const_string "orig")))
10064 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
10066 ;; vmovq clears also the higher bits.
;; Set element 0 of a zeroed 256/512-bit DF vector from a scalar:
;; operand 1 must be const-zero ("C"), so a single vmovq on the
;; 128-bit low part (%x0) produces the whole zero-extended vector.
;; NOTE(review): some interior lines of this pattern are elided in this excerpt.
10067 (define_insn "vec_set<mode>_0"
10068 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
10069 (vec_merge:VF2_512_256
10070 (vec_duplicate:VF2_512_256
10071 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm"))
10072 (match_operand:VF2_512_256 1 "const0_operand" "C")
10075 "vmovq\t{%2, %x0|%x0, %2}"
10076 [(set_attr "type" "ssemov")
10077 (set_attr "prefix" "maybe_evex")
10078 (set_attr "mode" "DF")])
10080 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10082 ;; Parallel integer down-conversion operations
10084 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destination modes whose vpmov source fills a full 512-bit register,
;; with attribute maps giving the matching source mode, its lowercase
;; spelling for pattern names, and the vpmov mnemonic suffix
;; (db = D->B, dw = D->W, qd = Q->D, qw = Q->W).
10086 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
10087 (define_mode_attr pmov_src_mode
10088 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
10089 (define_mode_attr pmov_src_lower
10090 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
10091 (define_mode_attr pmov_suff_1
10092 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; AVX512F vpmov truncation (plain / signed-saturating / unsigned-
;; saturating via any_truncate) from a 512-bit source to PMOV_DST_MODE_1,
;; to register or directly to memory.
;; NOTE(review): some interior lines of these patterns are elided in this excerpt.
10094 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
10095 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10096 (any_truncate:PMOV_DST_MODE_1
10097 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
10099 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
10100 [(set_attr "type" "ssemov")
10101 (set_attr "memory" "none,store")
10102 (set_attr "prefix" "evex")
10103 (set_attr "mode" "<sseinsnmode>")])
;; Masked variant: vec_merge with operand 2 (merge source, or const 0
;; for zero-masking via "0C") under mask register operand 3.
10105 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
10106 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10107 (vec_merge:PMOV_DST_MODE_1
10108 (any_truncate:PMOV_DST_MODE_1
10109 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
10110 (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
10111 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10113 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10114 [(set_attr "type" "ssemov")
10115 (set_attr "memory" "none,store")
10116 (set_attr "prefix" "evex")
10117 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked truncating store form of the same operation.
10119 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
10120 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
10121 (vec_merge:PMOV_DST_MODE_1
10122 (any_truncate:PMOV_DST_MODE_1
10123 (match_operand:<pmov_src_mode> 1 "register_operand"))
10125 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; AVX512BW vpmovwb family: truncate V32HI -> V32QI (plain, masked,
;; and masked-store expander).
;; NOTE(review): some interior lines of these patterns are elided in this excerpt.
10128 (define_insn "avx512bw_<code>v32hiv32qi2"
10129 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10130 (any_truncate:V32QI
10131 (match_operand:V32HI 1 "register_operand" "v,v")))]
10133 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
10134 [(set_attr "type" "ssemov")
10135 (set_attr "memory" "none,store")
10136 (set_attr "prefix" "evex")
10137 (set_attr "mode" "XI")])
;; Masked variant under 32-bit mask operand 3 (merge or zero via "0C").
10139 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
10140 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10142 (any_truncate:V32QI
10143 (match_operand:V32HI 1 "register_operand" "v,v"))
10144 (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
10145 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
10147 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10148 [(set_attr "type" "ssemov")
10149 (set_attr "memory" "none,store")
10150 (set_attr "prefix" "evex")
10151 (set_attr "mode" "XI")])
;; Expander for the masked truncating store form.
10153 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
10154 [(set (match_operand:V32QI 0 "nonimmediate_operand")
10156 (any_truncate:V32QI
10157 (match_operand:V32HI 1 "register_operand"))
10159 (match_operand:SI 2 "register_operand")))]
;; Destination modes whose vpmov source is the double-width vector of
;; the same length (AVX512VL 128/256-bit forms); V16QI needs AVX512BW.
;; NOTE(review): some interior lines of these patterns are elided in this excerpt.
10162 (define_mode_iterator PMOV_DST_MODE_2
10163 [V4SI V8HI (V16QI "TARGET_AVX512BW")]
10164 (define_mode_attr pmov_suff_2
10165 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
;; Plain truncation from <ssedoublemode> to PMOV_DST_MODE_2.
10167 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
10168 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10169 (any_truncate:PMOV_DST_MODE_2
10170 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
10172 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
10173 [(set_attr "type" "ssemov")
10174 (set_attr "memory" "none,store")
10175 (set_attr "prefix" "evex")
10176 (set_attr "mode" "<sseinsnmode>")])
;; Masked variant (merge or zero via "0C") under mask operand 3.
10178 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
10179 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10180 (vec_merge:PMOV_DST_MODE_2
10181 (any_truncate:PMOV_DST_MODE_2
10182 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
10183 (match_operand:PMOV_DST_MODE_2 2 "nonimm_or_0_operand" "0C,0")
10184 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10186 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10187 [(set_attr "type" "ssemov")
10188 (set_attr "memory" "none,store")
10189 (set_attr "prefix" "evex")
10190 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked truncating store form.
10192 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
10193 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
10194 (vec_merge:PMOV_DST_MODE_2
10195 (any_truncate:PMOV_DST_MODE_2
10196 (match_operand:<ssedoublemode> 1 "register_operand"))
10198 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Sources truncated to byte elements where the result occupies only
;; part of an XMM register; pmov_dst_zeroed_3 gives the mode of the
;; remaining (zero-filled) tail so dst + tail = 16 bytes.
10201 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
10202 (define_mode_attr pmov_dst_3
10203 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
10204 (define_mode_attr pmov_dst_zeroed_3
10205 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
10206 (define_mode_attr pmov_suff_3
10207 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
;; Truncate to a partial byte vector, with the untouched tail of the
;; XMM destination required to be zero (operand 2 const0).
;; NOTE(review): some interior lines of this pattern are elided in this excerpt.
10209 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
10210 [(set (match_operand:V16QI 0 "register_operand" "=v")
10212 (any_truncate:<pmov_dst_3>
10213 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
10214 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
10216 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
10217 [(set_attr "type" "ssemov")
10218 (set_attr "prefix" "evex")
10219 (set_attr "mode" "TI")])
;; V2DI -> V2QI truncating store: only the 2 result bytes are written
;; (%w0 = word-sized memory reference); elements 2..15 of the in-memory
;; V16QI are required to be its own existing contents (match on const
;; indices 2..15).
;; NOTE(review): some interior lines of these patterns are elided in this excerpt.
10221 (define_insn "*avx512vl_<code>v2div2qi2_store"
10222 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10225 (match_operand:V2DI 1 "register_operand" "v"))
10228 (parallel [(const_int 2) (const_int 3)
10229 (const_int 4) (const_int 5)
10230 (const_int 6) (const_int 7)
10231 (const_int 8) (const_int 9)
10232 (const_int 10) (const_int 11)
10233 (const_int 12) (const_int 13)
10234 (const_int 14) (const_int 15)]))))]
10236 "vpmov<trunsuffix>qb\t{%1, %0|%w0, %1}"
10237 [(set_attr "type" "ssemov")
10238 (set_attr "memory" "store")
10239 (set_attr "prefix" "evex")
10240 (set_attr "mode" "TI")])
;; Masked register form: bytes 0-1 come from the masked truncation
;; (merge with operand 2 or zero via "0C" under mask operand 3),
;; bytes 2-15 are explicit zeros.
10242 (define_insn "avx512vl_<code>v2div2qi2_mask"
10243 [(set (match_operand:V16QI 0 "register_operand" "=v")
10247 (match_operand:V2DI 1 "register_operand" "v"))
10249 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10250 (parallel [(const_int 0) (const_int 1)]))
10251 (match_operand:QI 3 "register_operand" "Yk"))
10252 (const_vector:V14QI [(const_int 0) (const_int 0)
10253 (const_int 0) (const_int 0)
10254 (const_int 0) (const_int 0)
10255 (const_int 0) (const_int 0)
10256 (const_int 0) (const_int 0)
10257 (const_int 0) (const_int 0)
10258 (const_int 0) (const_int 0)])))]
10260 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10261 [(set_attr "type" "ssemov")
10262 (set_attr "prefix" "evex")
10263 (set_attr "mode" "TI")])
;; Zero-masked register form: merge source is an explicit V2QI zero
;; vector, so the insn is emitted with the {z} modifier.
10265 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
10266 [(set (match_operand:V16QI 0 "register_operand" "=v")
10270 (match_operand:V2DI 1 "register_operand" "v"))
10271 (const_vector:V2QI [(const_int 0) (const_int 0)])
10272 (match_operand:QI 2 "register_operand" "Yk"))
10273 (const_vector:V14QI [(const_int 0) (const_int 0)
10274 (const_int 0) (const_int 0)
10275 (const_int 0) (const_int 0)
10276 (const_int 0) (const_int 0)
10277 (const_int 0) (const_int 0)
10278 (const_int 0) (const_int 0)
10279 (const_int 0) (const_int 0)])))]
10281 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10282 [(set_attr "type" "ssemov")
10283 (set_attr "prefix" "evex")
10284 (set_attr "mode" "TI")])
;; Masked store: merge with the destination's own bytes 0-1 under
;; mask operand 2, keeping bytes 2-15 untouched (%w0 store).
10286 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
10287 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10291 (match_operand:V2DI 1 "register_operand" "v"))
10294 (parallel [(const_int 0) (const_int 1)]))
10295 (match_operand:QI 2 "register_operand" "Yk"))
10298 (parallel [(const_int 2) (const_int 3)
10299 (const_int 4) (const_int 5)
10300 (const_int 6) (const_int 7)
10301 (const_int 8) (const_int 9)
10302 (const_int 10) (const_int 11)
10303 (const_int 12) (const_int 13)
10304 (const_int 14) (const_int 15)]))))]
10306 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
10307 [(set_attr "type" "ssemov")
10308 (set_attr "memory" "store")
10309 (set_attr "prefix" "evex")
10310 (set_attr "mode" "TI")])
;; Truncate to V4QI and store only the 4 result bytes (%k0 = dword
;; memory reference); bytes 4-15 of the in-memory V16QI keep their
;; existing contents.
;; NOTE(review): some interior lines of these patterns are elided in this excerpt.
10312 (define_insn "*avx512vl_<code><mode>v4qi2_store"
10313 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10316 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10319 (parallel [(const_int 4) (const_int 5)
10320 (const_int 6) (const_int 7)
10321 (const_int 8) (const_int 9)
10322 (const_int 10) (const_int 11)
10323 (const_int 12) (const_int 13)
10324 (const_int 14) (const_int 15)]))))]
10326 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%k0, %1}"
10327 [(set_attr "type" "ssemov")
10328 (set_attr "memory" "store")
10329 (set_attr "prefix" "evex")
10330 (set_attr "mode" "TI")])
;; Masked register form: bytes 0-3 from the masked truncation (merge
;; with operand 2 or zero via "0C"), bytes 4-15 explicit zeros.
10332 (define_insn "avx512vl_<code><mode>v4qi2_mask"
10333 [(set (match_operand:V16QI 0 "register_operand" "=v")
10337 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10339 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10340 (parallel [(const_int 0) (const_int 1)
10341 (const_int 2) (const_int 3)]))
10342 (match_operand:QI 3 "register_operand" "Yk"))
10343 (const_vector:V12QI [(const_int 0) (const_int 0)
10344 (const_int 0) (const_int 0)
10345 (const_int 0) (const_int 0)
10346 (const_int 0) (const_int 0)
10347 (const_int 0) (const_int 0)
10348 (const_int 0) (const_int 0)])))]
10350 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10351 [(set_attr "type" "ssemov")
10352 (set_attr "prefix" "evex")
10353 (set_attr "mode" "TI")])
;; Zero-masked register form ({z} modifier; merge source is a zero
;; V4QI constant).
10355 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
10356 [(set (match_operand:V16QI 0 "register_operand" "=v")
10360 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10361 (const_vector:V4QI [(const_int 0) (const_int 0)
10362 (const_int 0) (const_int 0)])
10363 (match_operand:QI 2 "register_operand" "Yk"))
10364 (const_vector:V12QI [(const_int 0) (const_int 0)
10365 (const_int 0) (const_int 0)
10366 (const_int 0) (const_int 0)
10367 (const_int 0) (const_int 0)
10368 (const_int 0) (const_int 0)
10369 (const_int 0) (const_int 0)])))]
10371 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10372 [(set_attr "type" "ssemov")
10373 (set_attr "prefix" "evex")
10374 (set_attr "mode" "TI")])
;; Masked store: merge with the destination's own bytes 0-3 under mask
;; operand 2; bytes 4-15 untouched (%k0 dword store).
10376 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
10377 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10381 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10384 (parallel [(const_int 0) (const_int 1)
10385 (const_int 2) (const_int 3)]))
10386 (match_operand:QI 2 "register_operand" "Yk"))
10389 (parallel [(const_int 4) (const_int 5)
10390 (const_int 6) (const_int 7)
10391 (const_int 8) (const_int 9)
10392 (const_int 10) (const_int 11)
10393 (const_int 12) (const_int 13)
10394 (const_int 14) (const_int 15)]))))]
10396 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}"
10397 [(set_attr "type" "ssemov")
10398 (set_attr "memory" "store")
10399 (set_attr "prefix" "evex")
10400 (set_attr "mode" "TI")])
;; Sources that truncate to V8QI: V8HI (needs AVX512BW) and V8SI.
;; NOTE(review): some interior lines of these patterns are elided in this excerpt.
10402 (define_mode_iterator VI2_128_BW_4_256
10403 [(V8HI "TARGET_AVX512BW") V8SI])
;; Truncate to V8QI and store only the 8 result bytes (%q0 = qword
;; memory reference); bytes 8-15 keep their existing contents.
10405 (define_insn "*avx512vl_<code><mode>v8qi2_store"
10406 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10409 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10412 (parallel [(const_int 8) (const_int 9)
10413 (const_int 10) (const_int 11)
10414 (const_int 12) (const_int 13)
10415 (const_int 14) (const_int 15)]))))]
10417 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%q0, %1}"
10418 [(set_attr "type" "ssemov")
10419 (set_attr "memory" "store")
10420 (set_attr "prefix" "evex")
10421 (set_attr "mode" "TI")])
;; Masked register form: bytes 0-7 from the masked truncation (merge
;; with operand 2 or zero via "0C"), bytes 8-15 explicit zeros.
10423 (define_insn "avx512vl_<code><mode>v8qi2_mask"
10424 [(set (match_operand:V16QI 0 "register_operand" "=v")
10428 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10430 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10431 (parallel [(const_int 0) (const_int 1)
10432 (const_int 2) (const_int 3)
10433 (const_int 4) (const_int 5)
10434 (const_int 6) (const_int 7)]))
10435 (match_operand:QI 3 "register_operand" "Yk"))
10436 (const_vector:V8QI [(const_int 0) (const_int 0)
10437 (const_int 0) (const_int 0)
10438 (const_int 0) (const_int 0)
10439 (const_int 0) (const_int 0)])))]
10441 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10442 [(set_attr "type" "ssemov")
10443 (set_attr "prefix" "evex")
10444 (set_attr "mode" "TI")])
;; Zero-masked register form ({z} modifier; zero V8QI merge source).
10446 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
10447 [(set (match_operand:V16QI 0 "register_operand" "=v")
10451 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10452 (const_vector:V8QI [(const_int 0) (const_int 0)
10453 (const_int 0) (const_int 0)
10454 (const_int 0) (const_int 0)
10455 (const_int 0) (const_int 0)])
10456 (match_operand:QI 2 "register_operand" "Yk"))
10457 (const_vector:V8QI [(const_int 0) (const_int 0)
10458 (const_int 0) (const_int 0)
10459 (const_int 0) (const_int 0)
10460 (const_int 0) (const_int 0)])))]
10462 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10463 [(set_attr "type" "ssemov")
10464 (set_attr "prefix" "evex")
10465 (set_attr "mode" "TI")])
;; Masked store: merge with the destination's own bytes 0-7 under mask
;; operand 2; bytes 8-15 untouched (%q0 qword store).
10467 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
10468 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10472 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10475 (parallel [(const_int 0) (const_int 1)
10476 (const_int 2) (const_int 3)
10477 (const_int 4) (const_int 5)
10478 (const_int 6) (const_int 7)]))
10479 (match_operand:QI 2 "register_operand" "Yk"))
10482 (parallel [(const_int 8) (const_int 9)
10483 (const_int 10) (const_int 11)
10484 (const_int 12) (const_int 13)
10485 (const_int 14) (const_int 15)]))))]
10487 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
10488 [(set_attr "type" "ssemov")
10489 (set_attr "memory" "store")
10490 (set_attr "prefix" "evex")
10491 (set_attr "mode" "TI")])
;; Sources truncated to word (HI) elements; pmov_dst_zeroed_4 gives the
;; zero-filled tail mode so dst + tail fills an XMM register.
;; NOTE(review): some interior lines of these patterns are elided in this excerpt.
10493 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
10494 (define_mode_attr pmov_dst_4
10495 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
10496 (define_mode_attr pmov_dst_zeroed_4
10497 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
10498 (define_mode_attr pmov_suff_4
10499 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
;; Truncate to a partial word vector; untouched tail of the V8HI
;; destination must be zero (operand 2 const0).
10501 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
10502 [(set (match_operand:V8HI 0 "register_operand" "=v")
10504 (any_truncate:<pmov_dst_4>
10505 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
10506 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
10508 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
10509 [(set_attr "type" "ssemov")
10510 (set_attr "prefix" "evex")
10511 (set_attr "mode" "TI")])
;; Truncate to V4HI and store only the 4 result words; words 4-7 of the
;; in-memory V8HI keep their existing contents.
;; NOTE(review): some interior lines of these patterns are elided in this excerpt.
10513 (define_insn "*avx512vl_<code><mode>v4hi2_store"
10514 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10517 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10520 (parallel [(const_int 4) (const_int 5)
10521 (const_int 6) (const_int 7)]))))]
10523 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
10524 [(set_attr "type" "ssemov")
10525 (set_attr "memory" "store")
10526 (set_attr "prefix" "evex")
10527 (set_attr "mode" "TI")])
;; Masked register form: words 0-3 from the masked truncation (merge
;; with operand 2 or zero via "0C"), words 4-7 explicit zeros.
10529 (define_insn "avx512vl_<code><mode>v4hi2_mask"
10530 [(set (match_operand:V8HI 0 "register_operand" "=v")
10534 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10536 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
10537 (parallel [(const_int 0) (const_int 1)
10538 (const_int 2) (const_int 3)]))
10539 (match_operand:QI 3 "register_operand" "Yk"))
10540 (const_vector:V4HI [(const_int 0) (const_int 0)
10541 (const_int 0) (const_int 0)])))]
10543 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10544 [(set_attr "type" "ssemov")
10545 (set_attr "prefix" "evex")
10546 (set_attr "mode" "TI")])
;; Zero-masked register form ({z} modifier; zero V4HI merge source).
10548 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
10549 [(set (match_operand:V8HI 0 "register_operand" "=v")
10553 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10554 (const_vector:V4HI [(const_int 0) (const_int 0)
10555 (const_int 0) (const_int 0)])
10556 (match_operand:QI 2 "register_operand" "Yk"))
10557 (const_vector:V4HI [(const_int 0) (const_int 0)
10558 (const_int 0) (const_int 0)])))]
10560 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10561 [(set_attr "type" "ssemov")
10562 (set_attr "prefix" "evex")
10563 (set_attr "mode" "TI")])
;; Masked store: merge with the destination's own words 0-3 under mask
;; operand 2.  The C fragment chooses the %t1/%g1 source spelling by
;; the source element width (4-byte elements vs. wider).
10565 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
10566 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10570 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10573 (parallel [(const_int 0) (const_int 1)
10574 (const_int 2) (const_int 3)]))
10575 (match_operand:QI 2 "register_operand" "Yk"))
10578 (parallel [(const_int 4) (const_int 5)
10579 (const_int 6) (const_int 7)]))))]
10582 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
10583 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
10584 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
10586 [(set_attr "type" "ssemov")
10587 (set_attr "memory" "store")
10588 (set_attr "prefix" "evex")
10589 (set_attr "mode" "TI")])
;; V2DI -> V2HI truncating store: only the 2 result words are written;
;; words 2-7 of the in-memory V8HI keep their existing contents.
;; NOTE(review): some interior lines of these patterns are elided in this excerpt.
10591 (define_insn "*avx512vl_<code>v2div2hi2_store"
10592 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10595 (match_operand:V2DI 1 "register_operand" "v"))
10598 (parallel [(const_int 2) (const_int 3)
10599 (const_int 4) (const_int 5)
10600 (const_int 6) (const_int 7)]))))]
10602 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
10603 [(set_attr "type" "ssemov")
10604 (set_attr "memory" "store")
10605 (set_attr "prefix" "evex")
10606 (set_attr "mode" "TI")])
;; Masked register form: words 0-1 from the masked truncation (merge
;; with operand 2 or zero via "0C"), words 2-7 explicit zeros.
10608 (define_insn "avx512vl_<code>v2div2hi2_mask"
10609 [(set (match_operand:V8HI 0 "register_operand" "=v")
10613 (match_operand:V2DI 1 "register_operand" "v"))
10615 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
10616 (parallel [(const_int 0) (const_int 1)]))
10617 (match_operand:QI 3 "register_operand" "Yk"))
10618 (const_vector:V6HI [(const_int 0) (const_int 0)
10619 (const_int 0) (const_int 0)
10620 (const_int 0) (const_int 0)])))]
10622 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10623 [(set_attr "type" "ssemov")
10624 (set_attr "prefix" "evex")
10625 (set_attr "mode" "TI")])
;; Zero-masked register form ({z} modifier; zero V2HI merge source).
10627 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
10628 [(set (match_operand:V8HI 0 "register_operand" "=v")
10632 (match_operand:V2DI 1 "register_operand" "v"))
10633 (const_vector:V2HI [(const_int 0) (const_int 0)])
10634 (match_operand:QI 2 "register_operand" "Yk"))
10635 (const_vector:V6HI [(const_int 0) (const_int 0)
10636 (const_int 0) (const_int 0)
10637 (const_int 0) (const_int 0)])))]
10639 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10640 [(set_attr "type" "ssemov")
10641 (set_attr "prefix" "evex")
10642 (set_attr "mode" "TI")])
;; Masked store: merge with the destination's own words 0-1 under mask
;; operand 2; words 2-7 untouched (%g1 source spelling in AT&T output).
10644 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
10645 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10649 (match_operand:V2DI 1 "register_operand" "v"))
10652 (parallel [(const_int 0) (const_int 1)]))
10653 (match_operand:QI 2 "register_operand" "Yk"))
10656 (parallel [(const_int 2) (const_int 3)
10657 (const_int 4) (const_int 5)
10658 (const_int 6) (const_int 7)]))))]
10660 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
10661 [(set_attr "type" "ssemov")
10662 (set_attr "memory" "store")
10663 (set_attr "prefix" "evex")
10664 (set_attr "mode" "TI")])
;; V2DI -> V2SI truncation into a V4SI register; the upper V2SI half
;; must be zero (operand 2 const0).
;; NOTE(review): some interior lines of these patterns are elided in this excerpt.
10666 (define_insn "*avx512vl_<code>v2div2si2"
10667 [(set (match_operand:V4SI 0 "register_operand" "=v")
10670 (match_operand:V2DI 1 "register_operand" "v"))
10671 (match_operand:V2SI 2 "const0_operand")))]
10673 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
10674 [(set_attr "type" "ssemov")
10675 (set_attr "prefix" "evex")
10676 (set_attr "mode" "TI")])
;; Store form: only dwords 0-1 are written; dwords 2-3 of the in-memory
;; V4SI keep their existing contents.
10678 (define_insn "*avx512vl_<code>v2div2si2_store"
10679 [(set (match_operand:V4SI 0 "memory_operand" "=m")
10682 (match_operand:V2DI 1 "register_operand" "v"))
10685 (parallel [(const_int 2) (const_int 3)]))))]
10687 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
10688 [(set_attr "type" "ssemov")
10689 (set_attr "memory" "store")
10690 (set_attr "prefix" "evex")
10691 (set_attr "mode" "TI")])
;; Masked register form: dwords 0-1 from the masked truncation (merge
;; with operand 2 or zero via "0C"), dwords 2-3 explicit zeros.
10693 (define_insn "avx512vl_<code>v2div2si2_mask"
10694 [(set (match_operand:V4SI 0 "register_operand" "=v")
10698 (match_operand:V2DI 1 "register_operand" "v"))
10700 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
10701 (parallel [(const_int 0) (const_int 1)]))
10702 (match_operand:QI 3 "register_operand" "Yk"))
10703 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
10705 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10706 [(set_attr "type" "ssemov")
10707 (set_attr "prefix" "evex")
10708 (set_attr "mode" "TI")])
;; Zero-masked register form ({z} modifier; zero V2SI merge source).
10710 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
10711 [(set (match_operand:V4SI 0 "register_operand" "=v")
10715 (match_operand:V2DI 1 "register_operand" "v"))
10716 (const_vector:V2SI [(const_int 0) (const_int 0)])
10717 (match_operand:QI 2 "register_operand" "Yk"))
10718 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
10720 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10721 [(set_attr "type" "ssemov")
10722 (set_attr "prefix" "evex")
10723 (set_attr "mode" "TI")])
;; Masked store: merge with the destination's own dwords 0-1 under mask
;; operand 2; dwords 2-3 untouched (%t1 source spelling in AT&T output).
10725 (define_insn "avx512vl_<code>v2div2si2_mask_store"
10726 [(set (match_operand:V4SI 0 "memory_operand" "=m")
10730 (match_operand:V2DI 1 "register_operand" "v"))
10733 (parallel [(const_int 0) (const_int 1)]))
10734 (match_operand:QI 2 "register_operand" "Yk"))
10737 (parallel [(const_int 2) (const_int 3)]))))]
10739 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %t1}"
10740 [(set_attr "type" "ssemov")
10741 (set_attr "memory" "store")
10742 (set_attr "prefix" "evex")
10743 (set_attr "mode" "TI")])
;; V8DI -> V8QI truncation patterns (vpmov[us]qb, AVX512F).
;; NOTE(review): embedded line numbers are non-contiguous; interior RTL
;; lines are elided in this listing.  Visible code kept byte-identical.

;; Register form: eight quadwords truncated to bytes in the low half of
;; a V16QI destination; the high eight bytes are explicit zeros.
10745 (define_insn "*avx512f_<code>v8div16qi2"
10746 [(set (match_operand:V16QI 0 "register_operand" "=v")
10749 (match_operand:V8DI 1 "register_operand" "v"))
10750 (const_vector:V8QI [(const_int 0) (const_int 0)
10751 (const_int 0) (const_int 0)
10752 (const_int 0) (const_int 0)
10753 (const_int 0) (const_int 0)])))]
10755 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
10756 [(set_attr "type" "ssemov")
10757 (set_attr "prefix" "evex")
10758 (set_attr "mode" "TI")])

;; Store form: truncation written to memory; only bytes 0-7 of the
;; V16QI destination carry data (the parallel selects 8..15 elsewhere).
10760 (define_insn "*avx512f_<code>v8div16qi2_store"
10761 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10764 (match_operand:V8DI 1 "register_operand" "v"))
10767 (parallel [(const_int 8) (const_int 9)
10768 (const_int 10) (const_int 11)
10769 (const_int 12) (const_int 13)
10770 (const_int 14) (const_int 15)]))))]
10772 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
10773 [(set_attr "type" "ssemov")
10774 (set_attr "memory" "store")
10775 (set_attr "prefix" "evex")
10776 (set_attr "mode" "TI")])

;; Masked register form: k-mask %3 gates elements; %N2 selects merge
;; ("0") vs. zeroing ("C") from operand 2.
10778 (define_insn "avx512f_<code>v8div16qi2_mask"
10779 [(set (match_operand:V16QI 0 "register_operand" "=v")
10783 (match_operand:V8DI 1 "register_operand" "v"))
10785 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10786 (parallel [(const_int 0) (const_int 1)
10787 (const_int 2) (const_int 3)
10788 (const_int 4) (const_int 5)
10789 (const_int 6) (const_int 7)]))
10790 (match_operand:QI 3 "register_operand" "Yk"))
10791 (const_vector:V8QI [(const_int 0) (const_int 0)
10792 (const_int 0) (const_int 0)
10793 (const_int 0) (const_int 0)
10794 (const_int 0) (const_int 0)])))]
10796 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10797 [(set_attr "type" "ssemov")
10798 (set_attr "prefix" "evex")
10799 (set_attr "mode" "TI")])

;; Zero-masked variant: explicit zero merge source, printed with {z}.
10801 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
10802 [(set (match_operand:V16QI 0 "register_operand" "=v")
10806 (match_operand:V8DI 1 "register_operand" "v"))
10807 (const_vector:V8QI [(const_int 0) (const_int 0)
10808 (const_int 0) (const_int 0)
10809 (const_int 0) (const_int 0)
10810 (const_int 0) (const_int 0)])
10811 (match_operand:QI 2 "register_operand" "Yk"))
10812 (const_vector:V8QI [(const_int 0) (const_int 0)
10813 (const_int 0) (const_int 0)
10814 (const_int 0) (const_int 0)
10815 (const_int 0) (const_int 0)])))]
10817 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10818 [(set_attr "type" "ssemov")
10819 (set_attr "prefix" "evex")
10820 (set_attr "mode" "TI")])

;; Masked store form.
;; NOTE(review): the template prints %0 on the AT&T side but %q0 on the
;; Intel side — asymmetric; confirm against current GCC trunk whether
;; both sides should use the q (DImode) modifier.
10822 (define_insn "avx512f_<code>v8div16qi2_mask_store"
10823 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10827 (match_operand:V8DI 1 "register_operand" "v"))
10830 (parallel [(const_int 0) (const_int 1)
10831 (const_int 2) (const_int 3)
10832 (const_int 4) (const_int 5)
10833 (const_int 6) (const_int 7)]))
10834 (match_operand:QI 2 "register_operand" "Yk"))
10837 (parallel [(const_int 8) (const_int 9)
10838 (const_int 10) (const_int 11)
10839 (const_int 12) (const_int 13)
10840 (const_int 14) (const_int 15)]))))]
10842 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
10843 [(set_attr "type" "ssemov")
10844 (set_attr "memory" "store")
10845 (set_attr "prefix" "evex")
10846 (set_attr "mode" "TI")])
10848 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10850 ;; Parallel integral arithmetic
10852 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer negation and add/sub expanders.
;; NOTE(review): interior lines are elided in this listing (embedded
;; numbering is non-contiguous); visible code kept byte-identical.

;; neg<mode>2: expanded as (0 - op1); the preparation statement forces
;; the zero vector into a register (operand 2).
10854 (define_expand "neg<mode>2"
10855 [(set (match_operand:VI_AVX2 0 "register_operand")
10858 (match_operand:VI_AVX2 1 "vector_operand")))]
10860 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")

;; Plain vector add/sub expander; legitimizes operands so at most one
;; is a memory reference.
10862 (define_expand "<plusminus_insn><mode>3"
10863 [(set (match_operand:VI_AVX2 0 "register_operand")
10865 (match_operand:VI_AVX2 1 "vector_operand")
10866 (match_operand:VI_AVX2 2 "vector_operand")))]
10868 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; Masked add/sub expander for dword/qword elements (AVX512VL);
;; operand 3 merges, operand 4 is the k-mask.
10870 (define_expand "<plusminus_insn><mode>3_mask"
10871 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10872 (vec_merge:VI48_AVX512VL
10873 (plusminus:VI48_AVX512VL
10874 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10875 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10876 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
10877 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10879 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; Same masked expander for byte/word elements (needs AVX512BW insn
;; below; condition line is elided in this listing).
10881 (define_expand "<plusminus_insn><mode>3_mask"
10882 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
10883 (vec_merge:VI12_AVX512VL
10884 (plusminus:VI12_AVX512VL
10885 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
10886 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10887 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
10888 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10890 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer add/sub insn patterns: two-alternative SSE/AVX form,
;; broadcast forms, and masked AVX512 forms.
;; NOTE(review): some interior lines are elided in this listing.

;; Alternative 0: legacy SSE two-operand (dest matches src1, <comm>
;; allows commutation); alternative 1: VEX/EVEX three-operand.
10892 (define_insn "*<plusminus_insn><mode>3"
10893 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
10895 (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
10896 (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
10897 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10899 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10900 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10901 [(set_attr "isa" "noavx,avx")
10902 (set_attr "type" "sseiadd")
10903 (set_attr "prefix_data16" "1,*")
10904 (set_attr "prefix" "orig,vex")
10905 (set_attr "mode" "<sseinsnmode>")])

;; Subtract with an embedded-broadcast memory operand as subtrahend
;; (sub is non-commutative, so plus/minus get separate bcst patterns).
10907 (define_insn "*sub<mode>3_bcst"
10908 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10909 (minus:VI48_AVX512VL
10910 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10911 (vec_duplicate:VI48_AVX512VL
10912 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
10913 "TARGET_AVX512F && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
10914 "vpsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
10915 [(set_attr "type" "sseiadd")
10916 (set_attr "prefix" "evex")
10917 (set_attr "mode" "<sseinsnmode>")])

;; Add with embedded broadcast; the duplicated scalar is canonicalized
;; as the first plus operand.
10919 (define_insn "*add<mode>3_bcst"
10920 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10921 (plus:VI48_AVX512VL
10922 (vec_duplicate:VI48_AVX512VL
10923 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
10924 (match_operand:VI48_AVX512VL 2 "register_operand" "v")))]
10925 "TARGET_AVX512F && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
10926 "vpadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0|%0, %2, %1<avx512bcst>}"
10927 [(set_attr "type" "sseiadd")
10928 (set_attr "prefix" "evex")
10929 (set_attr "mode" "<sseinsnmode>")])

;; Masked add/sub, dword/qword elements: mask %4 gates, %N3 selects
;; merge vs. zeroing from operand 3.
10931 (define_insn "*<plusminus_insn><mode>3_mask"
10932 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10933 (vec_merge:VI48_AVX512VL
10934 (plusminus:VI48_AVX512VL
10935 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10936 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10937 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
10938 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10939 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10940 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10941 [(set_attr "type" "sseiadd")
10942 (set_attr "prefix" "evex")
10943 (set_attr "mode" "<sseinsnmode>")])

;; Masked add/sub, byte/word elements — requires AVX512BW.
10945 (define_insn "*<plusminus_insn><mode>3_mask"
10946 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10947 (vec_merge:VI12_AVX512VL
10948 (plusminus:VI12_AVX512VL
10949 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10950 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10951 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand" "0C")
10952 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10953 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10954 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10955 [(set_attr "type" "sseiadd")
10956 (set_attr "prefix" "evex")
10957 (set_attr "mode" "<sseinsnmode>")])
;; Saturating add/sub for byte/word elements (padds/paddus/psubs/psubus).
;; NOTE(review): some interior lines are elided in this listing.

;; Expander: legitimize operands for the insn below.
10959 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10960 [(set (match_operand:VI12_AVX2 0 "register_operand")
10961 (sat_plusminus:VI12_AVX2
10962 (match_operand:VI12_AVX2 1 "vector_operand")
10963 (match_operand:VI12_AVX2 2 "vector_operand")))]
10964 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10965 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; Insn: legacy two-operand SSE alternative plus VEX/EVEX
;; three-operand alternative (optionally masked via <mask_operand3>).
10967 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10968 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
10969 (sat_plusminus:VI12_AVX2
10970 (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
10971 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
10972 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
10973 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10975 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10976 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10977 [(set_attr "isa" "noavx,avx")
10978 (set_attr "type" "sseiadd")
10979 (set_attr "prefix_data16" "1,*")
10980 (set_attr "prefix" "orig,maybe_evex")
10981 (set_attr "mode" "TI")])
;; Vector multiplication: byte (synthesized), word low-part, and word
;; high-part patterns.
;; NOTE(review): interior lines are elided in this listing (embedded
;; numbering is non-contiguous); visible code kept byte-identical.

;; Byte multiply has no hardware insn; expanded via widening helper.
10983 (define_expand "mul<mode>3<mask_name>"
10984 [(set (match_operand:VI1_AVX512 0 "register_operand")
10985 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
10986 (match_operand:VI1_AVX512 2 "register_operand")))]
10987 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10989 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);

;; Word multiply (low 16 bits of each product) — pmullw.
10993 (define_expand "mul<mode>3<mask_name>"
10994 [(set (match_operand:VI2_AVX2 0 "register_operand")
10995 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
10996 (match_operand:VI2_AVX2 2 "vector_operand")))]
10997 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10998 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")

;; pmullw insn; "%0" constraint marks operand 1 commutative with 2.
11000 (define_insn "*mul<mode>3<mask_name>"
11001 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11002 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
11003 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
11004 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11005 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11007 pmullw\t{%2, %0|%0, %2}
11008 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11009 [(set_attr "isa" "noavx,avx")
11010 (set_attr "type" "sseimul")
11011 (set_attr "prefix_data16" "1,*")
11012 (set_attr "prefix" "orig,vex")
11013 (set_attr "mode" "<sseinsnmode>")])

;; High-part word multiply expander: widen, multiply, shift right
;; (pmulhw / pmulhuw depending on any_extend).
11015 (define_expand "<s>mul<mode>3_highpart<mask_name>"
11016 [(set (match_operand:VI2_AVX2 0 "register_operand")
11018 (lshiftrt:<ssedoublemode>
11019 (mult:<ssedoublemode>
11020 (any_extend:<ssedoublemode>
11021 (match_operand:VI2_AVX2 1 "vector_operand"))
11022 (any_extend:<ssedoublemode>
11023 (match_operand:VI2_AVX2 2 "vector_operand")))
11026 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11027 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")

;; High-part word multiply insn (<u> selects signed/unsigned mnemonic).
11029 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
11030 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11032 (lshiftrt:<ssedoublemode>
11033 (mult:<ssedoublemode>
11034 (any_extend:<ssedoublemode>
11035 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
11036 (any_extend:<ssedoublemode>
11037 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
11039 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11040 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11042 pmulh<u>w\t{%2, %0|%0, %2}
11043 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11044 [(set_attr "isa" "noavx,avx")
11045 (set_attr "type" "sseimul")
11046 (set_attr "prefix_data16" "1,*")
11047 (set_attr "prefix" "orig,vex")
11048 (set_attr "mode" "<sseinsnmode>")])
;; Widening unsigned multiply of even-numbered dword elements
;; (vpmuludq) for 512/256/128-bit vectors.
;; NOTE(review): interior lines are elided in this listing.

;; 512-bit expander: even elements (0,2,...,14) of both V16SI sources.
11050 (define_expand "vec_widen_umult_even_v16si<mask_name>"
11051 [(set (match_operand:V8DI 0 "register_operand")
11055 (match_operand:V16SI 1 "nonimmediate_operand")
11056 (parallel [(const_int 0) (const_int 2)
11057 (const_int 4) (const_int 6)
11058 (const_int 8) (const_int 10)
11059 (const_int 12) (const_int 14)])))
11062 (match_operand:V16SI 2 "nonimmediate_operand")
11063 (parallel [(const_int 0) (const_int 2)
11064 (const_int 4) (const_int 6)
11065 (const_int 8) (const_int 10)
11066 (const_int 12) (const_int 14)])))))]
11068 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")

;; 512-bit insn: "%v" marks the operands commutative.
11070 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
11071 [(set (match_operand:V8DI 0 "register_operand" "=v")
11075 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11076 (parallel [(const_int 0) (const_int 2)
11077 (const_int 4) (const_int 6)
11078 (const_int 8) (const_int 10)
11079 (const_int 12) (const_int 14)])))
11082 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11083 (parallel [(const_int 0) (const_int 2)
11084 (const_int 4) (const_int 6)
11085 (const_int 8) (const_int 10)
11086 (const_int 12) (const_int 14)])))))]
11087 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11088 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11089 [(set_attr "type" "sseimul")
11090 (set_attr "prefix_extra" "1")
11091 (set_attr "prefix" "evex")
11092 (set_attr "mode" "XI")])

;; 256-bit expander (AVX2; VL required when masked).
11094 (define_expand "vec_widen_umult_even_v8si<mask_name>"
11095 [(set (match_operand:V4DI 0 "register_operand")
11099 (match_operand:V8SI 1 "nonimmediate_operand")
11100 (parallel [(const_int 0) (const_int 2)
11101 (const_int 4) (const_int 6)])))
11104 (match_operand:V8SI 2 "nonimmediate_operand")
11105 (parallel [(const_int 0) (const_int 2)
11106 (const_int 4) (const_int 6)])))))]
11107 "TARGET_AVX2 && <mask_avx512vl_condition>"
11108 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")

;; 256-bit insn.
11110 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
11111 [(set (match_operand:V4DI 0 "register_operand" "=v")
11115 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11116 (parallel [(const_int 0) (const_int 2)
11117 (const_int 4) (const_int 6)])))
11120 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11121 (parallel [(const_int 0) (const_int 2)
11122 (const_int 4) (const_int 6)])))))]
11123 "TARGET_AVX2 && <mask_avx512vl_condition>
11124 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11125 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11126 [(set_attr "type" "sseimul")
11127 (set_attr "prefix" "maybe_evex")
11128 (set_attr "mode" "OI")])

;; 128-bit expander (SSE2 pmuludq and up).
11130 (define_expand "vec_widen_umult_even_v4si<mask_name>"
11131 [(set (match_operand:V2DI 0 "register_operand")
11135 (match_operand:V4SI 1 "vector_operand")
11136 (parallel [(const_int 0) (const_int 2)])))
11139 (match_operand:V4SI 2 "vector_operand")
11140 (parallel [(const_int 0) (const_int 2)])))))]
11141 "TARGET_SSE2 && <mask_avx512vl_condition>"
11142 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")

;; 128-bit insn: legacy SSE alternative plus VEX/EVEX alternative.
11144 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
11145 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
11149 (match_operand:V4SI 1 "vector_operand" "%0,v")
11150 (parallel [(const_int 0) (const_int 2)])))
11153 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
11154 (parallel [(const_int 0) (const_int 2)])))))]
11155 "TARGET_SSE2 && <mask_avx512vl_condition>
11156 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11158 pmuludq\t{%2, %0|%0, %2}
11159 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11160 [(set_attr "isa" "noavx,avx")
11161 (set_attr "type" "sseimul")
11162 (set_attr "prefix_data16" "1,*")
11163 (set_attr "prefix" "orig,maybe_evex")
11164 (set_attr "mode" "TI")])
;; Widening signed multiply of even dword elements (vpmuldq, SSE4.1+).
;; NOTE(review): interior lines are elided in this listing.

;; 512-bit expander.
11166 (define_expand "vec_widen_smult_even_v16si<mask_name>"
11167 [(set (match_operand:V8DI 0 "register_operand")
11171 (match_operand:V16SI 1 "nonimmediate_operand")
11172 (parallel [(const_int 0) (const_int 2)
11173 (const_int 4) (const_int 6)
11174 (const_int 8) (const_int 10)
11175 (const_int 12) (const_int 14)])))
11178 (match_operand:V16SI 2 "nonimmediate_operand")
11179 (parallel [(const_int 0) (const_int 2)
11180 (const_int 4) (const_int 6)
11181 (const_int 8) (const_int 10)
11182 (const_int 12) (const_int 14)])))))]
11184 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")

;; 512-bit insn.
11186 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
11187 [(set (match_operand:V8DI 0 "register_operand" "=v")
11191 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11192 (parallel [(const_int 0) (const_int 2)
11193 (const_int 4) (const_int 6)
11194 (const_int 8) (const_int 10)
11195 (const_int 12) (const_int 14)])))
11198 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11199 (parallel [(const_int 0) (const_int 2)
11200 (const_int 4) (const_int 6)
11201 (const_int 8) (const_int 10)
11202 (const_int 12) (const_int 14)])))))]
11203 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11204 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11205 [(set_attr "type" "sseimul")
11206 (set_attr "prefix_extra" "1")
11207 (set_attr "prefix" "evex")
11208 (set_attr "mode" "XI")])

;; 256-bit expander.
11210 (define_expand "vec_widen_smult_even_v8si<mask_name>"
11211 [(set (match_operand:V4DI 0 "register_operand")
11215 (match_operand:V8SI 1 "nonimmediate_operand")
11216 (parallel [(const_int 0) (const_int 2)
11217 (const_int 4) (const_int 6)])))
11220 (match_operand:V8SI 2 "nonimmediate_operand")
11221 (parallel [(const_int 0) (const_int 2)
11222 (const_int 4) (const_int 6)])))))]
11223 "TARGET_AVX2 && <mask_avx512vl_condition>"
11224 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")

;; 256-bit insn.
11226 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
11227 [(set (match_operand:V4DI 0 "register_operand" "=v")
11231 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11232 (parallel [(const_int 0) (const_int 2)
11233 (const_int 4) (const_int 6)])))
11236 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11237 (parallel [(const_int 0) (const_int 2)
11238 (const_int 4) (const_int 6)])))))]
11239 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11240 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11241 [(set_attr "type" "sseimul")
11242 (set_attr "prefix_extra" "1")
11243 (set_attr "prefix" "vex")
11244 (set_attr "mode" "OI")])

;; 128-bit expander (SSE4.1 pmuldq).
11246 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
11247 [(set (match_operand:V2DI 0 "register_operand")
11251 (match_operand:V4SI 1 "vector_operand")
11252 (parallel [(const_int 0) (const_int 2)])))
11255 (match_operand:V4SI 2 "vector_operand")
11256 (parallel [(const_int 0) (const_int 2)])))))]
11257 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
11258 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")

;; 128-bit insn: Yr alternative prefers ports on some uarchs; last
;; alternative is VEX/EVEX three-operand.
11260 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
11261 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
11265 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
11266 (parallel [(const_int 0) (const_int 2)])))
11269 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
11270 (parallel [(const_int 0) (const_int 2)])))))]
11271 "TARGET_SSE4_1 && <mask_avx512vl_condition>
11272 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11274 pmuldq\t{%2, %0|%0, %2}
11275 pmuldq\t{%2, %0|%0, %2}
11276 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11277 [(set_attr "isa" "noavx,noavx,avx")
11278 (set_attr "type" "sseimul")
11279 (set_attr "prefix_data16" "1,1,*")
11280 (set_attr "prefix_extra" "1")
11281 (set_attr "prefix" "orig,orig,vex")
11282 (set_attr "mode" "TI")])
;; 512-bit / masked VPMADDWD: multiply packed signed words, add
;; adjacent dword products (modeled as an unspec since the exact
;; pairwise RTL is expressed only in the narrower patterns).
;; Fix: dropped the stray trailing ';' after the output template —
;; it only worked because ';' opens a comment in md syntax.
11284 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
11285 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
11286 (unspec:<sseunpackmode>
11287 [(match_operand:VI2_AVX2 1 "register_operand" "v")
11288 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
11289 UNSPEC_PMADDWD512))]
11290 "TARGET_AVX512BW && <mask_mode512bit_condition>"
11291 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11292 [(set_attr "type" "sseiadd")
11293 (set_attr "prefix" "evex")
11294 (set_attr "mode" "XI")])
;; VPMADDWD patterns with the pairwise multiply-add spelled out in RTL
;; (even products + odd products), for 256-bit and 128-bit vectors.
;; NOTE(review): interior lines are elided in this listing.

;; 256-bit expander: even word lanes of both sources multiplied and
;; added to the products of the odd word lanes.
11296 (define_expand "avx2_pmaddwd"
11297 [(set (match_operand:V8SI 0 "register_operand")
11302 (match_operand:V16HI 1 "nonimmediate_operand")
11303 (parallel [(const_int 0) (const_int 2)
11304 (const_int 4) (const_int 6)
11305 (const_int 8) (const_int 10)
11306 (const_int 12) (const_int 14)])))
11309 (match_operand:V16HI 2 "nonimmediate_operand")
11310 (parallel [(const_int 0) (const_int 2)
11311 (const_int 4) (const_int 6)
11312 (const_int 8) (const_int 10)
11313 (const_int 12) (const_int 14)]))))
11316 (vec_select:V8HI (match_dup 1)
11317 (parallel [(const_int 1) (const_int 3)
11318 (const_int 5) (const_int 7)
11319 (const_int 9) (const_int 11)
11320 (const_int 13) (const_int 15)])))
11322 (vec_select:V8HI (match_dup 2)
11323 (parallel [(const_int 1) (const_int 3)
11324 (const_int 5) (const_int 7)
11325 (const_int 9) (const_int 11)
11326 (const_int 13) (const_int 15)]))))))]
11328 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")

;; 256-bit insn; second alternative allows EVEX registers (AVX512BW).
11330 (define_insn "*avx2_pmaddwd"
11331 [(set (match_operand:V8SI 0 "register_operand" "=x,v")
11336 (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
11337 (parallel [(const_int 0) (const_int 2)
11338 (const_int 4) (const_int 6)
11339 (const_int 8) (const_int 10)
11340 (const_int 12) (const_int 14)])))
11343 (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
11344 (parallel [(const_int 0) (const_int 2)
11345 (const_int 4) (const_int 6)
11346 (const_int 8) (const_int 10)
11347 (const_int 12) (const_int 14)]))))
11350 (vec_select:V8HI (match_dup 1)
11351 (parallel [(const_int 1) (const_int 3)
11352 (const_int 5) (const_int 7)
11353 (const_int 9) (const_int 11)
11354 (const_int 13) (const_int 15)])))
11356 (vec_select:V8HI (match_dup 2)
11357 (parallel [(const_int 1) (const_int 3)
11358 (const_int 5) (const_int 7)
11359 (const_int 9) (const_int 11)
11360 (const_int 13) (const_int 15)]))))))]
11361 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11362 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
11363 [(set_attr "type" "sseiadd")
11364 (set_attr "isa" "*,avx512bw")
11365 (set_attr "prefix" "vex,evex")
11366 (set_attr "mode" "OI")])

;; 128-bit expander (SSE2 pmaddwd), same even/odd structure.
11368 (define_expand "sse2_pmaddwd"
11369 [(set (match_operand:V4SI 0 "register_operand")
11374 (match_operand:V8HI 1 "vector_operand")
11375 (parallel [(const_int 0) (const_int 2)
11376 (const_int 4) (const_int 6)])))
11379 (match_operand:V8HI 2 "vector_operand")
11380 (parallel [(const_int 0) (const_int 2)
11381 (const_int 4) (const_int 6)]))))
11384 (vec_select:V4HI (match_dup 1)
11385 (parallel [(const_int 1) (const_int 3)
11386 (const_int 5) (const_int 7)])))
11388 (vec_select:V4HI (match_dup 2)
11389 (parallel [(const_int 1) (const_int 3)
11390 (const_int 5) (const_int 7)]))))))]
11392 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")

;; 128-bit insn: legacy SSE, VEX, and EVEX alternatives.
11394 (define_insn "*sse2_pmaddwd"
11395 [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
11400 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
11401 (parallel [(const_int 0) (const_int 2)
11402 (const_int 4) (const_int 6)])))
11405 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
11406 (parallel [(const_int 0) (const_int 2)
11407 (const_int 4) (const_int 6)]))))
11410 (vec_select:V4HI (match_dup 1)
11411 (parallel [(const_int 1) (const_int 3)
11412 (const_int 5) (const_int 7)])))
11414 (vec_select:V4HI (match_dup 2)
11415 (parallel [(const_int 1) (const_int 3)
11416 (const_int 5) (const_int 7)]))))))]
11417 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11419 pmaddwd\t{%2, %0|%0, %2}
11420 vpmaddwd\t{%2, %1, %0|%0, %1, %2}
11421 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
11422 [(set_attr "isa" "noavx,avx,avx512bw")
11423 (set_attr "type" "sseiadd")
11424 (set_attr "atom_unit" "simul")
11425 (set_attr "prefix_data16" "1,*,*")
11426 (set_attr "prefix" "orig,vex,evex")
11427 (set_attr "mode" "TI")])
;; Dword and qword element multiplication.
;; NOTE(review): interior lines are elided in this listing.

;; Native qword multiply (vpmullq), AVX512DQ only.
11429 (define_insn "avx512dq_mul<mode>3<mask_name>"
11430 [(set (match_operand:VI8 0 "register_operand" "=v")
11432 (match_operand:VI8 1 "register_operand" "v")
11433 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
11434 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
11435 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11436 [(set_attr "type" "sseimul")
11437 (set_attr "prefix" "evex")
11438 (set_attr "mode" "<sseinsnmode>")])

;; Dword multiply expander: uses pmulld when available; otherwise
;; synthesized by ix86_expand_sse2_mulv4si3 (forces both operands into
;; registers first when they are not vector_operand).
11440 (define_expand "mul<mode>3<mask_name>"
11441 [(set (match_operand:VI4_AVX512F 0 "register_operand")
11443 (match_operand:VI4_AVX512F 1 "general_vector_operand")
11444 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
11445 "TARGET_SSE2 && <mask_mode512bit_condition>"
11449 if (!vector_operand (operands[1], <MODE>mode))
11450 operands[1] = force_reg (<MODE>mode, operands[1]);
11451 if (!vector_operand (operands[2], <MODE>mode))
11452 operands[2] = force_reg (<MODE>mode, operands[2]);
11453 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
11457 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);

;; pmulld insn (SSE4.1+); btver2 decodes it as a vector op.
11462 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
11463 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
11465 (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
11466 (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
11467 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11468 && <mask_mode512bit_condition>"
11470 pmulld\t{%2, %0|%0, %2}
11471 pmulld\t{%2, %0|%0, %2}
11472 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11473 [(set_attr "isa" "noavx,noavx,avx")
11474 (set_attr "type" "sseimul")
11475 (set_attr "prefix_extra" "1")
11476 (set_attr "prefix" "<mask_prefix4>")
11477 (set_attr "btver2_decode" "vector,vector,vector")
11478 (set_attr "mode" "<sseinsnmode>")])

;; Generic qword multiply expander: always synthesized via
;; ix86_expand_sse2_mulvxdi3 (no pre-AVX512DQ hardware qword multiply).
11480 (define_expand "mul<mode>3"
11481 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
11482 (mult:VI8_AVX2_AVX512F
11483 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
11484 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
11487 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening-multiply helper expanders and signed dot-product.
;; NOTE(review): interior lines are elided in this listing.

;; High-half widening multiply — expanded entirely in C code.
11491 (define_expand "vec_widen_<s>mult_hi_<mode>"
11492 [(match_operand:<sseunpackmode> 0 "register_operand")
11493 (any_extend:<sseunpackmode>
11494 (match_operand:VI124_AVX2 1 "register_operand"))
11495 (match_operand:VI124_AVX2 2 "register_operand")]
11498 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],

;; Low-half widening multiply.
11503 (define_expand "vec_widen_<s>mult_lo_<mode>"
11504 [(match_operand:<sseunpackmode> 0 "register_operand")
11505 (any_extend:<sseunpackmode>
11506 (match_operand:VI124_AVX2 1 "register_operand"))
11507 (match_operand:VI124_AVX2 2 "register_operand")]
11510 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],

11515 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
11516 ;; named patterns, but signed V4SI needs special help for plain SSE2.
11517 (define_expand "vec_widen_smult_even_v4si"
11518 [(match_operand:V2DI 0 "register_operand")
11519 (match_operand:V4SI 1 "vector_operand")
11520 (match_operand:V4SI 2 "vector_operand")]
11523 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],

;; Odd-element widening multiply.
11528 (define_expand "vec_widen_<s>mult_odd_<mode>"
11529 [(match_operand:<sseunpackmode> 0 "register_operand")
11530 (any_extend:<sseunpackmode>
11531 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
11532 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
11535 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],

;; Suffix selecting the 512-bit pmaddwd gen function by element mode.
11540 (define_mode_attr SDOT_PMADD_SUF
11541 [(V32HI "512v32hi") (V16HI "") (V8HI "")])

;; Signed word dot-product: pmaddwd then add the accumulator.
11543 (define_expand "sdot_prod<mode>"
11544 [(match_operand:<sseunpackmode> 0 "register_operand")
11545 (match_operand:VI2_AVX2 1 "register_operand")
11546 (match_operand:VI2_AVX2 2 "register_operand")
11547 (match_operand:<sseunpackmode> 3 "register_operand")]
11550 rtx t = gen_reg_rtx (<sseunpackmode>mode);
11551 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
11552 emit_insn (gen_rtx_SET (operands[0],
11553 gen_rtx_PLUS (<sseunpackmode>mode,

11558 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
11559 ;; back together when madd is available.
11560 (define_expand "sdot_prodv4si"
11561 [(match_operand:V2DI 0 "register_operand")
11562 (match_operand:V4SI 1 "register_operand")
11563 (match_operand:V4SI 2 "register_operand")
11564 (match_operand:V2DI 3 "register_operand")]
11567 rtx t = gen_reg_rtx (V2DImode);
11568 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]))
11569 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Unsigned rounding average and sum-of-absolute-differences.
;; NOTE(review): interior lines are elided in this listing.

;; uavg_ceil: (a + b + 1) >> 1 computed in the double-width mode then
;; truncated back; operand 3 is set to the all-ones vector below.
11573 (define_expand "uavg<mode>3_ceil"
11574 [(set (match_operand:VI12_AVX2 0 "register_operand")
11575 (truncate:VI12_AVX2
11576 (lshiftrt:<ssedoublemode>
11577 (plus:<ssedoublemode>
11578 (plus:<ssedoublemode>
11579 (zero_extend:<ssedoublemode>
11580 (match_operand:VI12_AVX2 1 "vector_operand"))
11581 (zero_extend:<ssedoublemode>
11582 (match_operand:VI12_AVX2 2 "vector_operand")))
11587 operands[3] = CONST1_RTX(<MODE>mode);
11588 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);

;; usad, 128-bit: psadbw produces V2DI partial sums; convert_move
;; reinterprets/narrows to V4SI, then add the accumulator.
11591 (define_expand "usadv16qi"
11592 [(match_operand:V4SI 0 "register_operand")
11593 (match_operand:V16QI 1 "register_operand")
11594 (match_operand:V16QI 2 "vector_operand")
11595 (match_operand:V4SI 3 "vector_operand")]
11598 rtx t1 = gen_reg_rtx (V2DImode);
11599 rtx t2 = gen_reg_rtx (V4SImode);
11600 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
11601 convert_move (t2, t1, 0);
11602 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));

;; usad, 256-bit (AVX2 vpsadbw).
11606 (define_expand "usadv32qi"
11607 [(match_operand:V8SI 0 "register_operand")
11608 (match_operand:V32QI 1 "register_operand")
11609 (match_operand:V32QI 2 "nonimmediate_operand")
11610 (match_operand:V8SI 3 "nonimmediate_operand")]
11613 rtx t1 = gen_reg_rtx (V4DImode);
11614 rtx t2 = gen_reg_rtx (V8SImode);
11615 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
11616 convert_move (t2, t1, 0);
11617 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));

;; usad, 512-bit (AVX512BW vpsadbw).
11621 (define_expand "usadv64qi"
11622 [(match_operand:V16SI 0 "register_operand")
11623 (match_operand:V64QI 1 "register_operand")
11624 (match_operand:V64QI 2 "nonimmediate_operand")
11625 (match_operand:V16SI 3 "nonimmediate_operand")]
11628 rtx t1 = gen_reg_rtx (V8DImode);
11629 rtx t2 = gen_reg_rtx (V16SImode);
11630 emit_insn (gen_avx512f_psadbw (t1, operands[1], operands[2]));
11631 convert_move (t2, t1, 0);
11632 emit_insn (gen_addv16si3 (operands[0], t2, operands[3]));
;; Vector shift patterns: shift count is either an XMM register
;; ("v" alternative, whole-vector count) or an immediate ("N").
;; NOTE(review): interior lines are elided in this listing.

;; Arithmetic right shift, AVX512 subset 1 of element modes, masked.
11636 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
11637 [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
11638 (ashiftrt:VI248_AVX512BW_1
11639 (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
11640 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
11642 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11643 [(set_attr "type" "sseishft")
11644 (set (attr "length_immediate")
11645 (if_then_else (match_operand 2 "const_int_operand")
11647 (const_string "0")))
11648 (set_attr "mode" "<sseinsnmode>")])

;; Arithmetic right shift, legacy SSE/AVX (word/dword only —
;; pre-AVX512 has no arithmetic qword shift).
11650 (define_insn "ashr<mode>3"
11651 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
11652 (ashiftrt:VI24_AVX2
11653 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
11654 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
11657 psra<ssemodesuffix>\t{%2, %0|%0, %2}
11658 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11659 [(set_attr "isa" "noavx,avx")
11660 (set_attr "type" "sseishft")
11661 (set (attr "length_immediate")
11662 (if_then_else (match_operand 2 "const_int_operand")
11664 (const_string "0")))
11665 (set_attr "prefix_data16" "1,*")
11666 (set_attr "prefix" "orig,vex")
11667 (set_attr "mode" "<sseinsnmode>")])

;; Arithmetic right shift, AVX512BW/VL element-mode subset, masked.
11669 (define_insn "ashr<mode>3<mask_name>"
11670 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
11671 (ashiftrt:VI248_AVX512BW_AVX512VL
11672 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
11673 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
11675 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11676 [(set_attr "type" "sseishft")
11677 (set (attr "length_immediate")
11678 (if_then_else (match_operand 2 "const_int_operand")
11680 (const_string "0")))
11681 (set_attr "mode" "<sseinsnmode>")])

;; Logical left/right shifts (any_lshift), AVX512 subset 2, masked.
11683 (define_insn "<mask_codefor><shift_insn><mode>3<mask_name>"
11684 [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
11685 (any_lshift:VI248_AVX512BW_2
11686 (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
11687 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
11689 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11690 [(set_attr "type" "sseishft")
11691 (set (attr "length_immediate")
11692 (if_then_else (match_operand 2 "const_int_operand")
11694 (const_string "0")))
11695 (set_attr "mode" "<sseinsnmode>")])
11697 (define_insn "<shift_insn><mode>3"
11698 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
11699 (any_lshift:VI248_AVX2
11700 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
11701 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
11704 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
11705 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11706 [(set_attr "isa" "noavx,avx")
11707 (set_attr "type" "sseishft")
11708 (set (attr "length_immediate")
11709 (if_then_else (match_operand 2 "const_int_operand")
11711 (const_string "0")))
11712 (set_attr "prefix_data16" "1,*")
11713 (set_attr "prefix" "orig,vex")
11714 (set_attr "mode" "<sseinsnmode>")])
11716 (define_insn "<shift_insn><mode>3<mask_name>"
11717 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
11718 (any_lshift:VI248_AVX512BW
11719 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
11720 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
11722 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11723 [(set_attr "type" "sseishft")
11724 (set (attr "length_immediate")
11725 (if_then_else (match_operand 2 "const_int_operand")
11727 (const_string "0")))
11728 (set_attr "mode" "<sseinsnmode>")])
;; vec_shr_<mode>: whole-vector shift right by a byte count (operand 2 is
;; a multiple of 8 bits).  Performed in V1TImode via lowpart punning of
;; the 128-bit input/output; operand 3 holds the V1TI temporary.
;; NOTE(review): the shift rtx on the elided line 11733 is not visible in
;; this extraction.
11731 (define_expand "vec_shr_<mode>"
11732 [(set (match_dup 3)
11734 (match_operand:VI_128 1 "register_operand")
11735 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
11736 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
11739 operands[1] = gen_lowpart (V1TImode, operands[1]);
11740 operands[3] = gen_reg_rtx (V1TImode);
11741 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; AVX512BW whole-register byte shift (pslldq/psrldq family): the
;; bit-count immediate is divided by 8 to produce the byte-count
;; immediate the instruction encodes.
11744 (define_insn "avx512bw_<shift_insn><mode>3"
11745 [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
11746 (any_lshift:VIMAX_AVX512VL
11747 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
11748 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
11751 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
11752 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
11754 [(set_attr "type" "sseishft")
11755 (set_attr "length_immediate" "1")
11756 (set_attr "prefix" "maybe_evex")
11757 (set_attr "mode" "<sseinsnmode>")])
;; SSE2/AVX2 whole-register byte shift.  Converts the bit count to a byte
;; count, then selects the destructive (noavx) or three-operand (avx)
;; template by alternative.
11759 (define_insn "<sse2_avx2>_<shift_insn><mode>3"
11760 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
11761 (any_lshift:VIMAX_AVX2
11762 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
11763 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
11766 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
11768 switch (which_alternative)
11771 return "p<vshift>dq\t{%2, %0|%0, %2}";
11773 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
11775 gcc_unreachable ();
11778 [(set_attr "isa" "noavx,avx")
11779 (set_attr "type" "sseishft")
11780 (set_attr "length_immediate" "1")
11781 (set_attr "atom_unit" "sishuf")
11782 (set_attr "prefix_data16" "1,*")
11783 (set_attr "prefix" "orig,vex")
11784 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 variable rotate (vprolv/vprorv): per-element rotate counts in
;; vector operand 2.
11786 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
11787 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11788 (any_rotate:VI48_AVX512VL
11789 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
11790 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
11792 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11793 [(set_attr "prefix" "evex")
11794 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 immediate rotate (vprol/vpror): single rotate count in the
;; 0..255 immediate operand 2.
11796 (define_insn "<avx512>_<rotate><mode><mask_name>"
11797 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11798 (any_rotate:VI48_AVX512VL
11799 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
11800 (match_operand:SI 2 "const_0_to_255_operand")))]
11802 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11803 [(set_attr "prefix" "evex")
11804 (set_attr "mode" "<sseinsnmode>")])
;; Integer max/min, 256/512-bit expander: canonicalizes operands via
;; ix86_fixup_binary_operands_no_copy (commutative fixup).
11806 (define_expand "<code><mode>3"
11807 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
11808 (maxmin:VI124_256_AVX512F_AVX512BW
11809 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
11810 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
11812 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 max/min insn (vpmaxs*/vpmins*/vpmaxu*/vpminu* via maxmin_int).
11814 (define_insn "*avx2_<code><mode>3"
11815 [(set (match_operand:VI124_256 0 "register_operand" "=v")
11817 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
11818 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
11819 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11820 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11821 [(set_attr "type" "sseiadd")
11822 (set_attr "prefix_extra" "1")
11823 (set_attr "prefix" "vex")
11824 (set_attr "mode" "OI")])
;; Masked max/min expander: result merged with operand 3 under mask
;; operand 4.
11826 (define_expand "<code><mode>3_mask"
11827 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11828 (vec_merge:VI48_AVX512VL
11829 (maxmin:VI48_AVX512VL
11830 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11831 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11832 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
11833 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11835 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX-512 max/min insn for 32/64-bit elements, optionally masked.
11837 (define_insn "*avx512f_<code><mode>3<mask_name>"
11838 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11839 (maxmin:VI48_AVX512VL
11840 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
11841 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
11842 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11843 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11844 [(set_attr "type" "sseiadd")
11845 (set_attr "prefix_extra" "1")
11846 (set_attr "prefix" "maybe_evex")
11847 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 max/min insn for 8/16-bit elements (AVX512BW domain).
11849 (define_insn "<mask_codefor><code><mode>3<mask_name>"
11850 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
11851 (maxmin:VI12_AVX512VL
11852 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
11853 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
11855 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11856 [(set_attr "type" "sseiadd")
11857 (set_attr "prefix" "evex")
11858 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit element max/min expander.  With native support (V8DI, or
;; AVX512VL for narrower widths) just canonicalize; otherwise synthesize
;; via ix86_expand_int_vcond: pick GT/GTU as comparison, ordering the
;; xops so that max selects operand 1 on true and min selects operand 2.
;; NOTE(review): several interior lines (braces, fail paths) are missing
;; from this extraction.
11860 (define_expand "<code><mode>3"
11861 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
11862 (maxmin:VI8_AVX2_AVX512F
11863 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
11864 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
11868 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
11869 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11872 enum rtx_code code;
11877 xops[0] = operands[0];
11879 if (<CODE> == SMAX || <CODE> == UMAX)
11881 xops[1] = operands[1];
11882 xops[2] = operands[2];
11886 xops[1] = operands[2];
11887 xops[2] = operands[1];
11890 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
11892 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
11893 xops[4] = operands[1];
11894 xops[5] = operands[2];
11896 ok = ix86_expand_int_vcond (xops);
;; 128-bit signed max/min expander.  SSE4.1 (or the natively supported
;; V8HI pmaxsw/pminsw case) canonicalizes directly; otherwise synthesize
;; with a signed GT vcond, swapping xops[1]/xops[2] for min.
11902 (define_expand "<code><mode>3"
11903 [(set (match_operand:VI124_128 0 "register_operand")
11905 (match_operand:VI124_128 1 "vector_operand")
11906 (match_operand:VI124_128 2 "vector_operand")))]
11909 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
11910 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11916 xops[0] = operands[0];
11917 operands[1] = force_reg (<MODE>mode, operands[1]);
11918 operands[2] = force_reg (<MODE>mode, operands[2]);
11920 if (<CODE> == SMAX)
11922 xops[1] = operands[1];
11923 xops[2] = operands[2];
11927 xops[1] = operands[2];
11928 xops[2] = operands[1];
11931 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
11932 xops[4] = operands[1];
11933 xops[5] = operands[2];
11935 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn, V4SI/V16QI signed-unsigned variants (VI14_128),
;; with optional AVX-512 masking on the third alternative.
11941 (define_insn "*sse4_1_<code><mode>3<mask_name>"
11942 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
11944 (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
11945 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11947 && <mask_mode512bit_condition>
11948 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11950 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11951 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11952 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11953 [(set_attr "isa" "noavx,noavx,avx")
11954 (set_attr "type" "sseiadd")
11955 (set_attr "prefix_extra" "1,1,*")
11956 (set_attr "prefix" "orig,orig,vex")
11957 (set_attr "mode" "TI")])
;; SSE2 pmaxsw/pminsw (and AVX/AVX512BW vp forms) for V8HI.
11959 (define_insn "*<code>v8hi3"
11960 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
11962 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
11963 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
11964 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11966 p<maxmin_int>w\t{%2, %0|%0, %2}
11967 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
11968 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
11969 [(set_attr "isa" "noavx,avx,avx512bw")
11970 (set_attr "type" "sseiadd")
11971 (set_attr "prefix_data16" "1,*,*")
11972 (set_attr "prefix_extra" "*,1,1")
11973 (set_attr "prefix" "orig,vex,evex")
11974 (set_attr "mode" "TI")])
;; 128-bit unsigned max/min expander.  Cases:
;;   - SSE4.1 or V16QI (native pmaxub/pminub): plain canonicalization.
;;   - UMAX on V8HI without SSE4.1: umax(a,b) = ussub(a,b) + b, using
;;     saturating subtract (a fresh temp is used if dest aliases op2).
;;   - Otherwise: synthesize via an unsigned GTU vcond, swapping
;;     xops[1]/xops[2] for min.
11976 (define_expand "<code><mode>3"
11977 [(set (match_operand:VI124_128 0 "register_operand")
11979 (match_operand:VI124_128 1 "vector_operand")
11980 (match_operand:VI124_128 2 "vector_operand")))]
11983 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
11984 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11985 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
11987 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
11988 operands[1] = force_reg (<MODE>mode, operands[1]);
11989 if (rtx_equal_p (op3, op2))
11990 op3 = gen_reg_rtx (V8HImode);
11991 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
11992 emit_insn (gen_addv8hi3 (op0, op3, op2));
12000 operands[1] = force_reg (<MODE>mode, operands[1]);
12001 operands[2] = force_reg (<MODE>mode, operands[2]);
12003 xops[0] = operands[0];
12005 if (<CODE> == UMAX)
12007 xops[1] = operands[1];
12008 xops[2] = operands[2];
12012 xops[1] = operands[2];
12013 xops[2] = operands[1];
12016 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
12017 xops[4] = operands[1];
12018 xops[5] = operands[2];
12020 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn for 16-bit/32-bit elements (VI24_128), with
;; optional AVX-512 masking on the third alternative.
12026 (define_insn "*sse4_1_<code><mode>3<mask_name>"
12027 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
12029 (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
12030 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
12032 && <mask_mode512bit_condition>
12033 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12035 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12036 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12037 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12038 [(set_attr "isa" "noavx,noavx,avx")
12039 (set_attr "type" "sseiadd")
12040 (set_attr "prefix_extra" "1,1,*")
12041 (set_attr "prefix" "orig,orig,vex")
12042 (set_attr "mode" "TI")])
;; SSE2 pmaxub/pminub (and AVX/AVX512BW vp forms) for V16QI.
12044 (define_insn "*<code>v16qi3"
12045 [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
12047 (match_operand:V16QI 1 "vector_operand" "%0,x,v")
12048 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
12049 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12051 p<maxmin_int>b\t{%2, %0|%0, %2}
12052 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
12053 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
12054 [(set_attr "isa" "noavx,avx,avx512bw")
12055 (set_attr "type" "sseiadd")
12056 (set_attr "prefix_data16" "1,*,*")
12057 (set_attr "prefix_extra" "*,1,1")
12058 (set_attr "prefix" "orig,vex,evex")
12059 (set_attr "mode" "TI")])
12061 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12063 ;; Parallel integral comparisons
12065 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX2 256-bit integer equality compare: expander canonicalizes the
;; commutative operands, the insn emits vpcmpeq*.
12067 (define_expand "avx2_eq<mode>3"
12068 [(set (match_operand:VI_256 0 "register_operand")
12070 (match_operand:VI_256 1 "nonimmediate_operand")
12071 (match_operand:VI_256 2 "nonimmediate_operand")))]
12073 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12075 (define_insn "*avx2_eq<mode>3"
12076 [(set (match_operand:VI_256 0 "register_operand" "=x")
12078 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
12079 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12080 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12081 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12082 [(set_attr "type" "ssecmp")
12083 (set_attr "prefix_extra" "1")
12084 (set_attr "prefix" "vex")
12085 (set_attr "mode" "OI")])
;; AVX-512 equality compares producing a mask register (UNSPEC_MASKED_EQ):
;; 8/16-bit element expander (AVX512BW domain)...
12087 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12088 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12089 (unspec:<avx512fmaskmode>
12090 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
12091 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
12092 UNSPEC_MASKED_EQ))]
12094 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; ...and 32/64-bit element expander (AVX512F domain).
12096 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12097 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12098 (unspec:<avx512fmaskmode>
12099 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
12100 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
12101 UNSPEC_MASKED_EQ))]
12103 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Masked-EQ insns: second alternative matches compare-against-zero
;; (constraint "C") and emits vptestnm (x == 0  <=>  ~(x & x)).
12105 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
12106 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
12107 (unspec:<avx512fmaskmode>
12108 [(match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
12109 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
12110 UNSPEC_MASKED_EQ))]
12111 "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12113 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
12114 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
12115 [(set_attr "type" "ssecmp")
12116 (set_attr "prefix_extra" "1")
12117 (set_attr "prefix" "evex")
12118 (set_attr "mode" "<sseinsnmode>")])
12120 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
12121 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
12122 (unspec:<avx512fmaskmode>
12123 [(match_operand:VI48_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
12124 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
12125 UNSPEC_MASKED_EQ))]
12126 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12128 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
12129 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
12130 [(set_attr "type" "ssecmp")
12131 (set_attr "prefix_extra" "1")
12132 (set_attr "prefix" "evex")
12133 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1 pcmpeqq for V2DI (64-bit element equality).
12135 (define_insn "*sse4_1_eqv2di3"
12136 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
12138 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
12139 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
12140 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12142 pcmpeqq\t{%2, %0|%0, %2}
12143 pcmpeqq\t{%2, %0|%0, %2}
12144 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
12145 [(set_attr "isa" "noavx,noavx,avx")
12146 (set_attr "type" "ssecmp")
12147 (set_attr "prefix_extra" "1")
12148 (set_attr "prefix" "orig,orig,vex")
12149 (set_attr "mode" "TI")])
;; SSE2 pcmpeqb/w/d; disabled under TARGET_XOP (XOP has its own compares).
12151 (define_insn "*sse2_eq<mode>3"
12152 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
12154 (match_operand:VI124_128 1 "vector_operand" "%0,x")
12155 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
12156 "TARGET_SSE2 && !TARGET_XOP
12157 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12159 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
12160 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12161 [(set_attr "isa" "noavx,avx")
12162 (set_attr "type" "ssecmp")
12163 (set_attr "prefix_data16" "1,*")
12164 (set_attr "prefix" "orig,vex")
12165 (set_attr "mode" "TI")])
;; Named expanders for the above, canonicalizing commutative operands.
12167 (define_expand "sse2_eq<mode>3"
12168 [(set (match_operand:VI124_128 0 "register_operand")
12170 (match_operand:VI124_128 1 "vector_operand")
12171 (match_operand:VI124_128 2 "vector_operand")))]
12172 "TARGET_SSE2 && !TARGET_XOP "
12173 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12175 (define_expand "sse4_1_eqv2di3"
12176 [(set (match_operand:V2DI 0 "register_operand")
12178 (match_operand:V2DI 1 "vector_operand")
12179 (match_operand:V2DI 2 "vector_operand")))]
12181 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Signed greater-than compares.  Unlike EQ these are not commutative,
;; so operand 1 must be a register.
;; SSE4.2 pcmpgtq for V2DI.
12183 (define_insn "sse4_2_gtv2di3"
12184 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
12186 (match_operand:V2DI 1 "register_operand" "0,0,x")
12187 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
12190 pcmpgtq\t{%2, %0|%0, %2}
12191 pcmpgtq\t{%2, %0|%0, %2}
12192 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
12193 [(set_attr "isa" "noavx,noavx,avx")
12194 (set_attr "type" "ssecmp")
12195 (set_attr "prefix_extra" "1")
12196 (set_attr "prefix" "orig,orig,vex")
12197 (set_attr "mode" "TI")])
;; AVX2 256-bit vpcmpgt*.
12199 (define_insn "avx2_gt<mode>3"
12200 [(set (match_operand:VI_256 0 "register_operand" "=x")
12202 (match_operand:VI_256 1 "register_operand" "x")
12203 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12205 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12206 [(set_attr "type" "ssecmp")
12207 (set_attr "prefix_extra" "1")
12208 (set_attr "prefix" "vex")
12209 (set_attr "mode" "OI")])
;; AVX-512 GT compares producing a mask register (UNSPEC_MASKED_GT):
;; 32/64-bit elements, then 8/16-bit elements.
12211 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
12212 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12213 (unspec:<avx512fmaskmode>
12214 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
12215 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
12217 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12218 [(set_attr "type" "ssecmp")
12219 (set_attr "prefix_extra" "1")
12220 (set_attr "prefix" "evex")
12221 (set_attr "mode" "<sseinsnmode>")])
12223 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
12224 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12225 (unspec:<avx512fmaskmode>
12226 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
12227 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
12229 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12230 [(set_attr "type" "ssecmp")
12231 (set_attr "prefix_extra" "1")
12232 (set_attr "prefix" "evex")
12233 (set_attr "mode" "<sseinsnmode>")])
;; SSE2 pcmpgtb/w/d; disabled under TARGET_XOP.
12235 (define_insn "sse2_gt<mode>3"
12236 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
12238 (match_operand:VI124_128 1 "register_operand" "0,x")
12239 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
12240 "TARGET_SSE2 && !TARGET_XOP"
12242 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
12243 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12244 [(set_attr "isa" "noavx,avx")
12245 (set_attr "type" "ssecmp")
12246 (set_attr "prefix_data16" "1,*")
12247 (set_attr "prefix" "orig,vex")
12248 (set_attr "mode" "TI")])
;; vcond<data-mode><cmp-mode> expanders: vector conditional select on a
;; signed integer comparison, delegated to ix86_expand_int_vcond.  The
;; NUNITS condition keeps data and comparison modes element-compatible.
;; NOTE(review): the FAIL/gcc_assert lines after each
;; ix86_expand_int_vcond call are missing from this extraction.
12250 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
12251 [(set (match_operand:V_512 0 "register_operand")
12252 (if_then_else:V_512
12253 (match_operator 3 ""
12254 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
12255 (match_operand:VI_AVX512BW 5 "general_operand")])
12256 (match_operand:V_512 1)
12257 (match_operand:V_512 2)))]
12259 && (GET_MODE_NUNITS (<V_512:MODE>mode)
12260 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
12262 bool ok = ix86_expand_int_vcond (operands);
;; 256-bit variant.
12267 (define_expand "vcond<V_256:mode><VI_256:mode>"
12268 [(set (match_operand:V_256 0 "register_operand")
12269 (if_then_else:V_256
12270 (match_operator 3 ""
12271 [(match_operand:VI_256 4 "nonimmediate_operand")
12272 (match_operand:VI_256 5 "general_operand")])
12273 (match_operand:V_256 1)
12274 (match_operand:V_256 2)))]
12276 && (GET_MODE_NUNITS (<V_256:MODE>mode)
12277 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
12279 bool ok = ix86_expand_int_vcond (operands);
;; 128-bit variant (8/16/32-bit comparison elements).
12284 (define_expand "vcond<V_128:mode><VI124_128:mode>"
12285 [(set (match_operand:V_128 0 "register_operand")
12286 (if_then_else:V_128
12287 (match_operator 3 ""
12288 [(match_operand:VI124_128 4 "vector_operand")
12289 (match_operand:VI124_128 5 "general_operand")])
12290 (match_operand:V_128 1)
12291 (match_operand:V_128 2)))]
12293 && (GET_MODE_NUNITS (<V_128:MODE>mode)
12294 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
12296 bool ok = ix86_expand_int_vcond (operands);
;; 64-bit comparison elements (V2DI) selecting V2DI/V2DF data.
12301 (define_expand "vcond<VI8F_128:mode>v2di"
12302 [(set (match_operand:VI8F_128 0 "register_operand")
12303 (if_then_else:VI8F_128
12304 (match_operator 3 ""
12305 [(match_operand:V2DI 4 "vector_operand")
12306 (match_operand:V2DI 5 "general_operand")])
12307 (match_operand:VI8F_128 1)
12308 (match_operand:VI8F_128 2)))]
12311 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expanders: same as vcond but for UNSIGNED integer comparisons,
;; again delegated to ix86_expand_int_vcond.
;; NOTE(review): the FAIL/gcc_assert lines after each call are missing
;; from this extraction.
12316 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
12317 [(set (match_operand:V_512 0 "register_operand")
12318 (if_then_else:V_512
12319 (match_operator 3 ""
12320 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
12321 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
12322 (match_operand:V_512 1 "general_operand")
12323 (match_operand:V_512 2 "general_operand")))]
12325 && (GET_MODE_NUNITS (<V_512:MODE>mode)
12326 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
12328 bool ok = ix86_expand_int_vcond (operands);
;; 256-bit variant.
12333 (define_expand "vcondu<V_256:mode><VI_256:mode>"
12334 [(set (match_operand:V_256 0 "register_operand")
12335 (if_then_else:V_256
12336 (match_operator 3 ""
12337 [(match_operand:VI_256 4 "nonimmediate_operand")
12338 (match_operand:VI_256 5 "nonimmediate_operand")])
12339 (match_operand:V_256 1 "general_operand")
12340 (match_operand:V_256 2 "general_operand")))]
12342 && (GET_MODE_NUNITS (<V_256:MODE>mode)
12343 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
12345 bool ok = ix86_expand_int_vcond (operands);
;; 128-bit variant.
12350 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
12351 [(set (match_operand:V_128 0 "register_operand")
12352 (if_then_else:V_128
12353 (match_operator 3 ""
12354 [(match_operand:VI124_128 4 "vector_operand")
12355 (match_operand:VI124_128 5 "vector_operand")])
12356 (match_operand:V_128 1 "general_operand")
12357 (match_operand:V_128 2 "general_operand")))]
12359 && (GET_MODE_NUNITS (<V_128:MODE>mode)
12360 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
12362 bool ok = ix86_expand_int_vcond (operands);
;; 64-bit comparison elements.
12367 (define_expand "vcondu<VI8F_128:mode>v2di"
12368 [(set (match_operand:VI8F_128 0 "register_operand")
12369 (if_then_else:VI8F_128
12370 (match_operator 3 ""
12371 [(match_operand:V2DI 4 "vector_operand")
12372 (match_operand:V2DI 5 "vector_operand")])
12373 (match_operand:VI8F_128 1 "general_operand")
12374 (match_operand:VI8F_128 2 "general_operand")))]
12377 bool ok = ix86_expand_int_vcond (operands);
;; Equality-only vcond on V2DI comparison elements.
12382 (define_expand "vcondeq<VI8F_128:mode>v2di"
12383 [(set (match_operand:VI8F_128 0 "register_operand")
12384 (if_then_else:VI8F_128
12385 (match_operator 3 ""
12386 [(match_operand:V2DI 4 "vector_operand")
12387 (match_operand:V2DI 5 "general_operand")])
12388 (match_operand:VI8F_128 1)
12389 (match_operand:VI8F_128 2)))]
12392 bool ok = ix86_expand_int_vcond (operands);
;; Modes supporting variable permutes: baseline 128-bit, AVX2 for
;; 256-bit, AVX512F for 512-bit, AVX512BW/VBMI for 512-bit 16/8-bit
;; elements.
12397 (define_mode_iterator VEC_PERM_AVX2
12398 [V16QI V8HI V4SI V2DI V4SF V2DF
12399 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
12400 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
12401 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
12402 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
12403 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
12404 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
;; vec_perm: variable permutation with an integer index vector
;; (operand 3); expansion handled entirely by ix86_expand_vec_perm.
12406 (define_expand "vec_perm<mode>"
12407 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
12408 (match_operand:VEC_PERM_AVX2 1 "register_operand")
12409 (match_operand:VEC_PERM_AVX2 2 "register_operand")
12410 (match_operand:<sseintvecmode> 3 "register_operand")]
12411 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
12413 ix86_expand_vec_perm (operands);
12417 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12419 ;; Parallel bitwise logical operations
12421 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; one_cmpl (bitwise NOT): implemented as XOR with an all-ones vector,
;; materialized into a register here.
12423 (define_expand "one_cmpl<mode>2"
12424 [(set (match_operand:VI 0 "register_operand")
12425 (xor:VI (match_operand:VI 1 "vector_operand")
12429 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
;; pandn-style expander: (~op1) & op2.
12432 (define_expand "<sse2_avx2>_andnot<mode>3"
12433 [(set (match_operand:VI_AVX2 0 "register_operand")
12435 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
12436 (match_operand:VI_AVX2 2 "vector_operand")))]
;; Masked andnot expanders: result merged with operand 3 under mask
;; operand 4 — 32/64-bit elements, then 8/16-bit elements.
12439 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
12440 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
12441 (vec_merge:VI48_AVX512VL
12444 (match_operand:VI48_AVX512VL 1 "register_operand"))
12445 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
12446 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
12447 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12450 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
12451 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
12452 (vec_merge:VI12_AVX512VL
12455 (match_operand:VI12_AVX512VL 1 "register_operand"))
12456 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
12457 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
12458 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; *andnot<mode>3: (~op1) & op2 for all integer vector modes.  The C
;; body picks the mnemonic (pandn / vpandn / andnps family — the elided
;; lines presumably set `tmp`; not visible here) and an element-size
;; suffix by the insn's attr mode: plain "pandn" has no byte/word form
;; and no 512-bit form, so EVEX alternatives fall back to the q suffix.
;; NOTE(review): many interior lines (case labels, tmp assignments,
;; break statements) are missing from this extraction.
12461 (define_insn "*andnot<mode>3"
12462 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
12464 (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
12465 (match_operand:VI 2 "vector_operand" "xBm,xm,vm")))]
12471 const char *ssesuffix;
12473 switch (get_attr_mode (insn))
12476 gcc_assert (TARGET_AVX512F);
12479 gcc_assert (TARGET_AVX2);
12482 gcc_assert (TARGET_SSE2);
12484 switch (<MODE>mode)
12488 /* There is no vpandnb or vpandnw instruction, nor vpandn for
12489 512-bit vectors. Use vpandnq instead. */
12494 ssesuffix = "<ssemodesuffix>";
12500 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
12501 ? "<ssemodesuffix>" : "");
12504 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
12509 gcc_assert (TARGET_AVX512F);
12512 gcc_assert (TARGET_AVX);
12515 gcc_assert (TARGET_SSE);
12521 gcc_unreachable ();
12524 switch (which_alternative)
12527 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
12531 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
12534 gcc_unreachable ();
12537 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12538 output_asm_insn (buf, operands);
12541 [(set_attr "isa" "noavx,avx,avx")
12542 (set_attr "type" "sselog")
12543 (set (attr "prefix_data16")
12545 (and (eq_attr "alternative" "0")
12546 (eq_attr "mode" "TI"))
12548 (const_string "*")))
12549 (set_attr "prefix" "orig,vex,evex")
12551 (cond [(and (match_test "<MODE_SIZE> == 16")
12552 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
12553 (const_string "<ssePSmode>")
12554 (match_test "TARGET_AVX2")
12555 (const_string "<sseinsnmode>")
12556 (match_test "TARGET_AVX")
12558 (match_test "<MODE_SIZE> > 16")
12559 (const_string "V8SF")
12560 (const_string "<sseinsnmode>"))
12561 (ior (not (match_test "TARGET_SSE2"))
12562 (match_test "optimize_function_for_size_p (cfun)"))
12563 (const_string "V4SF")
12565 (const_string "<sseinsnmode>")))])
;; andnot with an embedded-broadcast memory operand: op2 is a scalar
;; memory location broadcast to the full vector (<avx512bcst> modifier).
12567 (define_insn "*andnot<mode>3_bcst"
12568 [(set (match_operand:VI 0 "register_operand" "=v")
12571 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
12572 (vec_duplicate:VI48_AVX512VL
12573 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
12575 "vpandn<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
12576 [(set_attr "type" "sselog")
12577 (set_attr "prefix" "evex")
12578 (set_attr "mode" "<sseinsnmode>")])
;; Masked andnot insn: merge with operand 3 (register or zero, "0C")
;; under mask operand 4 (%{%4%}%N3 emits the {k}/{z} decorations).
12580 (define_insn "*andnot<mode>3_mask"
12581 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12582 (vec_merge:VI48_AVX512VL
12585 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
12586 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
12587 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
12588 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
12590 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
12591 [(set_attr "type" "sselog")
12592 (set_attr "prefix" "evex")
12593 (set_attr "mode" "<sseinsnmode>")])
;; and/ior/xor expander: allows constant-vector operands and defers to
;; ix86_expand_vector_logical_operator.
12595 (define_expand "<code><mode>3"
12596 [(set (match_operand:VI 0 "register_operand")
12598 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
12599 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
12602 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Masked and/ior/xor insn for 32/64-bit element modes.  The C body
;; mirrors *andnot<mode>3: choose mnemonic and element-size suffix from
;; the attr mode, using the typed EVEX form whenever a mask is applied
;; or the EVEX alternative is selected.
;; NOTE(review): many interior lines (case labels, tmp assignments,
;; break statements) are missing from this extraction.
12606 (define_insn "<mask_codefor><code><mode>3<mask_name>"
12607 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
12608 (any_logic:VI48_AVX_AVX512F
12609 (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,x,v")
12610 (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
12611 "TARGET_SSE && <mask_mode512bit_condition>
12612 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12617 const char *ssesuffix;
12619 switch (get_attr_mode (insn))
12622 gcc_assert (TARGET_AVX512F);
12625 gcc_assert (TARGET_AVX2);
12628 gcc_assert (TARGET_SSE2);
12630 switch (<MODE>mode)
12634 ssesuffix = "<ssemodesuffix>";
12640 ssesuffix = (TARGET_AVX512VL
12641 && (<mask_applied> || which_alternative == 2)
12642 ? "<ssemodesuffix>" : "");
12645 gcc_unreachable ();
12650 gcc_assert (TARGET_AVX);
12653 gcc_assert (TARGET_SSE);
12659 gcc_unreachable ();
12662 switch (which_alternative)
12665 if (<mask_applied>)
12666 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
12668 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
12672 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
12675 gcc_unreachable ();
12678 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12679 output_asm_insn (buf, operands);
12682 [(set_attr "isa" "noavx,avx,avx")
12683 (set_attr "type" "sselog")
12684 (set (attr "prefix_data16")
12686 (and (eq_attr "alternative" "0")
12687 (eq_attr "mode" "TI"))
12689 (const_string "*")))
12690 (set_attr "prefix" "<mask_prefix3>,evex")
12692 (cond [(and (match_test "<MODE_SIZE> == 16")
12693 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
12694 (const_string "<ssePSmode>")
12695 (match_test "TARGET_AVX2")
12696 (const_string "<sseinsnmode>")
12697 (match_test "TARGET_AVX")
12699 (match_test "<MODE_SIZE> > 16")
12700 (const_string "V8SF")
12701 (const_string "<sseinsnmode>"))
12702 (ior (not (match_test "TARGET_SSE2"))
12703 (match_test "optimize_function_for_size_p (cfun)"))
12704 (const_string "V4SF")
12706 (const_string "<sseinsnmode>")))])
;; Bitwise logic for 1/2-byte-element integer vectors.  There is no
;; byte/word form of vpand/vpor/vpxor in AVX512, so when an EVEX
;; encoding is required (alternative 2) the q-suffixed dword/qword
;; form is used instead; no masking variant exists here.
12708 (define_insn "*<code><mode>3"
12709   [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
12710 	(any_logic:VI12_AVX_AVX512F
12711 	  (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
12712 	  (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
12713   "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12718   const char *ssesuffix;
12720   switch (get_attr_mode (insn))
12723       gcc_assert (TARGET_AVX512F);
12726       gcc_assert (TARGET_AVX2);
12729       gcc_assert (TARGET_SSE2);
12731       switch (<MODE>mode)
12741 	  ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
12744 	  gcc_unreachable ();
12749       gcc_assert (TARGET_AVX);
12752       gcc_assert (TARGET_SSE);
12758       gcc_unreachable ();
12761   switch (which_alternative)
12764       ops = "%s%s\t{%%2, %%0|%%0, %%2}";
12768       ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
12771       gcc_unreachable ();
12774   snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12775   output_asm_insn (buf, operands);
12778   [(set_attr "isa" "noavx,avx,avx")
12779    (set_attr "type" "sselog")
12780    (set (attr "prefix_data16")
12782 	  (and (eq_attr "alternative" "0")
12783 	       (eq_attr "mode" "TI"))
12785 	  (const_string "*")))
12786    (set_attr "prefix" "orig,vex,evex")
;; Same mode-selection logic as the VI48 pattern above.
12788     (cond [(and (match_test "<MODE_SIZE> == 16")
12789 		(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
12790 	     (const_string "<ssePSmode>")
12791 	   (match_test "TARGET_AVX2")
12792 	     (const_string "<sseinsnmode>")
12793 	   (match_test "TARGET_AVX")
12795 	       (match_test "<MODE_SIZE> > 16")
12796 	       (const_string "V8SF")
12797 	       (const_string "<sseinsnmode>"))
12798 	   (ior (not (match_test "TARGET_SSE2"))
12799 		(match_test "optimize_function_for_size_p (cfun)"))
12800 	     (const_string "V4SF")
12802 	  (const_string "<sseinsnmode>")))])
;; Bitwise logic with the first operand an embedded broadcast of a
;; scalar memory location (EVEX {1toN} form).
;; NOTE(review): "type" is "sseiadd" although this emits a vp<logic>
;; logic instruction -- confirm whether "sselog" was intended.
12804 (define_insn "*<code><mode>3_bcst"
12805   [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12806 	(any_logic:VI48_AVX512VL
12807 	  (vec_duplicate:VI48_AVX512VL
12808 	    (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
12809 	  (match_operand:VI48_AVX512VL 2 "register_operand" "v")))]
12810   "TARGET_AVX512F && <mask_avx512vl_condition>"
12811   "vp<logic><ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
12812   [(set_attr "type" "sseiadd")
12813    (set_attr "prefix" "evex")
12814    (set_attr "mode" "<sseinsnmode>")])
;; Integer vector modes for the vptestm/vptestnm patterns below:
;; QI/HI element modes need AVX512BW (plus AVX512VL for the 128/256-bit
;; widths), SI/DI element modes need only AVX512F (VL for sub-512-bit).
12816 (define_mode_iterator VI1248_AVX512VLBW
12817   [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
12818    (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
12819    (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
12820    (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
12821    V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
12822    V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; Wider mask modes a vptestm/vptestnm result may be zero-extended
;; into; SI and DI masks require the AVX512BW kmov/kop support.
12824 (define_mode_iterator AVX512ZEXTMASK
12825   [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
;; vptestm: per-element test of operand 1 AND operand 2, producing a
;; mask register result (optionally merged with a zeroing mask).
12827 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
12828   [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12829 	(unspec:<avx512fmaskmode>
12830 	 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12831 	  (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12834   "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12835   [(set_attr "prefix" "evex")
12836    (set_attr "mode" "<sseinsnmode>")])
;; vptestnm: like vptestm above but sets each mask bit when the
;; per-element AND is zero.
12838 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
12839   [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12840 	(unspec:<avx512fmaskmode>
12841 	 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12842 	  (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12845   "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12846   [(set_attr "prefix" "evex")
12847    (set_attr "mode" "<sseinsnmode>")])
;; vptestm whose mask result is consumed zero-extended into a wider
;; mask mode; valid only when the target mask mode is strictly wider
;; than the natural mask mode of the vector (extra bits are zero).
12849 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
12850   [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
12851 	(zero_extend:AVX512ZEXTMASK
12852 	  (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12853 	   [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12854 	    (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12857    && (<AVX512ZEXTMASK:MODE_SIZE>
12858        > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12859   "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12860   [(set_attr "prefix" "evex")
12861    (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;; Zero-extended vptestm with the result additionally ANDed with mask
;; operand 3; emitted as a zero-masked vptestm ({%3}).
12863 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
12864   [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
12865 	(zero_extend:AVX512ZEXTMASK
12866 	  (and:<VI1248_AVX512VLBW:avx512fmaskmode>
12867 	    (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12868 	     [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12869 	      (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12871 	    (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
12873    && (<AVX512ZEXTMASK:MODE_SIZE>
12874        > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12875   "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
12876   [(set_attr "prefix" "evex")
12877    (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;; vptestnm counterpart of the zero-extended vptestm pattern above.
12879 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
12880   [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
12881 	(zero_extend:AVX512ZEXTMASK
12882 	  (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12883 	   [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12884 	    (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12887    && (<AVX512ZEXTMASK:MODE_SIZE>
12888        > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12889   "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12890   [(set_attr "prefix" "evex")
12891    (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;; vptestnm counterpart of the zero-extended, mask-ANDed vptestm
;; pattern above.
12893 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
12894   [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
12895 	(zero_extend:AVX512ZEXTMASK
12896 	  (and:<VI1248_AVX512VLBW:avx512fmaskmode>
12897 	    (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12898 	     [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12899 	      (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12901 	    (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
12903    && (<AVX512ZEXTMASK:MODE_SIZE>
12904        > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12905   "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
12906   [(set_attr "prefix" "evex")
12907    (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
12909 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12911 ;; Parallel integral element swizzling
12913 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two vectors: view both inputs in the half-width
;; packed mode and take the even elements of their concatenation
;; (little-endian truncation).
12915 (define_expand "vec_pack_trunc_<mode>"
12916   [(match_operand:<ssepackmode> 0 "register_operand")
12917    (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
12918    (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
12921   rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
12922   rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
12923   ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Concatenate two QImode mask values into one HImode mask,
;; operand 2 shifted into the high half, operand 1 in the low half.
12927 (define_expand "vec_pack_trunc_qi"
12928   [(set (match_operand:HI 0 "register_operand")
12929 	(ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
12931 		(zero_extend:HI (match_operand:QI 1 "register_operand"))))]
;; Concatenate two HImode/SImode mask values into a double-width mask;
;; operand 3 (the shift count) is the bit width of the input mode.
12934 (define_expand "vec_pack_trunc_<mode>"
12935   [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
12936 	(ior:<DOUBLEMASKMODE>
12937 	  (ashift:<DOUBLEMASKMODE>
12938 	    (zero_extend:<DOUBLEMASKMODE>
12939 	      (match_operand:SWI24 2 "register_operand"))
12941 	  (zero_extend:<DOUBLEMASKMODE>
12942 	    (match_operand:SWI24 1 "register_operand"))))]
12945   operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
;; Pack two sub-byte boolean masks (operand 3 = total element count,
;; 8 or 4) into one QImode mask: mask off the low nunits/2 bits of
;; operand 1, shift operand 2 up by nunits/2 (via kshiftlb when
;; AVX512DQ provides it, otherwise through an HImode kshiftlw), and
;; OR the pieces together with korb.
12948 (define_expand "vec_pack_sbool_trunc_qi"
12949   [(match_operand:QI 0 "register_operand")
12950    (match_operand:QI 1 "register_operand")
12951    (match_operand:QI 2 "register_operand")
12952    (match_operand:QI 3 "const_int_operand")]
12955   HOST_WIDE_INT nunits = INTVAL (operands[3]);
12956   rtx mask, tem1, tem2;
12957   if (nunits != 8 && nunits != 4)
12959   mask = gen_reg_rtx (QImode);
12960   emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1));
12961   tem1 = gen_reg_rtx (QImode);
12962   emit_insn (gen_kandqi (tem1, operands[1], mask));
12963   if (TARGET_AVX512DQ)
12965       tem2 = gen_reg_rtx (QImode);
12966       emit_insn (gen_kashiftqi (tem2, operands[2],
12967 				GEN_INT (nunits / 2)));
12971       tem2 = gen_reg_rtx (HImode);
12972       emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2],
12974 				GEN_INT (nunits / 2)));
12975       tem2 = lowpart_subreg (QImode, tem2, HImode);
12977   emit_insn (gen_kiorqi (operands[0], tem1, tem2));
;; packsswb: narrow each input to half-width elements with signed
;; saturation and concatenate the two halves.
12981 (define_insn "<sse2_avx2>_packsswb<mask_name>"
12982   [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
12983 	(vec_concat:VI1_AVX512
12984 	  (ss_truncate:<ssehalfvecmode>
12985 	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
12986 	  (ss_truncate:<ssehalfvecmode>
12987 	    (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
12988   "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12990    packsswb\t{%2, %0|%0, %2}
12991    vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
12992    vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12993   [(set_attr "isa" "noavx,avx,avx512bw")
12994    (set_attr "type" "sselog")
12995    (set_attr "prefix_data16" "1,*,*")
12996    (set_attr "prefix" "orig,<mask_prefix>,evex")
12997    (set_attr "mode" "<sseinsnmode>")])
;; packssdw: dword-to-word variant of the saturating pack above.
12999 (define_insn "<sse2_avx2>_packssdw<mask_name>"
13000   [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
13001 	(vec_concat:VI2_AVX2
13002 	  (ss_truncate:<ssehalfvecmode>
13003 	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13004 	  (ss_truncate:<ssehalfvecmode>
13005 	    (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13006   "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13008    packssdw\t{%2, %0|%0, %2}
13009    vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13010    vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13011   [(set_attr "isa" "noavx,avx,avx512bw")
13012    (set_attr "type" "sselog")
13013    (set_attr "prefix_data16" "1,*,*")
13014    (set_attr "prefix" "orig,<mask_prefix>,evex")
13015    (set_attr "mode" "<sseinsnmode>")])
;; packuswb: like packsswb but with unsigned saturation.
13017 (define_insn "<sse2_avx2>_packuswb<mask_name>"
13018   [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13019 	(vec_concat:VI1_AVX512
13020 	  (us_truncate:<ssehalfvecmode>
13021 	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13022 	  (us_truncate:<ssehalfvecmode>
13023 	    (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13024   "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13026    packuswb\t{%2, %0|%0, %2}
13027    vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13028    vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13029   [(set_attr "isa" "noavx,avx,avx512bw")
13030    (set_attr "type" "sselog")
13031    (set_attr "prefix_data16" "1,*,*")
13032    (set_attr "prefix" "orig,<mask_prefix>,evex")
13033    (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw, 512-bit: interleave the high 8 bytes within each of the
;; four 128-bit lanes of operands 1 and 2 (the selector picks elements
;; 8-15, 24-31, 40-47, 56-63 of each input, alternating).
13035 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
13036   [(set (match_operand:V64QI 0 "register_operand" "=v")
13039 	    (match_operand:V64QI 1 "register_operand" "v")
13040 	    (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13041 	  (parallel [(const_int 8)  (const_int 72)
13042 		     (const_int 9)  (const_int 73)
13043 		     (const_int 10) (const_int 74)
13044 		     (const_int 11) (const_int 75)
13045 		     (const_int 12) (const_int 76)
13046 		     (const_int 13) (const_int 77)
13047 		     (const_int 14) (const_int 78)
13048 		     (const_int 15) (const_int 79)
13049 		     (const_int 24) (const_int 88)
13050 		     (const_int 25) (const_int 89)
13051 		     (const_int 26) (const_int 90)
13052 		     (const_int 27) (const_int 91)
13053 		     (const_int 28) (const_int 92)
13054 		     (const_int 29) (const_int 93)
13055 		     (const_int 30) (const_int 94)
13056 		     (const_int 31) (const_int 95)
13057 		     (const_int 40) (const_int 104)
13058 		     (const_int 41) (const_int 105)
13059 		     (const_int 42) (const_int 106)
13060 		     (const_int 43) (const_int 107)
13061 		     (const_int 44) (const_int 108)
13062 		     (const_int 45) (const_int 109)
13063 		     (const_int 46) (const_int 110)
13064 		     (const_int 47) (const_int 111)
13065 		     (const_int 56) (const_int 120)
13066 		     (const_int 57) (const_int 121)
13067 		     (const_int 58) (const_int 122)
13068 		     (const_int 59) (const_int 123)
13069 		     (const_int 60) (const_int 124)
13070 		     (const_int 61) (const_int 125)
13071 		     (const_int 62) (const_int 126)
13072 		     (const_int 63) (const_int 127)])))]
13074   "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13075   [(set_attr "type" "sselog")
13076    (set_attr "prefix" "evex")
13077    (set_attr "mode" "XI")])
;; vpunpckhbw, 256-bit: per-128-bit-lane high-byte interleave.
13079 (define_insn "avx2_interleave_highv32qi<mask_name>"
13080   [(set (match_operand:V32QI 0 "register_operand" "=v")
13083 	    (match_operand:V32QI 1 "register_operand" "v")
13084 	    (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
13085 	  (parallel [(const_int 8)  (const_int 40)
13086 		     (const_int 9)  (const_int 41)
13087 		     (const_int 10) (const_int 42)
13088 		     (const_int 11) (const_int 43)
13089 		     (const_int 12) (const_int 44)
13090 		     (const_int 13) (const_int 45)
13091 		     (const_int 14) (const_int 46)
13092 		     (const_int 15) (const_int 47)
13093 		     (const_int 24) (const_int 56)
13094 		     (const_int 25) (const_int 57)
13095 		     (const_int 26) (const_int 58)
13096 		     (const_int 27) (const_int 59)
13097 		     (const_int 28) (const_int 60)
13098 		     (const_int 29) (const_int 61)
13099 		     (const_int 30) (const_int 62)
13100 		     (const_int 31) (const_int 63)])))]
13101   "TARGET_AVX2 && <mask_avx512vl_condition>"
13102   "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13103   [(set_attr "type" "sselog")
13104    (set_attr "prefix" "<mask_prefix>")
13105    (set_attr "mode" "OI")])
;; punpckhbw, 128-bit: interleave the high 8 bytes of the two inputs.
13107 (define_insn "vec_interleave_highv16qi<mask_name>"
13108   [(set (match_operand:V16QI 0 "register_operand" "=x,v")
13111 	    (match_operand:V16QI 1 "register_operand" "0,v")
13112 	    (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
13113 	  (parallel [(const_int 8)  (const_int 24)
13114 		     (const_int 9)  (const_int 25)
13115 		     (const_int 10) (const_int 26)
13116 		     (const_int 11) (const_int 27)
13117 		     (const_int 12) (const_int 28)
13118 		     (const_int 13) (const_int 29)
13119 		     (const_int 14) (const_int 30)
13120 		     (const_int 15) (const_int 31)])))]
13121   "TARGET_SSE2 && <mask_avx512vl_condition>"
13123    punpckhbw\t{%2, %0|%0, %2}
13124    vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13125   [(set_attr "isa" "noavx,avx")
13126    (set_attr "type" "sselog")
13127    (set_attr "prefix_data16" "1,*")
13128    (set_attr "prefix" "orig,<mask_prefix>")
13129    (set_attr "mode" "TI")])
;; vpunpcklbw, 512-bit: per-128-bit-lane low-byte interleave
;; (elements 0-7, 16-23, 32-39, 48-55 of each input, alternating).
13131 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
13132   [(set (match_operand:V64QI 0 "register_operand" "=v")
13135 	    (match_operand:V64QI 1 "register_operand" "v")
13136 	    (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13137 	  (parallel [(const_int 0) (const_int 64)
13138 		     (const_int 1) (const_int 65)
13139 		     (const_int 2) (const_int 66)
13140 		     (const_int 3) (const_int 67)
13141 		     (const_int 4) (const_int 68)
13142 		     (const_int 5) (const_int 69)
13143 		     (const_int 6) (const_int 70)
13144 		     (const_int 7) (const_int 71)
13145 		     (const_int 16) (const_int 80)
13146 		     (const_int 17) (const_int 81)
13147 		     (const_int 18) (const_int 82)
13148 		     (const_int 19) (const_int 83)
13149 		     (const_int 20) (const_int 84)
13150 		     (const_int 21) (const_int 85)
13151 		     (const_int 22) (const_int 86)
13152 		     (const_int 23) (const_int 87)
13153 		     (const_int 32) (const_int 96)
13154 		     (const_int 33) (const_int 97)
13155 		     (const_int 34) (const_int 98)
13156 		     (const_int 35) (const_int 99)
13157 		     (const_int 36) (const_int 100)
13158 		     (const_int 37) (const_int 101)
13159 		     (const_int 38) (const_int 102)
13160 		     (const_int 39) (const_int 103)
13161 		     (const_int 48) (const_int 112)
13162 		     (const_int 49) (const_int 113)
13163 		     (const_int 50) (const_int 114)
13164 		     (const_int 51) (const_int 115)
13165 		     (const_int 52) (const_int 116)
13166 		     (const_int 53) (const_int 117)
13167 		     (const_int 54) (const_int 118)
13168 		     (const_int 55) (const_int 119)])))]
13170   "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13171   [(set_attr "type" "sselog")
13172    (set_attr "prefix" "evex")
13173    (set_attr "mode" "XI")])
;; vpunpcklbw, 256-bit: per-128-bit-lane low-byte interleave.
13175 (define_insn "avx2_interleave_lowv32qi<mask_name>"
13176   [(set (match_operand:V32QI 0 "register_operand" "=v")
13179 	    (match_operand:V32QI 1 "register_operand" "v")
13180 	    (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
13181 	  (parallel [(const_int 0) (const_int 32)
13182 		     (const_int 1) (const_int 33)
13183 		     (const_int 2) (const_int 34)
13184 		     (const_int 3) (const_int 35)
13185 		     (const_int 4) (const_int 36)
13186 		     (const_int 5) (const_int 37)
13187 		     (const_int 6) (const_int 38)
13188 		     (const_int 7) (const_int 39)
13189 		     (const_int 16) (const_int 48)
13190 		     (const_int 17) (const_int 49)
13191 		     (const_int 18) (const_int 50)
13192 		     (const_int 19) (const_int 51)
13193 		     (const_int 20) (const_int 52)
13194 		     (const_int 21) (const_int 53)
13195 		     (const_int 22) (const_int 54)
13196 		     (const_int 23) (const_int 55)])))]
13197   "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13198   "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13199   [(set_attr "type" "sselog")
13200    (set_attr "prefix" "maybe_vex")
13201    (set_attr "mode" "OI")])
;; punpcklbw, 128-bit: interleave the low 8 bytes of the two inputs.
13203 (define_insn "vec_interleave_lowv16qi<mask_name>"
13204   [(set (match_operand:V16QI 0 "register_operand" "=x,v")
13207 	    (match_operand:V16QI 1 "register_operand" "0,v")
13208 	    (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
13209 	  (parallel [(const_int 0) (const_int 16)
13210 		     (const_int 1) (const_int 17)
13211 		     (const_int 2) (const_int 18)
13212 		     (const_int 3) (const_int 19)
13213 		     (const_int 4) (const_int 20)
13214 		     (const_int 5) (const_int 21)
13215 		     (const_int 6) (const_int 22)
13216 		     (const_int 7) (const_int 23)])))]
13217   "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13219    punpcklbw\t{%2, %0|%0, %2}
13220    vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13221   [(set_attr "isa" "noavx,avx")
13222    (set_attr "type" "sselog")
13223    (set_attr "prefix_data16" "1,*")
13224    (set_attr "prefix" "orig,vex")
13225    (set_attr "mode" "TI")])
;; vpunpckhwd, 512-bit: per-128-bit-lane high-word interleave.
13227 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
13228   [(set (match_operand:V32HI 0 "register_operand" "=v")
13231 	    (match_operand:V32HI 1 "register_operand" "v")
13232 	    (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
13233 	  (parallel [(const_int 4) (const_int 36)
13234 		     (const_int 5) (const_int 37)
13235 		     (const_int 6) (const_int 38)
13236 		     (const_int 7) (const_int 39)
13237 		     (const_int 12) (const_int 44)
13238 		     (const_int 13) (const_int 45)
13239 		     (const_int 14) (const_int 46)
13240 		     (const_int 15) (const_int 47)
13241 		     (const_int 20) (const_int 52)
13242 		     (const_int 21) (const_int 53)
13243 		     (const_int 22) (const_int 54)
13244 		     (const_int 23) (const_int 55)
13245 		     (const_int 28) (const_int 60)
13246 		     (const_int 29) (const_int 61)
13247 		     (const_int 30) (const_int 62)
13248 		     (const_int 31) (const_int 63)])))]
13250   "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13251   [(set_attr "type" "sselog")
13252    (set_attr "prefix" "evex")
13253    (set_attr "mode" "XI")])
;; vpunpckhwd, 256-bit: per-128-bit-lane high-word interleave.
13255 (define_insn "avx2_interleave_highv16hi<mask_name>"
13256   [(set (match_operand:V16HI 0 "register_operand" "=v")
13259 	    (match_operand:V16HI 1 "register_operand" "v")
13260 	    (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
13261 	  (parallel [(const_int 4) (const_int 20)
13262 		     (const_int 5) (const_int 21)
13263 		     (const_int 6) (const_int 22)
13264 		     (const_int 7) (const_int 23)
13265 		     (const_int 12) (const_int 28)
13266 		     (const_int 13) (const_int 29)
13267 		     (const_int 14) (const_int 30)
13268 		     (const_int 15) (const_int 31)])))]
13269   "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13270   "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13271   [(set_attr "type" "sselog")
13272    (set_attr "prefix" "maybe_evex")
13273    (set_attr "mode" "OI")])
;; punpckhwd, 128-bit: interleave the high 4 words of the two inputs.
13275 (define_insn "vec_interleave_highv8hi<mask_name>"
13276   [(set (match_operand:V8HI 0 "register_operand" "=x,v")
13279 	    (match_operand:V8HI 1 "register_operand" "0,v")
13280 	    (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
13281 	  (parallel [(const_int 4) (const_int 12)
13282 		     (const_int 5) (const_int 13)
13283 		     (const_int 6) (const_int 14)
13284 		     (const_int 7) (const_int 15)])))]
13285   "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13287    punpckhwd\t{%2, %0|%0, %2}
13288    vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13289   [(set_attr "isa" "noavx,avx")
13290    (set_attr "type" "sselog")
13291    (set_attr "prefix_data16" "1,*")
13292    (set_attr "prefix" "orig,maybe_vex")
13293    (set_attr "mode" "TI")])
;; vpunpcklwd, 512-bit: per-128-bit-lane low-word interleave.
13295 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
13296   [(set (match_operand:V32HI 0 "register_operand" "=v")
13299 	    (match_operand:V32HI 1 "register_operand" "v")
13300 	    (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
13301 	  (parallel [(const_int 0) (const_int 32)
13302 		     (const_int 1) (const_int 33)
13303 		     (const_int 2) (const_int 34)
13304 		     (const_int 3) (const_int 35)
13305 		     (const_int 8) (const_int 40)
13306 		     (const_int 9) (const_int 41)
13307 		     (const_int 10) (const_int 42)
13308 		     (const_int 11) (const_int 43)
13309 		     (const_int 16) (const_int 48)
13310 		     (const_int 17) (const_int 49)
13311 		     (const_int 18) (const_int 50)
13312 		     (const_int 19) (const_int 51)
13313 		     (const_int 24) (const_int 56)
13314 		     (const_int 25) (const_int 57)
13315 		     (const_int 26) (const_int 58)
13316 		     (const_int 27) (const_int 59)])))]
13318   "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13319   [(set_attr "type" "sselog")
13320    (set_attr "prefix" "evex")
13321    (set_attr "mode" "XI")])
;; vpunpcklwd, 256-bit: per-128-bit-lane low-word interleave.
13323 (define_insn "avx2_interleave_lowv16hi<mask_name>"
13324   [(set (match_operand:V16HI 0 "register_operand" "=v")
13327 	    (match_operand:V16HI 1 "register_operand" "v")
13328 	    (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
13329 	  (parallel [(const_int 0) (const_int 16)
13330 		     (const_int 1) (const_int 17)
13331 		     (const_int 2) (const_int 18)
13332 		     (const_int 3) (const_int 19)
13333 		     (const_int 8) (const_int 24)
13334 		     (const_int 9) (const_int 25)
13335 		     (const_int 10) (const_int 26)
13336 		     (const_int 11) (const_int 27)])))]
13337   "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13338   "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13339   [(set_attr "type" "sselog")
13340    (set_attr "prefix" "maybe_evex")
13341    (set_attr "mode" "OI")])
;; punpcklwd, 128-bit: interleave the low 4 words of the two inputs.
13343 (define_insn "vec_interleave_lowv8hi<mask_name>"
13344   [(set (match_operand:V8HI 0 "register_operand" "=x,v")
13347 	    (match_operand:V8HI 1 "register_operand" "0,v")
13348 	    (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
13349 	  (parallel [(const_int 0) (const_int 8)
13350 		     (const_int 1) (const_int 9)
13351 		     (const_int 2) (const_int 10)
13352 		     (const_int 3) (const_int 11)])))]
13353   "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13355    punpcklwd\t{%2, %0|%0, %2}
13356    vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13357   [(set_attr "isa" "noavx,avx")
13358    (set_attr "type" "sselog")
13359    (set_attr "prefix_data16" "1,*")
13360    (set_attr "prefix" "orig,maybe_evex")
13361    (set_attr "mode" "TI")])
;; vpunpckhdq, 256-bit: per-128-bit-lane high-dword interleave.
13363 (define_insn "avx2_interleave_highv8si<mask_name>"
13364   [(set (match_operand:V8SI 0 "register_operand" "=v")
13367 	    (match_operand:V8SI 1 "register_operand" "v")
13368 	    (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
13369 	  (parallel [(const_int 2) (const_int 10)
13370 		     (const_int 3) (const_int 11)
13371 		     (const_int 6) (const_int 14)
13372 		     (const_int 7) (const_int 15)])))]
13373   "TARGET_AVX2 && <mask_avx512vl_condition>"
13374   "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13375   [(set_attr "type" "sselog")
13376    (set_attr "prefix" "maybe_evex")
13377    (set_attr "mode" "OI")])
;; vpunpckhdq, 512-bit: per-128-bit-lane high-dword interleave.
13379 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
13380   [(set (match_operand:V16SI 0 "register_operand" "=v")
13383 	    (match_operand:V16SI 1 "register_operand" "v")
13384 	    (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
13385 	  (parallel [(const_int 2) (const_int 18)
13386 		     (const_int 3) (const_int 19)
13387 		     (const_int 6) (const_int 22)
13388 		     (const_int 7) (const_int 23)
13389 		     (const_int 10) (const_int 26)
13390 		     (const_int 11) (const_int 27)
13391 		     (const_int 14) (const_int 30)
13392 		     (const_int 15) (const_int 31)])))]
13394   "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13395   [(set_attr "type" "sselog")
13396    (set_attr "prefix" "evex")
13397    (set_attr "mode" "XI")])
;; punpckhdq, 128-bit: interleave the high 2 dwords of the two inputs.
13400 (define_insn "vec_interleave_highv4si<mask_name>"
13401   [(set (match_operand:V4SI 0 "register_operand" "=x,v")
13404 	    (match_operand:V4SI 1 "register_operand" "0,v")
13405 	    (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
13406 	  (parallel [(const_int 2) (const_int 6)
13407 		     (const_int 3) (const_int 7)])))]
13408   "TARGET_SSE2 && <mask_avx512vl_condition>"
13410    punpckhdq\t{%2, %0|%0, %2}
13411    vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13412   [(set_attr "isa" "noavx,avx")
13413    (set_attr "type" "sselog")
13414    (set_attr "prefix_data16" "1,*")
13415    (set_attr "prefix" "orig,maybe_vex")
13416    (set_attr "mode" "TI")])
;; vpunpckldq, 256-bit: per-128-bit-lane low-dword interleave.
13418 (define_insn "avx2_interleave_lowv8si<mask_name>"
13419   [(set (match_operand:V8SI 0 "register_operand" "=v")
13422 	    (match_operand:V8SI 1 "register_operand" "v")
13423 	    (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
13424 	  (parallel [(const_int 0) (const_int 8)
13425 		     (const_int 1) (const_int 9)
13426 		     (const_int 4) (const_int 12)
13427 		     (const_int 5) (const_int 13)])))]
13428   "TARGET_AVX2 && <mask_avx512vl_condition>"
13429   "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13430   [(set_attr "type" "sselog")
13431    (set_attr "prefix" "maybe_evex")
13432    (set_attr "mode" "OI")])
;; vpunpckldq, 512-bit: per-128-bit-lane low-dword interleave.
13434 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
13435   [(set (match_operand:V16SI 0 "register_operand" "=v")
13438 	    (match_operand:V16SI 1 "register_operand" "v")
13439 	    (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
13440 	  (parallel [(const_int 0) (const_int 16)
13441 		     (const_int 1) (const_int 17)
13442 		     (const_int 4) (const_int 20)
13443 		     (const_int 5) (const_int 21)
13444 		     (const_int 8) (const_int 24)
13445 		     (const_int 9) (const_int 25)
13446 		     (const_int 12) (const_int 28)
13447 		     (const_int 13) (const_int 29)])))]
13449   "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13450   [(set_attr "type" "sselog")
13451    (set_attr "prefix" "evex")
13452    (set_attr "mode" "XI")])
;; punpckldq, 128-bit: interleave the low 2 dwords of the two inputs.
13454 (define_insn "vec_interleave_lowv4si<mask_name>"
13455   [(set (match_operand:V4SI 0 "register_operand" "=x,v")
13458 	    (match_operand:V4SI 1 "register_operand" "0,v")
13459 	    (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
13460 	  (parallel [(const_int 0) (const_int 4)
13461 		     (const_int 1) (const_int 5)])))]
13462   "TARGET_SSE2 && <mask_avx512vl_condition>"
13464    punpckldq\t{%2, %0|%0, %2}
13465    vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13466   [(set_attr "isa" "noavx,avx")
13467    (set_attr "type" "sselog")
13468    (set_attr "prefix_data16" "1,*")
13469    (set_attr "prefix" "orig,vex")
13470    (set_attr "mode" "TI")])
;; True (cross-lane) 256-bit high interleave: the AVX2 unpack insns
;; work per 128-bit lane, so do both in-lane unpacks and then combine
;; the two high lanes with vperm2i128 (selector 1 | 3<<4).
13472 (define_expand "vec_interleave_high<mode>"
13473   [(match_operand:VI_256 0 "register_operand")
13474    (match_operand:VI_256 1 "register_operand")
13475    (match_operand:VI_256 2 "nonimmediate_operand")]
13478   rtx t1 = gen_reg_rtx (<MODE>mode);
13479   rtx t2 = gen_reg_rtx (<MODE>mode);
13480   rtx t3 = gen_reg_rtx (V4DImode);
13481   emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
13482   emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
13483   emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
13484 				gen_lowpart (V4DImode, t2),
13485 				GEN_INT (1 + (3 << 4))));
13486   emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; True (cross-lane) 256-bit low interleave: as above, but vperm2i128
;; combines the two low lanes (selector 0 | 2<<4).
13490 (define_expand "vec_interleave_low<mode>"
13491   [(match_operand:VI_256 0 "register_operand")
13492    (match_operand:VI_256 1 "register_operand")
13493    (match_operand:VI_256 2 "nonimmediate_operand")]
13496   rtx t1 = gen_reg_rtx (<MODE>mode);
13497   rtx t2 = gen_reg_rtx (<MODE>mode);
13498   rtx t3 = gen_reg_rtx (V4DImode);
13499   emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
13500   emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
13501   emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
13502 				gen_lowpart (V4DImode, t2),
13503 				GEN_INT (0 + (2 << 4))));
13504   emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
13508 ;; Modes handled by pinsr patterns.
;; V8HI (pinsrw) needs only SSE2; byte/dword insert needs SSE4.1 and
;; qword insert additionally needs 64-bit mode.
13509 (define_mode_iterator PINSR_MODE
13510   [(V16QI "TARGET_SSE4_1") V8HI
13511    (V4SI "TARGET_SSE4_1")
13512    (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix: pinsrw is an SSE2 instruction, the others are
;; SSE4.1.
13514 (define_mode_attr sse2p4_1
13515   [(V16QI "sse4_1") (V8HI "sse2")
13516    (V4SI "sse4_1") (V2DI "sse4_1")])
;; ISA attribute for the EVEX-encoded pinsr alternatives: byte/word
;; inserts come from AVX512BW, dword/qword from AVX512DQ.
13518 (define_mode_attr pinsr_evex_isa
13519   [(V16QI "avx512bw") (V8HI "avx512bw")
13520    (V4SI "avx512dq") (V2DI "avx512dq")])
13522 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; pinsrb/w/d/q: insert scalar operand 2 into vector operand 1 at the
;; element selected by operand 3, which arrives as a one-bit vec_merge
;; mask (hence the exact_log2 check) and is converted to an element
;; index in the output code.  Sub-SImode scalars are printed with %k2
;; since the GPR source is accessed as a 32-bit register.
13523 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
13524   [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
13525 	(vec_merge:PINSR_MODE
13526 	  (vec_duplicate:PINSR_MODE
13527 	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
13528 	  (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
13529 	  (match_operand:SI 3 "const_int_operand")))]
13531    && ((unsigned) exact_log2 (INTVAL (operands[3]))
13532        < GET_MODE_NUNITS (<MODE>mode))"
13534   operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
13536   switch (which_alternative)
13539       if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
13540 	return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
13543       return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
13546       if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
13547 	return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
13551       return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13553       gcc_unreachable ();
13556   [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
13557    (set_attr "type" "sselog")
13558    (set (attr "prefix_rex")
13560 	  (and (not (match_test "TARGET_AVX"))
13561 	       (eq (const_string "<MODE>mode") (const_string "V2DImode")))
13563 	  (const_string "*")))
13564    (set (attr "prefix_data16")
13566 	  (and (not (match_test "TARGET_AVX"))
13567 	       (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13569 	  (const_string "*")))
13570    (set (attr "prefix_extra")
13572 	  (and (not (match_test "TARGET_AVX"))
13573 	       (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13575 	  (const_string "1")))
13576    (set_attr "length_immediate" "1")
13577    (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
13578    (set_attr "mode" "TI")])
;; Masked 128/256-bit insert into a 512-bit vector: convert the
;; quarter index (operand 3) into a per-element vec_merge selector
;; (all-ones with the target quarter's bits cleared -- 4 elements per
;; quarter for 4-byte units, 2 for 8-byte) and defer to the _1_mask
;; insn pattern.
13580 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
13581   [(match_operand:AVX512_VEC 0 "register_operand")
13582    (match_operand:AVX512_VEC 1 "register_operand")
13583    (match_operand:<ssequartermode> 2 "nonimmediate_operand")
13584    (match_operand:SI 3 "const_0_to_3_operand")
13585    (match_operand:AVX512_VEC 4 "register_operand")
13586    (match_operand:<avx512fmaskmode> 5 "register_operand")]
13589   int mask, selector;
13590   mask = INTVAL (operands[3]);
13591   selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
13592 	      ? 0xFFFF ^ (0x000F << mask * 4)
13593 	      : 0xFF ^ (0x03 << mask * 2));
13594   emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
13595 	     (operands[0], operands[1], operands[2], GEN_INT (selector),
13596 	      operands[4], operands[5]));
;; Insert into quarter 0.  Alternative 0 keeps the rest of operand 1 and
;; emits vinsert with immediate 0; alternatives 1-2 have a zero operand 1
;; ("C" constraint), so the whole result is just the source widened with
;; zeros and a plain vector move of the quarter suffices (vmovaps/vmovapd
;; or vmovdqa, with the 32/64-suffixed EVEX forms for alternative 2).
13600 (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
13601 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
13602 (vec_merge:AVX512_VEC
13603 (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
13604 (vec_duplicate:AVX512_VEC
13605 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
13606 (match_operand:SI 3 "const_int_operand" "n,n,n")))]
13608 && (INTVAL (operands[3])
13609 == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
13611 if (which_alternative == 0)
13612 return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
13613 switch (<MODE>mode)
13616 return "vmovapd\t{%2, %x0|%x0, %2}";
13618 return "vmovaps\t{%2, %x0|%x0, %2}";
13620 return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
13621 : "vmovdqa\t{%2, %x0|%x0, %2}";
13623 return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
13624 : "vmovdqa\t{%2, %x0|%x0, %2}";
13626 gcc_unreachable ();
13629 [(set_attr "type" "sselog,ssemov,ssemov")
13630 (set_attr "length_immediate" "1,0,0")
13631 (set_attr "prefix" "evex,vex,evex")
13632 (set_attr "mode" "<sseinsnmode>,<ssequarterinsnmode>,<ssequarterinsnmode>")])
;; The real vinsert32x4/64x2 insn.  The vec_merge selector in operands[3]
;; is decoded back into the 2-bit quarter index for the immediate: each
;; else-if matches one quarter's selector pattern (4-byte elements use the
;; 16-bit masks, 8-byte elements the 8-bit ones).
;; NOTE(review): the `mask = N;` assignments on the matched branches are
;; elided in this extract -- confirm against the full upstream sse.md.
13634 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
13635 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
13636 (vec_merge:AVX512_VEC
13637 (match_operand:AVX512_VEC 1 "register_operand" "v")
13638 (vec_duplicate:AVX512_VEC
13639 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
13640 (match_operand:SI 3 "const_int_operand" "n")))]
13644 int selector = INTVAL (operands[3]);
13646 if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))
13648 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3))
13650 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF))
13652 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0x0FFF : 0x3F))
13655 gcc_unreachable ();
13657 operands[3] = GEN_INT (mask);
13659 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
13661 [(set_attr "type" "sselog")
13662 (set_attr "length_immediate" "1")
13663 (set_attr "prefix" "evex")
13664 (set_attr "mode" "<sseinsnmode>")])
;; Masked 256-bit insert into a 512-bit vector: dispatch on the half index
;; in operands[3] to vec_set_lo_* (half 0) or vec_set_hi_* (half 1).
;; NOTE(review): the tail of this expander (the hi branch's final arguments
;; and closing lines) is elided in this extract.
13666 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
13667 [(match_operand:AVX512_VEC_2 0 "register_operand")
13668 (match_operand:AVX512_VEC_2 1 "register_operand")
13669 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
13670 (match_operand:SI 3 "const_0_to_1_operand")
13671 (match_operand:AVX512_VEC_2 4 "register_operand")
13672 (match_operand:<avx512fmaskmode> 5 "register_operand")]
13675 int mask = INTVAL (operands[3]);
13677 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
13678 operands[2], operands[4],
13681 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
13682 operands[2], operands[4],
;; Replace the LOW 256-bit half of a 16-element (32-bit unit) 512-bit
;; vector: operand 2 supplies the new low half, the vec_select keeps
;; elements 8..15 of operand 1.  Emits vinsert{f,i}32x8 with immediate 0.
13687 (define_insn "vec_set_lo_<mode><mask_name>"
13688 [(set (match_operand:V16FI 0 "register_operand" "=v")
13690 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
13691 (vec_select:<ssehalfvecmode>
13692 (match_operand:V16FI 1 "register_operand" "v")
13693 (parallel [(const_int 8) (const_int 9)
13694 (const_int 10) (const_int 11)
13695 (const_int 12) (const_int 13)
13696 (const_int 14) (const_int 15)]))))]
13698 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
13699 [(set_attr "type" "sselog")
13700 (set_attr "length_immediate" "1")
13701 (set_attr "prefix" "evex")
13702 (set_attr "mode" "<sseinsnmode>")])
;; Replace the HIGH 256-bit half of a 16-element 512-bit vector: keeps
;; elements 0..7 of operand 1, inserts operand 2 above them.  Emits
;; vinsert{f,i}32x8 with immediate 1.
13704 (define_insn "vec_set_hi_<mode><mask_name>"
13705 [(set (match_operand:V16FI 0 "register_operand" "=v")
13707 (vec_select:<ssehalfvecmode>
13708 (match_operand:V16FI 1 "register_operand" "v")
13709 (parallel [(const_int 0) (const_int 1)
13710 (const_int 2) (const_int 3)
13711 (const_int 4) (const_int 5)
13712 (const_int 6) (const_int 7)]))
13713 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
13715 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
13716 [(set_attr "type" "sselog")
13717 (set_attr "length_immediate" "1")
13718 (set_attr "prefix" "evex")
13719 (set_attr "mode" "<sseinsnmode>")])
;; Replace the LOW 256-bit half of an 8-element (64-bit unit) 512-bit
;; vector: keeps elements 4..7 of operand 1.  Emits vinsert{f,i}64x4
;; with immediate 0.
13721 (define_insn "vec_set_lo_<mode><mask_name>"
13722 [(set (match_operand:V8FI 0 "register_operand" "=v")
13724 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
13725 (vec_select:<ssehalfvecmode>
13726 (match_operand:V8FI 1 "register_operand" "v")
13727 (parallel [(const_int 4) (const_int 5)
13728 (const_int 6) (const_int 7)]))))]
13730 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
13731 [(set_attr "type" "sselog")
13732 (set_attr "length_immediate" "1")
13733 (set_attr "prefix" "evex")
13734 (set_attr "mode" "XI")])
;; Replace the HIGH 256-bit half of an 8-element 512-bit vector: keeps
;; elements 0..3 of operand 1.  Emits vinsert{f,i}64x4 with immediate 1.
13736 (define_insn "vec_set_hi_<mode><mask_name>"
13737 [(set (match_operand:V8FI 0 "register_operand" "=v")
13739 (vec_select:<ssehalfvecmode>
13740 (match_operand:V8FI 1 "register_operand" "v")
13741 (parallel [(const_int 0) (const_int 1)
13742 (const_int 2) (const_int 3)]))
13743 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
13745 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
13746 [(set_attr "type" "sselog")
13747 (set_attr "length_immediate" "1")
13748 (set_attr "prefix" "evex")
13749 (set_attr "mode" "XI")])
;; Masked 256-bit vshuf{f,i}64x2: expand the 2-bit immediate into explicit
;; element indices for the _1_mask pattern -- bit 0 selects which 128-bit
;; pair of operand 1 becomes the low lane (indices 0/1 or 2/3), bit 1
;; selects the pair of operand 2 for the high lane (indices 4/5 or 6/7
;; of the concatenation).
13751 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
13752 [(match_operand:VI8F_256 0 "register_operand")
13753 (match_operand:VI8F_256 1 "register_operand")
13754 (match_operand:VI8F_256 2 "nonimmediate_operand")
13755 (match_operand:SI 3 "const_0_to_3_operand")
13756 (match_operand:VI8F_256 4 "register_operand")
13757 (match_operand:QI 5 "register_operand")]
13760 int mask = INTVAL (operands[3]);
13761 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
13762 (operands[0], operands[1], operands[2],
13763 GEN_INT (((mask >> 0) & 1) * 2 + 0),
13764 GEN_INT (((mask >> 0) & 1) * 2 + 1),
13765 GEN_INT (((mask >> 1) & 1) * 2 + 4),
13766 GEN_INT (((mask >> 1) & 1) * 2 + 5),
13767 operands[4], operands[5]));
;; 256-bit vshuf{f,i}64x2 insn.  Matches a vec_select over the concat of
;; operands 1 and 2; the condition requires each index pair to be an
;; even/odd consecutive pair (i.e. a whole 128-bit chunk).  The output
;; body recomposes the 2-bit immediate from the pair starts.
13771 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
13772 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
13773 (vec_select:VI8F_256
13774 (vec_concat:<ssedoublemode>
13775 (match_operand:VI8F_256 1 "register_operand" "v")
13776 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
13777 (parallel [(match_operand 3 "const_0_to_3_operand")
13778 (match_operand 4 "const_0_to_3_operand")
13779 (match_operand 5 "const_4_to_7_operand")
13780 (match_operand 6 "const_4_to_7_operand")])))]
13782 && (INTVAL (operands[3]) & 1) == 0
13783 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
13784 && (INTVAL (operands[5]) & 1) == 0
13785 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
13788 mask = INTVAL (operands[3]) / 2;
13789 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
13790 operands[3] = GEN_INT (mask);
13791 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
13793 [(set_attr "type" "sselog")
13794 (set_attr "length_immediate" "1")
13795 (set_attr "prefix" "evex")
13796 (set_attr "mode" "XI")])
;; Masked 512-bit vshuf{f,i}64x2: each 2-bit field of the 8-bit immediate
;; selects one 128-bit chunk.  Fields 0-1 pick chunks of operand 1
;; (indices 0..7), fields 2-3 pick chunks of operand 2 (indices 8..15
;; of the concatenation).  Forwards explicit indices to the _1_mask insn.
13798 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
13799 [(match_operand:V8FI 0 "register_operand")
13800 (match_operand:V8FI 1 "register_operand")
13801 (match_operand:V8FI 2 "nonimmediate_operand")
13802 (match_operand:SI 3 "const_0_to_255_operand")
13803 (match_operand:V8FI 4 "register_operand")
13804 (match_operand:QI 5 "register_operand")]
13807 int mask = INTVAL (operands[3]);
13808 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
13809 (operands[0], operands[1], operands[2],
13810 GEN_INT (((mask >> 0) & 3) * 2),
13811 GEN_INT (((mask >> 0) & 3) * 2 + 1),
13812 GEN_INT (((mask >> 2) & 3) * 2),
13813 GEN_INT (((mask >> 2) & 3) * 2 + 1),
13814 GEN_INT (((mask >> 4) & 3) * 2 + 8),
13815 GEN_INT (((mask >> 4) & 3) * 2 + 9),
13816 GEN_INT (((mask >> 6) & 3) * 2 + 8),
13817 GEN_INT (((mask >> 6) & 3) * 2 + 9),
13818 operands[4], operands[5]));
;; 512-bit vshuf{f,i}64x2 insn.  The condition checks that each of the
;; four index pairs is an aligned consecutive (even,odd) pair, i.e. a
;; whole 128-bit chunk; the output body folds the four chunk numbers
;; back into the 8-bit immediate (2 bits per field).
13822 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
13823 [(set (match_operand:V8FI 0 "register_operand" "=v")
13825 (vec_concat:<ssedoublemode>
13826 (match_operand:V8FI 1 "register_operand" "v")
13827 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
13828 (parallel [(match_operand 3 "const_0_to_7_operand")
13829 (match_operand 4 "const_0_to_7_operand")
13830 (match_operand 5 "const_0_to_7_operand")
13831 (match_operand 6 "const_0_to_7_operand")
13832 (match_operand 7 "const_8_to_15_operand")
13833 (match_operand 8 "const_8_to_15_operand")
13834 (match_operand 9 "const_8_to_15_operand")
13835 (match_operand 10 "const_8_to_15_operand")])))]
13837 && (INTVAL (operands[3]) & 1) == 0
13838 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
13839 && (INTVAL (operands[5]) & 1) == 0
13840 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
13841 && (INTVAL (operands[7]) & 1) == 0
13842 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
13843 && (INTVAL (operands[9]) & 1) == 0
13844 && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
13847 mask = INTVAL (operands[3]) / 2;
13848 mask |= INTVAL (operands[5]) / 2 << 2;
13849 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
13850 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
13851 operands[3] = GEN_INT (mask);
13853 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
13855 [(set_attr "type" "sselog")
13856 (set_attr "length_immediate" "1")
13857 (set_attr "prefix" "evex")
13858 (set_attr "mode" "<sseinsnmode>")])
;; Masked 256-bit vshuf{f,i}32x4: bit 0 of the immediate picks the 128-bit
;; chunk (4 elements) of operand 1 for the low lane, bit 1 the chunk of
;; operand 2 for the high lane (concat indices 8..15).  Forwards explicit
;; per-element indices to the _1_mask insn.
13860 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
13861 [(match_operand:VI4F_256 0 "register_operand")
13862 (match_operand:VI4F_256 1 "register_operand")
13863 (match_operand:VI4F_256 2 "nonimmediate_operand")
13864 (match_operand:SI 3 "const_0_to_3_operand")
13865 (match_operand:VI4F_256 4 "register_operand")
13866 (match_operand:QI 5 "register_operand")]
13869 int mask = INTVAL (operands[3]);
13870 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
13871 (operands[0], operands[1], operands[2],
13872 GEN_INT (((mask >> 0) & 1) * 4 + 0),
13873 GEN_INT (((mask >> 0) & 1) * 4 + 1),
13874 GEN_INT (((mask >> 0) & 1) * 4 + 2),
13875 GEN_INT (((mask >> 0) & 1) * 4 + 3),
13876 GEN_INT (((mask >> 1) & 1) * 4 + 8),
13877 GEN_INT (((mask >> 1) & 1) * 4 + 9),
13878 GEN_INT (((mask >> 1) & 1) * 4 + 10),
13879 GEN_INT (((mask >> 1) & 1) * 4 + 11),
13880 operands[4], operands[5]));
;; 256-bit vshuf{f,i}32x4 insn.  The condition requires each quad of
;; indices to be four consecutive values starting at a multiple of 4
;; (one whole 128-bit chunk); the output recomposes the 2-bit immediate
;; from the two chunk numbers.
13884 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
13885 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
13886 (vec_select:VI4F_256
13887 (vec_concat:<ssedoublemode>
13888 (match_operand:VI4F_256 1 "register_operand" "v")
13889 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
13890 (parallel [(match_operand 3 "const_0_to_7_operand")
13891 (match_operand 4 "const_0_to_7_operand")
13892 (match_operand 5 "const_0_to_7_operand")
13893 (match_operand 6 "const_0_to_7_operand")
13894 (match_operand 7 "const_8_to_15_operand")
13895 (match_operand 8 "const_8_to_15_operand")
13896 (match_operand 9 "const_8_to_15_operand")
13897 (match_operand 10 "const_8_to_15_operand")])))]
13899 && (INTVAL (operands[3]) & 3) == 0
13900 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
13901 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
13902 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
13903 && (INTVAL (operands[7]) & 3) == 0
13904 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
13905 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
13906 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
13909 mask = INTVAL (operands[3]) / 4;
13910 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
13911 operands[3] = GEN_INT (mask);
13913 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
13915 [(set_attr "type" "sselog")
13916 (set_attr "length_immediate" "1")
13917 (set_attr "prefix" "evex")
13918 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit vshuf{f,i}32x4: each 2-bit field of the immediate picks
;; a 128-bit chunk (4 elements).  Fields 0-1 index operand 1 (0..15),
;; fields 2-3 index operand 2 (concat indices 16..31).  Forwards 16
;; explicit element indices to the _1_mask insn.
13920 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
13921 [(match_operand:V16FI 0 "register_operand")
13922 (match_operand:V16FI 1 "register_operand")
13923 (match_operand:V16FI 2 "nonimmediate_operand")
13924 (match_operand:SI 3 "const_0_to_255_operand")
13925 (match_operand:V16FI 4 "register_operand")
13926 (match_operand:HI 5 "register_operand")]
13929 int mask = INTVAL (operands[3]);
13930 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
13931 (operands[0], operands[1], operands[2],
13932 GEN_INT (((mask >> 0) & 3) * 4),
13933 GEN_INT (((mask >> 0) & 3) * 4 + 1),
13934 GEN_INT (((mask >> 0) & 3) * 4 + 2),
13935 GEN_INT (((mask >> 0) & 3) * 4 + 3),
13936 GEN_INT (((mask >> 2) & 3) * 4),
13937 GEN_INT (((mask >> 2) & 3) * 4 + 1),
13938 GEN_INT (((mask >> 2) & 3) * 4 + 2),
13939 GEN_INT (((mask >> 2) & 3) * 4 + 3),
13940 GEN_INT (((mask >> 4) & 3) * 4 + 16),
13941 GEN_INT (((mask >> 4) & 3) * 4 + 17),
13942 GEN_INT (((mask >> 4) & 3) * 4 + 18),
13943 GEN_INT (((mask >> 4) & 3) * 4 + 19),
13944 GEN_INT (((mask >> 6) & 3) * 4 + 16),
13945 GEN_INT (((mask >> 6) & 3) * 4 + 17),
13946 GEN_INT (((mask >> 6) & 3) * 4 + 18),
13947 GEN_INT (((mask >> 6) & 3) * 4 + 19),
13948 operands[4], operands[5]));
;; 512-bit vshuf{f,i}32x4 insn.  Each quad of indices must be four
;; consecutive values starting at a multiple of 4 (a whole 128-bit
;; chunk); the output folds the four chunk numbers into the 8-bit
;; immediate, subtracting 16 for the two quads taken from operand 2.
13952 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
13953 [(set (match_operand:V16FI 0 "register_operand" "=v")
13955 (vec_concat:<ssedoublemode>
13956 (match_operand:V16FI 1 "register_operand" "v")
13957 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
13958 (parallel [(match_operand 3 "const_0_to_15_operand")
13959 (match_operand 4 "const_0_to_15_operand")
13960 (match_operand 5 "const_0_to_15_operand")
13961 (match_operand 6 "const_0_to_15_operand")
13962 (match_operand 7 "const_0_to_15_operand")
13963 (match_operand 8 "const_0_to_15_operand")
13964 (match_operand 9 "const_0_to_15_operand")
13965 (match_operand 10 "const_0_to_15_operand")
13966 (match_operand 11 "const_16_to_31_operand")
13967 (match_operand 12 "const_16_to_31_operand")
13968 (match_operand 13 "const_16_to_31_operand")
13969 (match_operand 14 "const_16_to_31_operand")
13970 (match_operand 15 "const_16_to_31_operand")
13971 (match_operand 16 "const_16_to_31_operand")
13972 (match_operand 17 "const_16_to_31_operand")
13973 (match_operand 18 "const_16_to_31_operand")])))]
13975 && (INTVAL (operands[3]) & 3) == 0
13976 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
13977 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
13978 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
13979 && (INTVAL (operands[7]) & 3) == 0
13980 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
13981 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
13982 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
13983 && (INTVAL (operands[11]) & 3) == 0
13984 && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
13985 && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
13986 && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
13987 && (INTVAL (operands[15]) & 3) == 0
13988 && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
13989 && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
13990 && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
13993 mask = INTVAL (operands[3]) / 4;
13994 mask |= INTVAL (operands[7]) / 4 << 2;
13995 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
13996 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
13997 operands[3] = GEN_INT (mask);
13999 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
14001 [(set_attr "type" "sselog")
14002 (set_attr "length_immediate" "1")
14003 (set_attr "prefix" "evex")
14004 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit vpshufd: split the 8-bit immediate into four 2-bit
;; selectors and replicate them across all four 128-bit lanes (offsets
;; +0, +4, +8, +12), then forward to avx512f_pshufd_1_mask.
14006 (define_expand "avx512f_pshufdv3_mask"
14007 [(match_operand:V16SI 0 "register_operand")
14008 (match_operand:V16SI 1 "nonimmediate_operand")
14009 (match_operand:SI 2 "const_0_to_255_operand")
14010 (match_operand:V16SI 3 "register_operand")
14011 (match_operand:HI 4 "register_operand")]
14014 int mask = INTVAL (operands[2]);
14015 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
14016 GEN_INT ((mask >> 0) & 3),
14017 GEN_INT ((mask >> 2) & 3),
14018 GEN_INT ((mask >> 4) & 3),
14019 GEN_INT ((mask >> 6) & 3),
14020 GEN_INT (((mask >> 0) & 3) + 4),
14021 GEN_INT (((mask >> 2) & 3) + 4),
14022 GEN_INT (((mask >> 4) & 3) + 4),
14023 GEN_INT (((mask >> 6) & 3) + 4),
14024 GEN_INT (((mask >> 0) & 3) + 8),
14025 GEN_INT (((mask >> 2) & 3) + 8),
14026 GEN_INT (((mask >> 4) & 3) + 8),
14027 GEN_INT (((mask >> 6) & 3) + 8),
14028 GEN_INT (((mask >> 0) & 3) + 12),
14029 GEN_INT (((mask >> 2) & 3) + 12),
14030 GEN_INT (((mask >> 4) & 3) + 12),
14031 GEN_INT (((mask >> 6) & 3) + 12),
14032 operands[3], operands[4]));
;; 512-bit vpshufd insn.  The condition enforces that the selectors of
;; lanes 1..3 are the lane-0 selectors offset by +4/+8/+12, i.e. the same
;; shuffle replicated per 128-bit lane; the output rebuilds the 8-bit
;; immediate from the four lane-0 selectors (2 bits each).
14036 (define_insn "avx512f_pshufd_1<mask_name>"
14037 [(set (match_operand:V16SI 0 "register_operand" "=v")
14039 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
14040 (parallel [(match_operand 2 "const_0_to_3_operand")
14041 (match_operand 3 "const_0_to_3_operand")
14042 (match_operand 4 "const_0_to_3_operand")
14043 (match_operand 5 "const_0_to_3_operand")
14044 (match_operand 6 "const_4_to_7_operand")
14045 (match_operand 7 "const_4_to_7_operand")
14046 (match_operand 8 "const_4_to_7_operand")
14047 (match_operand 9 "const_4_to_7_operand")
14048 (match_operand 10 "const_8_to_11_operand")
14049 (match_operand 11 "const_8_to_11_operand")
14050 (match_operand 12 "const_8_to_11_operand")
14051 (match_operand 13 "const_8_to_11_operand")
14052 (match_operand 14 "const_12_to_15_operand")
14053 (match_operand 15 "const_12_to_15_operand")
14054 (match_operand 16 "const_12_to_15_operand")
14055 (match_operand 17 "const_12_to_15_operand")])))]
14057 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
14058 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
14059 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
14060 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
14061 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
14062 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
14063 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
14064 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
14065 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
14066 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
14067 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
14068 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
14071 mask |= INTVAL (operands[2]) << 0;
14072 mask |= INTVAL (operands[3]) << 2;
14073 mask |= INTVAL (operands[4]) << 4;
14074 mask |= INTVAL (operands[5]) << 6;
14075 operands[2] = GEN_INT (mask);
14077 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
14079 [(set_attr "type" "sselog1")
14080 (set_attr "prefix" "evex")
14081 (set_attr "length_immediate" "1")
14082 (set_attr "mode" "XI")])
;; Masked 256-bit vpshufd: split the immediate into four 2-bit selectors
;; replicated across both 128-bit lanes (+0, +4), then forward to the
;; avx2_pshufd_1_mask pattern.
14084 (define_expand "avx512vl_pshufdv3_mask"
14085 [(match_operand:V8SI 0 "register_operand")
14086 (match_operand:V8SI 1 "nonimmediate_operand")
14087 (match_operand:SI 2 "const_0_to_255_operand")
14088 (match_operand:V8SI 3 "register_operand")
14089 (match_operand:QI 4 "register_operand")]
14092 int mask = INTVAL (operands[2]);
14093 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
14094 GEN_INT ((mask >> 0) & 3),
14095 GEN_INT ((mask >> 2) & 3),
14096 GEN_INT ((mask >> 4) & 3),
14097 GEN_INT ((mask >> 6) & 3),
14098 GEN_INT (((mask >> 0) & 3) + 4),
14099 GEN_INT (((mask >> 2) & 3) + 4),
14100 GEN_INT (((mask >> 4) & 3) + 4),
14101 GEN_INT (((mask >> 6) & 3) + 4),
14102 operands[3], operands[4]));
;; Unmasked 256-bit vpshufd: same immediate decomposition as the masked
;; expander above, forwarding to avx2_pshufd_1.
14106 (define_expand "avx2_pshufdv3"
14107 [(match_operand:V8SI 0 "register_operand")
14108 (match_operand:V8SI 1 "nonimmediate_operand")
14109 (match_operand:SI 2 "const_0_to_255_operand")]
14112 int mask = INTVAL (operands[2]);
14113 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
14114 GEN_INT ((mask >> 0) & 3),
14115 GEN_INT ((mask >> 2) & 3),
14116 GEN_INT ((mask >> 4) & 3),
14117 GEN_INT ((mask >> 6) & 3),
14118 GEN_INT (((mask >> 0) & 3) + 4),
14119 GEN_INT (((mask >> 2) & 3) + 4),
14120 GEN_INT (((mask >> 4) & 3) + 4),
14121 GEN_INT (((mask >> 6) & 3) + 4)));
;; 256-bit vpshufd insn.  Condition requires the high-lane selectors to
;; equal the low-lane selectors plus 4 (same shuffle in both 128-bit
;; lanes); output rebuilds the immediate from the low-lane selectors.
14125 (define_insn "avx2_pshufd_1<mask_name>"
14126 [(set (match_operand:V8SI 0 "register_operand" "=v")
14128 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
14129 (parallel [(match_operand 2 "const_0_to_3_operand")
14130 (match_operand 3 "const_0_to_3_operand")
14131 (match_operand 4 "const_0_to_3_operand")
14132 (match_operand 5 "const_0_to_3_operand")
14133 (match_operand 6 "const_4_to_7_operand")
14134 (match_operand 7 "const_4_to_7_operand")
14135 (match_operand 8 "const_4_to_7_operand")
14136 (match_operand 9 "const_4_to_7_operand")])))]
14138 && <mask_avx512vl_condition>
14139 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
14140 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
14141 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
14142 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
14145 mask |= INTVAL (operands[2]) << 0;
14146 mask |= INTVAL (operands[3]) << 2;
14147 mask |= INTVAL (operands[4]) << 4;
14148 mask |= INTVAL (operands[5]) << 6;
14149 operands[2] = GEN_INT (mask);
14151 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14153 [(set_attr "type" "sselog1")
14154 (set_attr "prefix" "maybe_evex")
14155 (set_attr "length_immediate" "1")
14156 (set_attr "mode" "OI")])
;; Masked 128-bit vpshufd: split the immediate into four 2-bit selectors
;; and forward to sse2_pshufd_1_mask (single lane, no replication needed).
14158 (define_expand "avx512vl_pshufd_mask"
14159 [(match_operand:V4SI 0 "register_operand")
14160 (match_operand:V4SI 1 "nonimmediate_operand")
14161 (match_operand:SI 2 "const_0_to_255_operand")
14162 (match_operand:V4SI 3 "register_operand")
14163 (match_operand:QI 4 "register_operand")]
14166 int mask = INTVAL (operands[2]);
14167 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
14168 GEN_INT ((mask >> 0) & 3),
14169 GEN_INT ((mask >> 2) & 3),
14170 GEN_INT ((mask >> 4) & 3),
14171 GEN_INT ((mask >> 6) & 3),
14172 operands[3], operands[4]));
;; 128-bit pshufd: split the immediate into four 2-bit selectors and
;; forward to sse2_pshufd_1.
14176 (define_expand "sse2_pshufd"
14177 [(match_operand:V4SI 0 "register_operand")
14178 (match_operand:V4SI 1 "vector_operand")
14179 (match_operand:SI 2 "const_int_operand")]
14182 int mask = INTVAL (operands[2]);
14183 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
14184 GEN_INT ((mask >> 0) & 3),
14185 GEN_INT ((mask >> 2) & 3),
14186 GEN_INT ((mask >> 4) & 3),
14187 GEN_INT ((mask >> 6) & 3)));
;; 128-bit pshufd insn: rebuild the 8-bit immediate from the four 2-bit
;; element selectors.  %v prefixes the mnemonic with 'v' when AVX is
;; in use.
14191 (define_insn "sse2_pshufd_1<mask_name>"
14192 [(set (match_operand:V4SI 0 "register_operand" "=v")
14194 (match_operand:V4SI 1 "vector_operand" "vBm")
14195 (parallel [(match_operand 2 "const_0_to_3_operand")
14196 (match_operand 3 "const_0_to_3_operand")
14197 (match_operand 4 "const_0_to_3_operand")
14198 (match_operand 5 "const_0_to_3_operand")])))]
14199 "TARGET_SSE2 && <mask_avx512vl_condition>"
14202 mask |= INTVAL (operands[2]) << 0;
14203 mask |= INTVAL (operands[3]) << 2;
14204 mask |= INTVAL (operands[4]) << 4;
14205 mask |= INTVAL (operands[5]) << 6;
14206 operands[2] = GEN_INT (mask);
14208 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14210 [(set_attr "type" "sselog1")
14211 (set_attr "prefix_data16" "1")
14212 (set_attr "prefix" "<mask_prefix2>")
14213 (set_attr "length_immediate" "1")
14214 (set_attr "mode" "TI")])
;; 512-bit vpshuflw via unspec: the 8-bit immediate is passed straight
;; through to the instruction (no per-element RTL decomposition).
14216 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
14217 [(set (match_operand:V32HI 0 "register_operand" "=v")
14219 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
14220 (match_operand:SI 2 "const_0_to_255_operand" "n")]
14223 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14224 [(set_attr "type" "sselog")
14225 (set_attr "prefix" "evex")
14226 (set_attr "mode" "XI")])
;; Masked 256-bit vpshuflw: split the immediate into four 2-bit selectors
;; for the low quadword of each 128-bit lane (lane 1 offsets by +8) and
;; forward to avx2_pshuflw_1_mask.
14228 (define_expand "avx512vl_pshuflwv3_mask"
14229 [(match_operand:V16HI 0 "register_operand")
14230 (match_operand:V16HI 1 "nonimmediate_operand")
14231 (match_operand:SI 2 "const_0_to_255_operand")
14232 (match_operand:V16HI 3 "register_operand")
14233 (match_operand:HI 4 "register_operand")]
14234 "TARGET_AVX512VL && TARGET_AVX512BW"
14236 int mask = INTVAL (operands[2]);
14237 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
14238 GEN_INT ((mask >> 0) & 3),
14239 GEN_INT ((mask >> 2) & 3),
14240 GEN_INT ((mask >> 4) & 3),
14241 GEN_INT ((mask >> 6) & 3),
14242 GEN_INT (((mask >> 0) & 3) + 8),
14243 GEN_INT (((mask >> 2) & 3) + 8),
14244 GEN_INT (((mask >> 4) & 3) + 8),
14245 GEN_INT (((mask >> 6) & 3) + 8),
14246 operands[3], operands[4]));
;; Unmasked 256-bit vpshuflw: same decomposition as the masked expander
;; above, forwarding to avx2_pshuflw_1.
14250 (define_expand "avx2_pshuflwv3"
14251 [(match_operand:V16HI 0 "register_operand")
14252 (match_operand:V16HI 1 "nonimmediate_operand")
14253 (match_operand:SI 2 "const_0_to_255_operand")]
14256 int mask = INTVAL (operands[2]);
14257 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
14258 GEN_INT ((mask >> 0) & 3),
14259 GEN_INT ((mask >> 2) & 3),
14260 GEN_INT ((mask >> 4) & 3),
14261 GEN_INT ((mask >> 6) & 3),
14262 GEN_INT (((mask >> 0) & 3) + 8),
14263 GEN_INT (((mask >> 2) & 3) + 8),
14264 GEN_INT (((mask >> 4) & 3) + 8),
14265 GEN_INT (((mask >> 6) & 3) + 8)));
;; 256-bit vpshuflw insn: operands 2-5 select within the low quadword of
;; lane 0, operands 6-9 must be the same selectors +8 (lane 1); the
;; high quadword of each lane passes through unchanged (fixed const_int
;; indices, partially elided in this extract).  Output rebuilds the
;; 8-bit immediate from the lane-0 selectors.
14269 (define_insn "avx2_pshuflw_1<mask_name>"
14270 [(set (match_operand:V16HI 0 "register_operand" "=v")
14272 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
14273 (parallel [(match_operand 2 "const_0_to_3_operand")
14274 (match_operand 3 "const_0_to_3_operand")
14275 (match_operand 4 "const_0_to_3_operand")
14276 (match_operand 5 "const_0_to_3_operand")
14281 (match_operand 6 "const_8_to_11_operand")
14282 (match_operand 7 "const_8_to_11_operand")
14283 (match_operand 8 "const_8_to_11_operand")
14284 (match_operand 9 "const_8_to_11_operand")
14288 (const_int 15)])))]
14290 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
14291 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
14292 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
14293 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
14294 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
14297 mask |= INTVAL (operands[2]) << 0;
14298 mask |= INTVAL (operands[3]) << 2;
14299 mask |= INTVAL (operands[4]) << 4;
14300 mask |= INTVAL (operands[5]) << 6;
14301 operands[2] = GEN_INT (mask);
14303 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14305 [(set_attr "type" "sselog")
14306 (set_attr "prefix" "maybe_evex")
14307 (set_attr "length_immediate" "1")
14308 (set_attr "mode" "OI")])
;; Masked 128-bit vpshuflw: split the immediate into four 2-bit selectors
;; and forward to sse2_pshuflw_1_mask.
14310 (define_expand "avx512vl_pshuflw_mask"
14311 [(match_operand:V8HI 0 "register_operand")
14312 (match_operand:V8HI 1 "nonimmediate_operand")
14313 (match_operand:SI 2 "const_0_to_255_operand")
14314 (match_operand:V8HI 3 "register_operand")
14315 (match_operand:QI 4 "register_operand")]
14316 "TARGET_AVX512VL && TARGET_AVX512BW"
14318 int mask = INTVAL (operands[2]);
14319 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
14320 GEN_INT ((mask >> 0) & 3),
14321 GEN_INT ((mask >> 2) & 3),
14322 GEN_INT ((mask >> 4) & 3),
14323 GEN_INT ((mask >> 6) & 3),
14324 operands[3], operands[4]));
;; 128-bit pshuflw: split the immediate into four 2-bit selectors and
;; forward to sse2_pshuflw_1.
14328 (define_expand "sse2_pshuflw"
14329 [(match_operand:V8HI 0 "register_operand")
14330 (match_operand:V8HI 1 "vector_operand")
14331 (match_operand:SI 2 "const_int_operand")]
14334 int mask = INTVAL (operands[2]);
14335 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
14336 GEN_INT ((mask >> 0) & 3),
14337 GEN_INT ((mask >> 2) & 3),
14338 GEN_INT ((mask >> 4) & 3),
14339 GEN_INT ((mask >> 6) & 3)));
;; 128-bit pshuflw insn: operands 2-5 shuffle the low quadword; the high
;; quadword passes through (fixed indices, elided in this extract).
;; Output rebuilds the 8-bit immediate; %v adds the 'v' prefix under AVX.
14343 (define_insn "sse2_pshuflw_1<mask_name>"
14344 [(set (match_operand:V8HI 0 "register_operand" "=v")
14346 (match_operand:V8HI 1 "vector_operand" "vBm")
14347 (parallel [(match_operand 2 "const_0_to_3_operand")
14348 (match_operand 3 "const_0_to_3_operand")
14349 (match_operand 4 "const_0_to_3_operand")
14350 (match_operand 5 "const_0_to_3_operand")
14355 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14358 mask |= INTVAL (operands[2]) << 0;
14359 mask |= INTVAL (operands[3]) << 2;
14360 mask |= INTVAL (operands[4]) << 4;
14361 mask |= INTVAL (operands[5]) << 6;
14362 operands[2] = GEN_INT (mask);
14364 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14366 [(set_attr "type" "sselog")
14367 (set_attr "prefix_data16" "0")
14368 (set_attr "prefix_rep" "1")
14369 (set_attr "prefix" "maybe_vex")
14370 (set_attr "length_immediate" "1")
14371 (set_attr "mode" "TI")])
;; Unmasked 256-bit vpshufhw: the selectors address the HIGH quadword of
;; each lane, so the decomposed indices are offset by +4 (lane 0) and
;; +12 (lane 1).  Forwards to avx2_pshufhw_1.
14373 (define_expand "avx2_pshufhwv3"
14374 [(match_operand:V16HI 0 "register_operand")
14375 (match_operand:V16HI 1 "nonimmediate_operand")
14376 (match_operand:SI 2 "const_0_to_255_operand")]
14379 int mask = INTVAL (operands[2]);
14380 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
14381 GEN_INT (((mask >> 0) & 3) + 4),
14382 GEN_INT (((mask >> 2) & 3) + 4),
14383 GEN_INT (((mask >> 4) & 3) + 4),
14384 GEN_INT (((mask >> 6) & 3) + 4),
14385 GEN_INT (((mask >> 0) & 3) + 12),
14386 GEN_INT (((mask >> 2) & 3) + 12),
14387 GEN_INT (((mask >> 4) & 3) + 12),
14388 GEN_INT (((mask >> 6) & 3) + 12)));
;; 512-bit vpshufhw via unspec: the 8-bit immediate is passed straight
;; through (mirrors the pshuflw v32hi pattern above).
14392 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
14393 [(set (match_operand:V32HI 0 "register_operand" "=v")
14395 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
14396 (match_operand:SI 2 "const_0_to_255_operand" "n")]
14399 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14400 [(set_attr "type" "sselog")
14401 (set_attr "prefix" "evex")
14402 (set_attr "mode" "XI")])
;; Masked 256-bit vpshufhw: same +4/+12 high-quadword index offsets as
;; the unmasked expander, forwarding to avx2_pshufhw_1_mask.
14404 (define_expand "avx512vl_pshufhwv3_mask"
14405 [(match_operand:V16HI 0 "register_operand")
14406 (match_operand:V16HI 1 "nonimmediate_operand")
14407 (match_operand:SI 2 "const_0_to_255_operand")
14408 (match_operand:V16HI 3 "register_operand")
14409 (match_operand:HI 4 "register_operand")]
14410 "TARGET_AVX512VL && TARGET_AVX512BW"
14412 int mask = INTVAL (operands[2]);
14413 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
14414 GEN_INT (((mask >> 0) & 3) + 4),
14415 GEN_INT (((mask >> 2) & 3) + 4),
14416 GEN_INT (((mask >> 4) & 3) + 4),
14417 GEN_INT (((mask >> 6) & 3) + 4),
14418 GEN_INT (((mask >> 0) & 3) + 12),
14419 GEN_INT (((mask >> 2) & 3) + 12),
14420 GEN_INT (((mask >> 4) & 3) + 12),
14421 GEN_INT (((mask >> 6) & 3) + 12),
14422 operands[3], operands[4]));
;; 256-bit vpshufhw insn: low quadwords pass through (leading const_int
;; indices, partially elided here); operands 2-5 select within the high
;; quadword of lane 0 (range 4..7) and 6-9 must be +8 of those (lane 1).
;; Output rebuilds the immediate by subtracting the +4 base offset.
14426 (define_insn "avx2_pshufhw_1<mask_name>"
14427 [(set (match_operand:V16HI 0 "register_operand" "=v")
14429 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
14430 (parallel [(const_int 0)
14434 (match_operand 2 "const_4_to_7_operand")
14435 (match_operand 3 "const_4_to_7_operand")
14436 (match_operand 4 "const_4_to_7_operand")
14437 (match_operand 5 "const_4_to_7_operand")
14442 (match_operand 6 "const_12_to_15_operand")
14443 (match_operand 7 "const_12_to_15_operand")
14444 (match_operand 8 "const_12_to_15_operand")
14445 (match_operand 9 "const_12_to_15_operand")])))]
14447 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
14448 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
14449 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
14450 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
14451 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
14454 mask |= (INTVAL (operands[2]) - 4) << 0;
14455 mask |= (INTVAL (operands[3]) - 4) << 2;
14456 mask |= (INTVAL (operands[4]) - 4) << 4;
14457 mask |= (INTVAL (operands[5]) - 4) << 6;
14458 operands[2] = GEN_INT (mask);
14460 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14462 [(set_attr "type" "sselog")
14463 (set_attr "prefix" "maybe_evex")
14464 (set_attr "length_immediate" "1")
14465 (set_attr "mode" "OI")])
;; Expanders that lower a pshufhw 8-bit immediate into four explicit
;; word selectors (2-bit fields, biased by +4 because only the high
;; four words of the vector are shuffled), then emit the _1 insn
;; pattern.  The masked V8HI variant additionally passes the merge
;; source (operand 3) and the QI write mask (operand 4).
14467 (define_expand "avx512vl_pshufhw_mask"
14468 [(match_operand:V8HI 0 "register_operand")
14469 (match_operand:V8HI 1 "nonimmediate_operand")
14470 (match_operand:SI 2 "const_0_to_255_operand")
14471 (match_operand:V8HI 3 "register_operand")
14472 (match_operand:QI 4 "register_operand")]
14473 "TARGET_AVX512VL && TARGET_AVX512BW"
14475 int mask = INTVAL (operands[2]);
14476 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
14477 GEN_INT (((mask >> 0) & 3) + 4),
14478 GEN_INT (((mask >> 2) & 3) + 4),
14479 GEN_INT (((mask >> 4) & 3) + 4),
14480 GEN_INT (((mask >> 6) & 3) + 4),
14481 operands[3], operands[4]));
;; Unmasked SSE2 form: same selector decomposition, no merge/mask
;; operands.
14485 (define_expand "sse2_pshufhw"
14486 [(match_operand:V8HI 0 "register_operand")
14487 (match_operand:V8HI 1 "vector_operand")
14488 (match_operand:SI 2 "const_int_operand")]
14491 int mask = INTVAL (operands[2]);
14492 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
14493 GEN_INT (((mask >> 0) & 3) + 4),
14494 GEN_INT (((mask >> 2) & 3) + 4),
14495 GEN_INT (((mask >> 4) & 3) + 4),
14496 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw for V8HI: selectors 4..7 pick the source for the four high
;; words; the output template re-packs (selector - 4) into the 2-bit
;; fields of the instruction's 8-bit immediate.
;; NOTE(review): interior lines elided in this extract (e.g. the
;; vec_select wrapper, the "{ int mask = 0;" opener) -- verify against
;; the full sse.md before editing.
14500 (define_insn "sse2_pshufhw_1<mask_name>"
14501 [(set (match_operand:V8HI 0 "register_operand" "=v")
14503 (match_operand:V8HI 1 "vector_operand" "vBm")
14504 (parallel [(const_int 0)
14508 (match_operand 2 "const_4_to_7_operand")
14509 (match_operand 3 "const_4_to_7_operand")
14510 (match_operand 4 "const_4_to_7_operand")
14511 (match_operand 5 "const_4_to_7_operand")])))]
14512 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14515 mask |= (INTVAL (operands[2]) - 4) << 0;
14516 mask |= (INTVAL (operands[3]) - 4) << 2;
14517 mask |= (INTVAL (operands[4]) - 4) << 4;
14518 mask |= (INTVAL (operands[5]) - 4) << 6;
14519 operands[2] = GEN_INT (mask);
14521 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14523 [(set_attr "type" "sselog")
14524 (set_attr "prefix_rep" "1")
14525 (set_attr "prefix_data16" "0")
14526 (set_attr "prefix" "maybe_vex")
14527 (set_attr "length_immediate" "1")
14528 (set_attr "mode" "TI")])
;; sse2_loadd: load a scalar SI into element 0 of a V4SI, zeroing the
;; other elements (operand 2 is forced to the V4SI zero constant).
14530 (define_expand "sse2_loadd"
14531 [(set (match_operand:V4SI 0 "register_operand")
14533 (vec_duplicate:V4SI
14534 (match_operand:SI 1 "nonimmediate_operand"))
14538 "operands[2] = CONST0_RTX (V4SImode);")
;; sse2_loadld insn: alternatives cover movd from memory/GPR (zeroing),
;; movss merging into an existing register, and the 3-operand AVX
;; vmovss form.  Alternative 1 (GPR source) is gated on
;; TARGET_INTER_UNIT_MOVES_TO_VEC for speed.
14540 (define_insn "sse2_loadld"
14541 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,x,v")
14543 (vec_duplicate:V4SI
14544 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
14545 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
14549 %vmovd\t{%2, %0|%0, %2}
14550 %vmovd\t{%2, %0|%0, %2}
14551 movss\t{%2, %0|%0, %2}
14552 movss\t{%2, %0|%0, %2}
14553 vmovss\t{%2, %1, %0|%0, %1, %2}"
14554 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
14555 (set_attr "type" "ssemov")
14556 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
14557 (set_attr "mode" "TI,TI,V4SF,SF,SF")
14558 (set (attr "preferred_for_speed")
14559 (cond [(eq_attr "alternative" "1")
14560 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
14562 (symbol_ref "true")))])
14564 ;; QI and HI modes handled by pextr patterns.
;; PEXTR_MODE12: V16QI (needs SSE4.1 for pextrb) and V8HI (pextrw is
;; baseline SSE2 within this file's pattern conditions).
14565 (define_mode_iterator PEXTR_MODE12
14566 [(V16QI "TARGET_SSE4_1") V8HI])
;; Extract one QI/HI element to a GPR (%k0 = 32-bit low part) or to
;; memory; alternatives 2/3 are the EVEX-encoded AVX512BW forms for
;; extended SSE registers.
14568 (define_insn "*vec_extract<mode>"
14569 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
14570 (vec_select:<ssescalarmode>
14571 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
14573 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
14576 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14577 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
14578 vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14579 vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14580 [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
14581 (set_attr "type" "sselog1")
14582 (set_attr "prefix_data16" "1")
14583 (set (attr "prefix_extra")
14585 (and (eq_attr "alternative" "0,2")
14586 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
14588 (const_string "1")))
14589 (set_attr "length_immediate" "1")
14590 (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
14591 (set_attr "mode" "TI")])
;; pextrb/pextrw with implicit zero extension into a full SWI48 GPR
;; (pextr to a 32-bit destination zero-extends, so %k0 suffices).
14593 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
14594 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
14596 (vec_select:<PEXTR_MODE12:ssescalarmode>
14597 (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
14599 [(match_operand:SI 2
14600 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
14603 %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14604 vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
14605 [(set_attr "isa" "*,avx512bw")
14606 (set_attr "type" "sselog1")
14607 (set_attr "prefix_data16" "1")
14608 (set (attr "prefix_extra")
14610 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
14612 (const_string "1")))
14613 (set_attr "length_immediate" "1")
14614 (set_attr "prefix" "maybe_vex")
14615 (set_attr "mode" "TI")])
;; Element extraction directly from memory ("o" = offsettable); split
;; later into a plain scalar load at the element's offset.
;; NOTE(review): output template line elided in this extract.
14617 (define_insn "*vec_extract<mode>_mem"
14618 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
14619 (vec_select:<ssescalarmode>
14620 (match_operand:VI12_128 1 "memory_operand" "o")
14622 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extract element 0 of an SI/DI vector: a plain scalar move once the
;; vector register is viewed in scalar mode.  GPR destination from a
;; vector register (alt 1) is speed-gated on inter-unit moves.
14626 (define_insn "*vec_extract<ssevecmodelower>_0"
14627 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,v ,m")
14629 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "m ,v,vm,v")
14630 (parallel [(const_int 0)])))]
14631 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
14633 [(set_attr "isa" "*,sse2,*,*")
14634 (set (attr "preferred_for_speed")
14635 (cond [(eq_attr "alternative" "1")
14636 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
14638 (symbol_ref "true")))])
;; 32-bit-only variant of the V2DI element-0 extract (DI result cannot
;; live in one GPR, so SSE4 pextr / store forms are used).
14640 (define_insn "*vec_extractv2di_0_sse"
14641 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,x ,m")
14643 (match_operand:V2DI 1 "nonimmediate_operand" " x,xm,x")
14644 (parallel [(const_int 0)])))]
14645 "TARGET_SSE && !TARGET_64BIT
14646 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
14648 [(set_attr "isa" "sse4,*,*")
14649 (set (attr "preferred_for_speed")
14650 (cond [(eq_attr "alternative" "0")
14651 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
14653 (symbol_ref "true")))])
;; Post-reload split: a 32-bit V2DI element-0 extract into a GPR pair
;; becomes two SImode moves (low half via lowpart, high half via a
;; V4SI element-1 select); split_double_mode produces the two GPR
;; destinations.
;; NOTE(review): the (define_split headers and part of the split
;; pattern are elided in this extract -- verify against full sse.md.
14656 [(set (match_operand:DI 0 "general_reg_operand")
14658 (match_operand:V2DI 1 "register_operand")
14659 (parallel [(const_int 0)])))]
14660 "TARGET_SSE4_1 && !TARGET_64BIT
14661 && reload_completed"
14662 [(set (match_dup 2) (match_dup 4))
14666 (parallel [(const_int 1)])))]
14668 operands[4] = gen_lowpart (SImode, operands[1]);
14669 operands[5] = gen_lowpart (V4SImode, operands[1]);
14670 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
;; Generic post-reload split: element-0 extract degenerates to a plain
;; scalar move of the vector register's low part.
14674 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
14676 (match_operand:<ssevecmode> 1 "register_operand")
14677 (parallel [(const_int 0)])))]
14678 "TARGET_SSE && reload_completed"
14679 [(set (match_dup 0) (match_dup 1))]
14680 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
;; V4SI element 0 zero-extended to DI.  Alternative 0 moves to a GPR
;; (x64 only, inter-unit-move gated); the others stay in SSE regs.
14682 (define_insn "*vec_extractv4si_0_zext_sse4"
14683 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
14686 (match_operand:V4SI 1 "register_operand" "v,x,v")
14687 (parallel [(const_int 0)]))))]
14690 [(set_attr "isa" "x64,*,avx512f")
14691 (set (attr "preferred_for_speed")
14692 (cond [(eq_attr "alternative" "0")
14693 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
14695 (symbol_ref "true")))])
;; GPR-only variant, enabled when inter-unit moves are preferred.
14697 (define_insn "*vec_extractv4si_0_zext"
14698 [(set (match_operand:DI 0 "register_operand" "=r")
14701 (match_operand:V4SI 1 "register_operand" "x")
14702 (parallel [(const_int 0)]))))]
14703 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
;; Post-reload split: rewrite as zero_extend of the SImode lowpart.
;; NOTE(review): the (define_split header line is elided here.
14707 [(set (match_operand:DI 0 "register_operand")
14710 (match_operand:V4SI 1 "register_operand")
14711 (parallel [(const_int 0)]))))]
14712 "TARGET_SSE2 && reload_completed"
14713 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
14714 "operands[1] = gen_lowpart (SImode, operands[1]);")
;; General V4SI element extract.  Alternatives 0/1 use pextrd; the
;; shift alternatives emulate the extract with psrldq by (index * 4)
;; bytes, leaving the element in the low dword.
14716 (define_insn "*vec_extractv4si"
14717 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
14719 (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
14720 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
14723 switch (which_alternative)
14727 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
14731 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
14732 return "psrldq\t{%2, %0|%0, %2}";
14736 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
14737 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
14740 gcc_unreachable ();
14743 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
14744 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
14745 (set (attr "prefix_extra")
14746 (if_then_else (eq_attr "alternative" "0,1")
14748 (const_string "*")))
14749 (set_attr "length_immediate" "1")
14750 (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
14751 (set_attr "mode" "TI")])
;; pextrd to a 32-bit GPR (%k0) zero-extends to the full DI result.
14753 (define_insn "*vec_extractv4si_zext"
14754 [(set (match_operand:DI 0 "register_operand" "=r,r")
14757 (match_operand:V4SI 1 "register_operand" "x,v")
14758 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
14759 "TARGET_64BIT && TARGET_SSE4_1"
14760 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
14761 [(set_attr "isa" "*,avx512dq")
14762 (set_attr "type" "sselog1")
14763 (set_attr "prefix_extra" "1")
14764 (set_attr "length_immediate" "1")
14765 (set_attr "prefix" "maybe_vex")
14766 (set_attr "mode" "TI")])
;; Element extract from an offsettable memory V4SI; split to a scalar
;; SImode load adjusted by (index * 4) bytes.
;; NOTE(review): output templates elided in this extract.
14768 (define_insn "*vec_extractv4si_mem"
14769 [(set (match_operand:SI 0 "register_operand" "=x,r")
14771 (match_operand:V4SI 1 "memory_operand" "o,o")
14772 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Zero-extending variant: after reload, becomes zero_extend of the
;; adjusted SImode memory reference (a 32-bit load zero-extends).
14776 (define_insn_and_split "*vec_extractv4si_zext_mem"
14777 [(set (match_operand:DI 0 "register_operand" "=x,r")
14780 (match_operand:V4SI 1 "memory_operand" "o,o")
14781 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
14782 "TARGET_64BIT && TARGET_SSE"
14784 "&& reload_completed"
14785 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
14787 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extract element 1 (the high qword) of a V2DI.  Covers pextrq,
;; movhps store, psrldq-by-8 emulation, movhlps, and direct loads
;; from the second qword of a memory operand.
14790 (define_insn "*vec_extractv2di_1"
14791 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
14793 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
14794 (parallel [(const_int 1)])))]
14795 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
14797 %vpextrq\t{$1, %1, %0|%0, %1, 1}
14798 vpextrq\t{$1, %1, %0|%0, %1, 1}
14799 %vmovhps\t{%1, %0|%0, %1}
14800 psrldq\t{$8, %0|%0, 8}
14801 vpsrldq\t{$8, %1, %0|%0, %1, 8}
14802 vpsrldq\t{$8, %1, %0|%0, %1, 8}
14803 movhlps\t{%1, %0|%0, %1}
;; Per-alternative ISA requirements (pextrq needs x64+SSE4; EVEX forms
;; need AVX512DQ/BW; GPR load needs x64).
14807 (cond [(eq_attr "alternative" "0")
14808 (const_string "x64_sse4")
14809 (eq_attr "alternative" "1")
14810 (const_string "x64_avx512dq")
14811 (eq_attr "alternative" "3")
14812 (const_string "sse2_noavx")
14813 (eq_attr "alternative" "4")
14814 (const_string "avx")
14815 (eq_attr "alternative" "5")
14816 (const_string "avx512bw")
14817 (eq_attr "alternative" "6")
14818 (const_string "noavx")
14819 (eq_attr "alternative" "8")
14820 (const_string "x64")
14822 (const_string "*")))
14824 (cond [(eq_attr "alternative" "2,6,7")
14825 (const_string "ssemov")
14826 (eq_attr "alternative" "3,4,5")
14827 (const_string "sseishft1")
14828 (eq_attr "alternative" "8")
14829 (const_string "imov")
14831 (const_string "sselog1")))
14832 (set (attr "length_immediate")
14833 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
14835 (const_string "*")))
14836 (set (attr "prefix_rex")
14837 (if_then_else (eq_attr "alternative" "0,1")
14839 (const_string "*")))
14840 (set (attr "prefix_extra")
14841 (if_then_else (eq_attr "alternative" "0,1")
14843 (const_string "*")))
14844 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
14845 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
;; Post-reload split: any constant-index element extract from a
;; memory VI12/VI4/VI8 vector becomes a scalar load at the element's
;; byte offset (index * element size).
;; NOTE(review): the (define_split header line is elided here.
14848 [(set (match_operand:<ssescalarmode> 0 "register_operand")
14849 (vec_select:<ssescalarmode>
14850 (match_operand:VI_128 1 "memory_operand")
14852 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
14853 "TARGET_SSE && reload_completed"
14854 [(set (match_dup 0) (match_dup 1))]
14856 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
14858 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Extract one 128-bit lane of a V2TI (vextracti128 or, for extended
;; EVEX registers, vextracti32x4 on the zmm view %g1).
14861 (define_insn "*vec_extractv2ti"
14862 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
14864 (match_operand:V2TI 1 "register_operand" "x,v")
14866 [(match_operand:SI 2 "const_0_to_1_operand")])))]
14869 vextract%~128\t{%2, %1, %0|%0, %1, %2}
14870 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
14871 [(set_attr "type" "sselog")
14872 (set_attr "prefix_extra" "1")
14873 (set_attr "length_immediate" "1")
14874 (set_attr "prefix" "vex,evex")
14875 (set_attr "mode" "OI")])
;; Extract one of the four 128-bit lanes of a V4TI (AVX512F zmm).
14877 (define_insn "*vec_extractv4ti"
14878 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
14880 (match_operand:V4TI 1 "register_operand" "v")
14882 [(match_operand:SI 2 "const_0_to_3_operand")])))]
14884 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
14885 [(set_attr "type" "sselog")
14886 (set_attr "prefix_extra" "1")
14887 (set_attr "length_immediate" "1")
14888 (set_attr "prefix" "evex")
14889 (set_attr "mode" "XI")])
14891 (define_mode_iterator VEXTRACTI128_MODE
14892 [(V4TI "TARGET_AVX512F") V2TI])
;; Split lane-0 extraction into a plain TImode lowpart move, but only
;; when the source register is addressable in TImode (AVX512VL, or not
;; an extended xmm16+ register).
;; NOTE(review): the (define_split header line is elided here.
14895 [(set (match_operand:TI 0 "nonimmediate_operand")
14897 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
14898 (parallel [(const_int 0)])))]
14900 && reload_completed
14901 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
14902 [(set (match_dup 0) (match_dup 1))]
14903 "operands[1] = gen_lowpart (TImode, operands[1]);")
14905 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
14906 ;; vector modes into vec_extract*.
;; Pre-RA split: a scalar subreg at offset 0 of a 16/32/64-byte vector
;; becomes vec_select of element 0.  For 32/64-byte sources the low
;; 128-bit lane is first extracted into a fresh pseudo via
;; vec_extract_lo_* so the final select is on a 128-bit vector.
;; NOTE(review): several interior lines elided -- verify against full
;; sse.md.
14908 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
14909 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
14910 "can_create_pseudo_p ()
14911 && REG_P (operands[1])
14912 && VECTOR_MODE_P (GET_MODE (operands[1]))
14913 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
14914 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
14915 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
14916 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
14917 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
14918 (parallel [(const_int 0)])))]
14922 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
14925 if (<MODE>mode == SImode)
14927 tmp = gen_reg_rtx (V8SImode);
14928 emit_insn (gen_vec_extract_lo_v16si (tmp,
14929 gen_lowpart (V16SImode,
14934 tmp = gen_reg_rtx (V4DImode);
14935 emit_insn (gen_vec_extract_lo_v8di (tmp,
14936 gen_lowpart (V8DImode,
14942 tmp = gen_reg_rtx (<ssevecmode>mode);
14943 if (<MODE>mode == SImode)
14944 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
14947 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
14952 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
;; Build a V2SI from two SI values under SSE4.1: pinsrd into slot 1,
;; punpckldq of two vector-held scalars, movd when the second element
;; is zero, plus MMX punpckldq/movd alternatives (8 and 9).
14957 (define_insn "*vec_concatv2si_sse4_1"
14958 [(set (match_operand:V2SI 0 "register_operand"
14959 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
14961 (match_operand:SI 1 "nonimmediate_operand"
14962 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
14963 (match_operand:SI 2 "nonimm_or_0_operand"
14964 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
14965 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14967 pinsrd\t{$1, %2, %0|%0, %2, 1}
14968 pinsrd\t{$1, %2, %0|%0, %2, 1}
14969 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
14970 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
14971 punpckldq\t{%2, %0|%0, %2}
14972 punpckldq\t{%2, %0|%0, %2}
14973 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
14974 %vmovd\t{%1, %0|%0, %1}
14975 punpckldq\t{%2, %0|%0, %2}
14976 movd\t{%1, %0|%0, %1}"
14977 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
14978 (set (attr "mmx_isa")
14979 (if_then_else (eq_attr "alternative" "8,9")
14980 (const_string "native")
14981 (const_string "*")))
14983 (cond [(eq_attr "alternative" "7")
14984 (const_string "ssemov")
14985 (eq_attr "alternative" "8")
14986 (const_string "mmxcvt")
14987 (eq_attr "alternative" "9")
14988 (const_string "mmxmov")
14990 (const_string "sselog")))
14991 (set (attr "prefix_extra")
14992 (if_then_else (eq_attr "alternative" "0,1,2,3")
14994 (const_string "*")))
14995 (set (attr "length_immediate")
14996 (if_then_else (eq_attr "alternative" "0,1,2,3")
14998 (const_string "*")))
14999 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
15000 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
15002 ;; ??? In theory we can match memory for the MMX alternative, but allowing
15003 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
15004 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 V2SI concat: punpckldq/unpcklps for two register halves,
;; movd/movss when the second element is the zero constant, plus MMX
;; alternatives.
15005 (define_insn "*vec_concatv2si"
15006 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,x,x,*y,*y")
15008 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,0,m, 0,rm")
15009 (match_operand:SI 2 "reg_or_0_operand" " x,C ,x,C,*y,C")))]
15010 "TARGET_SSE && !TARGET_SSE4_1"
15012 punpckldq\t{%2, %0|%0, %2}
15013 movd\t{%1, %0|%0, %1}
15014 unpcklps\t{%2, %0|%0, %2}
15015 movss\t{%1, %0|%0, %1}
15016 punpckldq\t{%2, %0|%0, %2}
15017 movd\t{%1, %0|%0, %1}"
15018 [(set_attr "isa" "sse2,sse2,*,*,*,*")
15019 (set_attr "mmx_isa" "*,*,*,*,native,native")
15020 (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
15021 (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
;; Concatenate two V2SI halves into a V4SI: punpcklqdq / movlhps when
;; both halves are registers, movhps when the high half comes from
;; memory.
15023 (define_insn "*vec_concatv4si"
15024 [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v")
15026 (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v")
15027 (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
15030 punpcklqdq\t{%2, %0|%0, %2}
15031 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
15032 movlhps\t{%2, %0|%0, %2}
15033 movhps\t{%2, %0|%0, %q2}
15034 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
15035 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
15036 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
15037 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
15038 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
;; V2SI concatenated with zero: a simple movq (zeroes the high qword),
;; or movq2dq when the source lives in an MMX register.
15040 (define_insn "*vec_concatv4si_0"
15041 [(set (match_operand:V4SI 0 "register_operand" "=v,x")
15043 (match_operand:V2SI 1 "nonimmediate_operand" "vm,?!*y")
15044 (match_operand:V2SI 2 "const0_operand" " C,C")))]
15047 %vmovq\t{%1, %0|%0, %1}
15048 movq2dq\t{%1, %0|%0, %1}"
15049 [(set_attr "mmx_isa" "*,native")
15050 (set_attr "type" "ssemov")
15051 (set_attr "prefix" "maybe_vex,orig")
15052 (set_attr "mode" "TI")])
;; Build a V2DI from two DI values: pinsrq (x64 SSE4/AVX/AVX512DQ),
;; punpcklqdq / movlhps for register pairs, movhps for a memory high
;; half.
15054 (define_insn "vec_concatv2di"
15055 [(set (match_operand:V2DI 0 "register_operand"
15056 "=Yr,*x,x ,v ,x,v ,x,x,v")
15058 (match_operand:DI 1 "register_operand"
15059 " 0, 0,x ,Yv,0,Yv,0,0,v")
15060 (match_operand:DI 2 "nonimmediate_operand"
15061 " rm,rm,rm,rm,x,Yv,x,m,m")))]
15064 pinsrq\t{$1, %2, %0|%0, %2, 1}
15065 pinsrq\t{$1, %2, %0|%0, %2, 1}
15066 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
15067 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
15068 punpcklqdq\t{%2, %0|%0, %2}
15069 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
15070 movlhps\t{%2, %0|%0, %2}
15071 movhps\t{%2, %0|%0, %2}
15072 vmovhps\t{%2, %1, %0|%0, %1, %2}"
;; Per-alternative ISA / prefix selection (pinsrq forms need x64).
15074 (cond [(eq_attr "alternative" "0,1")
15075 (const_string "x64_sse4_noavx")
15076 (eq_attr "alternative" "2")
15077 (const_string "x64_avx")
15078 (eq_attr "alternative" "3")
15079 (const_string "x64_avx512dq")
15080 (eq_attr "alternative" "4")
15081 (const_string "sse2_noavx")
15082 (eq_attr "alternative" "5,8")
15083 (const_string "avx")
15085 (const_string "noavx")))
15088 (eq_attr "alternative" "0,1,2,3,4,5")
15089 (const_string "sselog")
15090 (const_string "ssemov")))
15091 (set (attr "prefix_rex")
15092 (if_then_else (eq_attr "alternative" "0,1,2,3")
15094 (const_string "*")))
15095 (set (attr "prefix_extra")
15096 (if_then_else (eq_attr "alternative" "0,1,2,3")
15098 (const_string "*")))
15099 (set (attr "length_immediate")
15100 (if_then_else (eq_attr "alternative" "0,1,2,3")
15102 (const_string "*")))
15103 (set (attr "prefix")
15104 (cond [(eq_attr "alternative" "2")
15105 (const_string "vex")
15106 (eq_attr "alternative" "3")
15107 (const_string "evex")
15108 (eq_attr "alternative" "5,8")
15109 (const_string "maybe_evex")
15111 (const_string "orig")))
15112 (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; DI concatenated with zero: movq from a GPR (guarded by the
;; assembler's inter-unit movq support, falling back to movd spelling),
;; movq from vector/memory, or movq2dq from an MMX register.
15114 (define_insn "*vec_concatv2di_0"
15115 [(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
15117 (match_operand:DI 1 "nonimmediate_operand" " r,vm,?!*y")
15118 (match_operand:DI 2 "const0_operand" " C,C ,C")))]
15121 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
15122 %vmovq\t{%1, %0|%0, %1}
15123 movq2dq\t{%1, %0|%0, %1}"
15124 [(set_attr "isa" "x64,*,*")
15125 (set_attr "mmx_isa" "*,*,native")
15126 (set_attr "type" "ssemov")
15127 (set_attr "prefix_rex" "1,*,*")
15128 (set_attr "prefix" "maybe_vex,maybe_vex,orig")
15129 (set_attr "mode" "TI")
15130 (set (attr "preferred_for_speed")
15131 (cond [(eq_attr "alternative" "0")
15132 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15134 (symbol_ref "true")))])
15136 ;; vmovq clears also the higher bits.
;; Set element 0 of a 64-bit-element vector whose other elements are
;; zero (operand 1 is the zero constant): a single vmovq into the
;; 128-bit low part (%x0) suffices.  GPR source (alt 0) is x64-only
;; and speed-gated on inter-unit moves.
15137 (define_insn "vec_set<mode>_0"
15138 [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=v,v")
15139 (vec_merge:VI8_AVX_AVX512F
15140 (vec_duplicate:VI8_AVX_AVX512F
15141 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,vm"))
15142 (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
15145 "vmovq\t{%2, %x0|%x0, %2}"
15146 [(set_attr "isa" "x64,*")
15147 (set_attr "type" "ssemov")
15148 (set_attr "prefix_rex" "1,*")
15149 (set_attr "prefix" "maybe_evex")
15150 (set_attr "mode" "TI")
15151 (set (attr "preferred_for_speed")
15152 (cond [(eq_attr "alternative" "0")
15153 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15155 (symbol_ref "true")))])
;; Vector widening-unpack expanders.  All defer to
;; ix86_expand_sse_unpack (dest, src, unsigned_p, high_p).
15157 (define_expand "vec_unpacks_lo_<mode>"
15158 [(match_operand:<sseunpackmode> 0 "register_operand")
15159 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15161 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
15163 (define_expand "vec_unpacks_hi_<mode>"
15164 [(match_operand:<sseunpackmode> 0 "register_operand")
15165 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15167 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
15169 (define_expand "vec_unpacku_lo_<mode>"
15170 [(match_operand:<sseunpackmode> 0 "register_operand")
15171 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15173 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Mask-register (scalar bool vector) unpack-lo variants: the low half
;; of a mask is the mask itself, so these are plain moves/subregs.
15175 (define_expand "vec_unpacks_sbool_lo_qi"
15176 [(match_operand:QI 0 "register_operand")
15177 (match_operand:QI 1 "register_operand")
15178 (match_operand:QI 2 "const_int_operand")]
15181 if (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 4)
15183 emit_move_insn (operands[0], operands[1]);
15187 (define_expand "vec_unpacks_lo_hi"
15188 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
15189 (match_operand:HI 1 "register_operand"))]
15192 (define_expand "vec_unpacks_lo_si"
15193 [(set (match_operand:HI 0 "register_operand")
15194 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
15197 (define_expand "vec_unpacks_lo_di"
15198 [(set (match_operand:SI 0 "register_operand")
15199 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
15202 (define_expand "vec_unpacku_hi_<mode>"
15203 [(match_operand:<sseunpackmode> 0 "register_operand")
15204 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15206 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
;; Mask-register unpack-hi: shift the mask right by half its width.
;; QImode kshift needs AVX512DQ; otherwise widen to HImode, shift
;; there, and take the QImode lowpart back.
15208 (define_expand "vec_unpacks_sbool_hi_qi"
15209 [(match_operand:QI 0 "register_operand")
15210 (match_operand:QI 1 "register_operand")
15211 (match_operand:QI 2 "const_int_operand")]
15214 HOST_WIDE_INT nunits = INTVAL (operands[2]);
15215 if (nunits != 8 && nunits != 4)
15217 if (TARGET_AVX512DQ)
15218 emit_insn (gen_klshiftrtqi (operands[0], operands[1],
15219 GEN_INT (nunits / 2)));
15222 rtx tem = gen_reg_rtx (HImode);
15223 emit_insn (gen_klshiftrthi (tem, lowpart_subreg (HImode, operands[1],
15225 GEN_INT (nunits / 2)));
15226 emit_move_insn (operands[0], lowpart_subreg (QImode, tem, HImode));
;; HImode and SI/DImode mask unpack-hi: a logical right shift by the
;; half-mask width, tagged UNSPEC_MASKOP so it stays a kshift.
15231 (define_expand "vec_unpacks_hi_hi"
15233 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
15234 (lshiftrt:HI (match_operand:HI 1 "register_operand")
15236 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
15239 (define_expand "vec_unpacks_hi_<mode>"
15241 [(set (subreg:SWI48x
15242 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
15243 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
15245 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
15247 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
15249 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15253 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unsigned average with rounding: (a + b + 1) >> 1 computed in the
;; double-width mode, then truncated -- the canonical RTL for
;; pavgb/pavgw.  The expander fills in the +1 constant (operand index
;; <mask_expand_op3> varies between masked and unmasked forms).
15255 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
15256 [(set (match_operand:VI12_AVX2 0 "register_operand")
15257 (truncate:VI12_AVX2
15258 (lshiftrt:<ssedoublemode>
15259 (plus:<ssedoublemode>
15260 (plus:<ssedoublemode>
15261 (zero_extend:<ssedoublemode>
15262 (match_operand:VI12_AVX2 1 "vector_operand"))
15263 (zero_extend:<ssedoublemode>
15264 (match_operand:VI12_AVX2 2 "vector_operand")))
15265 (match_dup <mask_expand_op3>))
15267 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15269 operands[<mask_expand_op3>] = CONST1_RTX(<MODE>mode);
15270 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Matching insn: emits pavgb/pavgw (legacy) or the 3-operand
;; vpavg* form, optionally masked.
15273 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
15274 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
15275 (truncate:VI12_AVX2
15276 (lshiftrt:<ssedoublemode>
15277 (plus:<ssedoublemode>
15278 (plus:<ssedoublemode>
15279 (zero_extend:<ssedoublemode>
15280 (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
15281 (zero_extend:<ssedoublemode>
15282 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
15283 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
15285 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
15286 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15288 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
15289 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15290 [(set_attr "isa" "noavx,avx")
15291 (set_attr "type" "sseiadd")
15292 (set_attr "prefix_data16" "1,*")
15293 (set_attr "prefix" "orig,<mask_prefix>")
15294 (set_attr "mode" "<sseinsnmode>")])
15296 ;; The correct representation for this is absolutely enormous, and
15297 ;; surely not generally useful.
;; psadbw: sum of absolute byte differences, modeled as an opaque
;; unspec (see comment above) over the byte-mode source operands.
15298 (define_insn "<sse2_avx2>_psadbw"
15299 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
15300 (unspec:VI8_AVX2_AVX512BW
15301 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
15302 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
15306 psadbw\t{%2, %0|%0, %2}
15307 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
15308 [(set_attr "isa" "noavx,avx")
15309 (set_attr "type" "sseiadd")
15310 (set_attr "atom_unit" "simul")
15311 (set_attr "prefix_data16" "1,*")
15312 (set_attr "prefix" "orig,maybe_evex")
15313 (set_attr "mode" "<sseinsnmode>")])
;; movmskps/movmskpd: collect the sign bits of a float vector into the
;; low bits of a GPR.
15315 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
15316 [(set (match_operand:SI 0 "register_operand" "=r")
15318 [(match_operand:VF_128_256 1 "register_operand" "x")]
15321 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
15322 [(set_attr "type" "ssemov")
15323 (set_attr "prefix" "maybe_vex")
15324 (set_attr "mode" "<MODE>")])
;; Same, with the SI result zero-extended to DI (writing %k0 clears
;; the upper 32 bits).
15326 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
15327 [(set (match_operand:DI 0 "register_operand" "=r")
15330 [(match_operand:VF_128_256 1 "register_operand" "x")]
15332 "TARGET_64BIT && TARGET_SSE"
15333 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
15334 [(set_attr "type" "ssemov")
15335 (set_attr "prefix" "maybe_vex")
15336 (set_attr "mode" "<MODE>")])
;; Combiner forms: a (lt x 0) integer-vector comparison has the same
;; sign bits as x, so split back to a plain movmsk of the lowpart.
15338 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
15339 [(set (match_operand:SI 0 "register_operand" "=r")
15342 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15343 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
15347 "&& reload_completed"
15348 [(set (match_dup 0)
15349 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15350 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15351 [(set_attr "type" "ssemov")
15352 (set_attr "prefix" "maybe_vex")
15353 (set_attr "mode" "<MODE>")])
15355 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt"
15356 [(set (match_operand:DI 0 "register_operand" "=r")
15360 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15361 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
15363 "TARGET_64BIT && TARGET_SSE"
15365 "&& reload_completed"
15366 [(set (match_dup 0)
15367 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15368 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15369 [(set_attr "type" "ssemov")
15370 (set_attr "prefix" "maybe_vex")
15371 (set_attr "mode" "<MODE>")])
;; Combiner forms: an arithmetic right shift leaves each element's
;; sign bit unchanged, so movmsk of the shifted vector equals movmsk
;; of the original -- split drops the shift.
15373 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
15374 [(set (match_operand:SI 0 "register_operand" "=r")
15376 [(subreg:VF_128_256
15377 (ashiftrt:<sseintvecmode>
15378 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15379 (match_operand:QI 2 "const_int_operand" "n")) 0)]
15383 "&& reload_completed"
15384 [(set (match_dup 0)
15385 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15386 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15387 [(set_attr "type" "ssemov")
15388 (set_attr "prefix" "maybe_vex")
15389 (set_attr "mode" "<MODE>")])
;; Zero-extending variant of the same simplification.
15391 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift"
15392 [(set (match_operand:DI 0 "register_operand" "=r")
15395 [(subreg:VF_128_256
15396 (ashiftrt:<sseintvecmode>
15397 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15398 (match_operand:QI 2 "const_int_operand" "n")) 0)]
15400 "TARGET_64BIT && TARGET_SSE"
15402 "&& reload_completed"
15403 [(set (match_dup 0)
15404 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15405 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15406 [(set_attr "type" "ssemov")
15407 (set_attr "prefix" "maybe_vex")
15408 (set_attr "mode" "<MODE>")])
;; pmovmskb: collect the sign bit of every byte into a GPR.  The
;; prefix_data16 attribute is suppressed under AVX (VEX encoding).
15410 (define_insn "<sse2_avx2>_pmovmskb"
15411 [(set (match_operand:SI 0 "register_operand" "=r")
15413 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
15416 "%vpmovmskb\t{%1, %0|%0, %1}"
15417 [(set_attr "type" "ssemov")
15418 (set (attr "prefix_data16")
15420 (match_test "TARGET_AVX")
15422 (const_string "1")))
15423 (set_attr "prefix" "maybe_vex")
15424 (set_attr "mode" "SI")])
;; Zero-extended-to-DI variant (%k0 write clears the upper 32 bits).
15426 (define_insn "*<sse2_avx2>_pmovmskb_zext"
15427 [(set (match_operand:DI 0 "register_operand" "=r")
15430 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
15432 "TARGET_64BIT && TARGET_SSE2"
15433 "%vpmovmskb\t{%1, %k0|%k0, %1}"
15434 [(set_attr "type" "ssemov")
15435 (set (attr "prefix_data16")
15437 (match_test "TARGET_AVX")
15439 (const_string "1")))
15440 (set_attr "prefix" "maybe_vex")
15441 (set_attr "mode" "SI")])
;; Combiner forms: pmovmskb of (lt x 0) equals pmovmskb of x (same
;; byte sign bits) -- split drops the comparison.
15443 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
15444 [(set (match_operand:SI 0 "register_operand" "=r")
15446 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
15447 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
15452 [(set (match_dup 0)
15453 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15455 [(set_attr "type" "ssemov")
15456 (set (attr "prefix_data16")
15458 (match_test "TARGET_AVX")
15460 (const_string "1")))
15461 (set_attr "prefix" "maybe_vex")
15462 (set_attr "mode" "SI")])
15464 (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
15465 [(set (match_operand:DI 0 "register_operand" "=r")
15468 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
15469 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
15471 "TARGET_64BIT && TARGET_SSE2"
15474 [(set (match_dup 0)
15475 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15477 [(set_attr "type" "ssemov")
15478 (set (attr "prefix_data16")
15480 (match_test "TARGET_AVX")
15482 (const_string "1")))
15483 (set_attr "prefix" "maybe_vex")
15484 (set_attr "mode" "SI")])
;; Expander for the MASKMOVDQU byte-masked store; the matching insn
;; pattern "*sse2_maskmovdqu" below carries the operand details.
15486 (define_expand "sse2_maskmovdqu"
15487 [(set (match_operand:V16QI 0 "memory_operand")
15488 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
15489 (match_operand:V16QI 2 "register_operand")
;; MASKMOVDQU: masked byte store of %1 under control of %2 through the
;; implicit %rdi/%edi pointer (constraint "D").  The old memory contents
;; appear as an input in the unspec because only some bytes are written.
15494 (define_insn "*sse2_maskmovdqu"
15495 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
15496 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
15497 (match_operand:V16QI 2 "register_operand" "x")
15498 (mem:V16QI (match_dup 0))]
15502 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
15503 that requires %v to be at the beginning of the opcode name.  */
15504 if (Pmode != word_mode)
15505 fputs ("\taddr32", asm_out_file);
15506 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
15508 [(set_attr "type" "ssemov")
15509 (set_attr "prefix_data16" "1")
15510 (set (attr "length_address")
15511 (symbol_ref ("Pmode != word_mode")))
15512 ;; The implicit %rdi operand confuses default length_vex computation.
15513 (set (attr "length_vex")
15514 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
15515 (set_attr "prefix" "maybe_vex")
15516 (set_attr "znver1_decode" "vector")
15517 (set_attr "mode" "TI")])
;; LDMXCSR: load the SSE control/status register MXCSR from memory
;; (volatile unspec: has side effects beyond its operands).
15519 (define_insn "sse_ldmxcsr"
15520 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
15524 [(set_attr "type" "sse")
15525 (set_attr "atom_sse_attr" "mxcsr")
15526 (set_attr "prefix" "maybe_vex")
15527 (set_attr "memory" "load")])
;; STMXCSR: store the SSE control/status register MXCSR to memory.
15529 (define_insn "sse_stmxcsr"
15530 [(set (match_operand:SI 0 "memory_operand" "=m")
15531 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
15534 [(set_attr "type" "sse")
15535 (set_attr "atom_sse_attr" "mxcsr")
15536 (set_attr "prefix" "maybe_vex")
15537 (set_attr "memory" "store")])
;; CLFLUSH: flush the cache line containing the given address.
;; Memory effect is unknown since an arbitrary line is invalidated.
15539 (define_insn "sse2_clflush"
15540 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
15544 [(set_attr "type" "sse")
15545 (set_attr "atom_sse_attr" "fence")
15546 (set_attr "memory" "unknown")])
15548 ;; As per AMD and Intel ISA manuals, the first operand is extensions
15549 ;; and it goes to %ecx. The second operand received is hints and it goes to %eax (constraint "a").
15551 (define_insn "sse3_mwait"
15552 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
15553 (match_operand:SI 1 "register_operand" "a")]
15556 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
15557 ;; Since 32bit register operands are implicitly zero extended to 64bit,
15558 ;; we only need to set up 32bit registers.
15560 [(set_attr "length" "3")])
;; MONITOR: arm address-range monitoring.  Address in %rax/%eax
;; (constraint "a", Pmode-sized), extensions in %ecx, hints in %edx.
15562 (define_insn "sse3_monitor_<mode>"
15563 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
15564 (match_operand:SI 1 "register_operand" "c")
15565 (match_operand:SI 2 "register_operand" "d")]
15568 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
15569 ;; RCX and RDX are used.  Since 32bit register operands are implicitly
15570 ;; zero extended to 64bit, we only need to set up 32bit registers.
15572 [(set (attr "length")
15573 (symbol_ref ("(Pmode != word_mode) + 3")))])
15575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15577 ;; SSSE3 instructions
15579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; RTX codes shared by the horizontal add/subtract (ph{add,sub}{,s}*)
;; patterns: plain and signed-saturating add and subtract.
15581 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; AVX2 vph{add,sub}{,s}w: horizontal add/subtract of adjacent HImode
;; pairs of operands 1 and 2, spelled out element by element so the RTL
;; exactly describes the result ordering of the 256-bit instruction.
15583 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
15584 [(set (match_operand:V16HI 0 "register_operand" "=x")
15589 (ssse3_plusminus:HI
15591 (match_operand:V16HI 1 "register_operand" "x")
15592 (parallel [(const_int 0)]))
15593 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
15594 (ssse3_plusminus:HI
15595 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
15596 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
15598 (ssse3_plusminus:HI
15599 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
15600 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
15601 (ssse3_plusminus:HI
15602 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
15603 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
15606 (ssse3_plusminus:HI
15607 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
15608 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
15609 (ssse3_plusminus:HI
15610 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
15611 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
15613 (ssse3_plusminus:HI
15614 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
15615 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
15616 (ssse3_plusminus:HI
15617 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
15618 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
15622 (ssse3_plusminus:HI
15624 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
15625 (parallel [(const_int 0)]))
15626 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
15627 (ssse3_plusminus:HI
15628 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
15629 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
15631 (ssse3_plusminus:HI
15632 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
15633 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
15634 (ssse3_plusminus:HI
15635 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
15636 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
15639 (ssse3_plusminus:HI
15640 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
15641 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
15642 (ssse3_plusminus:HI
15643 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
15644 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
15646 (ssse3_plusminus:HI
15647 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
15648 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
15649 (ssse3_plusminus:HI
15650 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
15651 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
15653 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
15654 [(set_attr "type" "sseiadd")
15655 (set_attr "prefix_extra" "1")
15656 (set_attr "prefix" "vex")
15657 (set_attr "mode" "OI")])
;; 128-bit ph{add,sub}{,s}w: horizontal add/subtract of adjacent word
;; pairs.  Two alternatives: legacy SSE (destructive, operand 0 == 1)
;; and the three-operand VEX encoding.
15659 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
15660 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
15664 (ssse3_plusminus:HI
15666 (match_operand:V8HI 1 "register_operand" "0,x")
15667 (parallel [(const_int 0)]))
15668 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
15669 (ssse3_plusminus:HI
15670 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
15671 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
15673 (ssse3_plusminus:HI
15674 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
15675 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
15676 (ssse3_plusminus:HI
15677 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
15678 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
15681 (ssse3_plusminus:HI
15683 (match_operand:V8HI 2 "vector_operand" "xBm,xm")
15684 (parallel [(const_int 0)]))
15685 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
15686 (ssse3_plusminus:HI
15687 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
15688 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
15690 (ssse3_plusminus:HI
15691 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
15692 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
15693 (ssse3_plusminus:HI
15694 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
15695 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
15698 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
15699 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
15700 [(set_attr "isa" "noavx,avx")
15701 (set_attr "type" "sseiadd")
15702 (set_attr "atom_unit" "complex")
15703 (set_attr "prefix_data16" "1,*")
15704 (set_attr "prefix_extra" "1")
15705 (set_attr "prefix" "orig,vex")
15706 (set_attr "mode" "TI")])
;; MMX-width (V4HI) ph{add,sub}{,s}w.  Alternative 0 is the native MMX
;; insn; on TARGET_MMX_WITH_SSE the split widens everything to V8HI,
;; emits the SSE pattern above, and repairs the upper half of the
;; destination via ix86_move_vector_high_sse_to_mmx.
15708 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
15709 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
15712 (ssse3_plusminus:HI
15714 (match_operand:V4HI 1 "register_operand" "0,0,Yv")
15715 (parallel [(const_int 0)]))
15716 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
15717 (ssse3_plusminus:HI
15718 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
15719 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
15721 (ssse3_plusminus:HI
15723 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
15724 (parallel [(const_int 0)]))
15725 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
15726 (ssse3_plusminus:HI
15727 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
15728 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
15729 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
15731 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
15734 "TARGET_MMX_WITH_SSE && reload_completed"
15737 /* Generate SSE version of the operation.  */
15738 rtx op0 = lowpart_subreg (V8HImode, operands[0],
15739 GET_MODE (operands[0]));
15740 rtx op1 = lowpart_subreg (V8HImode, operands[1],
15741 GET_MODE (operands[1]));
15742 rtx op2 = lowpart_subreg (V8HImode, operands[2],
15743 GET_MODE (operands[2]));
15744 emit_insn (gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2));
15745 ix86_move_vector_high_sse_to_mmx (op0);
15748 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
15749 (set_attr "type" "sseiadd")
15750 (set_attr "atom_unit" "complex")
15751 (set_attr "prefix_extra" "1")
15752 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15753 (set_attr "mode" "DI,TI,TI")])
;; AVX2 vph{add,sub}d: horizontal add/subtract of adjacent SImode pairs
;; across the 256-bit vector (no saturating forms exist for dwords).
15755 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
15756 [(set (match_operand:V8SI 0 "register_operand" "=x")
15762 (match_operand:V8SI 1 "register_operand" "x")
15763 (parallel [(const_int 0)]))
15764 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
15766 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
15767 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
15770 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
15771 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
15773 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
15774 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
15779 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
15780 (parallel [(const_int 0)]))
15781 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
15783 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
15784 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
15787 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
15788 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
15790 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
15791 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
15793 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
15794 [(set_attr "type" "sseiadd")
15795 (set_attr "prefix_extra" "1")
15796 (set_attr "prefix" "vex")
15797 (set_attr "mode" "OI")])
;; 128-bit ph{add,sub}d: horizontal add/subtract of adjacent dword
;; pairs; legacy destructive SSE and three-operand VEX alternatives.
15799 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
15800 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
15805 (match_operand:V4SI 1 "register_operand" "0,x")
15806 (parallel [(const_int 0)]))
15807 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
15809 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
15810 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
15814 (match_operand:V4SI 2 "vector_operand" "xBm,xm")
15815 (parallel [(const_int 0)]))
15816 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
15818 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
15819 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
15822 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
15823 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}
15824 [(set_attr "isa" "noavx,avx")
15825 (set_attr "type" "sseiadd")
15826 (set_attr "atom_unit" "complex")
15827 (set_attr "prefix_data16" "1,*")
15828 (set_attr "prefix_extra" "1")
15829 (set_attr "prefix" "orig,vex")
15830 (set_attr "mode" "TI")])
;; MMX-width (V2SI) ph{add,sub}d.  As with the V4HI variant, on
;; TARGET_MMX_WITH_SSE the split widens to V4SI, uses the SSE pattern
;; above, then fixes up the destination's high half.
15832 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
15833 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
15837 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
15838 (parallel [(const_int 0)]))
15839 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
15842 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
15843 (parallel [(const_int 0)]))
15844 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
15845 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
15847 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
15850 "TARGET_MMX_WITH_SSE && reload_completed"
15853 /* Generate SSE version of the operation.  */
15854 rtx op0 = lowpart_subreg (V4SImode, operands[0],
15855 GET_MODE (operands[0]));
15856 rtx op1 = lowpart_subreg (V4SImode, operands[1],
15857 GET_MODE (operands[1]));
15858 rtx op2 = lowpart_subreg (V4SImode, operands[2],
15859 GET_MODE (operands[2]));
15860 emit_insn (gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2));
15861 ix86_move_vector_high_sse_to_mmx (op0);
15864 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
15865 (set_attr "type" "sseiadd")
15866 (set_attr "atom_unit" "complex")
15867 (set_attr "prefix_extra" "1")
15868 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15869 (set_attr "mode" "DI,TI,TI")])
;; AVX2 vpmaddubsw: multiply unsigned bytes of operand 1 by signed
;; bytes of operand 2 and sum adjacent even/odd products into words.
;; The even- and odd-index byte lanes are selected explicitly below.
15871 (define_insn "avx2_pmaddubsw256"
15872 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
15877 (match_operand:V32QI 1 "register_operand" "x,v")
15878 (parallel [(const_int 0) (const_int 2)
15879 (const_int 4) (const_int 6)
15880 (const_int 8) (const_int 10)
15881 (const_int 12) (const_int 14)
15882 (const_int 16) (const_int 18)
15883 (const_int 20) (const_int 22)
15884 (const_int 24) (const_int 26)
15885 (const_int 28) (const_int 30)])))
15888 (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
15889 (parallel [(const_int 0) (const_int 2)
15890 (const_int 4) (const_int 6)
15891 (const_int 8) (const_int 10)
15892 (const_int 12) (const_int 14)
15893 (const_int 16) (const_int 18)
15894 (const_int 20) (const_int 22)
15895 (const_int 24) (const_int 26)
15896 (const_int 28) (const_int 30)]))))
15899 (vec_select:V16QI (match_dup 1)
15900 (parallel [(const_int 1) (const_int 3)
15901 (const_int 5) (const_int 7)
15902 (const_int 9) (const_int 11)
15903 (const_int 13) (const_int 15)
15904 (const_int 17) (const_int 19)
15905 (const_int 21) (const_int 23)
15906 (const_int 25) (const_int 27)
15907 (const_int 29) (const_int 31)])))
15909 (vec_select:V16QI (match_dup 2)
15910 (parallel [(const_int 1) (const_int 3)
15911 (const_int 5) (const_int 7)
15912 (const_int 9) (const_int 11)
15913 (const_int 13) (const_int 15)
15914 (const_int 17) (const_int 19)
15915 (const_int 21) (const_int 23)
15916 (const_int 25) (const_int 27)
15917 (const_int 29) (const_int 31)]))))))]
15919 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
15920 [(set_attr "isa" "*,avx512bw")
15921 (set_attr "type" "sseiadd")
15922 (set_attr "prefix_extra" "1")
15923 (set_attr "prefix" "vex,evex")
15924 (set_attr "mode" "OI")])
15926 ;; The correct representation for this is absolutely enormous, and
15927 ;; surely not generally useful.
;; 512-bit (and masked VL) vpmaddubsw, therefore modeled as an opaque
;; unspec rather than the explicit per-element RTL used above.
15928 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
15929 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
15930 (unspec:VI2_AVX512VL
15931 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
15932 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
15933 UNSPEC_PMADDUBSW512))]
15935 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
15936 [(set_attr "type" "sseiadd")
15937 (set_attr "prefix" "evex")
15938 (set_attr "mode" "XI")])
;; AVX512BW vpmulhrsw (512-bit): rounded high half of the widened
;; word-by-word product; the all-ones vector is the rounding addend.
15940 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
15941 [(set (match_operand:V32HI 0 "register_operand" "=v")
15948 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
15950 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
15952 (const_vector:V32HI [(const_int 1) (const_int 1)
15953 (const_int 1) (const_int 1)
15954 (const_int 1) (const_int 1)
15955 (const_int 1) (const_int 1)
15956 (const_int 1) (const_int 1)
15957 (const_int 1) (const_int 1)
15958 (const_int 1) (const_int 1)
15959 (const_int 1) (const_int 1)
15960 (const_int 1) (const_int 1)
15961 (const_int 1) (const_int 1)
15962 (const_int 1) (const_int 1)
15963 (const_int 1) (const_int 1)
15964 (const_int 1) (const_int 1)
15965 (const_int 1) (const_int 1)
15966 (const_int 1) (const_int 1)
15967 (const_int 1) (const_int 1)]))
15970 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15971 [(set_attr "type" "sseimul")
15972 (set_attr "prefix" "evex")
15973 (set_attr "mode" "XI")])
;; 128-bit pmaddubsw: unsigned-byte x signed-byte multiply with
;; horizontal pairwise add into V8HI.  Alternatives: legacy SSE
;; (destructive), VEX, and EVEX (AVX512BW) register files.
15975 (define_insn "ssse3_pmaddubsw128"
15976 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
15981 (match_operand:V16QI 1 "register_operand" "0,x,v")
15982 (parallel [(const_int 0) (const_int 2)
15983 (const_int 4) (const_int 6)
15984 (const_int 8) (const_int 10)
15985 (const_int 12) (const_int 14)])))
15988 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
15989 (parallel [(const_int 0) (const_int 2)
15990 (const_int 4) (const_int 6)
15991 (const_int 8) (const_int 10)
15992 (const_int 12) (const_int 14)]))))
15995 (vec_select:V8QI (match_dup 1)
15996 (parallel [(const_int 1) (const_int 3)
15997 (const_int 5) (const_int 7)
15998 (const_int 9) (const_int 11)
15999 (const_int 13) (const_int 15)])))
16001 (vec_select:V8QI (match_dup 2)
16002 (parallel [(const_int 1) (const_int 3)
16003 (const_int 5) (const_int 7)
16004 (const_int 9) (const_int 11)
16005 (const_int 13) (const_int 15)]))))))]
16008 pmaddubsw\t{%2, %0|%0, %2}
16009 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
16010 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16011 [(set_attr "isa" "noavx,avx,avx512bw")
16012 (set_attr "type" "sseiadd")
16013 (set_attr "atom_unit" "simul")
16014 (set_attr "prefix_data16" "1,*,*")
16015 (set_attr "prefix_extra" "1")
16016 (set_attr "prefix" "orig,vex,evex")
16017 (set_attr "mode" "TI")])
;; MMX-width (V4HI result) pmaddubsw; native MMX alternative plus the
;; SSE-register emulation alternatives used under TARGET_MMX_WITH_SSE.
16019 (define_insn "ssse3_pmaddubsw"
16020 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16025 (match_operand:V8QI 1 "register_operand" "0,0,Yv")
16026 (parallel [(const_int 0) (const_int 2)
16027 (const_int 4) (const_int 6)])))
16030 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
16031 (parallel [(const_int 0) (const_int 2)
16032 (const_int 4) (const_int 6)]))))
16035 (vec_select:V4QI (match_dup 1)
16036 (parallel [(const_int 1) (const_int 3)
16037 (const_int 5) (const_int 7)])))
16039 (vec_select:V4QI (match_dup 2)
16040 (parallel [(const_int 1) (const_int 3)
16041 (const_int 5) (const_int 7)]))))))]
16042 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16044 pmaddubsw\t{%2, %0|%0, %2}
16045 pmaddubsw\t{%2, %0|%0, %2}
16046 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16047 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16048 (set_attr "type" "sseiadd")
16049 (set_attr "atom_unit" "simul")
16050 (set_attr "prefix_extra" "1")
16051 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16052 (set_attr "mode" "DI,TI,TI")])
;; Modes for the pmulhrsw expanders: V8HI always, V16HI with AVX2.
16054 (define_mode_iterator PMULHRSW
16055 [V8HI (V16HI "TARGET_AVX2")])
;; Masked pmulhrsw expander (AVX512BW+VL): rounded high half of the
;; widened signed product, merged with operand 3 under mask operand 4.
;; operands[5] is the implicit all-ones rounding constant.
16057 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
16058 [(set (match_operand:PMULHRSW 0 "register_operand")
16059 (vec_merge:PMULHRSW
16061 (lshiftrt:<ssedoublemode>
16062 (plus:<ssedoublemode>
16063 (lshiftrt:<ssedoublemode>
16064 (mult:<ssedoublemode>
16065 (sign_extend:<ssedoublemode>
16066 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
16067 (sign_extend:<ssedoublemode>
16068 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
16072 (match_operand:PMULHRSW 3 "register_operand")
16073 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
16074 "TARGET_AVX512BW && TARGET_AVX512VL"
16076 operands[5] = CONST1_RTX(<MODE>mode);
16077 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Unmasked pmulhrsw expander; operands[3] supplies the all-ones
;; rounding constant, and operand order is canonicalized for the
;; commutative multiply by ix86_fixup_binary_operands_no_copy.
16080 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
16081 [(set (match_operand:PMULHRSW 0 "register_operand")
16083 (lshiftrt:<ssedoublemode>
16084 (plus:<ssedoublemode>
16085 (lshiftrt:<ssedoublemode>
16086 (mult:<ssedoublemode>
16087 (sign_extend:<ssedoublemode>
16088 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
16089 (sign_extend:<ssedoublemode>
16090 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
16096 operands[3] = CONST1_RTX(<MODE>mode);
16097 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; pmulhrsw insn pattern: widened signed multiply, shift, round with
;; the const1 vector (operand 3), then take the high half.  Legacy SSE,
;; VEX, and EVEX alternatives; at most one memory operand allowed.
16100 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
16101 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
16103 (lshiftrt:<ssedoublemode>
16104 (plus:<ssedoublemode>
16105 (lshiftrt:<ssedoublemode>
16106 (mult:<ssedoublemode>
16107 (sign_extend:<ssedoublemode>
16108 (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
16109 (sign_extend:<ssedoublemode>
16110 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
16112 (match_operand:VI2_AVX2 3 "const1_operand"))
16114 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
16115 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16117 pmulhrsw\t{%2, %0|%0, %2}
16118 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
16119 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
16120 [(set_attr "isa" "noavx,avx,avx512bw")
16121 (set_attr "type" "sseimul")
16122 (set_attr "prefix_data16" "1,*,*")
16123 (set_attr "prefix_extra" "1")
16124 (set_attr "prefix" "orig,maybe_evex,evex")
16125 (set_attr "mode" "<sseinsnmode>")])
;; MMX-width pmulhrsw expander; mirrors the V8HI expander above with
;; the V4HI rounding constant in operands[3].
16127 (define_expand "ssse3_pmulhrswv4hi3"
16128 [(set (match_operand:V4HI 0 "register_operand")
16135 (match_operand:V4HI 1 "register_mmxmem_operand"))
16137 (match_operand:V4HI 2 "register_mmxmem_operand")))
16141 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16143 operands[3] = CONST1_RTX(V4HImode);
16144 ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
;; MMX-width pmulhrsw insn: native MMX alternative plus SSE-register
;; emulation alternatives; at most one memory operand.
16147 (define_insn "*ssse3_pmulhrswv4hi3"
16148 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16155 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
16157 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
16159 (match_operand:V4HI 3 "const1_operand"))
16161 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
16163 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16165 pmulhrsw\t{%2, %0|%0, %2}
16166 pmulhrsw\t{%2, %0|%0, %2}
16167 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
16168 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16169 (set_attr "type" "sseimul")
16170 (set_attr "prefix_extra" "1")
16171 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16172 (set_attr "mode" "DI,TI,TI")])
;; pshufb/vpshufb: shuffle bytes of operand 1 under control of the
;; shuffle-control bytes in operand 2 (opaque unspec; optional AVX512
;; masking via <mask_name>).
16174 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
16175 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
16177 [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
16178 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
16180 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
16182 pshufb\t{%2, %0|%0, %2}
16183 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
16184 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16185 [(set_attr "isa" "noavx,avx,avx512bw")
16186 (set_attr "type" "sselog1")
16187 (set_attr "prefix_data16" "1,*,*")
16188 (set_attr "prefix_extra" "1")
16189 (set_attr "prefix" "orig,maybe_evex,evex")
16190 (set_attr "btver2_decode" "vector")
16191 (set_attr "mode" "<sseinsnmode>")])
;; MMX-width pshufb.  Under TARGET_MMX_WITH_SSE the split emulates the
;; 64-bit instruction with the 128-bit one: the control bytes are ANDed
;; with a 0xf7 mask (clearing bit 3 so indices stay within the low 8
;; bytes) using scratch operand 3, then V16QI pshufb is emitted.
16193 (define_insn_and_split "ssse3_pshufbv8qi3"
16194 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
16195 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
16196 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
16198 (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
16199 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16201 pshufb\t{%2, %0|%0, %2}
16204 "TARGET_MMX_WITH_SSE && reload_completed"
16205 [(set (match_dup 3) (match_dup 5))
16207 (and:V4SI (match_dup 3) (match_dup 2)))
16209 (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
16211 /* Emulate MMX version of pshufb with SSE version by masking out the
16212 bit 3 of the shuffle control byte.  */
16213 operands[0] = lowpart_subreg (V16QImode, operands[0],
16214 GET_MODE (operands[0]));
16215 operands[1] = lowpart_subreg (V16QImode, operands[1],
16216 GET_MODE (operands[1]));
16217 operands[2] = lowpart_subreg (V4SImode, operands[2],
16218 GET_MODE (operands[2]));
16219 operands[4] = lowpart_subreg (V16QImode, operands[3],
16220 GET_MODE (operands[3]));
16221 rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7),
16222 GEN_INT (0xf7f7f7f7),
16223 GEN_INT (0xf7f7f7f7),
16224 GEN_INT (0xf7f7f7f7));
16225 rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
16226 operands[5] = force_const_mem (V4SImode, vec_const);
16228 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16229 (set_attr "prefix_extra" "1")
16230 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16231 (set_attr "mode" "DI,TI,TI")])
;; psign{b,w,d}: conditionally negate/zero elements of operand 1
;; according to the sign of the corresponding element of operand 2
;; (opaque unspec); legacy SSE and VEX alternatives.
16233 (define_insn "<ssse3_avx2>_psign<mode>3"
16234 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
16236 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
16237 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
16241 psign<ssemodesuffix>\t{%2, %0|%0, %2}
16242 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16243 [(set_attr "isa" "noavx,avx")
16244 (set_attr "type" "sselog1")
16245 (set_attr "prefix_data16" "1,*")
16246 (set_attr "prefix_extra" "1")
16247 (set_attr "prefix" "orig,vex")
16248 (set_attr "mode" "<sseinsnmode>")])
;; MMX-width psign for all MMXMODEI modes; native MMX plus SSE-register
;; emulation alternatives under TARGET_MMX_WITH_SSE.
16250 (define_insn "ssse3_psign<mode>3"
16251 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
16253 [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
16254 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")]
16256 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16258 psign<mmxvecsize>\t{%2, %0|%0, %2}
16259 psign<mmxvecsize>\t{%2, %0|%0, %2}
16260 vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
16261 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16262 (set_attr "type" "sselog1")
16263 (set_attr "prefix_extra" "1")
16264 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16265 (set_attr "mode" "DI,TI,TI")])
;; Masked vpalignr (AVX512BW): concatenated byte shift of operands 1/2,
;; merged with operand 4 under mask operand 5.  Operand 3 arrives as a
;; bit count (multiple of 8) and is converted to bytes before output.
16267 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
16268 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
16269 (vec_merge:VI1_AVX512
16271 [(match_operand:VI1_AVX512 1 "register_operand" "v")
16272 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
16273 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
16275 (match_operand:VI1_AVX512 4 "nonimm_or_0_operand" "0C")
16276 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
16277 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
16279 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16280 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
16282 [(set_attr "type" "sseishft")
16283 (set_attr "atom_unit" "sishuf")
16284 (set_attr "prefix_extra" "1")
16285 (set_attr "length_immediate" "1")
16286 (set_attr "prefix" "evex")
16287 (set_attr "mode" "<sseinsnmode>")])
;; palignr/vpalignr: byte-aligned extraction from the concatenation of
;; operands 1 and 2.  Operand 3 is a bit count (multiple of 8) that is
;; scaled down to bytes in the output routine.
16289 (define_insn "<ssse3_avx2>_palignr<mode>"
16290 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
16291 (unspec:SSESCALARMODE
16292 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
16293 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
16294 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
16298 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16300 switch (which_alternative)
16303 return "palignr\t{%3, %2, %0|%0, %2, %3}";
16306 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
16308 gcc_unreachable ();
16311 [(set_attr "isa" "noavx,avx,avx512bw")
16312 (set_attr "type" "sseishft")
16313 (set_attr "atom_unit" "sishuf")
16314 (set_attr "prefix_data16" "1,*,*")
16315 (set_attr "prefix_extra" "1")
16316 (set_attr "length_immediate" "1")
16317 (set_attr "prefix" "orig,vex,evex")
16318 (set_attr "mode" "<sseinsnmode>")])
;; MMX-width palignr on DImode.  Under TARGET_MMX_WITH_SSE the split
;; emulates it: concatenate the two halves into a V2DI register (order
;; adjusted, and a V4SI half-swap emitted when the concat direction is
;; constrained), then shift right with SSE psrldq (lshiftrt:V1TI).
16320 (define_insn_and_split "ssse3_palignrdi"
16321 [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
16322 (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
16323 (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
16324 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
16326 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16328 switch (which_alternative)
16331 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16332 return "palignr\t{%3, %2, %0|%0, %2, %3}";
16337 gcc_unreachable ();
16340 "TARGET_MMX_WITH_SSE && reload_completed"
16341 [(set (match_dup 0)
16342 (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
16344 /* Emulate MMX palignrdi with SSE psrldq.  */
16345 rtx op0 = lowpart_subreg (V2DImode, operands[0],
16346 GET_MODE (operands[0]));
16349 insn = gen_vec_concatv2di (op0, operands[2], operands[1]);
16352 /* NB: SSE can only concatenate OP0 and OP1 to OP0.  */
16353 insn = gen_vec_concatv2di (op0, operands[1], operands[2]);
16355 /* Swap bits 0:63 with bits 64:127.  */
16356 rtx mask = gen_rtx_PARALLEL (VOIDmode,
16357 gen_rtvec (4, GEN_INT (2),
16361 rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
16362 rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
16363 insn = gen_rtx_SET (op1, op2);
16366 operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
16368 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16369 (set_attr "type" "sseishft")
16370 (set_attr "atom_unit" "sishuf")
16371 (set_attr "prefix_extra" "1")
16372 (set_attr "length_immediate" "1")
16373 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16374 (set_attr "mode" "DI,TI,TI")])
16376 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
16377 ;; modes for abs instruction on pre AVX-512 targets.
;; Each mode is gated on the ISA that provides its pabs form; note the
;; 128/256-bit DImode entries require AVX512VL rather than AVX2.
16378 (define_mode_iterator VI1248_AVX512VL_AVX512BW
16379 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
16380 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
16381 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
16382 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; pabs{b,w,d,q}: element-wise absolute value, modeled directly with
;; the abs RTX code over the iterator defined above.
16384 (define_insn "*abs<mode>2"
16385 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
16386 (abs:VI1248_AVX512VL_AVX512BW
16387 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
16389 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
16390 [(set_attr "type" "sselog1")
16391 (set_attr "prefix_data16" "1")
16392 (set_attr "prefix_extra" "1")
16393 (set_attr "prefix" "maybe_vex")
16394 (set_attr "mode" "<sseinsnmode>")])
;; Masked pabs for dword/qword element modes (VI48_AVX512VL): result
;; merged with operand 2 (register or zero) under mask operand 3.
16396 (define_insn "abs<mode>2_mask"
16397 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
16398 (vec_merge:VI48_AVX512VL
16400 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
16401 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "0C")
16402 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
16404 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
16405 [(set_attr "type" "sselog1")
16406 (set_attr "prefix" "evex")
16407 (set_attr "mode" "<sseinsnmode>")])
;; Masked pabs for byte/word element modes (VI12_AVX512VL); same
;; structure as the VI48 variant above.
16409 (define_insn "abs<mode>2_mask"
16410 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16411 (vec_merge:VI12_AVX512VL
16413 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
16414 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
16415 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
16417 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
16418 [(set_attr "type" "sselog1")
16419 (set_attr "prefix" "evex")
16420 (set_attr "mode" "<sseinsnmode>")])
;; abs expander for all VI_AVX2 modes.  When no single pabs insn exists
;; (pre-SSSE3, or 128/256-bit DImode without AVX512VL) the operation is
;; open-coded via ix86_expand_sse2_abs.
16422 (define_expand "abs<mode>2"
16423 [(set (match_operand:VI_AVX2 0 "register_operand")
16425 (match_operand:VI_AVX2 1 "vector_operand")))]
16429 || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
16430 && !TARGET_AVX512VL))
16432 ix86_expand_sse2_abs (operands[0], operands[1]);
;; MMX-width pabs{b,w,d}: native MMX alternative plus an SSE-register
;; alternative for 64-bit TARGET_MMX_WITH_SSE.
16437 (define_insn "abs<mode>2"
16438 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
16440 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
16441 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16443 pabs<mmxvecsize>\t{%1, %0|%0, %1}
16444 %vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
16445 [(set_attr "mmx_isa" "native,x64")
16446 (set_attr "type" "sselog1")
16447 (set_attr "prefix_rep" "0")
16448 (set_attr "prefix_extra" "1")
16449 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16450 (set_attr "mode" "DI,TI")])
16452 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16454 ;; AMD SSE4A instructions
16456 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar non-temporal store: movntss/movntsd write a scalar
;; float (MODEF = SF/DF) straight to memory, bypassing the cache.
;; NOTE(review): the unspec rtx and insn condition lines are missing
;; from this extraction.
16458 (define_insn "sse4a_movnt<mode>"
16459 [(set (match_operand:MODEF 0 "memory_operand" "=m")
16461 [(match_operand:MODEF 1 "register_operand" "x")]
16464 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
16465 [(set_attr "type" "ssemov")
16466 (set_attr "mode" "<MODE>")])
;; Non-temporal store of element 0 of a 128-bit float vector
;; (vec_select with (const_int 0)); same movntss/movntsd mnemonics.
16468 (define_insn "sse4a_vmmovnt<mode>"
16469 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
16470 (unspec:<ssescalarmode>
16471 [(vec_select:<ssescalarmode>
16472 (match_operand:VF_128 1 "register_operand" "x")
16473 (parallel [(const_int 0)]))]
16476 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
16477 [(set_attr "type" "ssemov")
16478 (set_attr "mode" "<ssescalarmode>")])
;; extrq with immediate length (operand 2) and bit index (operand 3);
;; operand 1 is tied to the destination ("0").  Both immediates are
;; byte-sized, hence length_immediate "2" (two immediate bytes total).
16480 (define_insn "sse4a_extrqi"
16481 [(set (match_operand:V2DI 0 "register_operand" "=x")
16482 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16483 (match_operand 2 "const_0_to_255_operand")
16484 (match_operand 3 "const_0_to_255_operand")]
16487 "extrq\t{%3, %2, %0|%0, %2, %3}"
16488 [(set_attr "type" "sse")
16489 (set_attr "prefix_data16" "1")
16490 (set_attr "length_immediate" "2")
16491 (set_attr "mode" "TI")])
;; Register form of extrq: length/index come from xmm register
;; operand 2 instead of immediates.
16493 (define_insn "sse4a_extrq"
16494 [(set (match_operand:V2DI 0 "register_operand" "=x")
16495 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16496 (match_operand:V16QI 2 "register_operand" "x")]
16499 "extrq\t{%2, %0|%0, %2}"
16500 [(set_attr "type" "sse")
16501 (set_attr "prefix_data16" "1")
16502 (set_attr "mode" "TI")])
;; insertq with immediate length (operand 3) and index (operand 4);
;; bits come from operand 2, destination tied to operand 1.
16504 (define_insn "sse4a_insertqi"
16505 [(set (match_operand:V2DI 0 "register_operand" "=x")
16506 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16507 (match_operand:V2DI 2 "register_operand" "x")
16508 (match_operand 3 "const_0_to_255_operand")
16509 (match_operand 4 "const_0_to_255_operand")]
16512 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
16513 [(set_attr "type" "sseins")
16514 (set_attr "prefix_data16" "0")
16515 (set_attr "prefix_rep" "1")
16516 (set_attr "length_immediate" "2")
16517 (set_attr "mode" "TI")])
;; Register form of insertq: field descriptor lives in operand 2.
16519 (define_insn "sse4a_insertq"
16520 [(set (match_operand:V2DI 0 "register_operand" "=x")
16521 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16522 (match_operand:V2DI 2 "register_operand" "x")]
16525 "insertq\t{%2, %0|%0, %2}"
16526 [(set_attr "type" "sseins")
16527 (set_attr "prefix_data16" "0")
16528 (set_attr "prefix_rep" "1")
16529 (set_attr "mode" "TI")])
16531 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16533 ;; Intel SSE4.1 instructions
16535 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16537 ;; Mapping of immediate bits for blend instructions
;; Maximum immediate value for blendps/blendpd: one selector bit per
;; element, so (1 << nelts) - 1 for each float vector mode.
16538 (define_mode_attr blendbits
16539 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; Immediate blend: vec_merge of operands 2 and 1 under the constant
;; bit mask in operand 3 (range limited by <blendbits>).  Alternatives
;; 0/1 are the legacy two-operand SSE4.1 form (dest tied to operand 1),
;; alternative 2 the three-operand VEX form.
16541 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
16542 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
16543 (vec_merge:VF_128_256
16544 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
16545 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
16546 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
16549 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16550 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16551 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16552 [(set_attr "isa" "noavx,noavx,avx")
16553 (set_attr "type" "ssemov")
16554 (set_attr "length_immediate" "1")
16555 (set_attr "prefix_data16" "1,1,*")
16556 (set_attr "prefix_extra" "1")
16557 (set_attr "prefix" "orig,orig,vex")
16558 (set_attr "mode" "<MODE>")])
;; Variable blend: the selector is a register (operand 3).  The legacy
;; SSE4.1 encoding requires the mask in xmm0, enforced by the "Yz"
;; constraint; AVX takes it as an explicit fourth operand.
16560 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
16561 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
16563 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
16564 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
16565 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
16569 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16570 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16571 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16572 [(set_attr "isa" "noavx,noavx,avx")
16573 (set_attr "type" "ssemov")
16574 (set_attr "length_immediate" "1")
16575 (set_attr "prefix_data16" "1,1,*")
16576 (set_attr "prefix_extra" "1")
16577 (set_attr "prefix" "orig,orig,vex")
16578 (set_attr "btver2_decode" "vector,vector,vector")
16579 (set_attr "mode" "<MODE>")])
16581 ;; Also define scalar versions. These are used for conditional move.
16582 ;; Using subregs into vector modes causes register allocation lossage.
16583 ;; These patterns do not allow memory operands because the native
16584 ;; instructions read the full 128-bits.
;; Scalar blendv used for conditional moves (see preceding file
;; comment): register-only operands because the hardware instruction
;; reads all 128 bits.  The C template picks the packed-single form
;; when the insn's computed mode attribute is V4SF (smaller encoding),
;; otherwise the mode-matching vector suffix; alternative 2 is AVX.
16586 (define_insn "sse4_1_blendv<ssemodesuffix>"
16587 [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
16589 [(match_operand:MODEF 1 "register_operand" "0,0,x")
16590 (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
16591 (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
16595 if (get_attr_mode (insn) == MODE_V4SF)
16596 return (which_alternative == 2
16597 ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16598 : "blendvps\t{%3, %2, %0|%0, %2, %3}");
16600 return (which_alternative == 2
16601 ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16602 : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
16604 [(set_attr "isa" "noavx,noavx,avx")
16605 (set_attr "type" "ssemov")
16606 (set_attr "length_immediate" "1")
16607 (set_attr "prefix_data16" "1,1,*")
16608 (set_attr "prefix_extra" "1")
16609 (set_attr "prefix" "orig,orig,vex")
16610 (set_attr "btver2_decode" "vector,vector,vector")
;; Mode attribute: prefer V4SF when packed-single encodings are
;; optimal for the tuning or when optimizing for size; otherwise the
;; natural vector mode.
16612 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
16613 (const_string "V4SF")
16614 (match_test "TARGET_AVX")
16615 (const_string "<ssevecmode>")
16616 (match_test "optimize_function_for_size_p (cfun)")
16617 (const_string "V4SF")
16619 (const_string "<ssevecmode>")))])
;; Combine helper: a blendv whose mask is "intvec < 0" (i.e. only the
;; sign bits matter) is the plain hardware blendv, since blendv tests
;; sign bits.  Split after reload into the UNSPEC_BLENDV form, with
;; the integer mask reinterpreted (lowparted) in the float mode.
16621 (define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
16622 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
16624 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
16625 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
16627 (lt:<sseintvecmode>
16628 (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
16629 (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C")) 0)]
16633 "&& reload_completed"
16634 [(set (match_dup 0)
16636 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
16637 "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
16638 [(set_attr "isa" "noavx,noavx,avx")
16639 (set_attr "type" "ssemov")
16640 (set_attr "length_immediate" "1")
16641 (set_attr "prefix_data16" "1,1,*")
16642 (set_attr "prefix_extra" "1")
16643 (set_attr "prefix" "orig,orig,vex")
16644 (set_attr "btver2_decode" "vector,vector,vector")
16645 (set_attr "mode" "<MODE>")])
;; Map integer vector modes to the float blend suffix of the same
;; element width (DI -> pd, SI -> ps) ...
16647 (define_mode_attr ssefltmodesuffix
16648 [(V2DI "pd") (V4DI "pd") (V4SI "ps") (V8SI "ps")])
;; ... and to the corresponding float vector mode.
16650 (define_mode_attr ssefltvecmode
16651 [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")])
;; Integer variant of the _lt blendv combine pattern above: a
;; byte-mode blendv whose mask is a 32/64-bit "x < 0" compare can use
;; float blendvps/blendvpd (same element-wide sign-bit test).  After
;; reload, lowpart all operands into the float vector mode and emit
;; UNSPEC_BLENDV.
16653 (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
16654 [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
16655 (unspec:<ssebytemode>
16656 [(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
16657 (match_operand:<ssebytemode> 2 "vector_operand" "YrBm,*xBm,xm")
16658 (subreg:<ssebytemode>
16660 (match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
16661 (match_operand:VI48_AVX 4 "const0_operand" "C,C,C")) 0)]
16665 "&& reload_completed"
16666 [(set (match_dup 0)
16667 (unspec:<ssefltvecmode>
16668 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
16670 operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
16671 operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
16672 operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
16673 operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
16675 [(set_attr "isa" "noavx,noavx,avx")
16676 (set_attr "type" "ssemov")
16677 (set_attr "length_immediate" "1")
16678 (set_attr "prefix_data16" "1,1,*")
16679 (set_attr "prefix_extra" "1")
16680 (set_attr "prefix" "orig,orig,vex")
16681 (set_attr "btver2_decode" "vector,vector,vector")
16682 (set_attr "mode" "<ssefltvecmode>")])
;; dpps/dppd dot product: immediate operand 3 selects which elements
;; participate and where results are broadcast.  "%" on operand 1
;; marks the operands as commutative.  Decodes as a vector (multi-uop)
;; op on btver2 and znver1.
16684 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
16685 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
16687 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
16688 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
16689 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
16693 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16694 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16695 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16696 [(set_attr "isa" "noavx,noavx,avx")
16697 (set_attr "type" "ssemul")
16698 (set_attr "length_immediate" "1")
16699 (set_attr "prefix_data16" "1,1,*")
16700 (set_attr "prefix_extra" "1")
16701 (set_attr "prefix" "orig,orig,vex")
16702 (set_attr "btver2_decode" "vector,vector,vector")
16703 (set_attr "znver1_decode" "vector,vector,vector")
16704 (set_attr "mode" "<MODE>")])
;; ISA name used to build the movntdqa pattern name per mode.
16707 (define_mode_attr vi8_sse4_1_avx2_avx512
16708 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; Non-temporal (streaming) load from write-combining memory;
;; memory-only source operand.
16710 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
16711 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
16712 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
16715 "%vmovntdqa\t{%1, %0|%0, %1}"
16716 [(set_attr "isa" "noavx,noavx,avx")
16717 (set_attr "type" "ssemov")
16718 (set_attr "prefix_extra" "1,1,*")
16719 (set_attr "prefix" "orig,orig,maybe_evex")
16720 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw: multiple sums of absolute differences; immediate operand 3
;; selects the source sub-blocks.
16722 (define_insn "<sse4_1_avx2>_mpsadbw"
16723 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
16725 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
16726 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
16727 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
16731 mpsadbw\t{%3, %2, %0|%0, %2, %3}
16732 mpsadbw\t{%3, %2, %0|%0, %2, %3}
16733 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16734 [(set_attr "isa" "noavx,noavx,avx")
16735 (set_attr "type" "sselog1")
16736 (set_attr "length_immediate" "1")
16737 (set_attr "prefix_extra" "1")
16738 (set_attr "prefix" "orig,orig,vex")
16739 (set_attr "btver2_decode" "vector,vector,vector")
16740 (set_attr "znver1_decode" "vector,vector,vector")
16741 (set_attr "mode" "<sseinsnmode>")])
;; packusdw: concatenation of two unsigned-saturating truncations
;; (dword -> word).  Fourth alternative is the EVEX/AVX512BW form and
;; supports masking (<mask_name>/<mask_operand3>).
16743 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
16744 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
16745 (vec_concat:VI2_AVX2
16746 (us_truncate:<ssehalfvecmode>
16747 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
16748 (us_truncate:<ssehalfvecmode>
16749 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
16750 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
16752 packusdw\t{%2, %0|%0, %2}
16753 packusdw\t{%2, %0|%0, %2}
16754 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
16755 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16756 [(set_attr "isa" "noavx,noavx,avx,avx512bw")
16757 (set_attr "type" "sselog")
16758 (set_attr "prefix_extra" "1")
16759 (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
16760 (set_attr "mode" "<sseinsnmode>")])
;; Byte-wise variable blend; legacy encoding forces the mask into xmm0
;; ("Yz"), the AVX form takes it as an explicit operand.
16762 (define_insn "<sse4_1_avx2>_pblendvb"
16763 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
16765 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
16766 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
16767 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
16771 pblendvb\t{%3, %2, %0|%0, %2, %3}
16772 pblendvb\t{%3, %2, %0|%0, %2, %3}
16773 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16774 [(set_attr "isa" "noavx,noavx,avx")
16775 (set_attr "type" "ssemov")
16776 (set_attr "prefix_extra" "1")
16777 (set_attr "length_immediate" "*,*,1")
16778 (set_attr "prefix" "orig,orig,vex")
16779 (set_attr "btver2_decode" "vector,vector,vector")
16780 (set_attr "mode" "<sseinsnmode>")])
;; Combine helper: pblendvb with a mask expressed as "x < 0" is the
;; hardware instruction itself (it tests each byte's sign bit), so
;; split back to the plain UNSPEC_BLENDV pattern, dropping the compare.
16782 (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
16783 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
16785 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
16786 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
16787 (lt:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
16788 (match_operand:VI1_AVX2 4 "const0_operand" "C,C,C"))]
16793 [(set (match_dup 0)
16795 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
16797 [(set_attr "isa" "noavx,noavx,avx")
16798 (set_attr "type" "ssemov")
16799 (set_attr "prefix_extra" "1")
16800 (set_attr "length_immediate" "*,*,1")
16801 (set_attr "prefix" "orig,orig,vex")
16802 (set_attr "btver2_decode" "vector,vector,vector")
16803 (set_attr "mode" "<sseinsnmode>")])
;; Word-wise immediate blend (8 selector bits, one per V8HI element).
16805 (define_insn "sse4_1_pblendw"
16806 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
16808 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
16809 (match_operand:V8HI 1 "register_operand" "0,0,x")
16810 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
16813 pblendw\t{%3, %2, %0|%0, %2, %3}
16814 pblendw\t{%3, %2, %0|%0, %2, %3}
16815 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16816 [(set_attr "isa" "noavx,noavx,avx")
16817 (set_attr "type" "ssemov")
16818 (set_attr "prefix_extra" "1")
16819 (set_attr "length_immediate" "1")
16820 (set_attr "prefix" "orig,orig,vex")
16821 (set_attr "mode" "TI")])
;; AVX2 vpblendw applies the same 8-bit immediate to both 128-bit
;; lanes; the builtin's 8-bit mask is widened here to the 16-element
;; vec_merge mask by duplicating it into the high byte (val << 8 | val).
16824 (define_expand "avx2_pblendw"
16825 [(set (match_operand:V16HI 0 "register_operand")
16827 (match_operand:V16HI 2 "nonimmediate_operand")
16828 (match_operand:V16HI 1 "register_operand")
16829 (match_operand:SI 3 "const_0_to_255_operand")))]
16832 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
16833 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn: masks the 16-bit merge selector back down to the
;; 8-bit instruction immediate (avx2_pblendw_operand guarantees the
;; two halves are equal).
16836 (define_insn "*avx2_pblendw"
16837 [(set (match_operand:V16HI 0 "register_operand" "=x")
16839 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
16840 (match_operand:V16HI 1 "register_operand" "x")
16841 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
16844 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
16845 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
16847 [(set_attr "type" "ssemov")
16848 (set_attr "prefix_extra" "1")
16849 (set_attr "length_immediate" "1")
16850 (set_attr "prefix" "vex")
16851 (set_attr "mode" "OI")])
;; Dword-wise immediate blend (AVX2 vpblendd), 128- and 256-bit forms.
16853 (define_insn "avx2_pblendd<mode>"
16854 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
16855 (vec_merge:VI4_AVX2
16856 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
16857 (match_operand:VI4_AVX2 1 "register_operand" "x")
16858 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
16860 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16861 [(set_attr "type" "ssemov")
16862 (set_attr "prefix_extra" "1")
16863 (set_attr "length_immediate" "1")
16864 (set_attr "prefix" "vex")
16865 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw: horizontal minimum of the eight unsigned words
;; (opaque UNSPEC_PHMINPOSUW; see the ISA reference for the result
;; layout -- minimum value and its index).
16867 (define_insn "sse4_1_phminposuw"
16868 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
16869 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
16870 UNSPEC_PHMINPOSUW))]
16872 "%vphminposuw\t{%1, %0|%0, %1}"
16873 [(set_attr "isa" "noavx,noavx,avx")
16874 (set_attr "type" "sselog1")
16875 (set_attr "prefix_extra" "1")
16876 (set_attr "prefix" "orig,orig,vex")
16877 (set_attr "mode" "TI")])
;; Byte -> word sign/zero extension (vpmovsxbw / vpmovzxbw; the
;; any_extend code iterator supplies <code>/<extsuffix>; the extend
;; rtx lines themselves are missing from this extraction).
;; 256-bit AVX2 form, maskable under AVX512BW+VL.
16879 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
16880 [(set (match_operand:V16HI 0 "register_operand" "=v")
16882 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
16883 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16884 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16885 [(set_attr "type" "ssemov")
16886 (set_attr "prefix_extra" "1")
16887 (set_attr "prefix" "maybe_evex")
16888 (set_attr "mode" "OI")])
;; 512-bit AVX512BW form.
16890 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
16891 [(set (match_operand:V32HI 0 "register_operand" "=v")
16893 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
16895 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16896 [(set_attr "type" "ssemov")
16897 (set_attr "prefix_extra" "1")
16898 (set_attr "prefix" "evex")
16899 (set_attr "mode" "XI")])
;; 128-bit form: extends the low 8 bytes of a V16QI register
;; (vec_select of elements 0..7).
16901 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
16902 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
16905 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
16906 (parallel [(const_int 0) (const_int 1)
16907 (const_int 2) (const_int 3)
16908 (const_int 4) (const_int 5)
16909 (const_int 6) (const_int 7)]))))]
16910 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16911 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16912 [(set_attr "isa" "noavx,noavx,avx")
16913 (set_attr "type" "ssemov")
16914 (set_attr "prefix_extra" "1")
16915 (set_attr "prefix" "orig,orig,maybe_evex")
16916 (set_attr "mode" "TI")])
;; Memory-source variant: direct V8QI load, extended in one insn.
16918 (define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
16919 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
16921 (match_operand:V8QI 1 "memory_operand" "m,m,m")))]
16922 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16923 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16924 [(set_attr "isa" "noavx,noavx,avx")
16925 (set_attr "type" "ssemov")
16926 (set_attr "prefix_extra" "1")
16927 (set_attr "prefix" "orig,orig,maybe_evex")
16928 (set_attr "mode" "TI")])
;; Combine bridge: recognizes the widening applied to a DImode memory
;; load wrapped in vector form and rewrites it as the direct V8QI
;; memory extension above (splits to the _1 pattern).
16930 (define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
16931 [(set (match_operand:V8HI 0 "register_operand")
16936 (match_operand:DI 1 "memory_operand")
16938 (parallel [(const_int 0) (const_int 1)
16939 (const_int 2) (const_int 3)
16940 (const_int 4) (const_int 5)
16941 (const_int 6) (const_int 7)]))))]
16942 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
16943 && can_create_pseudo_p ()"
16946 [(set (match_dup 0)
16947 (any_extend:V8HI (match_dup 1)))]
16948 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
;; Byte -> dword sign/zero extension family (vpmov{sx,zx}bd).
;; 512-bit AVX512F form; %q1 prints the quarter-width form of the
;; operand since only 16 source bytes are consumed.
16950 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
16951 [(set (match_operand:V16SI 0 "register_operand" "=v")
16953 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
16955 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
16956 [(set_attr "type" "ssemov")
16957 (set_attr "prefix" "evex")
16958 (set_attr "mode" "XI")])
;; 256-bit form: extends the low 8 bytes of a V16QI register.
16960 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
16961 [(set (match_operand:V8SI 0 "register_operand" "=v")
16964 (match_operand:V16QI 1 "register_operand" "v")
16965 (parallel [(const_int 0) (const_int 1)
16966 (const_int 2) (const_int 3)
16967 (const_int 4) (const_int 5)
16968 (const_int 6) (const_int 7)]))))]
16969 "TARGET_AVX2 && <mask_avx512vl_condition>"
16970 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16971 [(set_attr "type" "ssemov")
16972 (set_attr "prefix_extra" "1")
16973 (set_attr "prefix" "maybe_evex")
16974 (set_attr "mode" "OI")])
;; Memory-source variant (direct V8QI load).
16976 (define_insn "*avx2_<code>v8qiv8si2<mask_name>_1"
16977 [(set (match_operand:V8SI 0 "register_operand" "=v")
16979 (match_operand:V8QI 1 "memory_operand" "m")))]
16980 "TARGET_AVX2 && <mask_avx512vl_condition>"
16981 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16982 [(set_attr "type" "ssemov")
16983 (set_attr "prefix_extra" "1")
16984 (set_attr "prefix" "maybe_evex")
16985 (set_attr "mode" "OI")])
;; Combine bridge: DImode memory load in vector clothing -> direct
;; V8QI memory extension (splits to the _1 pattern).
16987 (define_insn_and_split "*avx2_<code>v8qiv8si2<mask_name>_2"
16988 [(set (match_operand:V8SI 0 "register_operand")
16993 (match_operand:DI 1 "memory_operand")
16995 (parallel [(const_int 0) (const_int 1)
16996 (const_int 2) (const_int 3)
16997 (const_int 4) (const_int 5)
16998 (const_int 6) (const_int 7)]))))]
16999 "TARGET_AVX2 && <mask_avx512vl_condition>
17000 && can_create_pseudo_p ()"
17003 [(set (match_dup 0)
17004 (any_extend:V8SI (match_dup 1)))]
17005 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
;; 128-bit form: extends the low 4 bytes of a V16QI register.
17007 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
17008 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17011 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17012 (parallel [(const_int 0) (const_int 1)
17013 (const_int 2) (const_int 3)]))))]
17014 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17015 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17016 [(set_attr "isa" "noavx,noavx,avx")
17017 (set_attr "type" "ssemov")
17018 (set_attr "prefix_extra" "1")
17019 (set_attr "prefix" "orig,orig,maybe_evex")
17020 (set_attr "mode" "TI")])
;; Memory-source variant (direct V4QI load).
17022 (define_insn "*sse4_1_<code>v4qiv4si2<mask_name>_1"
17023 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17025 (match_operand:V4QI 1 "memory_operand" "m,m,m")))]
17026 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17027 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17028 [(set_attr "isa" "noavx,noavx,avx")
17029 (set_attr "type" "ssemov")
17030 (set_attr "prefix_extra" "1")
17031 (set_attr "prefix" "orig,orig,maybe_evex")
17032 (set_attr "mode" "TI")])
;; Combine bridge: a broadcast SImode memory load (vec_duplicate)
;; feeding the widening is really just a 4-byte load; split to the
;; direct V4QI memory extension.
17034 (define_insn_and_split "*sse4_1_<code>v4qiv4si2<mask_name>_2"
17035 [(set (match_operand:V4SI 0 "register_operand")
17040 (vec_duplicate:V4SI
17041 (match_operand:SI 1 "memory_operand"))
17043 [(const_int 0) (const_int 0)
17044 (const_int 0) (const_int 0)])
17046 (parallel [(const_int 0) (const_int 1)
17047 (const_int 2) (const_int 3)]))))]
17048 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17049 && can_create_pseudo_p ()"
17052 [(set (match_dup 0)
17053 (any_extend:V4SI (match_dup 1)))]
17054 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
;; Word -> dword sign/zero extension family (vpmov{sx,zx}wd).
;; 512-bit AVX512F form.
17056 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
17057 [(set (match_operand:V16SI 0 "register_operand" "=v")
17059 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
17061 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17062 [(set_attr "type" "ssemov")
17063 (set_attr "prefix" "evex")
17064 (set_attr "mode" "XI")])
;; 256-bit AVX2 form, whole V8HI source.
17066 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
17067 [(set (match_operand:V8SI 0 "register_operand" "=v")
17069 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
17070 "TARGET_AVX2 && <mask_avx512vl_condition>"
17071 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17072 [(set_attr "type" "ssemov")
17073 (set_attr "prefix_extra" "1")
17074 (set_attr "prefix" "maybe_evex")
17075 (set_attr "mode" "OI")])
;; 128-bit form: extends the low 4 words of a V8HI register.
17077 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
17078 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17081 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
17082 (parallel [(const_int 0) (const_int 1)
17083 (const_int 2) (const_int 3)]))))]
17084 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17085 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17086 [(set_attr "isa" "noavx,noavx,avx")
17087 (set_attr "type" "ssemov")
17088 (set_attr "prefix_extra" "1")
17089 (set_attr "prefix" "orig,orig,maybe_evex")
17090 (set_attr "mode" "TI")])
;; Memory-source variant (direct V4HI load).
17092 (define_insn "*sse4_1_<code>v4hiv4si2<mask_name>_1"
17093 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17095 (match_operand:V4HI 1 "memory_operand" "m,m,m")))]
17096 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17097 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17098 [(set_attr "isa" "noavx,noavx,avx")
17099 (set_attr "type" "ssemov")
17100 (set_attr "prefix_extra" "1")
17101 (set_attr "prefix" "orig,orig,maybe_evex")
17102 (set_attr "mode" "TI")])
;; Combine bridge: DImode memory load feeding the widening -> direct
;; V4HI memory extension (splits to the _1 pattern).
17104 (define_insn_and_split "*sse4_1_<code>v4hiv4si2<mask_name>_2"
17105 [(set (match_operand:V4SI 0 "register_operand")
17110 (match_operand:DI 1 "memory_operand")
17112 (parallel [(const_int 0) (const_int 1)
17113 (const_int 2) (const_int 3)]))))]
17114 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17115 && can_create_pseudo_p ()"
17118 [(set (match_dup 0)
17119 (any_extend:V4SI (match_dup 1)))]
17120 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
;; Byte -> qword sign/zero extension family (vpmov{sx,zx}bq).
;; 512-bit form: extends the low 8 bytes of a V16QI register.
17122 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
17123 [(set (match_operand:V8DI 0 "register_operand" "=v")
17126 (match_operand:V16QI 1 "register_operand" "v")
17127 (parallel [(const_int 0) (const_int 1)
17128 (const_int 2) (const_int 3)
17129 (const_int 4) (const_int 5)
17130 (const_int 6) (const_int 7)]))))]
17132 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17133 [(set_attr "type" "ssemov")
17134 (set_attr "prefix" "evex")
17135 (set_attr "mode" "XI")])
;; Memory-source variant (direct V8QI load).
17137 (define_insn "*avx512f_<code>v8qiv8di2<mask_name>_1"
17138 [(set (match_operand:V8DI 0 "register_operand" "=v")
17140 (match_operand:V8QI 1 "memory_operand" "m")))]
17142 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17143 [(set_attr "type" "ssemov")
17144 (set_attr "prefix" "evex")
17145 (set_attr "mode" "XI")])
;; Combine bridge: DImode memory load -> direct V8QI memory extension.
17147 (define_insn_and_split "*avx512f_<code>v8qiv8di2<mask_name>_2"
17148 [(set (match_operand:V8DI 0 "register_operand")
17153 (match_operand:DI 1 "memory_operand")
17155 (parallel [(const_int 0) (const_int 1)
17156 (const_int 2) (const_int 3)
17157 (const_int 4) (const_int 5)
17158 (const_int 6) (const_int 7)]))))]
17159 "TARGET_AVX512F && can_create_pseudo_p ()"
17162 [(set (match_dup 0)
17163 (any_extend:V8DI (match_dup 1)))]
17164 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
;; 256-bit form: extends the low 4 bytes of a V16QI register.
17166 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
17167 [(set (match_operand:V4DI 0 "register_operand" "=v")
17170 (match_operand:V16QI 1 "register_operand" "v")
17171 (parallel [(const_int 0) (const_int 1)
17172 (const_int 2) (const_int 3)]))))]
17173 "TARGET_AVX2 && <mask_avx512vl_condition>"
17174 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17175 [(set_attr "type" "ssemov")
17176 (set_attr "prefix_extra" "1")
17177 (set_attr "prefix" "maybe_evex")
17178 (set_attr "mode" "OI")])
;; Memory-source variant (direct V4QI load).
17180 (define_insn "*avx2_<code>v4qiv4di2<mask_name>_1"
17181 [(set (match_operand:V4DI 0 "register_operand" "=v")
17183 (match_operand:V4QI 1 "memory_operand" "m")))]
17184 "TARGET_AVX2 && <mask_avx512vl_condition>"
17185 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17186 [(set_attr "type" "ssemov")
17187 (set_attr "prefix_extra" "1")
17188 (set_attr "prefix" "maybe_evex")
17189 (set_attr "mode" "OI")])
;; Combine bridge: broadcast SImode memory load -> direct V4QI memory
;; extension (only the first 4 bytes are consumed).
17191 (define_insn_and_split "*avx2_<code>v4qiv4di2<mask_name>_2"
17192 [(set (match_operand:V4DI 0 "register_operand")
17197 (vec_duplicate:V4SI
17198 (match_operand:SI 1 "memory_operand"))
17200 [(const_int 0) (const_int 0)
17201 (const_int 0) (const_int 0)])
17203 (parallel [(const_int 0) (const_int 1)
17204 (const_int 2) (const_int 3)]))))]
17205 "TARGET_AVX2 && <mask_avx512vl_condition>
17206 && can_create_pseudo_p ()"
17209 [(set (match_dup 0)
17210 (any_extend:V4DI (match_dup 1)))]
17211 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
;; 128-bit form: extends the low 2 bytes of a V16QI register.
17213 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
17214 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17217 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17218 (parallel [(const_int 0) (const_int 1)]))))]
17219 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17220 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17221 [(set_attr "isa" "noavx,noavx,avx")
17222 (set_attr "type" "ssemov")
17223 (set_attr "prefix_extra" "1")
17224 (set_attr "prefix" "orig,orig,maybe_evex")
17225 (set_attr "mode" "TI")])
;; Word -> qword sign/zero extension family (vpmov{sx,zx}wq).
;; 512-bit form; %q1 prints the quarter-width memory/register form
;; since only the low 8 words are consumed.
17227 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
17228 [(set (match_operand:V8DI 0 "register_operand" "=v")
17230 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
17232 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
17233 [(set_attr "type" "ssemov")
17234 (set_attr "prefix" "evex")
17235 (set_attr "mode" "XI")])
;; 256-bit form: extends the low 4 words of a V8HI register.
17237 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
17238 [(set (match_operand:V4DI 0 "register_operand" "=v")
17241 (match_operand:V8HI 1 "register_operand" "v")
17242 (parallel [(const_int 0) (const_int 1)
17243 (const_int 2) (const_int 3)]))))]
17244 "TARGET_AVX2 && <mask_avx512vl_condition>"
17245 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17246 [(set_attr "type" "ssemov")
17247 (set_attr "prefix_extra" "1")
17248 (set_attr "prefix" "maybe_evex")
17249 (set_attr "mode" "OI")])
;; Memory-source variant (direct V4HI load).
17251 (define_insn "*avx2_<code>v4hiv4di2<mask_name>_1"
17252 [(set (match_operand:V4DI 0 "register_operand" "=v")
17254 (match_operand:V4HI 1 "memory_operand" "m")))]
17255 "TARGET_AVX2 && <mask_avx512vl_condition>"
17256 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17257 [(set_attr "type" "ssemov")
17258 (set_attr "prefix_extra" "1")
17259 (set_attr "prefix" "maybe_evex")
17260 (set_attr "mode" "OI")])
;; Combine bridge: DImode memory load -> direct V4HI memory extension.
17262 (define_insn_and_split "*avx2_<code>v4hiv4di2<mask_name>_2"
17263 [(set (match_operand:V4DI 0 "register_operand")
17268 (match_operand:DI 1 "memory_operand")
17270 (parallel [(const_int 0) (const_int 1)
17271 (const_int 2) (const_int 3)]))))]
17272 "TARGET_AVX2 && <mask_avx512vl_condition>
17273 && can_create_pseudo_p ()"
17276 [(set (match_dup 0)
17277 (any_extend:V4DI (match_dup 1)))]
17278 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
;; 128-bit form: extends the low 2 words of a V8HI register.
17280 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
17281 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17284 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
17285 (parallel [(const_int 0) (const_int 1)]))))]
17286 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17287 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17288 [(set_attr "isa" "noavx,noavx,avx")
17289 (set_attr "type" "ssemov")
17290 (set_attr "prefix_extra" "1")
17291 (set_attr "prefix" "orig,orig,maybe_evex")
17292 (set_attr "mode" "TI")])
;; Memory-source variant (direct V2HI load).
17294 (define_insn "*sse4_1_<code>v2hiv2di2<mask_name>_1"
17295 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17297 (match_operand:V2HI 1 "memory_operand" "m,m,m")))]
17298 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17299 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17300 [(set_attr "isa" "noavx,noavx,avx")
17301 (set_attr "type" "ssemov")
17302 (set_attr "prefix_extra" "1")
17303 (set_attr "prefix" "orig,orig,maybe_evex")
17304 (set_attr "mode" "TI")])
;; Combine bridge: broadcast SImode memory load -> direct V2HI memory
;; extension (only the first 4 bytes are consumed).
17306 (define_insn_and_split "*sse4_1_<code>v2hiv2di2<mask_name>_2"
17307 [(set (match_operand:V2DI 0 "register_operand")
17312 (vec_duplicate:V4SI
17313 (match_operand:SI 1 "memory_operand"))
17315 [(const_int 0) (const_int 0)
17316 (const_int 0) (const_int 0)])
17318 (parallel [(const_int 0) (const_int 1)]))))]
17319 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17320 && can_create_pseudo_p ()"
17323 [(set (match_dup 0)
17324 (any_extend:V2DI (match_dup 1)))]
17325 "operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
;; Dword -> qword sign/zero extension family (vpmov{sx,zx}dq).
;; 512-bit AVX512F form.
17327 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
17328 [(set (match_operand:V8DI 0 "register_operand" "=v")
17330 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
17332 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17333 [(set_attr "type" "ssemov")
17334 (set_attr "prefix" "evex")
17335 (set_attr "mode" "XI")])
;; 256-bit AVX2 form, whole V4SI source.
17337 (define_insn "avx2_<code>v4siv4di2<mask_name>"
17338 [(set (match_operand:V4DI 0 "register_operand" "=v")
17340 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
17341 "TARGET_AVX2 && <mask_avx512vl_condition>"
17342 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17343 [(set_attr "type" "ssemov")
17344 (set_attr "prefix" "maybe_evex")
17345 (set_attr "prefix_extra" "1")
17346 (set_attr "mode" "OI")])
;; 128-bit form: extends the low 2 dwords of a V4SI register.
17348 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
17349 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17352 (match_operand:V4SI 1 "register_operand" "Yr,*x,v")
17353 (parallel [(const_int 0) (const_int 1)]))))]
17354 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17355 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17356 [(set_attr "isa" "noavx,noavx,avx")
17357 (set_attr "type" "ssemov")
17358 (set_attr "prefix_extra" "1")
17359 (set_attr "prefix" "orig,orig,maybe_evex")
17360 (set_attr "mode" "TI")])
;; Memory-source variant (direct V2SI load).
17362 (define_insn "*sse4_1_<code>v2siv2di2<mask_name>_1"
17363 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17365 (match_operand:V2SI 1 "memory_operand" "m,m,m")))]
17366 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17367 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17368 [(set_attr "isa" "noavx,noavx,avx")
17369 (set_attr "type" "ssemov")
17370 (set_attr "prefix_extra" "1")
17371 (set_attr "prefix" "orig,orig,maybe_evex")
17372 (set_attr "mode" "TI")])
;; Combine bridge: DImode memory load -> direct V2SI memory extension.
17374 (define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
17375 [(set (match_operand:V2DI 0 "register_operand")
17380 (match_operand:DI 1 "memory_operand")
17382 (parallel [(const_int 0) (const_int 1)]))))]
17383 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17384 && can_create_pseudo_p ()"
17387 [(set (match_dup 0)
17388 (any_extend:V2DI (match_dup 1)))]
17389 "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
17391 ;; ptestps/ptestpd are very similar to comiss and ucomiss when
17392 ;; setting FLAGS_REG. But it is not a really compare instruction.
;; AVX vtestps/vtestpd: sets FLAGS_REG from two float vectors via an
;; unspec (the flag semantics are not an ordinary numeric compare).
17393 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
17394 [(set (reg:CC FLAGS_REG)
17395 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
17396 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
17399 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
17400 [(set_attr "type" "ssecomi")
17401 (set_attr "prefix_extra" "1")
17402 (set_attr "prefix" "vex")
17403 (set_attr "mode" "<MODE>")])
17405 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
17406 ;; But it is not a really compare instruction.
;; SSE4.1/AVX ptest on integer vectors: flags-only result in FLAGS_REG.
17407 (define_insn "<sse4_1>_ptest<mode>"
17408 [(set (reg:CC FLAGS_REG)
17409 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
17410 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
17413 "%vptest\t{%1, %0|%0, %1}"
17414 [(set_attr "isa" "noavx,noavx,avx")
17415 (set_attr "type" "ssecomi")
17416 (set_attr "prefix_extra" "1")
17417 (set_attr "prefix" "orig,orig,vex")
;; btver2 decodes the 256-bit (OImode) form as a vector (multi-op) insn.
17418 (set (attr "btver2_decode")
17420 (match_test "<sseinsnmode>mode==OImode")
17421 (const_string "vector")
17422 (const_string "*")))
17423 (set_attr "mode" "<sseinsnmode>")])
;; ptest applied to a TFmode (128-bit scalar) value, same flag semantics.
17425 (define_insn "ptesttf2"
17426 [(set (reg:CC FLAGS_REG)
17427 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
17428 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
17431 "%vptest\t{%1, %0|%0, %1}"
17432 [(set_attr "isa" "noavx,noavx,avx")
17433 (set_attr "type" "ssecomi")
17434 (set_attr "prefix_extra" "1")
17435 (set_attr "prefix" "orig,orig,vex")
17436 (set_attr "mode" "TI")])
;; SSE4.1/AVX roundps/roundpd: packed float rounding with an immediate
;; rounding-control operand (0..15).
17438 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
17439 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17441 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
17442 (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
17445 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17446 [(set_attr "isa" "noavx,noavx,avx")
17447 (set_attr "type" "ssecvt")
17448 (set_attr "prefix_data16" "1,1,*")
17449 (set_attr "prefix_extra" "1")
17450 (set_attr "length_immediate" "1")
17451 (set_attr "prefix" "orig,orig,vex")
17452 (set_attr "mode" "<MODE>")])
;; Round, then truncate-convert to a signed integer vector: emits the
;; round insn into a temporary, then fix_trunc into operand 0.
17454 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
17455 [(match_operand:<sseintvecmode> 0 "register_operand")
17456 (match_operand:VF1_128_256 1 "vector_operand")
17457 (match_operand:SI 2 "const_0_to_15_operand")]
17460 rtx tmp = gen_reg_rtx (<MODE>mode);
17463 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
17466 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; 512-bit round: implemented via the AVX-512F vrndscale pattern.
17470 (define_expand "avx512f_round<castmode>512"
17471 [(match_operand:VF_512 0 "register_operand")
17472 (match_operand:VF_512 1 "nonimmediate_operand")
17473 (match_operand:SI 2 "const_0_to_15_operand")]
17476 emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
;; 512-bit round + signed fix: vrndscaleps into a temp, then truncate.
17480 (define_expand "avx512f_roundps512_sfix"
17481 [(match_operand:V16SI 0 "register_operand")
17482 (match_operand:V16SF 1 "nonimmediate_operand")
17483 (match_operand:SI 2 "const_0_to_15_operand")]
17486 rtx tmp = gen_reg_rtx (V16SFmode);
17487 emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
17488 emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
;; Round two double vectors and pack the truncated results into one SI
;; vector.  For V2DF with AVX (and 256-bit ops not discouraged), the two
;; inputs are concatenated into a V4DF so a single 256-bit round +
;; truncate suffices; otherwise each input is rounded separately and the
;; results packed.
17492 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
17493 [(match_operand:<ssepackfltmode> 0 "register_operand")
17494 (match_operand:VF2 1 "vector_operand")
17495 (match_operand:VF2 2 "vector_operand")
17496 (match_operand:SI 3 "const_0_to_15_operand")]
17501 if (<MODE>mode == V2DFmode
17502 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
17504 rtx tmp2 = gen_reg_rtx (V4DFmode);
17506 tmp0 = gen_reg_rtx (V4DFmode);
17507 tmp1 = force_reg (V2DFmode, operands[1]);
17509 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
17510 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
17511 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
17515 tmp0 = gen_reg_rtx (<MODE>mode);
17516 tmp1 = gen_reg_rtx (<MODE>mode);
17519 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
17522 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
17525 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1))
;; Scalar roundss/roundsd: rounds operand 2, merging the untouched upper
;; elements from operand 1.  Fourth alternative uses AVX-512 vrndscale.
17530 (define_insn "sse4_1_round<ssescalarmodesuffix>"
17531 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
17534 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
17535 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
17537 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
17541 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
17542 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
17543 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
17544 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17545 [(set_attr "isa" "noavx,noavx,avx,avx512f")
17546 (set_attr "type" "ssecvt")
17547 (set_attr "length_immediate" "1")
17548 (set_attr "prefix_data16" "1,1,*,*")
17549 (set_attr "prefix_extra" "1")
17550 (set_attr "prefix" "orig,orig,vex,evex")
17551 (set_attr "mode" "<MODE>")])
;; round-to-nearest-away-from-zero as add-copysign-then-truncate:
;; add copysign (nextafter (0.5, 0.0), x) to x and truncate, which
;; rounds halfway cases away from zero without double-rounding errors.
;; Only valid when FP traps need not be preserved (!flag_trapping_math).
17553 (define_expand "round<mode>2"
17554 [(set (match_dup 3)
17556 (match_operand:VF 1 "register_operand")
17558 (set (match_operand:VF 0 "register_operand")
17560 [(match_dup 3) (match_dup 4)]
17562 "TARGET_SSE4_1 && !flag_trapping_math"
17564 machine_mode scalar_mode;
17565 const struct real_format *fmt;
17566 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
17567 rtx half, vec_half;
17569 scalar_mode = GET_MODE_INNER (<MODE>mode);
17571 /* load nextafter (0.5, 0.0) */
17572 fmt = REAL_MODE_FORMAT (scalar_mode);
17573 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
17574 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
17575 half = const_double_from_real_value (pred_half, scalar_mode);
17577 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
17578 vec_half = force_reg (<MODE>mode, vec_half);
;; operands[2] = copysign (pred_half, x); the final insn truncates x + operands[2].
17580 operands[2] = gen_reg_rtx (<MODE>mode);
17581 emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
17583 operands[3] = gen_reg_rtx (<MODE>mode);
17584 operands[4] = GEN_INT (ROUND_TRUNC);
;; round<mode>2 followed by truncate-to-signed-int into operand 0.
17587 (define_expand "round<mode>2_sfix"
17588 [(match_operand:<sseintvecmode> 0 "register_operand")
17589 (match_operand:VF1 1 "register_operand")]
17590 "TARGET_SSE4_1 && !flag_trapping_math"
17592 rtx tmp = gen_reg_rtx (<MODE>mode);
17594 emit_insn (gen_round<mode>2 (tmp, operands[1]));
17597 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two double vectors and pack the truncated results; same 256-bit
;; concat shortcut as <sse4_1>_round<ssemodesuffix>_vec_pack_sfix.
17601 (define_expand "round<mode>2_vec_pack_sfix"
17602 [(match_operand:<ssepackfltmode> 0 "register_operand")
17603 (match_operand:VF2 1 "register_operand")
17604 (match_operand:VF2 2 "register_operand")]
17605 "TARGET_SSE4_1 && !flag_trapping_math"
17609 if (<MODE>mode == V2DFmode
17610 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
17612 rtx tmp2 = gen_reg_rtx (V4DFmode);
17614 tmp0 = gen_reg_rtx (V4DFmode);
17615 tmp1 = force_reg (V2DFmode, operands[1]);
17617 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
17618 emit_insn (gen_roundv4df2 (tmp2, tmp0));
17619 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
17623 tmp0 = gen_reg_rtx (<MODE>mode);
17624 tmp1 = gen_reg_rtx (<MODE>mode);
17626 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
17627 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
17630 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1))
17635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17637 ;; Intel SSE4.2 string/text processing instructions
17639 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern producing all three results (index in ecx,
;; mask in xmm0, flags).  After reload it is split to pcmpestri and/or
;; pcmpestrm depending on which outputs are actually live, as determined
;; from REG_UNUSED notes; if only flags are needed the _cconly variant is
;; used, and if nothing is live the insn is deleted.
17641 (define_insn_and_split "sse4_2_pcmpestr"
17642 [(set (match_operand:SI 0 "register_operand" "=c,c")
17644 [(match_operand:V16QI 2 "register_operand" "x,x")
17645 (match_operand:SI 3 "register_operand" "a,a")
17646 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
17647 (match_operand:SI 5 "register_operand" "d,d")
17648 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
17650 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
17658 (set (reg:CC FLAGS_REG)
17667 && can_create_pseudo_p ()"
17672 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
17673 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
17674 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
17677 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
17678 operands[3], operands[4],
17679 operands[5], operands[6]));
17681 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
17682 operands[3], operands[4],
17683 operands[5], operands[6]));
17684 if (flags && !(ecx || xmm0))
17685 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
17686 operands[2], operands[3],
17687 operands[4], operands[5],
;; Nothing live: replace the insn with a deleted-insn note.
17689 if (!(flags || ecx || xmm0))
17690 emit_note (NOTE_INSN_DELETED);
17694 [(set_attr "type" "sselog")
17695 (set_attr "prefix_data16" "1")
17696 (set_attr "prefix_extra" "1")
17697 (set_attr "length_immediate" "1")
17698 (set_attr "memory" "none,load")
17699 (set_attr "mode" "TI")])
;; pcmpestri: explicit-length string compare, index result in ecx
;; (hard-coded "c" constraint) plus flags.  Lengths in eax/edx.
17701 (define_insn "sse4_2_pcmpestri"
17702 [(set (match_operand:SI 0 "register_operand" "=c,c")
17704 [(match_operand:V16QI 1 "register_operand" "x,x")
17705 (match_operand:SI 2 "register_operand" "a,a")
17706 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
17707 (match_operand:SI 4 "register_operand" "d,d")
17708 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
17710 (set (reg:CC FLAGS_REG)
17719 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
17720 [(set_attr "type" "sselog")
17721 (set_attr "prefix_data16" "1")
17722 (set_attr "prefix_extra" "1")
17723 (set_attr "prefix" "maybe_vex")
17724 (set_attr "length_immediate" "1")
17725 (set_attr "btver2_decode" "vector")
17726 (set_attr "memory" "none,load")
17727 (set_attr "mode" "TI")])
;; pcmpestrm: explicit-length string compare, mask result in xmm0
;; (hard-coded "Yz" constraint) plus flags.  Lengths in eax/edx.
17729 (define_insn "sse4_2_pcmpestrm"
17730 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
17732 [(match_operand:V16QI 1 "register_operand" "x,x")
17733 (match_operand:SI 2 "register_operand" "a,a")
17734 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
17735 (match_operand:SI 4 "register_operand" "d,d")
17736 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
17738 (set (reg:CC FLAGS_REG)
17747 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
17748 [(set_attr "type" "sselog")
17749 (set_attr "prefix_data16" "1")
17750 (set_attr "prefix_extra" "1")
17751 (set_attr "length_immediate" "1")
17752 (set_attr "prefix" "maybe_vex")
17753 (set_attr "btver2_decode" "vector")
17754 (set_attr "memory" "none,load")
17755 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: both value outputs are scratch clobbers, so the
;; register allocator may pick either the strm form (xmm0 clobbered) or
;; the stri form (ecx clobbered), whichever is cheaper.
17757 (define_insn "sse4_2_pcmpestr_cconly"
17758 [(set (reg:CC FLAGS_REG)
17760 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
17761 (match_operand:SI 3 "register_operand" "a,a,a,a")
17762 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
17763 (match_operand:SI 5 "register_operand" "d,d,d,d")
17764 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
17766 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
17767 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
17770 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
17771 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
17772 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
17773 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
17774 [(set_attr "type" "sselog")
17775 (set_attr "prefix_data16" "1")
17776 (set_attr "prefix_extra" "1")
17777 (set_attr "length_immediate" "1")
17778 (set_attr "memory" "none,load,none,load")
17779 (set_attr "btver2_decode" "vector,vector,vector,vector")
17780 (set_attr "prefix" "maybe_vex")
17781 (set_attr "mode" "TI")])
;; Combined pcmpistr (implicit-length strings, no eax/edx length
;; operands): split after reload to pcmpistri / pcmpistrm / _cconly based
;; on which of ecx, xmm0, and flags are live per REG_UNUSED notes.
17783 (define_insn_and_split "sse4_2_pcmpistr"
17784 [(set (match_operand:SI 0 "register_operand" "=c,c")
17786 [(match_operand:V16QI 2 "register_operand" "x,x")
17787 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
17788 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
17790 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
17796 (set (reg:CC FLAGS_REG)
17803 && can_create_pseudo_p ()"
17808 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
17809 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
17810 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
17813 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
17814 operands[3], operands[4]));
17816 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
17817 operands[3], operands[4]));
17818 if (flags && !(ecx || xmm0))
17819 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
17820 operands[2], operands[3],
;; Nothing live: replace the insn with a deleted-insn note.
17822 if (!(flags || ecx || xmm0))
17823 emit_note (NOTE_INSN_DELETED);
17827 [(set_attr "type" "sselog")
17828 (set_attr "prefix_data16" "1")
17829 (set_attr "prefix_extra" "1")
17830 (set_attr "length_immediate" "1")
17831 (set_attr "memory" "none,load")
17832 (set_attr "mode" "TI")])
;; pcmpistri: implicit-length string compare, index in ecx plus flags.
17834 (define_insn "sse4_2_pcmpistri"
17835 [(set (match_operand:SI 0 "register_operand" "=c,c")
17837 [(match_operand:V16QI 1 "register_operand" "x,x")
17838 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
17839 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
17841 (set (reg:CC FLAGS_REG)
17848 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
17849 [(set_attr "type" "sselog")
17850 (set_attr "prefix_data16" "1")
17851 (set_attr "prefix_extra" "1")
17852 (set_attr "length_immediate" "1")
17853 (set_attr "prefix" "maybe_vex")
17854 (set_attr "memory" "none,load")
17855 (set_attr "btver2_decode" "vector")
17856 (set_attr "mode" "TI")])
;; pcmpistrm: implicit-length string compare, mask in xmm0 plus flags.
17858 (define_insn "sse4_2_pcmpistrm"
17859 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
17861 [(match_operand:V16QI 1 "register_operand" "x,x")
17862 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
17863 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
17865 (set (reg:CC FLAGS_REG)
17872 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
17873 [(set_attr "type" "sselog")
17874 (set_attr "prefix_data16" "1")
17875 (set_attr "prefix_extra" "1")
17876 (set_attr "length_immediate" "1")
17877 (set_attr "prefix" "maybe_vex")
17878 (set_attr "memory" "none,load")
17879 (set_attr "btver2_decode" "vector")
17880 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: value outputs are scratch clobbers so the RA can
;; choose the strm (xmm0 clobber) or stri (ecx clobber) encoding freely.
17882 (define_insn "sse4_2_pcmpistr_cconly"
17883 [(set (reg:CC FLAGS_REG)
17885 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
17886 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
17887 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
17889 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
17890 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
17893 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
17894 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
17895 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
17896 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
17897 [(set_attr "type" "sselog")
17898 (set_attr "prefix_data16" "1")
17899 (set_attr "prefix_extra" "1")
17900 (set_attr "length_immediate" "1")
17901 (set_attr "memory" "none,load,none,load")
17902 (set_attr "prefix" "maybe_vex")
17903 (set_attr "btver2_decode" "vector,vector,vector,vector")
17904 (set_attr "mode" "TI")])
17906 ;; Packed float variants
;; Memory mode for the SF-element prefetch forms, keyed off the index
;; vector mode (V8DI index -> V8SF data, V16SI index -> V16SF data).
17907 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
17908 [(V8DI "V8SF") (V16SI "V16SF")])
;; AVX-512PF gather prefetch, float elements.  Operand 4 selects the
;; prefetch hint (2 or 3 -> T0/T1); the preparation code wraps the VSIB
;; address components in an UNSPEC_VSIBADDR.
17910 (define_expand "avx512pf_gatherpf<mode>sf"
17912 [(match_operand:<avx512fmaskmode> 0 "register_operand")
17913 (mem:<GATHER_SCATTER_SF_MEM_MODE>
17915 [(match_operand 2 "vsib_address_operand")
17916 (match_operand:VI48_512 1 "register_operand")
17917 (match_operand:SI 3 "const1248_operand")]))
17918 (match_operand:SI 4 "const_2_to_3_operand")]
17919 UNSPEC_GATHER_PREFETCH)]
17923 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
17924 operands[3]), UNSPEC_VSIBADDR);
;; Masked insn: emits vgatherpf0/1<...>ps depending on operand 4.
17927 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
17929 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
17930 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
17932 [(match_operand:P 2 "vsib_address_operand" "Tv")
17933 (match_operand:VI48_512 1 "register_operand" "v")
17934 (match_operand:SI 3 "const1248_operand" "n")]
17936 (match_operand:SI 4 "const_2_to_3_operand" "n")]
17937 UNSPEC_GATHER_PREFETCH)]
17940 switch (INTVAL (operands[4]))
17943 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
17944 gas changed what it requires incompatibly. */
17945 return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
17947 return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
17949 gcc_unreachable ();
17952 [(set_attr "type" "sse")
17953 (set_attr "prefix" "evex")
17954 (set_attr "mode" "XI")])
17956 ;; Packed double variants
;; AVX-512PF gather prefetch, double elements; mirrors the SF expand
;; above but with V8DF data and VI4_256_8_512 index vectors.
17957 (define_expand "avx512pf_gatherpf<mode>df"
17959 [(match_operand:<avx512fmaskmode> 0 "register_operand")
17962 [(match_operand 2 "vsib_address_operand")
17963 (match_operand:VI4_256_8_512 1 "register_operand")
17964 (match_operand:SI 3 "const1248_operand")]))
17965 (match_operand:SI 4 "const_2_to_3_operand")]
17966 UNSPEC_GATHER_PREFETCH)]
17970 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
17971 operands[3]), UNSPEC_VSIBADDR);
;; Masked insn: emits vgatherpf0/1<...>pd depending on operand 4.
17974 (define_insn "*avx512pf_gatherpf<mode>df_mask"
17976 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
17977 (match_operator:V8DF 5 "vsib_mem_operator"
17979 [(match_operand:P 2 "vsib_address_operand" "Tv")
17980 (match_operand:VI4_256_8_512 1 "register_operand" "v")
17981 (match_operand:SI 3 "const1248_operand" "n")]
17983 (match_operand:SI 4 "const_2_to_3_operand" "n")]
17984 UNSPEC_GATHER_PREFETCH)]
17987 switch (INTVAL (operands[4]))
17990 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
17991 gas changed what it requires incompatibly. */
17992 return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
17994 return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
17996 gcc_unreachable ();
17999 [(set_attr "type" "sse")
18000 (set_attr "prefix" "evex")
18001 (set_attr "mode" "XI")])
18003 ;; Packed float variants
;; AVX-512PF scatter prefetch, float elements.  Operand 4 is a
;; const2367_operand: hint values select T0/T1 and, presumably,
;; read vs. write intent -- TODO confirm against the ISA reference.
18004 (define_expand "avx512pf_scatterpf<mode>sf"
18006 [(match_operand:<avx512fmaskmode> 0 "register_operand")
18007 (mem:<GATHER_SCATTER_SF_MEM_MODE>
18009 [(match_operand 2 "vsib_address_operand")
18010 (match_operand:VI48_512 1 "register_operand")
18011 (match_operand:SI 3 "const1248_operand")]))
18012 (match_operand:SI 4 "const2367_operand")]
18013 UNSPEC_SCATTER_PREFETCH)]
18017 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
18018 operands[3]), UNSPEC_VSIBADDR);
;; Masked insn: emits vscatterpf0/1<...>ps depending on operand 4.
18021 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
18023 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
18024 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
18026 [(match_operand:P 2 "vsib_address_operand" "Tv")
18027 (match_operand:VI48_512 1 "register_operand" "v")
18028 (match_operand:SI 3 "const1248_operand" "n")]
18030 (match_operand:SI 4 "const2367_operand" "n")]
18031 UNSPEC_SCATTER_PREFETCH)]
18034 switch (INTVAL (operands[4]))
18038 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
18039 gas changed what it requires incompatibly. */
18040 return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
18043 return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
18045 gcc_unreachable ();
18048 [(set_attr "type" "sse")
18049 (set_attr "prefix" "evex")
18050 (set_attr "mode" "XI")])
18052 ;; Packed double variants
;; AVX-512PF scatter prefetch, double elements; mirrors the SF expand
;; above with V8DF data and VI4_256_8_512 index vectors.
18053 (define_expand "avx512pf_scatterpf<mode>df"
18055 [(match_operand:<avx512fmaskmode> 0 "register_operand")
18058 [(match_operand 2 "vsib_address_operand")
18059 (match_operand:VI4_256_8_512 1 "register_operand")
18060 (match_operand:SI 3 "const1248_operand")]))
18061 (match_operand:SI 4 "const2367_operand")]
18062 UNSPEC_SCATTER_PREFETCH)]
18066 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
18067 operands[3]), UNSPEC_VSIBADDR);
;; Masked insn: emits vscatterpf0/1<...>pd depending on operand 4.
18070 (define_insn "*avx512pf_scatterpf<mode>df_mask"
18072 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
18073 (match_operator:V8DF 5 "vsib_mem_operator"
18075 [(match_operand:P 2 "vsib_address_operand" "Tv")
18076 (match_operand:VI4_256_8_512 1 "register_operand" "v")
18077 (match_operand:SI 3 "const1248_operand" "n")]
18079 (match_operand:SI 4 "const2367_operand" "n")]
18080 UNSPEC_SCATTER_PREFETCH)]
18083 switch (INTVAL (operands[4]))
18087 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
18088 gas changed what it requires incompatibly. */
18089 return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18092 return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18094 gcc_unreachable ();
18097 [(set_attr "type" "sse")
18098 (set_attr "prefix" "evex")
18099 (set_attr "mode" "XI")])
;; AVX-512ER vexp2ps/pd: 2^x approximation on 512-bit vectors, with
;; optional masking and SAE (suppress-all-exceptions) rounding control.
18101 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
18102 [(set (match_operand:VF_512 0 "register_operand" "=v")
18104 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18107 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18108 [(set_attr "prefix" "evex")
18109 (set_attr "type" "sse")
18110 (set_attr "mode" "<MODE>")])
;; AVX-512ER vrcp28ps/pd: reciprocal approximation to 2^-28 accuracy.
18112 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
18113 [(set (match_operand:VF_512 0 "register_operand" "=v")
18115 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18118 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18119 [(set_attr "prefix" "evex")
18120 (set_attr "type" "sse")
18121 (set_attr "mode" "<MODE>")])
;; Scalar vrcp28ss/sd: approximate operand 1's low element, merging the
;; upper elements from operand 2.
18123 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
18124 [(set (match_operand:VF_128 0 "register_operand" "=v")
18127 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18129 (match_operand:VF_128 2 "register_operand" "v")
18132 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
18133 [(set_attr "length_immediate" "1")
18134 (set_attr "prefix" "evex")
18135 (set_attr "type" "sse")
18136 (set_attr "mode" "<MODE>")])
;; AVX-512ER vrsqrt28ps/pd: reciprocal square-root approximation.
18138 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
18139 [(set (match_operand:VF_512 0 "register_operand" "=v")
18141 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18144 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18145 [(set_attr "prefix" "evex")
18146 (set_attr "type" "sse")
18147 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt28ss/sd: low element from operand 1, upper from operand 2.
18149 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
18150 [(set (match_operand:VF_128 0 "register_operand" "=v")
18153 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18155 (match_operand:VF_128 2 "register_operand" "v")
18158 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
18159 [(set_attr "length_immediate" "1")
18160 (set_attr "type" "sse")
18161 (set_attr "prefix" "evex")
18162 (set_attr "mode" "<MODE>")])
18164 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18166 ;; XOP instructions
18168 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Iterator covering plain and signed-saturating add, with the matching
;; XOP mnemonic fragments (macs/macss, madcs/madcss).
18170 (define_code_iterator xop_plus [plus ss_plus])
18172 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
18173 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
18175 ;; XOP parallel integer multiply/add instructions.
;; vpmacs[s]ww/dd: multiply operands 1 and 2 elementwise and add
;; operand 3 (optionally with signed saturation).
18177 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
18178 [(set (match_operand:VI24_128 0 "register_operand" "=x")
18181 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
18182 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
18183 (match_operand:VI24_128 3 "register_operand" "x")))]
18185 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18186 [(set_attr "type" "ssemuladd")
18187 (set_attr "mode" "TI")])
;; vpmacs[s]dql: multiply the even (low) SI elements of operands 1 and 2
;; widened to DI, then add operand 3.
18189 (define_insn "xop_p<macs>dql"
18190 [(set (match_operand:V2DI 0 "register_operand" "=x")
18195 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
18196 (parallel [(const_int 0) (const_int 2)])))
18199 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18200 (parallel [(const_int 0) (const_int 2)]))))
18201 (match_operand:V2DI 3 "register_operand" "x")))]
18203 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18204 [(set_attr "type" "ssemuladd")
18205 (set_attr "mode" "TI")])
;; vpmacs[s]dqh: same, but on the odd (high) SI elements.
18207 (define_insn "xop_p<macs>dqh"
18208 [(set (match_operand:V2DI 0 "register_operand" "=x")
18213 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
18214 (parallel [(const_int 1) (const_int 3)])))
18217 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18218 (parallel [(const_int 1) (const_int 3)]))))
18219 (match_operand:V2DI 3 "register_operand" "x")))]
18221 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18222 [(set_attr "type" "ssemuladd")
18223 (set_attr "mode" "TI")])
18225 ;; XOP parallel integer multiply/add instructions for the intrinisics
;; vpmacs[s]wd: multiply the odd HI elements widened to SI, add operand 3.
18226 (define_insn "xop_p<macs>wd"
18227 [(set (match_operand:V4SI 0 "register_operand" "=x")
18232 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
18233 (parallel [(const_int 1) (const_int 3)
18234 (const_int 5) (const_int 7)])))
18237 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
18238 (parallel [(const_int 1) (const_int 3)
18239 (const_int 5) (const_int 7)]))))
18240 (match_operand:V4SI 3 "register_operand" "x")))]
18242 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18243 [(set_attr "type" "ssemuladd")
18244 (set_attr "mode" "TI")])
;; vpmadcs[s]wd: multiply even and odd HI pairs, add the two products
;; together, then add operand 3 (multiply-add-accumulate).
18246 (define_insn "xop_p<madcs>wd"
18247 [(set (match_operand:V4SI 0 "register_operand" "=x")
18253 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
18254 (parallel [(const_int 0) (const_int 2)
18255 (const_int 4) (const_int 6)])))
18258 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
18259 (parallel [(const_int 0) (const_int 2)
18260 (const_int 4) (const_int 6)]))))
18265 (parallel [(const_int 1) (const_int 3)
18266 (const_int 5) (const_int 7)])))
18270 (parallel [(const_int 1) (const_int 3)
18271 (const_int 5) (const_int 7)])))))
18272 (match_operand:V4SI 3 "register_operand" "x")))]
18274 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18275 [(set_attr "type" "ssemuladd")
18276 (set_attr "mode" "TI")])
18278 ;; XOP parallel XMM conditional moves
;; vpcmov: bitwise select -- operand 3 chooses, bit by bit, between
;; operands 1 and 2 (expressed as an if_then_else on the mask vector).
18279 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
18280 [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
18281 (if_then_else:V_128_256
18282 (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
18283 (match_operand:V_128_256 1 "register_operand" "x,x")
18284 (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
18286 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18287 [(set_attr "type" "sse4arg")])
18289 ;; XOP horizontal add/subtract instructions
;; vphadd[u]bw: horizontally add adjacent (signed or unsigned) byte
;; pairs, widening to HImode.
18290 (define_insn "xop_phadd<u>bw"
18291 [(set (match_operand:V8HI 0 "register_operand" "=x")
18295 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18296 (parallel [(const_int 0) (const_int 2)
18297 (const_int 4) (const_int 6)
18298 (const_int 8) (const_int 10)
18299 (const_int 12) (const_int 14)])))
18303 (parallel [(const_int 1) (const_int 3)
18304 (const_int 5) (const_int 7)
18305 (const_int 9) (const_int 11)
18306 (const_int 13) (const_int 15)])))))]
18308 "vphadd<u>bw\t{%1, %0|%0, %1}"
18309 [(set_attr "type" "sseiadd1")])
;; vphadd[u]bd: horizontally add groups of four bytes, widening to SI.
18311 (define_insn "xop_phadd<u>bd"
18312 [(set (match_operand:V4SI 0 "register_operand" "=x")
18317 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18318 (parallel [(const_int 0) (const_int 4)
18319 (const_int 8) (const_int 12)])))
18323 (parallel [(const_int 1) (const_int 5)
18324 (const_int 9) (const_int 13)]))))
18329 (parallel [(const_int 2) (const_int 6)
18330 (const_int 10) (const_int 14)])))
18334 (parallel [(const_int 3) (const_int 7)
18335 (const_int 11) (const_int 15)]))))))]
18337 "vphadd<u>bd\t{%1, %0|%0, %1}"
18338 [(set_attr "type" "sseiadd1")])
;; vphadd[u]bq: horizontally add groups of eight bytes, widening to DI.
18340 (define_insn "xop_phadd<u>bq"
18341 [(set (match_operand:V2DI 0 "register_operand" "=x")
18347 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18348 (parallel [(const_int 0) (const_int 8)])))
18352 (parallel [(const_int 1) (const_int 9)]))))
18357 (parallel [(const_int 2) (const_int 10)])))
18361 (parallel [(const_int 3) (const_int 11)])))))
18367 (parallel [(const_int 4) (const_int 12)])))
18371 (parallel [(const_int 5) (const_int 13)]))))
18376 (parallel [(const_int 6) (const_int 14)])))
18380 (parallel [(const_int 7) (const_int 15)])))))))]
18382 "vphadd<u>bq\t{%1, %0|%0, %1}"
18383 [(set_attr "type" "sseiadd1")])
;; vphadd[u]wd: horizontally add adjacent HI pairs, widening to SI.
18385 (define_insn "xop_phadd<u>wd"
18386 [(set (match_operand:V4SI 0 "register_operand" "=x")
18390 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
18391 (parallel [(const_int 0) (const_int 2)
18392 (const_int 4) (const_int 6)])))
18396 (parallel [(const_int 1) (const_int 3)
18397 (const_int 5) (const_int 7)])))))]
18399 "vphadd<u>wd\t{%1, %0|%0, %1}"
18400 [(set_attr "type" "sseiadd1")])
;; vphadd[u]wq: horizontally add groups of four HI elements, widening to DI.
18402 (define_insn "xop_phadd<u>wq"
18403 [(set (match_operand:V2DI 0 "register_operand" "=x")
18408 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
18409 (parallel [(const_int 0) (const_int 4)])))
18413 (parallel [(const_int 1) (const_int 5)]))))
18418 (parallel [(const_int 2) (const_int 6)])))
18422 (parallel [(const_int 3) (const_int 7)]))))))]
18424 "vphadd<u>wq\t{%1, %0|%0, %1}"
18425 [(set_attr "type" "sseiadd1")])
;; vphadd[u]dq: horizontally add adjacent SI pairs, widening to DI.
18427 (define_insn "xop_phadd<u>dq"
18428 [(set (match_operand:V2DI 0 "register_operand" "=x")
18432 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
18433 (parallel [(const_int 0) (const_int 2)])))
18437 (parallel [(const_int 1) (const_int 3)])))))]
18439 "vphadd<u>dq\t{%1, %0|%0, %1}"
18440 [(set_attr "type" "sseiadd1")])
;; vphsubbw: horizontally subtract adjacent byte pairs (even minus odd
;; lane, per the selected element indices), widening to HImode.
18442 (define_insn "xop_phsubbw"
18443 [(set (match_operand:V8HI 0 "register_operand" "=x")
18447 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18448 (parallel [(const_int 0) (const_int 2)
18449 (const_int 4) (const_int 6)
18450 (const_int 8) (const_int 10)
18451 (const_int 12) (const_int 14)])))
18455 (parallel [(const_int 1) (const_int 3)
18456 (const_int 5) (const_int 7)
18457 (const_int 9) (const_int 11)
18458 (const_int 13) (const_int 15)])))))]
18460 "vphsubbw\t{%1, %0|%0, %1}"
18461 [(set_attr "type" "sseiadd1")])
;; vphsubwd: horizontally subtract adjacent HI pairs, widening to SI.
18463 (define_insn "xop_phsubwd"
18464 [(set (match_operand:V4SI 0 "register_operand" "=x")
18468 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
18469 (parallel [(const_int 0) (const_int 2)
18470 (const_int 4) (const_int 6)])))
18474 (parallel [(const_int 1) (const_int 3)
18475 (const_int 5) (const_int 7)])))))]
18477 "vphsubwd\t{%1, %0|%0, %1}"
18478 [(set_attr "type" "sseiadd1")])
;; vphsubdq: horizontally subtract adjacent SI pairs, widening to DI.
18480 (define_insn "xop_phsubdq"
18481 [(set (match_operand:V2DI 0 "register_operand" "=x")
18485 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
18486 (parallel [(const_int 0) (const_int 2)])))
18490 (parallel [(const_int 1) (const_int 3)])))))]
18492 "vphsubdq\t{%1, %0|%0, %1}"
18493 [(set_attr "type" "sseiadd1")])
18495 ;; XOP permute instructions
18496 (define_insn "xop_pperm"
18497 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
18499 [(match_operand:V16QI 1 "register_operand" "x,x")
18500 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
18501 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
18502 UNSPEC_XOP_PERMUTE))]
18503 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18504 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18505 [(set_attr "type" "sse4arg")
18506 (set_attr "mode" "TI")])
18508 ;; XOP pack instructions that combine two vectors into a smaller vector
18509 (define_insn "xop_pperm_pack_v2di_v4si"
18510 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
18513 (match_operand:V2DI 1 "register_operand" "x,x"))
18515 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
18516 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
18517 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18518 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18519 [(set_attr "type" "sse4arg")
18520 (set_attr "mode" "TI")])
18522 (define_insn "xop_pperm_pack_v4si_v8hi"
18523 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
18526 (match_operand:V4SI 1 "register_operand" "x,x"))
18528 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
18529 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
18530 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18531 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18532 [(set_attr "type" "sse4arg")
18533 (set_attr "mode" "TI")])
18535 (define_insn "xop_pperm_pack_v8hi_v16qi"
18536 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
18539 (match_operand:V8HI 1 "register_operand" "x,x"))
18541 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
18542 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
18543 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18544 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18545 [(set_attr "type" "sse4arg")
18546 (set_attr "mode" "TI")])
18548 ;; XOP packed rotate instructions
;; NOTE(review): elided extract -- leading integers are original-file line
;; numbers; some RTL lines are missing.  Code left byte-identical.
;; Expand rotate-left with a scalar count: if the count is not an
;; in-range immediate, splat it into a vector register and use the
;; variable-count vprot pattern (gen_xop_vrotl<mode>3).
18549 (define_expand "rotl<mode>3"
18550 [(set (match_operand:VI_128 0 "register_operand")
18552 (match_operand:VI_128 1 "nonimmediate_operand")
18553 (match_operand:SI 2 "general_operand")))]
18556 /* If we were given a scalar, convert it to parallel */
18557 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
18559 rtvec vs = rtvec_alloc (<ssescalarnum>);
18560 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
18561 rtx reg = gen_reg_rtx (<MODE>mode);
18562 rtx op2 = operands[2];
18565 if (GET_MODE (op2) != <ssescalarmode>mode)
18567 op2 = gen_reg_rtx (<ssescalarmode>mode);
18568 convert_move (op2, operands[2], false);
18571 for (i = 0; i < <ssescalarnum>; i++)
18572 RTVEC_ELT (vs, i) = op2;
18574 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
18575 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right: same splat strategy, but negate the splatted count and
;; reuse the rotate-left pattern (vprot rotates right for negative counts).
18580 (define_expand "rotr<mode>3"
18581 [(set (match_operand:VI_128 0 "register_operand")
18583 (match_operand:VI_128 1 "nonimmediate_operand")
18584 (match_operand:SI 2 "general_operand")))]
18587 /* If we were given a scalar, convert it to parallel */
18588 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
18590 rtvec vs = rtvec_alloc (<ssescalarnum>);
18591 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
18592 rtx neg = gen_reg_rtx (<MODE>mode);
18593 rtx reg = gen_reg_rtx (<MODE>mode);
18594 rtx op2 = operands[2];
18597 if (GET_MODE (op2) != <ssescalarmode>mode)
18599 op2 = gen_reg_rtx (<ssescalarmode>mode);
18600 convert_move (op2, operands[2], false);
18603 for (i = 0; i < <ssescalarnum>; i++)
18604 RTVEC_ELT (vs, i) = op2;
18606 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
18607 emit_insn (gen_neg<mode>2 (neg, reg));
18608 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Immediate-count rotate left (vprot with an immediate operand).
18613 (define_insn "xop_rotl<mode>3"
18614 [(set (match_operand:VI_128 0 "register_operand" "=x")
18616 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
18617 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
18619 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18620 [(set_attr "type" "sseishft")
18621 (set_attr "length_immediate" "1")
18622 (set_attr "mode" "TI")])
;; Immediate-count rotate right, implemented as a left rotate by
;; (element-bits - count); the complemented immediate is emitted as %3.
18624 (define_insn "xop_rotr<mode>3"
18625 [(set (match_operand:VI_128 0 "register_operand" "=x")
18627 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
18628 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
18632 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
18633 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
18635 [(set_attr "type" "sseishft")
18636 (set_attr "length_immediate" "1")
18637 (set_attr "mode" "TI")])
;; Variable-count per-element rotate right: negate counts, rotate left.
18639 (define_expand "vrotr<mode>3"
18640 [(match_operand:VI_128 0 "register_operand")
18641 (match_operand:VI_128 1 "register_operand")
18642 (match_operand:VI_128 2 "register_operand")]
18645 rtx reg = gen_reg_rtx (<MODE>mode);
18646 emit_insn (gen_neg<mode>2 (reg, operands[2]));
18647 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Variable-count per-element rotate left: maps directly onto vprot.
18651 (define_expand "vrotl<mode>3"
18652 [(match_operand:VI_128 0 "register_operand")
18653 (match_operand:VI_128 1 "register_operand")
18654 (match_operand:VI_128 2 "register_operand")]
18657 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; The vprot insn itself: per-element rotate whose direction depends on
;; the sign of each count element (negative counts rotate the other way,
;; modeled here with the if_then_else/neg RTL).  At most one of
;; operands 1/2 may be in memory.
18661 (define_insn "xop_vrotl<mode>3"
18662 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
18663 (if_then_else:VI_128
18665 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
18668 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
18672 (neg:VI_128 (match_dup 2)))))]
18673 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18674 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18675 [(set_attr "type" "sseishft")
18676 (set_attr "prefix_data16" "0")
18677 (set_attr "prefix_extra" "2")
18678 (set_attr "mode" "TI")])
18680 ;; XOP packed shift instructions.
;; NOTE(review): elided extract -- leading integers are original-file line
;; numbers; some RTL lines are missing.  Code left byte-identical.
;; Variable logical right shift for byte/word vectors: XOP vpshl shifts
;; right when the per-element count is negative, so negate the counts.
18681 (define_expand "vlshr<mode>3"
18682 [(set (match_operand:VI12_128 0 "register_operand")
18684 (match_operand:VI12_128 1 "register_operand")
18685 (match_operand:VI12_128 2 "nonimmediate_operand")))]
18688 rtx neg = gen_reg_rtx (<MODE>mode);
18689 emit_insn (gen_neg<mode>2 (neg, operands[2]));
18690 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Same for dword/qword vectors; AVX2 has native variable shifts, the
;; XOP path again negates the counts and uses vpshl.
18694 (define_expand "vlshr<mode>3"
18695 [(set (match_operand:VI48_128 0 "register_operand")
18697 (match_operand:VI48_128 1 "register_operand")
18698 (match_operand:VI48_128 2 "nonimmediate_operand")))]
18699 "TARGET_AVX2 || TARGET_XOP"
18703 rtx neg = gen_reg_rtx (<MODE>mode);
18704 emit_insn (gen_neg<mode>2 (neg, operands[2]));
18705 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; 512-bit variable logical right shift (condition elided in this extract).
18710 (define_expand "vlshr<mode>3"
18711 [(set (match_operand:VI48_512 0 "register_operand")
18713 (match_operand:VI48_512 1 "register_operand")
18714 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; 256-bit variable logical right shift (condition elided in this extract).
18717 (define_expand "vlshr<mode>3"
18718 [(set (match_operand:VI48_256 0 "register_operand")
18720 (match_operand:VI48_256 1 "register_operand")
18721 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Variable arithmetic right shift of words: XOP path negates the counts
;; and uses the signed vpsha pattern (gen_xop_shav8hi3).
18724 (define_expand "vashrv8hi3<mask_name>"
18725 [(set (match_operand:V8HI 0 "register_operand")
18727 (match_operand:V8HI 1 "register_operand")
18728 (match_operand:V8HI 2 "nonimmediate_operand")))]
18729 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
18733 rtx neg = gen_reg_rtx (V8HImode);
18734 emit_insn (gen_negv8hi2 (neg, operands[2]));
18735 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
;; Variable arithmetic right shift of bytes via negated counts + vpshab.
18740 (define_expand "vashrv16qi3"
18741 [(set (match_operand:V16QI 0 "register_operand")
18743 (match_operand:V16QI 1 "register_operand")
18744 (match_operand:V16QI 2 "nonimmediate_operand")))]
18747 rtx neg = gen_reg_rtx (V16QImode);
18748 emit_insn (gen_negv16qi2 (neg, operands[2]));
18749 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
;; Variable arithmetic right shift of quadwords (XOP or AVX512VL).
18753 (define_expand "vashrv2di3<mask_name>"
18754 [(set (match_operand:V2DI 0 "register_operand")
18756 (match_operand:V2DI 1 "register_operand")
18757 (match_operand:V2DI 2 "nonimmediate_operand")))]
18758 "TARGET_XOP || TARGET_AVX512VL"
18762 rtx neg = gen_reg_rtx (V2DImode);
18763 emit_insn (gen_negv2di2 (neg, operands[2]));
18764 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
;; Variable arithmetic right shift of doublewords (AVX2 native; XOP via
;; negated counts + vpshad).
18769 (define_expand "vashrv4si3"
18770 [(set (match_operand:V4SI 0 "register_operand")
18771 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
18772 (match_operand:V4SI 2 "nonimmediate_operand")))]
18773 "TARGET_AVX2 || TARGET_XOP"
18777 rtx neg = gen_reg_rtx (V4SImode);
18778 emit_insn (gen_negv4si2 (neg, operands[2]));
18779 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; 512-bit arithmetic right shift (condition elided in this extract).
18784 (define_expand "vashrv16si3"
18785 [(set (match_operand:V16SI 0 "register_operand")
18786 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
18787 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; 256-bit arithmetic right shift (condition elided in this extract).
18790 (define_expand "vashrv8si3"
18791 [(set (match_operand:V8SI 0 "register_operand")
18792 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
18793 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; Variable left shift of byte/word vectors: positive counts shift left,
;; so the counts can be used directly with the vpsha pattern.
18796 (define_expand "vashl<mode>3"
18797 [(set (match_operand:VI12_128 0 "register_operand")
18799 (match_operand:VI12_128 1 "register_operand")
18800 (match_operand:VI12_128 2 "nonimmediate_operand")))]
18803 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Variable left shift of dword/qword vectors; count is forced into a
;; register for the XOP pattern.
18807 (define_expand "vashl<mode>3"
18808 [(set (match_operand:VI48_128 0 "register_operand")
18810 (match_operand:VI48_128 1 "register_operand")
18811 (match_operand:VI48_128 2 "nonimmediate_operand")))]
18812 "TARGET_AVX2 || TARGET_XOP"
18816 operands[2] = force_reg (<MODE>mode, operands[2]);
18817 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; 512-bit variable left shift (condition elided in this extract).
18822 (define_expand "vashl<mode>3"
18823 [(set (match_operand:VI48_512 0 "register_operand")
18825 (match_operand:VI48_512 1 "register_operand")
18826 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; 256-bit variable left shift (condition elided in this extract).
18829 (define_expand "vashl<mode>3"
18830 [(set (match_operand:VI48_256 0 "register_operand")
18832 (match_operand:VI48_256 1 "register_operand")
18833 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; XOP arithmetic shift (vpsha): per-element, direction selected by the
;; sign of each count element (if_then_else/neg RTL).  At most one of
;; operands 1/2 may be a memory reference.
18836 (define_insn "xop_sha<mode>3"
18837 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
18838 (if_then_else:VI_128
18840 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
18843 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
18847 (neg:VI_128 (match_dup 2)))))]
18848 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18849 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18850 [(set_attr "type" "sseishft")
18851 (set_attr "prefix_data16" "0")
18852 (set_attr "prefix_extra" "2")
18853 (set_attr "mode" "TI")])
;; XOP logical shift (vpshl): same shape as vpsha but logical.
18855 (define_insn "xop_shl<mode>3"
18856 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
18857 (if_then_else:VI_128
18859 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
18862 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
18866 (neg:VI_128 (match_dup 2)))))]
18867 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18868 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18869 [(set_attr "type" "sseishft")
18870 (set_attr "prefix_data16" "0")
18871 (set_attr "prefix_extra" "2")
18872 (set_attr "mode" "TI")])
;; Byte-vector shift by a scalar count.  On XOP/V16QI: splat the count
;; (negated for right shifts), then use vpshl/vpsha; otherwise fall back
;; to ix86_expand_vecop_qihi.
18874 (define_expand "<shift_insn><mode>3"
18875 [(set (match_operand:VI1_AVX512 0 "register_operand")
18876 (any_shift:VI1_AVX512
18877 (match_operand:VI1_AVX512 1 "register_operand")
18878 (match_operand:SI 2 "nonmemory_operand")))]
18881 if (TARGET_XOP && <MODE>mode == V16QImode)
18883 bool negate = false;
18884 rtx (*gen) (rtx, rtx, rtx);
18888 if (<CODE> != ASHIFT)
18890 if (CONST_INT_P (operands[2]))
18891 operands[2] = GEN_INT (-INTVAL (operands[2]));
18895 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
18896 for (i = 0; i < 16; i++)
18897 XVECEXP (par, 0, i) = operands[2];
18899 tmp = gen_reg_rtx (V16QImode);
18900 emit_insn (gen_vec_initv16qiqi (tmp, par));
18903 emit_insn (gen_negv16qi2 (tmp, tmp));
18905 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
18906 emit_insn (gen (operands[0], operands[1], tmp));
18909 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; V2DI arithmetic right shift by a scalar count.  Without AVX512VL,
;; splat the (negated) count into a V2DI register and use XOP vpshaq.
18913 (define_expand "ashrv2di3"
18914 [(set (match_operand:V2DI 0 "register_operand")
18916 (match_operand:V2DI 1 "register_operand")
18917 (match_operand:DI 2 "nonmemory_operand")))]
18918 "TARGET_XOP || TARGET_AVX512VL"
18920 if (!TARGET_AVX512VL)
18922 rtx reg = gen_reg_rtx (V2DImode);
18924 bool negate = false;
18927 if (CONST_INT_P (operands[2]))
18928 operands[2] = GEN_INT (-INTVAL (operands[2]));
18932 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
18933 for (i = 0; i < 2; i++)
18934 XVECEXP (par, 0, i) = operands[2];
18936 emit_insn (gen_vec_initv2didi (reg, par));
18939 emit_insn (gen_negv2di2 (reg, reg));
18941 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
18946 ;; XOP FRCZ support
;; NOTE(review): elided extract -- leading integers are original-file line
;; numbers; some RTL lines are missing.  Code left byte-identical.
;; Extract fraction (vfrcz) over all FMA-capable float vector modes.
18947 (define_insn "xop_frcz<mode>2"
18948 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
18950 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
18953 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
18954 [(set_attr "type" "ssecvt1")
18955 (set_attr "mode" "<MODE>")])
;; Scalar (vector-merge) vfrcz expander; operand 2 is set to the zero
;; vector that supplies the untouched upper elements.
18957 (define_expand "xop_vmfrcz<mode>2"
18958 [(set (match_operand:VF_128 0 "register_operand")
18961 [(match_operand:VF_128 1 "nonimmediate_operand")]
18966 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Matcher for the scalar vfrcz form; operand 2 must be the const-0 vector.
18968 (define_insn "*xop_vmfrcz<mode>2"
18969 [(set (match_operand:VF_128 0 "register_operand" "=x")
18972 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
18974 (match_operand:VF_128 2 "const0_operand")
18977 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
18978 [(set_attr "type" "ssecvt1")
18979 (set_attr "mode" "<MODE>")])
;; XOP signed integer vector compare (vpcom); %Y1 prints the comparison
;; code taken from the match_operator.
18981 (define_insn "xop_maskcmp<mode>3"
18982 [(set (match_operand:VI_128 0 "register_operand" "=x")
18983 (match_operator:VI_128 1 "ix86_comparison_int_operator"
18984 [(match_operand:VI_128 2 "register_operand" "x")
18985 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
18987 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
18988 [(set_attr "type" "sse4arg")
18989 (set_attr "prefix_data16" "0")
18990 (set_attr "prefix_rep" "0")
18991 (set_attr "prefix_extra" "2")
18992 (set_attr "length_immediate" "1")
18993 (set_attr "mode" "TI")])
;; Unsigned variant (vpcom*u*).
18995 (define_insn "xop_maskcmp_uns<mode>3"
18996 [(set (match_operand:VI_128 0 "register_operand" "=x")
18997 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
18998 [(match_operand:VI_128 2 "register_operand" "x")
18999 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
19001 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
19002 [(set_attr "type" "ssecmp")
19003 (set_attr "prefix_data16" "0")
19004 (set_attr "prefix_rep" "0")
19005 (set_attr "prefix_extra" "2")
19006 (set_attr "length_immediate" "1")
19007 (set_attr "mode" "TI")])
19009 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
19010 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
19011 ;; the exact instruction generated for the intrinsic.
;; The unspec wrapper keeps the optimizers from canonicalizing the compare.
19012 (define_insn "xop_maskcmp_uns2<mode>3"
19013 [(set (match_operand:VI_128 0 "register_operand" "=x")
19015 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
19016 [(match_operand:VI_128 2 "register_operand" "x")
19017 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
19018 UNSPEC_XOP_UNSIGNED_CMP))]
19020 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
19021 [(set_attr "type" "ssecmp")
19022 (set_attr "prefix_data16" "0")
19023 (set_attr "prefix_extra" "2")
19024 (set_attr "length_immediate" "1")
19025 (set_attr "mode" "TI")])
19027 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
19028 ;; being added here to be complete.
;; Operand 3 selects between the always-true and always-false compare.
19029 (define_insn "xop_pcom_tf<mode>3"
19030 [(set (match_operand:VI_128 0 "register_operand" "=x")
19032 [(match_operand:VI_128 1 "register_operand" "x")
19033 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
19034 (match_operand:SI 3 "const_int_operand" "n")]
19035 UNSPEC_XOP_TRUEFALSE))]
19038 return ((INTVAL (operands[3]) != 0)
19039 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19040 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
19042 [(set_attr "type" "ssecmp")
19043 (set_attr "prefix_data16" "0")
19044 (set_attr "prefix_extra" "2")
19045 (set_attr "length_immediate" "1")
19046 (set_attr "mode" "TI")])
;; Two-source float permute (vpermil2p[sd]); operand 3 is the per-element
;; selector vector and operand 4 the 2-bit immediate control.
19048 (define_insn "xop_vpermil2<mode>3"
19049 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
19051 [(match_operand:VF_128_256 1 "register_operand" "x,x")
19052 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
19053 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
19054 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
19057 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
19058 [(set_attr "type" "sse4arg")
19059 (set_attr "length_immediate" "1")
19060 (set_attr "mode" "<MODE>")])
19062 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NOTE(review): elided extract -- leading integers are original-file line
;; numbers; some RTL lines are missing.  Code left byte-identical.
;; AES-NI round instructions.  Each has a legacy SSE form (operand 1 tied
;; to the destination, alternative 0) and a 3-operand AVX form.
;; One AES encryption round (aesenc/vaesenc).
19064 (define_insn "aesenc"
19065 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19066 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19067 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19071 aesenc\t{%2, %0|%0, %2}
19072 vaesenc\t{%2, %1, %0|%0, %1, %2}"
19073 [(set_attr "isa" "noavx,avx")
19074 (set_attr "type" "sselog1")
19075 (set_attr "prefix_extra" "1")
19076 (set_attr "prefix" "orig,vex")
19077 (set_attr "btver2_decode" "double,double")
19078 (set_attr "mode" "TI")])
;; Final AES encryption round (aesenclast/vaesenclast).
19080 (define_insn "aesenclast"
19081 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19082 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19083 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19084 UNSPEC_AESENCLAST))]
19087 aesenclast\t{%2, %0|%0, %2}
19088 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
19089 [(set_attr "isa" "noavx,avx")
19090 (set_attr "type" "sselog1")
19091 (set_attr "prefix_extra" "1")
19092 (set_attr "prefix" "orig,vex")
19093 (set_attr "btver2_decode" "double,double")
19094 (set_attr "mode" "TI")])
;; One AES decryption round (aesdec/vaesdec).
19096 (define_insn "aesdec"
19097 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19098 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19099 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19103 aesdec\t{%2, %0|%0, %2}
19104 vaesdec\t{%2, %1, %0|%0, %1, %2}"
19105 [(set_attr "isa" "noavx,avx")
19106 (set_attr "type" "sselog1")
19107 (set_attr "prefix_extra" "1")
19108 (set_attr "prefix" "orig,vex")
19109 (set_attr "btver2_decode" "double,double")
19110 (set_attr "mode" "TI")])
;; Final AES decryption round (aesdeclast/vaesdeclast).
19112 (define_insn "aesdeclast"
19113 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19114 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19115 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19116 UNSPEC_AESDECLAST))]
19119 aesdeclast\t{%2, %0|%0, %2}
19120 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
19121 [(set_attr "isa" "noavx,avx")
19122 (set_attr "type" "sselog1")
19123 (set_attr "prefix_extra" "1")
19124 (set_attr "prefix" "orig,vex")
19125 (set_attr "btver2_decode" "double,double")
19126 (set_attr "mode" "TI")])
;; AES InvMixColumns (aesimc); %v emits the AVX "v" prefix when available.
19128 (define_insn "aesimc"
19129 [(set (match_operand:V2DI 0 "register_operand" "=x")
19130 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
19133 "%vaesimc\t{%1, %0|%0, %1}"
19134 [(set_attr "type" "sselog1")
19135 (set_attr "prefix_extra" "1")
19136 (set_attr "prefix" "maybe_vex")
19137 (set_attr "mode" "TI")])
;; AES round-key generation helper; operand 2 is the 8-bit round constant.
19139 (define_insn "aeskeygenassist"
19140 [(set (match_operand:V2DI 0 "register_operand" "=x")
19141 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
19142 (match_operand:SI 2 "const_0_to_255_operand" "n")]
19143 UNSPEC_AESKEYGENASSIST))]
19145 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
19146 [(set_attr "type" "sselog1")
19147 (set_attr "prefix_extra" "1")
19148 (set_attr "length_immediate" "1")
19149 (set_attr "prefix" "maybe_vex")
19150 (set_attr "mode" "TI")])
;; Carry-less multiply (pclmulqdq); operand 3 selects the qword halves.
19152 (define_insn "pclmulqdq"
19153 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19154 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19155 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
19156 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
19160 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
19161 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19162 [(set_attr "isa" "noavx,avx")
19163 (set_attr "type" "sselog1")
19164 (set_attr "prefix_extra" "1")
19165 (set_attr "length_immediate" "1")
19166 (set_attr "prefix" "orig,vex")
19167 (set_attr "mode" "TI")])
;; NOTE(review): elided extract -- leading integers are original-file line
;; numbers; some RTL lines are missing.  Code left byte-identical.
;; vzeroall expander: build a PARALLEL holding the unspec_volatile plus
;; one (set (reg Vn) 0) per SSE register (16 regs in 64-bit, 8 otherwise)
;; so dataflow knows every vector register is clobbered to zero.
19169 (define_expand "avx_vzeroall"
19170 [(match_par_dup 0 [(const_int 0)])]
19173 int nregs = TARGET_64BIT ? 16 : 8;
19176 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
19178 XVECEXP (operands[0], 0, 0)
19179 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
19182 for (regno = 0; regno < nregs; regno++)
19183 XVECEXP (operands[0], 0, regno + 1)
19184 = gen_rtx_SET (gen_rtx_REG (V8SImode, GET_SSE_REGNO (regno)),
19185 CONST0_RTX (V8SImode));
;; Matcher for the parallel built above.
19188 (define_insn "*avx_vzeroall"
19189 [(match_parallel 0 "vzeroall_operation"
19190 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
19193 [(set_attr "type" "sse")
19194 (set_attr "modrm" "0")
19195 (set_attr "memory" "none")
19196 (set_attr "prefix" "vex")
19197 (set_attr "btver2_decode" "vector")
19198 (set_attr "mode" "OI")])
19200 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
19201 ;; if the upper 128bits are unused.
19202 (define_insn "avx_vzeroupper"
19203 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
19206 [(set_attr "type" "sse")
19207 (set_attr "modrm" "0")
19208 (set_attr "memory" "none")
19209 (set_attr "prefix" "vex")
19210 (set_attr "btver2_decode" "vector")
19211 (set_attr "mode" "OI")])
;; NOTE(review): elided extract -- leading integers are original-file line
;; numbers; some RTL lines are missing.  Code left byte-identical.
;; ISA attribute for EVEX-encoded vpbroadcast: byte/word element modes
;; need AVX512BW, dword/qword modes only AVX512F.
19213 (define_mode_attr pbroadcast_evex_isa
19214 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
19215 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
19216 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
19217 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
;; Broadcast element 0 of an XMM source to a whole integer vector.
19219 (define_insn "avx2_pbroadcast<mode>"
19220 [(set (match_operand:VI 0 "register_operand" "=x,v")
19222 (vec_select:<ssescalarmode>
19223 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
19224 (parallel [(const_int 0)]))))]
19226 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
19227 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
19228 (set_attr "type" "ssemov")
19229 (set_attr "prefix_extra" "1")
19230 (set_attr "prefix" "vex,evex")
19231 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast element 0 of a 256-bit source; register alternatives use the
;; low XMM part (%x1), memory alternatives the scalar element.
19233 (define_insn "avx2_pbroadcast<mode>_1"
19234 [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
19235 (vec_duplicate:VI_256
19236 (vec_select:<ssescalarmode>
19237 (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
19238 (parallel [(const_int 0)]))))]
19241 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
19242 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
19243 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
19244 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
19245 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
19246 (set_attr "type" "ssemov")
19247 (set_attr "prefix_extra" "1")
19248 (set_attr "prefix" "vex")
19249 (set_attr "mode" "<sseinsnmode>")])
;; Full-vector variable permute (vperm*) with index vector in operand 2;
;; note the Intel-syntax operand order swaps data and index operands.
19251 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
19252 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
19253 (unspec:VI48F_256_512
19254 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
19255 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19257 "TARGET_AVX2 && <mask_mode512bit_condition>"
19258 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19259 [(set_attr "type" "sselog")
19260 (set_attr "prefix" "<mask_prefix2>")
19261 (set_attr "mode" "<sseinsnmode>")])
;; Byte-element variable permute (requires AVX512VBMI).
19263 (define_insn "<avx512>_permvar<mode><mask_name>"
19264 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
19265 (unspec:VI1_AVX512VL
19266 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
19267 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19269 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
19270 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19271 [(set_attr "type" "sselog")
19272 (set_attr "prefix" "<mask_prefix2>")
19273 (set_attr "mode" "<sseinsnmode>")])
;; Word-element variable permute (requires AVX512BW).
19275 (define_insn "<avx512>_permvar<mode><mask_name>"
19276 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
19277 (unspec:VI2_AVX512VL
19278 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
19279 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19281 "TARGET_AVX512BW && <mask_mode512bit_condition>"
19282 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19283 [(set_attr "type" "sselog")
19284 (set_attr "prefix" "<mask_prefix2>")
19285 (set_attr "mode" "<sseinsnmode>")])
;; Immediate 4-element permute: split the 8-bit control into four 2-bit
;; lane indices for the avx2_perm<mode>_1 pattern.
19287 (define_expand "avx2_perm<mode>"
19288 [(match_operand:VI8F_256 0 "register_operand")
19289 (match_operand:VI8F_256 1 "nonimmediate_operand")
19290 (match_operand:SI 2 "const_0_to_255_operand")]
19293 int mask = INTVAL (operands[2]);
19294 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
19295 GEN_INT ((mask >> 0) & 3),
19296 GEN_INT ((mask >> 2) & 3),
19297 GEN_INT ((mask >> 4) & 3),
19298 GEN_INT ((mask >> 6) & 3)));
;; Masked variant: same decomposition, passing merge source (operand 3)
;; and mask register (operand 4) through to the _1_mask pattern.
19302 (define_expand "avx512vl_perm<mode>_mask"
19303 [(match_operand:VI8F_256 0 "register_operand")
19304 (match_operand:VI8F_256 1 "nonimmediate_operand")
19305 (match_operand:SI 2 "const_0_to_255_operand")
19306 (match_operand:VI8F_256 3 "nonimm_or_0_operand")
19307 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19310 int mask = INTVAL (operands[2]);
19311 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
19312 GEN_INT ((mask >> 0) & 3),
19313 GEN_INT ((mask >> 2) & 3),
19314 GEN_INT ((mask >> 4) & 3),
19315 GEN_INT ((mask >> 6) & 3),
19316 operands[3], operands[4]));
;; NOTE(review): elided extract -- leading integers are original-file line
;; numbers; some RTL lines are missing.  Code left byte-identical.
;; Matcher for the 4-element immediate permute: reassemble the imm8 from
;; the four 2-bit indices and print a single vpermq/vpermpd.
19320 (define_insn "avx2_perm<mode>_1<mask_name>"
19321 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
19322 (vec_select:VI8F_256
19323 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
19324 (parallel [(match_operand 2 "const_0_to_3_operand")
19325 (match_operand 3 "const_0_to_3_operand")
19326 (match_operand 4 "const_0_to_3_operand")
19327 (match_operand 5 "const_0_to_3_operand")])))]
19328 "TARGET_AVX2 && <mask_mode512bit_condition>"
19331 mask |= INTVAL (operands[2]) << 0;
19332 mask |= INTVAL (operands[3]) << 2;
19333 mask |= INTVAL (operands[4]) << 4;
19334 mask |= INTVAL (operands[5]) << 6;
19335 operands[2] = GEN_INT (mask);
19336 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
19338 [(set_attr "type" "sselog")
19339 (set_attr "prefix" "<mask_prefix2>")
19340 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit immediate permute: the same 2-bit pattern applies to both
;; 256-bit halves, hence the duplicated indices offset by 4.
19342 (define_expand "avx512f_perm<mode>"
19343 [(match_operand:V8FI 0 "register_operand")
19344 (match_operand:V8FI 1 "nonimmediate_operand")
19345 (match_operand:SI 2 "const_0_to_255_operand")]
19348 int mask = INTVAL (operands[2]);
19349 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
19350 GEN_INT ((mask >> 0) & 3),
19351 GEN_INT ((mask >> 2) & 3),
19352 GEN_INT ((mask >> 4) & 3),
19353 GEN_INT ((mask >> 6) & 3),
19354 GEN_INT (((mask >> 0) & 3) + 4),
19355 GEN_INT (((mask >> 2) & 3) + 4),
19356 GEN_INT (((mask >> 4) & 3) + 4),
19357 GEN_INT (((mask >> 6) & 3) + 4)));
;; Masked 512-bit immediate permute (merge source in operand 3, mask in 4).
19361 (define_expand "avx512f_perm<mode>_mask"
19362 [(match_operand:V8FI 0 "register_operand")
19363 (match_operand:V8FI 1 "nonimmediate_operand")
19364 (match_operand:SI 2 "const_0_to_255_operand")
19365 (match_operand:V8FI 3 "nonimm_or_0_operand")
19366 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19369 int mask = INTVAL (operands[2]);
19370 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
19371 GEN_INT ((mask >> 0) & 3),
19372 GEN_INT ((mask >> 2) & 3),
19373 GEN_INT ((mask >> 4) & 3),
19374 GEN_INT ((mask >> 6) & 3),
19375 GEN_INT (((mask >> 0) & 3) + 4),
19376 GEN_INT (((mask >> 2) & 3) + 4),
19377 GEN_INT (((mask >> 4) & 3) + 4),
19378 GEN_INT (((mask >> 6) & 3) + 4),
19379 operands[3], operands[4]));
;; Matcher: the condition enforces that the upper-half indices (6..9)
;; mirror the lower-half ones (2..5) offset by 4, so one imm8 suffices.
19383 (define_insn "avx512f_perm<mode>_1<mask_name>"
19384 [(set (match_operand:V8FI 0 "register_operand" "=v")
19386 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
19387 (parallel [(match_operand 2 "const_0_to_3_operand")
19388 (match_operand 3 "const_0_to_3_operand")
19389 (match_operand 4 "const_0_to_3_operand")
19390 (match_operand 5 "const_0_to_3_operand")
19391 (match_operand 6 "const_4_to_7_operand")
19392 (match_operand 7 "const_4_to_7_operand")
19393 (match_operand 8 "const_4_to_7_operand")
19394 (match_operand 9 "const_4_to_7_operand")])))]
19395 "TARGET_AVX512F && <mask_mode512bit_condition>
19396 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
19397 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
19398 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
19399 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
19402 mask |= INTVAL (operands[2]) << 0;
19403 mask |= INTVAL (operands[3]) << 2;
19404 mask |= INTVAL (operands[4]) << 4;
19405 mask |= INTVAL (operands[5]) << 6;
19406 operands[2] = GEN_INT (mask);
19407 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
19409 [(set_attr "type" "sselog")
19410 (set_attr "prefix" "<mask_prefix2>")
19411 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit-lane permute of two 256-bit sources (vperm2i128).
19413 (define_insn "avx2_permv2ti"
19414 [(set (match_operand:V4DI 0 "register_operand" "=x")
19416 [(match_operand:V4DI 1 "register_operand" "x")
19417 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
19418 (match_operand:SI 3 "const_0_to_255_operand" "n")]
19421 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19422 [(set_attr "type" "sselog")
19423 (set_attr "prefix" "vex")
19424 (set_attr "mode" "OI")])
;; Broadcast the low double of an XMM register to all V4DF lanes.
19426 (define_insn "avx2_vec_dupv4df"
19427 [(set (match_operand:V4DF 0 "register_operand" "=v")
19428 (vec_duplicate:V4DF
19430 (match_operand:V2DF 1 "register_operand" "v")
19431 (parallel [(const_int 0)]))))]
19433 "vbroadcastsd\t{%1, %0|%0, %1}"
19434 [(set_attr "type" "sselog1")
19435 (set_attr "prefix" "maybe_evex")
19436 (set_attr "mode" "V4DF")])
;; EVEX broadcast of element 0 from a same-width vector source.
19438 (define_insn "<avx512>_vec_dup<mode>_1"
19439 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
19440 (vec_duplicate:VI_AVX512BW
19441 (vec_select:<ssescalarmode>
19442 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
19443 (parallel [(const_int 0)]))))]
19446 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
19447 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
19448 [(set_attr "type" "ssemov")
19449 (set_attr "prefix" "evex")
19450 (set_attr "mode" "<sseinsnmode>")])
;; Maskable broadcast for dword/qword/float element vectors.
19452 (define_insn "<avx512>_vec_dup<mode><mask_name>"
19453 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
19454 (vec_duplicate:V48_AVX512VL
19455 (vec_select:<ssescalarmode>
19456 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
19457 (parallel [(const_int 0)]))))]
19460 /* There is no DF broadcast (in AVX-512*) to 128b register.
19461 Mimic it with integer variant. */
19462 if (<MODE>mode == V2DFmode)
19463 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
19465 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
19467 [(set_attr "type" "ssemov")
19468 (set_attr "prefix" "evex")
19469 (set_attr "mode" "<sseinsnmode>")])
;; Maskable broadcast for byte/word element vectors.
19471 (define_insn "<avx512>_vec_dup<mode><mask_name>"
19472 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
19473 (vec_duplicate:VI12_AVX512VL
19474 (vec_select:<ssescalarmode>
19475 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
19476 (parallel [(const_int 0)]))))]
19478 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
19479 [(set_attr "type" "ssemov")
19480 (set_attr "prefix" "evex")
19481 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a 128-bit chunk to a 512-bit vector: vshuf*32x4 for the
;; register alternative, vbroadcast*32x4 for the memory alternative.
19483 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
19484 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
19485 (vec_duplicate:V16FI
19486 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
19489 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
19490 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19491 [(set_attr "type" "ssemov")
19492 (set_attr "prefix" "evex")
19493 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a 256-bit chunk to a 512-bit vector (vshuf*64x2 imm 0x44 /
;; vbroadcast*64x4).
19495 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
19496 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
19497 (vec_duplicate:V8FI
19498 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
19501 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
19502 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19503 [(set_attr "type" "ssemov")
19504 (set_attr "prefix" "evex")
19505 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a scalar (vector element or GPR, %k1 = 32-bit view) to a
;; byte/word element vector.
19507 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
19508 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
19509 (vec_duplicate:VI12_AVX512VL
19510 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
19513 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
19514 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
19515 [(set_attr "type" "ssemov")
19516 (set_attr "prefix" "evex")
19517 (set_attr "mode" "<sseinsnmode>")])
19519 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
19520 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
19521 (vec_duplicate:V48_AVX512VL
19522 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
19524 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19525 [(set_attr "type" "ssemov")
19526 (set_attr "prefix" "evex")
19527 (set_attr "mode" "<sseinsnmode>")
19528 (set (attr "enabled")
19529 (if_then_else (eq_attr "alternative" "1")
19530 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
19531 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
19534 (define_insn "vec_dupv4sf"
19535 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
19536 (vec_duplicate:V4SF
19537 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
19540 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
19541 vbroadcastss\t{%1, %0|%0, %1}
19542 shufps\t{$0, %0, %0|%0, %0, 0}"
19543 [(set_attr "isa" "avx,avx,noavx")
19544 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
19545 (set_attr "length_immediate" "1,0,1")
19546 (set_attr "prefix_extra" "0,1,*")
19547 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
19548 (set_attr "mode" "V4SF")])
19550 (define_insn "*vec_dupv4si"
19551 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
19552 (vec_duplicate:V4SI
19553 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
19556 %vpshufd\t{$0, %1, %0|%0, %1, 0}
19557 vbroadcastss\t{%1, %0|%0, %1}
19558 shufps\t{$0, %0, %0|%0, %0, 0}"
19559 [(set_attr "isa" "sse2,avx,noavx")
19560 (set_attr "type" "sselog1,ssemov,sselog1")
19561 (set_attr "length_immediate" "1,0,1")
19562 (set_attr "prefix_extra" "0,1,*")
19563 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
19564 (set_attr "mode" "TI,V4SF,V4SF")])
19566 (define_insn "*vec_dupv2di"
19567 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
19568 (vec_duplicate:V2DI
19569 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,vm,0")))]
19573 vpunpcklqdq\t{%d1, %0|%0, %d1}
19574 %vmovddup\t{%1, %0|%0, %1}
19576 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
19577 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
19578 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
19579 (set_attr "mode" "TI,TI,DF,V4SF")])
19581 (define_insn "avx2_vbroadcasti128_<mode>"
19582 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
19584 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
19588 vbroadcasti128\t{%1, %0|%0, %1}
19589 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
19590 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
19591 [(set_attr "isa" "*,avx512dq,avx512vl")
19592 (set_attr "type" "ssemov")
19593 (set_attr "prefix_extra" "1")
19594 (set_attr "prefix" "vex,evex,evex")
19595 (set_attr "mode" "OI")])
19597 ;; Modes handled by AVX vec_dup patterns.
19598 (define_mode_iterator AVX_VEC_DUP_MODE
19599 [V8SI V8SF V4DI V4DF])
19600 (define_mode_attr vecdupssescalarmodesuffix
19601 [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
19602 ;; Modes handled by AVX2 vec_dup patterns.
19603 (define_mode_iterator AVX2_VEC_DUP_MODE
19604 [V32QI V16QI V16HI V8HI V8SI V4SI])
19606 (define_insn "*vec_dup<mode>"
19607 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,v")
19608 (vec_duplicate:AVX2_VEC_DUP_MODE
19609 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
19612 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
19613 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
19615 [(set_attr "isa" "*,*,noavx512vl")
19616 (set_attr "type" "ssemov")
19617 (set_attr "prefix_extra" "1")
19618 (set_attr "prefix" "maybe_evex")
19619 (set_attr "mode" "<sseinsnmode>")
19620 (set (attr "preferred_for_speed")
19621 (cond [(eq_attr "alternative" "2")
19622 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
19624 (symbol_ref "true")))])
19626 (define_insn "vec_dup<mode>"
19627 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
19628 (vec_duplicate:AVX_VEC_DUP_MODE
19629 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
19632 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
19633 vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
19634 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
19635 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
19637 [(set_attr "type" "ssemov")
19638 (set_attr "prefix_extra" "1")
19639 (set_attr "prefix" "maybe_evex")
19640 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
19641 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
19644 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
19645 (vec_duplicate:AVX2_VEC_DUP_MODE
19646 (match_operand:<ssescalarmode> 1 "register_operand")))]
19648 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
19649 available, because then we can broadcast from GPRs directly.
19650 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
19651 for V*SI mode it requires just -mavx512vl. */
19652 && !(TARGET_AVX512VL
19653 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
19654 && reload_completed && GENERAL_REG_P (operands[1])"
19657 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
19658 CONST0_RTX (V4SImode),
19659 gen_lowpart (SImode, operands[1])));
19660 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
19661 gen_lowpart (<ssexmmmode>mode,
19667 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
19668 (vec_duplicate:AVX_VEC_DUP_MODE
19669 (match_operand:<ssescalarmode> 1 "register_operand")))]
19670 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
19671 [(set (match_dup 2)
19672 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
19674 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
19675 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
19677 (define_insn "avx_vbroadcastf128_<mode>"
19678 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
19680 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
19684 vbroadcast<i128>\t{%1, %0|%0, %1}
19685 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
19686 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
19687 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
19688 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
19689 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
19690 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
19691 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
19692 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
19693 (set_attr "prefix_extra" "1")
19694 (set_attr "length_immediate" "0,1,1,0,1,0,1")
19695 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
19696 (set_attr "mode" "<sseinsnmode>")])
19698 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
19699 (define_mode_iterator VI4F_BRCST32x2
19700 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
19701 V16SF (V8SF "TARGET_AVX512VL")])
19703 (define_mode_attr 64x2mode
19704 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
19706 (define_mode_attr 32x2mode
19707 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
19708 (V8SF "V2SF") (V4SI "V2SI")])
19710 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
19711 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
19712 (vec_duplicate:VI4F_BRCST32x2
19713 (vec_select:<32x2mode>
19714 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
19715 (parallel [(const_int 0) (const_int 1)]))))]
19717 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
19718 [(set_attr "type" "ssemov")
19719 (set_attr "prefix_extra" "1")
19720 (set_attr "prefix" "evex")
19721 (set_attr "mode" "<sseinsnmode>")])
19723 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
19724 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
19725 (vec_duplicate:VI4F_256
19726 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
19729 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
19730 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19731 [(set_attr "type" "ssemov")
19732 (set_attr "prefix_extra" "1")
19733 (set_attr "prefix" "evex")
19734 (set_attr "mode" "<sseinsnmode>")])
19736 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
19737 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
19738 (vec_duplicate:V16FI
19739 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
19742 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
19743 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19744 [(set_attr "type" "ssemov")
19745 (set_attr "prefix_extra" "1")
19746 (set_attr "prefix" "evex")
19747 (set_attr "mode" "<sseinsnmode>")])
19749 ;; For broadcast[i|f]64x2
19750 (define_mode_iterator VI8F_BRCST64x2
19751 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
19753 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
19754 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
19755 (vec_duplicate:VI8F_BRCST64x2
19756 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
19759 vshuf<shuffletype>64x2\t{$0x0, %<xtg_mode>1, %<xtg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<xtg_mode>1, %<xtg_mode>1, 0x0}
19760 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19761 [(set_attr "type" "ssemov")
19762 (set_attr "prefix_extra" "1")
19763 (set_attr "prefix" "evex")
19764 (set_attr "mode" "<sseinsnmode>")])
19766 (define_insn "avx512cd_maskb_vec_dup<mode>"
19767 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
19768 (vec_duplicate:VI8_AVX512VL
19770 (match_operand:QI 1 "register_operand" "k"))))]
19772 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
19773 [(set_attr "type" "mskmov")
19774 (set_attr "prefix" "evex")
19775 (set_attr "mode" "XI")])
19777 (define_insn "avx512cd_maskw_vec_dup<mode>"
19778 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
19779 (vec_duplicate:VI4_AVX512VL
19781 (match_operand:HI 1 "register_operand" "k"))))]
19783 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
19784 [(set_attr "type" "mskmov")
19785 (set_attr "prefix" "evex")
19786 (set_attr "mode" "XI")])
19788 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
19789 ;; If it so happens that the input is in memory, use vbroadcast.
19790 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
19791 (define_insn "*avx_vperm_broadcast_v4sf"
19792 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
19794 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
19795 (match_parallel 2 "avx_vbroadcast_operand"
19796 [(match_operand 3 "const_int_operand" "C,n,n")])))]
19799 int elt = INTVAL (operands[3]);
19800 switch (which_alternative)
19804 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
19805 return "vbroadcastss\t{%1, %0|%0, %k1}";
19807 operands[2] = GEN_INT (elt * 0x55);
19808 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
19810 gcc_unreachable ();
19813 [(set_attr "type" "ssemov,ssemov,sselog1")
19814 (set_attr "prefix_extra" "1")
19815 (set_attr "length_immediate" "0,0,1")
19816 (set_attr "prefix" "maybe_evex")
19817 (set_attr "mode" "SF,SF,V4SF")])
19819 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
19820 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
19822 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
19823 (match_parallel 2 "avx_vbroadcast_operand"
19824 [(match_operand 3 "const_int_operand" "C,n,n")])))]
19827 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
19828 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
19830 rtx op0 = operands[0], op1 = operands[1];
19831 int elt = INTVAL (operands[3]);
19837 if (TARGET_AVX2 && elt == 0)
19839 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
19844 /* Shuffle element we care about into all elements of the 128-bit lane.
19845 The other lane gets shuffled too, but we don't care. */
19846 if (<MODE>mode == V4DFmode)
19847 mask = (elt & 1 ? 15 : 0);
19849 mask = (elt & 3) * 0x55;
19850 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
19852 /* Shuffle the lane we care about into both lanes of the dest. */
19853 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
19854 if (EXT_REX_SSE_REG_P (op0))
19856 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
19858 gcc_assert (<MODE>mode == V8SFmode);
19859 if ((mask & 1) == 0)
19860 emit_insn (gen_avx2_vec_dupv8sf (op0,
19861 gen_lowpart (V4SFmode, op0)));
19863 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
19864 GEN_INT (4), GEN_INT (5),
19865 GEN_INT (6), GEN_INT (7),
19866 GEN_INT (12), GEN_INT (13),
19867 GEN_INT (14), GEN_INT (15)));
19871 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
19875 operands[1] = adjust_address (op1, <ssescalarmode>mode,
19876 elt * GET_MODE_SIZE (<ssescalarmode>mode));
19879 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
19880 [(set (match_operand:VF2 0 "register_operand")
19882 (match_operand:VF2 1 "nonimmediate_operand")
19883 (match_operand:SI 2 "const_0_to_255_operand")))]
19884 "TARGET_AVX && <mask_mode512bit_condition>"
19886 int mask = INTVAL (operands[2]);
19887 rtx perm[<ssescalarnum>];
19890 for (i = 0; i < <ssescalarnum>; i = i + 2)
19892 perm[i] = GEN_INT (((mask >> i) & 1) + i);
19893 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
19897 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
19900 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
19901 [(set (match_operand:VF1 0 "register_operand")
19903 (match_operand:VF1 1 "nonimmediate_operand")
19904 (match_operand:SI 2 "const_0_to_255_operand")))]
19905 "TARGET_AVX && <mask_mode512bit_condition>"
19907 int mask = INTVAL (operands[2]);
19908 rtx perm[<ssescalarnum>];
19911 for (i = 0; i < <ssescalarnum>; i = i + 4)
19913 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
19914 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
19915 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
19916 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
19920 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
19923 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
19924 [(set (match_operand:VF 0 "register_operand" "=v")
19926 (match_operand:VF 1 "nonimmediate_operand" "vm")
19927 (match_parallel 2 ""
19928 [(match_operand 3 "const_int_operand")])))]
19929 "TARGET_AVX && <mask_mode512bit_condition>
19930 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
19932 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
19933 operands[2] = GEN_INT (mask);
19934 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
19936 [(set_attr "type" "sselog")
19937 (set_attr "prefix_extra" "1")
19938 (set_attr "length_immediate" "1")
19939 (set_attr "prefix" "<mask_prefix>")
19940 (set_attr "mode" "<sseinsnmode>")])
19942 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
19943 [(set (match_operand:VF 0 "register_operand" "=v")
19945 [(match_operand:VF 1 "register_operand" "v")
19946 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
19948 "TARGET_AVX && <mask_mode512bit_condition>"
19949 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19950 [(set_attr "type" "sselog")
19951 (set_attr "prefix_extra" "1")
19952 (set_attr "btver2_decode" "vector")
19953 (set_attr "prefix" "<mask_prefix>")
19954 (set_attr "mode" "<sseinsnmode>")])
19956 (define_mode_iterator VPERMI2
19957 [V16SI V16SF V8DI V8DF
19958 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
19959 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
19960 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
19961 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
19962 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
19963 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
19964 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
19965 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
19967 (define_mode_iterator VPERMI2I
19969 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
19970 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
19971 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
19972 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
19973 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
19974 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
19976 (define_expand "<avx512>_vpermi2var<mode>3_mask"
19977 [(set (match_operand:VPERMI2 0 "register_operand")
19980 [(match_operand:<sseintvecmode> 2 "register_operand")
19981 (match_operand:VPERMI2 1 "register_operand")
19982 (match_operand:VPERMI2 3 "nonimmediate_operand")]
19985 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
19988 operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
19989 operands[5] = gen_lowpart (<MODE>mode, operands[2]);
19992 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
19993 [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
19994 (vec_merge:VPERMI2I
19996 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
19997 (match_operand:VPERMI2I 1 "register_operand" "v")
19998 (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
20001 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20003 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20004 [(set_attr "type" "sselog")
20005 (set_attr "prefix" "evex")
20006 (set_attr "mode" "<sseinsnmode>")])
20008 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
20009 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
20010 (vec_merge:VF_AVX512VL
20011 (unspec:VF_AVX512VL
20012 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
20013 (match_operand:VF_AVX512VL 1 "register_operand" "v")
20014 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
20016 (subreg:VF_AVX512VL (match_dup 2) 0)
20017 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20019 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20020 [(set_attr "type" "sselog")
20021 (set_attr "prefix" "evex")
20022 (set_attr "mode" "<sseinsnmode>")])
20024 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
20025 [(match_operand:VPERMI2 0 "register_operand")
20026 (match_operand:<sseintvecmode> 1 "register_operand")
20027 (match_operand:VPERMI2 2 "register_operand")
20028 (match_operand:VPERMI2 3 "nonimmediate_operand")
20029 (match_operand:<avx512fmaskmode> 4 "register_operand")]
20032 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
20033 operands[0], operands[1], operands[2], operands[3],
20034 CONST0_RTX (<MODE>mode), operands[4]));
20038 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
20039 [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
20041 [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
20042 (match_operand:VPERMI2 2 "register_operand" "0,v")
20043 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
20047 vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
20048 vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
20049 [(set_attr "type" "sselog")
20050 (set_attr "prefix" "evex")
20051 (set_attr "mode" "<sseinsnmode>")])
20053 (define_insn "<avx512>_vpermt2var<mode>3_mask"
20054 [(set (match_operand:VPERMI2 0 "register_operand" "=v")
20057 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
20058 (match_operand:VPERMI2 2 "register_operand" "0")
20059 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
20062 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20064 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20065 [(set_attr "type" "sselog")
20066 (set_attr "prefix" "evex")
20067 (set_attr "mode" "<sseinsnmode>")])
20069 (define_expand "avx_vperm2f128<mode>3"
20070 [(set (match_operand:AVX256MODE2P 0 "register_operand")
20071 (unspec:AVX256MODE2P
20072 [(match_operand:AVX256MODE2P 1 "register_operand")
20073 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
20074 (match_operand:SI 3 "const_0_to_255_operand")]
20075 UNSPEC_VPERMIL2F128))]
20078 int mask = INTVAL (operands[3]);
20079 if ((mask & 0x88) == 0)
20081 rtx perm[<ssescalarnum>], t1, t2;
20082 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
20084 base = (mask & 3) * nelt2;
20085 for (i = 0; i < nelt2; ++i)
20086 perm[i] = GEN_INT (base + i);
20088 base = ((mask >> 4) & 3) * nelt2;
20089 for (i = 0; i < nelt2; ++i)
20090 perm[i + nelt2] = GEN_INT (base + i);
20092 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
20093 operands[1], operands[2]);
20094 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
20095 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
20096 t2 = gen_rtx_SET (operands[0], t2);
20102 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
20103 ;; means that in order to represent this properly in rtl we'd have to
20104 ;; nest *another* vec_concat with a zero operand and do the select from
20105 ;; a 4x wide vector. That doesn't seem very nice.
20106 (define_insn "*avx_vperm2f128<mode>_full"
20107 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
20108 (unspec:AVX256MODE2P
20109 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
20110 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
20111 (match_operand:SI 3 "const_0_to_255_operand" "n")]
20112 UNSPEC_VPERMIL2F128))]
20114 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20115 [(set_attr "type" "sselog")
20116 (set_attr "prefix_extra" "1")
20117 (set_attr "length_immediate" "1")
20118 (set_attr "prefix" "vex")
20119 (set_attr "mode" "<sseinsnmode>")])
20121 (define_insn "*avx_vperm2f128<mode>_nozero"
20122 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
20123 (vec_select:AVX256MODE2P
20124 (vec_concat:<ssedoublevecmode>
20125 (match_operand:AVX256MODE2P 1 "register_operand" "x")
20126 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
20127 (match_parallel 3 ""
20128 [(match_operand 4 "const_int_operand")])))]
20130 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
20132 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
20134 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
20136 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
20137 operands[3] = GEN_INT (mask);
20138 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
20140 [(set_attr "type" "sselog")
20141 (set_attr "prefix_extra" "1")
20142 (set_attr "length_immediate" "1")
20143 (set_attr "prefix" "vex")
20144 (set_attr "mode" "<sseinsnmode>")])
20146 (define_insn "*ssse3_palignr<mode>_perm"
20147 [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
20149 (match_operand:V_128 1 "register_operand" "0,x,v")
20150 (match_parallel 2 "palignr_operand"
20151 [(match_operand 3 "const_int_operand" "n,n,n")])))]
20154 operands[2] = (GEN_INT (INTVAL (operands[3])
20155 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
20157 switch (which_alternative)
20160 return "palignr\t{%2, %1, %0|%0, %1, %2}";
20163 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
20165 gcc_unreachable ();
20168 [(set_attr "isa" "noavx,avx,avx512bw")
20169 (set_attr "type" "sseishft")
20170 (set_attr "atom_unit" "sishuf")
20171 (set_attr "prefix_data16" "1,*,*")
20172 (set_attr "prefix_extra" "1")
20173 (set_attr "length_immediate" "1")
20174 (set_attr "prefix" "orig,vex,evex")])
20176 (define_expand "avx512vl_vinsert<mode>"
20177 [(match_operand:VI48F_256 0 "register_operand")
20178 (match_operand:VI48F_256 1 "register_operand")
20179 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
20180 (match_operand:SI 3 "const_0_to_1_operand")
20181 (match_operand:VI48F_256 4 "register_operand")
20182 (match_operand:<avx512fmaskmode> 5 "register_operand")]
20185 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
20187 switch (INTVAL (operands[3]))
20190 insn = gen_vec_set_lo_<mode>_mask;
20193 insn = gen_vec_set_hi_<mode>_mask;
20196 gcc_unreachable ();
20199 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
20204 (define_expand "avx_vinsertf128<mode>"
20205 [(match_operand:V_256 0 "register_operand")
20206 (match_operand:V_256 1 "register_operand")
20207 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
20208 (match_operand:SI 3 "const_0_to_1_operand")]
20211 rtx (*insn)(rtx, rtx, rtx);
20213 switch (INTVAL (operands[3]))
20216 insn = gen_vec_set_lo_<mode>;
20219 insn = gen_vec_set_hi_<mode>;
20222 gcc_unreachable ();
20225 emit_insn (insn (operands[0], operands[1], operands[2]));
20229 (define_insn "vec_set_lo_<mode><mask_name>"
20230 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20231 (vec_concat:VI8F_256
20232 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
20233 (vec_select:<ssehalfvecmode>
20234 (match_operand:VI8F_256 1 "register_operand" "v")
20235 (parallel [(const_int 2) (const_int 3)]))))]
20236 "TARGET_AVX && <mask_avx512dq_condition>"
20238 if (TARGET_AVX512DQ)
20239 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
20240 else if (TARGET_AVX512VL)
20241 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
20243 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
20245 [(set_attr "type" "sselog")
20246 (set_attr "prefix_extra" "1")
20247 (set_attr "length_immediate" "1")
20248 (set_attr "prefix" "vex")
20249 (set_attr "mode" "<sseinsnmode>")])
20251 (define_insn "vec_set_hi_<mode><mask_name>"
20252 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20253 (vec_concat:VI8F_256
20254 (vec_select:<ssehalfvecmode>
20255 (match_operand:VI8F_256 1 "register_operand" "v")
20256 (parallel [(const_int 0) (const_int 1)]))
20257 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
20258 "TARGET_AVX && <mask_avx512dq_condition>"
20260 if (TARGET_AVX512DQ)
20261 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
20262 else if (TARGET_AVX512VL)
20263 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
20265 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
20267 [(set_attr "type" "sselog")
20268 (set_attr "prefix_extra" "1")
20269 (set_attr "length_immediate" "1")
20270 (set_attr "prefix" "vex")
20271 (set_attr "mode" "<sseinsnmode>")])
20273 (define_insn "vec_set_lo_<mode><mask_name>"
20274 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
20275 (vec_concat:VI4F_256
20276 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
20277 (vec_select:<ssehalfvecmode>
20278 (match_operand:VI4F_256 1 "register_operand" "v")
20279 (parallel [(const_int 4) (const_int 5)
20280 (const_int 6) (const_int 7)]))))]
20283 if (TARGET_AVX512VL)
20284 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
20286 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
20288 [(set_attr "type" "sselog")
20289 (set_attr "prefix_extra" "1")
20290 (set_attr "length_immediate" "1")
20291 (set_attr "prefix" "vex")
20292 (set_attr "mode" "<sseinsnmode>")])
20294 (define_insn "vec_set_hi_<mode><mask_name>"
20295 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
20296 (vec_concat:VI4F_256
20297 (vec_select:<ssehalfvecmode>
20298 (match_operand:VI4F_256 1 "register_operand" "v")
20299 (parallel [(const_int 0) (const_int 1)
20300 (const_int 2) (const_int 3)]))
20301 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
20304 if (TARGET_AVX512VL)
20305 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
20307 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
20309 [(set_attr "type" "sselog")
20310 (set_attr "prefix_extra" "1")
20311 (set_attr "length_immediate" "1")
20312 (set_attr "prefix" "vex")
20313 (set_attr "mode" "<sseinsnmode>")])
20315 (define_insn "vec_set_lo_v16hi"
20316 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
20318 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
20320 (match_operand:V16HI 1 "register_operand" "x,v")
20321 (parallel [(const_int 8) (const_int 9)
20322 (const_int 10) (const_int 11)
20323 (const_int 12) (const_int 13)
20324 (const_int 14) (const_int 15)]))))]
20327 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
20328 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
20329 [(set_attr "type" "sselog")
20330 (set_attr "prefix_extra" "1")
20331 (set_attr "length_immediate" "1")
20332 (set_attr "prefix" "vex,evex")
20333 (set_attr "mode" "OI")])
20335 (define_insn "vec_set_hi_v16hi"
20336 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
20339 (match_operand:V16HI 1 "register_operand" "x,v")
20340 (parallel [(const_int 0) (const_int 1)
20341 (const_int 2) (const_int 3)
20342 (const_int 4) (const_int 5)
20343 (const_int 6) (const_int 7)]))
20344 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
20347 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
20348 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
20349 [(set_attr "type" "sselog")
20350 (set_attr "prefix_extra" "1")
20351 (set_attr "length_immediate" "1")
20352 (set_attr "prefix" "vex,evex")
20353 (set_attr "mode" "OI")])
20355 (define_insn "vec_set_lo_v32qi"
20356 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
20358 (match_operand:V16QI 2 "nonimmediate_operand" "xm,v")
20360 (match_operand:V32QI 1 "register_operand" "x,v")
20361 (parallel [(const_int 16) (const_int 17)
20362 (const_int 18) (const_int 19)
20363 (const_int 20) (const_int 21)
20364 (const_int 22) (const_int 23)
20365 (const_int 24) (const_int 25)
20366 (const_int 26) (const_int 27)
20367 (const_int 28) (const_int 29)
20368 (const_int 30) (const_int 31)]))))]
20371 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
20372 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
20373 [(set_attr "type" "sselog")
20374 (set_attr "prefix_extra" "1")
20375 (set_attr "length_immediate" "1")
20376 (set_attr "prefix" "vex,evex")
20377 (set_attr "mode" "OI")])
;; Insert a 128-bit V16QI (operand 2) into the HIGH lane of a V32QI,
;; keeping elements 0-15 from operand 1.  Mirror image of
;; vec_set_lo_v32qi; VEX vs. EVEX alternatives.
;; NOTE(review): interior lines are missing from this excerpt (gaps in
;; the embedded numbering).
20379 (define_insn "vec_set_hi_v32qi"
20380 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
20383 (match_operand:V32QI 1 "register_operand" "x,v")
20384 (parallel [(const_int 0) (const_int 1)
20385 (const_int 2) (const_int 3)
20386 (const_int 4) (const_int 5)
20387 (const_int 6) (const_int 7)
20388 (const_int 8) (const_int 9)
20389 (const_int 10) (const_int 11)
20390 (const_int 12) (const_int 13)
20391 (const_int 14) (const_int 15)]))
20392 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
20395 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
20396 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
20397 [(set_attr "type" "sselog")
20398 (set_attr "prefix_extra" "1")
20399 (set_attr "length_immediate" "1")
20400 (set_attr "prefix" "vex,evex")
20401 (set_attr "mode" "OI")])
;; AVX/AVX2 masked vector load: vmaskmovps/pd or vpmaskmovd/q, loading
;; from memory operand 1 under the per-element mask in operand 2.
;; NOTE(review): the unspec wrapper and insn condition lines are missing
;; from this excerpt (numbering gaps 20405, 20408-20409).
20403 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
20404 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
20406 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
20407 (match_operand:V48_AVX2 1 "memory_operand" "m")]
20410 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
20411 [(set_attr "type" "sselog1")
20412 (set_attr "prefix_extra" "1")
20413 (set_attr "prefix" "vex")
20414 (set_attr "btver2_decode" "vector")
20415 (set_attr "mode" "<sseinsnmode>")])
;; AVX/AVX2 masked vector store: the converse of the maskload pattern
;; above -- store register operand 2 to memory operand 0 under the mask
;; in operand 1.
;; NOTE(review): interior lines are missing from this excerpt
;; (numbering gaps 20419, 20422-20424).
20417 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
20418 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
20420 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
20421 (match_operand:V48_AVX2 2 "register_operand" "x")
20425 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20426 [(set_attr "type" "sselog1")
20427 (set_attr "prefix_extra" "1")
20428 (set_attr "prefix" "vex")
20429 (set_attr "btver2_decode" "vector")
20430 (set_attr "mode" "<sseinsnmode>")])
;; Standard-named maskload/maskstore expanders used by the vectorizer.
;; Three maskload and three maskstore variants: AVX/AVX2 integer-mask
;; form (V48_AVX2 with <sseintvecmode> mask), AVX-512VL k-mask form
;; (V48_AVX512VL as a vec_merge with <avx512fmaskmode>), and the
;; AVX-512 byte/word form (VI12_AVX512VL).
;; NOTE(review): conditions and expansion bodies are missing from this
;; excerpt (numbering gaps, e.g. 20437-20439, 20444, 20446-20447);
;; verify against the full sse.md before editing.
20432 (define_expand "maskload<mode><sseintvecmodelower>"
20433 [(set (match_operand:V48_AVX2 0 "register_operand")
20435 [(match_operand:<sseintvecmode> 2 "register_operand")
20436 (match_operand:V48_AVX2 1 "memory_operand")]
20440 (define_expand "maskload<mode><avx512fmaskmodelower>"
20441 [(set (match_operand:V48_AVX512VL 0 "register_operand")
20442 (vec_merge:V48_AVX512VL
20443 (match_operand:V48_AVX512VL 1 "memory_operand")
20445 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
20448 (define_expand "maskload<mode><avx512fmaskmodelower>"
20449 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
20450 (vec_merge:VI12_AVX512VL
20451 (match_operand:VI12_AVX512VL 1 "memory_operand")
20453 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
20456 (define_expand "maskstore<mode><sseintvecmodelower>"
20457 [(set (match_operand:V48_AVX2 0 "memory_operand")
20459 [(match_operand:<sseintvecmode> 2 "register_operand")
20460 (match_operand:V48_AVX2 1 "register_operand")
20465 (define_expand "maskstore<mode><avx512fmaskmodelower>"
20466 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
20467 (vec_merge:V48_AVX512VL
20468 (match_operand:V48_AVX512VL 1 "register_operand")
20470 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
20473 (define_expand "maskstore<mode><avx512fmaskmodelower>"
20474 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
20475 (vec_merge:VI12_AVX512VL
20476 (match_operand:VI12_AVX512VL 1 "register_operand")
20478 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Vector compare-and-branch expander: compares two VI48_AVX vectors
;; into the flags register and branches on a bt_comparison_operator
;; result; the C body hands everything to ix86_expand_branch.
;; NOTE(review): the tail of the C expansion (and the fall-through pc
;; arm) is missing from this excerpt (numbering gaps 20489-20491,
;; 20494+).
20481 (define_expand "cbranch<mode>4"
20482 [(set (reg:CC FLAGS_REG)
20483 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
20484 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
20485 (set (pc) (if_then_else
20486 (match_operator 0 "bt_comparison_operator"
20487 [(reg:CC FLAGS_REG) (const_int 0)])
20488 (label_ref (match_operand 3))
20492 ix86_expand_branch (GET_CODE (operands[0]),
20493 operands[1], operands[2], operands[3]);
;; 256-bit <-> half-size cast (e.g. __builtin_ia32 cast intrinsics):
;; after reload this splits into a plain move of the half-size mode,
;; taking the lowpart subreg of whichever side is the wider register.
;; At most one operand may be memory.
;; NOTE(review): interior lines (unspec code, split condition text,
;; "else" arm of the preparation body) are missing from this excerpt.
20498 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
20499 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
20500 (unspec:AVX256MODE2P
20501 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
20503 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
20505 "&& reload_completed"
20506 [(set (match_dup 0) (match_dup 1))]
20508 if (REG_P (operands[0]))
20509 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
20511 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
20512 <ssehalfvecmode>mode);
;; Mode iterators driving the vec_init expanders below.  Each entry is
;; gated on the ISA level that provides that vector width: 512-bit
;; modes need AVX512F, 256-bit modes need AVX, V2DF needs SSE2, the
;; rest are baseline SSE.  VEC_INIT_HALF_MODE omits the two-element
;; modes (V2DI/V2DF/V2TI) because a "half" of those is a scalar, not a
;; vector.
20515 ;; Modes handled by vec_init expanders.
20516 (define_mode_iterator VEC_INIT_MODE
20517 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
20518 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
20519 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
20520 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
20521 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
20522 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
20523 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
20525 ;; Likewise, but for initialization from half sized vectors.
20526 ;; Thus, these are all VEC_INIT_MODE modes except V2??.
20527 (define_mode_iterator VEC_INIT_HALF_MODE
20528 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
20529 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
20530 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
20531 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
20532 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
20533 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
20534 (V4TI "TARGET_AVX512F")])
;; Standard-named vec_init expanders: build a vector from scalar
;; elements (first) or from half-width vectors (second); both defer to
;; ix86_expand_vector_init.
;; NOTE(review): conditions/DONE lines are missing from this excerpt
;; (numbering gaps 20538-20540, 20542-20544, 20547-20549, 20551+).
20536 (define_expand "vec_init<mode><ssescalarmodelower>"
20537 [(match_operand:VEC_INIT_MODE 0 "register_operand")
20541 ix86_expand_vector_init (false, operands[0], operands[1]);
20545 (define_expand "vec_init<mode><ssehalfvecmodelower>"
20546 [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
20550 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Per-element variable arithmetic right shifts (vpsravd/q and, with
;; AVX512BW+VL, vpsravw), with optional AVX-512 masking via
;; <mask_name>/<mask_operand3>.  First pattern: dword/qword elements
;; (AVX2 condition plus the 512-bit mask gate); second: word elements.
;; NOTE(review): the second pattern's insn condition line is missing
;; from this excerpt (numbering gap 20570).
20554 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
20555 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
20556 (ashiftrt:VI48_AVX512F_AVX512VL
20557 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
20558 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
20559 "TARGET_AVX2 && <mask_mode512bit_condition>"
20560 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20561 [(set_attr "type" "sseishft")
20562 (set_attr "prefix" "maybe_evex")
20563 (set_attr "mode" "<sseinsnmode>")])
20565 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
20566 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
20567 (ashiftrt:VI2_AVX512VL
20568 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
20569 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
20571 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20572 [(set_attr "type" "sseishft")
20573 (set_attr "prefix" "maybe_evex")
20574 (set_attr "mode" "<sseinsnmode>")])
;; Per-element variable left/logical-right shifts (vpsllv*/vpsrlv* via
;; the any_lshift code iterator and <vshift>), again in a dword/qword
;; variant and a word variant, both with optional AVX-512 masking.
;; NOTE(review): the second pattern's insn condition line is missing
;; from this excerpt (numbering gap 20592).
20576 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
20577 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
20578 (any_lshift:VI48_AVX512F
20579 (match_operand:VI48_AVX512F 1 "register_operand" "v")
20580 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
20581 "TARGET_AVX2 && <mask_mode512bit_condition>"
20582 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20583 [(set_attr "type" "sseishft")
20584 (set_attr "prefix" "maybe_evex")
20585 (set_attr "mode" "<sseinsnmode>")])
20587 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
20588 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
20589 (any_lshift:VI2_AVX512VL
20590 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
20591 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
20593 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20594 [(set_attr "type" "sseishft")
20595 (set_attr "prefix" "maybe_evex")
20596 (set_attr "mode" "<sseinsnmode>")])
;; Concatenate two half-width vectors into a 256/512-bit register.
;; Alternatives 0/1: operand 2 is a real value -> emit the appropriate
;; vinsert (i128 for 256-bit; 32x8/64x4 or 64x2/32x4 for 512-bit,
;; choosing the element-granular EVEX form when AVX512DQ allows it).
;; Alternatives 2/3: operand 2 is zero (constraint "C") -> a plain
;; vmovaps/vmovapd/vmovdqa{,32,64} of operand 1 into the low half
;; zero-extends, selected on get_attr_mode.
;; NOTE(review): several lines of the C output body (case labels,
;; braces, closing of the switch) are missing from this excerpt --
;; numbering gaps 20603-20604, 20606-20607, 20609, 20624-20625, etc.
20598 (define_insn "avx_vec_concat<mode>"
20599 [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
20600 (vec_concat:V_256_512
20601 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,v,x,v")
20602 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
20605 switch (which_alternative)
20608 return "vinsert<i128>\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20610 if (<MODE_SIZE> == 64)
20612 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
20613 return "vinsert<shuffletype>32x8\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20615 return "vinsert<shuffletype>64x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20619 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
20620 return "vinsert<shuffletype>64x2\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20622 return "vinsert<shuffletype>32x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20626 switch (get_attr_mode (insn))
20629 return "vmovaps\t{%1, %t0|%t0, %1}";
20631 return "vmovapd\t{%1, %t0|%t0, %1}";
20633 return "vmovaps\t{%1, %x0|%x0, %1}";
20635 return "vmovapd\t{%1, %x0|%x0, %1}";
20637 if (which_alternative == 2)
20638 return "vmovdqa\t{%1, %t0|%t0, %1}";
20639 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
20640 return "vmovdqa64\t{%1, %t0|%t0, %1}";
20642 return "vmovdqa32\t{%1, %t0|%t0, %1}";
20644 if (which_alternative == 2)
20645 return "vmovdqa\t{%1, %x0|%x0, %1}";
20646 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
20647 return "vmovdqa64\t{%1, %x0|%x0, %1}";
20649 return "vmovdqa32\t{%1, %x0|%x0, %1}";
20651 gcc_unreachable ();
20654 gcc_unreachable ();
20657 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
20658 (set_attr "prefix_extra" "1,1,*,*")
20659 (set_attr "length_immediate" "1,1,*,*")
20660 (set_attr "prefix" "maybe_evex")
20661 (set_attr "mode" "<sseinsnmode>")])
;; Half-precision -> single-precision conversions (vcvtph2ps), from
;; 128-bit (low four elements selected), a memory-load variant, the
;; 256-bit form, and the 512-bit AVX512F form with SAE rounding
;; support.  The 128/256-bit forms are available with either F16C or
;; AVX512VL.
;; NOTE(review): some interior lines (e.g. the vec_select wrapper at
;; 20665, unspec name at 20667, 512-bit condition at 20702) are missing
;; from this excerpt.
20663 (define_insn "vcvtph2ps<mask_name>"
20664 [(set (match_operand:V4SF 0 "register_operand" "=v")
20666 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
20668 (parallel [(const_int 0) (const_int 1)
20669 (const_int 2) (const_int 3)])))]
20670 "TARGET_F16C || TARGET_AVX512VL"
20671 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20672 [(set_attr "type" "ssecvt")
20673 (set_attr "prefix" "maybe_evex")
20674 (set_attr "mode" "V4SF")])
20676 (define_insn "*vcvtph2ps_load<mask_name>"
20677 [(set (match_operand:V4SF 0 "register_operand" "=v")
20678 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
20679 UNSPEC_VCVTPH2PS))]
20680 "TARGET_F16C || TARGET_AVX512VL"
20681 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20682 [(set_attr "type" "ssecvt")
20683 (set_attr "prefix" "vex")
20684 (set_attr "mode" "V8SF")])
20686 (define_insn "vcvtph2ps256<mask_name>"
20687 [(set (match_operand:V8SF 0 "register_operand" "=v")
20688 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
20689 UNSPEC_VCVTPH2PS))]
20690 "TARGET_F16C || TARGET_AVX512VL"
20691 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20692 [(set_attr "type" "ssecvt")
20693 (set_attr "prefix" "vex")
20694 (set_attr "btver2_decode" "double")
20695 (set_attr "mode" "V8SF")])
20697 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
20698 [(set (match_operand:V16SF 0 "register_operand" "=v")
20700 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
20701 UNSPEC_VCVTPH2PS))]
20703 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
20704 [(set_attr "type" "ssecvt")
20705 (set_attr "prefix" "evex")
20706 (set_attr "mode" "V16SF")])
20708 (define_expand "vcvtps2ph_mask"
20709 [(set (match_operand:V8HI 0 "register_operand")
20712 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
20713 (match_operand:SI 2 "const_0_to_255_operand")]
20716 (match_operand:V8HI 3 "nonimm_or_0_operand")
20717 (match_operand:QI 4 "register_operand")))]
20719 "operands[5] = CONST0_RTX (V4HImode);")
20721 (define_expand "vcvtps2ph"
20722 [(set (match_operand:V8HI 0 "register_operand")
20724 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
20725 (match_operand:SI 2 "const_0_to_255_operand")]
20729 "operands[3] = CONST0_RTX (V4HImode);")
20731 (define_insn "*vcvtps2ph<mask_name>"
20732 [(set (match_operand:V8HI 0 "register_operand" "=v")
20734 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
20735 (match_operand:SI 2 "const_0_to_255_operand" "N")]
20737 (match_operand:V4HI 3 "const0_operand")))]
20738 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
20739 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
20740 [(set_attr "type" "ssecvt")
20741 (set_attr "prefix" "maybe_evex")
20742 (set_attr "mode" "V4SF")])
20744 (define_insn "*vcvtps2ph_store<mask_name>"
20745 [(set (match_operand:V4HI 0 "memory_operand" "=m")
20746 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
20747 (match_operand:SI 2 "const_0_to_255_operand" "N")]
20748 UNSPEC_VCVTPS2PH))]
20749 "TARGET_F16C || TARGET_AVX512VL"
20750 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20751 [(set_attr "type" "ssecvt")
20752 (set_attr "prefix" "maybe_evex")
20753 (set_attr "mode" "V4SF")])
20755 (define_insn "vcvtps2ph256<mask_name>"
20756 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
20757 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
20758 (match_operand:SI 2 "const_0_to_255_operand" "N")]
20759 UNSPEC_VCVTPS2PH))]
20760 "TARGET_F16C || TARGET_AVX512VL"
20761 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20762 [(set_attr "type" "ssecvt")
20763 (set_attr "prefix" "maybe_evex")
20764 (set_attr "btver2_decode" "vector")
20765 (set_attr "mode" "V8SF")])
20767 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
20768 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
20770 [(match_operand:V16SF 1 "register_operand" "v")
20771 (match_operand:SI 2 "const_0_to_255_operand" "N")]
20772 UNSPEC_VCVTPS2PH))]
20774 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20775 [(set_attr "type" "ssecvt")
20776 (set_attr "prefix" "evex")
20777 (set_attr "mode" "V16SF")])
;; Iterator and mode attributes for the gather/scatter patterns:
;; VEC_GATHER_IDXSI/IDXDI map a data mode to the 32-bit/64-bit index
;; vector mode with matching element count, and VEC_GATHER_SRCDI maps
;; to the source/merge-operand mode used by the qword-index ("q") forms
;; (half-width for 32-bit-element data, since a qword-index gather
;; produces only half as many elements).
20779 ;; For gather* insn patterns
20780 (define_mode_iterator VEC_GATHER_MODE
20781 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
20782 (define_mode_attr VEC_GATHER_IDXSI
20783 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
20784 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
20785 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
20786 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
20788 (define_mode_attr VEC_GATHER_IDXDI
20789 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
20790 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
20791 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
20792 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
20794 (define_mode_attr VEC_GATHER_SRCDI
20795 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
20796 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
20797 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
20798 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; AVX2 dword-index gather (vgatherdd/dq/dps/dpd).  The expander builds
;; the VSIB address as an UNSPEC_VSIBADDR of (base, index, scale); the
;; two insn patterns differ in whether the merge source (_2: none) is
;; present.  Operand 1's scratch clobber models the mask register the
;; hardware destroys, and the (mem:BLK (scratch)) stands for the
;; unpredictable memory read set.
;; NOTE(review): interior lines (unspec_volatile wrappers, conditions)
;; are missing from this excerpt -- gaps in the embedded numbering.
20800 (define_expand "avx2_gathersi<mode>"
20801 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
20802 (unspec:VEC_GATHER_MODE
20803 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
20804 (mem:<ssescalarmode>
20806 [(match_operand 2 "vsib_address_operand")
20807 (match_operand:<VEC_GATHER_IDXSI>
20808 3 "register_operand")
20809 (match_operand:SI 5 "const1248_operand ")]))
20810 (mem:BLK (scratch))
20811 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
20813 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
20817 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
20818 operands[5]), UNSPEC_VSIBADDR);
20821 (define_insn "*avx2_gathersi<mode>"
20822 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
20823 (unspec:VEC_GATHER_MODE
20824 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
20825 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
20827 [(match_operand:P 3 "vsib_address_operand" "Tv")
20828 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
20829 (match_operand:SI 6 "const1248_operand" "n")]
20831 (mem:BLK (scratch))
20832 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
20834 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
20836 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
20837 [(set_attr "type" "ssemov")
20838 (set_attr "prefix" "vex")
20839 (set_attr "mode" "<sseinsnmode>")])
20841 (define_insn "*avx2_gathersi<mode>_2"
20842 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
20843 (unspec:VEC_GATHER_MODE
20845 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
20847 [(match_operand:P 2 "vsib_address_operand" "Tv")
20848 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
20849 (match_operand:SI 5 "const1248_operand" "n")]
20851 (mem:BLK (scratch))
20852 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
20854 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
20856 "%M2v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
20857 [(set_attr "type" "ssemov")
20858 (set_attr "prefix" "vex")
20859 (set_attr "mode" "<sseinsnmode>")])
;; AVX2 qword-index gather (vgatherq*).  Same structure as the
;; dword-index family above, but the merge source/mask use
;; <VEC_GATHER_SRCDI> (half-width for 32-bit-element data).  Patterns
;; _2/_4 lack the separate merge source; _3/_4 additionally wrap the
;; result in a vec_select of elements 0-3 (the defined half of a
;; qword-index gather of 32-bit data), and _2 narrows the destination
;; with %x0 when the result mode is wider than the source mode.
;; NOTE(review): interior lines are missing from this excerpt (gaps in
;; the embedded numbering); verify against the full sse.md.
20861 (define_expand "avx2_gatherdi<mode>"
20862 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
20863 (unspec:VEC_GATHER_MODE
20864 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
20865 (mem:<ssescalarmode>
20867 [(match_operand 2 "vsib_address_operand")
20868 (match_operand:<VEC_GATHER_IDXDI>
20869 3 "register_operand")
20870 (match_operand:SI 5 "const1248_operand ")]))
20871 (mem:BLK (scratch))
20872 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
20874 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
20878 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
20879 operands[5]), UNSPEC_VSIBADDR);
20882 (define_insn "*avx2_gatherdi<mode>"
20883 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
20884 (unspec:VEC_GATHER_MODE
20885 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
20886 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
20888 [(match_operand:P 3 "vsib_address_operand" "Tv")
20889 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
20890 (match_operand:SI 6 "const1248_operand" "n")]
20892 (mem:BLK (scratch))
20893 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
20895 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
20897 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
20898 [(set_attr "type" "ssemov")
20899 (set_attr "prefix" "vex")
20900 (set_attr "mode" "<sseinsnmode>")])
20902 (define_insn "*avx2_gatherdi<mode>_2"
20903 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
20904 (unspec:VEC_GATHER_MODE
20906 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
20908 [(match_operand:P 2 "vsib_address_operand" "Tv")
20909 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
20910 (match_operand:SI 5 "const1248_operand" "n")]
20912 (mem:BLK (scratch))
20913 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
20915 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
20918 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
20919 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
20920 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
20922 [(set_attr "type" "ssemov")
20923 (set_attr "prefix" "vex")
20924 (set_attr "mode" "<sseinsnmode>")])
20926 (define_insn "*avx2_gatherdi<mode>_3"
20927 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
20928 (vec_select:<VEC_GATHER_SRCDI>
20930 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
20931 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
20933 [(match_operand:P 3 "vsib_address_operand" "Tv")
20934 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
20935 (match_operand:SI 6 "const1248_operand" "n")]
20937 (mem:BLK (scratch))
20938 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
20940 (parallel [(const_int 0) (const_int 1)
20941 (const_int 2) (const_int 3)])))
20942 (clobber (match_scratch:VI4F_256 1 "=&x"))]
20944 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
20945 [(set_attr "type" "ssemov")
20946 (set_attr "prefix" "vex")
20947 (set_attr "mode" "<sseinsnmode>")])
20949 (define_insn "*avx2_gatherdi<mode>_4"
20950 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
20951 (vec_select:<VEC_GATHER_SRCDI>
20954 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
20956 [(match_operand:P 2 "vsib_address_operand" "Tv")
20957 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
20958 (match_operand:SI 5 "const1248_operand" "n")]
20960 (mem:BLK (scratch))
20961 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
20963 (parallel [(const_int 0) (const_int 1)
20964 (const_int 2) (const_int 3)])))
20965 (clobber (match_scratch:VI4F_256 1 "=&x"))]
20967 "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
20968 [(set_attr "type" "ssemov")
20969 (set_attr "prefix" "vex")
20970 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 dword-index gather: like the AVX2 family but masked by a k
;; register (<avx512fmaskmode>), which the instruction destroys -- the
;; clobbered match_scratch models that.  The assembler templates use
;; %X to suppress *WORD PTR under -masm=intel (gas compatibility, per
;; the inline comments).  The _2 variant has no separate merge source.
;; NOTE(review): interior lines are missing from this excerpt (gaps in
;; the embedded numbering).
20972 (define_expand "<avx512>_gathersi<mode>"
20973 [(parallel [(set (match_operand:VI48F 0 "register_operand")
20975 [(match_operand:VI48F 1 "register_operand")
20976 (match_operand:<avx512fmaskmode> 4 "register_operand")
20977 (mem:<ssescalarmode>
20979 [(match_operand 2 "vsib_address_operand")
20980 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
20981 (match_operand:SI 5 "const1248_operand")]))]
20983 (clobber (match_scratch:<avx512fmaskmode> 7))])]
20987 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
20988 operands[5]), UNSPEC_VSIBADDR);
20991 (define_insn "*avx512f_gathersi<mode>"
20992 [(set (match_operand:VI48F 0 "register_operand" "=&v")
20994 [(match_operand:VI48F 1 "register_operand" "0")
20995 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
20996 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
20998 [(match_operand:P 4 "vsib_address_operand" "Tv")
20999 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
21000 (match_operand:SI 5 "const1248_operand" "n")]
21001 UNSPEC_VSIBADDR)])]
21003 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
21005 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
21006 ;; gas changed what it requires incompatibly.
21007 "%M4v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
21008 [(set_attr "type" "ssemov")
21009 (set_attr "prefix" "evex")
21010 (set_attr "mode" "<sseinsnmode>")])
21012 (define_insn "*avx512f_gathersi<mode>_2"
21013 [(set (match_operand:VI48F 0 "register_operand" "=&v")
21016 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
21017 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
21019 [(match_operand:P 3 "vsib_address_operand" "Tv")
21020 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
21021 (match_operand:SI 4 "const1248_operand" "n")]
21022 UNSPEC_VSIBADDR)])]
21024 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
21026 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21027 ;; gas changed what it requires incompatibly.
21028 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
21029 [(set_attr "type" "ssemov")
21030 (set_attr "prefix" "evex")
21031 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 qword-index gather.  As with the AVX2 qword forms, the
;; merge source is <VEC_GATHER_SRCDI>; the mask here is a plain QI
;; register.  In _2, when the result mode is wider than the source
;; mode the destination is narrowed with %x0 (128-bit) or %t0
;; (256-bit) depending on total size.
;; NOTE(review): interior lines are missing from this excerpt (gaps in
;; the embedded numbering).
21034 (define_expand "<avx512>_gatherdi<mode>"
21035 [(parallel [(set (match_operand:VI48F 0 "register_operand")
21037 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
21038 (match_operand:QI 4 "register_operand")
21039 (mem:<ssescalarmode>
21041 [(match_operand 2 "vsib_address_operand")
21042 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
21043 (match_operand:SI 5 "const1248_operand")]))]
21045 (clobber (match_scratch:QI 7))])]
21049 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
21050 operands[5]), UNSPEC_VSIBADDR);
21053 (define_insn "*avx512f_gatherdi<mode>"
21054 [(set (match_operand:VI48F 0 "register_operand" "=&v")
21056 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
21057 (match_operand:QI 7 "register_operand" "2")
21058 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
21060 [(match_operand:P 4 "vsib_address_operand" "Tv")
21061 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
21062 (match_operand:SI 5 "const1248_operand" "n")]
21063 UNSPEC_VSIBADDR)])]
21065 (clobber (match_scratch:QI 2 "=&Yk"))]
21067 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
21068 ;; gas changed what it requires incompatibly.
21069 "%M4v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
21070 [(set_attr "type" "ssemov")
21071 (set_attr "prefix" "evex")
21072 (set_attr "mode" "<sseinsnmode>")])
21074 (define_insn "*avx512f_gatherdi<mode>_2"
21075 [(set (match_operand:VI48F 0 "register_operand" "=&v")
21078 (match_operand:QI 6 "register_operand" "1")
21079 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
21081 [(match_operand:P 3 "vsib_address_operand" "Tv")
21082 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
21083 (match_operand:SI 4 "const1248_operand" "n")]
21084 UNSPEC_VSIBADDR)])]
21086 (clobber (match_scratch:QI 1 "=&Yk"))]
21089 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21090 gas changed what it requires incompatibly. */
21091 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
21093 if (<MODE_SIZE> != 64)
21094 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
21096 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
21098 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
21100 [(set_attr "type" "ssemov")
21101 (set_attr "prefix" "evex")
21102 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 dword-index scatter (vscatterd*): store vector operand 3 to
;; VSIB-addressed memory under k-mask operand 1 (which the hardware
;; destroys -- modelled by the match_scratch clobber).  Operand order
;; mirrors the gathers: base 0, index 2, scale 4.
;; NOTE(review): interior lines (unspec wrappers, conditions) are
;; missing from this excerpt (gaps in the embedded numbering).
21104 (define_expand "<avx512>_scattersi<mode>"
21105 [(parallel [(set (mem:VI48F
21107 [(match_operand 0 "vsib_address_operand")
21108 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
21109 (match_operand:SI 4 "const1248_operand")]))
21111 [(match_operand:<avx512fmaskmode> 1 "register_operand")
21112 (match_operand:VI48F 3 "register_operand")]
21114 (clobber (match_scratch:<avx512fmaskmode> 6))])]
21118 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
21119 operands[4]), UNSPEC_VSIBADDR);
21122 (define_insn "*avx512f_scattersi<mode>"
21123 [(set (match_operator:VI48F 5 "vsib_mem_operator"
21125 [(match_operand:P 0 "vsib_address_operand" "Tv")
21126 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
21127 (match_operand:SI 4 "const1248_operand" "n")]
21130 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
21131 (match_operand:VI48F 3 "register_operand" "v")]
21133 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
21135 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21136 ;; gas changed what it requires incompatibly.
21137 "%M0v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
21138 [(set_attr "type" "ssemov")
21139 (set_attr "prefix" "evex")
21140 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 qword-index scatter (vscatterq*): like the dword-index form
;; above but with a 64-bit index vector, a QI mask, and a
;; <VEC_GATHER_SRCDI> (possibly half-width) source.
;; NOTE(review): interior lines are missing from this excerpt (gaps in
;; the embedded numbering).
21142 (define_expand "<avx512>_scatterdi<mode>"
21143 [(parallel [(set (mem:VI48F
21145 [(match_operand 0 "vsib_address_operand")
21146 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
21147 (match_operand:SI 4 "const1248_operand")]))
21149 [(match_operand:QI 1 "register_operand")
21150 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
21152 (clobber (match_scratch:QI 6))])]
21156 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
21157 operands[4]), UNSPEC_VSIBADDR);
21160 (define_insn "*avx512f_scatterdi<mode>"
21161 [(set (match_operator:VI48F 5 "vsib_mem_operator"
21163 [(match_operand:P 0 "vsib_address_operand" "Tv")
21164 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
21165 (match_operand:SI 4 "const1248_operand" "n")]
21168 [(match_operand:QI 6 "register_operand" "1")
21169 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
21171 (clobber (match_scratch:QI 1 "=&Yk"))]
21173 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21174 ;; gas changed what it requires incompatibly.
21175 "%M0v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
21176 [(set_attr "type" "ssemov")
21177 (set_attr "prefix" "evex")
21178 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress: pack the active elements of operand 1 (per mask
;; operand 3) into contiguous low elements of operand 0, merging with
;; operand 2 (register or zero -- "0C"; %N2 prints the zeroing {z}).
;; First pattern: dword/qword elements (AVX512F); second: byte/word
;; elements (AVX512VBMI2).
;; NOTE(review): unspec names and some conditions are missing from this
;; excerpt (numbering gaps 21182, 21186-21187, 21199).
21180 (define_insn "<avx512>_compress<mode>_mask"
21181 [(set (match_operand:VI48F 0 "register_operand" "=v")
21183 [(match_operand:VI48F 1 "register_operand" "v")
21184 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C")
21185 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
21188 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21189 [(set_attr "type" "ssemov")
21190 (set_attr "prefix" "evex")
21191 (set_attr "mode" "<sseinsnmode>")])
21193 (define_insn "compress<mode>_mask"
21194 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
21195 (unspec:VI12_AVX512VLBW
21196 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
21197 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C")
21198 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
21200 "TARGET_AVX512VBMI2"
21201 "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21202 [(set_attr "type" "ssemov")
21203 (set_attr "prefix" "evex")
21204 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress-to-memory (UNSPEC_COMPRESS_STORE): store only the
;; mask-selected elements of operand 1, packed contiguously, to memory
;; operand 0.  dword/qword variant first, then the AVX512VBMI2
;; byte/word variant.
;; NOTE(review): interior lines are missing from this excerpt
;; (numbering gaps 21208, 21210, 21213, 21224).
21206 (define_insn "<avx512>_compressstore<mode>_mask"
21207 [(set (match_operand:VI48F 0 "memory_operand" "=m")
21209 [(match_operand:VI48F 1 "register_operand" "x")
21211 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
21212 UNSPEC_COMPRESS_STORE))]
21214 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
21215 [(set_attr "type" "ssemov")
21216 (set_attr "prefix" "evex")
21217 (set_attr "memory" "store")
21218 (set_attr "mode" "<sseinsnmode>")])
21220 (define_insn "compressstore<mode>_mask"
21221 [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
21222 (unspec:VI12_AVX512VLBW
21223 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
21225 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
21226 UNSPEC_COMPRESS_STORE))]
21227 "TARGET_AVX512VBMI2"
21228 "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
21229 [(set_attr "type" "ssemov")
21230 (set_attr "prefix" "evex")
21231 (set_attr "memory" "store")
21232 (set_attr "mode" "<sseinsnmode>")])
;; Masked expand: the inverse of compress -- scatter the low elements
;; of operand 1 into the mask-selected positions of the destination,
;; merging with operand 2.  The *_maskz expanders just force operand 2
;; to zero and reuse the _mask pattern.  dword/qword (VI48F) forms
;; first, then the AVX512VBMI2 byte/word (VI12_AVX512VLBW) forms; the
;; load alternative ("m") lets the source come straight from memory.
;; NOTE(review): unspec names and some conditions are missing from this
;; excerpt (numbering gaps 21236, 21240-21241, 21246, 21250-21251,
;; 21264, 21278).
21234 (define_expand "<avx512>_expand<mode>_maskz"
21235 [(set (match_operand:VI48F 0 "register_operand")
21237 [(match_operand:VI48F 1 "nonimmediate_operand")
21238 (match_operand:VI48F 2 "nonimm_or_0_operand")
21239 (match_operand:<avx512fmaskmode> 3 "register_operand")]
21242 "operands[2] = CONST0_RTX (<MODE>mode);")
21244 (define_insn "<avx512>_expand<mode>_mask"
21245 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
21247 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
21248 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C,0C")
21249 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
21252 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21253 [(set_attr "type" "ssemov")
21254 (set_attr "prefix" "evex")
21255 (set_attr "memory" "none,load")
21256 (set_attr "mode" "<sseinsnmode>")])
21258 (define_insn "expand<mode>_mask"
21259 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
21260 (unspec:VI12_AVX512VLBW
21261 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
21262 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C,0C")
21263 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
21265 "TARGET_AVX512VBMI2"
21266 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21267 [(set_attr "type" "ssemov")
21268 (set_attr "prefix" "evex")
21269 (set_attr "memory" "none,load")
21270 (set_attr "mode" "<sseinsnmode>")])
21272 (define_expand "expand<mode>_maskz"
21273 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
21274 (unspec:VI12_AVX512VLBW
21275 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
21276 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand")
21277 (match_operand:<avx512fmaskmode> 3 "register_operand")]
21279 "TARGET_AVX512VBMI2"
21280 "operands[2] = CONST0_RTX (<MODE>mode);")
;; AVX512DQ vrangeps/pd (packed) and vrangess/sd (scalar): operand 3
;; is the 4-bit immediate selecting the min/max/abs variant.  Both
;; support masking and SAE-only embedded rounding through the
;; <mask_*>/<round_saeonly_*> subst attributes.
;; NOTE(review): the scalar pattern's vec_merge wrapper and conditions
;; are missing from this excerpt (numbering gaps 21288, 21297-21298,
;; 21302-21305).
21282 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
21283 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
21284 (unspec:VF_AVX512VL
21285 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
21286 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
21287 (match_operand:SI 3 "const_0_to_15_operand")]
21289 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
21290 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
21291 [(set_attr "type" "sse")
21292 (set_attr "prefix" "evex")
21293 (set_attr "mode" "<MODE>")])
21295 (define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
21296 [(set (match_operand:VF_128 0 "register_operand" "=v")
21299 [(match_operand:VF_128 1 "register_operand" "v")
21300 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
21301 (match_operand:SI 3 "const_0_to_15_operand")]
21306 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
21307 [(set_attr "type" "sse")
21308 (set_attr "prefix" "evex")
21309 (set_attr "mode" "<MODE>")])
;; AVX512DQ VFPCLASSPS/PD: classify each element against the FP classes
;; selected by immediate operand 2 (0..255); result is a mask register.
;; NOTE(review): the stray ";" after the output-template string below is
;; harmless — ";" starts a comment in machine-description files.
21311 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
21312 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
21313 (unspec:<avx512fmaskmode>
21314 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
21315 (match_operand:QI 2 "const_0_to_255_operand" "n")]
21318 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
21319 [(set_attr "type" "sse")
21320 (set_attr "length_immediate" "1")
21321 (set_attr "prefix" "evex")
21322 (set_attr "mode" "<MODE>")])
;; Scalar VFPCLASSSS/SD: classify the low element only; the result is
;; and-ed so only mask bit 0 can be set.
21324 (define_insn "avx512dq_vmfpclass<mode>"
21325 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
21326 (and:<avx512fmaskmode>
21327 (unspec:<avx512fmaskmode>
21328 [(match_operand:VF_128 1 "register_operand" "v")
21329 (match_operand:QI 2 "const_0_to_255_operand" "n")]
21333 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
21334 [(set_attr "type" "sse")
21335 (set_attr "length_immediate" "1")
21336 (set_attr "prefix" "evex")
21337 (set_attr "mode" "<MODE>")])
;; VGETMANTPS/PD: extract the normalized mantissa of each element;
;; immediate operand 2 (0..15) selects interval/sign-control options.
21339 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
21340 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
21341 (unspec:VF_AVX512VL
21342 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
21343 (match_operand:SI 2 "const_0_to_15_operand")]
21346 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
21347 [(set_attr "prefix" "evex")
21348 (set_attr "mode" "<MODE>")])
;; Scalar VGETMANTSS/SD variant operating on the low element.
21350 (define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
21351 [(set (match_operand:VF_128 0 "register_operand" "=v")
21354 [(match_operand:VF_128 1 "register_operand" "v")
21355 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
21356 (match_operand:SI 3 "const_0_to_15_operand")]
21361 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
21362 [(set_attr "prefix" "evex")
21363 (set_attr "mode" "<ssescalarmode>")])
21365 ;; The correct representation for this is absolutely enormous, and
21366 ;; surely not generally useful.
;; VDBPSADBW (AVX512BW): double-block packed sum of absolute byte
;; differences; immediate operand 3 controls the source block shuffle,
;; so the whole operation is kept opaque as an unspec.
21367 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
21368 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
21369 (unspec:VI2_AVX512VL
21370 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
21371 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
21372 (match_operand:SI 3 "const_0_to_255_operand")]
21375 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
21376 [(set_attr "type" "sselog1")
21377 (set_attr "length_immediate" "1")
21378 (set_attr "prefix" "evex")
21379 (set_attr "mode" "<sseinsnmode>")])
;; VPLZCNTD/Q (AVX512CD): per-element count of leading zero bits,
;; exposed through the standard clz<mode>2 pattern name.
21381 (define_insn "clz<mode>2<mask_name>"
21382 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
21384 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
21386 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21387 [(set_attr "type" "sse")
21388 (set_attr "prefix" "evex")
21389 (set_attr "mode" "<sseinsnmode>")])
;; VPCONFLICTD/Q (AVX512CD): per-element conflict detection, kept as an
;; unspec since it has no natural RTL representation.
21391 (define_insn "<mask_codefor>conflict<mode><mask_name>"
21392 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
21393 (unspec:VI48_AVX512VL
21394 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
21397 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21398 [(set_attr "type" "sse")
21399 (set_attr "prefix" "evex")
21400 (set_attr "mode" "<sseinsnmode>")])
;; SHA-NI instruction patterns.  All are legacy-SSE encoded (constraint
;; "x"), two-operand with the destination tied to operand 1 ("0"), and
;; kept as unspecs — the hash-round semantics have no RTL equivalent.
21402 (define_insn "sha1msg1"
21403 [(set (match_operand:V4SI 0 "register_operand" "=x")
21405 [(match_operand:V4SI 1 "register_operand" "0")
21406 (match_operand:V4SI 2 "vector_operand" "xBm")]
21409 "sha1msg1\t{%2, %0|%0, %2}"
21410 [(set_attr "type" "sselog1")
21411 (set_attr "mode" "TI")])
21413 (define_insn "sha1msg2"
21414 [(set (match_operand:V4SI 0 "register_operand" "=x")
21416 [(match_operand:V4SI 1 "register_operand" "0")
21417 (match_operand:V4SI 2 "vector_operand" "xBm")]
21420 "sha1msg2\t{%2, %0|%0, %2}"
21421 [(set_attr "type" "sselog1")
21422 (set_attr "mode" "TI")])
21424 (define_insn "sha1nexte"
21425 [(set (match_operand:V4SI 0 "register_operand" "=x")
21427 [(match_operand:V4SI 1 "register_operand" "0")
21428 (match_operand:V4SI 2 "vector_operand" "xBm")]
21429 UNSPEC_SHA1NEXTE))]
21431 "sha1nexte\t{%2, %0|%0, %2}"
21432 [(set_attr "type" "sselog1")
21433 (set_attr "mode" "TI")])
;; SHA1RNDS4 additionally takes a 2-bit immediate round-function selector.
21435 (define_insn "sha1rnds4"
21436 [(set (match_operand:V4SI 0 "register_operand" "=x")
21438 [(match_operand:V4SI 1 "register_operand" "0")
21439 (match_operand:V4SI 2 "vector_operand" "xBm")
21440 (match_operand:SI 3 "const_0_to_3_operand" "n")]
21441 UNSPEC_SHA1RNDS4))]
21443 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
21444 [(set_attr "type" "sselog1")
21445 (set_attr "length_immediate" "1")
21446 (set_attr "mode" "TI")])
21448 (define_insn "sha256msg1"
21449 [(set (match_operand:V4SI 0 "register_operand" "=x")
21451 [(match_operand:V4SI 1 "register_operand" "0")
21452 (match_operand:V4SI 2 "vector_operand" "xBm")]
21453 UNSPEC_SHA256MSG1))]
21455 "sha256msg1\t{%2, %0|%0, %2}"
21456 [(set_attr "type" "sselog1")
21457 (set_attr "mode" "TI")])
21459 (define_insn "sha256msg2"
21460 [(set (match_operand:V4SI 0 "register_operand" "=x")
21462 [(match_operand:V4SI 1 "register_operand" "0")
21463 (match_operand:V4SI 2 "vector_operand" "xBm")]
21464 UNSPEC_SHA256MSG2))]
21466 "sha256msg2\t{%2, %0|%0, %2}"
21467 [(set_attr "type" "sselog1")
21468 (set_attr "mode" "TI")])
;; SHA256RNDS2 has an implicit xmm0 operand, expressed by constraint "Yz".
21470 (define_insn "sha256rnds2"
21471 [(set (match_operand:V4SI 0 "register_operand" "=x")
21473 [(match_operand:V4SI 1 "register_operand" "0")
21474 (match_operand:V4SI 2 "vector_operand" "xBm")
21475 (match_operand:V4SI 3 "register_operand" "Yz")]
21476 UNSPEC_SHA256RNDS2))]
21478 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
21479 [(set_attr "type" "sselog1")
21480 (set_attr "length_immediate" "1")
21481 (set_attr "mode" "TI")])
;; "Cast" pseudo patterns: reinterpret a 128-bit (quarter-width) vector
;; as a 512-bit one.  No code is generated — after reload the pattern
;; splits into a plain move between lowpart subregs.
21483 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
21484 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
21485 (unspec:AVX512MODE2P
21486 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
21488 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
21490 "&& reload_completed"
21491 [(set (match_dup 0) (match_dup 1))]
21493 if (REG_P (operands[0]))
21494 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
21496 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
21497 <ssequartermode>mode);
;; Same as above but for a 256-bit (half-width) source vector.
21500 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
21501 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
21502 (unspec:AVX512MODE2P
21503 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
21505 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
21507 "&& reload_completed"
21508 [(set (match_dup 0) (match_dup 1))]
21510 if (REG_P (operands[0]))
21511 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
21513 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
21514 <ssehalfvecmode>mode);
;; AVX512IFMA: 52-bit integer fused multiply-add (VPMADD52LUQ /
;; VPMADD52HUQ), with the destination doubling as accumulator ("0").
;; NOTE(review): the pattern names spell "vpamdd52" although the emitted
;; mnemonic is "vpmadd52"; renaming would break the gen_* callers
;; elsewhere in the port, so the spelling is kept as-is.
21517 (define_int_iterator VPMADD52
21518 [UNSPEC_VPMADD52LUQ
21519 UNSPEC_VPMADD52HUQ])
;; Maps the unspec to the "luq"/"huq" mnemonic suffix.
21521 (define_int_attr vpmadd52type
21522 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
;; Zero-masking expander for the "huq" form: passes a zero vector as the
;; merge source to the *_maskz_1 insn.
21524 (define_expand "vpamdd52huq<mode>_maskz"
21525 [(match_operand:VI8_AVX512VL 0 "register_operand")
21526 (match_operand:VI8_AVX512VL 1 "register_operand")
21527 (match_operand:VI8_AVX512VL 2 "register_operand")
21528 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
21529 (match_operand:<avx512fmaskmode> 4 "register_operand")]
21530 "TARGET_AVX512IFMA"
21532 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
21533 operands[0], operands[1], operands[2], operands[3],
21534 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking expander for the "luq" form, mirroring the one above.
21538 (define_expand "vpamdd52luq<mode>_maskz"
21539 [(match_operand:VI8_AVX512VL 0 "register_operand")
21540 (match_operand:VI8_AVX512VL 1 "register_operand")
21541 (match_operand:VI8_AVX512VL 2 "register_operand")
21542 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
21543 (match_operand:<avx512fmaskmode> 4 "register_operand")]
21544 "TARGET_AVX512IFMA"
21546 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
21547 operands[0], operands[1], operands[2], operands[3],
21548 CONST0_RTX (<MODE>mode), operands[4]));
;; Unmasked (or sd-maskz-subst) VPMADD52 insn itself.
21552 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
21553 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
21554 (unspec:VI8_AVX512VL
21555 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
21556 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
21557 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
21559 "TARGET_AVX512IFMA"
21560 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
21561 [(set_attr "type" "ssemuladd")
21562 (set_attr "prefix" "evex")
21563 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked VPMADD52: inactive elements keep the accumulator value
;; (vec_merge with operand 1, which is tied to the destination).
21565 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
21566 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
21567 (vec_merge:VI8_AVX512VL
21568 (unspec:VI8_AVX512VL
21569 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
21570 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
21571 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
21574 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21575 "TARGET_AVX512IFMA"
21576 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
21577 [(set_attr "type" "ssemuladd")
21578 (set_attr "prefix" "evex")
21579 (set_attr "mode" "<sseinsnmode>")])
;; VPMULTISHIFTQB (AVX512VBMI): per-byte selection of unaligned 8-bit
;; fields from each qword of operand 2, controlled by operand 1.
21581 (define_insn "vpmultishiftqb<mode><mask_name>"
21582 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
21583 (unspec:VI1_AVX512VL
21584 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
21585 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
21586 UNSPEC_VPMULTISHIFT))]
21587 "TARGET_AVX512VBMI"
21588 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21589 [(set_attr "type" "sselog")
21590 (set_attr "prefix" "evex")
21591 (set_attr "mode" "<sseinsnmode>")])
;; IMOD4: the 4x512-bit register-group modes used by the AVX5124FMAPS /
;; AVX5124VNNIW multi-register source operands.
21593 (define_mode_iterator IMOD4
21594 [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
;; The single-register (512-bit) mode each IMOD4 mode decomposes into.
21596 (define_mode_attr imod4_narrow
21597 [(V64SF "V16SF") (V64SI "V16SI")])
;; Standard move expander for the 4x-wide modes.
21599 (define_expand "mov<mode>"
21600 [(set (match_operand:IMOD4 0 "nonimmediate_operand")
21601 (match_operand:IMOD4 1 "nonimm_or_0_operand"))]
21604 ix86_expand_vector_move (<MODE>mode, operands);
;; 4x-wide move: kept as one insn until after reload, then split into
;; four 512-bit moves via 64-byte-offset subregs.
21608 (define_insn_and_split "*mov<mode>_internal"
21609 [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
21610 (match_operand:IMOD4 1 "nonimm_or_0_operand" " C,vm,v"))]
21612 && (register_operand (operands[0], <MODE>mode)
21613 || register_operand (operands[1], <MODE>mode))"
21615 "&& reload_completed"
21621 for (i = 0; i < 4; i++)
21623 op0 = simplify_subreg
21624 (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
21625 op1 = simplify_subreg
21626 (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
21627 emit_move_insn (op0, op1);
;; AVX5124FMAPS V4FMADDPS: four consecutive FMA steps using a group of
;; four source registers (V64SF operand, printed with %g as the group
;; base) and a 128-bit memory operand.  Destination is the accumulator.
21632 (define_insn "avx5124fmaddps_4fmaddps"
21633 [(set (match_operand:V16SF 0 "register_operand" "=v")
21635 [(match_operand:V16SF 1 "register_operand" "0")
21636 (match_operand:V64SF 2 "register_operand" "v")
21637 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
21638 "TARGET_AVX5124FMAPS"
21639 "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
21640 [(set_attr ("type") ("ssemuladd"))
21641 (set_attr ("prefix") ("evex"))
21642 (set_attr ("mode") ("V16SF"))])
;; Merge-masked V4FMADDPS (mask in operand 4).
21644 (define_insn "avx5124fmaddps_4fmaddps_mask"
21645 [(set (match_operand:V16SF 0 "register_operand" "=v")
21648 [(match_operand:V64SF 1 "register_operand" "v")
21649 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
21650 (match_operand:V16SF 3 "register_operand" "0")
21651 (match_operand:HI 4 "register_operand" "Yk")))]
21652 "TARGET_AVX5124FMAPS"
21653 "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
21654 [(set_attr ("type") ("ssemuladd"))
21655 (set_attr ("prefix") ("evex"))
21656 (set_attr ("mode") ("V16SF"))])
;; Zero-masked V4FMADDPS (merge source is the zero vector, operand 4).
21658 (define_insn "avx5124fmaddps_4fmaddps_maskz"
21659 [(set (match_operand:V16SF 0 "register_operand" "=v")
21662 [(match_operand:V16SF 1 "register_operand" "0")
21663 (match_operand:V64SF 2 "register_operand" "v")
21664 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
21665 (match_operand:V16SF 4 "const0_operand" "C")
21666 (match_operand:HI 5 "register_operand" "Yk")))]
21667 "TARGET_AVX5124FMAPS"
21668 "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
21669 [(set_attr ("type") ("ssemuladd"))
21670 (set_attr ("prefix") ("evex"))
21671 (set_attr ("mode") ("V16SF"))])
;; Scalar V4FMADDSS: same four-step FMA on the low element (%x prints
;; the 128-bit view of the register group base).
21673 (define_insn "avx5124fmaddps_4fmaddss"
21674 [(set (match_operand:V4SF 0 "register_operand" "=v")
21676 [(match_operand:V4SF 1 "register_operand" "0")
21677 (match_operand:V64SF 2 "register_operand" "v")
21678 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
21679 "TARGET_AVX5124FMAPS"
21680 "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
21681 [(set_attr ("type") ("ssemuladd"))
21682 (set_attr ("prefix") ("evex"))
21683 (set_attr ("mode") ("SF"))])
;; Merge-masked V4FMADDSS.
21685 (define_insn "avx5124fmaddps_4fmaddss_mask"
21686 [(set (match_operand:V4SF 0 "register_operand" "=v")
21689 [(match_operand:V64SF 1 "register_operand" "v")
21690 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
21691 (match_operand:V4SF 3 "register_operand" "0")
21692 (match_operand:QI 4 "register_operand" "Yk")))]
21693 "TARGET_AVX5124FMAPS"
21694 "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
21695 [(set_attr ("type") ("ssemuladd"))
21696 (set_attr ("prefix") ("evex"))
21697 (set_attr ("mode") ("SF"))])
;; Zero-masked V4FMADDSS.
21699 (define_insn "avx5124fmaddps_4fmaddss_maskz"
21700 [(set (match_operand:V4SF 0 "register_operand" "=v")
21703 [(match_operand:V4SF 1 "register_operand" "0")
21704 (match_operand:V64SF 2 "register_operand" "v")
21705 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
21706 (match_operand:V4SF 4 "const0_operand" "C")
21707 (match_operand:QI 5 "register_operand" "Yk")))]
21708 "TARGET_AVX5124FMAPS"
21709 "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
21710 [(set_attr ("type") ("ssemuladd"))
21711 (set_attr ("prefix") ("evex"))
21712 (set_attr ("mode") ("SF"))])
;; V4FNMADDPS/V4FNMADDSS: negated-multiply counterparts of the V4FMADD
;; patterns above; structure is identical, only the unspec and mnemonic
;; differ.
21714 (define_insn "avx5124fmaddps_4fnmaddps"
21715 [(set (match_operand:V16SF 0 "register_operand" "=v")
21717 [(match_operand:V16SF 1 "register_operand" "0")
21718 (match_operand:V64SF 2 "register_operand" "v")
21719 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
21720 "TARGET_AVX5124FMAPS"
21721 "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
21722 [(set_attr ("type") ("ssemuladd"))
21723 (set_attr ("prefix") ("evex"))
21724 (set_attr ("mode") ("V16SF"))])
;; Merge-masked V4FNMADDPS.
21726 (define_insn "avx5124fmaddps_4fnmaddps_mask"
21727 [(set (match_operand:V16SF 0 "register_operand" "=v")
21730 [(match_operand:V64SF 1 "register_operand" "v")
21731 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21732 (match_operand:V16SF 3 "register_operand" "0")
21733 (match_operand:HI 4 "register_operand" "Yk")))]
21734 "TARGET_AVX5124FMAPS"
21735 "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
21736 [(set_attr ("type") ("ssemuladd"))
21737 (set_attr ("prefix") ("evex"))
21738 (set_attr ("mode") ("V16SF"))])
;; Zero-masked V4FNMADDPS.
21740 (define_insn "avx5124fmaddps_4fnmaddps_maskz"
21741 [(set (match_operand:V16SF 0 "register_operand" "=v")
21744 [(match_operand:V16SF 1 "register_operand" "0")
21745 (match_operand:V64SF 2 "register_operand" "v")
21746 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21747 (match_operand:V16SF 4 "const0_operand" "C")
21748 (match_operand:HI 5 "register_operand" "Yk")))]
21749 "TARGET_AVX5124FMAPS"
21750 "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
21751 [(set_attr ("type") ("ssemuladd"))
21752 (set_attr ("prefix") ("evex"))
21753 (set_attr ("mode") ("V16SF"))])
;; Scalar V4FNMADDSS on the low element.
21755 (define_insn "avx5124fmaddps_4fnmaddss"
21756 [(set (match_operand:V4SF 0 "register_operand" "=v")
21758 [(match_operand:V4SF 1 "register_operand" "0")
21759 (match_operand:V64SF 2 "register_operand" "v")
21760 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
21761 "TARGET_AVX5124FMAPS"
21762 "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
21763 [(set_attr ("type") ("ssemuladd"))
21764 (set_attr ("prefix") ("evex"))
21765 (set_attr ("mode") ("SF"))])
;; Merge-masked V4FNMADDSS.
21767 (define_insn "avx5124fmaddps_4fnmaddss_mask"
21768 [(set (match_operand:V4SF 0 "register_operand" "=v")
21771 [(match_operand:V64SF 1 "register_operand" "v")
21772 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21773 (match_operand:V4SF 3 "register_operand" "0")
21774 (match_operand:QI 4 "register_operand" "Yk")))]
21775 "TARGET_AVX5124FMAPS"
21776 "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
21777 [(set_attr ("type") ("ssemuladd"))
21778 (set_attr ("prefix") ("evex"))
21779 (set_attr ("mode") ("SF"))])
;; Zero-masked V4FNMADDSS.
21781 (define_insn "avx5124fmaddps_4fnmaddss_maskz"
21782 [(set (match_operand:V4SF 0 "register_operand" "=v")
21785 [(match_operand:V4SF 1 "register_operand" "0")
21786 (match_operand:V64SF 2 "register_operand" "v")
21787 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21788 (match_operand:V4SF 4 "const0_operand" "C")
21789 (match_operand:QI 5 "register_operand" "Yk")))]
21790 "TARGET_AVX5124FMAPS"
21791 "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
21792 [(set_attr ("type") ("ssemuladd"))
21793 (set_attr ("prefix") ("evex"))
21794 (set_attr ("mode") ("SF"))])
;; AVX5124VNNIW VP4DPWSSD(S): four consecutive word dot-product
;; accumulation steps using a four-register group (V64SI operand, %g)
;; and a 128-bit memory operand; "S" forms saturate.
21796 (define_insn "avx5124vnniw_vp4dpwssd"
21797 [(set (match_operand:V16SI 0 "register_operand" "=v")
21799 [(match_operand:V16SI 1 "register_operand" "0")
21800 (match_operand:V64SI 2 "register_operand" "v")
21801 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
21802 "TARGET_AVX5124VNNIW"
21803 "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
21804 [(set_attr ("type") ("ssemuladd"))
21805 (set_attr ("prefix") ("evex"))
21806 (set_attr ("mode") ("TI"))])
;; Merge-masked VP4DPWSSD.
21808 (define_insn "avx5124vnniw_vp4dpwssd_mask"
21809 [(set (match_operand:V16SI 0 "register_operand" "=v")
21812 [(match_operand:V64SI 1 "register_operand" "v")
21813 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
21814 (match_operand:V16SI 3 "register_operand" "0")
21815 (match_operand:HI 4 "register_operand" "Yk")))]
21816 "TARGET_AVX5124VNNIW"
21817 "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
21818 [(set_attr ("type") ("ssemuladd"))
21819 (set_attr ("prefix") ("evex"))
21820 (set_attr ("mode") ("TI"))])
;; Zero-masked VP4DPWSSD.
21822 (define_insn "avx5124vnniw_vp4dpwssd_maskz"
21823 [(set (match_operand:V16SI 0 "register_operand" "=v")
21826 [(match_operand:V16SI 1 "register_operand" "0")
21827 (match_operand:V64SI 2 "register_operand" "v")
21828 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
21829 (match_operand:V16SI 4 "const0_operand" "C")
21830 (match_operand:HI 5 "register_operand" "Yk")))]
21831 "TARGET_AVX5124VNNIW"
21832 "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
21833 [(set_attr ("type") ("ssemuladd"))
21834 (set_attr ("prefix") ("evex"))
21835 (set_attr ("mode") ("TI"))])
;; Saturating VP4DPWSSDS.
21837 (define_insn "avx5124vnniw_vp4dpwssds"
21838 [(set (match_operand:V16SI 0 "register_operand" "=v")
21840 [(match_operand:V16SI 1 "register_operand" "0")
21841 (match_operand:V64SI 2 "register_operand" "v")
21842 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
21843 "TARGET_AVX5124VNNIW"
21844 "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
21845 [(set_attr ("type") ("ssemuladd"))
21846 (set_attr ("prefix") ("evex"))
21847 (set_attr ("mode") ("TI"))])
;; Merge-masked VP4DPWSSDS.
21849 (define_insn "avx5124vnniw_vp4dpwssds_mask"
21850 [(set (match_operand:V16SI 0 "register_operand" "=v")
21853 [(match_operand:V64SI 1 "register_operand" "v")
21854 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
21855 (match_operand:V16SI 3 "register_operand" "0")
21856 (match_operand:HI 4 "register_operand" "Yk")))]
21857 "TARGET_AVX5124VNNIW"
21858 "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
21859 [(set_attr ("type") ("ssemuladd"))
21860 (set_attr ("prefix") ("evex"))
21861 (set_attr ("mode") ("TI"))])
;; Zero-masked VP4DPWSSDS.
21863 (define_insn "avx5124vnniw_vp4dpwssds_maskz"
21864 [(set (match_operand:V16SI 0 "register_operand" "=v")
21867 [(match_operand:V16SI 1 "register_operand" "0")
21868 (match_operand:V64SI 2 "register_operand" "v")
21869 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
21870 (match_operand:V16SI 4 "const0_operand" "C")
21871 (match_operand:HI 5 "register_operand" "Yk")))]
21872 "TARGET_AVX5124VNNIW"
21873 "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
21874 [(set_attr ("type") ("ssemuladd"))
21875 (set_attr ("prefix") ("evex"))
21876 (set_attr ("mode") ("TI"))])
;; VPOPCNTD/Q (AVX512VPOPCNTDQ): per-element population count for
;; dword/qword vectors, with optional masking.
21878 (define_insn "vpopcount<mode><mask_name>"
21879 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
21880 (popcount:VI48_AVX512VL
21881 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
21882 "TARGET_AVX512VPOPCNTDQ"
21883 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
;; NOTE(review): these patterns presumably implement the out-of-line
;; SSE register save/restore stubs used by the 64-bit ms/sysv ABI
;; transition (operand 1 is the stub symbol) — confirm against the
;; "save_multiple"/"restore_multiple" predicate definitions.
21885 ;; Save multiple registers out-of-line.
21886 (define_insn "save_multiple<mode>"
21887 [(match_parallel 0 "save_multiple"
21888 [(use (match_operand:P 1 "symbol_operand"))])]
21889 "TARGET_SSE && TARGET_64BIT"
21892 ;; Restore multiple registers out-of-line.
21893 (define_insn "restore_multiple<mode>"
21894 [(match_parallel 0 "restore_multiple"
21895 [(use (match_operand:P 1 "symbol_operand"))])]
21896 "TARGET_SSE && TARGET_64BIT"
21899 ;; Restore multiple registers out-of-line and return.
21900 (define_insn "restore_multiple_and_return<mode>"
21901 [(match_parallel 0 "restore_multiple"
21903 (use (match_operand:P 1 "symbol_operand"))
21904 (set (reg:DI SP_REG) (reg:DI R10_REG))
21906 "TARGET_SSE && TARGET_64BIT"
21909 ;; Restore multiple registers out-of-line when hard frame pointer is used,
21910 ;; perform the leave operation prior to returning (from the function).
21911 (define_insn "restore_multiple_leave_return<mode>"
21912 [(match_parallel 0 "restore_multiple"
21914 (use (match_operand:P 1 "symbol_operand"))
21915 (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
21916 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
21917 (clobber (mem:BLK (scratch)))
21919 "TARGET_SSE && TARGET_64BIT"
;; VPOPCNTB/W (AVX512BITALG): per-element population count for
;; byte/word vectors, with optional masking.
21922 (define_insn "vpopcount<mode><mask_name>"
21923 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
21924 (popcount:VI12_AVX512VL
21925 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
21926 "TARGET_AVX512BITALG"
21927 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
;; GFNI patterns.  Each has three alternatives: legacy-SSE encoding
;; (noavx, dest tied via "%0"), VEX (avx) and EVEX (avx512f, maskable).
;; GF2P8AFFINEINVQB: affine transform of the byte-wise inverse in
;; GF(2^8); immediate operand 3 is the affine constant b.
21929 (define_insn "vgf2p8affineinvqb_<mode><mask_name>"
21930 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
21931 (unspec:VI1_AVX512F
21932 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
21933 (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
21934 (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
21935 UNSPEC_GF2P8AFFINEINV))]
21938 gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}
21939 vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}
21940 vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
21941 [(set_attr "isa" "noavx,avx,avx512f")
21942 (set_attr "prefix_data16" "1,*,*")
21943 (set_attr "prefix_extra" "1")
21944 (set_attr "prefix" "orig,maybe_evex,evex")
21945 (set_attr "mode" "<sseinsnmode>")])
;; GF2P8AFFINEQB: affine transform (no inversion) in GF(2^8).
21947 (define_insn "vgf2p8affineqb_<mode><mask_name>"
21948 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
21949 (unspec:VI1_AVX512F
21950 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
21951 (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
21952 (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
21953 UNSPEC_GF2P8AFFINE))]
21956 gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}
21957 vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}
21958 vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
21959 [(set_attr "isa" "noavx,avx,avx512f")
21960 (set_attr "prefix_data16" "1,*,*")
21961 (set_attr "prefix_extra" "1")
21962 (set_attr "prefix" "orig,maybe_evex,evex")
21963 (set_attr "mode" "<sseinsnmode>")])
;; GF2P8MULB: byte-wise multiplication in GF(2^8).
21965 (define_insn "vgf2p8mulb_<mode><mask_name>"
21966 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
21967 (unspec:VI1_AVX512F
21968 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
21969 (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")]
21973 gf2p8mulb\t{%2, %0| %0, %2}
21974 vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}
21975 vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}"
21976 [(set_attr "isa" "noavx,avx,avx512f")
21977 (set_attr "prefix_data16" "1,*,*")
21978 (set_attr "prefix_extra" "1")
21979 (set_attr "prefix" "orig,maybe_evex,evex")
21980 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VBMI2 VPSHRDW/D/Q: concatenated shift right by the immediate
;; in operand 3 (funnel shift of the operand-1:operand-2 pair).
21982 (define_insn "vpshrd_<mode><mask_name>"
21983 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
21984 (unspec:VI248_AVX512VL
21985 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
21986 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
21987 (match_operand:SI 3 "const_0_to_255_operand" "n")]
21989 "TARGET_AVX512VBMI2"
21990 "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
21991 [(set_attr ("prefix") ("evex"))])
;; VPSHLDW/D/Q: concatenated shift left by immediate, mirroring above.
21993 (define_insn "vpshld_<mode><mask_name>"
21994 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
21995 (unspec:VI248_AVX512VL
21996 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
21997 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
21998 (match_operand:SI 3 "const_0_to_255_operand" "n")]
22000 "TARGET_AVX512VBMI2"
22001 "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
22002 [(set_attr ("prefix") ("evex"))])
;; VPSHRDVW/D/Q: variable-count concatenated shift right; per-element
;; counts come from operand 3, destination doubles as first source ("0").
22004 (define_insn "vpshrdv_<mode>"
22005 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22006 (unspec:VI248_AVX512VL
22007 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22008 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22009 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22011 "TARGET_AVX512VBMI2"
22012 "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
22013 [(set_attr ("prefix") ("evex"))
22014 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked VPSHRDV (mask in operand 4).
22016 (define_insn "vpshrdv_<mode>_mask"
22017 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22018 (vec_merge:VI248_AVX512VL
22019 (unspec:VI248_AVX512VL
22020 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22021 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22022 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22025 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22026 "TARGET_AVX512VBMI2"
22027 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
22028 [(set_attr ("prefix") ("evex"))
22029 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking expander: routes to *_maskz_1 with a zero merge vector.
22031 (define_expand "vpshrdv_<mode>_maskz"
22032 [(match_operand:VI248_AVX512VL 0 "register_operand")
22033 (match_operand:VI248_AVX512VL 1 "register_operand")
22034 (match_operand:VI248_AVX512VL 2 "register_operand")
22035 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
22036 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22037 "TARGET_AVX512VBMI2"
22039 emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
22040 operands[2], operands[3],
22041 CONST0_RTX (<MODE>mode),
;; Zero-masked VPSHRDV insn matched by the expander above.
22046 (define_insn "vpshrdv_<mode>_maskz_1"
22047 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22048 (vec_merge:VI248_AVX512VL
22049 (unspec:VI248_AVX512VL
22050 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22051 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22052 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22054 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
22055 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
22056 "TARGET_AVX512VBMI2"
22057 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
22058 [(set_attr ("prefix") ("evex"))
22059 (set_attr "mode" "<sseinsnmode>")])
;; VPSHLDVW/D/Q: variable-count concatenated shift left, structured
;; exactly like the vpshrdv patterns above.
22061 (define_insn "vpshldv_<mode>"
22062 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22063 (unspec:VI248_AVX512VL
22064 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22065 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22066 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22068 "TARGET_AVX512VBMI2"
22069 "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
22070 [(set_attr ("prefix") ("evex"))
22071 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked VPSHLDV (mask in operand 4).
22073 (define_insn "vpshldv_<mode>_mask"
22074 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22075 (vec_merge:VI248_AVX512VL
22076 (unspec:VI248_AVX512VL
22077 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22078 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22079 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22082 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22083 "TARGET_AVX512VBMI2"
22084 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
22085 [(set_attr ("prefix") ("evex"))
22086 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking expander: routes to *_maskz_1 with a zero merge vector.
22088 (define_expand "vpshldv_<mode>_maskz"
22089 [(match_operand:VI248_AVX512VL 0 "register_operand")
22090 (match_operand:VI248_AVX512VL 1 "register_operand")
22091 (match_operand:VI248_AVX512VL 2 "register_operand")
22092 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
22093 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22094 "TARGET_AVX512VBMI2"
22096 emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
22097 operands[2], operands[3],
22098 CONST0_RTX (<MODE>mode),
;; Zero-masked VPSHLDV insn matched by the expander above.
22103 (define_insn "vpshldv_<mode>_maskz_1"
22104 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22105 (vec_merge:VI248_AVX512VL
22106 (unspec:VI248_AVX512VL
22107 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22108 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22109 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22111 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
22112 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
22113 "TARGET_AVX512VBMI2"
22114 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
22115 [(set_attr ("prefix") ("evex"))
22116 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VNNI dot-product-accumulate (vpdpbusd); operand 1 is both the
;; accumulator input and (via "0" constraint) the destination register.
(define_insn "vpdpbusd_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(unspec:VI4_AVX512VL
	  [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDUBSWACCD))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])
;; Merge-masked vpdpbusd: masked-off elements keep the accumulator
;; value (operand 1 is reused as the vec_merge fallback).
(define_insn "vpdpbusd_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])
;; Zero-masked vpdpbusd expander: forwards to *_maskz_1 with a zero
;; vector as the merge operand.
(define_expand "vpdpbusd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
					  operands[2], operands[3],
					  CONST0_RTX (<MODE>mode),
					  operands[4]));
  DONE;
})
;; Zero-masked vpdpbusd: masked-off elements are zeroed ({z} form).
(define_insn "vpdpbusd_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
;; Saturating variant of vpdpbusd (vpdpbusds).
(define_insn "vpdpbusds_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(unspec:VI4_AVX512VL
	  [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDUBSWACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])
;; Merge-masked vpdpbusds: masked-off elements keep the accumulator
;; value (operand 1 reused as the vec_merge fallback).
(define_insn "vpdpbusds_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCSSD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])
;; Zero-masked vpdpbusds expander: forwards to *_maskz_1 with a zero
;; vector as the merge operand.
(define_expand "vpdpbusds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
					   operands[2], operands[3],
					   CONST0_RTX (<MODE>mode),
					   operands[4]));
  DONE;
})
;; Zero-masked vpdpbusds: masked-off elements are zeroed ({z} form).
(define_insn "vpdpbusds_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCSSD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
;; AVX512VNNI word dot-product-accumulate (vpdpwssd); operand 1 is the
;; accumulator, tied to the destination.
(define_insn "vpdpwssd_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(unspec:VI4_AVX512VL
	  [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDWDACCD))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])
;; Merge-masked vpdpwssd: masked-off elements keep the accumulator
;; value (operand 1 reused as the vec_merge fallback).
(define_insn "vpdpwssd_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])
;; Zero-masked vpdpwssd expander: forwards to *_maskz_1 with a zero
;; vector as the merge operand.
(define_expand "vpdpwssd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
					  operands[2], operands[3],
					  CONST0_RTX (<MODE>mode),
					  operands[4]));
  DONE;
})
;; Zero-masked vpdpwssd: masked-off elements are zeroed ({z} form).
(define_insn "vpdpwssd_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
;; Saturating variant of vpdpwssd (vpdpwssds).
(define_insn "vpdpwssds_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(unspec:VI4_AVX512VL
	  [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDWDACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])
;; Merge-masked vpdpwssds: masked-off elements keep the accumulator
;; value (operand 1 reused as the vec_merge fallback).
(define_insn "vpdpwssds_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCSSD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])
;; Zero-masked vpdpwssds expander: forwards to *_maskz_1 with a zero
;; vector as the merge operand.
(define_expand "vpdpwssds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
					   operands[2], operands[3],
					   CONST0_RTX (<MODE>mode),
					   operands[4]));
  DONE;
})
;; Zero-masked vpdpwssds: masked-off elements are zeroed ({z} form).
(define_insn "vpdpwssds_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCSSD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
;; VAES patterns: vector AES round operations on each 128-bit lane.
;; NOTE(review): the enable conditions and closing lines below were
;; reconstructed as "TARGET_VAES" per the upstream sse.md text — confirm
;; against the unspec enum/feature macros earlier in this file.
(define_insn "vaesdec_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESDEC))]
  "TARGET_VAES"
  "vaesdec\t{%2, %1, %0|%0, %1, %2}"
)

(define_insn "vaesdeclast_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESDECLAST))]
  "TARGET_VAES"
  "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
)

(define_insn "vaesenc_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESENC))]
  "TARGET_VAES"
  "vaesenc\t{%2, %1, %0|%0, %1, %2}"
)

(define_insn "vaesenclast_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESENCLAST))]
  "TARGET_VAES"
  "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
)
;; VPCLMULQDQ carry-less multiply; immediate operand 3 selects which
;; quadword of each source lane participates.
(define_insn "vpclmulqdq_<mode>"
  [(set (match_operand:VI8_FVL 0 "register_operand" "=v")
	(unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v")
			 (match_operand:VI8_FVL 2 "vector_operand" "vm")
			 (match_operand:SI 3 "const_0_to_255_operand" "n")]
			UNSPEC_VPCLMULQDQ))]
  "TARGET_VPCLMULQDQ"
  "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "DI")])
;; VPSHUFBITQMB: bit-gather into a mask register; supports the optional
;; mask_scalar_merge substitution for a merge-masked variant.
(define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
	(unspec:<avx512fmaskmode>
	  [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")]
	  UNSPEC_VPSHUFBIT))]
  "TARGET_AVX512BITALG"
  "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
;; Vector modes holding BF16 elements (as HImode bit patterns); the
;; narrower modes additionally require AVX512VL.
(define_mode_iterator BF16 [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; BF16 vector mode -> SF vector mode with half as many elements
;; (each conversion widens two BF16 inputs per SF output).
(define_mode_attr bf16_cvt_2sf
  [(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")])
;; SF vector mode -> BF16 result mode of cvtneps2bf16 (V4SF also yields
;; V8HI: the narrowed result occupies the low half of a 128-bit vector).
(define_mode_attr sf_cvt_bf16
  [(V4SF "V8HI") (V8SF "V8HI") (V16SF "V16HI")])
;; SF vector mode -> BF16 vector mode of the same total bit width
;; (twice as many elements).
(define_mode_attr sf_bf16
  [(V4SF "V8HI") (V8SF "V16HI") (V16SF "V32HI")])
;; Zero-masked two-source PS->BF16 conversion: expands to the masked
;; insn with a zero vector as the merge operand.
(define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
  [(match_operand:BF16 0 "register_operand")
   (match_operand:<bf16_cvt_2sf> 1 "register_operand")
   (match_operand:<bf16_cvt_2sf> 2 "register_operand")
   (match_operand:<avx512fmaskmode> 3 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask(operands[0], operands[1],
		operands[2], CONST0_RTX(<MODE>mode), operands[3]));
  DONE;
})
;; VCVTNE2PS2BF16: convert two SF source vectors into one packed BF16
;; vector; supports the <mask_name> substitution for masked variants.
(define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
  [(set (match_operand:BF16 0 "register_operand" "=v")
	(unspec:BF16
	  [(match_operand:<bf16_cvt_2sf> 1 "register_operand" "v")
	   (match_operand:<bf16_cvt_2sf> 2 "register_operand" "v")]
	  UNSPEC_VCVTNE2PS2BF16))]
  "TARGET_AVX512BF16"
  "vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")
;; Zero-masked PS->BF16 narrowing conversion: expands to the masked
;; insn with a zero vector as the merge operand.
(define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
  [(match_operand:<sf_cvt_bf16> 0 "register_operand")
   (match_operand:VF1_AVX512VL 1 "register_operand")
   (match_operand:<avx512fmaskmode> 2 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask(operands[0], operands[1],
		CONST0_RTX(<sf_cvt_bf16>mode), operands[2]));
  DONE;
})
;; VCVTNEPS2BF16: narrow an SF vector to BF16; supports the <mask_name>
;; substitution for masked variants.
(define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
  [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
	(unspec:<sf_cvt_bf16>
	  [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
	  UNSPEC_VCVTNEPS2BF16))]
  "TARGET_AVX512BF16"
  "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
;; Zero-masked BF16 dot-product-accumulate: expands to *_maskz_1 with a
;; zero vector as the merge operand.  The mask mode is
;; <avx512fmaskhalfmode> because each SF result element consumes a pair
;; of BF16 inputs.
(define_expand "avx512f_dpbf16ps_<mode>_maskz"
  [(match_operand:VF1_AVX512VL 0 "register_operand")
   (match_operand:VF1_AVX512VL 1 "register_operand")
   (match_operand:<sf_bf16> 2 "register_operand")
   (match_operand:<sf_bf16> 3 "register_operand")
   (match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1(operands[0], operands[1],
		operands[2], operands[3], CONST0_RTX(<MODE>mode), operands[4]));
  DONE;
})
;; VDPBF16PS: BF16 pair dot-product accumulated into SF elements;
;; operand 1 is the accumulator, tied to the destination.  Supports the
;; maskz_half substitution for the zero-masked variant.
(define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
	(unspec:VF1_AVX512VL
	  [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
	   (match_operand:<sf_bf16> 2 "register_operand" "v")
	   (match_operand:<sf_bf16> 3 "register_operand" "v")]
	  UNSPEC_VDPBF16PS))]
  "TARGET_AVX512BF16"
  "vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")
;; Merge-masked VDPBF16PS: masked-off elements keep the accumulator
;; value (operand 1 reused as the vec_merge fallback).
(define_insn "avx512f_dpbf16ps_<mode>_mask"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VF1_AVX512VL
	  (unspec:VF1_AVX512VL
	    [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
	     (match_operand:<sf_bf16> 2 "register_operand" "v")
	     (match_operand:<sf_bf16> 3 "register_operand" "v")]
	    UNSPEC_VDPBF16PS)
	  (match_dup 1)
	  (match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512BF16"
  "vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")