1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2017 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
53 UNSPEC_XOP_UNSIGNED_CMP
64 UNSPEC_AESKEYGENASSIST
85 ;; For AVX512F support
87 UNSPEC_UNSIGNED_FIX_NOTRUNC
102 UNSPEC_COMPRESS_STORE
112 ;; For embed. rounding feature
113 UNSPEC_EMBEDDED_ROUNDING
115 ;; For AVX512PF support
116 UNSPEC_GATHER_PREFETCH
117 UNSPEC_SCATTER_PREFETCH
119 ;; For AVX512ER support
133 ;; For AVX512BW support
141 ;; For AVX512DQ support
146 ;; For AVX512IFMA support
150 ;; For AVX512VBMI support
153 ;; For AVX5124FMAPS/AVX5124VNNIW support
160 UNSPEC_GF2P8AFFINEINV
164 ;; For AVX512VBMI2 support
169 (define_c_enum "unspecv" [
179 ;; All vector modes including V?TImode, used in move patterns.
180 (define_mode_iterator VMOVE
181 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
182 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
183 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
184 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
185 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
186 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
187 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
189 ;; All AVX-512{F,VL} vector modes.  TARGET_AVX512F is assumed as the
;; baseline; the 128/256-bit forms additionally require TARGET_AVX512VL.
190 (define_mode_iterator V48_AVX512VL
191 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
192 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
193 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
194 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
196 ;; 1,2 byte AVX-512{BW,VL} vector modes.  TARGET_AVX512BW is assumed
;; as the baseline; the 128/256-bit forms additionally require TARGET_AVX512VL.
197 (define_mode_iterator VI12_AVX512VL
198 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
199 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
201 ;; Same iterator, but without TARGET_AVX512BW assumed as the baseline;
;; each entry carries its own explicit BW/VL condition.
202 (define_mode_iterator VI12_AVX512VLBW
203 [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
204 (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
205 (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; QImode vector modes: V64QI unconditional, 128/256-bit forms need AVX512VL.
207 (define_mode_iterator VI1_AVX512VL
208 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
;; General vector modes (integer and float); wider forms gated per entry.
211 (define_mode_iterator V
212 [(V32QI "TARGET_AVX") V16QI
213 (V16HI "TARGET_AVX") V8HI
214 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
215 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
216 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
217 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
219 ;; All 128bit vector modes
220 (define_mode_iterator V_128
221 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
223 ;; All 256bit vector modes
224 (define_mode_iterator V_256
225 [V32QI V16HI V8SI V4DI V8SF V4DF])
227 ;; All 512bit vector modes
228 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
230 ;; All 256bit and 512bit vector modes
231 (define_mode_iterator V_256_512
232 [V32QI V16HI V8SI V4DI V8SF V4DF
233 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
234 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
236 ;; All vector float modes
237 (define_mode_iterator VF
238 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
239 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
241 ;; 128- and 256-bit float vector modes
242 (define_mode_iterator VF_128_256
243 [(V8SF "TARGET_AVX") V4SF
244 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
246 ;; All SFmode vector float modes
247 (define_mode_iterator VF1
248 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
250 ;; 128- and 256-bit SF vector modes
251 (define_mode_iterator VF1_128_256
252 [(V8SF "TARGET_AVX") V4SF])
;; 128- and 256-bit SF vector modes; the 128-bit form needs AVX512VL.
254 (define_mode_iterator VF1_128_256VL
255 [V8SF (V4SF "TARGET_AVX512VL")])
257 ;; All DFmode vector float modes
258 (define_mode_iterator VF2
259 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
261 ;; 128- and 256-bit DF vector modes
262 (define_mode_iterator VF2_128_256
263 [(V4DF "TARGET_AVX") V2DF])
;; 256- and 512-bit DF vector modes.
265 (define_mode_iterator VF2_512_256
266 [(V8DF "TARGET_AVX512F") V4DF])
;; 256- and 512-bit DF vector modes; the 256-bit form needs AVX512VL.
268 (define_mode_iterator VF2_512_256VL
269 [V8DF (V4DF "TARGET_AVX512VL")])
271 ;; All 128bit vector float modes
272 (define_mode_iterator VF_128
273 [V4SF (V2DF "TARGET_SSE2")])
275 ;; All 256bit vector float modes
276 (define_mode_iterator VF_256
279 ;; All 512bit vector float modes
280 (define_mode_iterator VF_512
;; 4/8-byte element AVX-512 integer modes; 128/256-bit forms need AVX512VL.
283 (define_mode_iterator VI48_AVX512VL
284 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
285 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; AVX-512 float modes; 128/256-bit forms need AVX512VL.
287 (define_mode_iterator VF_AVX512VL
288 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
289 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; AVX-512 DF modes; 128/256-bit forms need AVX512VL.
291 (define_mode_iterator VF2_AVX512VL
292 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; AVX-512 SF modes; 128/256-bit forms need AVX512VL.
294 (define_mode_iterator VF1_AVX512VL
295 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
297 ;; All vector integer modes
298 (define_mode_iterator VI
299 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
300 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
301 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
302 (V8SI "TARGET_AVX") V4SI
303 (V4DI "TARGET_AVX") V2DI])
;; Same as VI, but the 256-bit integer forms require AVX2 rather than AVX.
305 (define_mode_iterator VI_AVX2
306 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
307 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
308 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
309 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
311 ;; All QImode vector integer modes
312 (define_mode_iterator VI1
313 [(V32QI "TARGET_AVX") V16QI])
315 ;; All 128-bit vector modes, plus their 256-bit forms under TARGET_AVX
;; (integer and float).  NOTE: this previously carried the comment
;; "All DImode vector integer modes", which actually describes VI8 below.
316 (define_mode_iterator V_AVX
317 [V16QI V8HI V4SI V2DI V4SF V2DF
318 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
319 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
320 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")])
322 (define_mode_iterator VI48_AVX
324 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
;; All DImode vector integer modes
326 (define_mode_iterator VI8
327 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; DImode vector modes; 128/256-bit forms need AVX512VL.
329 (define_mode_iterator VI8_AVX512VL
330 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; 256- and 512-bit DImode vector modes; the 256-bit form needs AVX512VL.
332 (define_mode_iterator VI8_256_512
333 [V8DI (V4DI "TARGET_AVX512VL")])
335 (define_mode_iterator VI1_AVX2
336 [(V32QI "TARGET_AVX2") V16QI])
338 (define_mode_iterator VI1_AVX512
339 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
341 (define_mode_iterator VI1_AVX512F
342 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
344 (define_mode_iterator VI2_AVX2
345 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
347 (define_mode_iterator VI2_AVX512F
348 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
350 (define_mode_iterator VI4_AVX
351 [(V8SI "TARGET_AVX") V4SI])
353 (define_mode_iterator VI4_AVX2
354 [(V8SI "TARGET_AVX2") V4SI])
356 (define_mode_iterator VI4_AVX512F
357 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
359 (define_mode_iterator VI4_AVX512VL
360 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
362 (define_mode_iterator VI48_AVX512F_AVX512VL
363 [V4SI V8SI (V16SI "TARGET_AVX512F")
364 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
366 (define_mode_iterator VI2_AVX512VL
367 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
369 (define_mode_iterator VI8_AVX2_AVX512BW
370 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
372 (define_mode_iterator VI8_AVX2
373 [(V4DI "TARGET_AVX2") V2DI])
375 (define_mode_iterator VI8_AVX2_AVX512F
376 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
378 (define_mode_iterator VI4_128_8_256
382 (define_mode_iterator V8FI
386 (define_mode_iterator V16FI
389 ;; ??? We should probably use TImode instead.
390 (define_mode_iterator VIMAX_AVX2_AVX512BW
391 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
393 ;; TARGET_AVX512BW is assumed as the baseline; the narrower forms
;; additionally require TARGET_AVX512VL.
394 (define_mode_iterator VIMAX_AVX512VL
395 [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
397 (define_mode_iterator VIMAX_AVX2
398 [(V2TI "TARGET_AVX2") V1TI])
400 ;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
401 (define_mode_iterator SSESCALARMODE
402 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
404 (define_mode_iterator VI12_AVX2
405 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
406 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
408 (define_mode_iterator VI24_AVX2
409 [(V16HI "TARGET_AVX2") V8HI
410 (V8SI "TARGET_AVX2") V4SI])
412 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
413 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
414 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
415 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
417 (define_mode_iterator VI124_AVX2
418 [(V32QI "TARGET_AVX2") V16QI
419 (V16HI "TARGET_AVX2") V8HI
420 (V8SI "TARGET_AVX2") V4SI])
422 (define_mode_iterator VI2_AVX2_AVX512BW
423 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; 2/4/8-byte element modes: 512-bit HI forms need AVX512BW,
;; all 128/256-bit forms need AVX512VL.
425 (define_mode_iterator VI248_VLBW
426 [(V32HI "TARGET_AVX512BW") V16SI V8DI
427 (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
428 (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
429 (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
431 (define_mode_iterator VI48_AVX2
432 [(V8SI "TARGET_AVX2") V4SI
433 (V4DI "TARGET_AVX2") V2DI])
435 (define_mode_iterator VI248_AVX2
436 [(V16HI "TARGET_AVX2") V8HI
437 (V8SI "TARGET_AVX2") V4SI
438 (V4DI "TARGET_AVX2") V2DI])
440 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
441 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
442 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
443 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
445 (define_mode_iterator VI248_AVX512BW
446 [(V32HI "TARGET_AVX512BW") V16SI V8DI])
448 (define_mode_iterator VI248_AVX512BW_AVX512VL
449 [(V32HI "TARGET_AVX512BW")
450 (V4DI "TARGET_AVX512VL") V16SI V8DI])
452 ;; Suppose TARGET_AVX512VL as baseline
453 (define_mode_iterator VI248_AVX512BW_1
454 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
458 (define_mode_iterator VI248_AVX512BW_2
459 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
;; 4/8-byte element integer modes; only 512-bit forms carry a condition.
463 (define_mode_iterator VI48_AVX512F
464 [(V16SI "TARGET_AVX512F") V8SI V4SI
465 (V8DI "TARGET_AVX512F") V4DI V2DI])
467 (define_mode_iterator VI48_AVX_AVX512F
468 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
469 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
471 (define_mode_iterator VI12_AVX_AVX512F
472 [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
473 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
475 (define_mode_iterator V48_AVX2
478 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
479 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; AVX-512 ISA tag per mode, used in pattern names such as
;; <avx512>_load<mode>_mask below.
481 (define_mode_attr avx512
482 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
483 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
484 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
485 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
486 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
487 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
;; ISA tag per mode (sse2/avx/avx512* family) for pattern names.
489 (define_mode_attr sse2_avx_avx512f
490 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
491 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
492 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
493 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
494 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
495 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
;; ISA tag per mode (sse2/avx2/avx512* family) for pattern names.
497 (define_mode_attr sse2_avx2
498 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
499 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
500 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
501 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
502 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
;; ISA tag per mode (ssse3/avx2/avx512bw family) for pattern names.
504 (define_mode_attr ssse3_avx2
505 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
506 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
507 (V4SI "ssse3") (V8SI "avx2")
508 (V2DI "ssse3") (V4DI "avx2")
509 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
;; ISA tag per mode (sse4_1/avx2/avx512* family) for pattern names.
511 (define_mode_attr sse4_1_avx2
512 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
513 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
514 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
515 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
;; "avx" for float modes, "avx2" for integer modes.
517 (define_mode_attr avx_avx2
518 [(V4SF "avx") (V2DF "avx")
519 (V8SF "avx") (V4DF "avx")
520 (V4SI "avx2") (V2DI "avx2")
521 (V8SI "avx2") (V4DI "avx2")])
;; "vec" for 128-bit modes, "avx2" for 256-bit modes.
523 (define_mode_attr vec_avx2
524 [(V16QI "vec") (V32QI "avx2")
525 (V8HI "vec") (V16HI "avx2")
526 (V4SI "vec") (V8SI "avx2")
527 (V2DI "vec") (V4DI "avx2")])
;; ISA tag per mode (avx2/avx512* family) for pattern names.
529 (define_mode_attr avx2_avx512
530 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
531 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
532 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
533 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
534 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
;; "f" for float modes, "i" for integer modes; substituted into mnemonics
;; such as vextract<shuffletype>64x4 (see mov<mode>_internal below).
536 (define_mode_attr shuffletype
537 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
538 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
539 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
540 (V32HI "i") (V16HI "i") (V8HI "i")
541 (V64QI "i") (V32QI "i") (V16QI "i")
542 (V4TI "i") (V2TI "i") (V1TI "i")])
;; Vector mode with a quarter of the elements (512-bit -> 128-bit).
544 (define_mode_attr ssequartermode
545 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
;; Lowercase mode name with same element count but double element width.
547 (define_mode_attr ssedoublemodelower
548 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
549 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
550 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
;; Vector mode of twice the total size.  NOTE(review): most entries double
;; the element width (e.g. V4SI -> V4DI) but some double the element count
;; instead (e.g. V8SI -> V16SI) -- confirm against each use site.
552 (define_mode_attr ssedoublemode
553 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
554 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
555 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
556 (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
557 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
558 (V4DI "V8DI") (V8DI "V16DI")])
;; QImode vector mode of the same total size as the given DImode vector.
560 (define_mode_attr ssebytemode
561 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
563 ;; All 128bit vector integer modes
564 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
566 ;; All 256bit vector integer modes
567 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
569 ;; Various 128bit vector integer mode combinations
570 (define_mode_iterator VI12_128 [V16QI V8HI])
571 (define_mode_iterator VI14_128 [V16QI V4SI])
572 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
573 (define_mode_iterator VI24_128 [V8HI V4SI])
574 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
575 (define_mode_iterator VI48_128 [V4SI V2DI])
577 ;; Various 256bit and 512bit vector integer mode combinations
578 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
579 (define_mode_iterator VI124_256_AVX512F_AVX512BW
581 (V64QI "TARGET_AVX512BW")
582 (V32HI "TARGET_AVX512BW")
583 (V16SI "TARGET_AVX512F")])
584 (define_mode_iterator VI48_256 [V8SI V4DI])
585 (define_mode_iterator VI48_512 [V16SI V8DI])
586 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
;; 512-bit integer modes; HI/QI element forms need AVX512BW.
587 (define_mode_iterator VI_AVX512BW
588 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
590 ;; Int-float size matches
591 (define_mode_iterator VI4F_128 [V4SI V4SF])
592 (define_mode_iterator VI8F_128 [V2DI V2DF])
593 (define_mode_iterator VI4F_256 [V8SI V8SF])
594 (define_mode_iterator VI8F_256 [V4DI V4DF])
595 (define_mode_iterator VI48F_256_512
597 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
598 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
599 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; 512-bit modes of every element width (SI/SF/DI/DF/HI/QI).
600 (define_mode_iterator VF48_I1248
601 [V16SI V16SF V8DI V8DF V32HI V64QI])
;; 4/8-byte int and float modes; 128/256-bit forms need AVX512VL.
602 (define_mode_iterator VI48F
603 [V16SI V16SF V8DI V8DF
604 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
605 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
606 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
607 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
608 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
610 ;; Mapping from float mode to required SSE level
611 (define_mode_attr sse
612 [(SF "sse") (DF "sse2")
613 (V4SF "sse") (V2DF "sse2")
614 (V16SF "avx512f") (V8SF "avx")
615 (V8DF "avx512f") (V4DF "avx")])
;; ISA tag for QI/DI element vector modes (sse2/avx/avx512f family).
617 (define_mode_attr sse2
618 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
619 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
;; ISA tag for QI element vector modes (sse3/avx family).
621 (define_mode_attr sse3
622 [(V16QI "sse3") (V32QI "avx")])
624 (define_mode_attr sse4_1
625 [(V4SF "sse4_1") (V2DF "sse4_1")
626 (V8SF "avx") (V4DF "avx")
628 (V4DI "avx") (V2DI "sse4_1")
629 (V8SI "avx") (V4SI "sse4_1")
630 (V16QI "sse4_1") (V32QI "avx")
631 (V8HI "sse4_1") (V16HI "avx")])
;; Size suffix per mode: "512" / "256" / empty for 128-bit modes.
633 (define_mode_attr avxsizesuffix
634 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
635 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
636 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
637 (V16SF "512") (V8DF "512")
638 (V8SF "256") (V4DF "256")
639 (V4SF "") (V2DF "")])
641 ;; SSE instruction mode
642 (define_mode_attr sseinsnmode
643 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
644 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
645 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
646 (V16SF "V16SF") (V8DF "V8DF")
647 (V8SF "V8SF") (V4DF "V4DF")
648 (V4SF "V4SF") (V2DF "V2DF")
651 ;; Mapping of vector modes to the corresponding mask mode
652 (define_mode_attr avx512fmaskmode
653 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
654 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
655 (V16SI "HI") (V8SI "QI") (V4SI "QI")
656 (V8DI "QI") (V4DI "QI") (V2DI "QI")
657 (V16SF "HI") (V8SF "QI") (V4SF "QI")
658 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
660 ;; Lowercase variant of avx512fmaskmode
661 (define_mode_attr avx512fmaskmodelower
662 [(V64QI "di") (V32QI "si") (V16QI "hi")
663 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
664 (V16SI "hi") (V8SI "qi") (V4SI "qi")
665 (V8DI "qi") (V4DI "qi") (V2DI "qi")
666 (V16SF "hi") (V8SF "qi") (V4SF "qi")
667 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
669 ;; Mapping of vector float modes to an integer mode of the same size
670 (define_mode_attr sseintvecmode
671 [(V16SF "V16SI") (V8DF "V8DI")
672 (V8SF "V8SI") (V4DF "V4DI")
673 (V4SF "V4SI") (V2DF "V2DI")
674 (V16SI "V16SI") (V8DI "V8DI")
675 (V8SI "V8SI") (V4DI "V4DI")
676 (V4SI "V4SI") (V2DI "V2DI")
677 (V16HI "V16HI") (V8HI "V8HI")
678 (V32HI "V32HI") (V64QI "V64QI")
679 (V32QI "V32QI") (V16QI "V16QI")])
;; Scalar integer mode (TI/OI/XI) of the same total size as the float mode.
681 (define_mode_attr sseintvecmode2
682 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
683 (V8SF "OI") (V4SF "TI")])
;; Lowercase variant of sseintvecmode.
685 (define_mode_attr sseintvecmodelower
686 [(V16SF "v16si") (V8DF "v8di")
687 (V8SF "v8si") (V4DF "v4di")
688 (V4SF "v4si") (V2DF "v2di")
689 (V8SI "v8si") (V4DI "v4di")
690 (V4SI "v4si") (V2DI "v2di")
691 (V16HI "v16hi") (V8HI "v8hi")
692 (V32QI "v32qi") (V16QI "v16qi")])
694 ;; Mapping of vector modes to a vector mode of double size
695 (define_mode_attr ssedoublevecmode
696 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
697 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
698 (V8SF "V16SF") (V4DF "V8DF")
699 (V4SF "V8SF") (V2DF "V4DF")])
701 ;; Mapping of vector modes to a vector mode of half size
702 (define_mode_attr ssehalfvecmode
703 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
704 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
705 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
706 (V16SF "V8SF") (V8DF "V4DF")
707 (V8SF "V4SF") (V4DF "V2DF")
710 (define_mode_attr ssehalfvecmodelower
711 [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
712 (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
713 (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
714 (V16SF "v8sf") (V8DF "v4df")
715 (V8SF "v4sf") (V4DF "v2df")
718 ;; Mapping of vector modes to packed single mode of the same size
719 (define_mode_attr ssePSmode
720 [(V16SI "V16SF") (V8DF "V16SF")
721 (V16SF "V16SF") (V8DI "V16SF")
722 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
723 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
724 (V8SI "V8SF") (V4SI "V4SF")
725 (V4DI "V8SF") (V2DI "V4SF")
726 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
727 (V8SF "V8SF") (V4SF "V4SF")
728 (V4DF "V8SF") (V2DF "V4SF")])
;; Packed single mode with the same element count (half the total size).
730 (define_mode_attr ssePSmode2
731 [(V8DI "V8SF") (V4DI "V4SF")])
733 ;; Mapping of vector modes back to the scalar modes
734 (define_mode_attr ssescalarmode
735 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
736 (V32HI "HI") (V16HI "HI") (V8HI "HI")
737 (V16SI "SI") (V8SI "SI") (V4SI "SI")
738 (V8DI "DI") (V4DI "DI") (V2DI "DI")
739 (V16SF "SF") (V8SF "SF") (V4SF "SF")
740 (V8DF "DF") (V4DF "DF") (V2DF "DF")
741 (V4TI "TI") (V2TI "TI")])
743 ;; Lowercase variant: mapping of vector modes back to the scalar modes
744 (define_mode_attr ssescalarmodelower
745 [(V64QI "qi") (V32QI "qi") (V16QI "qi")
746 (V32HI "hi") (V16HI "hi") (V8HI "hi")
747 (V16SI "si") (V8SI "si") (V4SI "si")
748 (V8DI "di") (V4DI "di") (V2DI "di")
749 (V16SF "sf") (V8SF "sf") (V4SF "sf")
750 (V8DF "df") (V4DF "df") (V2DF "df")
751 (V4TI "ti") (V2TI "ti")])
753 ;; Mapping of vector modes to the 128bit modes
754 (define_mode_attr ssexmmmode
755 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
756 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
757 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
758 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
759 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
760 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
762 ;; Pointer size override for scalar modes (Intel asm dialect)
763 (define_mode_attr iptr
764 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
765 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
766 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
767 (V8SF "k") (V4DF "q")
768 (V4SF "k") (V2DF "q")
771 ;; Number of scalar elements in each vector type
;; NOTE(review): V32HI has no entry here -- confirm no pattern uses
;; <ssescalarnum> with V32HI before relying on it.
772 (define_mode_attr ssescalarnum
773 [(V64QI "64") (V16SI "16") (V8DI "8")
774 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
775 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
776 (V16SF "16") (V8DF "8")
777 (V8SF "8") (V4DF "4")
778 (V4SF "4") (V2DF "2")])
780 ;; Mask of scalar elements in each vector type (element count minus one)
781 (define_mode_attr ssescalarnummask
782 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
783 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
784 (V8SF "7") (V4DF "3")
785 (V4SF "3") (V2DF "1")])
;; Element size in bits.  NOTE(review): the V?TI modes map to "64",
;; not 128 -- confirm against the uses of <ssescalarsize>.
787 (define_mode_attr ssescalarsize
788 [(V4TI "64") (V2TI "64") (V1TI "64")
789 (V8DI "64") (V4DI "64") (V2DI "64")
790 (V64QI "8") (V32QI "8") (V16QI "8")
791 (V32HI "16") (V16HI "16") (V8HI "16")
792 (V16SI "32") (V8SI "32") (V4SI "32")
793 (V16SF "32") (V8SF "32") (V4SF "32")
794 (V8DF "64") (V4DF "64") (V2DF "64")])
796 ;; SSE prefix for integer vector modes
797 (define_mode_attr sseintprefix
798 [(V2DI "p") (V2DF "")
803 (V16SI "p") (V16SF "")
804 (V16QI "p") (V8HI "p")
805 (V32QI "p") (V16HI "p")
806 (V64QI "p") (V32HI "p")])
808 ;; SSE scalar suffix for vector modes
809 (define_mode_attr ssescalarmodesuffix
811 (V8SF "ss") (V4DF "sd")
812 (V4SF "ss") (V2DF "sd")
813 (V8SI "ss") (V4DI "sd")
816 ;; Pack/unpack vector modes
817 (define_mode_attr sseunpackmode
818 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
819 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
820 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; Inverse of sseunpackmode: mode packed down to half-width elements.
822 (define_mode_attr ssepackmode
823 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
824 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
825 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
827 ;; Mapping of the max integer size for xop rotate immediate constraint
828 (define_mode_attr sserotatemax
829 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
831 ;; Mapping of mode to cast intrinsic name
832 (define_mode_attr castmode
833 [(V8SI "si") (V8SF "ps") (V4DF "pd")
834 (V16SI "si") (V16SF "ps") (V8DF "pd")])
836 ;; Instruction suffix for sign and zero extensions.
837 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
839 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
840 ;; i64x4 or f64x4 for 512bit modes.
841 (define_mode_attr i128
842 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
843 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
844 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
846 ;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
847 ;; i32x4, f32x4, i64x2 or f64x2 suffixes.
848 (define_mode_attr i128vldq
849 [(V8SF "f32x4") (V4DF "f64x2")
850 (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
853 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
854 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
856 ;; Mapping for dbpsadbw modes
857 (define_mode_attr dbpsadbwmode
858 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
860 ;; Mapping suffixes for broadcast
861 (define_mode_attr bcstscalarsuff
862 [(V64QI "b") (V32QI "b") (V16QI "b")
863 (V32HI "w") (V16HI "w") (V8HI "w")
864 (V16SI "d") (V8SI "d") (V4SI "d")
865 (V8DI "q") (V4DI "q") (V2DI "q")
866 (V16SF "ss") (V8SF "ss") (V4SF "ss")
867 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
869 ;; Tie mode of assembler operand to mode iterator
870 (define_mode_attr concat_tg_mode
871 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
872 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
874 ;; Tie mode of assembler operand to mode iterator
875 (define_mode_attr xtg_mode
876 [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
877 (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
878 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
880 ;; Half mask mode for unpacks
881 (define_mode_attr HALFMASKMODE
882 [(DI "SI") (SI "HI")])
884 ;; Double mask mode for packs
885 (define_mode_attr DOUBLEMASKMODE
886 [(HI "SI") (SI "DI")])
889 ;; Include define_subst patterns for instructions with mask
892 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
894 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
898 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
900 ;; All of these patterns are enabled for SSE1 as well as SSE2.
901 ;; This is essential for maintaining stable calling conventions.
903 (define_expand "mov<mode>"
904 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
905 (match_operand:VMOVE 1 "nonimmediate_operand"))]
908 ix86_expand_vector_move (<MODE>mode, operands);
912 (define_insn "mov<mode>_internal"
913 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
915 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
918 && (register_operand (operands[0], <MODE>mode)
919 || register_operand (operands[1], <MODE>mode))"
921 switch (get_attr_type (insn))
924 return standard_sse_constant_opcode (insn, operands[1]);
927 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
928 in avx512f, so we need to use workarounds, to access sse registers
929 16-31, which are evex-only. In avx512vl we don't need workarounds. */
930 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
931 && (EXT_REX_SSE_REG_P (operands[0])
932 || EXT_REX_SSE_REG_P (operands[1])))
934 if (memory_operand (operands[0], <MODE>mode))
936 if (<MODE_SIZE> == 32)
937 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
938 else if (<MODE_SIZE> == 16)
939 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
943 else if (memory_operand (operands[1], <MODE>mode))
945 if (<MODE_SIZE> == 32)
946 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
947 else if (<MODE_SIZE> == 16)
948 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
953 /* Reg -> reg move is always aligned. Just use wider move. */
954 switch (get_attr_mode (insn))
958 return "vmovaps\t{%g1, %g0|%g0, %g1}";
961 return "vmovapd\t{%g1, %g0|%g0, %g1}";
964 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
970 switch (get_attr_mode (insn))
975 if (misaligned_operand (operands[0], <MODE>mode)
976 || misaligned_operand (operands[1], <MODE>mode))
977 return "%vmovups\t{%1, %0|%0, %1}";
979 return "%vmovaps\t{%1, %0|%0, %1}";
984 if (misaligned_operand (operands[0], <MODE>mode)
985 || misaligned_operand (operands[1], <MODE>mode))
986 return "%vmovupd\t{%1, %0|%0, %1}";
988 return "%vmovapd\t{%1, %0|%0, %1}";
992 if (misaligned_operand (operands[0], <MODE>mode)
993 || misaligned_operand (operands[1], <MODE>mode))
994 return TARGET_AVX512VL ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
995 : "%vmovdqu\t{%1, %0|%0, %1}";
997 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
998 : "%vmovdqa\t{%1, %0|%0, %1}";
1000 if (misaligned_operand (operands[0], <MODE>mode)
1001 || misaligned_operand (operands[1], <MODE>mode))
1002 return (<MODE>mode == V16SImode
1003 || <MODE>mode == V8DImode
1005 ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1006 : "vmovdqu64\t{%1, %0|%0, %1}";
1008 return "vmovdqa64\t{%1, %0|%0, %1}";
1018 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1019 (set_attr "prefix" "maybe_vex")
1021 (cond [(and (eq_attr "alternative" "1")
1022 (match_test "TARGET_AVX512VL"))
1023 (const_string "<sseinsnmode>")
1024 (and (match_test "<MODE_SIZE> == 16")
1025 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1026 (and (eq_attr "alternative" "3")
1027 (match_test "TARGET_SSE_TYPELESS_STORES"))))
1028 (const_string "<ssePSmode>")
1029 (match_test "TARGET_AVX")
1030 (const_string "<sseinsnmode>")
1031 (ior (not (match_test "TARGET_SSE2"))
1032 (match_test "optimize_function_for_size_p (cfun)"))
1033 (const_string "V4SF")
1034 (and (eq_attr "alternative" "0")
1035 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1038 (const_string "<sseinsnmode>")))
1039 (set (attr "enabled")
1040 (cond [(and (match_test "<MODE_SIZE> == 16")
1041 (eq_attr "alternative" "1"))
1042 (symbol_ref "TARGET_SSE2")
1043 (and (match_test "<MODE_SIZE> == 32")
1044 (eq_attr "alternative" "1"))
1045 (symbol_ref "TARGET_AVX2")
1047 (symbol_ref "true")))])
1049 (define_insn "<avx512>_load<mode>_mask"
1050 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1051 (vec_merge:V48_AVX512VL
1052 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
1053 (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
1054 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1057 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1059 if (misaligned_operand (operands[1], <MODE>mode))
1060 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1062 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1066 if (misaligned_operand (operands[1], <MODE>mode))
1067 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1069 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1072 [(set_attr "type" "ssemov")
1073 (set_attr "prefix" "evex")
1074 (set_attr "memory" "none,load")
1075 (set_attr "mode" "<sseinsnmode>")])
1077 (define_insn "<avx512>_load<mode>_mask"
1078 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1079 (vec_merge:VI12_AVX512VL
1080 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
1081 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
1082 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1084 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1085 [(set_attr "type" "ssemov")
1086 (set_attr "prefix" "evex")
1087 (set_attr "memory" "none,load")
1088 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 masked blend for 32/64-bit-element modes: element-wise
;; selects operand 2 where mask operand 3 is set, operand 1 otherwise
;; (note operands 1 and 2 are swapped in the vec_merge relative to
;; their assembly positions).
1090 (define_insn "<avx512>_blendm<mode>"
1091 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1092 (vec_merge:V48_AVX512VL
1093 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1094 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1095 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1097 "vblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1098 [(set_attr "type" "ssemov")
1099 (set_attr "prefix" "evex")
1100 (set_attr "mode" "<sseinsnmode>")])
;; Same masked blend for 8/16-bit-element integer vectors
;; (vpblendmb/vpblendmw).
1102 (define_insn "<avx512>_blendm<mode>"
1103 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1104 (vec_merge:VI12_AVX512VL
1105 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1106 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1107 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1109 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1110 [(set_attr "type" "ssemov")
1111 (set_attr "prefix" "evex")
1112 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 masked vector store for 32/64-bit-element modes: writes to
;; memory only the elements of operand 1 selected by mask operand 2.
1114 (define_insn "<avx512>_store<mode>_mask"
1115 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1116 (vec_merge:V48_AVX512VL
1117 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1119 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
;; Mnemonic selection mirrors the masked load above: float modes use
;; vmovu*/vmova*, integer modes vmovdqu/vmovdqa with element size,
;; picking the unaligned form for known-misaligned destinations.
1122 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1124 if (misaligned_operand (operands[0], <MODE>mode))
1125 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1127 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1131 if (misaligned_operand (operands[0], <MODE>mode))
1132 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1134 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1137 [(set_attr "type" "ssemov")
1138 (set_attr "prefix" "evex")
1139 (set_attr "memory" "store")
1140 (set_attr "mode" "<sseinsnmode>")])
;; Masked store for 8/16-bit-element integer vectors; only the
;; unaligned vmovdqu8/vmovdqu16 encodings exist.
1142 (define_insn "<avx512>_store<mode>_mask"
1143 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1144 (vec_merge:VI12_AVX512VL
1145 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1147 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1149 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1150 [(set_attr "type" "ssemov")
1151 (set_attr "prefix" "evex")
1152 (set_attr "memory" "store")
1153 (set_attr "mode" "<sseinsnmode>")])
;; movq: move the low 64-bit element of a V2DI (selected via
;; vec_select of element 0); emitted as (v)movq with a 64-bit memory
;; reference (%q1) in Intel syntax.
1155 (define_insn "sse2_movq128"
1156 [(set (match_operand:V2DI 0 "register_operand" "=v")
1159 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1160 (parallel [(const_int 0)]))
1163 "%vmovq\t{%1, %0|%0, %q1}"
1164 [(set_attr "type" "ssemov")
1165 (set_attr "prefix" "maybe_vex")
1166 (set_attr "mode" "TI")])
1168 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1169 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1170 ;; from memory, we'd prefer to load the memory directly into the %xmm
1171 ;; register. To facilitate this happy circumstance, this pattern won't
1172 ;; split until after register allocation. If the 64-bit value didn't
1173 ;; come from memory, this is the best we can do. This is much better
1174 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; See the block comment above: moves a DImode value into an xmm
;; register on 32-bit targets.  Splits after reload into either
;; (a) two scalar loads + interleave when the source is a register
;; pair, using the V4SI scratch (operand 2), or (b) a direct
;; vec_concat load when the source is in memory.
1177 (define_insn_and_split "movdi_to_sse"
1179 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1180 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1181 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1182 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1184 "&& reload_completed"
1187 if (register_operand (operands[1], DImode))
1189 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1190 Assemble the 64-bit DImode value in an xmm register.  */
1191 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1192 gen_lowpart (SImode, operands[1])));
1193 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1194 gen_highpart (SImode, operands[1])));
1195 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
;; Memory source: load the DI directly as the low half of a V2DI and
;; view the result as V4SI, avoiding the GPR round-trip entirely.
1198 else if (memory_operand (operands[1], DImode))
1200 rtx tmp = gen_reg_rtx (V2DImode);
1201 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
1202 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
;; Split a V4SF load of a zero-extended scalar into a scalar SF load
;; merged with a zero vector (runs after reload).
1210 [(set (match_operand:V4SF 0 "register_operand")
1211 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1212 "TARGET_SSE && reload_completed"
1215 (vec_duplicate:V4SF (match_dup 1))
1219 operands[1] = gen_lowpart (SFmode, operands[1]);
1220 operands[2] = CONST0_RTX (V4SFmode);
;; Same idea for V2DF: rewrite as a vec_concat of the DF scalar with
;; a DF zero in the high half.
1224 [(set (match_operand:V2DF 0 "register_operand")
1225 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1226 "TARGET_SSE2 && reload_completed"
1227 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1229 operands[1] = gen_lowpart (DFmode, operands[1]);
1230 operands[2] = CONST0_RTX (DFmode);
;; Standard movmisalign pattern: expand misaligned vector moves for
;; every VMOVE mode through the target hook, which chooses the
;; appropriate unaligned move sequence.
1233 (define_expand "movmisalign<mode>"
1234 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1235 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1238 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1242 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
;; Peephole 1: a low-half load-and-zero followed by a high-half load
;; into the same vector becomes one unaligned V2DF load when
;; ix86_operands_ok_for_move_multiple confirms the two memory refs
;; are consecutive.
1244 [(set (match_operand:V2DF 0 "sse_reg_operand")
1245 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1246 (match_operand:DF 4 "const0_operand")))
1247 (set (match_operand:V2DF 2 "sse_reg_operand")
1248 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1249 (parallel [(const_int 0)]))
1250 (match_operand:DF 3 "memory_operand")))]
1251 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1252 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1253 [(set (match_dup 2) (match_dup 5))]
1254 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
;; Peephole 2: variant where the first insn is a plain scalar DF load
;; into the register later used as the low half (operand 4 must be
;; the same hard register as operand 2).
1257 [(set (match_operand:DF 0 "sse_reg_operand")
1258 (match_operand:DF 1 "memory_operand"))
1259 (set (match_operand:V2DF 2 "sse_reg_operand")
1260 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1261 (match_operand:DF 3 "memory_operand")))]
1262 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1263 && REGNO (operands[4]) == REGNO (operands[2])
1264 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1265 [(set (match_dup 2) (match_dup 5))]
1266 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1268 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
;; Peephole 3: the store-side mirror — low-element store plus
;; high-element store of the same vector to consecutive memory
;; becomes one unaligned V2DF store.
1270 [(set (match_operand:DF 0 "memory_operand")
1271 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1272 (parallel [(const_int 0)])))
1273 (set (match_operand:DF 2 "memory_operand")
1274 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1275 (parallel [(const_int 1)])))]
1276 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1277 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1278 [(set (match_dup 4) (match_dup 1))]
1279 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
;; SSE3 lddqu: unaligned 128/256-bit integer load, kept as an unspec
;; so it is never combined with aligned-load patterns.
1281 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1282 [(set (match_operand:VI1 0 "register_operand" "=x")
1283 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1286 "%vlddqu\t{%1, %0|%0, %1}"
1287 [(set_attr "type" "ssemov")
1288 (set_attr "movu" "1")
;; Legacy-encoding prefix bytes only apply to the non-VEX form.
1289 (set (attr "prefix_data16")
1291 (match_test "TARGET_AVX")
1293 (const_string "0")))
1294 (set (attr "prefix_rep")
1296 (match_test "TARGET_AVX")
1298 (const_string "1")))
1299 (set_attr "prefix" "maybe_vex")
1300 (set_attr "mode" "<sseinsnmode>")])
;; Non-temporal store of a scalar SI/DI general register (movnti).
1302 (define_insn "sse2_movnti<mode>"
1303 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1304 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1307 "movnti\t{%1, %0|%0, %1}"
1308 [(set_attr "type" "ssemov")
1309 (set_attr "prefix_data16" "0")
1310 (set_attr "mode" "<MODE>")])
;; Non-temporal store of a float vector (movntps/movntpd).
1312 (define_insn "<sse>_movnt<mode>"
1313 [(set (match_operand:VF 0 "memory_operand" "=m")
1315 [(match_operand:VF 1 "register_operand" "v")]
1318 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1319 [(set_attr "type" "ssemov")
1320 (set_attr "prefix" "maybe_vex")
1321 (set_attr "mode" "<MODE>")])
;; Non-temporal store of an integer vector (movntdq).
1323 (define_insn "<sse2>_movnt<mode>"
1324 [(set (match_operand:VI8 0 "memory_operand" "=m")
1325 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1328 "%vmovntdq\t{%1, %0|%0, %1}"
1329 [(set_attr "type" "ssecvt")
1330 (set (attr "prefix_data16")
1332 (match_test "TARGET_AVX")
1334 (const_string "1")))
1335 (set_attr "prefix" "maybe_vex")
1336 (set_attr "mode" "<sseinsnmode>")])
1338 ; Expand patterns for non-temporal stores. At the moment, only those
1339 ; that directly map to insns are defined; it would be possible to
1340 ; define patterns for other modes that would expand to several insns.
1342 ;; Modes handled by storent patterns.
;; Each mode is gated on the ISA that provides its non-temporal store
;; instruction (e.g. scalar SF/DF need SSE4A's movntss/movntsd).
1343 (define_mode_iterator STORENT_MODE
1344 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1345 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1346 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1347 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1348 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Standard storent expander mapping onto the insns above.
1350 (define_expand "storent<mode>"
1351 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1352 (unspec:STORENT_MODE
1353 [(match_operand:STORENT_MODE 1 "register_operand")]
1357 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1361 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1363 ;; All integer modes with AVX512BW/DQ.
;; Iterators for mask-register (k-register) operations; the gating
;; reflects which ISA extension provides each operation width.
1364 (define_mode_iterator SWI1248_AVX512BWDQ
1365 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1367 ;; All integer modes with AVX512BW, where HImode operation
1368 ;; can be used instead of QImode.
1369 (define_mode_iterator SWI1248_AVX512BW
1370 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1372 ;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
1373 (define_mode_iterator SWI1248_AVX512BWDQ2
1374 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1375 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
;; Expander for explicit kmovb/kmovw/kmovd/kmovq moves; rejects
;; mem-to-mem moves since kmov has no such form.
1377 (define_expand "kmov<mskmodesuffix>"
1378 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1379 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1381 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
;; Mask-register bitwise AND/IOR/XOR.  The UNSPEC_MASKOP marker keeps
;; these from being combined away as ordinary integer logic.  Without
;; AVX512DQ there is no byte-wide form, so QImode falls back to the
;; word-wide kandw/korw/kxorw (mode attr forced to HI below).
1383 (define_insn "k<code><mode>"
1384 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1385 (any_logic:SWI1248_AVX512BW
1386 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1387 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1388 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1391 if (get_attr_mode (insn) == MODE_HI)
1392 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1394 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1396 [(set_attr "type" "msklog")
1397 (set_attr "prefix" "vex")
1399 (cond [(and (match_test "<MODE>mode == QImode")
1400 (not (match_test "TARGET_AVX512DQ")))
1403 (const_string "<MODE>")))])
;; Mask-register AND-NOT: dest = ~op1 & op2 (kandn).  Same
;; QImode-without-DQ fallback to the word form as k<code> above.
1405 (define_insn "kandn<mode>"
1406 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1407 (and:SWI1248_AVX512BW
1408 (not:SWI1248_AVX512BW
1409 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1410 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1411 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1414 if (get_attr_mode (insn) == MODE_HI)
1415 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1417 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1419 [(set_attr "type" "msklog")
1420 (set_attr "prefix" "vex")
1422 (cond [(and (match_test "<MODE>mode == QImode")
1423 (not (match_test "TARGET_AVX512DQ")))
1426 (const_string "<MODE>")))])
;; Mask-register XNOR: dest = ~(op1 ^ op2) (kxnor).  Same
;; QImode-without-DQ fallback to the word form as k<code> above.
1428 (define_insn "kxnor<mode>"
1429 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1430 (not:SWI1248_AVX512BW
1431 (xor:SWI1248_AVX512BW
1432 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1433 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1434 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1437 if (get_attr_mode (insn) == MODE_HI)
1438 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1440 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1442 [(set_attr "type" "msklog")
1443 (set_attr "prefix" "vex")
1445 (cond [(and (match_test "<MODE>mode == QImode")
1446 (not (match_test "TARGET_AVX512DQ")))
1449 (const_string "<MODE>")))])
;; Mask-register complement (knot).  Same QImode-without-DQ fallback
;; to knotw as the other mask-logic patterns.
1451 (define_insn "knot<mode>"
1452 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1453 (not:SWI1248_AVX512BW
1454 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1455 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1458 if (get_attr_mode (insn) == MODE_HI)
1459 return "knotw\t{%1, %0|%0, %1}";
1461 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1463 [(set_attr "type" "msklog")
1464 (set_attr "prefix" "vex")
1466 (cond [(and (match_test "<MODE>mode == QImode")
1467 (not (match_test "TARGET_AVX512DQ")))
1470 (const_string "<MODE>")))])
;; Mask-register addition (kaddb/w/d/q).  Uses the stricter
;; SWI1248_AVX512BWDQ2 iterator: even the HImode form requires DQ.
1472 (define_insn "kadd<mode>"
1473 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1474 (plus:SWI1248_AVX512BWDQ2
1475 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1476 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1477 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1479 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1480 [(set_attr "type" "msklog")
1481 (set_attr "prefix" "vex")
1482 (set_attr "mode" "<MODE>")])
1484 ;; Mask variant shift mnemonics
1485 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
;; Mask-register shift by immediate (kshiftl*/kshiftr*).
1487 (define_insn "k<code><mode>"
1488 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1489 (any_lshift:SWI1248_AVX512BWDQ
1490 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1491 (match_operand:QI 2 "immediate_operand" "n")))
1492 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1494 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1495 [(set_attr "type" "msklog")
1496 (set_attr "prefix" "vex")
1497 (set_attr "mode" "<MODE>")])
;; ktest: set FLAGS from two mask registers (DQ required even for
;; HImode, hence the BWDQ2 iterator).
1499 (define_insn "ktest<mode>"
1500 [(set (reg:CC FLAGS_REG)
1502 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1503 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1506 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1507 [(set_attr "mode" "<MODE>")
1508 (set_attr "type" "msklog")
1509 (set_attr "prefix" "vex")])
;; kortest: set FLAGS from the OR of two mask registers.
1511 (define_insn "kortest<mode>"
1512 [(set (reg:CC FLAGS_REG)
1514 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1515 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1518 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1519 [(set_attr "mode" "<MODE>")
1520 (set_attr "type" "msklog")
1521 (set_attr "prefix" "vex")])
;; Mask unpack: concatenate two QI masks into an HI mask
;; (kunpckbw; operand 1 becomes the high byte).
1523 (define_insn "kunpckhi"
1524 [(set (match_operand:HI 0 "register_operand" "=k")
1527 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1529 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1531 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1532 [(set_attr "mode" "HI")
1533 (set_attr "type" "msklog")
1534 (set_attr "prefix" "vex")])
;; Concatenate two HI masks into an SI mask (kunpckwd).
1536 (define_insn "kunpcksi"
1537 [(set (match_operand:SI 0 "register_operand" "=k")
1540 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1542 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1544 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1545 [(set_attr "mode" "SI")])
;; Concatenate two SI masks into a DI mask (kunpckdq).
1547 (define_insn "kunpckdi"
1548 [(set (match_operand:DI 0 "register_operand" "=k")
1551 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1553 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1555 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1556 [(set_attr "mode" "DI")])
1559 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1561 ;; Parallel floating point arithmetic
1563 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Standard abs<mode>2/neg<mode>2 expander: delegated to the helper,
;; which sets up the sign/abs mask constant as the second operand.
1565 (define_expand "<code><mode>2"
1566 [(set (match_operand:VF 0 "register_operand")
1568 (match_operand:VF 1 "register_operand")))]
1570 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Combined abs/neg insn carrying the mask in operand 2; split after
;; reload into a plain XOR (neg) or AND (abs) with that mask.
1572 (define_insn_and_split "*absneg<mode>2"
1573 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1574 (match_operator:VF 3 "absneg_operator"
1575 [(match_operand:VF 1 "vector_operand" "0, xBm,v, m")]))
1576 (use (match_operand:VF 2 "vector_operand" "xBm,0, vm,v"))]
1579 "&& reload_completed"
1582 enum rtx_code absneg_op;
;; Canonicalise so the memory operand (if any) is second.
1588 if (MEM_P (operands[1]))
1589 op1 = operands[2], op2 = operands[1];
1591 op1 = operands[1], op2 = operands[2];
1596 if (rtx_equal_p (operands[0], operands[1]))
;; NEG flips the sign bit (XOR with sign mask); ABS clears it
;; (AND with the complement mask supplied by the expander).
1602 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1603 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1604 t = gen_rtx_SET (operands[0], t);
1608 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Vector FP add/sub expander, with optional AVX-512 masking and
;; embedded-rounding decoration via the <mask_name>/<round_name>
;; subst attributes.
1610 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1611 [(set (match_operand:VF 0 "register_operand")
1613 (match_operand:VF 1 "<round_nimm_predicate>")
1614 (match_operand:VF 2 "<round_nimm_predicate>")))]
1615 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1616 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn: alternative 0 is the two-operand legacy SSE form,
;; alternative 1 the three-operand VEX/EVEX form.
1618 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1619 [(set (match_operand:VF 0 "register_operand" "=x,v")
1621 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1622 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1623 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1624 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1626 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1627 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1628 [(set_attr "isa" "noavx,avx")
1629 (set_attr "type" "sseadd")
1630 (set_attr "prefix" "<mask_prefix3>")
1631 (set_attr "mode" "<MODE>")])
;; Scalar (addss/addsd/subss/subsd) variant operating on the low
;; element only; upper elements pass through from operand 1.
1633 (define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
1634 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1637 (match_operand:VF_128 1 "register_operand" "0,v")
1638 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1643 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1644 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1645 [(set_attr "isa" "noavx,avx")
1646 (set_attr "type" "sseadd")
1647 (set_attr "prefix" "<round_scalar_prefix>")
1648 (set_attr "mode" "<ssescalarmode>")])
;; Vector FP multiply expander with optional masking/rounding.
1650 (define_expand "mul<mode>3<mask_name><round_name>"
1651 [(set (match_operand:VF 0 "register_operand")
1653 (match_operand:VF 1 "<round_nimm_predicate>")
1654 (match_operand:VF 2 "<round_nimm_predicate>")))]
1655 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1656 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn; "%0" marks operand 1 commutative with operand 2.
1658 (define_insn "*mul<mode>3<mask_name><round_name>"
1659 [(set (match_operand:VF 0 "register_operand" "=x,v")
1661 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1662 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1664 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
1665 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1667 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1668 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1669 [(set_attr "isa" "noavx,avx")
1670 (set_attr "type" "ssemul")
1671 (set_attr "prefix" "<mask_prefix3>")
1672 (set_attr "btver2_decode" "direct,double")
1673 (set_attr "mode" "<MODE>")])
;; Scalar low-element multiply/divide (mulss/mulsd/divss/divsd);
;; <multdiv_mnemonic> selects the operation.
1675 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1676 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1679 (match_operand:VF_128 1 "register_operand" "0,v")
1680 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1685 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1686 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1687 [(set_attr "isa" "noavx,avx")
1688 (set_attr "type" "sse<multdiv_mnemonic>")
1689 (set_attr "prefix" "<round_scalar_prefix>")
1690 (set_attr "btver2_decode" "direct,double")
1691 (set_attr "mode" "<ssescalarmode>")])
;; Double-precision vector divide: straightforward expansion.
1693 (define_expand "div<mode>3"
1694 [(set (match_operand:VF2 0 "register_operand")
1695 (div:VF2 (match_operand:VF2 1 "register_operand")
1696 (match_operand:VF2 2 "vector_operand")))]
1698 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Single-precision vector divide: may be replaced by a
;; reciprocal-based Newton-Raphson sequence (swdiv) when the
;; fast-math flags and -mrecip settings permit it.
1700 (define_expand "div<mode>3"
1701 [(set (match_operand:VF1 0 "register_operand")
1702 (div:VF1 (match_operand:VF1 1 "register_operand")
1703 (match_operand:VF1 2 "vector_operand")))]
1706 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1709 && TARGET_RECIP_VEC_DIV
1710 && !optimize_insn_for_size_p ()
1711 && flag_finite_math_only && !flag_trapping_math
1712 && flag_unsafe_math_optimizations)
1714 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; The actual divide insn (divps/divpd and VEX/EVEX variants), with
;; optional masking and embedded rounding.
1719 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1720 [(set (match_operand:VF 0 "register_operand" "=x,v")
1722 (match_operand:VF 1 "register_operand" "0,v")
1723 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1724 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1726 div<ssemodesuffix>\t{%2, %0|%0, %2}
1727 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1728 [(set_attr "isa" "noavx,avx")
1729 (set_attr "type" "ssediv")
1730 (set_attr "prefix" "<mask_prefix3>")
1731 (set_attr "mode" "<MODE>")])
;; Packed single-precision reciprocal approximation (rcpps/vrcpps);
;; kept as an unspec since it is not an exact 1/x.
1733 (define_insn "<sse>_rcp<mode>2"
1734 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1736 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
1738 "%vrcpps\t{%1, %0|%0, %1}"
1739 [(set_attr "type" "sse")
1740 (set_attr "atom_sse_attr" "rcp")
1741 (set_attr "btver2_sse_attr" "rcp")
1742 (set_attr "prefix" "maybe_vex")
1743 (set_attr "mode" "<MODE>")])
;; Scalar variant (rcpss): approximates 1/x of the low element; the
;; upper elements come from operand 2.
1745 (define_insn "sse_vmrcpv4sf2"
1746 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1748 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1750 (match_operand:V4SF 2 "register_operand" "0,x")
1754 rcpss\t{%1, %0|%0, %k1}
1755 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1756 [(set_attr "isa" "noavx,avx")
1757 (set_attr "type" "sse")
1758 (set_attr "atom_sse_attr" "rcp")
1759 (set_attr "btver2_sse_attr" "rcp")
1760 (set_attr "prefix" "orig,vex")
1761 (set_attr "mode" "SF")])
;; AVX-512 packed reciprocal approximation (vrcp14ps/pd), with
;; optional write-masking.
1763 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1764 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1766 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1769 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1770 [(set_attr "type" "sse")
1771 (set_attr "prefix" "evex")
1772 (set_attr "mode" "<MODE>")])
;; Scalar vrcp14ss/sd: low element from the approximation of
;; operand 1, upper elements from operand 2.
1774 (define_insn "srcp14<mode>"
1775 [(set (match_operand:VF_128 0 "register_operand" "=v")
1778 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1780 (match_operand:VF_128 2 "register_operand" "v")
1783 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1784 [(set_attr "type" "sse")
1785 (set_attr "prefix" "evex")
1786 (set_attr "mode" "<MODE>")])
;; Masked scalar variant: operand 3 is the pass-through (merge "0"
;; or zero "C"), operand 4 the mask register.
1788 (define_insn "srcp14<mode>_mask"
1789 [(set (match_operand:VF_128 0 "register_operand" "=v")
1793 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1795 (match_operand:VF_128 3 "vector_move_operand" "0C")
1796 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1797 (match_operand:VF_128 2 "register_operand" "v")
1800 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1801 [(set_attr "type" "sse")
1802 (set_attr "prefix" "evex")
1803 (set_attr "mode" "<MODE>")])
;; Double-precision vector sqrt: direct expansion.
1805 (define_expand "sqrt<mode>2"
1806 [(set (match_operand:VF2 0 "register_operand")
1807 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
;; Single-precision vector sqrt: may be replaced by an
;; rsqrt-based Newton-Raphson sequence (swsqrt) when the fast-math
;; flags and -mrecip settings permit it.
1810 (define_expand "sqrt<mode>2"
1811 [(set (match_operand:VF1 0 "register_operand")
1812 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
1816 && TARGET_RECIP_VEC_SQRT
1817 && !optimize_insn_for_size_p ()
1818 && flag_finite_math_only && !flag_trapping_math
1819 && flag_unsafe_math_optimizations)
1821 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed sqrt insn (sqrtps/sqrtpd and VEX/EVEX variants), with
;; optional masking and embedded rounding.
1826 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1827 [(set (match_operand:VF 0 "register_operand" "=x,v")
1828 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1829 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1831 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
1832 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1833 [(set_attr "isa" "noavx,avx")
1834 (set_attr "type" "sse")
1835 (set_attr "atom_sse_attr" "sqrt")
1836 (set_attr "btver2_sse_attr" "sqrt")
1837 (set_attr "prefix" "maybe_vex")
1838 (set_attr "mode" "<MODE>")])
;; Scalar sqrtss/sqrtsd: sqrt of the low element of operand 1,
;; upper elements from operand 2.
1840 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1841 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1844 (match_operand:VF_128 1 "vector_operand" "xBm,<round_constraint>"))
1845 (match_operand:VF_128 2 "register_operand" "0,v")
1849 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1850 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1851 [(set_attr "isa" "noavx,avx")
1852 (set_attr "type" "sse")
1853 (set_attr "atom_sse_attr" "sqrt")
1854 (set_attr "prefix" "<round_prefix>")
1855 (set_attr "btver2_sse_attr" "sqrt")
1856 (set_attr "mode" "<ssescalarmode>")])
;; rsqrt expander for 128/256-bit single-precision: emits the
;; Newton-Raphson refined sequence via the helper.
1858 (define_expand "rsqrt<mode>2"
1859 [(set (match_operand:VF1_128_256 0 "register_operand")
1861 [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
1864 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; 512-bit variant, available with AVX512ER.
1868 (define_expand "rsqrtv16sf2"
1869 [(set (match_operand:V16SF 0 "register_operand")
1871 [(match_operand:V16SF 1 "vector_operand")]
1873 "TARGET_SSE_MATH && TARGET_AVX512ER"
1875 ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
;; Raw packed reciprocal-sqrt approximation insn (rsqrtps/vrsqrtps).
1879 (define_insn "<sse>_rsqrt<mode>2"
1880 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1882 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
1884 "%vrsqrtps\t{%1, %0|%0, %1}"
1885 [(set_attr "type" "sse")
1886 (set_attr "prefix" "maybe_vex")
1887 (set_attr "mode" "<MODE>")])
;; AVX-512 packed reciprocal-sqrt approximation (vrsqrt14ps/pd),
;; with optional write-masking.
1889 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1890 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1892 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1895 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1896 [(set_attr "type" "sse")
1897 (set_attr "prefix" "evex")
1898 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt14ss/sd: low element approximated from operand 1,
;; upper elements from operand 2.
1900 (define_insn "rsqrt14<mode>"
1901 [(set (match_operand:VF_128 0 "register_operand" "=v")
1904 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1906 (match_operand:VF_128 2 "register_operand" "v")
1909 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1910 [(set_attr "type" "sse")
1911 (set_attr "prefix" "evex")
1912 (set_attr "mode" "<MODE>")])
;; Masked scalar variant: operand 3 is the pass-through (merge "0"
;; or zero "C"), operand 4 the mask register.
1914 (define_insn "rsqrt14_<mode>_mask"
1915 [(set (match_operand:VF_128 0 "register_operand" "=v")
1919 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1921 (match_operand:VF_128 3 "vector_move_operand" "0C")
1922 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1923 (match_operand:VF_128 2 "register_operand" "v")
1926 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1927 [(set_attr "type" "sse")
1928 (set_attr "prefix" "evex")
1929 (set_attr "mode" "<MODE>")])
;; Legacy scalar rsqrtss: approximation of the low element of
;; operand 1, upper elements from operand 2.
1931 (define_insn "sse_vmrsqrtv4sf2"
1932 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1934 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1936 (match_operand:V4SF 2 "register_operand" "0,x")
1940 rsqrtss\t{%1, %0|%0, %k1}
1941 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1942 [(set_attr "isa" "noavx,avx")
1943 (set_attr "type" "sse")
1944 (set_attr "prefix" "orig,vex")
1945 (set_attr "mode" "SF")])
;; smax/smin expander.  When -0.0 or NaN must be honoured
;; (!flag_finite_math_only || flag_signed_zeros), dispatch to the
;; IEEE-exact non-commutative pattern below; otherwise fall through
;; to the commutative pattern.
1947 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1948 [(set (match_operand:VF 0 "register_operand")
1950 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1951 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1952 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1954 if (!flag_finite_math_only || flag_signed_zeros)
1956 operands[1] = force_reg (<MODE>mode, operands[1]);
1957 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
1958 (operands[0], operands[1], operands[2]
1959 <mask_operand_arg34>
1960 <round_saeonly_mask_arg3>));
1964 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1967 ;; These versions of the min/max patterns are intentionally ignorant of
1968 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
1969 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
1970 ;; are undefined in this condition, we're certain this is correct.
;; Commutative max/min insn (maxps/minps etc.); see the comment
;; above — only valid when -0.0/NaN behaviour is undefined.
1972 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1973 [(set (match_operand:VF 0 "register_operand" "=x,v")
1975 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1976 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
1978 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
1979 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1981 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1982 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1983 [(set_attr "isa" "noavx,avx")
1984 (set_attr "type" "sseadd")
1985 (set_attr "btver2_sse_attr" "maxmin")
1986 (set_attr "prefix" "<mask_prefix3>")
1987 (set_attr "mode" "<MODE>")])
1989 ;; These versions of the min/max patterns implement exactly the operations
1990 ;; min = (op1 < op2 ? op1 : op2)
1991 ;; max = (!(op1 < op2) ? op1 : op2)
1992 ;; Their operands are not commutative, and thus they may be used in the
1993 ;; presence of -0.0 and NaN.
;; IEEE-exact max/min (see comment above): operands deliberately NOT
;; commutative, matching the hardware maxp*/minp* semantics for
;; -0.0 and NaN.
1995 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
1996 [(set (match_operand:VF 0 "register_operand" "=x,v")
1998 [(match_operand:VF 1 "register_operand" "0,v")
1999 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2002 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2004 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2005 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2006 [(set_attr "isa" "noavx,avx")
2007 (set_attr "type" "sseadd")
2008 (set_attr "btver2_sse_attr" "maxmin")
2009 (set_attr "prefix" "<mask_prefix3>")
2010 (set_attr "mode" "<MODE>")])
;; Scalar maxss/maxsd/minss/minsd: operate on the low element only,
;; upper elements from operand 1.
2012 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2013 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2016 (match_operand:VF_128 1 "register_operand" "0,v")
2017 (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_scalar_constraint>"))
2022 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2023 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2024 [(set_attr "isa" "noavx,avx")
2025 (set_attr "type" "sse")
2026 (set_attr "btver2_sse_attr" "maxmin")
2027 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2028 (set_attr "mode" "<ssescalarmode>")])
;; SSE3/AVX addsub: alternately subtracts (even elements) and adds
;; (odd elements), modelled as a vec_merge of a minus and a plus of
;; the same operands.  256-bit double-precision form:
2030 (define_insn "avx_addsubv4df3"
2031 [(set (match_operand:V4DF 0 "register_operand" "=x")
2034 (match_operand:V4DF 1 "register_operand" "x")
2035 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2036 (plus:V4DF (match_dup 1) (match_dup 2))
2039 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2040 [(set_attr "type" "sseadd")
2041 (set_attr "prefix" "vex")
2042 (set_attr "mode" "V4DF")])
;; 128-bit double-precision form (addsubpd), SSE3 and AVX encodings.
2044 (define_insn "sse3_addsubv2df3"
2045 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2048 (match_operand:V2DF 1 "register_operand" "0,x")
2049 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2050 (plus:V2DF (match_dup 1) (match_dup 2))
2054 addsubpd\t{%2, %0|%0, %2}
2055 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2056 [(set_attr "isa" "noavx,avx")
2057 (set_attr "type" "sseadd")
2058 (set_attr "atom_unit" "complex")
2059 (set_attr "prefix" "orig,vex")
2060 (set_attr "mode" "V2DF")])
;; 256-bit single-precision form (vaddsubps).
2062 (define_insn "avx_addsubv8sf3"
2063 [(set (match_operand:V8SF 0 "register_operand" "=x")
2066 (match_operand:V8SF 1 "register_operand" "x")
2067 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2068 (plus:V8SF (match_dup 1) (match_dup 2))
2071 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2072 [(set_attr "type" "sseadd")
2073 (set_attr "prefix" "vex")
2074 (set_attr "mode" "V8SF")])
;; 128-bit single-precision form (addsubps), SSE3 and AVX encodings.
2076 (define_insn "sse3_addsubv4sf3"
2077 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2080 (match_operand:V4SF 1 "register_operand" "0,x")
2081 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2082 (plus:V4SF (match_dup 1) (match_dup 2))
2086 addsubps\t{%2, %0|%0, %2}
2087 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2088 [(set_attr "isa" "noavx,avx")
2089 (set_attr "type" "sseadd")
2090 (set_attr "prefix" "orig,vex")
2091 (set_attr "prefix_rep" "1,*")
2092 (set_attr "mode" "V4SF")])
2095 [(set (match_operand:VF_128_256 0 "register_operand")
2096 (match_operator:VF_128_256 6 "addsub_vm_operator"
2098 (match_operand:VF_128_256 1 "register_operand")
2099 (match_operand:VF_128_256 2 "vector_operand"))
2101 (match_operand:VF_128_256 3 "vector_operand")
2102 (match_operand:VF_128_256 4 "vector_operand"))
2103 (match_operand 5 "const_int_operand")]))]
2105 && can_create_pseudo_p ()
2106 && ((rtx_equal_p (operands[1], operands[3])
2107 && rtx_equal_p (operands[2], operands[4]))
2108 || (rtx_equal_p (operands[1], operands[4])
2109 && rtx_equal_p (operands[2], operands[3])))"
2111 (vec_merge:VF_128_256
2112 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2113 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2117 [(set (match_operand:VF_128_256 0 "register_operand")
2118 (match_operator:VF_128_256 6 "addsub_vm_operator"
2120 (match_operand:VF_128_256 1 "vector_operand")
2121 (match_operand:VF_128_256 2 "vector_operand"))
2123 (match_operand:VF_128_256 3 "register_operand")
2124 (match_operand:VF_128_256 4 "vector_operand"))
2125 (match_operand 5 "const_int_operand")]))]
2127 && can_create_pseudo_p ()
2128 && ((rtx_equal_p (operands[1], operands[3])
2129 && rtx_equal_p (operands[2], operands[4]))
2130 || (rtx_equal_p (operands[1], operands[4])
2131 && rtx_equal_p (operands[2], operands[3])))"
2133 (vec_merge:VF_128_256
2134 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2135 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2138 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
2140 = GEN_INT (~INTVAL (operands[5])
2141 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
2145 [(set (match_operand:VF_128_256 0 "register_operand")
2146 (match_operator:VF_128_256 7 "addsub_vs_operator"
2147 [(vec_concat:<ssedoublemode>
2149 (match_operand:VF_128_256 1 "register_operand")
2150 (match_operand:VF_128_256 2 "vector_operand"))
2152 (match_operand:VF_128_256 3 "vector_operand")
2153 (match_operand:VF_128_256 4 "vector_operand")))
2154 (match_parallel 5 "addsub_vs_parallel"
2155 [(match_operand 6 "const_int_operand")])]))]
2157 && can_create_pseudo_p ()
2158 && ((rtx_equal_p (operands[1], operands[3])
2159 && rtx_equal_p (operands[2], operands[4]))
2160 || (rtx_equal_p (operands[1], operands[4])
2161 && rtx_equal_p (operands[2], operands[3])))"
2163 (vec_merge:VF_128_256
2164 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2165 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2168 int i, nelt = XVECLEN (operands[5], 0);
2169 HOST_WIDE_INT ival = 0;
2171 for (i = 0; i < nelt; i++)
2172 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2173 ival |= HOST_WIDE_INT_1 << i;
2175 operands[5] = GEN_INT (ival);
2179 [(set (match_operand:VF_128_256 0 "register_operand")
2180 (match_operator:VF_128_256 7 "addsub_vs_operator"
2181 [(vec_concat:<ssedoublemode>
2183 (match_operand:VF_128_256 1 "vector_operand")
2184 (match_operand:VF_128_256 2 "vector_operand"))
2186 (match_operand:VF_128_256 3 "register_operand")
2187 (match_operand:VF_128_256 4 "vector_operand")))
2188 (match_parallel 5 "addsub_vs_parallel"
2189 [(match_operand 6 "const_int_operand")])]))]
2191 && can_create_pseudo_p ()
2192 && ((rtx_equal_p (operands[1], operands[3])
2193 && rtx_equal_p (operands[2], operands[4]))
2194 || (rtx_equal_p (operands[1], operands[4])
2195 && rtx_equal_p (operands[2], operands[3])))"
2197 (vec_merge:VF_128_256
2198 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2199 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2202 int i, nelt = XVECLEN (operands[5], 0);
2203 HOST_WIDE_INT ival = 0;
2205 for (i = 0; i < nelt; i++)
2206 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2207 ival |= HOST_WIDE_INT_1 << i;
2209 operands[5] = GEN_INT (ival);
2212 (define_insn "avx_h<plusminus_insn>v4df3"
2213 [(set (match_operand:V4DF 0 "register_operand" "=x")
2218 (match_operand:V4DF 1 "register_operand" "x")
2219 (parallel [(const_int 0)]))
2220 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2223 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2224 (parallel [(const_int 0)]))
2225 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2228 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2229 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2231 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2232 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2234 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2235 [(set_attr "type" "sseadd")
2236 (set_attr "prefix" "vex")
2237 (set_attr "mode" "V4DF")])
2239 (define_expand "sse3_haddv2df3"
2240 [(set (match_operand:V2DF 0 "register_operand")
2244 (match_operand:V2DF 1 "register_operand")
2245 (parallel [(const_int 0)]))
2246 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2249 (match_operand:V2DF 2 "vector_operand")
2250 (parallel [(const_int 0)]))
2251 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2254 (define_insn "*sse3_haddv2df3"
2255 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2259 (match_operand:V2DF 1 "register_operand" "0,x")
2260 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2263 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2266 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2267 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2270 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2272 && INTVAL (operands[3]) != INTVAL (operands[4])
2273 && INTVAL (operands[5]) != INTVAL (operands[6])"
2275 haddpd\t{%2, %0|%0, %2}
2276 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2277 [(set_attr "isa" "noavx,avx")
2278 (set_attr "type" "sseadd")
2279 (set_attr "prefix" "orig,vex")
2280 (set_attr "mode" "V2DF")])
2282 (define_insn "sse3_hsubv2df3"
2283 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2287 (match_operand:V2DF 1 "register_operand" "0,x")
2288 (parallel [(const_int 0)]))
2289 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2292 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2293 (parallel [(const_int 0)]))
2294 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2297 hsubpd\t{%2, %0|%0, %2}
2298 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2299 [(set_attr "isa" "noavx,avx")
2300 (set_attr "type" "sseadd")
2301 (set_attr "prefix" "orig,vex")
2302 (set_attr "mode" "V2DF")])
2304 (define_insn "*sse3_haddv2df3_low"
2305 [(set (match_operand:DF 0 "register_operand" "=x,x")
2308 (match_operand:V2DF 1 "register_operand" "0,x")
2309 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2312 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2314 && INTVAL (operands[2]) != INTVAL (operands[3])"
2316 haddpd\t{%0, %0|%0, %0}
2317 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2318 [(set_attr "isa" "noavx,avx")
2319 (set_attr "type" "sseadd1")
2320 (set_attr "prefix" "orig,vex")
2321 (set_attr "mode" "V2DF")])
2323 (define_insn "*sse3_hsubv2df3_low"
2324 [(set (match_operand:DF 0 "register_operand" "=x,x")
2327 (match_operand:V2DF 1 "register_operand" "0,x")
2328 (parallel [(const_int 0)]))
2331 (parallel [(const_int 1)]))))]
2334 hsubpd\t{%0, %0|%0, %0}
2335 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2336 [(set_attr "isa" "noavx,avx")
2337 (set_attr "type" "sseadd1")
2338 (set_attr "prefix" "orig,vex")
2339 (set_attr "mode" "V2DF")])
2341 (define_insn "avx_h<plusminus_insn>v8sf3"
2342 [(set (match_operand:V8SF 0 "register_operand" "=x")
2348 (match_operand:V8SF 1 "register_operand" "x")
2349 (parallel [(const_int 0)]))
2350 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2352 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2353 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2357 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2358 (parallel [(const_int 0)]))
2359 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2361 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2362 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2366 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2367 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2369 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2370 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2373 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2374 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2376 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2377 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2379 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2380 [(set_attr "type" "sseadd")
2381 (set_attr "prefix" "vex")
2382 (set_attr "mode" "V8SF")])
2384 (define_insn "sse3_h<plusminus_insn>v4sf3"
2385 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2390 (match_operand:V4SF 1 "register_operand" "0,x")
2391 (parallel [(const_int 0)]))
2392 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2394 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2395 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2399 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2400 (parallel [(const_int 0)]))
2401 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2403 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2404 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2407 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2408 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2409 [(set_attr "isa" "noavx,avx")
2410 (set_attr "type" "sseadd")
2411 (set_attr "atom_unit" "complex")
2412 (set_attr "prefix" "orig,vex")
2413 (set_attr "prefix_rep" "1,*")
2414 (set_attr "mode" "V4SF")])
2416 (define_expand "reduc_plus_scal_v8df"
2417 [(match_operand:DF 0 "register_operand")
2418 (match_operand:V8DF 1 "register_operand")]
2421 rtx tmp = gen_reg_rtx (V8DFmode);
2422 ix86_expand_reduc (gen_addv8df3, tmp, operands[1]);
2423 emit_insn (gen_vec_extractv8dfdf (operands[0], tmp, const0_rtx));
2427 (define_expand "reduc_plus_scal_v4df"
2428 [(match_operand:DF 0 "register_operand")
2429 (match_operand:V4DF 1 "register_operand")]
2432 rtx tmp = gen_reg_rtx (V4DFmode);
2433 rtx tmp2 = gen_reg_rtx (V4DFmode);
2434 rtx vec_res = gen_reg_rtx (V4DFmode);
2435 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2436 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2437 emit_insn (gen_addv4df3 (vec_res, tmp, tmp2));
2438 emit_insn (gen_vec_extractv4dfdf (operands[0], vec_res, const0_rtx));
2442 (define_expand "reduc_plus_scal_v2df"
2443 [(match_operand:DF 0 "register_operand")
2444 (match_operand:V2DF 1 "register_operand")]
2447 rtx tmp = gen_reg_rtx (V2DFmode);
2448 emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1]));
2449 emit_insn (gen_vec_extractv2dfdf (operands[0], tmp, const0_rtx));
2453 (define_expand "reduc_plus_scal_v16sf"
2454 [(match_operand:SF 0 "register_operand")
2455 (match_operand:V16SF 1 "register_operand")]
2458 rtx tmp = gen_reg_rtx (V16SFmode);
2459 ix86_expand_reduc (gen_addv16sf3, tmp, operands[1]);
2460 emit_insn (gen_vec_extractv16sfsf (operands[0], tmp, const0_rtx));
2464 (define_expand "reduc_plus_scal_v8sf"
2465 [(match_operand:SF 0 "register_operand")
2466 (match_operand:V8SF 1 "register_operand")]
2469 rtx tmp = gen_reg_rtx (V8SFmode);
2470 rtx tmp2 = gen_reg_rtx (V8SFmode);
2471 rtx vec_res = gen_reg_rtx (V8SFmode);
2472 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2473 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2474 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2475 emit_insn (gen_addv8sf3 (vec_res, tmp, tmp2));
2476 emit_insn (gen_vec_extractv8sfsf (operands[0], vec_res, const0_rtx));
2480 (define_expand "reduc_plus_scal_v4sf"
2481 [(match_operand:SF 0 "register_operand")
2482 (match_operand:V4SF 1 "register_operand")]
2485 rtx vec_res = gen_reg_rtx (V4SFmode);
2488 rtx tmp = gen_reg_rtx (V4SFmode);
2489 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2490 emit_insn (gen_sse3_haddv4sf3 (vec_res, tmp, tmp));
2493 ix86_expand_reduc (gen_addv4sf3, vec_res, operands[1]);
2494 emit_insn (gen_vec_extractv4sfsf (operands[0], vec_res, const0_rtx));
2498 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each vector mode is paired with the minimum ISA condition under which
;; the element-wise signed smin/smax operation used by the reduction
;; expander is available: AVX2 for 256-bit integer modes, AVX for
;; 256-bit float modes, SSE for V4SF, and AVX512F/AVX512BW for the
;; 512-bit modes (BW is required for the byte/word element sizes).
2499 (define_mode_iterator REDUC_SMINMAX_MODE
2500 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2501 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2502 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2503 (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2504 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2505 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2506 (V8DF "TARGET_AVX512F")])
2508 (define_expand "reduc_<code>_scal_<mode>"
2509 [(smaxmin:REDUC_SMINMAX_MODE
2510 (match_operand:<ssescalarmode> 0 "register_operand")
2511 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2514 rtx tmp = gen_reg_rtx (<MODE>mode);
2515 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2516 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2521 (define_expand "reduc_<code>_scal_<mode>"
2522 [(umaxmin:VI_AVX512BW
2523 (match_operand:<ssescalarmode> 0 "register_operand")
2524 (match_operand:VI_AVX512BW 1 "register_operand"))]
2527 rtx tmp = gen_reg_rtx (<MODE>mode);
2528 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2529 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2534 (define_expand "reduc_<code>_scal_<mode>"
2536 (match_operand:<ssescalarmode> 0 "register_operand")
2537 (match_operand:VI_256 1 "register_operand"))]
2540 rtx tmp = gen_reg_rtx (<MODE>mode);
2541 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2542 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2547 (define_expand "reduc_umin_scal_v8hi"
2549 (match_operand:HI 0 "register_operand")
2550 (match_operand:V8HI 1 "register_operand"))]
2553 rtx tmp = gen_reg_rtx (V8HImode);
2554 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2555 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
2559 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2560 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2562 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2563 (match_operand:SI 2 "const_0_to_255_operand")]
2566 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2567 [(set_attr "type" "sse")
2568 (set_attr "prefix" "evex")
2569 (set_attr "mode" "<MODE>")])
2571 (define_insn "reduces<mode><mask_scalar_name>"
2572 [(set (match_operand:VF_128 0 "register_operand" "=v")
2575 [(match_operand:VF_128 1 "register_operand" "v")
2576 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2577 (match_operand:SI 3 "const_0_to_255_operand")]
2582 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2, %3}"
2583 [(set_attr "type" "sse")
2584 (set_attr "prefix" "evex")
2585 (set_attr "mode" "<MODE>")])
2587 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2589 ;; Parallel floating point comparisons
2591 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2593 (define_insn "avx_cmp<mode>3"
2594 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2596 [(match_operand:VF_128_256 1 "register_operand" "x")
2597 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2598 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2601 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2602 [(set_attr "type" "ssecmp")
2603 (set_attr "length_immediate" "1")
2604 (set_attr "prefix" "vex")
2605 (set_attr "mode" "<MODE>")])
2607 (define_insn "avx_vmcmp<mode>3"
2608 [(set (match_operand:VF_128 0 "register_operand" "=x")
2611 [(match_operand:VF_128 1 "register_operand" "x")
2612 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2613 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2618 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2619 [(set_attr "type" "ssecmp")
2620 (set_attr "length_immediate" "1")
2621 (set_attr "prefix" "vex")
2622 (set_attr "mode" "<ssescalarmode>")])
2624 (define_insn "*<sse>_maskcmp<mode>3_comm"
2625 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2626 (match_operator:VF_128_256 3 "sse_comparison_operator"
2627 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2628 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2630 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2632 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2633 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2634 [(set_attr "isa" "noavx,avx")
2635 (set_attr "type" "ssecmp")
2636 (set_attr "length_immediate" "1")
2637 (set_attr "prefix" "orig,vex")
2638 (set_attr "mode" "<MODE>")])
2640 (define_insn "<sse>_maskcmp<mode>3"
2641 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2642 (match_operator:VF_128_256 3 "sse_comparison_operator"
2643 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2644 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2647 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2648 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2649 [(set_attr "isa" "noavx,avx")
2650 (set_attr "type" "ssecmp")
2651 (set_attr "length_immediate" "1")
2652 (set_attr "prefix" "orig,vex")
2653 (set_attr "mode" "<MODE>")])
2655 (define_insn "<sse>_vmmaskcmp<mode>3"
2656 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2658 (match_operator:VF_128 3 "sse_comparison_operator"
2659 [(match_operand:VF_128 1 "register_operand" "0,x")
2660 (match_operand:VF_128 2 "vector_operand" "xBm,xm")])
2665 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2666 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2667 [(set_attr "isa" "noavx,avx")
2668 (set_attr "type" "ssecmp")
2669 (set_attr "length_immediate" "1,*")
2670 (set_attr "prefix" "orig,vex")
2671 (set_attr "mode" "<ssescalarmode>")])
;; Predicate for the comparison-immediate operand of the AVX512
;; vcmp/vpcmp patterns that use this attribute: floating-point modes
;; accept the full 5-bit comparison predicate (0..31), while integer
;; modes accept only the 3-bit predicate (0..7).
2673 (define_mode_attr cmp_imm_predicate
2674 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2675 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2676 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2677 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2678 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2679 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2680 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2681 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2682 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
2684 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2685 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2686 (unspec:<avx512fmaskmode>
2687 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2688 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2689 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2691 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2692 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2693 [(set_attr "type" "ssecmp")
2694 (set_attr "length_immediate" "1")
2695 (set_attr "prefix" "evex")
2696 (set_attr "mode" "<sseinsnmode>")])
2698 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2699 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2700 (unspec:<avx512fmaskmode>
2701 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2702 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2703 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2706 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2707 [(set_attr "type" "ssecmp")
2708 (set_attr "length_immediate" "1")
2709 (set_attr "prefix" "evex")
2710 (set_attr "mode" "<sseinsnmode>")])
2712 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2713 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2714 (unspec:<avx512fmaskmode>
2715 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2716 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2717 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2718 UNSPEC_UNSIGNED_PCMP))]
2720 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2721 [(set_attr "type" "ssecmp")
2722 (set_attr "length_immediate" "1")
2723 (set_attr "prefix" "evex")
2724 (set_attr "mode" "<sseinsnmode>")])
2726 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2727 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2728 (unspec:<avx512fmaskmode>
2729 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2730 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2731 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2732 UNSPEC_UNSIGNED_PCMP))]
2734 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2735 [(set_attr "type" "ssecmp")
2736 (set_attr "length_immediate" "1")
2737 (set_attr "prefix" "evex")
2738 (set_attr "mode" "<sseinsnmode>")])
2740 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2741 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2742 (and:<avx512fmaskmode>
2743 (unspec:<avx512fmaskmode>
2744 [(match_operand:VF_128 1 "register_operand" "v")
2745 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2746 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2750 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2751 [(set_attr "type" "ssecmp")
2752 (set_attr "length_immediate" "1")
2753 (set_attr "prefix" "evex")
2754 (set_attr "mode" "<ssescalarmode>")])
2756 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2757 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2758 (and:<avx512fmaskmode>
2759 (unspec:<avx512fmaskmode>
2760 [(match_operand:VF_128 1 "register_operand" "v")
2761 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2762 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2764 (and:<avx512fmaskmode>
2765 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2768 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2769 [(set_attr "type" "ssecmp")
2770 (set_attr "length_immediate" "1")
2771 (set_attr "prefix" "evex")
2772 (set_attr "mode" "<ssescalarmode>")])
2774 (define_insn "avx512f_maskcmp<mode>3"
2775 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2776 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2777 [(match_operand:VF 1 "register_operand" "v")
2778 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2780 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2781 [(set_attr "type" "ssecmp")
2782 (set_attr "length_immediate" "1")
2783 (set_attr "prefix" "evex")
2784 (set_attr "mode" "<sseinsnmode>")])
2786 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
2787 [(set (reg:CCFP FLAGS_REG)
2790 (match_operand:<ssevecmode> 0 "register_operand" "v")
2791 (parallel [(const_int 0)]))
2793 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2794 (parallel [(const_int 0)]))))]
2795 "SSE_FLOAT_MODE_P (<MODE>mode)"
2796 "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2797 [(set_attr "type" "ssecomi")
2798 (set_attr "prefix" "maybe_vex")
2799 (set_attr "prefix_rep" "0")
2800 (set (attr "prefix_data16")
2801 (if_then_else (eq_attr "mode" "DF")
2803 (const_string "0")))
2804 (set_attr "mode" "<MODE>")])
2806 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2807 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2808 (match_operator:<avx512fmaskmode> 1 ""
2809 [(match_operand:V48_AVX512VL 2 "register_operand")
2810 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
2813 bool ok = ix86_expand_mask_vec_cmp (operands);
2818 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2819 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2820 (match_operator:<avx512fmaskmode> 1 ""
2821 [(match_operand:VI12_AVX512VL 2 "register_operand")
2822 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2825 bool ok = ix86_expand_mask_vec_cmp (operands);
2830 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2831 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2832 (match_operator:<sseintvecmode> 1 ""
2833 [(match_operand:VI_256 2 "register_operand")
2834 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2837 bool ok = ix86_expand_int_vec_cmp (operands);
2842 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2843 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2844 (match_operator:<sseintvecmode> 1 ""
2845 [(match_operand:VI124_128 2 "register_operand")
2846 (match_operand:VI124_128 3 "vector_operand")]))]
2849 bool ok = ix86_expand_int_vec_cmp (operands);
2854 (define_expand "vec_cmpv2div2di"
2855 [(set (match_operand:V2DI 0 "register_operand")
2856 (match_operator:V2DI 1 ""
2857 [(match_operand:V2DI 2 "register_operand")
2858 (match_operand:V2DI 3 "vector_operand")]))]
2861 bool ok = ix86_expand_int_vec_cmp (operands);
2866 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2867 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2868 (match_operator:<sseintvecmode> 1 ""
2869 [(match_operand:VF_256 2 "register_operand")
2870 (match_operand:VF_256 3 "nonimmediate_operand")]))]
2873 bool ok = ix86_expand_fp_vec_cmp (operands);
2878 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2879 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2880 (match_operator:<sseintvecmode> 1 ""
2881 [(match_operand:VF_128 2 "register_operand")
2882 (match_operand:VF_128 3 "vector_operand")]))]
2885 bool ok = ix86_expand_fp_vec_cmp (operands);
2890 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2891 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2892 (match_operator:<avx512fmaskmode> 1 ""
2893 [(match_operand:VI48_AVX512VL 2 "register_operand")
2894 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
2897 bool ok = ix86_expand_mask_vec_cmp (operands);
2902 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2903 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2904 (match_operator:<avx512fmaskmode> 1 ""
2905 [(match_operand:VI12_AVX512VL 2 "register_operand")
2906 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2909 bool ok = ix86_expand_mask_vec_cmp (operands);
2914 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
2915 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2916 (match_operator:<sseintvecmode> 1 ""
2917 [(match_operand:VI_256 2 "register_operand")
2918 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2921 bool ok = ix86_expand_int_vec_cmp (operands);
2926 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
2927 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2928 (match_operator:<sseintvecmode> 1 ""
2929 [(match_operand:VI124_128 2 "register_operand")
2930 (match_operand:VI124_128 3 "vector_operand")]))]
2933 bool ok = ix86_expand_int_vec_cmp (operands);
2938 (define_expand "vec_cmpuv2div2di"
2939 [(set (match_operand:V2DI 0 "register_operand")
2940 (match_operator:V2DI 1 ""
2941 [(match_operand:V2DI 2 "register_operand")
2942 (match_operand:V2DI 3 "vector_operand")]))]
2945 bool ok = ix86_expand_int_vec_cmp (operands);
2950 (define_expand "vec_cmpeqv2div2di"
2951 [(set (match_operand:V2DI 0 "register_operand")
2952 (match_operator:V2DI 1 ""
2953 [(match_operand:V2DI 2 "register_operand")
2954 (match_operand:V2DI 3 "vector_operand")]))]
2957 bool ok = ix86_expand_int_vec_cmp (operands);
2962 (define_expand "vcond<V_512:mode><VF_512:mode>"
2963 [(set (match_operand:V_512 0 "register_operand")
2965 (match_operator 3 ""
2966 [(match_operand:VF_512 4 "nonimmediate_operand")
2967 (match_operand:VF_512 5 "nonimmediate_operand")])
2968 (match_operand:V_512 1 "general_operand")
2969 (match_operand:V_512 2 "general_operand")))]
2971 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2972 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2974 bool ok = ix86_expand_fp_vcond (operands);
2979 (define_expand "vcond<V_256:mode><VF_256:mode>"
2980 [(set (match_operand:V_256 0 "register_operand")
2982 (match_operator 3 ""
2983 [(match_operand:VF_256 4 "nonimmediate_operand")
2984 (match_operand:VF_256 5 "nonimmediate_operand")])
2985 (match_operand:V_256 1 "general_operand")
2986 (match_operand:V_256 2 "general_operand")))]
2988 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2989 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2991 bool ok = ix86_expand_fp_vcond (operands);
2996 (define_expand "vcond<V_128:mode><VF_128:mode>"
2997 [(set (match_operand:V_128 0 "register_operand")
2999 (match_operator 3 ""
3000 [(match_operand:VF_128 4 "vector_operand")
3001 (match_operand:VF_128 5 "vector_operand")])
3002 (match_operand:V_128 1 "general_operand")
3003 (match_operand:V_128 2 "general_operand")))]
3005 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3006 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3008 bool ok = ix86_expand_fp_vcond (operands);
3013 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3014 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3015 (vec_merge:V48_AVX512VL
3016 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3017 (match_operand:V48_AVX512VL 2 "vector_move_operand")
3018 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3021 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3022 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3023 (vec_merge:VI12_AVX512VL
3024 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3025 (match_operand:VI12_AVX512VL 2 "vector_move_operand")
3026 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3029 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3030 [(set (match_operand:VI_256 0 "register_operand")
3032 (match_operand:VI_256 1 "nonimmediate_operand")
3033 (match_operand:VI_256 2 "vector_move_operand")
3034 (match_operand:<sseintvecmode> 3 "register_operand")))]
3037 ix86_expand_sse_movcc (operands[0], operands[3],
3038 operands[1], operands[2]);
3042 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3043 [(set (match_operand:VI124_128 0 "register_operand")
3044 (vec_merge:VI124_128
3045 (match_operand:VI124_128 1 "vector_operand")
3046 (match_operand:VI124_128 2 "vector_move_operand")
3047 (match_operand:<sseintvecmode> 3 "register_operand")))]
3050 ix86_expand_sse_movcc (operands[0], operands[3],
3051 operands[1], operands[2]);
3055 (define_expand "vcond_mask_v2div2di"
3056 [(set (match_operand:V2DI 0 "register_operand")
3058 (match_operand:V2DI 1 "vector_operand")
3059 (match_operand:V2DI 2 "vector_move_operand")
3060 (match_operand:V2DI 3 "register_operand")))]
3063 ix86_expand_sse_movcc (operands[0], operands[3],
3064 operands[1], operands[2]);
3068 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3069 [(set (match_operand:VF_256 0 "register_operand")
3071 (match_operand:VF_256 1 "nonimmediate_operand")
3072 (match_operand:VF_256 2 "vector_move_operand")
3073 (match_operand:<sseintvecmode> 3 "register_operand")))]
3076 ix86_expand_sse_movcc (operands[0], operands[3],
3077 operands[1], operands[2]);
;; 128-bit float vcond_mask; mirrors the VF_256 expander above.
3081 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3082 [(set (match_operand:VF_128 0 "register_operand")
3084 (match_operand:VF_128 1 "vector_operand")
3085 (match_operand:VF_128 2 "vector_move_operand")
3086 (match_operand:<sseintvecmode> 3 "register_operand")))]
3089 ix86_expand_sse_movcc (operands[0], operands[3],
3090 operands[1], operands[2]);
3094 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3096 ;; Parallel floating point logical operations
3098 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point ANDNOT (dest = ~op1 & op2) for 128/256-bit modes.
;; Alternatives: SSE andnps/andnpd, AVX vandnps/vandnpd, and EVEX
;; forms.  There is no vandnps/vandnpd in plain AVX512F, so without
;; AVX512DQ the integer vpandnd/vpandnq is emitted instead (suffix
;; "q" for DF elements, "d" otherwise); the C block below builds the
;; template string accordingly.
3100 (define_insn "<sse>_andnot<mode>3<mask_name>"
3101 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3104 (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3105 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3106 "TARGET_SSE && <mask_avx512vl_condition>"
3108 static char buf[128];
3112 switch (which_alternative)
3115 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3120 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3126 switch (get_attr_mode (insn))
3134 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3135 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3136 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3139 suffix = "<ssemodesuffix>";
3142 snprintf (buf, sizeof (buf), ops, suffix);
3145 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3146 (set_attr "type" "sselog")
3147 (set_attr "prefix" "orig,maybe_vex,evex,evex")
3149 (cond [(and (match_test "<mask_applied>")
3150 (and (eq_attr "alternative" "1")
3151 (match_test "!TARGET_AVX512DQ")))
3152 (const_string "<sseintvecmode2>")
3153 (eq_attr "alternative" "3")
3154 (const_string "<sseintvecmode2>")
3155 (and (match_test "<MODE_SIZE> == 16")
3156 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3157 (const_string "<ssePSmode>")
3158 (match_test "TARGET_AVX")
3159 (const_string "<MODE>")
3160 (match_test "optimize_function_for_size_p (cfun)")
3161 (const_string "V4SF")
3163 (const_string "<MODE>")))])
;; 512-bit floating-point ANDNOT.  Without AVX512DQ there is no
;; vandnps/vandnpd, so the integer vpandnd/vpandnq form is emitted
;; (mode attr then reports XI instead of the float vector mode).
3166 (define_insn "<sse>_andnot<mode>3<mask_name>"
3167 [(set (match_operand:VF_512 0 "register_operand" "=v")
3170 (match_operand:VF_512 1 "register_operand" "v"))
3171 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3174 static char buf[128];
3178 suffix = "<ssemodesuffix>";
3181 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3182 if (!TARGET_AVX512DQ)
3184 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3188 snprintf (buf, sizeof (buf),
3189 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3193 [(set_attr "type" "sselog")
3194 (set_attr "prefix" "evex")
3196 (if_then_else (match_test "TARGET_AVX512DQ")
3197 (const_string "<sseinsnmode>")
3198 (const_string "XI")))])
;; Expanders for FP and/ior/xor (any_logic).  Both just canonicalize
;; the operands (e.g. force at most one memory operand) via
;; ix86_fixup_binary_operands_no_copy; matching insns follow below.
3200 (define_expand "<code><mode>3<mask_name>"
3201 [(set (match_operand:VF_128_256 0 "register_operand")
3202 (any_logic:VF_128_256
3203 (match_operand:VF_128_256 1 "vector_operand")
3204 (match_operand:VF_128_256 2 "vector_operand")))]
3205 "TARGET_SSE && <mask_avx512vl_condition>"
3206 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 512-bit variant of the same expander.
3208 (define_expand "<code><mode>3<mask_name>"
3209 [(set (match_operand:VF_512 0 "register_operand")
3211 (match_operand:VF_512 1 "nonimmediate_operand")
3212 (match_operand:VF_512 2 "nonimmediate_operand")))]
3214 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; FP and/ior/xor insn for 128/256-bit modes.  Like the ANDNOT pattern
;; above: plain AVX512F lacks v<logic>ps/pd, so without AVX512DQ the
;; integer vp<logic>d/vp<logic>q form is used; the C block composes
;; the template with the proper suffix.  Commutative (%-constraint on
;; operand 1); both operands may not be memory.
3216 (define_insn "*<code><mode>3<mask_name>"
3217 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3218 (any_logic:VF_128_256
3219 (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3220 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3221 "TARGET_SSE && <mask_avx512vl_condition>
3222 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3224 static char buf[128];
3228 switch (which_alternative)
3231 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3236 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3242 switch (get_attr_mode (insn))
3250 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
3251 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3252 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3255 suffix = "<ssemodesuffix>";
3258 snprintf (buf, sizeof (buf), ops, suffix);
3261 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3262 (set_attr "type" "sselog")
3263 (set_attr "prefix" "orig,maybe_evex,evex,evex")
3265 (cond [(and (match_test "<mask_applied>")
3266 (and (eq_attr "alternative" "1")
3267 (match_test "!TARGET_AVX512DQ")))
3268 (const_string "<sseintvecmode2>")
3269 (eq_attr "alternative" "3")
3270 (const_string "<sseintvecmode2>")
3271 (and (match_test "<MODE_SIZE> == 16")
3272 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3273 (const_string "<ssePSmode>")
3274 (match_test "TARGET_AVX")
3275 (const_string "<MODE>")
3276 (match_test "optimize_function_for_size_p (cfun)")
3277 (const_string "V4SF")
3279 (const_string "<MODE>")))])
;; 512-bit FP and/ior/xor.  Falls back to vp<logic>d/q without
;; AVX512DQ, in which case the insn mode is reported as XI.
3281 (define_insn "*<code><mode>3<mask_name>"
3282 [(set (match_operand:VF_512 0 "register_operand" "=v")
3284 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3285 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3286 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3288 static char buf[128];
3292 suffix = "<ssemodesuffix>";
3295 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3296 if (!TARGET_AVX512DQ)
3298 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3302 snprintf (buf, sizeof (buf),
3303 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3307 [(set_attr "type" "sselog")
3308 (set_attr "prefix" "evex")
3310 (if_then_else (match_test "TARGET_AVX512DQ")
3311 (const_string "<sseinsnmode>")
3312 (const_string "XI")))])
;; copysign expansion: op3 = sign-bit mask (built by
;; ix86_build_signbit_mask), op4 = magnitude of op1 (~mask & op1),
;; op5 = sign of op2 (mask & op2); result = op4 | op5.
3314 (define_expand "copysign<mode>3"
3317 (not:VF (match_dup 3))
3318 (match_operand:VF 1 "vector_operand")))
3320 (and:VF (match_dup 3)
3321 (match_operand:VF 2 "vector_operand")))
3322 (set (match_operand:VF 0 "register_operand")
3323 (ior:VF (match_dup 4) (match_dup 5)))]
3326 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3328 operands[4] = gen_reg_rtx (<MODE>mode);
3329 operands[5] = gen_reg_rtx (<MODE>mode);
3332 ;; Also define scalar versions. These are used for abs, neg, and
3333 ;; conditional move. Using subregs into vector modes causes register
3334 ;; allocation lossage. These patterns do not allow memory operands
3335 ;; because the native instructions read the full 128-bits.
;; Scalar (SF/DF) ANDNOT performed on the full vector register; used
;; for abs/neg/cmove.  Register-only operands: the underlying vector
;; instructions read the full 128 bits, so memory is not allowed.
;; EVEX alternatives 2/3 fall back to vpandnd/q without AVX512DQ;
;; alternative 3 operates on the zmm view (%g operand modifiers).
3337 (define_insn "*andnot<mode>3"
3338 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3341 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
3342 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3343 "SSE_FLOAT_MODE_P (<MODE>mode)"
3345 static char buf[128];
3348 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3350 switch (which_alternative)
3353 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3356 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3359 if (TARGET_AVX512DQ)
3360 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3363 suffix = <MODE>mode == DFmode ? "q" : "d";
3364 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3368 if (TARGET_AVX512DQ)
3369 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3372 suffix = <MODE>mode == DFmode ? "q" : "d";
3373 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3380 snprintf (buf, sizeof (buf), ops, suffix);
3383 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3384 (set_attr "type" "sselog")
3385 (set_attr "prefix" "orig,vex,evex,evex")
3387 (cond [(eq_attr "alternative" "2")
3388 (if_then_else (match_test "TARGET_AVX512DQ")
3389 (const_string "<ssevecmode>")
3390 (const_string "TI"))
3391 (eq_attr "alternative" "3")
3392 (if_then_else (match_test "TARGET_AVX512DQ")
3393 (const_string "<avx512fvecmode>")
3394 (const_string "XI"))
3395 (and (match_test "<MODE_SIZE> == 16")
3396 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3397 (const_string "V4SF")
3398 (match_test "TARGET_AVX")
3399 (const_string "<ssevecmode>")
3400 (match_test "optimize_function_for_size_p (cfun)")
3401 (const_string "V4SF")
3403 (const_string "<ssevecmode>")))])
;; TFmode (128-bit) ANDNOT.  Mnemonic choice: pandnq for the EVEX
;; alternatives, andnps when V4SF mode is preferred, else pandn.
;; Alternative 3 widens to the zmm view via %g modifiers.
3405 (define_insn "*andnottf3"
3406 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3408 (and:TF (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
3409 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3412 static char buf[128];
3415 = (which_alternative >= 2 ? "pandnq"
3416 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
3418 switch (which_alternative)
3421 ops = "%s\t{%%2, %%0|%%0, %%2}";
3425 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3428 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3434 snprintf (buf, sizeof (buf), ops, tmp);
3437 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3438 (set_attr "type" "sselog")
3439 (set (attr "prefix_data16")
3441 (and (eq_attr "alternative" "0")
3442 (eq_attr "mode" "TI"))
3444 (const_string "*")))
3445 (set_attr "prefix" "orig,vex,evex,evex")
3447 (cond [(eq_attr "alternative" "2")
3449 (eq_attr "alternative" "3")
3451 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3452 (const_string "V4SF")
3453 (match_test "TARGET_AVX")
3455 (ior (not (match_test "TARGET_SSE2"))
3456 (match_test "optimize_function_for_size_p (cfun)"))
3457 (const_string "V4SF")
3459 (const_string "TI")))])
;; Scalar (SF/DF) and/ior/xor on the full vector register; register
;; operands only for the same reason as *andnot<mode>3 above.
;; EVEX alternatives use vp<logic>d/q when AVX512DQ is unavailable;
;; alternative 3 uses the zmm view (%g modifiers).
3461 (define_insn "*<code><mode>3"
3462 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3464 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
3465 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3466 "SSE_FLOAT_MODE_P (<MODE>mode)"
3468 static char buf[128];
3471 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3473 switch (which_alternative)
3476 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3479 if (!TARGET_AVX512DQ)
3481 suffix = <MODE>mode == DFmode ? "q" : "d";
3482 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3487 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3490 if (TARGET_AVX512DQ)
3491 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3494 suffix = <MODE>mode == DFmode ? "q" : "d";
3495 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3502 snprintf (buf, sizeof (buf), ops, suffix);
3505 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3506 (set_attr "type" "sselog")
3507 (set_attr "prefix" "orig,vex,evex,evex")
3509 (cond [(eq_attr "alternative" "2")
3510 (if_then_else (match_test "TARGET_AVX512DQ")
3511 (const_string "<ssevecmode>")
3512 (const_string "TI"))
3513 (eq_attr "alternative" "3")
3514 (if_then_else (match_test "TARGET_AVX512DQ")
3515 (const_string "<avx512fvecmode>")
3516 (const_string "XI"))
3517 (and (match_test "<MODE_SIZE> == 16")
3518 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3519 (const_string "V4SF")
3520 (match_test "TARGET_AVX")
3521 (const_string "<ssevecmode>")
3522 (match_test "optimize_function_for_size_p (cfun)")
3523 (const_string "V4SF")
3525 (const_string "<ssevecmode>")))])
;; TFmode and/ior/xor expander; operand canonicalization only.
3527 (define_expand "<code>tf3"
3528 [(set (match_operand:TF 0 "register_operand")
3530 (match_operand:TF 1 "vector_operand")
3531 (match_operand:TF 2 "vector_operand")))]
3533 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode and/ior/xor insn.  Mnemonic: p<logic>q for EVEX
;; alternatives, <logic>ps when V4SF is preferred, else p<logic>.
;; Commutative; both operands may not be memory.
3535 (define_insn "*<code>tf3"
3536 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3538 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
3539 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3540 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3542 static char buf[128];
3545 = (which_alternative >= 2 ? "p<logic>q"
3546 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
3548 switch (which_alternative)
3551 ops = "%s\t{%%2, %%0|%%0, %%2}";
3555 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3558 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3564 snprintf (buf, sizeof (buf), ops, tmp);
3567 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3568 (set_attr "type" "sselog")
3569 (set (attr "prefix_data16")
3571 (and (eq_attr "alternative" "0")
3572 (eq_attr "mode" "TI"))
3574 (const_string "*")))
3575 (set_attr "prefix" "orig,vex,evex,evex")
3577 (cond [(eq_attr "alternative" "2")
3579 (eq_attr "alternative" "3")
3581 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3582 (const_string "V4SF")
3583 (match_test "TARGET_AVX")
3585 (ior (not (match_test "TARGET_SSE2"))
3586 (match_test "optimize_function_for_size_p (cfun)"))
3587 (const_string "V4SF")
3589 (const_string "TI")))])
3591 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3593 ;; FMA floating point multiply/accumulate instructions. These include
3594 ;; scalar versions of the instructions as well as vector versions.
3596 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3598 ;; The standard names for scalar FMA are only available with SSE math enabled.
3599 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3600 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3601 ;; and TARGET_FMA4 are both false.
3602 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3603 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3604 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3605 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-name FMA expanders below.  Scalar modes
;; additionally require SSE math; vector modes are gated on the ISA
;; that can encode the corresponding fma instruction.
3606 (define_mode_iterator FMAMODEM
3607 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3608 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3609 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3610 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3611 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3612 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3613 (V16SF "TARGET_AVX512F")
3614 (V8DF "TARGET_AVX512F")])
;; Standard-name expanders for the four fused multiply-add shapes:
;; fma = a*b+c, fms = a*b-c, fnma = -(a*b)+c, fnms = -(a*b)-c.
;; No preparation code: the generic fma RTL matches the insns below.
3616 (define_expand "fma<mode>4"
3617 [(set (match_operand:FMAMODEM 0 "register_operand")
3619 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3620 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3621 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3623 (define_expand "fms<mode>4"
3624 [(set (match_operand:FMAMODEM 0 "register_operand")
3626 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3627 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3628 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3630 (define_expand "fnma<mode>4"
3631 [(set (match_operand:FMAMODEM 0 "register_operand")
3633 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3634 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3635 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3637 (define_expand "fnms<mode>4"
3638 [(set (match_operand:FMAMODEM 0 "register_operand")
3640 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3641 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3642 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3644 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM but without the TARGET_SSE_MATH gate on
;; the scalar modes, for use by the intrinsic expanders.
3645 (define_mode_iterator FMAMODE_AVX512
3646 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3647 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3648 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3649 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3650 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3651 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3652 (V16SF "TARGET_AVX512F")
3653 (V8DF "TARGET_AVX512F")])
;; Unconditional mode list for the TARGET_FMA/TARGET_FMA4 insns.
3655 (define_mode_iterator FMAMODE
3656 [SF DF V4SF V2DF V8SF V4DF])
;; Intrinsic fmadd expander (no preparation code needed).
3658 (define_expand "fma4i_fmadd_<mode>"
3659 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3661 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3662 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3663 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
;; Zero-masked fmadd: forwards to the maskz_1 insn with a zero vector
;; as the merge source, so masked-off lanes become 0.
3665 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3666 [(match_operand:VF_AVX512VL 0 "register_operand")
3667 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3668 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3669 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3670 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3671 "TARGET_AVX512F && <round_mode512bit_condition>"
3673 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3674 operands[0], operands[1], operands[2], operands[3],
3675 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; FMA/FMA4 fmadd (a*b+c).  Alternatives 0-2 are the three-operand
;; FMA3 forms (132/213/231, chosen by which operand is tied to the
;; destination); alternatives 3-4 are four-operand FMA4.
3679 (define_insn "*fma_fmadd_<mode>"
3680 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3682 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3683 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3684 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3685 "TARGET_FMA || TARGET_FMA4"
3687 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3688 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3689 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3690 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3691 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3692 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3693 (set_attr "type" "ssemuladd")
3694 (set_attr "mode" "<MODE>")])
3696 ;; Suppose AVX-512F as baseline
;; Scalar + vector FP modes usable by the EVEX-encoded FMA patterns.
3697 (define_mode_iterator VF_SF_AVX512VL
3698 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3699 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; EVEX fmadd with optional zero-masking and rounding substs.
3701 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3702 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3704 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3705 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3706 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3707 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3709 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3710 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3711 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3712 [(set_attr "type" "ssemuladd")
3713 (set_attr "mode" "<MODE>")])
;; Merge-masked fmadd: result merges with operand 1 (the multiplicand
;; tied to the destination), so only 132/213 forms apply here.
3715 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3716 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3717 (vec_merge:VF_AVX512VL
3719 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3720 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3721 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3723 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3724 "TARGET_AVX512F && <round_mode512bit_condition>"
3726 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3727 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3728 [(set_attr "type" "ssemuladd")
3729 (set_attr "mode" "<MODE>")])
;; Merge-masked fmadd with the addend (operand 3) tied to the
;; destination, hence the single 231 form.
3731 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3732 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3733 (vec_merge:VF_AVX512VL
3735 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3736 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3737 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3739 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3741 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3742 [(set_attr "type" "ssemuladd")
3743 (set_attr "mode" "<MODE>")])
;; FMA/FMA4 fmsub (a*b-c); same alternative layout as *fma_fmadd.
3745 (define_insn "*fma_fmsub_<mode>"
3746 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3748 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3749 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3751 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3752 "TARGET_FMA || TARGET_FMA4"
3754 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3755 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3756 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3757 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3758 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3759 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3760 (set_attr "type" "ssemuladd")
3761 (set_attr "mode" "<MODE>")])
;; EVEX fmsub with optional zero-masking and rounding substs.
3763 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3764 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3766 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3767 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3769 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3770 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3772 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3773 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3774 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3775 [(set_attr "type" "ssemuladd")
3776 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsub; merges with operand 1, so 132/213 forms only.
3778 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3779 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3780 (vec_merge:VF_AVX512VL
3782 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3783 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3785 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3787 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3790 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3791 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3792 [(set_attr "type" "ssemuladd")
3793 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsub with the subtrahend tied to the destination
;; (231 form).
3795 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3796 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3797 (vec_merge:VF_AVX512VL
3799 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3800 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3802 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3804 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3805 "TARGET_AVX512F && <round_mode512bit_condition>"
3806 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3807 [(set_attr "type" "ssemuladd")
3808 (set_attr "mode" "<MODE>")])
;; FMA/FMA4 fnmadd (-(a*b)+c); same alternative layout as *fma_fmadd.
3810 (define_insn "*fma_fnmadd_<mode>"
3811 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3814 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3815 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3816 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3817 "TARGET_FMA || TARGET_FMA4"
3819 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3820 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3821 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3822 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3823 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3824 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3825 (set_attr "type" "ssemuladd")
3826 (set_attr "mode" "<MODE>")])
;; EVEX fnmadd with optional zero-masking and rounding substs.
3828 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3829 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3832 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3833 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3834 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3835 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3837 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3838 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3839 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3840 [(set_attr "type" "ssemuladd")
3841 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmadd; merges with operand 1, so 132/213 forms only.
3843 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3844 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3845 (vec_merge:VF_AVX512VL
3848 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3849 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3850 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3852 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3853 "TARGET_AVX512F && <round_mode512bit_condition>"
3855 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3856 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3857 [(set_attr "type" "ssemuladd")
3858 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmadd with the addend tied to the destination
;; (231 form).
3860 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3861 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3862 (vec_merge:VF_AVX512VL
3865 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3866 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3867 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3869 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3870 "TARGET_AVX512F && <round_mode512bit_condition>"
3871 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3872 [(set_attr "type" "ssemuladd")
3873 (set_attr "mode" "<MODE>")])
;; FMA/FMA4 fnmsub (-(a*b)-c); same alternative layout as *fma_fmadd.
;; BUG FIX: the first three (FMA3) templates wrongly carried the
;; AVX512 subst operands <round_sd_mask_op4>/<sd_mask_op4>, which
;; belong only to the <sd_maskz_name><round_name> EVEX patterns.
;; This plain pattern is enabled for TARGET_FMA || TARGET_FMA4
;; (isa fma,fma,fma,fma4,fma4) and must use plain operand templates,
;; exactly like the sibling *fma_fmadd/_fmsub/_fnmadd patterns.
3875 (define_insn "*fma_fnmsub_<mode>"
3876 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3879 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3880 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3882 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3883 "TARGET_FMA || TARGET_FMA4"
3885 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3886 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3887 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3888 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3889 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3890 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3891 (set_attr "type" "ssemuladd")
3892 (set_attr "mode" "<MODE>")])
;; EVEX fnmsub with optional zero-masking and rounding substs.
3894 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3895 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3898 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3899 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3901 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3902 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3904 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3905 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3906 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3907 [(set_attr "type" "ssemuladd")
3908 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmsub; merges with operand 1, so 132/213 forms only.
3910 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3911 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3912 (vec_merge:VF_AVX512VL
3915 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3916 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3918 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3920 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3921 "TARGET_AVX512F && <round_mode512bit_condition>"
3923 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3924 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3925 [(set_attr "type" "ssemuladd")
3926 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmsub with operand 3 tied to the destination
;; (231 form).
3928 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3929 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3930 (vec_merge:VF_AVX512VL
3933 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3934 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3936 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3938 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3940 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3941 [(set_attr "type" "ssemuladd")
3942 (set_attr "mode" "<MODE>")])
3944 ;; FMA parallel floating point multiply addsub and subadd operations.
3946 ;; It would be possible to represent these without the UNSPEC as
3949 ;; (fma op1 op2 op3)
3950 ;; (fma op1 op2 (neg op3))
3953 ;; But this doesn't seem useful in practice.
;; fmaddsub expander (UNSPEC-based: odd lanes add, even lanes
;; subtract — see the comment block above).
3955 (define_expand "fmaddsub_<mode>"
3956 [(set (match_operand:VF 0 "register_operand")
3958 [(match_operand:VF 1 "nonimmediate_operand")
3959 (match_operand:VF 2 "nonimmediate_operand")
3960 (match_operand:VF 3 "nonimmediate_operand")]
3962 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; Zero-masked fmaddsub: forwards to the maskz_1 insn with a zero
;; vector as merge source (masked-off lanes become 0).
3964 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3965 [(match_operand:VF_AVX512VL 0 "register_operand")
3966 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3967 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3968 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3969 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3972 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3973 operands[0], operands[1], operands[2], operands[3],
3974 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; FMA/FMA4 fmaddsub for 128/256-bit modes; alternative layout
;; mirrors *fma_fmadd (FMA3 132/213/231, then FMA4).
3978 (define_insn "*fma_fmaddsub_<mode>"
3979 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3981 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3982 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3983 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
3985 "TARGET_FMA || TARGET_FMA4"
3987 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3988 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3989 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3990 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3991 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3992 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3993 (set_attr "type" "ssemuladd")
3994 (set_attr "mode" "<MODE>")])
;; EVEX fmaddsub with optional zero-masking and rounding substs.
3996 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3997 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3998 (unspec:VF_SF_AVX512VL
3999 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4000 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4001 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
4003 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4005 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4006 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4007 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4008 [(set_attr "type" "ssemuladd")
4009 (set_attr "mode" "<MODE>")])
;; Merge-masked fmaddsub; merges with operand 1, so 132/213 forms.
4011 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4012 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4013 (vec_merge:VF_AVX512VL
4015 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4016 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4017 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
4020 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4023 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4024 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4025 [(set_attr "type" "ssemuladd")
4026 (set_attr "mode" "<MODE>")])
4028 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4029 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4030 (vec_merge:VF_AVX512VL
4032 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4033 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4034 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4037 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4039 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4040 [(set_attr "type" "ssemuladd")
4041 (set_attr "mode" "<MODE>")])
;; vfmsubadd: like vfmaddsub but with add in even lanes and subtract in odd
;; lanes.  First three alternatives are FMA3 (destructive, 132/213/231);
;; the last two are FMA4 (4-operand, non-destructive).
4043 (define_insn "*fma_fmsubadd_<mode>"
4044 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4046 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4047 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4049 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4051 "TARGET_FMA || TARGET_FMA4"
4053 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4054 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4055 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4056 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4057 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4058 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4059 (set_attr "type" "ssemuladd")
4060 (set_attr "mode" "<MODE>")])

;; AVX-512 vfmsubadd with zero-masking and embedded rounding; mirror image
;; of the fmaddsub pattern above.
4062 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4063 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4064 (unspec:VF_SF_AVX512VL
4065 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4066 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4068 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4070 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4072 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4073 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4074 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4075 [(set_attr "type" "ssemuladd")
4076 (set_attr "mode" "<MODE>")])

;; Merge-masked vfmsubadd; masked-off lanes keep operand 1 (tied to dest).
4078 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4079 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4080 (vec_merge:VF_AVX512VL
4082 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4083 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4085 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
4088 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4091 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4092 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4093 [(set_attr "type" "ssemuladd")
4094 (set_attr "mode" "<MODE>")])

;; "_mask3": addend tied to destination, forcing the 231 encoding.
4096 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4097 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4098 (vec_merge:VF_AVX512VL
4100 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4101 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4103 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4106 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4108 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4109 [(set_attr "type" "ssemuladd")
4110 (set_attr "mode" "<MODE>")])
4112 ;; FMA3 floating point scalar intrinsics. These merge result with
4113 ;; high-order elements from the destination register.
;; Expander for the scalar FMA intrinsic; matching is done by the
;; *fmai_* insn patterns below.
4115 (define_expand "fmai_vmfmadd_<mode><round_name>"
4116 [(set (match_operand:VF_128 0 "register_operand")
4119 (match_operand:VF_128 1 "<round_nimm_predicate>")
4120 (match_operand:VF_128 2 "<round_nimm_predicate>")
4121 (match_operand:VF_128 3 "<round_nimm_predicate>"))

;; Scalar vfmadd: computes op1*op2+op3 in element 0; the remaining elements
;; come from operand 1 (tied to the destination in both alternatives).
4126 (define_insn "*fmai_fmadd_<mode>"
4127 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4130 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4131 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
4132 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
4135 "TARGET_FMA || TARGET_AVX512F"
4137 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4138 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4139 [(set_attr "type" "ssemuladd")
4140 (set_attr "mode" "<MODE>")])

;; Scalar vfmsub: op1*op2-op3 in element 0.
4142 (define_insn "*fmai_fmsub_<mode>"
4143 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4146 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
4147 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4149 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4152 "TARGET_FMA || TARGET_AVX512F"
4154 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4155 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4156 [(set_attr "type" "ssemuladd")
4157 (set_attr "mode" "<MODE>")])

;; Scalar vfnmadd: -(op1*op2)+op3 in element 0 (operand 2 negated).
4159 (define_insn "*fmai_fnmadd_<mode><round_name>"
4160 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4164 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4165 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
4166 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4169 "TARGET_FMA || TARGET_AVX512F"
4171 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4172 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4173 [(set_attr "type" "ssemuladd")
4174 (set_attr "mode" "<MODE>")])

;; Scalar vfnmsub: -(op1*op2)-op3 in element 0.
4176 (define_insn "*fmai_fnmsub_<mode><round_name>"
4177 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4181 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
4182 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4184 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4187 "TARGET_FMA || TARGET_AVX512F"
4189 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4190 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4191 [(set_attr "type" "ssemuladd")
4192 (set_attr "mode" "<MODE>")])
4194 ;; FMA4 floating point scalar intrinsics. These write the
4195 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the FMA4 scalar intrinsic: unlike FMA3's merge semantics,
;; FMA4 zeroes the upper elements, which operand 4 (forced to const0)
;; expresses in RTL.
4197 (define_expand "fma4i_vmfmadd_<mode>"
4198 [(set (match_operand:VF_128 0 "register_operand")
4201 (match_operand:VF_128 1 "nonimmediate_operand")
4202 (match_operand:VF_128 2 "nonimmediate_operand")
4203 (match_operand:VF_128 3 "nonimmediate_operand"))
4207 "operands[4] = CONST0_RTX (<MODE>mode);")

;; FMA4 scalar vfmadd: op1*op2+op3 in element 0, upper elements zeroed
;; (operand 4 is the zero vector being merged in).
4209 (define_insn "*fma4i_vmfmadd_<mode>"
4210 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4213 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4214 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4215 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4216 (match_operand:VF_128 4 "const0_operand")
4219 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4220 [(set_attr "type" "ssemuladd")
4221 (set_attr "mode" "<MODE>")])

;; FMA4 scalar vfmsub: op1*op2-op3.
4223 (define_insn "*fma4i_vmfmsub_<mode>"
4224 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4227 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4228 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4230 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4231 (match_operand:VF_128 4 "const0_operand")
4234 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4235 [(set_attr "type" "ssemuladd")
4236 (set_attr "mode" "<MODE>")])

;; FMA4 scalar vfnmadd: -(op1*op2)+op3.
4238 (define_insn "*fma4i_vmfnmadd_<mode>"
4239 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4243 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4244 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4245 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4246 (match_operand:VF_128 4 "const0_operand")
4249 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4250 [(set_attr "type" "ssemuladd")
4251 (set_attr "mode" "<MODE>")])

;; FMA4 scalar vfnmsub: -(op1*op2)-op3.
4253 (define_insn "*fma4i_vmfnmsub_<mode>"
4254 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4258 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4259 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4261 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4262 (match_operand:VF_128 4 "const0_operand")
4265 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4266 [(set_attr "type" "ssemuladd")
4267 (set_attr "mode" "<MODE>")])
4269 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4271 ;; Parallel single-precision floating point conversion operations
4273 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: convert two packed MMX integers to SF, merged into the low
;; half of the destination V4SF (operand 1 supplies the untouched half).
4275 (define_insn "sse_cvtpi2ps"
4276 [(set (match_operand:V4SF 0 "register_operand" "=x")
4279 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
4280 (match_operand:V4SF 1 "register_operand" "0")
4283 "cvtpi2ps\t{%2, %0|%0, %2}"
4284 [(set_attr "type" "ssecvt")
4285 (set_attr "mode" "V4SF")])

;; cvtps2pi: round-convert the two low SF elements to an MMX register
;; (UNSPEC_FIX_NOTRUNC = convert using the current rounding mode).
4287 (define_insn "sse_cvtps2pi"
4288 [(set (match_operand:V2SI 0 "register_operand" "=y")
4290 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
4292 (parallel [(const_int 0) (const_int 1)])))]
4294 "cvtps2pi\t{%1, %0|%0, %q1}"
4295 [(set_attr "type" "ssecvt")
4296 (set_attr "unit" "mmx")
4297 (set_attr "mode" "DI")])

;; cvttps2pi: same but with truncation toward zero (plain RTL "fix").
4299 (define_insn "sse_cvttps2pi"
4300 [(set (match_operand:V2SI 0 "register_operand" "=y")
4302 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
4303 (parallel [(const_int 0) (const_int 1)])))]
4305 "cvttps2pi\t{%1, %0|%0, %q1}"
4306 [(set_attr "type" "ssecvt")
4307 (set_attr "unit" "mmx")
4308 (set_attr "prefix_rep" "0")
4309 (set_attr "mode" "SF")])
;; cvtsi2ss: convert a 32-bit integer to SF in element 0 of the vector;
;; remaining elements are merged from operand 1.  Alternatives: SSE with
;; reg source, SSE with mem source, and AVX/AVX-512 three-operand form
;; (which also supports embedded rounding via <round_name>).
4311 (define_insn "sse_cvtsi2ss<round_name>"
4312 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4315 (float:SF (match_operand:SI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4316 (match_operand:V4SF 1 "register_operand" "0,0,v")
4320 cvtsi2ss\t{%2, %0|%0, %2}
4321 cvtsi2ss\t{%2, %0|%0, %2}
4322 vcvtsi2ss\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4323 [(set_attr "isa" "noavx,noavx,avx")
4324 (set_attr "type" "sseicvt")
4325 (set_attr "athlon_decode" "vector,double,*")
4326 (set_attr "amdfam10_decode" "vector,double,*")
4327 (set_attr "bdver1_decode" "double,direct,*")
4328 (set_attr "btver2_decode" "double,double,double")
4329 (set_attr "znver1_decode" "double,double,double")
4330 (set_attr "prefix" "orig,orig,maybe_evex")
4331 (set_attr "mode" "SF")])

;; 64-bit variant (cvtsi2ssq), only valid on TARGET_64BIT.
4333 (define_insn "sse_cvtsi2ssq<round_name>"
4334 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4337 (float:SF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4338 (match_operand:V4SF 1 "register_operand" "0,0,v")
4340 "TARGET_SSE && TARGET_64BIT"
4342 cvtsi2ssq\t{%2, %0|%0, %2}
4343 cvtsi2ssq\t{%2, %0|%0, %2}
4344 vcvtsi2ssq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4345 [(set_attr "isa" "noavx,noavx,avx")
4346 (set_attr "type" "sseicvt")
4347 (set_attr "athlon_decode" "vector,double,*")
4348 (set_attr "amdfam10_decode" "vector,double,*")
4349 (set_attr "bdver1_decode" "double,direct,*")
4350 (set_attr "btver2_decode" "double,double,double")
4351 (set_attr "length_vex" "*,*,4")
4352 (set_attr "prefix_rex" "1,1,*")
4353 (set_attr "prefix" "orig,orig,maybe_evex")
4354 (set_attr "mode" "SF")])
;; cvtss2si: element 0 of a V4SF to SImode using the current rounding mode
;; (UNSPEC_FIX_NOTRUNC).  %v emits the "v" prefix when AVX is enabled.
4356 (define_insn "sse_cvtss2si<round_name>"
4357 [(set (match_operand:SI 0 "register_operand" "=r,r")
4360 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4361 (parallel [(const_int 0)]))]
4362 UNSPEC_FIX_NOTRUNC))]
4364 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4365 [(set_attr "type" "sseicvt")
4366 (set_attr "athlon_decode" "double,vector")
4367 (set_attr "bdver1_decode" "double,double")
4368 (set_attr "prefix_rep" "1")
4369 (set_attr "prefix" "maybe_vex")
4370 (set_attr "mode" "SI")])

;; "_2" variant: source is a scalar SF (possibly in memory) rather than a
;; vector element.
4372 (define_insn "sse_cvtss2si_2"
4373 [(set (match_operand:SI 0 "register_operand" "=r,r")
4374 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4375 UNSPEC_FIX_NOTRUNC))]
4377 "%vcvtss2si\t{%1, %0|%0, %k1}"
4378 [(set_attr "type" "sseicvt")
4379 (set_attr "athlon_decode" "double,vector")
4380 (set_attr "amdfam10_decode" "double,double")
4381 (set_attr "bdver1_decode" "double,double")
4382 (set_attr "prefix_rep" "1")
4383 (set_attr "prefix" "maybe_vex")
4384 (set_attr "mode" "SI")])

;; 64-bit result (cvtss2si with REX.W, spelled {q}); TARGET_64BIT only.
4386 (define_insn "sse_cvtss2siq<round_name>"
4387 [(set (match_operand:DI 0 "register_operand" "=r,r")
4390 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4391 (parallel [(const_int 0)]))]
4392 UNSPEC_FIX_NOTRUNC))]
4393 "TARGET_SSE && TARGET_64BIT"
4394 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4395 [(set_attr "type" "sseicvt")
4396 (set_attr "athlon_decode" "double,vector")
4397 (set_attr "bdver1_decode" "double,double")
4398 (set_attr "prefix_rep" "1")
4399 (set_attr "prefix" "maybe_vex")
4400 (set_attr "mode" "DI")])

;; Scalar-SF source, 64-bit result.
4402 (define_insn "sse_cvtss2siq_2"
4403 [(set (match_operand:DI 0 "register_operand" "=r,r")
4404 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4405 UNSPEC_FIX_NOTRUNC))]
4406 "TARGET_SSE && TARGET_64BIT"
4407 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
4408 [(set_attr "type" "sseicvt")
4409 (set_attr "athlon_decode" "double,vector")
4410 (set_attr "amdfam10_decode" "double,double")
4411 (set_attr "bdver1_decode" "double,double")
4412 (set_attr "prefix_rep" "1")
4413 (set_attr "prefix" "maybe_vex")
4414 (set_attr "mode" "DI")])

;; cvttss2si: truncating (toward-zero) conversion; supports SAE-only
;; rounding syntax (<round_saeonly_name>) under AVX-512.
4416 (define_insn "sse_cvttss2si<round_saeonly_name>"
4417 [(set (match_operand:SI 0 "register_operand" "=r,r")
4420 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4421 (parallel [(const_int 0)]))))]
4423 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4424 [(set_attr "type" "sseicvt")
4425 (set_attr "athlon_decode" "double,vector")
4426 (set_attr "amdfam10_decode" "double,double")
4427 (set_attr "bdver1_decode" "double,double")
4428 (set_attr "prefix_rep" "1")
4429 (set_attr "prefix" "maybe_vex")
4430 (set_attr "mode" "SI")])

;; Truncating conversion to a 64-bit integer.
4432 (define_insn "sse_cvttss2siq<round_saeonly_name>"
4433 [(set (match_operand:DI 0 "register_operand" "=r,r")
4436 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
4437 (parallel [(const_int 0)]))))]
4438 "TARGET_SSE && TARGET_64BIT"
4439 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4440 [(set_attr "type" "sseicvt")
4441 (set_attr "athlon_decode" "double,vector")
4442 (set_attr "amdfam10_decode" "double,double")
4443 (set_attr "bdver1_decode" "double,double")
4444 (set_attr "prefix_rep" "1")
4445 (set_attr "prefix" "maybe_vex")
4446 (set_attr "mode" "DI")])
;; AVX-512F vcvtusi2ss/sd: unsigned 32-bit integer to the scalar element of
;; a 128-bit vector; the other elements merge from operand 1.
4448 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
4449 [(set (match_operand:VF_128 0 "register_operand" "=v")
4451 (vec_duplicate:VF_128
4452 (unsigned_float:<ssescalarmode>
4453 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4454 (match_operand:VF_128 1 "register_operand" "v")
4456 "TARGET_AVX512F && <round_modev4sf_condition>"
4457 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4458 [(set_attr "type" "sseicvt")
4459 (set_attr "prefix" "evex")
4460 (set_attr "mode" "<ssescalarmode>")])

;; Unsigned 64-bit integer source; TARGET_64BIT only.
4462 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
4463 [(set (match_operand:VF_128 0 "register_operand" "=v")
4465 (vec_duplicate:VF_128
4466 (unsigned_float:<ssescalarmode>
4467 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4468 (match_operand:VF_128 1 "register_operand" "v")
4470 "TARGET_AVX512F && TARGET_64BIT"
4471 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4472 [(set_attr "type" "sseicvt")
4473 (set_attr "prefix" "evex")
4474 (set_attr "mode" "<ssescalarmode>")])

;; Packed signed int -> float (cvtdq2ps), with optional AVX-512 masking
;; and rounding for the EVEX alternative.
4476 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4477 [(set (match_operand:VF1 0 "register_operand" "=x,v")
4479 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
4480 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4482 cvtdq2ps\t{%1, %0|%0, %1}
4483 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4484 [(set_attr "isa" "noavx,avx")
4485 (set_attr "type" "ssecvt")
4486 (set_attr "prefix" "maybe_vex")
4487 (set_attr "mode" "<sseinsnmode>")])

;; Packed unsigned int -> float (vcvtudq2ps), AVX-512 only (EVEX).
4489 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4490 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4491 (unsigned_float:VF1_AVX512VL
4492 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4494 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4495 [(set_attr "type" "ssecvt")
4496 (set_attr "prefix" "evex")
4497 (set_attr "mode" "<MODE>")])
;; Expand unsigned int -> float: use the native vcvtudq2ps when AVX-512
;; (512-bit always, 128/256-bit with AVX512VL) provides it, otherwise fall
;; back to the software sequence in ix86_expand_vector_convert_uns_vsivsf.
4499 (define_expand "floatuns<sseintvecmodelower><mode>2"
4500 [(match_operand:VF1 0 "register_operand")
4501 (match_operand:<sseintvecmode> 1 "register_operand")]
4502 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4504 if (<MODE>mode == V16SFmode)
4505 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4507 if (TARGET_AVX512VL)
4509 if (<MODE>mode == V4SFmode)
4510 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4512 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4515 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4521 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps an integer vector mode to the lower-case name of the same-width
;; float vector mode, for composing pattern names.
4522 (define_mode_attr sf2simodelower
4523 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])

;; cvtps2dq: packed SF -> SI using the current rounding mode.
4525 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4526 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4528 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
4529 UNSPEC_FIX_NOTRUNC))]
4530 "TARGET_SSE2 && <mask_mode512bit_condition>"
4531 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4532 [(set_attr "type" "ssecvt")
4533 (set (attr "prefix_data16")
4535 (match_test "TARGET_AVX")
4537 (const_string "1")))
4538 (set_attr "prefix" "maybe_vex")
4539 (set_attr "mode" "<sseinsnmode>")])

;; 512-bit vcvtps2dq with masking and embedded rounding.
4541 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4542 [(set (match_operand:V16SI 0 "register_operand" "=v")
4544 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4545 UNSPEC_FIX_NOTRUNC))]
4547 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4548 [(set_attr "type" "ssecvt")
4549 (set_attr "prefix" "evex")
4550 (set_attr "mode" "XI")])
;; vcvtps2udq: packed SF -> unsigned SI, current rounding mode (EVEX only).
4552 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4553 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4554 (unspec:VI4_AVX512VL
4555 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4556 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4558 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4559 [(set_attr "type" "ssecvt")
4560 (set_attr "prefix" "evex")
4561 (set_attr "mode" "<sseinsnmode>")])

;; AVX512DQ vcvtps2qq: packed SF -> signed 64-bit integers (256/512-bit).
4563 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4564 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4565 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4566 UNSPEC_FIX_NOTRUNC))]
4567 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4568 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4569 [(set_attr "type" "ssecvt")
4570 (set_attr "prefix" "evex")
4571 (set_attr "mode" "<sseinsnmode>")])

;; 128-bit vcvtps2qq: only the two low SF elements are converted,
;; hence the explicit vec_select and the %q1 (64-bit) memory operand.
4573 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4574 [(set (match_operand:V2DI 0 "register_operand" "=v")
4577 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4578 (parallel [(const_int 0) (const_int 1)]))]
4579 UNSPEC_FIX_NOTRUNC))]
4580 "TARGET_AVX512DQ && TARGET_AVX512VL"
4581 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4582 [(set_attr "type" "ssecvt")
4583 (set_attr "prefix" "evex")
4584 (set_attr "mode" "TI")])

;; Unsigned counterpart: vcvtps2uqq (256/512-bit).
4586 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4587 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4588 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4589 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4590 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4591 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4592 [(set_attr "type" "ssecvt")
4593 (set_attr "prefix" "evex")
4594 (set_attr "mode" "<sseinsnmode>")])

;; 128-bit vcvtps2uqq over the two low SF elements.
4596 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4597 [(set (match_operand:V2DI 0 "register_operand" "=v")
4600 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4601 (parallel [(const_int 0) (const_int 1)]))]
4602 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4603 "TARGET_AVX512DQ && TARGET_AVX512VL"
4604 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4605 [(set_attr "type" "ssecvt")
4606 (set_attr "prefix" "evex")
4607 (set_attr "mode" "TI")])
;; vcvttps2dq / vcvttps2udq (via <fixsuffix>): truncating packed SF -> int
;; for the 512-bit mode, with masking and SAE.
4609 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4610 [(set (match_operand:V16SI 0 "register_operand" "=v")
4612 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4614 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4615 [(set_attr "type" "ssecvt")
4616 (set_attr "prefix" "evex")
4617 (set_attr "mode" "XI")])

;; 256-bit truncating conversion; masking requires AVX512VL.
4619 (define_insn "fix_truncv8sfv8si2<mask_name>"
4620 [(set (match_operand:V8SI 0 "register_operand" "=v")
4621 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4622 "TARGET_AVX && <mask_avx512vl_condition>"
4623 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4624 [(set_attr "type" "ssecvt")
4625 (set_attr "prefix" "<mask_prefix>")
4626 (set_attr "mode" "OI")])
;; 128-bit truncating packed SF -> SI (cvttps2dq); masking requires
;; AVX512VL.  prefix_rep / prefix_data16 are computed per-alternative:
;; irrelevant ("*") under AVX/VEX encoding, fixed values for legacy SSE2.
;; NOTE(review): the unconditional (set_attr "prefix_data16" "0") that
;; followed the conditional (set (attr "prefix_data16") ...) was redundant
;; and contradicted the computed value; removed.
4628 (define_insn "fix_truncv4sfv4si2<mask_name>"
4629 [(set (match_operand:V4SI 0 "register_operand" "=v")
4630 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
4631 "TARGET_SSE2 && <mask_avx512vl_condition>"
4632 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4633 [(set_attr "type" "ssecvt")
4634 (set (attr "prefix_rep")
4636 (match_test "TARGET_AVX")
4638 (const_string "1")))
4639 (set (attr "prefix_data16")
4641 (match_test "TARGET_AVX")
4643 (const_string "0")))
4645 (set_attr "prefix" "<mask_prefix2>")
4646 (set_attr "mode" "TI")])
;; Expand unsigned truncating float -> int: native vcvttps2udq for V16SF,
;; otherwise emit the adjust/fix/xor software sequence that maps the
;; unsigned range onto the signed converter.
4648 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4649 [(match_operand:<sseintvecmode> 0 "register_operand")
4650 (match_operand:VF1 1 "register_operand")]
4653 if (<MODE>mode == V16SFmode)
4654 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4659 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4660 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4661 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4662 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4667 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4669 ;; Parallel double-precision floating point conversion operations
4671 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: two packed 32-bit integers (MMX reg or memory) to two doubles.
4673 (define_insn "sse2_cvtpi2pd"
4674 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4675 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4677 "cvtpi2pd\t{%1, %0|%0, %1}"
4678 [(set_attr "type" "ssecvt")
4679 (set_attr "unit" "mmx,*")
4680 (set_attr "prefix_data16" "1,*")
4681 (set_attr "mode" "V2DF")])

;; cvtpd2pi: two doubles to two 32-bit ints in an MMX register, using the
;; current rounding mode (UNSPEC_FIX_NOTRUNC).
4683 (define_insn "sse2_cvtpd2pi"
4684 [(set (match_operand:V2SI 0 "register_operand" "=y")
4685 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4686 UNSPEC_FIX_NOTRUNC))]
4688 "cvtpd2pi\t{%1, %0|%0, %1}"
4689 [(set_attr "type" "ssecvt")
4690 (set_attr "unit" "mmx")
4691 (set_attr "bdver1_decode" "double")
4692 (set_attr "btver2_decode" "direct")
4693 (set_attr "prefix_data16" "1")
4694 (set_attr "mode" "DI")])

;; cvttpd2pi: truncating variant.
4696 (define_insn "sse2_cvttpd2pi"
4697 [(set (match_operand:V2SI 0 "register_operand" "=y")
4698 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4700 "cvttpd2pi\t{%1, %0|%0, %1}"
4701 [(set_attr "type" "ssecvt")
4702 (set_attr "unit" "mmx")
4703 (set_attr "bdver1_decode" "double")
4704 (set_attr "prefix_data16" "1")
4705 (set_attr "mode" "TI")])
;; cvtsi2sd: 32-bit integer to DF in element 0; element 1 merges from
;; operand 1.  No <round_name> variant: SI -> DF is always exact.
4707 (define_insn "sse2_cvtsi2sd"
4708 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4711 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4712 (match_operand:V2DF 1 "register_operand" "0,0,v")
4716 cvtsi2sd\t{%2, %0|%0, %2}
4717 cvtsi2sd\t{%2, %0|%0, %2}
4718 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4719 [(set_attr "isa" "noavx,noavx,avx")
4720 (set_attr "type" "sseicvt")
4721 (set_attr "athlon_decode" "double,direct,*")
4722 (set_attr "amdfam10_decode" "vector,double,*")
4723 (set_attr "bdver1_decode" "double,direct,*")
4724 (set_attr "btver2_decode" "double,double,double")
4725 (set_attr "znver1_decode" "double,double,double")
4726 (set_attr "prefix" "orig,orig,maybe_evex")
4727 (set_attr "mode" "DF")])

;; 64-bit source (cvtsi2sdq); DI -> DF can be inexact, so this one does
;; take embedded rounding (<round_name>).  TARGET_64BIT only.
4729 (define_insn "sse2_cvtsi2sdq<round_name>"
4730 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4733 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4734 (match_operand:V2DF 1 "register_operand" "0,0,v")
4736 "TARGET_SSE2 && TARGET_64BIT"
4738 cvtsi2sdq\t{%2, %0|%0, %2}
4739 cvtsi2sdq\t{%2, %0|%0, %2}
4740 vcvtsi2sdq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4741 [(set_attr "isa" "noavx,noavx,avx")
4742 (set_attr "type" "sseicvt")
4743 (set_attr "athlon_decode" "double,direct,*")
4744 (set_attr "amdfam10_decode" "vector,double,*")
4745 (set_attr "bdver1_decode" "double,direct,*")
4746 (set_attr "length_vex" "*,*,4")
4747 (set_attr "prefix_rex" "1,1,*")
4748 (set_attr "prefix" "orig,orig,maybe_evex")
4749 (set_attr "mode" "DF")])
;; AVX-512F scalar float -> unsigned integer conversions (EVEX only).
;; vcvtss2usi: SF element 0 -> unsigned SI, current rounding mode.
4751 (define_insn "avx512f_vcvtss2usi<round_name>"
4752 [(set (match_operand:SI 0 "register_operand" "=r")
4755 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4756 (parallel [(const_int 0)]))]
4757 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4759 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4760 [(set_attr "type" "sseicvt")
4761 (set_attr "prefix" "evex")
4762 (set_attr "mode" "SI")])

;; SF -> unsigned DI; TARGET_64BIT only.
4764 (define_insn "avx512f_vcvtss2usiq<round_name>"
4765 [(set (match_operand:DI 0 "register_operand" "=r")
4768 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4769 (parallel [(const_int 0)]))]
4770 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4771 "TARGET_AVX512F && TARGET_64BIT"
4772 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4773 [(set_attr "type" "sseicvt")
4774 (set_attr "prefix" "evex")
4775 (set_attr "mode" "DI")])

;; Truncating SF -> unsigned SI (vcvttss2usi), SAE-only rounding control.
4777 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4778 [(set (match_operand:SI 0 "register_operand" "=r")
4781 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4782 (parallel [(const_int 0)]))))]
4784 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4785 [(set_attr "type" "sseicvt")
4786 (set_attr "prefix" "evex")
4787 (set_attr "mode" "SI")])

;; Truncating SF -> unsigned DI; TARGET_64BIT only.
4789 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4790 [(set (match_operand:DI 0 "register_operand" "=r")
4793 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4794 (parallel [(const_int 0)]))))]
4795 "TARGET_AVX512F && TARGET_64BIT"
4796 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4797 [(set_attr "type" "sseicvt")
4798 (set_attr "prefix" "evex")
4799 (set_attr "mode" "DI")])

;; DF element 0 -> unsigned SI (vcvtsd2usi), current rounding mode.
4801 (define_insn "avx512f_vcvtsd2usi<round_name>"
4802 [(set (match_operand:SI 0 "register_operand" "=r")
4805 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4806 (parallel [(const_int 0)]))]
4807 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4809 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4810 [(set_attr "type" "sseicvt")
4811 (set_attr "prefix" "evex")
4812 (set_attr "mode" "SI")])

;; DF -> unsigned DI; TARGET_64BIT only.
4814 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4815 [(set (match_operand:DI 0 "register_operand" "=r")
4818 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4819 (parallel [(const_int 0)]))]
4820 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4821 "TARGET_AVX512F && TARGET_64BIT"
4822 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4823 [(set_attr "type" "sseicvt")
4824 (set_attr "prefix" "evex")
4825 (set_attr "mode" "DI")])

;; Truncating DF -> unsigned SI (vcvttsd2usi), SAE-only.
4827 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4828 [(set (match_operand:SI 0 "register_operand" "=r")
4831 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4832 (parallel [(const_int 0)]))))]
4834 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4835 [(set_attr "type" "sseicvt")
4836 (set_attr "prefix" "evex")
4837 (set_attr "mode" "SI")])

;; Truncating DF -> unsigned DI; TARGET_64BIT only.
4839 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4840 [(set (match_operand:DI 0 "register_operand" "=r")
4843 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4844 (parallel [(const_int 0)]))))]
4845 "TARGET_AVX512F && TARGET_64BIT"
4846 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4847 [(set_attr "type" "sseicvt")
4848 (set_attr "prefix" "evex")
4849 (set_attr "mode" "DI")])
;; cvtsd2si: DF element 0 -> SI using the current rounding mode.
4851 (define_insn "sse2_cvtsd2si<round_name>"
4852 [(set (match_operand:SI 0 "register_operand" "=r,r")
4855 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4856 (parallel [(const_int 0)]))]
4857 UNSPEC_FIX_NOTRUNC))]
4859 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4860 [(set_attr "type" "sseicvt")
4861 (set_attr "athlon_decode" "double,vector")
4862 (set_attr "bdver1_decode" "double,double")
4863 (set_attr "btver2_decode" "double,double")
4864 (set_attr "prefix_rep" "1")
4865 (set_attr "prefix" "maybe_vex")
4866 (set_attr "mode" "SI")])

;; "_2" variant: scalar DF source (register or memory).
4868 (define_insn "sse2_cvtsd2si_2"
4869 [(set (match_operand:SI 0 "register_operand" "=r,r")
4870 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4871 UNSPEC_FIX_NOTRUNC))]
4873 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4874 [(set_attr "type" "sseicvt")
4875 (set_attr "athlon_decode" "double,vector")
4876 (set_attr "amdfam10_decode" "double,double")
4877 (set_attr "bdver1_decode" "double,double")
4878 (set_attr "prefix_rep" "1")
4879 (set_attr "prefix" "maybe_vex")
4880 (set_attr "mode" "SI")])

;; 64-bit result ({q} suffix, REX.W); TARGET_64BIT only.
4882 (define_insn "sse2_cvtsd2siq<round_name>"
4883 [(set (match_operand:DI 0 "register_operand" "=r,r")
4886 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4887 (parallel [(const_int 0)]))]
4888 UNSPEC_FIX_NOTRUNC))]
4889 "TARGET_SSE2 && TARGET_64BIT"
4890 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4891 [(set_attr "type" "sseicvt")
4892 (set_attr "athlon_decode" "double,vector")
4893 (set_attr "bdver1_decode" "double,double")
4894 (set_attr "prefix_rep" "1")
4895 (set_attr "prefix" "maybe_vex")
4896 (set_attr "mode" "DI")])

;; Scalar DF source, 64-bit result.
4898 (define_insn "sse2_cvtsd2siq_2"
4899 [(set (match_operand:DI 0 "register_operand" "=r,r")
4900 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4901 UNSPEC_FIX_NOTRUNC))]
4902 "TARGET_SSE2 && TARGET_64BIT"
4903 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4904 [(set_attr "type" "sseicvt")
4905 (set_attr "athlon_decode" "double,vector")
4906 (set_attr "amdfam10_decode" "double,double")
4907 (set_attr "bdver1_decode" "double,double")
4908 (set_attr "prefix_rep" "1")
4909 (set_attr "prefix" "maybe_vex")
4910 (set_attr "mode" "DI")])

;; cvttsd2si: truncating DF -> SI; SAE-only rounding control under AVX-512.
4912 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4913 [(set (match_operand:SI 0 "register_operand" "=r,r")
4916 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4917 (parallel [(const_int 0)]))))]
4919 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4920 [(set_attr "type" "sseicvt")
4921 (set_attr "athlon_decode" "double,vector")
4922 (set_attr "amdfam10_decode" "double,double")
4923 (set_attr "bdver1_decode" "double,double")
4924 (set_attr "btver2_decode" "double,double")
4925 (set_attr "prefix_rep" "1")
4926 (set_attr "prefix" "maybe_vex")
4927 (set_attr "mode" "SI")])

;; Truncating DF -> DI; TARGET_64BIT only.
4929 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4930 [(set (match_operand:DI 0 "register_operand" "=r,r")
4933 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4934 (parallel [(const_int 0)]))))]
4935 "TARGET_SSE2 && TARGET_64BIT"
4936 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4937 [(set_attr "type" "sseicvt")
4938 (set_attr "athlon_decode" "double,vector")
4939 (set_attr "amdfam10_decode" "double,double")
4940 (set_attr "bdver1_decode" "double,double")
4941 (set_attr "prefix_rep" "1")
4942 (set_attr "prefix" "maybe_vex")
4943 (set_attr "mode" "DI")])
4945 ;; For float<si2dfmode><mode>2 insn pattern
;; Maps a double vector mode to the same-width signed-int vector mode
;; (and its lower-case spelling) used as the conversion source.
4946 (define_mode_attr si2dfmode
4947 [(V8DF "V8SI") (V4DF "V4SI")])
4948 (define_mode_attr si2dfmodelower
4949 [(V8DF "v8si") (V4DF "v4si")])
;; Widening signed int -> double conversion (vcvtdq2pd) for the 256/512-bit
;; double vector modes, optionally masked.
4951 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4952 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4953 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4954 "TARGET_AVX && <mask_mode512bit_condition>"
4955 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4956 [(set_attr "type" "ssecvt")
4957 (set_attr "prefix" "maybe_vex")
4958 (set_attr "mode" "<MODE>")])
;; Signed/unsigned 64-bit int -> double conversion (vcvtqq2pd /
;; vcvtuqq2pd), any_float selecting the signedness via <floatsuffix>.
4960 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4961 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4962 (any_float:VF2_AVX512VL
4963 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4965 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4966 [(set_attr "type" "ssecvt")
4967 (set_attr "prefix" "evex")
4968 (set_attr "mode" "<MODE>")])
4970 ;; For <floatsuffix>float<sselondveclower><mode> insn patterns
;; Mnemonic suffix: the 256-bit QI->PS form needs an explicit {y} to
;; disambiguate operand size in AT&T syntax.
4971 (define_mode_attr qq2pssuff
4972 [(V8SF "") (V4SF "{y}")])
;; Same-element-count 64-bit integer vector mode for a float vector mode.
4974 (define_mode_attr sselongvecmode
4975 [(V8SF "V8DI") (V4SF "V4DI")])
4977 (define_mode_attr sselongvecmodelower
4978 [(V8SF "v8di") (V4SF "v4di")])
;; Insn "mode" attribute value for the qq<->ps conversion patterns below.
4980 (define_mode_attr sseintvecmode3
4981 [(V8SF "XI") (V4SF "OI")
4982 (V8DF "OI") (V4DF "TI")])
;; Signed/unsigned 64-bit int -> float conversion (vcvt(u)qq2ps),
;; AVX512DQ.
4984 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4985 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4986 (any_float:VF1_128_256VL
4987 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4988 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4989 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4990 [(set_attr "type" "ssecvt")
4991 (set_attr "prefix" "evex")
4992 (set_attr "mode" "<MODE>")])
;; V2DI -> V2SF conversion; the V4SF destination's upper two elements
;; are zeroed (const_vector of zeros in the concatenation).
4994 (define_insn "*<floatsuffix>floatv2div2sf2"
4995 [(set (match_operand:V4SF 0 "register_operand" "=v")
4997 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4998 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4999 "TARGET_AVX512DQ && TARGET_AVX512VL"
5000 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
5001 [(set_attr "type" "ssecvt")
5002 (set_attr "prefix" "evex")
5003 (set_attr "mode" "V4SF")])
;; Merge-masked V2DI -> V2SF conversion: elements with a clear bit in
;; mask operand 3 (a Yk mask register) are taken from operand 2.
5005 (define_insn "<floatsuffix>floatv2div2sf2_mask"
5006 [(set (match_operand:V4SF 0 "register_operand" "=v")
5009 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5011 (match_operand:V4SF 2 "vector_move_operand" "0C")
5012 (parallel [(const_int 0) (const_int 1)]))
5013 (match_operand:QI 3 "register_operand" "Yk"))
5014 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5015 "TARGET_AVX512DQ && TARGET_AVX512VL"
5016 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5017 [(set_attr "type" "ssecvt")
5018 (set_attr "prefix" "evex")
5019 (set_attr "mode" "V4SF")])
;; Zero-masked variant: masked-off elements become zero ({z} modifier),
;; matched when the merge value is an all-zero vector.
5021 (define_insn "*<floatsuffix>floatv2div2sf2_mask_1"
5022 [(set (match_operand:V4SF 0 "register_operand" "=v")
5025 (any_float:V2SF (match_operand:V2DI 1
5026 "nonimmediate_operand" "vm"))
5027 (const_vector:V2SF [(const_int 0) (const_int 0)])
5028 (match_operand:QI 2 "register_operand" "Yk"))
5029 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5030 "TARGET_AVX512DQ && TARGET_AVX512VL"
5031 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
5032 [(set_attr "type" "ssecvt")
5033 (set_attr "prefix" "evex")
5034 (set_attr "mode" "V4SF")])
;; Unsigned int -> double conversion (vcvtudq2pd) for 256/512-bit
;; double vector modes, optionally masked.
5036 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
5037 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
5038 (unsigned_float:VF2_512_256VL
5039 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5041 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5042 [(set_attr "type" "ssecvt")
5043 (set_attr "prefix" "evex")
5044 (set_attr "mode" "<MODE>")])
;; 128-bit unsigned variant: converts the low two elements of a V4SI.
5046 (define_insn "ufloatv2siv2df2<mask_name>"
5047 [(set (match_operand:V2DF 0 "register_operand" "=v")
5048 (unsigned_float:V2DF
5050 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5051 (parallel [(const_int 0) (const_int 1)]))))]
5053 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5054 [(set_attr "type" "ssecvt")
5055 (set_attr "prefix" "evex")
5056 (set_attr "mode" "V2DF")])
;; Convert the low eight SI elements of a V16SI to V8DF (the %t modifier
;; prints the source as a 256-bit register).
5058 (define_insn "avx512f_cvtdq2pd512_2"
5059 [(set (match_operand:V8DF 0 "register_operand" "=v")
5062 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5063 (parallel [(const_int 0) (const_int 1)
5064 (const_int 2) (const_int 3)
5065 (const_int 4) (const_int 5)
5066 (const_int 6) (const_int 7)]))))]
5068 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5069 [(set_attr "type" "ssecvt")
5070 (set_attr "prefix" "evex")
5071 (set_attr "mode" "V8DF")])
;; Convert the low four SI elements of a V8SI to V4DF (%x prints the
;; source as a 128-bit register).
5073 (define_insn "avx_cvtdq2pd256_2"
5074 [(set (match_operand:V4DF 0 "register_operand" "=v")
5077 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
5078 (parallel [(const_int 0) (const_int 1)
5079 (const_int 2) (const_int 3)]))))]
5081 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5082 [(set_attr "type" "ssecvt")
5083 (set_attr "prefix" "maybe_evex")
5084 (set_attr "mode" "V4DF")])
;; Convert the low two SI elements of a V4SI to V2DF (cvtdq2pd).
5086 (define_insn "sse2_cvtdq2pd<mask_name>"
5087 [(set (match_operand:V2DF 0 "register_operand" "=v")
5090 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5091 (parallel [(const_int 0) (const_int 1)]))))]
5092 "TARGET_SSE2 && <mask_avx512vl_condition>"
5093 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5094 [(set_attr "type" "ssecvt")
5095 (set_attr "prefix" "maybe_vex")
5096 (set_attr "mode" "V2DF")])
;; Double -> int32 conversion using the current rounding mode
;; (UNSPEC_FIX_NOTRUNC -> vcvtpd2dq), 512-bit form.
5098 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
5099 [(set (match_operand:V8SI 0 "register_operand" "=v")
5101 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5102 UNSPEC_FIX_NOTRUNC))]
5104 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5105 [(set_attr "type" "ssecvt")
5106 (set_attr "prefix" "evex")
5107 (set_attr "mode" "OI")])
;; 256-bit form: V4DF -> V4SI ({y} suffix selects the ymm source form).
5109 (define_insn "avx_cvtpd2dq256<mask_name>"
5110 [(set (match_operand:V4SI 0 "register_operand" "=v")
5111 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5112 UNSPEC_FIX_NOTRUNC))]
5113 "TARGET_AVX && <mask_avx512vl_condition>"
5114 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5115 [(set_attr "type" "ssecvt")
5116 (set_attr "prefix" "<mask_prefix>")
5117 (set_attr "mode" "OI")])
;; Expander producing a V8SI whose high half is zero; operand 2 is
;; initialized to an all-zero V4SI constant.
5119 (define_expand "avx_cvtpd2dq256_2"
5120 [(set (match_operand:V8SI 0 "register_operand")
5122 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
5126 "operands[2] = CONST0_RTX (V4SImode);")
;; Matching insn for the expander above (%x prints the destination as
;; its 128-bit low half).
5128 (define_insn "*avx_cvtpd2dq256_2"
5129 [(set (match_operand:V8SI 0 "register_operand" "=v")
5131 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5133 (match_operand:V4SI 2 "const0_operand")))]
5135 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5136 [(set_attr "type" "ssecvt")
5137 (set_attr "prefix" "vex")
5138 (set_attr "btver2_decode" "vector")
5139 (set_attr "mode" "OI")])
;; 128-bit form: V2DF -> two SI results, upper half of the V4SI
;; destination zeroed.  Emits the VEX {x} form when available.
5141 (define_insn "sse2_cvtpd2dq<mask_name>"
5142 [(set (match_operand:V4SI 0 "register_operand" "=v")
5144 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
5146 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5147 "TARGET_SSE2 && <mask_avx512vl_condition>"
5150 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5152 return "cvtpd2dq\t{%1, %0|%0, %1}";
5154 [(set_attr "type" "ssecvt")
5155 (set_attr "prefix_rep" "1")
5156 (set_attr "prefix_data16" "0")
5157 (set_attr "prefix" "maybe_vex")
5158 (set_attr "mode" "TI")
5159 (set_attr "amdfam10_decode" "double")
5160 (set_attr "athlon_decode" "vector")
5161 (set_attr "bdver1_decode" "double")])
5163 ;; For ufix_notrunc* insn patterns
;; Mnemonic suffix for the 256-bit pd->udq form ({y} disambiguates
;; operand size in AT&T syntax).
5164 (define_mode_attr pd2udqsuff
5165 [(V8DF "") (V4DF "{y}")])
;; Double -> unsigned int32 using the current rounding mode
;; (vcvtpd2udq), 256/512-bit forms.
5167 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
5168 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
5170 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
5171 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5173 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5174 [(set_attr "type" "ssecvt")
5175 (set_attr "prefix" "evex")
5176 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit form: V2DF -> two unsigned SI results, upper half of the
;; V4SI destination zeroed.
5178 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
5179 [(set (match_operand:V4SI 0 "register_operand" "=v")
5182 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5183 UNSPEC_UNSIGNED_FIX_NOTRUNC)
5184 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5186 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5187 [(set_attr "type" "ssecvt")
5188 (set_attr "prefix" "evex")
5189 (set_attr "mode" "TI")])
;; Truncating double -> (un)signed int32, 512-bit (vcvttpd2dq /
;; vcvttpd2udq selected by <fixsuffix>).
5191 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
5192 [(set (match_operand:V8SI 0 "register_operand" "=v")
5194 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5196 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5197 [(set_attr "type" "ssecvt")
5198 (set_attr "prefix" "evex")
5199 (set_attr "mode" "OI")])
;; 128-bit truncating unsigned form, upper half of destination zeroed.
5201 (define_insn "ufix_truncv2dfv2si2<mask_name>"
5202 [(set (match_operand:V4SI 0 "register_operand" "=v")
5204 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5205 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5207 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5208 [(set_attr "type" "ssecvt")
5209 (set_attr "prefix" "evex")
5210 (set_attr "mode" "TI")])
;; 256-bit truncating signed form (vcvttpd2dq{y}).
5212 (define_insn "fix_truncv4dfv4si2<mask_name>"
5213 [(set (match_operand:V4SI 0 "register_operand" "=v")
5214 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5215 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
5216 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5217 [(set_attr "type" "ssecvt")
5218 (set_attr "prefix" "maybe_evex")
5219 (set_attr "mode" "OI")])
;; 256-bit truncating unsigned form (vcvttpd2udq{y}), AVX512VL only.
5221 (define_insn "ufix_truncv4dfv4si2<mask_name>"
5222 [(set (match_operand:V4SI 0 "register_operand" "=v")
5223 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5224 "TARGET_AVX512VL && TARGET_AVX512F"
5225 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5226 [(set_attr "type" "ssecvt")
5227 (set_attr "prefix" "maybe_evex")
5228 (set_attr "mode" "OI")])
;; Truncating double -> (un)signed int64 (vcvttpd2qq / vcvttpd2uqq),
;; AVX512DQ.
5230 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
5231 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5232 (any_fix:<sseintvecmode>
5233 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5234 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
5235 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5236 [(set_attr "type" "ssecvt")
5237 (set_attr "prefix" "evex")
5238 (set_attr "mode" "<sseintvecmode2>")])
;; Double -> signed int64 using the current rounding mode (vcvtpd2qq).
5240 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5241 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5242 (unspec:<sseintvecmode>
5243 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5244 UNSPEC_FIX_NOTRUNC))]
5245 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5246 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5247 [(set_attr "type" "ssecvt")
5248 (set_attr "prefix" "evex")
5249 (set_attr "mode" "<sseintvecmode2>")])
;; Double -> unsigned int64 using the current rounding mode (vcvtpd2uqq).
5251 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5252 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5253 (unspec:<sseintvecmode>
5254 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
5255 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5256 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5257 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5258 [(set_attr "type" "ssecvt")
5259 (set_attr "prefix" "evex")
5260 (set_attr "mode" "<sseintvecmode2>")])
;; Truncating float -> (un)signed int64 (vcvttps2qq / vcvttps2uqq),
;; AVX512DQ; result vector has twice the element width of the source.
5262 (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
5263 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
5264 (any_fix:<sselongvecmode>
5265 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5266 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
5267 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5268 [(set_attr "type" "ssecvt")
5269 (set_attr "prefix" "evex")
5270 (set_attr "mode" "<sseintvecmode3>")])
;; 128-bit form: truncate the low two SF elements of a V4SF to V2DI.
5272 (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
5273 [(set (match_operand:V2DI 0 "register_operand" "=v")
5276 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5277 (parallel [(const_int 0) (const_int 1)]))))]
5278 "TARGET_AVX512DQ && TARGET_AVX512VL"
5279 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5280 [(set_attr "type" "ssecvt")
5281 (set_attr "prefix" "evex")
5282 (set_attr "mode" "TI")])
;; Truncating float -> unsigned int32 (vcvttps2udq).
5284 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
5285 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5286 (unsigned_fix:<sseintvecmode>
5287 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
5289 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5290 [(set_attr "type" "ssecvt")
5291 (set_attr "prefix" "evex")
5292 (set_attr "mode" "<sseintvecmode2>")])
;; Expander: truncating V4DF -> V4SI with the V8SI destination's high
;; half zeroed; operand 2 becomes an all-zero V4SI constant.
5294 (define_expand "avx_cvttpd2dq256_2"
5295 [(set (match_operand:V8SI 0 "register_operand")
5297 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
5300 "operands[2] = CONST0_RTX (V4SImode);")
;; 128-bit truncating V2DF -> V2SI, upper half of destination zeroed.
;; Emits the VEX {x} form when available.
5302 (define_insn "sse2_cvttpd2dq<mask_name>"
5303 [(set (match_operand:V4SI 0 "register_operand" "=v")
5305 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
5306 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5307 "TARGET_SSE2 && <mask_avx512vl_condition>"
5310 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5312 return "cvttpd2dq\t{%1, %0|%0, %1}";
5314 [(set_attr "type" "ssecvt")
5315 (set_attr "amdfam10_decode" "double")
5316 (set_attr "athlon_decode" "vector")
5317 (set_attr "bdver1_decode" "double")
5318 (set_attr "prefix" "maybe_vex")
5319 (set_attr "mode" "TI")])
;; Narrow the low DF element of operand 2 to SF and merge it into the
;; low element of operand 1 (cvtsd2ss / vcvtsd2ss).
5321 (define_insn "sse2_cvtsd2ss<round_name>"
5322 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5325 (float_truncate:V2SF
5326 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
5327 (match_operand:V4SF 1 "register_operand" "0,0,v")
5331 cvtsd2ss\t{%2, %0|%0, %2}
5332 cvtsd2ss\t{%2, %0|%0, %q2}
5333 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
5334 [(set_attr "isa" "noavx,noavx,avx")
5335 (set_attr "type" "ssecvt")
5336 (set_attr "athlon_decode" "vector,double,*")
5337 (set_attr "amdfam10_decode" "vector,double,*")
5338 (set_attr "bdver1_decode" "direct,direct,*")
5339 (set_attr "btver2_decode" "double,double,double")
5340 (set_attr "prefix" "orig,orig,<round_prefix>")
5341 (set_attr "mode" "SF")])
;; Variant taking a scalar DFmode source instead of a vector element.
5343 (define_insn "*sse2_vd_cvtsd2ss"
5344 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5347 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
5348 (match_operand:V4SF 1 "register_operand" "0,0,v")
5352 cvtsd2ss\t{%2, %0|%0, %2}
5353 cvtsd2ss\t{%2, %0|%0, %2}
5354 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
5355 [(set_attr "isa" "noavx,noavx,avx")
5356 (set_attr "type" "ssecvt")
5357 (set_attr "athlon_decode" "vector,double,*")
5358 (set_attr "amdfam10_decode" "vector,double,*")
5359 (set_attr "bdver1_decode" "direct,direct,*")
5360 (set_attr "btver2_decode" "double,double,double")
5361 (set_attr "prefix" "orig,orig,vex")
5362 (set_attr "mode" "SF")])
;; Widen the low SF element of operand 2 to DF and merge it into the
;; low element of operand 1 (cvtss2sd / vcvtss2sd).
5364 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
5365 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5369 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
5370 (parallel [(const_int 0) (const_int 1)])))
5371 (match_operand:V2DF 1 "register_operand" "0,0,v")
5375 cvtss2sd\t{%2, %0|%0, %2}
5376 cvtss2sd\t{%2, %0|%0, %k2}
5377 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
5378 [(set_attr "isa" "noavx,noavx,avx")
5379 (set_attr "type" "ssecvt")
5380 (set_attr "amdfam10_decode" "vector,double,*")
5381 (set_attr "athlon_decode" "direct,direct,*")
5382 (set_attr "bdver1_decode" "direct,direct,*")
5383 (set_attr "btver2_decode" "double,double,double")
5384 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
5385 (set_attr "mode" "DF")])
;; Variant taking a scalar SFmode source instead of a vector element.
5387 (define_insn "*sse2_vd_cvtss2sd"
5388 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5391 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
5392 (match_operand:V2DF 1 "register_operand" "0,0,v")
5396 cvtss2sd\t{%2, %0|%0, %2}
5397 cvtss2sd\t{%2, %0|%0, %2}
5398 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
5399 [(set_attr "isa" "noavx,noavx,avx")
5400 (set_attr "type" "ssecvt")
5401 (set_attr "amdfam10_decode" "vector,double,*")
5402 (set_attr "athlon_decode" "direct,direct,*")
5403 (set_attr "bdver1_decode" "direct,direct,*")
5404 (set_attr "btver2_decode" "double,double,double")
5405 (set_attr "prefix" "orig,orig,vex")
5406 (set_attr "mode" "DF")])
;; Narrow V8DF -> V8SF (vcvtpd2ps), 512-bit form.
5408 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
5409 [(set (match_operand:V8SF 0 "register_operand" "=v")
5410 (float_truncate:V8SF
5411 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
5413 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5414 [(set_attr "type" "ssecvt")
5415 (set_attr "prefix" "evex")
5416 (set_attr "mode" "V8SF")])
;; Narrow V4DF -> V4SF (vcvtpd2ps{y}), 256-bit form.
5418 (define_insn "avx_cvtpd2ps256<mask_name>"
5419 [(set (match_operand:V4SF 0 "register_operand" "=v")
5420 (float_truncate:V4SF
5421 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5422 "TARGET_AVX && <mask_avx512vl_condition>"
5423 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5424 [(set_attr "type" "ssecvt")
5425 (set_attr "prefix" "maybe_evex")
5426 (set_attr "btver2_decode" "vector")
5427 (set_attr "mode" "V4SF")])
;; Expander: narrow V2DF -> V2SF with the V4SF destination's upper half
;; zeroed; operand 2 becomes an all-zero V2SF constant.
5429 (define_expand "sse2_cvtpd2ps"
5430 [(set (match_operand:V4SF 0 "register_operand")
5432 (float_truncate:V2SF
5433 (match_operand:V2DF 1 "vector_operand"))
5436 "operands[2] = CONST0_RTX (V2SFmode);")
;; Merge-masked expander variant; operand 3 is the mask register.
5438 (define_expand "sse2_cvtpd2ps_mask"
5439 [(set (match_operand:V4SF 0 "register_operand")
5442 (float_truncate:V2SF
5443 (match_operand:V2DF 1 "vector_operand"))
5445 (match_operand:V4SF 2 "register_operand")
5446 (match_operand:QI 3 "register_operand")))]
5448 "operands[4] = CONST0_RTX (V2SFmode);")
;; Matching insn for the expanders above; emits the VEX {x} form when
;; available, otherwise legacy cvtpd2ps.
5450 (define_insn "*sse2_cvtpd2ps<mask_name>"
5451 [(set (match_operand:V4SF 0 "register_operand" "=v")
5453 (float_truncate:V2SF
5454 (match_operand:V2DF 1 "vector_operand" "vBm"))
5455 (match_operand:V2SF 2 "const0_operand")))]
5456 "TARGET_SSE2 && <mask_avx512vl_condition>"
5459 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
5461 return "cvtpd2ps\t{%1, %0|%0, %1}";
5463 [(set_attr "type" "ssecvt")
5464 (set_attr "amdfam10_decode" "double")
5465 (set_attr "athlon_decode" "vector")
5466 (set_attr "bdver1_decode" "double")
5467 (set_attr "prefix_data16" "1")
5468 (set_attr "prefix" "maybe_vex")
5469 (set_attr "mode" "V4SF")])
5471 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a double vector mode to the same-element-count float vector mode.
5472 (define_mode_attr sf2dfmode
5473 [(V8DF "V8SF") (V4DF "V4SF")])
;; Widen float -> double (vcvtps2pd) for 256/512-bit destinations.
5475 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
5476 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5477 (float_extend:VF2_512_256
5478 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5479 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
5480 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5481 [(set_attr "type" "ssecvt")
5482 (set_attr "prefix" "maybe_vex")
5483 (set_attr "mode" "<MODE>")])
;; Widen the low four SF elements of a V8SF to V4DF (%x prints the
;; source as a 128-bit register).
5485 (define_insn "*avx_cvtps2pd256_2"
5486 [(set (match_operand:V4DF 0 "register_operand" "=v")
5489 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5490 (parallel [(const_int 0) (const_int 1)
5491 (const_int 2) (const_int 3)]))))]
5493 "vcvtps2pd\t{%x1, %0|%0, %x1}"
5494 [(set_attr "type" "ssecvt")
5495 (set_attr "prefix" "vex")
5496 (set_attr "mode" "V4DF")])
;; Widen the low eight SF elements of a V16SF to V8DF (%t prints the
;; source as a 256-bit register).
5498 (define_insn "vec_unpacks_lo_v16sf"
5499 [(set (match_operand:V8DF 0 "register_operand" "=v")
5502 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5503 (parallel [(const_int 0) (const_int 1)
5504 (const_int 2) (const_int 3)
5505 (const_int 4) (const_int 5)
5506 (const_int 6) (const_int 7)]))))]
5508 "vcvtps2pd\t{%t1, %0|%0, %t1}"
5509 [(set_attr "type" "ssecvt")
5510 (set_attr "prefix" "evex")
5511 (set_attr "mode" "V8DF")])
;; Convert a QI/HI-element vector to a mask register (vpmov{b,w}2m).
5513 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5514 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5515 (unspec:<avx512fmaskmode>
5516 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
5517 UNSPEC_CVTINT2MASK))]
5519 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5520 [(set_attr "prefix" "evex")
5521 (set_attr "mode" "<sseinsnmode>")])
;; Same for SI/DI-element vectors (vpmov{d,q}2m).
5523 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5524 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5525 (unspec:<avx512fmaskmode>
5526 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
5527 UNSPEC_CVTINT2MASK))]
5529 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5530 [(set_attr "prefix" "evex")
5531 (set_attr "mode" "<sseinsnmode>")])
;; Expand mask -> QI/HI-element vector: element i is all-ones where mask
;; bit i is set, zero otherwise (vec_merge of CONSTM1 and CONST0).
5533 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5534 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5535 (vec_merge:VI12_AVX512VL
5538 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5541 operands[2] = CONSTM1_RTX (<MODE>mode);
5542 operands[3] = CONST0_RTX (<MODE>mode);
;; Matching insn for the expander above (vpmovm2{b,w}).
5545 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5546 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5547 (vec_merge:VI12_AVX512VL
5548 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
5549 (match_operand:VI12_AVX512VL 3 "const0_operand")
5550 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5552 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5553 [(set_attr "prefix" "evex")
5554 (set_attr "mode" "<sseinsnmode>")])
;; Mask -> SI/DI-element vector, same scheme as above.
5556 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5557 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5558 (vec_merge:VI48_AVX512VL
5561 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5564 operands[2] = CONSTM1_RTX (<MODE>mode);
5565 operands[3] = CONST0_RTX (<MODE>mode);
;; Matching insn for the expander above (vpmovm2{d,q}).
5568 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5569 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5570 (vec_merge:VI48_AVX512VL
5571 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
5572 (match_operand:VI48_AVX512VL 3 "const0_operand")
5573 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5575 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5576 [(set_attr "prefix" "evex")
5577 (set_attr "mode" "<sseinsnmode>")])
;; Widen the low two SF elements of a V4SF to V2DF (cvtps2pd).
5579 (define_insn "sse2_cvtps2pd<mask_name>"
5580 [(set (match_operand:V2DF 0 "register_operand" "=v")
5583 (match_operand:V4SF 1 "vector_operand" "vm")
5584 (parallel [(const_int 0) (const_int 1)]))))]
5585 "TARGET_SSE2 && <mask_avx512vl_condition>"
5586 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5587 [(set_attr "type" "ssecvt")
5588 (set_attr "amdfam10_decode" "direct")
5589 (set_attr "athlon_decode" "double")
5590 (set_attr "bdver1_decode" "double")
5591 (set_attr "prefix_data16" "0")
5592 (set_attr "prefix" "maybe_vex")
5593 (set_attr "mode" "V2DF")])
;; Unpack-high float->double: shuffle the high SF elements into a fresh
;; temporary (operand 2), then widen its low half.
5595 (define_expand "vec_unpacks_hi_v4sf"
5600 (match_operand:V4SF 1 "vector_operand"))
5601 (parallel [(const_int 6) (const_int 7)
5602 (const_int 2) (const_int 3)])))
5603 (set (match_operand:V2DF 0 "register_operand")
5607 (parallel [(const_int 0) (const_int 1)]))))]
5609 "operands[2] = gen_reg_rtx (V4SFmode);")
;; 256-bit unpack-high: extract elements 4..7 into a V4SF temporary,
;; then widen to V4DF.
5611 (define_expand "vec_unpacks_hi_v8sf"
5614 (match_operand:V8SF 1 "register_operand")
5615 (parallel [(const_int 4) (const_int 5)
5616 (const_int 6) (const_int 7)])))
5617 (set (match_operand:V4DF 0 "register_operand")
5621 "operands[2] = gen_reg_rtx (V4SFmode);")
;; 512-bit unpack-high: extract elements 8..15 into a V8SF temporary,
;; then widen to V8DF.
5623 (define_expand "vec_unpacks_hi_v16sf"
5626 (match_operand:V16SF 1 "register_operand")
5627 (parallel [(const_int 8) (const_int 9)
5628 (const_int 10) (const_int 11)
5629 (const_int 12) (const_int 13)
5630 (const_int 14) (const_int 15)])))
5631 (set (match_operand:V8DF 0 "register_operand")
5635 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Unpack-low float->double: widen elements 0..1 directly.
5637 (define_expand "vec_unpacks_lo_v4sf"
5638 [(set (match_operand:V2DF 0 "register_operand")
5641 (match_operand:V4SF 1 "vector_operand")
5642 (parallel [(const_int 0) (const_int 1)]))))]
;; 256-bit unpack-low: widen elements 0..3 directly.
5645 (define_expand "vec_unpacks_lo_v8sf"
5646 [(set (match_operand:V4DF 0 "register_operand")
5649 (match_operand:V8SF 1 "nonimmediate_operand")
5650 (parallel [(const_int 0) (const_int 1)
5651 (const_int 2) (const_int 3)]))))]
;; Result mode of widening int->float unpack for each int vector mode.
5654 (define_mode_attr sseunpackfltmode
5655 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5656 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Generic widen-high-then-float: integer-unpack the high half into a
;; temporary, then emit a FLOAT of it.
5658 (define_expand "vec_unpacks_float_hi_<mode>"
5659 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5660 (match_operand:VI2_AVX512F 1 "register_operand")]
5663 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5665 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
5666 emit_insn (gen_rtx_SET (operands[0],
5667 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Same for the low half.
5671 (define_expand "vec_unpacks_float_lo_<mode>"
5672 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5673 (match_operand:VI2_AVX512F 1 "register_operand")]
5676 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5678 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
5679 emit_insn (gen_rtx_SET (operands[0],
5680 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned variants: use the unsigned integer unpack, then FLOAT.
5684 (define_expand "vec_unpacku_float_hi_<mode>"
5685 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5686 (match_operand:VI2_AVX512F 1 "register_operand")]
5689 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5691 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5692 emit_insn (gen_rtx_SET (operands[0],
5693 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5697 (define_expand "vec_unpacku_float_lo_<mode>"
5698 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5699 (match_operand:VI2_AVX512F 1 "register_operand")]
5702 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5704 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5705 emit_insn (gen_rtx_SET (operands[0],
5706 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed SI -> DF unpack-float, high half: shuffle elements 2..3 down
;; into a V4SI temporary (operand 2), then convert its low half.
5710 (define_expand "vec_unpacks_float_hi_v4si"
5713 (match_operand:V4SI 1 "vector_operand")
5714 (parallel [(const_int 2) (const_int 3)
5715 (const_int 2) (const_int 3)])))
5716 (set (match_operand:V2DF 0 "register_operand")
5720 (parallel [(const_int 0) (const_int 1)]))))]
5722 "operands[2] = gen_reg_rtx (V4SImode);")
;; Low half: convert elements 0..1 directly.
5724 (define_expand "vec_unpacks_float_lo_v4si"
5725 [(set (match_operand:V2DF 0 "register_operand")
5728 (match_operand:V4SI 1 "vector_operand")
5729 (parallel [(const_int 0) (const_int 1)]))))]
;; 256-bit high half: shuffle elements 4..7 into a V4SI temporary,
;; then convert to V4DF.
5732 (define_expand "vec_unpacks_float_hi_v8si"
5735 (match_operand:V8SI 1 "vector_operand")
5736 (parallel [(const_int 4) (const_int 5)
5737 (const_int 6) (const_int 7)])))
5738 (set (match_operand:V4DF 0 "register_operand")
5742 "operands[2] = gen_reg_rtx (V4SImode);")
;; 256-bit low half: convert elements 0..3 directly.
5744 (define_expand "vec_unpacks_float_lo_v8si"
5745 [(set (match_operand:V4DF 0 "register_operand")
5748 (match_operand:V8SI 1 "nonimmediate_operand")
5749 (parallel [(const_int 0) (const_int 1)
5750 (const_int 2) (const_int 3)]))))]
;; 512-bit high half: shuffle elements 8..15 into a V8SI temporary,
;; then convert to V8DF.
5753 (define_expand "vec_unpacks_float_hi_v16si"
5756 (match_operand:V16SI 1 "nonimmediate_operand")
5757 (parallel [(const_int 8) (const_int 9)
5758 (const_int 10) (const_int 11)
5759 (const_int 12) (const_int 13)
5760 (const_int 14) (const_int 15)])))
5761 (set (match_operand:V8DF 0 "register_operand")
5765 "operands[2] = gen_reg_rtx (V8SImode);")
;; 512-bit low half: convert elements 0..7 directly.
5767 (define_expand "vec_unpacks_float_lo_v16si"
5768 [(set (match_operand:V8DF 0 "register_operand")
5771 (match_operand:V16SI 1 "nonimmediate_operand")
5772 (parallel [(const_int 0) (const_int 1)
5773 (const_int 2) (const_int 3)
5774 (const_int 4) (const_int 5)
5775 (const_int 6) (const_int 7)]))))]
;; Unsigned SI -> DF unpack-float, high half, without native unsigned
;; conversion: convert as signed, then for each element that came out
;; negative (lt against zero in operand 3) add 2^32 (operand 4) to
;; correct it back into unsigned range.
5778 (define_expand "vec_unpacku_float_hi_v4si"
5781 (match_operand:V4SI 1 "vector_operand")
5782 (parallel [(const_int 2) (const_int 3)
5783 (const_int 2) (const_int 3)])))
5788 (parallel [(const_int 0) (const_int 1)]))))
5790 (lt:V2DF (match_dup 6) (match_dup 3)))
5792 (and:V2DF (match_dup 7) (match_dup 4)))
5793 (set (match_operand:V2DF 0 "register_operand")
5794 (plus:V2DF (match_dup 6) (match_dup 8)))]
5797 REAL_VALUE_TYPE TWO32r;
;; Build the 2^32 correction constant and the temporaries used above.
5801 real_ldexp (&TWO32r, &dconst1, 32);
5802 x = const_double_from_real_value (TWO32r, DFmode);
5804 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5805 operands[4] = force_reg (V2DFmode,
5806 ix86_build_const_vector (V2DFmode, 1, x));
5808 operands[5] = gen_reg_rtx (V4SImode);
5810 for (i = 6; i < 9; i++)
5811 operands[i] = gen_reg_rtx (V2DFmode);
;; Low-half variant of the same signed-convert-plus-2^32 trick.
5814 (define_expand "vec_unpacku_float_lo_v4si"
5818 (match_operand:V4SI 1 "vector_operand")
5819 (parallel [(const_int 0) (const_int 1)]))))
5821 (lt:V2DF (match_dup 5) (match_dup 3)))
5823 (and:V2DF (match_dup 6) (match_dup 4)))
5824 (set (match_operand:V2DF 0 "register_operand")
5825 (plus:V2DF (match_dup 5) (match_dup 7)))]
5828 REAL_VALUE_TYPE TWO32r;
5832 real_ldexp (&TWO32r, &dconst1, 32);
5833 x = const_double_from_real_value (TWO32r, DFmode);
5835 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5836 operands[4] = force_reg (V2DFmode,
5837 ix86_build_const_vector (V2DFmode, 1, x));
5839 for (i = 5; i < 8; i++)
5840 operands[i] = gen_reg_rtx (V2DFmode);
;; 256-bit variant: extract the high V4SI half, convert it to V4DF as
;; signed, then fix up unsigned inputs by adding 2^32 under a compare
;; mask (LT zero -> AND with the 2^32 vector -> add), all emitted as
;; explicit insns here rather than as an RTL template.
5843 (define_expand "vec_unpacku_float_hi_v8si"
5844 [(match_operand:V4DF 0 "register_operand")
5845 (match_operand:V8SI 1 "register_operand")]
5848 REAL_VALUE_TYPE TWO32r;
5852 real_ldexp (&TWO32r, &dconst1, 32);
5853 x = const_double_from_real_value (TWO32r, DFmode);
5855 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5856 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5857 tmp[5] = gen_reg_rtx (V4SImode);
5859 for (i = 2; i < 5; i++)
5860 tmp[i] = gen_reg_rtx (V4DFmode);
5861 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5862 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5863 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5864 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5865 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; 512-bit variant: with AVX-512 no AND step is needed - the LT-zero
;; compare lands in a QImode mask register k, and a masked add
;; (gen_addv8df3_mask) applies +2^32 only to the lanes the signed
;; conversion made negative.
5869 (define_expand "vec_unpacku_float_hi_v16si"
5870 [(match_operand:V8DF 0 "register_operand")
5871 (match_operand:V16SI 1 "register_operand")]
5874 REAL_VALUE_TYPE TWO32r;
5877 real_ldexp (&TWO32r, &dconst1, 32);
5878 x = const_double_from_real_value (TWO32r, DFmode);
5880 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5881 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5882 tmp[2] = gen_reg_rtx (V8DFmode);
5883 tmp[3] = gen_reg_rtx (V8SImode);
5884 k = gen_reg_rtx (QImode);
5886 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5887 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5888 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5889 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5890 emit_move_insn (operands[0], tmp[2]);
;; Low-half 256-bit variant: gen_avx_cvtdq2pd256_2 converts the low
;; V4SI of operand 1 directly, then the usual unsigned fixup follows
;; (compare mask, AND with 2^32, add).
5894 (define_expand "vec_unpacku_float_lo_v8si"
5895 [(match_operand:V4DF 0 "register_operand")
5896 (match_operand:V8SI 1 "nonimmediate_operand")]
5899 REAL_VALUE_TYPE TWO32r;
5903 real_ldexp (&TWO32r, &dconst1, 32);
5904 x = const_double_from_real_value (TWO32r, DFmode);
5906 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5907 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5909 for (i = 2; i < 5; i++)
5910 tmp[i] = gen_reg_rtx (V4DFmode);
5911 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5912 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5913 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5914 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Low-half 512-bit variant: direct low-half conversion via
;; gen_avx512f_cvtdq2pd512_2, then the AVX-512 masked-add fixup
;; (+2^32 under the k mask of negative lanes).
5918 (define_expand "vec_unpacku_float_lo_v16si"
5919 [(match_operand:V8DF 0 "register_operand")
5920 (match_operand:V16SI 1 "nonimmediate_operand")]
5923 REAL_VALUE_TYPE TWO32r;
5926 real_ldexp (&TWO32r, &dconst1, 32);
5927 x = const_double_from_real_value (TWO32r, DFmode);
5929 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5930 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5931 tmp[2] = gen_reg_rtx (V8DFmode);
5932 k = gen_reg_rtx (QImode);
5934 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5935 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5936 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5937 emit_move_insn (operands[0], tmp[2]);
;; Narrow two double vectors (512/256-bit) to one single-precision
;; vector: each input is float_truncate'd into a half-width temporary
;; (operands 3 and 4) and the two results are concatenated.
5941 (define_expand "vec_pack_trunc_<mode>"
5943 (float_truncate:<sf2dfmode>
5944 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5946 (float_truncate:<sf2dfmode>
5947 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5948 (set (match_operand:<ssePSmode> 0 "register_operand")
5949 (vec_concat:<ssePSmode>
5954 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5955 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; Pack two V2DF into V4SF.  With AVX (unless 128-bit operation is
;; preferred) the inputs are concatenated into V4DF and a single
;; vcvtpd2ps256 converts them; otherwise two cvtpd2ps results are
;; merged with movlhps.
5958 (define_expand "vec_pack_trunc_v2df"
5959 [(match_operand:V4SF 0 "register_operand")
5960 (match_operand:V2DF 1 "vector_operand")
5961 (match_operand:V2DF 2 "vector_operand")]
5966 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5968 tmp0 = gen_reg_rtx (V4DFmode);
5969 tmp1 = force_reg (V2DFmode, operands[1]);
5971 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5972 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5976 tmp0 = gen_reg_rtx (V4SFmode);
5977 tmp1 = gen_reg_rtx (V4SFmode);
5979 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5980 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5981 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Pack two V8DF into V16SI using truncating (toward-zero) double ->
;; signed-int conversion, then concatenate the two V8SI halves.
5986 (define_expand "vec_pack_sfix_trunc_v8df"
5987 [(match_operand:V16SI 0 "register_operand")
5988 (match_operand:V8DF 1 "nonimmediate_operand")
5989 (match_operand:V8DF 2 "nonimmediate_operand")]
5994 r1 = gen_reg_rtx (V8SImode);
5995 r2 = gen_reg_rtx (V8SImode);
5997 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5998 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5999 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Same pattern one size down: two V4DF truncating converts
;; concatenated into V8SI.
6003 (define_expand "vec_pack_sfix_trunc_v4df"
6004 [(match_operand:V8SI 0 "register_operand")
6005 (match_operand:V4DF 1 "nonimmediate_operand")
6006 (match_operand:V4DF 2 "nonimmediate_operand")]
6011 r1 = gen_reg_rtx (V4SImode);
6012 r2 = gen_reg_rtx (V4SImode);
6014 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
6015 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
6016 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Pack two V2DF into V4SI, truncating.  AVX path: concatenate to V4DF
;; and use one 256-bit truncating convert.  SSE path: two cvttpd2dq
;; results are combined by interleaving their low V2DI quadwords, then
;; the V2DI result is viewed back as V4SI.
6020 (define_expand "vec_pack_sfix_trunc_v2df"
6021 [(match_operand:V4SI 0 "register_operand")
6022 (match_operand:V2DF 1 "vector_operand")
6023 (match_operand:V2DF 2 "vector_operand")]
6026 rtx tmp0, tmp1, tmp2;
6028 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6030 tmp0 = gen_reg_rtx (V4DFmode);
6031 tmp1 = force_reg (V2DFmode, operands[1]);
6033 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6034 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
6038 tmp0 = gen_reg_rtx (V4SImode);
6039 tmp1 = gen_reg_rtx (V4SImode);
6040 tmp2 = gen_reg_rtx (V2DImode);
6042 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
6043 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
6044 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6045 gen_lowpart (V2DImode, tmp0),
6046 gen_lowpart (V2DImode, tmp1)));
6047 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Integer result mode when packing two VF2 double vectors.
6052 (define_mode_attr ssepackfltmode
6053 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Unsigned pack-trunc.  V8DF has a real unsigned convert
;; (gen_ufix_truncv8dfv8si2), so that case is two converts plus a
;; concat.  Otherwise each input is adjusted into signed range by
;; ix86_expand_adjust_ufix_to_sfix_si (which also hands back a
;; per-input XOR fixup in tmp[2]/tmp[3]); the adjusted values go
;; through the signed pack, the two fixups are merged with an even/odd
;; extract (via an V8SF view when AVX2 is unavailable for the 256-bit
;; integer shuffle), and a final XOR produces the unsigned result.
6055 (define_expand "vec_pack_ufix_trunc_<mode>"
6056 [(match_operand:<ssepackfltmode> 0 "register_operand")
6057 (match_operand:VF2 1 "register_operand")
6058 (match_operand:VF2 2 "register_operand")]
6061 if (<MODE>mode == V8DFmode)
6065 r1 = gen_reg_rtx (V8SImode);
6066 r2 = gen_reg_rtx (V8SImode);
6068 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
6069 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
6070 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6075 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
6076 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
6077 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
6078 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
6079 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
6081 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
6082 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
6086 tmp[5] = gen_reg_rtx (V8SFmode);
6087 ix86_expand_vec_extract_even_odd (tmp[5],
6088 gen_lowpart (V8SFmode, tmp[2]),
6089 gen_lowpart (V8SFmode, tmp[3]), 0);
6090 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
6092 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
6093 operands[0], 0, OPTAB_DIRECT);
6094 if (tmp[6] != operands[0])
6095 emit_move_insn (operands[0], tmp[6]);
;; Rounding (non-truncating) double -> signed-int pack, 512-bit:
;; two vcvtpd2dq512 conversions concatenated into V16SI.
6101 (define_expand "avx512f_vec_pack_sfix_v8df"
6102 [(match_operand:V16SI 0 "register_operand")
6103 (match_operand:V8DF 1 "nonimmediate_operand")
6104 (match_operand:V8DF 2 "nonimmediate_operand")]
6109 r1 = gen_reg_rtx (V8SImode);
6110 r2 = gen_reg_rtx (V8SImode);
6112 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
6113 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
6114 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; 256-bit rounding pack: two vcvtpd2dq256 results concatenated.
6118 (define_expand "vec_pack_sfix_v4df"
6119 [(match_operand:V8SI 0 "register_operand")
6120 (match_operand:V4DF 1 "nonimmediate_operand")
6121 (match_operand:V4DF 2 "nonimmediate_operand")]
6126 r1 = gen_reg_rtx (V4SImode);
6127 r2 = gen_reg_rtx (V4SImode);
6129 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
6130 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
6131 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; 128-bit rounding pack: AVX concat + one 256-bit convert when
;; profitable, otherwise two cvtpd2dq merged via a V2DI low interleave.
6135 (define_expand "vec_pack_sfix_v2df"
6136 [(match_operand:V4SI 0 "register_operand")
6137 (match_operand:V2DF 1 "vector_operand")
6138 (match_operand:V2DF 2 "vector_operand")]
6141 rtx tmp0, tmp1, tmp2;
6143 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6145 tmp0 = gen_reg_rtx (V4DFmode);
6146 tmp1 = force_reg (V2DFmode, operands[1]);
6148 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6149 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
6153 tmp0 = gen_reg_rtx (V4SImode);
6154 tmp1 = gen_reg_rtx (V4SImode);
6155 tmp2 = gen_reg_rtx (V2DImode);
6157 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
6158 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
6159 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6160 gen_lowpart (V2DImode, tmp0),
6161 gen_lowpart (V2DImode, tmp1)));
6162 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6167 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6169 ;; Parallel single-precision floating point element swizzling
6171 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for movhlps that also accepts a memory destination:
;; ix86_fixup_binary_operands may substitute a register destination,
;; in which case an extra move stores the result back.
6173 (define_expand "sse_movhlps_exp"
6174 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6177 (match_operand:V4SF 1 "nonimmediate_operand")
6178 (match_operand:V4SF 2 "nonimmediate_operand"))
6179 (parallel [(const_int 6)
6185 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6187 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
6189 /* Fix up the destination if needed.  */
6190 if (dst != operands[0])
6191 emit_move_insn (operands[0], dst);
;; movhlps: move the high two SF elements of operand 2 into the low
;; half of the destination.  Alternatives cover SSE two-operand and
;; AVX three-operand register forms plus memory variants that use
;; movlps/movhps addressing (%H accesses the high 8 bytes).
6196 (define_insn "sse_movhlps"
6197 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
6200 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6201 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
6202 (parallel [(const_int 6)
6206 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6208 movhlps\t{%2, %0|%0, %2}
6209 vmovhlps\t{%2, %1, %0|%0, %1, %2}
6210 movlps\t{%H2, %0|%0, %H2}
6211 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
6212 %vmovhps\t{%2, %0|%q0, %2}"
6213 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6214 (set_attr "type" "ssemov")
6215 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6216 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander for movlhps mirroring sse_movhlps_exp: fix up operands so
;; a memory destination works, then store back if a temporary register
;; destination was substituted.
6218 (define_expand "sse_movlhps_exp"
6219 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6222 (match_operand:V4SF 1 "nonimmediate_operand")
6223 (match_operand:V4SF 2 "nonimmediate_operand"))
6224 (parallel [(const_int 0)
6230 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6232 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
6234 /* Fix up the destination if needed.  */
6235 if (dst != operands[0])
6236 emit_move_insn (operands[0], dst);
;; movlhps: move the low two SF elements of operand 2 into the high
;; half of the destination; memory alternatives use movhps/movlps
;; forms (%H = high 8 bytes of a memory destination).
6241 (define_insn "sse_movlhps"
6242 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
6245 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6246 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
6247 (parallel [(const_int 0)
6251 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
6253 movlhps\t{%2, %0|%0, %2}
6254 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6255 movhps\t{%2, %0|%0, %q2}
6256 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6257 %vmovlps\t{%2, %H0|%H0, %2}"
6258 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6259 (set_attr "type" "ssemov")
6260 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6261 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; 512-bit vunpckhps: like the 256-bit form it interleaves the high
;; element pairs within each 128-bit lane, as the selector indices
;; below show (2,18 / 3,19 for lane 0, 6,22 / 7,23 for lane 1, ...).
6263 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
6264 [(set (match_operand:V16SF 0 "register_operand" "=v")
6267 (match_operand:V16SF 1 "register_operand" "v")
6268 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6269 (parallel [(const_int 2) (const_int 18)
6270 (const_int 3) (const_int 19)
6271 (const_int 6) (const_int 22)
6272 (const_int 7) (const_int 23)
6273 (const_int 10) (const_int 26)
6274 (const_int 11) (const_int 27)
6275 (const_int 14) (const_int 30)
6276 (const_int 15) (const_int 31)])))]
6278 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6279 [(set_attr "type" "sselog")
6280 (set_attr "prefix" "evex")
6281 (set_attr "mode" "V16SF")])
6283 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
6284 (define_insn "avx_unpckhps256<mask_name>"
6285 [(set (match_operand:V8SF 0 "register_operand" "=v")
6288 (match_operand:V8SF 1 "register_operand" "v")
6289 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6290 (parallel [(const_int 2) (const_int 10)
6291 (const_int 3) (const_int 11)
6292 (const_int 6) (const_int 14)
6293 (const_int 7) (const_int 15)])))]
6294 "TARGET_AVX && <mask_avx512vl_condition>"
6295 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6296 [(set_attr "type" "sselog")
6297 (set_attr "prefix" "vex")
6298 (set_attr "mode" "V8SF")])
;; True cross-lane high interleave for V8SF.  Because the 256-bit
;; unpck insns stay within lanes, it is built from an in-lane low
;; unpck (operand 3), an in-lane high unpck (operand 4) and a final
;; permute that selects the high lane of each temporary.
6300 (define_expand "vec_interleave_highv8sf"
6304 (match_operand:V8SF 1 "register_operand")
6305 (match_operand:V8SF 2 "nonimmediate_operand"))
6306 (parallel [(const_int 0) (const_int 8)
6307 (const_int 1) (const_int 9)
6308 (const_int 4) (const_int 12)
6309 (const_int 5) (const_int 13)])))
6315 (parallel [(const_int 2) (const_int 10)
6316 (const_int 3) (const_int 11)
6317 (const_int 6) (const_int 14)
6318 (const_int 7) (const_int 15)])))
6319 (set (match_operand:V8SF 0 "register_operand")
6324 (parallel [(const_int 4) (const_int 5)
6325 (const_int 6) (const_int 7)
6326 (const_int 12) (const_int 13)
6327 (const_int 14) (const_int 15)])))]
6330 operands[3] = gen_reg_rtx (V8SFmode);
6331 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit unpckhps (optionally EVEX-masked under AVX-512VL).
6334 (define_insn "vec_interleave_highv4sf<mask_name>"
6335 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6338 (match_operand:V4SF 1 "register_operand" "0,v")
6339 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
6340 (parallel [(const_int 2) (const_int 6)
6341 (const_int 3) (const_int 7)])))]
6342 "TARGET_SSE && <mask_avx512vl_condition>"
6344 unpckhps\t{%2, %0|%0, %2}
6345 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6346 [(set_attr "isa" "noavx,avx")
6347 (set_attr "type" "sselog")
6348 (set_attr "prefix" "orig,vex")
6349 (set_attr "mode" "V4SF")])
;; 512-bit vunpcklps: interleaves the low element pairs within each
;; 128-bit lane (indices 0,16 / 1,17 for lane 0, 4,20 / 5,21 for
;; lane 1, ...).
6351 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
6352 [(set (match_operand:V16SF 0 "register_operand" "=v")
6355 (match_operand:V16SF 1 "register_operand" "v")
6356 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6357 (parallel [(const_int 0) (const_int 16)
6358 (const_int 1) (const_int 17)
6359 (const_int 4) (const_int 20)
6360 (const_int 5) (const_int 21)
6361 (const_int 8) (const_int 24)
6362 (const_int 9) (const_int 25)
6363 (const_int 12) (const_int 28)
6364 (const_int 13) (const_int 29)])))]
6366 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6367 [(set_attr "type" "sselog")
6368 (set_attr "prefix" "evex")
6369 (set_attr "mode" "V16SF")])
6371 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
6372 (define_insn "avx_unpcklps256<mask_name>"
6373 [(set (match_operand:V8SF 0 "register_operand" "=v")
6376 (match_operand:V8SF 1 "register_operand" "v")
6377 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6378 (parallel [(const_int 0) (const_int 8)
6379 (const_int 1) (const_int 9)
6380 (const_int 4) (const_int 12)
6381 (const_int 5) (const_int 13)])))]
6382 "TARGET_AVX && <mask_avx512vl_condition>"
6383 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6384 [(set_attr "type" "sselog")
6385 (set_attr "prefix" "vex")
6386 (set_attr "mode" "V8SF")])
;; Explicitly masked 128-bit vunpcklps: merge (operand 3 = "0") or
;; zero (operand 3 = "C") masking under the k register in operand 4.
6388 (define_insn "unpcklps128_mask"
6389 [(set (match_operand:V4SF 0 "register_operand" "=v")
6393 (match_operand:V4SF 1 "register_operand" "v")
6394 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6395 (parallel [(const_int 0) (const_int 4)
6396 (const_int 1) (const_int 5)]))
6397 (match_operand:V4SF 3 "vector_move_operand" "0C")
6398 (match_operand:QI 4 "register_operand" "Yk")))]
6400 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
6401 [(set_attr "type" "sselog")
6402 (set_attr "prefix" "evex")
6403 (set_attr "mode" "V4SF")])
;; True cross-lane low interleave for V8SF, composed like
;; vec_interleave_highv8sf but selecting the low lane of each
;; in-lane unpck temporary in the final permute.
6405 (define_expand "vec_interleave_lowv8sf"
6409 (match_operand:V8SF 1 "register_operand")
6410 (match_operand:V8SF 2 "nonimmediate_operand"))
6411 (parallel [(const_int 0) (const_int 8)
6412 (const_int 1) (const_int 9)
6413 (const_int 4) (const_int 12)
6414 (const_int 5) (const_int 13)])))
6420 (parallel [(const_int 2) (const_int 10)
6421 (const_int 3) (const_int 11)
6422 (const_int 6) (const_int 14)
6423 (const_int 7) (const_int 15)])))
6424 (set (match_operand:V8SF 0 "register_operand")
6429 (parallel [(const_int 0) (const_int 1)
6430 (const_int 2) (const_int 3)
6431 (const_int 8) (const_int 9)
6432 (const_int 10) (const_int 11)])))]
6435 operands[3] = gen_reg_rtx (V8SFmode);
6436 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit unpcklps, SSE two-operand or AVX three-operand form.
6439 (define_insn "vec_interleave_lowv4sf"
6440 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6443 (match_operand:V4SF 1 "register_operand" "0,v")
6444 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
6445 (parallel [(const_int 0) (const_int 4)
6446 (const_int 1) (const_int 5)])))]
6449 unpcklps\t{%2, %0|%0, %2}
6450 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
6451 [(set_attr "isa" "noavx,avx")
6452 (set_attr "type" "sselog")
6453 (set_attr "prefix" "orig,maybe_evex")
6454 (set_attr "mode" "V4SF")])
6456 ;; These are modeled with the same vec_concat as the others so that we
6457 ;; capture users of shufps that can use the new instructions
;; movshdup duplicates the odd-indexed elements (1,3,5,7 per the
;; selector below) into each even/odd pair.
6458 (define_insn "avx_movshdup256<mask_name>"
6459 [(set (match_operand:V8SF 0 "register_operand" "=v")
6462 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6464 (parallel [(const_int 1) (const_int 1)
6465 (const_int 3) (const_int 3)
6466 (const_int 5) (const_int 5)
6467 (const_int 7) (const_int 7)])))]
6468 "TARGET_AVX && <mask_avx512vl_condition>"
6469 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6470 [(set_attr "type" "sse")
6471 (set_attr "prefix" "vex")
6472 (set_attr "mode" "V8SF")])
;; 128-bit movshdup (SSE3), optionally masked under AVX-512VL.
6474 (define_insn "sse3_movshdup<mask_name>"
6475 [(set (match_operand:V4SF 0 "register_operand" "=v")
6478 (match_operand:V4SF 1 "vector_operand" "vBm")
6480 (parallel [(const_int 1)
6484 "TARGET_SSE3 && <mask_avx512vl_condition>"
6485 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6486 [(set_attr "type" "sse")
6487 (set_attr "prefix_rep" "1")
6488 (set_attr "prefix" "maybe_vex")
6489 (set_attr "mode" "V4SF")])
;; 512-bit movshdup: odd-element duplication across all 16 elements.
6491 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
6492 [(set (match_operand:V16SF 0 "register_operand" "=v")
6495 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6497 (parallel [(const_int 1) (const_int 1)
6498 (const_int 3) (const_int 3)
6499 (const_int 5) (const_int 5)
6500 (const_int 7) (const_int 7)
6501 (const_int 9) (const_int 9)
6502 (const_int 11) (const_int 11)
6503 (const_int 13) (const_int 13)
6504 (const_int 15) (const_int 15)])))]
6506 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6507 [(set_attr "type" "sse")
6508 (set_attr "prefix" "evex")
6509 (set_attr "mode" "V16SF")])
;; movsldup duplicates the even-indexed elements (0,2,4,6).
6511 (define_insn "avx_movsldup256<mask_name>"
6512 [(set (match_operand:V8SF 0 "register_operand" "=v")
6515 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6517 (parallel [(const_int 0) (const_int 0)
6518 (const_int 2) (const_int 2)
6519 (const_int 4) (const_int 4)
6520 (const_int 6) (const_int 6)])))]
6521 "TARGET_AVX && <mask_avx512vl_condition>"
6522 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6523 [(set_attr "type" "sse")
6524 (set_attr "prefix" "vex")
6525 (set_attr "mode" "V8SF")])
;; 128-bit movsldup (SSE3).
6527 (define_insn "sse3_movsldup<mask_name>"
6528 [(set (match_operand:V4SF 0 "register_operand" "=v")
6531 (match_operand:V4SF 1 "vector_operand" "vBm")
6533 (parallel [(const_int 0)
6537 "TARGET_SSE3 && <mask_avx512vl_condition>"
6538 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6539 [(set_attr "type" "sse")
6540 (set_attr "prefix_rep" "1")
6541 (set_attr "prefix" "maybe_vex")
6542 (set_attr "mode" "V4SF")])
;; 512-bit movsldup: even-element duplication across all 16 elements.
6544 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
6545 [(set (match_operand:V16SF 0 "register_operand" "=v")
6548 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6550 (parallel [(const_int 0) (const_int 0)
6551 (const_int 2) (const_int 2)
6552 (const_int 4) (const_int 4)
6553 (const_int 6) (const_int 6)
6554 (const_int 8) (const_int 8)
6555 (const_int 10) (const_int 10)
6556 (const_int 12) (const_int 12)
6557 (const_int 14) (const_int 14)])))]
6559 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6560 [(set_attr "type" "sse")
6561 (set_attr "prefix" "evex")
6562 (set_attr "mode" "V16SF")])
;; Expand vshufps (256-bit) from its 8-bit immediate: the immediate is
;; decomposed into eight per-element selector constants.  Indices 0-7
;; name operand 1 and 8-15 operand 2 in the concat, so the +8/+12
;; offsets select from operand 2 and the +4/+12 offsets address the
;; second 128-bit lane, replicating the first lane's selection.
6564 (define_expand "avx_shufps256<mask_expand4_name>"
6565 [(match_operand:V8SF 0 "register_operand")
6566 (match_operand:V8SF 1 "register_operand")
6567 (match_operand:V8SF 2 "nonimmediate_operand")
6568 (match_operand:SI 3 "const_int_operand")]
6571 int mask = INTVAL (operands[3]);
6572 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6575 GEN_INT ((mask >> 0) & 3),
6576 GEN_INT ((mask >> 2) & 3),
6577 GEN_INT (((mask >> 4) & 3) + 8),
6578 GEN_INT (((mask >> 6) & 3) + 8),
6579 GEN_INT (((mask >> 0) & 3) + 4),
6580 GEN_INT (((mask >> 2) & 3) + 4),
6581 GEN_INT (((mask >> 4) & 3) + 12),
6582 GEN_INT (((mask >> 6) & 3) + 12)
6583 <mask_expand4_args>));
6587 ;; One bit in mask selects 2 elements.
;; The matching insn: the condition requires the second lane to repeat
;; the first lane's selection (each high-lane index = low-lane index
;; + 4), and the output code re-assembles the 8-bit immediate from the
;; four low-lane selector operands.
6588 (define_insn "avx_shufps256_1<mask_name>"
6589 [(set (match_operand:V8SF 0 "register_operand" "=v")
6592 (match_operand:V8SF 1 "register_operand" "v")
6593 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6594 (parallel [(match_operand 3 "const_0_to_3_operand" )
6595 (match_operand 4 "const_0_to_3_operand" )
6596 (match_operand 5 "const_8_to_11_operand" )
6597 (match_operand 6 "const_8_to_11_operand" )
6598 (match_operand 7 "const_4_to_7_operand" )
6599 (match_operand 8 "const_4_to_7_operand" )
6600 (match_operand 9 "const_12_to_15_operand")
6601 (match_operand 10 "const_12_to_15_operand")])))]
6603 && <mask_avx512vl_condition>
6604 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6605 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6606 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6607 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6610 mask = INTVAL (operands[3]);
6611 mask |= INTVAL (operands[4]) << 2;
6612 mask |= (INTVAL (operands[5]) - 8) << 4;
6613 mask |= (INTVAL (operands[6]) - 8) << 6;
6614 operands[3] = GEN_INT (mask);
6616 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6618 [(set_attr "type" "sseshuf")
6619 (set_attr "length_immediate" "1")
6620 (set_attr "prefix" "<mask_prefix>")
6621 (set_attr "mode" "V8SF")])
;; Expand 128-bit shufps from its 8-bit immediate into four selector
;; constants; the +4 offsets pick from operand 2 in the concat.
6623 (define_expand "sse_shufps<mask_expand4_name>"
6624 [(match_operand:V4SF 0 "register_operand")
6625 (match_operand:V4SF 1 "register_operand")
6626 (match_operand:V4SF 2 "vector_operand")
6627 (match_operand:SI 3 "const_int_operand")]
6630 int mask = INTVAL (operands[3]);
6631 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6634 GEN_INT ((mask >> 0) & 3),
6635 GEN_INT ((mask >> 2) & 3),
6636 GEN_INT (((mask >> 4) & 3) + 4),
6637 GEN_INT (((mask >> 6) & 3) + 4)
6638 <mask_expand4_args>));
;; EVEX-masked vshufps: merge (operand 7 = "0") or zero ("C") masking
;; under the k register in operand 8; the immediate is rebuilt from
;; the four selector operands.
6642 (define_insn "sse_shufps_v4sf_mask"
6643 [(set (match_operand:V4SF 0 "register_operand" "=v")
6647 (match_operand:V4SF 1 "register_operand" "v")
6648 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6649 (parallel [(match_operand 3 "const_0_to_3_operand")
6650 (match_operand 4 "const_0_to_3_operand")
6651 (match_operand 5 "const_4_to_7_operand")
6652 (match_operand 6 "const_4_to_7_operand")]))
6653 (match_operand:V4SF 7 "vector_move_operand" "0C")
6654 (match_operand:QI 8 "register_operand" "Yk")))]
6658 mask |= INTVAL (operands[3]) << 0;
6659 mask |= INTVAL (operands[4]) << 2;
6660 mask |= (INTVAL (operands[5]) - 4) << 4;
6661 mask |= (INTVAL (operands[6]) - 4) << 6;
6662 operands[3] = GEN_INT (mask);
6664 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6666 [(set_attr "type" "sseshuf")
6667 (set_attr "length_immediate" "1")
6668 (set_attr "prefix" "evex")
6669 (set_attr "mode" "V4SF")])
;; Generic 128-bit shufps over the float/int V4 modes (VI4F_128);
;; alternative 0 is the two-operand SSE form, alternative 1 the
;; three-operand AVX form.
6671 (define_insn "sse_shufps_<mode>"
6672 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
6673 (vec_select:VI4F_128
6674 (vec_concat:<ssedoublevecmode>
6675 (match_operand:VI4F_128 1 "register_operand" "0,v")
6676 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
6677 (parallel [(match_operand 3 "const_0_to_3_operand")
6678 (match_operand 4 "const_0_to_3_operand")
6679 (match_operand 5 "const_4_to_7_operand")
6680 (match_operand 6 "const_4_to_7_operand")])))]
6684 mask |= INTVAL (operands[3]) << 0;
6685 mask |= INTVAL (operands[4]) << 2;
6686 mask |= (INTVAL (operands[5]) - 4) << 4;
6687 mask |= (INTVAL (operands[6]) - 4) << 6;
6688 operands[3] = GEN_INT (mask);
6690 switch (which_alternative)
6693 return "shufps\t{%3, %2, %0|%0, %2, %3}";
6695 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6700 [(set_attr "isa" "noavx,avx")
6701 (set_attr "type" "sseshuf")
6702 (set_attr "length_immediate" "1")
6703 (set_attr "prefix" "orig,maybe_evex")
6704 (set_attr "mode" "V4SF")])
;; Extract the high two SF elements (indices 2,3) to memory or a
;; register; register destinations use movhlps or a movlps from the
;; high memory half (%H).
6706 (define_insn "sse_storehps"
6707 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
6709 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
6710 (parallel [(const_int 2) (const_int 3)])))]
6711 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6713 %vmovhps\t{%1, %0|%q0, %1}
6714 %vmovhlps\t{%1, %d0|%d0, %1}
6715 %vmovlps\t{%H1, %d0|%d0, %H1}"
6716 [(set_attr "type" "ssemov")
6717 (set_attr "prefix" "maybe_vex")
6718 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for loadhps allowing a memory destination; fixes up the
;; operands and stores back if a temporary register was substituted.
6720 (define_expand "sse_loadhps_exp"
6721 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6724 (match_operand:V4SF 1 "nonimmediate_operand")
6725 (parallel [(const_int 0) (const_int 1)]))
6726 (match_operand:V2SF 2 "nonimmediate_operand")))]
6729 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6731 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6733 /* Fix up the destination if needed.  */
6734 if (dst != operands[0])
6735 emit_move_insn (operands[0], dst);
;; Keep the low half of operand 1 and replace the high half with
;; operand 2 (memory -> movhps, register -> movlhps; %H writes the
;; high 8 bytes of a memory destination).
6740 (define_insn "sse_loadhps"
6741 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
6744 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6745 (parallel [(const_int 0) (const_int 1)]))
6746 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
6749 movhps\t{%2, %0|%0, %q2}
6750 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6751 movlhps\t{%2, %0|%0, %2}
6752 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6753 %vmovlps\t{%2, %H0|%H0, %2}"
6754 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6755 (set_attr "type" "ssemov")
6756 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6757 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract the low two SF elements (indices 0,1).
6759 (define_insn "sse_storelps"
6760 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
6762 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
6763 (parallel [(const_int 0) (const_int 1)])))]
6764 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6766 %vmovlps\t{%1, %0|%q0, %1}
6767 %vmovaps\t{%1, %0|%0, %1}
6768 %vmovlps\t{%1, %d0|%d0, %q1}"
6769 [(set_attr "type" "ssemov")
6770 (set_attr "prefix" "maybe_vex")
6771 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for loadlps allowing a memory destination, with the usual
;; fixup/store-back dance.
6773 (define_expand "sse_loadlps_exp"
6774 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6776 (match_operand:V2SF 2 "nonimmediate_operand")
6778 (match_operand:V4SF 1 "nonimmediate_operand")
6779 (parallel [(const_int 2) (const_int 3)]))))]
6782 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6784 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6786 /* Fix up the destination if needed.  */
6787 if (dst != operands[0])
6788 emit_move_insn (operands[0], dst);
;; Replace the low half of operand 1 with operand 2, keeping the high
;; half; register-to-register merge uses shufps with immediate 0xe4.
6793 (define_insn "sse_loadlps"
6794 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
6796 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
6798 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
6799 (parallel [(const_int 2) (const_int 3)]))))]
6802 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6803 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6804 movlps\t{%2, %0|%0, %q2}
6805 vmovlps\t{%2, %1, %0|%0, %1, %q2}
6806 %vmovlps\t{%2, %0|%q0, %2}"
6807 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6808 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6809 (set (attr "length_immediate")
6810 (if_then_else (eq_attr "alternative" "0,1")
6812 (const_string "*")))
6813 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6814 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movss: copy the lowest SF element of operand 2 into operand 1,
;; keeping operand 1's upper elements.
6816 (define_insn "sse_movss"
6817 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6819 (match_operand:V4SF 2 "register_operand" " x,v")
6820 (match_operand:V4SF 1 "register_operand" " 0,v")
6824 movss\t{%2, %0|%0, %2}
6825 vmovss\t{%2, %1, %0|%0, %1, %2}"
6826 [(set_attr "isa" "noavx,avx")
6827 (set_attr "type" "ssemov")
6828 (set_attr "prefix" "orig,maybe_evex")
6829 (set_attr "mode" "SF")])
;; Broadcast element 0 of a V4SF register to a full 128/256-bit
;; vector with vbroadcastss.
6831 (define_insn "avx2_vec_dup<mode>"
6832 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
6833 (vec_duplicate:VF1_128_256
6835 (match_operand:V4SF 1 "register_operand" "v")
6836 (parallel [(const_int 0)]))))]
6838 "vbroadcastss\t{%1, %0|%0, %1}"
6839 [(set_attr "type" "sselog1")
6840 (set_attr "prefix" "maybe_evex")
6841 (set_attr "mode" "<MODE>")])
;; Broadcast element 0 of a V8SF register (via its low xmm part, %x1).
6843 (define_insn "avx2_vec_dupv8sf_1"
6844 [(set (match_operand:V8SF 0 "register_operand" "=v")
6847 (match_operand:V8SF 1 "register_operand" "v")
6848 (parallel [(const_int 0)]))))]
6850 "vbroadcastss\t{%x1, %0|%0, %x1}"
6851 [(set_attr "type" "sselog1")
6852 (set_attr "prefix" "maybe_evex")
6853 (set_attr "mode" "V8SF")])
;; 512-bit broadcast of element 0, float or double
;; (<bcstscalarsuff> selects ss/sd).
6855 (define_insn "avx512f_vec_dup<mode>_1"
6856 [(set (match_operand:VF_512 0 "register_operand" "=v")
6857 (vec_duplicate:VF_512
6858 (vec_select:<ssescalarmode>
6859 (match_operand:VF_512 1 "register_operand" "v")
6860 (parallel [(const_int 0)]))))]
6862 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
6863 [(set_attr "type" "sselog1")
6864 (set_attr "prefix" "evex")
6865 (set_attr "mode" "<MODE>")])
6867 ;; Although insertps takes register source, we prefer
6868 ;; unpcklps with register source since it is shorter.
6869 (define_insn "*vec_concatv2sf_sse4_1"
6870 [(set (match_operand:V2SF 0 "register_operand"
6871 "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
6873 (match_operand:SF 1 "nonimmediate_operand"
6874 " 0, 0,Yv, 0,0, v,m, 0 , m")
6875 (match_operand:SF 2 "vector_move_operand"
6876 " Yr,*x,Yv, m,m, m,C,*ym, C")))]
6877 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6879 unpcklps\t{%2, %0|%0, %2}
6880 unpcklps\t{%2, %0|%0, %2}
6881 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6882 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6883 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6884 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6885 %vmovss\t{%1, %0|%0, %1}
6886 punpckldq\t{%2, %0|%0, %2}
6887 movd\t{%1, %0|%0, %1}"
6889 (cond [(eq_attr "alternative" "0,1,3,4")
6890 (const_string "noavx")
6891 (eq_attr "alternative" "2,5")
6892 (const_string "avx")
6894 (const_string "*")))
6896 (cond [(eq_attr "alternative" "6")
6897 (const_string "ssemov")
6898 (eq_attr "alternative" "7")
6899 (const_string "mmxcvt")
6900 (eq_attr "alternative" "8")
6901 (const_string "mmxmov")
6903 (const_string "sselog")))
6904 (set (attr "prefix_data16")
6905 (if_then_else (eq_attr "alternative" "3,4")
6907 (const_string "*")))
6908 (set (attr "prefix_extra")
6909 (if_then_else (eq_attr "alternative" "3,4,5")
6911 (const_string "*")))
6912 (set (attr "length_immediate")
6913 (if_then_else (eq_attr "alternative" "3,4,5")
6915 (const_string "*")))
6916 (set (attr "prefix")
6917 (cond [(eq_attr "alternative" "2,5")
6918 (const_string "maybe_evex")
6919 (eq_attr "alternative" "6")
6920 (const_string "maybe_vex")
6922 (const_string "orig")))
6923 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6925 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6926 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
6927 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 V2SF concatenation: unpcklps / movss plus MMX
;; punpckldq / movd alternatives; operand 2 may be the zero vector (C).
;; NOTE(review): embedded numbering gaps (6930, 6933-6934) — the
;; vec_concat rtx, the insn condition and the "@" line are not shown.
6928 (define_insn "*vec_concatv2sf_sse"
6929 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6931 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6932 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6935 unpcklps\t{%2, %0|%0, %2}
6936 movss\t{%1, %0|%0, %1}
6937 punpckldq\t{%2, %0|%0, %2}
6938 movd\t{%1, %0|%0, %1}"
6939 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6940 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Concatenate two V2SF halves into a V4SF via movlhps (register source)
;; or movhps (memory source), with SSE and AVX three-operand variants.
;; NOTE(review): embedded numbering gaps (6944, 6947-6948) — the
;; vec_concat rtx, the condition and the "@" line are missing here.
6942 (define_insn "*vec_concatv4sf"
6943 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
6945 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
6946 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
6949 movlhps\t{%2, %0|%0, %2}
6950 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6951 movhps\t{%2, %0|%0, %q2}
6952 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6953 [(set_attr "isa" "noavx,avx,noavx,avx")
6954 (set_attr "type" "ssemov")
6955 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
6956 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
6958 ;; Avoid combining registers from different units in a single alternative,
6959 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a 4-element 128-bit vector (VI4F_128), leaving the
;; rest zeroed (vector_move_operand 1 is C for the zeroing alternatives)
;; or merged from operand 1.  Alternatives span insertps, scalar moves,
;; movss and pinsrd, including store-to-memory forms.
;; NOTE(review): several lines are missing per the embedded numbering
;; (6963, 6969-6971, 6983-6986, 6995, 6997, 7004, 7008, 7012, 7023),
;; including the vec_merge wrapper, condition, "@" and some attr heads.
6960 (define_insn "vec_set<mode>_0"
6961 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6962 "=Yr,*x,v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m")
6964 (vec_duplicate:VI4F_128
6965 (match_operand:<ssescalarmode> 2 "general_operand"
6966 " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
6967 (match_operand:VI4F_128 1 "vector_move_operand"
6968 " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
6972 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
6973 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
6974 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
6975 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6976 %vmovd\t{%2, %0|%0, %2}
6977 movss\t{%2, %0|%0, %2}
6978 movss\t{%2, %0|%0, %2}
6979 vmovss\t{%2, %1, %0|%0, %1, %2}
6980 pinsrd\t{$0, %2, %0|%0, %2, 0}
6981 pinsrd\t{$0, %2, %0|%0, %2, 0}
6982 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6987 (cond [(eq_attr "alternative" "0,1,8,9")
6988 (const_string "sse4_noavx")
6989 (eq_attr "alternative" "2,7,10")
6990 (const_string "avx")
6991 (eq_attr "alternative" "3,4")
6992 (const_string "sse2")
6993 (eq_attr "alternative" "5,6")
6994 (const_string "noavx")
6996 (const_string "*")))
6998 (cond [(eq_attr "alternative" "0,1,2,8,9,10")
6999 (const_string "sselog")
7000 (eq_attr "alternative" "12")
7001 (const_string "imov")
7002 (eq_attr "alternative" "13")
7003 (const_string "fmov")
7005 (const_string "ssemov")))
7006 (set (attr "prefix_extra")
7007 (if_then_else (eq_attr "alternative" "8,9,10")
7009 (const_string "*")))
7010 (set (attr "length_immediate")
7011 (if_then_else (eq_attr "alternative" "8,9,10")
7013 (const_string "*")))
7014 (set (attr "prefix")
7015 (cond [(eq_attr "alternative" "0,1,5,6,8,9")
7016 (const_string "orig")
7017 (eq_attr "alternative" "2")
7018 (const_string "maybe_evex")
7019 (eq_attr "alternative" "3,4")
7020 (const_string "maybe_vex")
7021 (eq_attr "alternative" "7,10")
7022 (const_string "vex")
7024 (const_string "*")))
7025 (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
7027 ;; A subset is vec_setv4sf.
;; Insert one SF element into a V4SF at the position encoded by the
;; single-bit immediate operand 3; converted to insertps/vinsertps with
;; the lane index shifted into bits 5:4 of the immediate.
;; NOTE(review): embedded numbering gaps (7030-7031, 7035, 7038,
;; 7041-7043, 7045, 7047-7050) — vec_merge rtx, condition start and
;; parts of the switch body are missing from this listing.
7028 (define_insn "*vec_setv4sf_sse4_1"
7029 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7032 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
7033 (match_operand:V4SF 1 "register_operand" "0,0,v")
7034 (match_operand:SI 3 "const_int_operand")))]
7036 && ((unsigned) exact_log2 (INTVAL (operands[3]))
7037 < GET_MODE_NUNITS (V4SFmode))"
7039 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
7040 switch (which_alternative)
7044 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7046 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7051 [(set_attr "isa" "noavx,noavx,avx")
7052 (set_attr "type" "sselog")
7053 (set_attr "prefix_data16" "1,1,*")
7054 (set_attr "prefix_extra" "1")
7055 (set_attr "length_immediate" "1")
7056 (set_attr "prefix" "orig,orig,maybe_evex")
7057 (set_attr "mode" "V4SF")])
;; Direct insertps builtin pattern (UNSPEC-based).  For a memory source
;; the count_s field (bits 7:6 of the immediate) is folded into the
;; memory address and cleared from the immediate, since insertps from
;; memory always reads one SFmode element.
;; NOTE(review): embedded numbering gaps (7064-7066, 7068, 7070, 7073,
;; 7075-7077, 7079, 7081-7084) — UNSPEC name, braces and parts of the
;; switch are missing from this listing.
7059 (define_insn "sse4_1_insertps"
7060 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7061 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
7062 (match_operand:V4SF 1 "register_operand" "0,0,v")
7063 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
7067 if (MEM_P (operands[2]))
7069 unsigned count_s = INTVAL (operands[3]) >> 6;
7071 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
7072 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
7074 switch (which_alternative)
7078 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7080 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7085 [(set_attr "isa" "noavx,noavx,avx")
7086 (set_attr "type" "sselog")
7087 (set_attr "prefix_data16" "1,1,*")
7088 (set_attr "prefix_extra" "1")
7089 (set_attr "length_immediate" "1")
7090 (set_attr "prefix" "orig,orig,maybe_evex")
7091 (set_attr "mode" "V4SF")])
;; Split a vec_merge store of a duplicated scalar into a plain scalar
;; store of element 0 after reload.
;; NOTE(review): the define_split header line (7093) and lines 7095 and
;; 7098-7099 (vec_merge wrapper / mask) are missing from this listing.
7094 [(set (match_operand:VI4F_128 0 "memory_operand")
7096 (vec_duplicate:VI4F_128
7097 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
7100 "TARGET_SSE && reload_completed"
7101 [(set (match_dup 0) (match_dup 1))]
7102 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; Generic vec_set expander for all vector modes in iterator V:
;; delegates entirely to ix86_expand_vector_set.
;; NOTE(review): lines 7108-7109 and the closing DONE (7112-7113) are
;; missing per the embedded numbering.
7104 (define_expand "vec_set<mode>"
7105 [(match_operand:V 0 "register_operand")
7106 (match_operand:<ssescalarmode> 1 "register_operand")
7107 (match_operand 2 "const_int_operand")]
7110 ix86_expand_vector_set (false, operands[0], operands[1],
7111 INTVAL (operands[2]));
;; Extract element 0 of a V4SF: after reload this is just an SFmode
;; lowpart move, so split to a plain SF set.
;; NOTE(review): lines 7117 (vec_select head) and 7121 ("#" template)
;; are missing from this listing.
7115 (define_insn_and_split "*vec_extractv4sf_0"
7116 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
7118 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
7119 (parallel [(const_int 0)])))]
7120 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7122 "&& reload_completed"
7123 [(set (match_dup 0) (match_dup 1))]
7124 "operands[1] = gen_lowpart (SFmode, operands[1]);")
;; extractps for GPR/memory destinations; when the destination turns out
;; to be an SSE register after reload, split into shufps (lanes 1,3) or
;; unpckhps (lane 2) on a V4SF view of the destination instead, which
;; avoids a cross-unit move.
;; NOTE(review): embedded numbering gaps (7128, 7131-7132, 7136-7137,
;; 7139-7140, 7143-7145, 7150-7151, 7153-7154, 7156-7159) — templates
;; "#" alternatives, switch labels and DONE are missing here.
7126 (define_insn_and_split "*sse4_1_extractps"
7127 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
7129 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
7130 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
7133 extractps\t{%2, %1, %0|%0, %1, %2}
7134 extractps\t{%2, %1, %0|%0, %1, %2}
7135 vextractps\t{%2, %1, %0|%0, %1, %2}
7138 "&& reload_completed && SSE_REG_P (operands[0])"
7141 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
7142 switch (INTVAL (operands[2]))
7146 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
7147 operands[2], operands[2],
7148 GEN_INT (INTVAL (operands[2]) + 4),
7149 GEN_INT (INTVAL (operands[2]) + 4)));
7152 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
7155 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
7160 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
7161 (set_attr "type" "sselog,sselog,sselog,*,*")
7162 (set_attr "prefix_data16" "1,1,1,*,*")
7163 (set_attr "prefix_extra" "1,1,1,*,*")
7164 (set_attr "length_immediate" "1,1,1,*,*")
7165 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
7166 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
;; Extract any V4SF element directly from memory: after reload, rewrite
;; as a scalar SF load at byte offset index * 4.
;; NOTE(review): lines 7170, 7173-7174 and 7177 are missing per the
;; embedded numbering (vec_select head, condition, "#").
7168 (define_insn_and_split "*vec_extractv4sf_mem"
7169 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
7171 (match_operand:V4SF 1 "memory_operand" "o,o,o")
7172 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
7175 "&& reload_completed"
7176 [(set (match_dup 0) (match_dup 1))]
7178 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; ISA prefix used in quarter-extract pattern names: 32-bit-element
;; modes map to avx512f, 64-bit-element modes to avx512dq.
7181 (define_mode_attr extract_type
7182 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
;; Instruction suffix for the quarter extract: 32x4 or 64x2.
7184 (define_mode_attr extract_suf
7185 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
;; 512-bit modes supporting quarter extraction; the 64-bit-element
;; entries additionally require AVX512DQ.
7187 (define_mode_iterator AVX512_VEC
7188 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
;; Masked 128-bit quarter extraction from a 512-bit vector.  Selects the
;; 32x4 or 64x2 worker by element width, scaling the quarter index into
;; element indices.  A memory destination that is not also the
;; merge source (operand 3) goes through a temporary register.
;; NOTE(review): embedded numbering gaps (7196-7198, 7201, 7204,
;; 7209-7210, 7213, 7216-7217) — condition, declarations, else branch
;; and DONE are missing from this listing.
7190 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
7191 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
7192 (match_operand:AVX512_VEC 1 "register_operand")
7193 (match_operand:SI 2 "const_0_to_3_operand")
7194 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
7195 (match_operand:QI 4 "register_operand")]
7199 mask = INTVAL (operands[2]);
7200 rtx dest = operands[0];
7202 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
7203 dest = gen_reg_rtx (<ssequartermode>mode);
7205 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
7206 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
7207 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
7208 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
7211 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
7212 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
7214 if (dest != operands[0])
7215 emit_move_insn (operands[0], dest);
;; Masked store of a 64x2 quarter extract.  The condition requires the
;; two selected indices to be an even/odd consecutive pair and the
;; merge source to alias the destination, so a single masked
;; vextract*64x2 store suffices; the immediate is index/2.
;; NOTE(review): lines 7228 (condition start) and 7232/7235 (braces)
;; are missing per the embedded numbering.
7219 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
7220 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7221 (vec_merge:<ssequartermode>
7222 (vec_select:<ssequartermode>
7223 (match_operand:V8FI 1 "register_operand" "v")
7224 (parallel [(match_operand 2 "const_0_to_7_operand")
7225 (match_operand 3 "const_0_to_7_operand")]))
7226 (match_operand:<ssequartermode> 4 "memory_operand" "0")
7227 (match_operand:QI 5 "register_operand" "Yk")))]
7229 && INTVAL (operands[2]) % 2 == 0
7230 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7231 && rtx_equal_p (operands[4], operands[0])"
7233 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
7234 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
7236 [(set_attr "type" "sselog")
7237 (set_attr "prefix_extra" "1")
7238 (set_attr "length_immediate" "1")
7239 (set_attr "memory" "store")
7240 (set_attr "prefix" "evex")
7241 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of a 32x4 quarter extract: the four selected indices
;; must be consecutive starting at a multiple of 4, and the merge
;; source must alias the destination; the immediate is index/4.
;; NOTE(review): lines 7254 (condition start) and 7260/7263 (braces)
;; are missing per the embedded numbering.
7243 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
7244 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7245 (vec_merge:<ssequartermode>
7246 (vec_select:<ssequartermode>
7247 (match_operand:V16FI 1 "register_operand" "v")
7248 (parallel [(match_operand 2 "const_0_to_15_operand")
7249 (match_operand 3 "const_0_to_15_operand")
7250 (match_operand 4 "const_0_to_15_operand")
7251 (match_operand 5 "const_0_to_15_operand")]))
7252 (match_operand:<ssequartermode> 6 "memory_operand" "0")
7253 (match_operand:QI 7 "register_operand" "Yk")))]
7255 && INTVAL (operands[2]) % 4 == 0
7256 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7257 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
7258 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
7259 && rtx_equal_p (operands[6], operands[0])"
7261 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
7262 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
7264 [(set_attr "type" "sselog")
7265 (set_attr "prefix_extra" "1")
7266 (set_attr "length_immediate" "1")
7267 (set_attr "memory" "store")
7268 (set_attr "prefix" "evex")
7269 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked/zero-masked 64x2 quarter extract (register or memory
;; destination via store_mask_* macros); immediate is index/2.
;; NOTE(review): lines 7277 (condition start) and 7280/7283 (braces)
;; are missing per the embedded numbering.
7271 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
7272 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7273 (vec_select:<ssequartermode>
7274 (match_operand:V8FI 1 "register_operand" "v")
7275 (parallel [(match_operand 2 "const_0_to_7_operand")
7276 (match_operand 3 "const_0_to_7_operand")])))]
7278 && INTVAL (operands[2]) % 2 == 0
7279 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
7281 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
7282 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
7284 [(set_attr "type" "sselog1")
7285 (set_attr "prefix_extra" "1")
7286 (set_attr "length_immediate" "1")
7287 (set_attr "prefix" "evex")
7288 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked/zero-masked 32x4 quarter extract; the four indices must be
;; consecutive from a multiple of 4, and the immediate is index/4.
;; NOTE(review): lines 7298 (condition start) and 7303/7306 (braces)
;; are missing per the embedded numbering.
7290 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
7291 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7292 (vec_select:<ssequartermode>
7293 (match_operand:V16FI 1 "register_operand" "v")
7294 (parallel [(match_operand 2 "const_0_to_15_operand")
7295 (match_operand 3 "const_0_to_15_operand")
7296 (match_operand 4 "const_0_to_15_operand")
7297 (match_operand 5 "const_0_to_15_operand")])))]
7299 && INTVAL (operands[2]) % 4 == 0
7300 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7301 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
7302 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
7304 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
7305 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
7307 [(set_attr "type" "sselog1")
7308 (set_attr "prefix_extra" "1")
7309 (set_attr "length_immediate" "1")
7310 (set_attr "prefix" "evex")
7311 (set_attr "mode" "<sseinsnmode>")])
;; ISA prefix for half-extract pattern names — the inverse pairing of
;; extract_type: 32-bit-element modes need avx512dq (32x8 form),
;; 64-bit-element modes only avx512f (64x4 form).
7313 (define_mode_attr extract_type_2
7314 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
;; Instruction suffix for the half extract: 32x8 or 64x4.
7316 (define_mode_attr extract_suf_2
7317 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
;; 512-bit modes supporting 256-bit half extraction; 32-bit-element
;; entries additionally require AVX512DQ.
7319 (define_mode_iterator AVX512_VEC_2
7320 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
;; Masked 256-bit half extraction from a 512-bit vector: dispatch on
;; operand 2 (0 = low half, 1 = high half) to the lo/hi worker insns.
;; A memory destination not aliasing the merge source goes via a
;; temporary register.
;; NOTE(review): embedded numbering gaps (7328-7329, 7332, 7335,
;; 7337-7338, 7340-7341, 7343-7347, 7351-7352) — condition, switch
;; braces, case labels / default and DONE are missing here.
7322 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
7323 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7324 (match_operand:AVX512_VEC_2 1 "register_operand")
7325 (match_operand:SI 2 "const_0_to_1_operand")
7326 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
7327 (match_operand:QI 4 "register_operand")]
7330 rtx (*insn)(rtx, rtx, rtx, rtx);
7331 rtx dest = operands[0];
7333 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
7334 dest = gen_reg_rtx (<ssehalfvecmode>mode);
7336 switch (INTVAL (operands[2]))
7339 insn = gen_vec_extract_lo_<mode>_mask;
7342 insn = gen_vec_extract_hi_<mode>_mask;
7348 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
7349 if (dest != operands[0])
7350 emit_move_insn (operands[0], dest);
;; Split a low-half extract of a 512-bit V8FI vector into a lowpart
;; move once no EVEX-only register complications remain.
;; NOTE(review): the define_split header (7354) and lines 7361-7362 of
;; the condition are missing per the embedded numbering.
7355 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7356 (vec_select:<ssehalfvecmode>
7357 (match_operand:V8FI 1 "nonimmediate_operand")
7358 (parallel [(const_int 0) (const_int 1)
7359 (const_int 2) (const_int 3)])))]
7360 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7363 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
7364 [(set (match_dup 0) (match_dup 1))]
7365 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Masked store of the low 256-bit half of a V8FI vector; merge source
;; must alias the destination so a masked vextract*64x4 store works.
;; NOTE(review): line 7376 (condition start) is missing per the
;; embedded numbering.
7367 (define_insn "vec_extract_lo_<mode>_maskm"
7368 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7369 (vec_merge:<ssehalfvecmode>
7370 (vec_select:<ssehalfvecmode>
7371 (match_operand:V8FI 1 "register_operand" "v")
7372 (parallel [(const_int 0) (const_int 1)
7373 (const_int 2) (const_int 3)]))
7374 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7375 (match_operand:QI 3 "register_operand" "Yk")))]
7377 && rtx_equal_p (operands[2], operands[0])"
7378 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7379 [(set_attr "type" "sselog1")
7380 (set_attr "prefix_extra" "1")
7381 (set_attr "length_immediate" "1")
7382 (set_attr "prefix" "evex")
7383 (set_attr "mode" "<sseinsnmode>")])
;; Low 256-bit half of a V8FI vector.  Emits vextract*64x4 only when a
;; mask is applied or a plain move cannot be used; otherwise (per the
;; missing fall-through, line 7396-7398) it is presumably split/moved.
;; NOTE(review): lines 7391, 7393, 7396-7398 are missing per the
;; embedded numbering.
7385 (define_insn "vec_extract_lo_<mode><mask_name>"
7386 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
7387 (vec_select:<ssehalfvecmode>
7388 (match_operand:V8FI 1 "<store_mask_predicate>" "v,<store_mask_constraint>")
7389 (parallel [(const_int 0) (const_int 1)
7390 (const_int 2) (const_int 3)])))]
7392 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7394 if (<mask_applied> || (!TARGET_AVX512VL && !MEM_P (operands[1])))
7395 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7399 [(set_attr "type" "sselog1")
7400 (set_attr "prefix_extra" "1")
7401 (set_attr "length_immediate" "1")
7402 (set_attr "prefix" "evex")
7403 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high 256-bit half (elements 4..7) of a V8FI
;; vector; merge source must alias the destination.
;; NOTE(review): line 7414 (condition start) is missing per the
;; embedded numbering.
7405 (define_insn "vec_extract_hi_<mode>_maskm"
7406 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7407 (vec_merge:<ssehalfvecmode>
7408 (vec_select:<ssehalfvecmode>
7409 (match_operand:V8FI 1 "register_operand" "v")
7410 (parallel [(const_int 4) (const_int 5)
7411 (const_int 6) (const_int 7)]))
7412 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7413 (match_operand:QI 3 "register_operand" "Yk")))]
7415 && rtx_equal_p (operands[2], operands[0])"
7416 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7417 [(set_attr "type" "sselog")
7418 (set_attr "prefix_extra" "1")
7419 (set_attr "length_immediate" "1")
7420 (set_attr "memory" "store")
7421 (set_attr "prefix" "evex")
7422 (set_attr "mode" "<sseinsnmode>")])
;; High 256-bit half of a V8FI vector via vextract*64x4 with
;; immediate 0x1; supports register or memory destination.
;; NOTE(review): line 7430 (insn condition) is missing per the
;; embedded numbering.
7424 (define_insn "vec_extract_hi_<mode><mask_name>"
7425 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7426 (vec_select:<ssehalfvecmode>
7427 (match_operand:V8FI 1 "register_operand" "v")
7428 (parallel [(const_int 4) (const_int 5)
7429 (const_int 6) (const_int 7)])))]
7431 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
7432 [(set_attr "type" "sselog1")
7433 (set_attr "prefix_extra" "1")
7434 (set_attr "length_immediate" "1")
7435 (set_attr "prefix" "evex")
7436 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half (elements 8..15) of a V16FI vector via
;; vextract*32x8; merge source must alias the destination.
;; NOTE(review): line 7449 (condition start) is missing per the
;; embedded numbering.
7438 (define_insn "vec_extract_hi_<mode>_maskm"
7439 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7440 (vec_merge:<ssehalfvecmode>
7441 (vec_select:<ssehalfvecmode>
7442 (match_operand:V16FI 1 "register_operand" "v")
7443 (parallel [(const_int 8) (const_int 9)
7444 (const_int 10) (const_int 11)
7445 (const_int 12) (const_int 13)
7446 (const_int 14) (const_int 15)]))
7447 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7448 (match_operand:QI 3 "register_operand" "Yk")))]
7450 && rtx_equal_p (operands[2], operands[0])"
7451 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7452 [(set_attr "type" "sselog1")
7453 (set_attr "prefix_extra" "1")
7454 (set_attr "length_immediate" "1")
7455 (set_attr "prefix" "evex")
7456 (set_attr "mode" "<sseinsnmode>")])
;; High half of a V16FI vector: AVX512DQ alternative uses the masked
;; 32x8 form; without DQ, falls back to unmasked vextracti64x4.
;; NOTE(review): line 7467 ("@" of the output template) is missing per
;; the embedded numbering.
7458 (define_insn "vec_extract_hi_<mode><mask_name>"
7459 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
7460 (vec_select:<ssehalfvecmode>
7461 (match_operand:V16FI 1 "register_operand" "v,v")
7462 (parallel [(const_int 8) (const_int 9)
7463 (const_int 10) (const_int 11)
7464 (const_int 12) (const_int 13)
7465 (const_int 14) (const_int 15)])))]
7466 "TARGET_AVX512F && <mask_avx512dq_condition>"
7468 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
7469 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7470 [(set_attr "type" "sselog1")
7471 (set_attr "prefix_extra" "1")
7472 (set_attr "isa" "avx512dq,noavx512dq")
7473 (set_attr "length_immediate" "1")
7474 (set_attr "prefix" "evex")
7475 (set_attr "mode" "<sseinsnmode>")])
;; Masked 128-bit extract from a 256-bit vector (AVX512VL+DQ).  The
;; conditional decides when a memory destination needs a temporary:
;; 32-bit-element modes have maskm store patterns (only need the merge
;; source to alias the destination), while 64-bit-element hi extracts
;; cannot store directly.
;; NOTE(review): embedded numbering gaps (7484, 7487-7488, 7491,
;; 7500-7501, 7503-7504, 7506-7510, 7514-7515) — braces, MEM_P test,
;; case labels / default and DONE are missing from this listing.
7477 (define_expand "avx512vl_vextractf128<mode>"
7478 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7479 (match_operand:VI48F_256 1 "register_operand")
7480 (match_operand:SI 2 "const_0_to_1_operand")
7481 (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
7482 (match_operand:QI 4 "register_operand")]
7483 "TARGET_AVX512DQ && TARGET_AVX512VL"
7485 rtx (*insn)(rtx, rtx, rtx, rtx);
7486 rtx dest = operands[0];
7489 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
7490 /* For V8S[IF]mode there are maskm insns with =m and 0
7492 ? !rtx_equal_p (dest, operands[3])
7493 /* For V4D[IF]mode, hi insns don't allow memory, and
7494 lo insns have =m and 0C constraints. */
7495 : (operands[2] != const0_rtx
7496 || (!rtx_equal_p (dest, operands[3])
7497 && GET_CODE (operands[3]) != CONST_VECTOR))))
7498 dest = gen_reg_rtx (<ssehalfvecmode>mode);
7499 switch (INTVAL (operands[2]))
7502 insn = gen_vec_extract_lo_<mode>_mask;
7505 insn = gen_vec_extract_hi_<mode>_mask;
7511 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
7512 if (dest != operands[0])
7513 emit_move_insn (operands[0], dest);
;; Unmasked AVX 128-bit extract from any 256-bit vector mode: choose
;; the lo or hi worker by the immediate selector.
;; NOTE(review): embedded numbering gaps (7521-7522, 7524, 7526-7527,
;; 7529-7530, 7532-7536, 7538-7539) — condition, braces, case labels /
;; default and DONE are missing from this listing.
7517 (define_expand "avx_vextractf128<mode>"
7518 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7519 (match_operand:V_256 1 "register_operand")
7520 (match_operand:SI 2 "const_0_to_1_operand")]
7523 rtx (*insn)(rtx, rtx);
7525 switch (INTVAL (operands[2]))
7528 insn = gen_vec_extract_lo_<mode>;
7531 insn = gen_vec_extract_hi_<mode>;
7537 emit_insn (insn (operands[0], operands[1]));
;; Low 256-bit half of a V16FI vector; emits vextract*32x8 when masked
;; (the unmasked fall-through and the attr block, lines 7553-7559, are
;; missing from this listing per the embedded numbering).
7541 (define_insn "vec_extract_lo_<mode><mask_name>"
7542 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
7543 (vec_select:<ssehalfvecmode>
7544 (match_operand:V16FI 1 "<store_mask_predicate>"
7545 "<store_mask_constraint>,v")
7546 (parallel [(const_int 0) (const_int 1)
7547 (const_int 2) (const_int 3)
7548 (const_int 4) (const_int 5)
7549 (const_int 6) (const_int 7)])))]
7551 && <mask_mode512bit_condition>
7552 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7555 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
;; Split a low-half extract of a V16FI vector into a plain lowpart move
;; after reload.
;; NOTE(review): the define_split header line (7560) is missing per the
;; embedded numbering.
7561 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7562 (vec_select:<ssehalfvecmode>
7563 (match_operand:V16FI 1 "nonimmediate_operand")
7564 (parallel [(const_int 0) (const_int 1)
7565 (const_int 2) (const_int 3)
7566 (const_int 4) (const_int 5)
7567 (const_int 6) (const_int 7)])))]
7568 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7569 && reload_completed"
7570 [(set (match_dup 0) (match_dup 1))]
7571 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Low 128-bit half of a 256-bit VI8F vector; when masked, emits
;; vextract*64x2 with immediate 0x0.
;; NOTE(review): lines 7579, 7582-7583, 7585-7587 are missing per the
;; embedded numbering (condition start, braces and the unmasked
;; fall-through).
7573 (define_insn "vec_extract_lo_<mode><mask_name>"
7574 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
7575 (vec_select:<ssehalfvecmode>
7576 (match_operand:VI8F_256 1 "<store_mask_predicate>"
7577 "<store_mask_constraint>,v")
7578 (parallel [(const_int 0) (const_int 1)])))]
7580 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7581 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7584 return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
7588 [(set_attr "type" "sselog")
7589 (set_attr "prefix_extra" "1")
7590 (set_attr "length_immediate" "1")
7591 (set_attr "memory" "none,store")
7592 (set_attr "prefix" "evex")
7593 (set_attr "mode" "XI")])
;; Split a low-half extract of a 256-bit VI8F vector into a lowpart
;; move after reload.
;; NOTE(review): the define_split header line (7595) is missing per the
;; embedded numbering.
7596 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7597 (vec_select:<ssehalfvecmode>
7598 (match_operand:VI8F_256 1 "nonimmediate_operand")
7599 (parallel [(const_int 0) (const_int 1)])))]
7600 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7601 && reload_completed"
7602 [(set (match_dup 0) (match_dup 1))]
7603 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; High 128-bit half of a 256-bit VI8F vector.  With AVX512VL prefers
;; the EVEX 64x2 (DQ) or 32x4 form; otherwise the classic AVX
;; vextractf128/vextracti128.
;; NOTE(review): lines 7611, 7613, 7616, 7618-7619, 7621 are missing
;; per the embedded numbering (braces / else around the returns).
7605 (define_insn "vec_extract_hi_<mode><mask_name>"
7606 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
7607 (vec_select:<ssehalfvecmode>
7608 (match_operand:VI8F_256 1 "register_operand" "v,v")
7609 (parallel [(const_int 2) (const_int 3)])))]
7610 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7612 if (TARGET_AVX512VL)
7614 if (TARGET_AVX512DQ)
7615 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7617 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
7620 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7622 [(set_attr "type" "sselog")
7623 (set_attr "prefix_extra" "1")
7624 (set_attr "length_immediate" "1")
7625 (set_attr "memory" "none,store")
7626 (set_attr "prefix" "vex")
7627 (set_attr "mode" "<sseinsnmode>")])
;; Split a low-half extract of a 256-bit VI4F vector into a lowpart
;; move after reload.
;; NOTE(review): the define_split header line (7629) is missing per the
;; embedded numbering.
7630 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7631 (vec_select:<ssehalfvecmode>
7632 (match_operand:VI4F_256 1 "nonimmediate_operand")
7633 (parallel [(const_int 0) (const_int 1)
7634 (const_int 2) (const_int 3)])))]
7635 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7636 && reload_completed"
7637 [(set (match_dup 0) (match_dup 1))]
7638 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Low 128-bit half of a 256-bit VI4F vector; when masked, emits
;; vextract*32x4 with immediate 0x0.
;; NOTE(review): lines 7648, 7651-7652, 7654-7656 are missing per the
;; embedded numbering (condition start, braces, unmasked fall-through).
7640 (define_insn "vec_extract_lo_<mode><mask_name>"
7641 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
7642 "=<store_mask_constraint>,v")
7643 (vec_select:<ssehalfvecmode>
7644 (match_operand:VI4F_256 1 "<store_mask_predicate>"
7645 "v,<store_mask_constraint>")
7646 (parallel [(const_int 0) (const_int 1)
7647 (const_int 2) (const_int 3)])))]
7649 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7650 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7653 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7657 [(set_attr "type" "sselog1")
7658 (set_attr "prefix_extra" "1")
7659 (set_attr "length_immediate" "1")
7660 (set_attr "prefix" "evex")
7661 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the low 128-bit half of a 256-bit VI4F vector;
;; merge source (operand 2) must alias the destination memory.
7663 (define_insn "vec_extract_lo_<mode>_maskm"
7664 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7665 (vec_merge:<ssehalfvecmode>
7666 (vec_select:<ssehalfvecmode>
7667 (match_operand:VI4F_256 1 "register_operand" "v")
7668 (parallel [(const_int 0) (const_int 1)
7669 (const_int 2) (const_int 3)]))
7670 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7671 (match_operand:QI 3 "register_operand" "Yk")))]
7672 "TARGET_AVX512VL && TARGET_AVX512F
7673 && rtx_equal_p (operands[2], operands[0])"
7674 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7675 [(set_attr "type" "sselog1")
7676 (set_attr "prefix_extra" "1")
7677 (set_attr "length_immediate" "1")
7678 (set_attr "prefix" "evex")
7679 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high 128-bit half (elements 4..7) of a 256-bit
;; VI4F vector; the merge source (operand 2) must alias the destination
;; memory so a single masked vextract*32x4 store suffices.
;; FIX(review): the vec_merge mask operand 3 was declared with mode
;; <ssehalfvecmode> (a vector mode).  A vec_merge selector is a scalar
;; integer mask; every sibling maskm pattern here uses QI with the Yk
;; mask-register constraint, so use QI.
7681 (define_insn "vec_extract_hi_<mode>_maskm"
7682 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7683 (vec_merge:<ssehalfvecmode>
7684 (vec_select:<ssehalfvecmode>
7685 (match_operand:VI4F_256 1 "register_operand" "v")
7686 (parallel [(const_int 4) (const_int 5)
7687 (const_int 6) (const_int 7)]))
7688 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7689 (match_operand:QI 3 "register_operand" "Yk")))]
7690 "TARGET_AVX512F && TARGET_AVX512VL
7691 && rtx_equal_p (operands[2], operands[0])"
7692 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7693 [(set_attr "type" "sselog1")
7694 (set_attr "length_immediate" "1")
7695 (set_attr "prefix" "evex")
7696 (set_attr "mode" "<sseinsnmode>")])
;; Register-destination masked high-half extract of a 256-bit VI4F
;; vector; operand 2 is the merge source (register or zero, "0C"), with
;; %N2 selecting the {z} zero-masking suffix when it is zero.
;; NOTE(review): line 7707 (insn condition) is missing per the
;; embedded numbering.
7698 (define_insn "vec_extract_hi_<mode>_mask"
7699 [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
7700 (vec_merge:<ssehalfvecmode>
7701 (vec_select:<ssehalfvecmode>
7702 (match_operand:VI4F_256 1 "register_operand" "v")
7703 (parallel [(const_int 4) (const_int 5)
7704 (const_int 6) (const_int 7)]))
7705 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "0C")
7706 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
7708 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
7709 [(set_attr "type" "sselog1")
7710 (set_attr "length_immediate" "1")
7711 (set_attr "prefix" "evex")
7712 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked high-half extract of a 256-bit VI4F vector: VEX
;; vextractf128/vextracti128 alternative, plus an EVEX 32x4 alternative
;; for the extended (v) register file under AVX512VL.
;; NOTE(review): lines 7720-7721 (condition and "@") are missing per
;; the embedded numbering.
7714 (define_insn "vec_extract_hi_<mode>"
7715 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
7716 (vec_select:<ssehalfvecmode>
7717 (match_operand:VI4F_256 1 "register_operand" "x, v")
7718 (parallel [(const_int 4) (const_int 5)
7719 (const_int 6) (const_int 7)])))]
7722 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
7723 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7724 [(set_attr "isa" "*, avx512vl")
7725 (set_attr "prefix" "vex, evex")
7726 (set_attr "type" "sselog1")
7727 (set_attr "length_immediate" "1")
7728 (set_attr "mode" "<sseinsnmode>")])
;; Low V16HI half of a V32HI vector: split into a lowpart move after
;; reload (no instruction needed for the low half).
;; NOTE(review): lines 7732 (vec_select head) and 7743 ("#") are
;; missing per the embedded numbering.
7730 (define_insn_and_split "vec_extract_lo_v32hi"
7731 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7733 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
7734 (parallel [(const_int 0) (const_int 1)
7735 (const_int 2) (const_int 3)
7736 (const_int 4) (const_int 5)
7737 (const_int 6) (const_int 7)
7738 (const_int 8) (const_int 9)
7739 (const_int 10) (const_int 11)
7740 (const_int 12) (const_int 13)
7741 (const_int 14) (const_int 15)])))]
7742 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7744 "&& reload_completed"
7745 [(set (match_dup 0) (match_dup 1))]
7746 "operands[1] = gen_lowpart (V16HImode, operands[1]);")
;; High V16HI half (elements 16..31) of a V32HI vector via
;; vextracti64x4 with immediate 0x1.
;; NOTE(review): lines 7750 (vec_select head) and 7760 (condition) are
;; missing per the embedded numbering.
7748 (define_insn "vec_extract_hi_v32hi"
7749 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7751 (match_operand:V32HI 1 "register_operand" "v,v")
7752 (parallel [(const_int 16) (const_int 17)
7753 (const_int 18) (const_int 19)
7754 (const_int 20) (const_int 21)
7755 (const_int 22) (const_int 23)
7756 (const_int 24) (const_int 25)
7757 (const_int 26) (const_int 27)
7758 (const_int 28) (const_int 29)
7759 (const_int 30) (const_int 31)])))]
7761 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7762 [(set_attr "type" "sselog")
7763 (set_attr "prefix_extra" "1")
7764 (set_attr "length_immediate" "1")
7765 (set_attr "memory" "none,store")
7766 (set_attr "prefix" "evex")
7767 (set_attr "mode" "XI")])
;; Low V8HI half of a V16HI vector: split into a lowpart move after
;; reload.
;; NOTE(review): lines 7771 (vec_select head) and 7778 ("#") are
;; missing per the embedded numbering.
7769 (define_insn_and_split "vec_extract_lo_v16hi"
7770 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
7772 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
7773 (parallel [(const_int 0) (const_int 1)
7774 (const_int 2) (const_int 3)
7775 (const_int 4) (const_int 5)
7776 (const_int 6) (const_int 7)])))]
7777 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7779 "&& reload_completed"
7780 [(set (match_dup 0) (match_dup 1))]
7781 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
;; High V8HI half of a V16HI vector.  Alternatives: AVX2
;; vextract[if]128, AVX512DQ vextracti32x4 on VL registers, and an
;; AVX512F fallback operating on the zero-extended 512-bit view (%g1).
;; NOTE(review): lines 7785 (vec_select head) and 7791-7792
;; (condition, "@") are missing per the embedded numbering.
7783 (define_insn "vec_extract_hi_v16hi"
7784 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
7786 (match_operand:V16HI 1 "register_operand" "x,x,v,v,v,v")
7787 (parallel [(const_int 8) (const_int 9)
7788 (const_int 10) (const_int 11)
7789 (const_int 12) (const_int 13)
7790 (const_int 14) (const_int 15)])))]
7793 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7794 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7795 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7796 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7797 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
7798 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
7799 [(set_attr "type" "sselog")
7800 (set_attr "prefix_extra" "1")
7801 (set_attr "length_immediate" "1")
7802 (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
7803 (set_attr "memory" "none,store,none,store,none,store")
7804 (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
7805 (set_attr "mode" "OI")])
;; Low V32QI half of a V64QI vector: split into a lowpart move after
;; reload.
;; NOTE(review): lines 7809 (vec_select head) and 7828 ("#") are
;; missing per the embedded numbering.
7807 (define_insn_and_split "vec_extract_lo_v64qi"
7808 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7810 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
7811 (parallel [(const_int 0) (const_int 1)
7812 (const_int 2) (const_int 3)
7813 (const_int 4) (const_int 5)
7814 (const_int 6) (const_int 7)
7815 (const_int 8) (const_int 9)
7816 (const_int 10) (const_int 11)
7817 (const_int 12) (const_int 13)
7818 (const_int 14) (const_int 15)
7819 (const_int 16) (const_int 17)
7820 (const_int 18) (const_int 19)
7821 (const_int 20) (const_int 21)
7822 (const_int 22) (const_int 23)
7823 (const_int 24) (const_int 25)
7824 (const_int 26) (const_int 27)
7825 (const_int 28) (const_int 29)
7826 (const_int 30) (const_int 31)])))]
7827 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7829 "&& reload_completed"
7830 [(set (match_dup 0) (match_dup 1))]
7831 "operands[1] = gen_lowpart (V32QImode, operands[1]);")
;; High V32QI half (elements 32..63) of a V64QI vector via
;; vextracti64x4 with immediate 0x1.
;; NOTE(review): lines 7835 (vec_select head) and 7853 (condition) are
;; missing per the embedded numbering.
7833 (define_insn "vec_extract_hi_v64qi"
7834 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7836 (match_operand:V64QI 1 "register_operand" "v,v")
7837 (parallel [(const_int 32) (const_int 33)
7838 (const_int 34) (const_int 35)
7839 (const_int 36) (const_int 37)
7840 (const_int 38) (const_int 39)
7841 (const_int 40) (const_int 41)
7842 (const_int 42) (const_int 43)
7843 (const_int 44) (const_int 45)
7844 (const_int 46) (const_int 47)
7845 (const_int 48) (const_int 49)
7846 (const_int 50) (const_int 51)
7847 (const_int 52) (const_int 53)
7848 (const_int 54) (const_int 55)
7849 (const_int 56) (const_int 57)
7850 (const_int 58) (const_int 59)
7851 (const_int 60) (const_int 61)
7852 (const_int 62) (const_int 63)])))]
7854 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7855 [(set_attr "type" "sselog")
7856 (set_attr "prefix_extra" "1")
7857 (set_attr "length_immediate" "1")
7858 (set_attr "memory" "none,store")
7859 (set_attr "prefix" "evex")
7860 (set_attr "mode" "XI")])
;; Extract the low 16 QImode elements (bits 0..127) of a V32QI source.
;; Like vec_extract_lo_v64qi: splits after reload to a move of the
;; V16QI lowpart, so the low half costs nothing beyond a move.
7862 (define_insn_and_split "vec_extract_lo_v32qi"
7863 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
7865 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
7866 (parallel [(const_int 0) (const_int 1)
7867 (const_int 2) (const_int 3)
7868 (const_int 4) (const_int 5)
7869 (const_int 6) (const_int 7)
7870 (const_int 8) (const_int 9)
7871 (const_int 10) (const_int 11)
7872 (const_int 12) (const_int 13)
7873 (const_int 14) (const_int 15)])))]
;; Disallow mem-to-mem: at most one of dest/src may be memory.
7874 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7876 "&& reload_completed"
7877 [(set (match_dup 0) (match_dup 1))]
7878 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
;; Extract the high 16 QImode elements (bits 128..255) of a V32QI source.
;; Six alternatives (reg/mem destination for each ISA level, per the
;; "isa" and "memory" attributes below): plain AVX uses vextract[if]128,
;; AVX512DQ uses vextracti32x4 on the 256-bit register, and AVX512F
;; (without DQ) uses vextracti32x4 on the 512-bit view of the source —
;; %g1 prints the zmm form of operand 1.
7880 (define_insn "vec_extract_hi_v32qi"
7881 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
7883 (match_operand:V32QI 1 "register_operand" "x,x,v,v,v,v")
7884 (parallel [(const_int 16) (const_int 17)
7885 (const_int 18) (const_int 19)
7886 (const_int 20) (const_int 21)
7887 (const_int 22) (const_int 23)
7888 (const_int 24) (const_int 25)
7889 (const_int 26) (const_int 27)
7890 (const_int 28) (const_int 29)
7891 (const_int 30) (const_int 31)])))]
7894 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7895 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7896 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7897 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7898 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
7899 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
7900 [(set_attr "type" "sselog")
7901 (set_attr "prefix_extra" "1")
7902 (set_attr "length_immediate" "1")
7903 (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
7904 (set_attr "memory" "none,store,none,store,none,store")
7905 (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
7906 (set_attr "mode" "OI")])
7908 ;; Modes handled by vec_extract patterns.
;; Each 512-bit or 256-bit mode is gated on the target feature that
;; provides it; note the QI/HI element modes require AVX512BW (not
;; plain AVX512F) at 512 bits.
7909 (define_mode_iterator VEC_EXTRACT_MODE
7910 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
7911 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
7912 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
7913 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
7914 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
7915 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
7916 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
;; Extract scalar element number operand 2 from a vector; all the work
;; is delegated to ix86_expand_vector_extract.
7918 (define_expand "vec_extract<mode><ssescalarmodelower>"
7919 [(match_operand:<ssescalarmode> 0 "register_operand")
7920 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
7921 (match_operand 2 "const_int_operand")]
7924 ix86_expand_vector_extract (false, operands[0], operands[1],
7925 INTVAL (operands[2]));
;; Extract the low (operand 2 == 0) or high (operand 2 == 1) 256-bit
;; half of a 512-bit vector by dispatching to the lo/hi insn patterns.
7929 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
7930 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7931 (match_operand:V_512 1 "register_operand")
7932 (match_operand 2 "const_0_to_1_operand")]
7935 if (INTVAL (operands[2]))
7936 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]))
7938 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
7942 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7944 ;; Parallel double-precision floating point element swizzling
7946 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VUNPCKHPD on 512-bit DF vectors (optionally masked): the selection
;; indexes (1,9, 3,11, 5,13, 7,15) pick the odd (high) element of each
;; 128-bit lane pair from the concatenation of operands 1 and 2.
7948 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
7949 [(set (match_operand:V8DF 0 "register_operand" "=v")
7952 (match_operand:V8DF 1 "register_operand" "v")
7953 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7954 (parallel [(const_int 1) (const_int 9)
7955 (const_int 3) (const_int 11)
7956 (const_int 5) (const_int 13)
7957 (const_int 7) (const_int 15)])))]
7959 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7960 [(set_attr "type" "sselog")
7961 (set_attr "prefix" "evex")
7962 (set_attr "mode" "V8DF")])
7964 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; VUNPCKHPD on 256-bit DF vectors; indexes (1,5, 3,7) take the high
;; element of each 128-bit lane from each source.  Masking needs AVX512VL.
7965 (define_insn "avx_unpckhpd256<mask_name>"
7966 [(set (match_operand:V4DF 0 "register_operand" "=v")
7969 (match_operand:V4DF 1 "register_operand" "v")
7970 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7971 (parallel [(const_int 1) (const_int 5)
7972 (const_int 3) (const_int 7)])))]
7973 "TARGET_AVX && <mask_avx512vl_condition>"
7974 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7975 [(set_attr "type" "sselog")
7976 (set_attr "prefix" "vex")
7977 (set_attr "mode" "V4DF")])
;; Full cross-lane high interleave for V4DF.  Because the 256-bit unpck
;; insns only work within 128-bit lanes, this is built from two unpck
;; results (temporaries operands[3] and [4]) combined with a final
;; lane-crossing vec_select.
7979 (define_expand "vec_interleave_highv4df"
7983 (match_operand:V4DF 1 "register_operand")
7984 (match_operand:V4DF 2 "nonimmediate_operand"))
7985 (parallel [(const_int 0) (const_int 4)
7986 (const_int 2) (const_int 6)])))
7992 (parallel [(const_int 1) (const_int 5)
7993 (const_int 3) (const_int 7)])))
7994 (set (match_operand:V4DF 0 "register_operand")
7999 (parallel [(const_int 2) (const_int 3)
8000 (const_int 6) (const_int 7)])))]
;; Fresh temporaries for the two intermediate unpck results.
8003 operands[3] = gen_reg_rtx (V4DFmode);
8004 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit VUNPCKHPD: result elements come from the high DF of
;; each source (indexes 1 and 3 of the concat); merged with operand 3
;; under mask register operand 4 ("0C" allows zero-masking).
8008 (define_insn "avx512vl_unpckhpd128_mask"
8009 [(set (match_operand:V2DF 0 "register_operand" "=v")
8013 (match_operand:V2DF 1 "register_operand" "v")
8014 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8015 (parallel [(const_int 1) (const_int 3)]))
8016 (match_operand:V2DF 3 "vector_move_operand" "0C")
8017 (match_operand:QI 4 "register_operand" "Yk")))]
8019 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8020 [(set_attr "type" "sselog")
8021 (set_attr "prefix" "evex")
8022 (set_attr "mode" "V2DF")])
;; High interleave of two V2DF values.  If the operand combination is
;; not directly encodable (checked by the operator_ok helper), force
;; operand 2 into a register first.
8024 (define_expand "vec_interleave_highv2df"
8025 [(set (match_operand:V2DF 0 "register_operand")
8028 (match_operand:V2DF 1 "nonimmediate_operand")
8029 (match_operand:V2DF 2 "nonimmediate_operand"))
8030 (parallel [(const_int 1)
8034 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
8035 operands[2] = force_reg (V2DFmode, operands[2]);
;; High interleave of two V2DF values, six alternatives (see "isa"):
;; SSE2 unpckhpd, AVX vunpckhpd, SSE3 movddup from the high half of a
;; memory source (%H1 = operand 1 plus 8 bytes), and movlpd/movhpd
;; forms for the remaining register/memory combinations.
8038 (define_insn "*vec_interleave_highv2df"
8039 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
8042 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
8043 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
8044 (parallel [(const_int 1)
8046 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
8048 unpckhpd\t{%2, %0|%0, %2}
8049 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
8050 %vmovddup\t{%H1, %0|%0, %H1}
8051 movlpd\t{%H1, %0|%0, %H1}
8052 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
8053 %vmovhpd\t{%1, %0|%q0, %1}"
8054 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8055 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8056 (set (attr "prefix_data16")
8057 (if_then_else (eq_attr "alternative" "3,5")
8059 (const_string "*")))
8060 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8061 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; VMOVDDUP on 512 bits: duplicate the even-indexed DF elements
;; (indexes 0,2,4,6 selected twice via the concat of operand 1 with
;; itself — hence the 0/8, 2/10, ... pairs).
8063 (define_expand "avx512f_movddup512<mask_name>"
8064 [(set (match_operand:V8DF 0 "register_operand")
8067 (match_operand:V8DF 1 "nonimmediate_operand")
8069 (parallel [(const_int 0) (const_int 8)
8070 (const_int 2) (const_int 10)
8071 (const_int 4) (const_int 12)
8072 (const_int 6) (const_int 14)])))]
;; VUNPCKLPD on 512-bit DF vectors: indexes (0,8, 2,10, 4,12, 6,14)
;; pick the even (low) element of each 128-bit lane pair from the
;; concatenation of operands 1 and 2.
8075 (define_expand "avx512f_unpcklpd512<mask_name>"
8076 [(set (match_operand:V8DF 0 "register_operand")
8079 (match_operand:V8DF 1 "register_operand")
8080 (match_operand:V8DF 2 "nonimmediate_operand"))
8081 (parallel [(const_int 0) (const_int 8)
8082 (const_int 2) (const_int 10)
8083 (const_int 4) (const_int 12)
8084 (const_int 6) (const_int 14)])))]
;; Insn form of the 512-bit low unpack.  When operand 2 matches
;; operand 1 (constraint "1"), the pattern degenerates to a broadcast
;; of the even elements and is emitted as VMOVDDUP instead.
8087 (define_insn "*avx512f_unpcklpd512<mask_name>"
8088 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
8091 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
8092 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
8093 (parallel [(const_int 0) (const_int 8)
8094 (const_int 2) (const_int 10)
8095 (const_int 4) (const_int 12)
8096 (const_int 6) (const_int 14)])))]
8099 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
8100 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8101 [(set_attr "type" "sselog")
8102 (set_attr "prefix" "evex")
8103 (set_attr "mode" "V8DF")])
8105 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; VMOVDDUP on 256 bits: duplicate the even-indexed DF elements
;; (0/4 and 2/6 pairs from the self-concat of operand 1).
8106 (define_expand "avx_movddup256<mask_name>"
8107 [(set (match_operand:V4DF 0 "register_operand")
8110 (match_operand:V4DF 1 "nonimmediate_operand")
8112 (parallel [(const_int 0) (const_int 4)
8113 (const_int 2) (const_int 6)])))]
8114 "TARGET_AVX && <mask_avx512vl_condition>")
;; VUNPCKLPD on 256-bit DF vectors; indexes (0,4, 2,6) take the low
;; element of each 128-bit lane from each source.  Masking needs AVX512VL.
8116 (define_expand "avx_unpcklpd256<mask_name>"
8117 [(set (match_operand:V4DF 0 "register_operand")
8120 (match_operand:V4DF 1 "register_operand")
8121 (match_operand:V4DF 2 "nonimmediate_operand"))
8122 (parallel [(const_int 0) (const_int 4)
8123 (const_int 2) (const_int 6)])))]
8124 "TARGET_AVX && <mask_avx512vl_condition>")
;; Insn form of the 256-bit low unpack.  The second alternative
;; (operand 2 tied to operand 1 via "1", source in memory) is a
;; broadcast of even elements and is emitted as VMOVDDUP.
8126 (define_insn "*avx_unpcklpd256<mask_name>"
8127 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
8130 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
8131 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
8132 (parallel [(const_int 0) (const_int 4)
8133 (const_int 2) (const_int 6)])))]
8134 "TARGET_AVX && <mask_avx512vl_condition>"
8136 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
8137 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
8138 [(set_attr "type" "sselog")
8139 (set_attr "prefix" "vex")
8140 (set_attr "mode" "V4DF")])
;; Full cross-lane low interleave for V4DF, built like the high variant:
;; two in-lane unpck results in temporaries operands[3]/[4], combined
;; with a final lane-crossing vec_select (indexes 0,1,4,5).
8142 (define_expand "vec_interleave_lowv4df"
8146 (match_operand:V4DF 1 "register_operand")
8147 (match_operand:V4DF 2 "nonimmediate_operand"))
8148 (parallel [(const_int 0) (const_int 4)
8149 (const_int 2) (const_int 6)])))
8155 (parallel [(const_int 1) (const_int 5)
8156 (const_int 3) (const_int 7)])))
8157 (set (match_operand:V4DF 0 "register_operand")
8162 (parallel [(const_int 0) (const_int 1)
8163 (const_int 4) (const_int 5)])))]
;; Fresh temporaries for the two intermediate unpck results.
8166 operands[3] = gen_reg_rtx (V4DFmode);
8167 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit VUNPCKLPD: result elements come from the low DF of
;; each source (indexes 0 and 2 of the concat); merged with operand 3
;; under mask register operand 4 ("0C" allows zero-masking).
8170 (define_insn "avx512vl_unpcklpd128_mask"
8171 [(set (match_operand:V2DF 0 "register_operand" "=v")
8175 (match_operand:V2DF 1 "register_operand" "v")
8176 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8177 (parallel [(const_int 0) (const_int 2)]))
8178 (match_operand:V2DF 3 "vector_move_operand" "0C")
8179 (match_operand:QI 4 "register_operand" "Yk")))]
8181 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8182 [(set_attr "type" "sselog")
8183 (set_attr "prefix" "evex")
8184 (set_attr "mode" "V2DF")])
;; Low interleave of two V2DF values.  If the operand combination is
;; not directly encodable (checked by the operator_ok helper), force
;; operand 1 into a register first.
8186 (define_expand "vec_interleave_lowv2df"
8187 [(set (match_operand:V2DF 0 "register_operand")
8190 (match_operand:V2DF 1 "nonimmediate_operand")
8191 (match_operand:V2DF 2 "nonimmediate_operand"))
8192 (parallel [(const_int 0)
8196 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
8197 operands[1] = force_reg (V2DFmode, operands[1]);
;; Low interleave of two V2DF values, six alternatives (see "isa"):
;; SSE2 unpcklpd, AVX vunpcklpd, SSE3 movddup from memory (operand 2
;; tied to 1), movhpd/vmovhpd with a memory high half, and a store of
;; the result's high half (%H0 = operand 0 plus 8 bytes) via movlpd.
8200 (define_insn "*vec_interleave_lowv2df"
8201 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
8204 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
8205 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
8206 (parallel [(const_int 0)
8208 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
8210 unpcklpd\t{%2, %0|%0, %2}
8211 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8212 %vmovddup\t{%1, %0|%0, %q1}
8213 movhpd\t{%2, %0|%0, %q2}
8214 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
8215 %vmovlpd\t{%2, %H0|%H0, %2}"
8216 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8217 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8218 (set (attr "prefix_data16")
8219 (if_then_else (eq_attr "alternative" "3,5")
8221 (const_string "*")))
8222 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8223 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Split a store of a duplicated low element to memory into two scalar
;; DF stores of the same value at offsets 0 and 8, avoiding a vector
;; store of the shuffled register.
8226 [(set (match_operand:V2DF 0 "memory_operand")
8229 (match_operand:V2DF 1 "register_operand")
8231 (parallel [(const_int 0)
8233 "TARGET_SSE3 && reload_completed"
8236 rtx low = gen_lowpart (DFmode, operands[1]);
8238 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
8239 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split a vec_select of a memory operand that replicates one element
;; (indexes i and i+2 of the self-concat) into a vec_duplicate of the
;; scalar DF load at byte offset i*8 — i.e. a MOVDDUP from memory.
8244 [(set (match_operand:V2DF 0 "register_operand")
8247 (match_operand:V2DF 1 "memory_operand")
8249 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
8250 (match_operand:SI 3 "const_int_operand")])))]
8251 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
8252 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
8254 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Scalar VSCALEF (with optional embedded rounding via <round_name>):
;; operates on the low element of operands 1 and 2.
8257 (define_insn "avx512f_vmscalef<mode><round_name>"
8258 [(set (match_operand:VF_128 0 "register_operand" "=v")
8261 [(match_operand:VF_128 1 "register_operand" "v")
8262 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
8267 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
8268 [(set_attr "prefix" "evex")
8269 (set_attr "mode" "<ssescalarmode>")])
;; Packed VSCALEF over all AVX512F/VL float vector modes, with optional
;; masking (<mask_name>) and embedded rounding (<round_name>).
8271 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
8272 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8274 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
8275 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
8278 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
8279 [(set_attr "prefix" "evex")
8280 (set_attr "mode" "<MODE>")])
;; Zero-masked VPTERNLOG expander: forwards to the _maskz_1 insn with a
;; zero vector as the merge source for masked-off elements.
8282 (define_expand "<avx512>_vternlog<mode>_maskz"
8283 [(match_operand:VI48_AVX512VL 0 "register_operand")
8284 (match_operand:VI48_AVX512VL 1 "register_operand")
8285 (match_operand:VI48_AVX512VL 2 "register_operand")
8286 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
8287 (match_operand:SI 4 "const_0_to_255_operand")
8288 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8291 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
8292 operands[0], operands[1], operands[2], operands[3],
8293 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
;; VPTERNLOG: three-input bitwise ternary logic selected by the 8-bit
;; truth-table immediate (operand 4).  Operand 1 is tied to the
;; destination ("0") because the instruction overwrites its first source.
8297 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
8298 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8299 (unspec:VI48_AVX512VL
8300 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8301 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8302 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
8303 (match_operand:SI 4 "const_0_to_255_operand")]
8306 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
8307 [(set_attr "type" "sselog")
8308 (set_attr "prefix" "evex")
8309 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked VPTERNLOG: masked-off lanes keep the old destination
;; value (operand 1 is tied to the destination and is also the
;; vec_merge fall-through); operand 5 is the mask register.
8311 (define_insn "<avx512>_vternlog<mode>_mask"
8312 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8313 (vec_merge:VI48_AVX512VL
8314 (unspec:VI48_AVX512VL
8315 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8316 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8317 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
8318 (match_operand:SI 4 "const_0_to_255_operand")]
8321 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8323 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
8324 [(set_attr "type" "sselog")
8325 (set_attr "prefix" "evex")
8326 (set_attr "mode" "<sseinsnmode>")])
;; Packed VGETEXP with optional masking and SAE (<round_saeonly_name>).
8328 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
8329 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8330 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
8333 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
8334 [(set_attr "prefix" "evex")
8335 (set_attr "mode" "<MODE>")])
;; Scalar VGETEXP on the low element, with optional scalar masking and
;; SAE; upper elements are taken from operand 1.
8337 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
8338 [(set (match_operand:VF_128 0 "register_operand" "=v")
8341 [(match_operand:VF_128 1 "register_operand" "v")
8342 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
8347 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_saeonly_scalar_mask_op3>}";
8348 [(set_attr "prefix" "evex")
8349 (set_attr "mode" "<ssescalarmode>")])
;; VALIGND/VALIGNQ: concatenate operands 2:1 and shift right by the
;; element count in immediate operand 3; optionally masked.
8351 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
8352 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8353 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
8354 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
8355 (match_operand:SI 3 "const_0_to_255_operand")]
8358 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
8359 [(set_attr "prefix" "evex")
8360 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit SHUFPS expander: decode the 8-bit immediate into the
;; sixteen explicit element selectors the _1 insn pattern wants.  The
;; same 2-bit fields repeat per 128-bit lane (+4/+8/+12), and selectors
;; from the second source are biased by 16 (+16/+20/+24/+28).
8362 (define_expand "avx512f_shufps512_mask"
8363 [(match_operand:V16SF 0 "register_operand")
8364 (match_operand:V16SF 1 "register_operand")
8365 (match_operand:V16SF 2 "nonimmediate_operand")
8366 (match_operand:SI 3 "const_0_to_255_operand")
8367 (match_operand:V16SF 4 "register_operand")
8368 (match_operand:HI 5 "register_operand")]
8371 int mask = INTVAL (operands[3]);
8372 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
8373 GEN_INT ((mask >> 0) & 3),
8374 GEN_INT ((mask >> 2) & 3),
8375 GEN_INT (((mask >> 4) & 3) + 16),
8376 GEN_INT (((mask >> 6) & 3) + 16),
8377 GEN_INT (((mask >> 0) & 3) + 4),
8378 GEN_INT (((mask >> 2) & 3) + 4),
8379 GEN_INT (((mask >> 4) & 3) + 20),
8380 GEN_INT (((mask >> 6) & 3) + 20),
8381 GEN_INT (((mask >> 0) & 3) + 8),
8382 GEN_INT (((mask >> 2) & 3) + 8),
8383 GEN_INT (((mask >> 4) & 3) + 24),
8384 GEN_INT (((mask >> 6) & 3) + 24),
8385 GEN_INT (((mask >> 0) & 3) + 12),
8386 GEN_INT (((mask >> 2) & 3) + 12),
8387 GEN_INT (((mask >> 4) & 3) + 28),
8388 GEN_INT (((mask >> 6) & 3) + 28),
8389 operands[4], operands[5]));
;; Zero-masked VFIXUPIMM expander: forwards to the _maskz_1 insn with a
;; zero vector as the merge source for masked-off elements.
8394 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
8395 [(match_operand:VF_AVX512VL 0 "register_operand")
8396 (match_operand:VF_AVX512VL 1 "register_operand")
8397 (match_operand:VF_AVX512VL 2 "register_operand")
8398 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
8399 (match_operand:SI 4 "const_0_to_255_operand")
8400 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8403 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
8404 operands[0], operands[1], operands[2], operands[3],
8405 operands[4], CONST0_RTX (<MODE>mode), operands[5]
8406 <round_saeonly_expand_operand6>));
;; Packed VFIXUPIMM: fix up special values of operand 2 using the
;; integer table in operand 3 and immediate operand 4; operand 1 is
;; tied to the destination ("0").
8410 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
8411 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8413 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
8414 (match_operand:VF_AVX512VL 2 "register_operand" "v")
8415 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
8416 (match_operand:SI 4 "const_0_to_255_operand")]
8419 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
8420 [(set_attr "prefix" "evex")
8421 (set_attr "mode" "<MODE>")])
;; Merge-masked packed VFIXUPIMM; masked-off lanes keep the old
;; destination value (operand 5 is the mask register).
8423 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
8424 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8425 (vec_merge:VF_AVX512VL
8427 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
8428 (match_operand:VF_AVX512VL 2 "register_operand" "v")
8429 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
8430 (match_operand:SI 4 "const_0_to_255_operand")]
8433 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8435 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
8436 [(set_attr "prefix" "evex")
8437 (set_attr "mode" "<MODE>")])
;; Zero-masked scalar VFIXUPIMM expander: forwards to the _maskz_1 insn
;; with a zero vector as the merge source for masked-off elements.
8439 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
8440 [(match_operand:VF_128 0 "register_operand")
8441 (match_operand:VF_128 1 "register_operand")
8442 (match_operand:VF_128 2 "register_operand")
8443 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
8444 (match_operand:SI 4 "const_0_to_255_operand")
8445 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8448 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
8449 operands[0], operands[1], operands[2], operands[3],
8450 operands[4], CONST0_RTX (<MODE>mode), operands[5]
8451 <round_saeonly_expand_operand6>));
;; Scalar VFIXUPIMM on the low element; operand 1 is tied to the
;; destination ("0") and supplies the upper elements.
8455 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
8456 [(set (match_operand:VF_128 0 "register_operand" "=v")
8459 [(match_operand:VF_128 1 "register_operand" "0")
8460 (match_operand:VF_128 2 "register_operand" "v")
8461 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8462 (match_operand:SI 4 "const_0_to_255_operand")]
8467 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
8468 [(set_attr "prefix" "evex")
8469 (set_attr "mode" "<ssescalarmode>")])
;; Merge-masked scalar VFIXUPIMM (operand 5 is the mask register).
8471 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
8472 [(set (match_operand:VF_128 0 "register_operand" "=v")
8476 [(match_operand:VF_128 1 "register_operand" "0")
8477 (match_operand:VF_128 2 "register_operand" "v")
8478 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8479 (match_operand:SI 4 "const_0_to_255_operand")]
8484 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8486 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
8487 [(set_attr "prefix" "evex")
8488 (set_attr "mode" "<ssescalarmode>")])
;; Packed VRNDSCALE: round to a signed-power-of-two granularity chosen
;; by immediate operand 2; optional masking and SAE.
8490 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
8491 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8493 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
8494 (match_operand:SI 2 "const_0_to_255_operand")]
8497 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
8498 [(set_attr "length_immediate" "1")
8499 (set_attr "prefix" "evex")
8500 (set_attr "mode" "<MODE>")])
;; Scalar VRNDSCALE on the low element of operand 2; the upper elements
;; come from operand 1.  Immediate operand 3 selects the rounding.
8502 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
8503 [(set (match_operand:VF_128 0 "register_operand" "=v")
8506 [(match_operand:VF_128 1 "register_operand" "v")
8507 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8508 (match_operand:SI 3 "const_0_to_255_operand")]
8513 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
8514 [(set_attr "length_immediate" "1")
8515 (set_attr "prefix" "evex")
8516 (set_attr "mode" "<MODE>")])
8518 ;; One bit in mask selects 2 elements.
;; 512-bit VSHUFPS insn pattern.  The sixteen explicit selectors must
;; encode the same 2-bit pattern in every 128-bit lane (enforced by the
;; -4/-8/-12 equalities in the condition); the output routine folds
;; operands 3..6 back into the single 8-bit SHUFPS immediate, undoing
;; the +16 bias that marks selectors taken from operand 2.
8519 (define_insn "avx512f_shufps512_1<mask_name>"
8520 [(set (match_operand:V16SF 0 "register_operand" "=v")
8523 (match_operand:V16SF 1 "register_operand" "v")
8524 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
8525 (parallel [(match_operand 3 "const_0_to_3_operand")
8526 (match_operand 4 "const_0_to_3_operand")
8527 (match_operand 5 "const_16_to_19_operand")
8528 (match_operand 6 "const_16_to_19_operand")
8529 (match_operand 7 "const_4_to_7_operand")
8530 (match_operand 8 "const_4_to_7_operand")
8531 (match_operand 9 "const_20_to_23_operand")
8532 (match_operand 10 "const_20_to_23_operand")
8533 (match_operand 11 "const_8_to_11_operand")
8534 (match_operand 12 "const_8_to_11_operand")
8535 (match_operand 13 "const_24_to_27_operand")
8536 (match_operand 14 "const_24_to_27_operand")
8537 (match_operand 15 "const_12_to_15_operand")
8538 (match_operand 16 "const_12_to_15_operand")
8539 (match_operand 17 "const_28_to_31_operand")
8540 (match_operand 18 "const_28_to_31_operand")])))]
8542 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
8543 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
8544 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
8545 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
8546 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
8547 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
8548 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
8549 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
8550 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
8551 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
8552 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
8553 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
;; Rebuild the 8-bit immediate from the first lane's four selectors.
8556 mask = INTVAL (operands[3]);
8557 mask |= INTVAL (operands[4]) << 2;
8558 mask |= (INTVAL (operands[5]) - 16) << 4;
8559 mask |= (INTVAL (operands[6]) - 16) << 6;
8560 operands[3] = GEN_INT (mask);
8562 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
8564 [(set_attr "type" "sselog")
8565 (set_attr "length_immediate" "1")
8566 (set_attr "prefix" "evex")
8567 (set_attr "mode" "V16SF")])
;; Masked 512-bit SHUFPD expander: each immediate bit selects the odd
;; or even DF element of the corresponding source position; bits for
;; the second source are biased by 8 in the explicit selectors.
8569 (define_expand "avx512f_shufpd512_mask"
8570 [(match_operand:V8DF 0 "register_operand")
8571 (match_operand:V8DF 1 "register_operand")
8572 (match_operand:V8DF 2 "nonimmediate_operand")
8573 (match_operand:SI 3 "const_0_to_255_operand")
8574 (match_operand:V8DF 4 "register_operand")
8575 (match_operand:QI 5 "register_operand")]
8578 int mask = INTVAL (operands[3]);
8579 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
8581 GEN_INT (mask & 2 ? 9 : 8),
8582 GEN_INT (mask & 4 ? 3 : 2),
8583 GEN_INT (mask & 8 ? 11 : 10),
8584 GEN_INT (mask & 16 ? 5 : 4),
8585 GEN_INT (mask & 32 ? 13 : 12),
8586 GEN_INT (mask & 64 ? 7 : 6),
8587 GEN_INT (mask & 128 ? 15 : 14),
8588 operands[4], operands[5]));
;; 512-bit VSHUFPD insn pattern.  Each pair of selector operands is
;; constrained to its own two-element window; the output routine
;; re-encodes them as the 8-bit SHUFPD immediate by subtracting each
;; window's base before shifting into position.
8592 (define_insn "avx512f_shufpd512_1<mask_name>"
8593 [(set (match_operand:V8DF 0 "register_operand" "=v")
8596 (match_operand:V8DF 1 "register_operand" "v")
8597 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8598 (parallel [(match_operand 3 "const_0_to_1_operand")
8599 (match_operand 4 "const_8_to_9_operand")
8600 (match_operand 5 "const_2_to_3_operand")
8601 (match_operand 6 "const_10_to_11_operand")
8602 (match_operand 7 "const_4_to_5_operand")
8603 (match_operand 8 "const_12_to_13_operand")
8604 (match_operand 9 "const_6_to_7_operand")
8605 (match_operand 10 "const_14_to_15_operand")])))]
8609 mask = INTVAL (operands[3]);
8610 mask |= (INTVAL (operands[4]) - 8) << 1;
8611 mask |= (INTVAL (operands[5]) - 2) << 2;
8612 mask |= (INTVAL (operands[6]) - 10) << 3;
8613 mask |= (INTVAL (operands[7]) - 4) << 4;
8614 mask |= (INTVAL (operands[8]) - 12) << 5;
8615 mask |= (INTVAL (operands[9]) - 6) << 6;
8616 mask |= (INTVAL (operands[10]) - 14) << 7;
8617 operands[3] = GEN_INT (mask);
8619 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8621 [(set_attr "type" "sselog")
8622 (set_attr "length_immediate" "1")
8623 (set_attr "prefix" "evex")
8624 (set_attr "mode" "V8DF")])
;; 256-bit SHUFPD expander: decode the low 4 immediate bits into the
;; four explicit element selectors for the _1 insn pattern.
8626 (define_expand "avx_shufpd256<mask_expand4_name>"
8627 [(match_operand:V4DF 0 "register_operand")
8628 (match_operand:V4DF 1 "register_operand")
8629 (match_operand:V4DF 2 "nonimmediate_operand")
8630 (match_operand:SI 3 "const_int_operand")]
8633 int mask = INTVAL (operands[3]);
8634 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8638 GEN_INT (mask & 2 ? 5 : 4),
8639 GEN_INT (mask & 4 ? 3 : 2),
8640 GEN_INT (mask & 8 ? 7 : 6)
8641 <mask_expand4_args>));
;; 256-bit VSHUFPD insn pattern; folds the four windowed selectors back
;; into the 4-bit immediate (subtracting each window base).
8645 (define_insn "avx_shufpd256_1<mask_name>"
8646 [(set (match_operand:V4DF 0 "register_operand" "=v")
8649 (match_operand:V4DF 1 "register_operand" "v")
8650 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8651 (parallel [(match_operand 3 "const_0_to_1_operand")
8652 (match_operand 4 "const_4_to_5_operand")
8653 (match_operand 5 "const_2_to_3_operand")
8654 (match_operand 6 "const_6_to_7_operand")])))]
8655 "TARGET_AVX && <mask_avx512vl_condition>"
8658 mask = INTVAL (operands[3]);
8659 mask |= (INTVAL (operands[4]) - 4) << 1;
8660 mask |= (INTVAL (operands[5]) - 2) << 2;
8661 mask |= (INTVAL (operands[6]) - 6) << 3;
8662 operands[3] = GEN_INT (mask);
8664 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
8666 [(set_attr "type" "sseshuf")
8667 (set_attr "length_immediate" "1")
8668 (set_attr "prefix" "vex")
8669 (set_attr "mode" "V4DF")])
;; 128-bit SHUFPD expander: bit 0 selects the element taken from
;; operand 1, bit 1 the element taken from operand 2 (biased by 2).
8671 (define_expand "sse2_shufpd<mask_expand4_name>"
8672 [(match_operand:V2DF 0 "register_operand")
8673 (match_operand:V2DF 1 "register_operand")
8674 (match_operand:V2DF 2 "vector_operand")
8675 (match_operand:SI 3 "const_int_operand")]
8678 int mask = INTVAL (operands[3]);
8679 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8680 operands[2], GEN_INT (mask & 1),
8681 GEN_INT (mask & 2 ? 3 : 2)
8682 <mask_expand4_args>));
;; Masked 128-bit VSHUFPD.  Operand 3 (0..1) selects the element taken
;; from operand 1, operand 4 (2..3, biased by 2) the element taken from
;; operand 2; the output routine folds them back into one immediate.
;; Masked-off lanes merge with operand 5 under mask register operand 6.
8686 (define_insn "sse2_shufpd_v2df_mask"
8687 [(set (match_operand:V2DF 0 "register_operand" "=v")
8691 (match_operand:V2DF 1 "register_operand" "v")
8692 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8693 (parallel [(match_operand 3 "const_0_to_1_operand")
8694 (match_operand 4 "const_2_to_3_operand")]))
8695 (match_operand:V2DF 5 "vector_move_operand" "0C")
8696 (match_operand:QI 6 "register_operand" "Yk")))]
8700 mask = INTVAL (operands[3]);
8701 mask |= (INTVAL (operands[4]) - 2) << 1;
8702 operands[3] = GEN_INT (mask);
;; Fixed: the Intel-dialect half previously had "%{6%}" — a literal
;; '6' instead of the %6 mask-register operand escape.  Both dialects
;; must print the mask as %{%6%}, matching the sibling masked templates
;; (e.g. avx512vl_unpckhpd128_mask above).
8704 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
8706 [(set_attr "type" "sseshuf")
8707 (set_attr "length_immediate" "1")
8708 (set_attr "prefix" "evex")
8709 (set_attr "mode" "V2DF")])
8711 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit VPUNPCKHQDQ: high QWORD of each 128-bit lane from each
;; source; masking needs AVX512VL.
8712 (define_insn "avx2_interleave_highv4di<mask_name>"
8713 [(set (match_operand:V4DI 0 "register_operand" "=v")
8716 (match_operand:V4DI 1 "register_operand" "v")
8717 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8718 (parallel [(const_int 1)
8722 "TARGET_AVX2 && <mask_avx512vl_condition>"
8723 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8724 [(set_attr "type" "sselog")
8725 (set_attr "prefix" "vex")
8726 (set_attr "mode" "OI")])
;; 512-bit VPUNPCKHQDQ: odd-indexed QWORD pairs (1,9, 3,11, ...) from
;; the concat of the two sources; optionally masked.
8728 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
8729 [(set (match_operand:V8DI 0 "register_operand" "=v")
8732 (match_operand:V8DI 1 "register_operand" "v")
8733 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8734 (parallel [(const_int 1) (const_int 9)
8735 (const_int 3) (const_int 11)
8736 (const_int 5) (const_int 13)
8737 (const_int 7) (const_int 15)])))]
8739 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8740 [(set_attr "type" "sselog")
8741 (set_attr "prefix" "evex")
8742 (set_attr "mode" "XI")])
;; 128-bit high QWORD interleave: SSE2 punpckhqdq (destructive, op 1
;; tied to dest) or AVX/AVX512 vpunpckhqdq (three-operand, maskable).
8744 (define_insn "vec_interleave_highv2di<mask_name>"
8745 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8748 (match_operand:V2DI 1 "register_operand" "0,v")
8749 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
8750 (parallel [(const_int 1)
8752 "TARGET_SSE2 && <mask_avx512vl_condition>"
8754 punpckhqdq\t{%2, %0|%0, %2}
8755 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8756 [(set_attr "isa" "noavx,avx")
8757 (set_attr "type" "sselog")
8758 (set_attr "prefix_data16" "1,*")
8759 (set_attr "prefix" "orig,<mask_prefix>")
8760 (set_attr "mode" "TI")])
;; 256-bit VPUNPCKLQDQ: low QWORD of each 128-bit lane from each
;; source; masking needs AVX512VL.
8762 (define_insn "avx2_interleave_lowv4di<mask_name>"
8763 [(set (match_operand:V4DI 0 "register_operand" "=v")
8766 (match_operand:V4DI 1 "register_operand" "v")
8767 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8768 (parallel [(const_int 0)
8772 "TARGET_AVX2 && <mask_avx512vl_condition>"
8773 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8774 [(set_attr "type" "sselog")
8775 (set_attr "prefix" "vex")
8776 (set_attr "mode" "OI")])
;; 512-bit VPUNPCKLQDQ: even-indexed QWORD pairs (0,8, 2,10, ...) from
;; the concat of the two sources; optionally masked.
8778 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
8779 [(set (match_operand:V8DI 0 "register_operand" "=v")
8782 (match_operand:V8DI 1 "register_operand" "v")
8783 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8784 (parallel [(const_int 0) (const_int 8)
8785 (const_int 2) (const_int 10)
8786 (const_int 4) (const_int 12)
8787 (const_int 6) (const_int 14)])))]
8789 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8790 [(set_attr "type" "sselog")
8791 (set_attr "prefix" "evex")
8792 (set_attr "mode" "XI")])
8794 (define_insn "vec_interleave_lowv2di<mask_name>"
8795 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8798 (match_operand:V2DI 1 "register_operand" "0,v")
8799 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
8800 (parallel [(const_int 0)
8802 "TARGET_SSE2 && <mask_avx512vl_condition>"
8804 punpcklqdq\t{%2, %0|%0, %2}
8805 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8806 [(set_attr "isa" "noavx,avx")
8807 (set_attr "type" "sselog")
8808 (set_attr "prefix_data16" "1,*")
8809 (set_attr "prefix" "orig,vex")
8810 (set_attr "mode" "TI")])
8812 (define_insn "sse2_shufpd_<mode>"
8813 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
8814 (vec_select:VI8F_128
8815 (vec_concat:<ssedoublevecmode>
8816 (match_operand:VI8F_128 1 "register_operand" "0,v")
8817 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
8818 (parallel [(match_operand 3 "const_0_to_1_operand")
8819 (match_operand 4 "const_2_to_3_operand")])))]
8823 mask = INTVAL (operands[3]);
8824 mask |= (INTVAL (operands[4]) - 2) << 1;
8825 operands[3] = GEN_INT (mask);
8827 switch (which_alternative)
8830 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
8832 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8837 [(set_attr "isa" "noavx,avx")
8838 (set_attr "type" "sseshuf")
8839 (set_attr "length_immediate" "1")
8840 (set_attr "prefix" "orig,maybe_evex")
8841 (set_attr "mode" "V2DF")])
8843 ;; Avoid combining registers from different units in a single alternative,
8844 ;; see comment above inline_secondary_memory_needed function in i386.c
8845 (define_insn "sse2_storehpd"
8846 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
8848 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
8849 (parallel [(const_int 1)])))]
8850 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8852 %vmovhpd\t{%1, %0|%0, %1}
8854 vunpckhpd\t{%d1, %0|%0, %d1}
8858 [(set_attr "isa" "*,noavx,avx,*,*,*")
8859 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
8860 (set (attr "prefix_data16")
8862 (and (eq_attr "alternative" "0")
8863 (not (match_test "TARGET_AVX")))
8865 (const_string "*")))
8866 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
8867 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
8870 [(set (match_operand:DF 0 "register_operand")
8872 (match_operand:V2DF 1 "memory_operand")
8873 (parallel [(const_int 1)])))]
8874 "TARGET_SSE2 && reload_completed"
8875 [(set (match_dup 0) (match_dup 1))]
8876 "operands[1] = adjust_address (operands[1], DFmode, 8);")
8878 (define_insn "*vec_extractv2df_1_sse"
8879 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8881 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8882 (parallel [(const_int 1)])))]
8883 "!TARGET_SSE2 && TARGET_SSE
8884 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8886 movhps\t{%1, %0|%q0, %1}
8887 movhlps\t{%1, %0|%0, %1}
8888 movlps\t{%H1, %0|%0, %H1}"
8889 [(set_attr "type" "ssemov")
8890 (set_attr "mode" "V2SF,V4SF,V2SF")])
8892 ;; Avoid combining registers from different units in a single alternative,
8893 ;; see comment above inline_secondary_memory_needed function in i386.c
8894 (define_insn "sse2_storelpd"
8895 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
8897 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
8898 (parallel [(const_int 0)])))]
8899 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8901 %vmovlpd\t{%1, %0|%0, %1}
8906 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
8907 (set (attr "prefix_data16")
8908 (if_then_else (eq_attr "alternative" "0")
8910 (const_string "*")))
8911 (set_attr "prefix" "maybe_vex")
8912 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
8915 [(set (match_operand:DF 0 "register_operand")
8917 (match_operand:V2DF 1 "nonimmediate_operand")
8918 (parallel [(const_int 0)])))]
8919 "TARGET_SSE2 && reload_completed"
8920 [(set (match_dup 0) (match_dup 1))]
8921 "operands[1] = gen_lowpart (DFmode, operands[1]);")
8923 (define_insn "*vec_extractv2df_0_sse"
8924 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8926 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8927 (parallel [(const_int 0)])))]
8928 "!TARGET_SSE2 && TARGET_SSE
8929 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8931 movlps\t{%1, %0|%0, %1}
8932 movaps\t{%1, %0|%0, %1}
8933 movlps\t{%1, %0|%0, %q1}"
8934 [(set_attr "type" "ssemov")
8935 (set_attr "mode" "V2SF,V4SF,V2SF")])
8937 (define_expand "sse2_loadhpd_exp"
8938 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8941 (match_operand:V2DF 1 "nonimmediate_operand")
8942 (parallel [(const_int 0)]))
8943 (match_operand:DF 2 "nonimmediate_operand")))]
8946 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8948 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8950 /* Fix up the destination if needed. */
8951 if (dst != operands[0])
8952 emit_move_insn (operands[0], dst);
8957 ;; Avoid combining registers from different units in a single alternative,
8958 ;; see comment above inline_secondary_memory_needed function in i386.c
8959 (define_insn "sse2_loadhpd"
8960 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8964 (match_operand:V2DF 1 "nonimmediate_operand"
8966 (parallel [(const_int 0)]))
8967 (match_operand:DF 2 "nonimmediate_operand"
8968 " m,m,x,v,x,*f,r")))]
8969 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8971 movhpd\t{%2, %0|%0, %2}
8972 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8973 unpcklpd\t{%2, %0|%0, %2}
8974 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8978 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
8979 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8980 (set (attr "prefix_data16")
8981 (if_then_else (eq_attr "alternative" "0")
8983 (const_string "*")))
8984 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
8985 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
8988 [(set (match_operand:V2DF 0 "memory_operand")
8990 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
8991 (match_operand:DF 1 "register_operand")))]
8992 "TARGET_SSE2 && reload_completed"
8993 [(set (match_dup 0) (match_dup 1))]
8994 "operands[0] = adjust_address (operands[0], DFmode, 8);")
8996 (define_expand "sse2_loadlpd_exp"
8997 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8999 (match_operand:DF 2 "nonimmediate_operand")
9001 (match_operand:V2DF 1 "nonimmediate_operand")
9002 (parallel [(const_int 1)]))))]
9005 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9007 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
9009 /* Fix up the destination if needed. */
9010 if (dst != operands[0])
9011 emit_move_insn (operands[0], dst);
9016 ;; Avoid combining registers from different units in a single alternative,
9017 ;; see comment above inline_secondary_memory_needed function in i386.c
9018 (define_insn "sse2_loadlpd"
9019 [(set (match_operand:V2DF 0 "nonimmediate_operand"
9020 "=v,x,v,x,v,x,x,v,m,m ,m")
9022 (match_operand:DF 2 "nonimmediate_operand"
9023 "vm,m,m,x,v,0,0,v,x,*f,r")
9025 (match_operand:V2DF 1 "vector_move_operand"
9026 " C,0,v,0,v,x,o,o,0,0 ,0")
9027 (parallel [(const_int 1)]))))]
9028 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9030 %vmovq\t{%2, %0|%0, %2}
9031 movlpd\t{%2, %0|%0, %2}
9032 vmovlpd\t{%2, %1, %0|%0, %1, %2}
9033 movsd\t{%2, %0|%0, %2}
9034 vmovsd\t{%2, %1, %0|%0, %1, %2}
9035 shufpd\t{$2, %1, %0|%0, %1, 2}
9036 movhpd\t{%H1, %0|%0, %H1}
9037 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
9041 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
9043 (cond [(eq_attr "alternative" "5")
9044 (const_string "sselog")
9045 (eq_attr "alternative" "9")
9046 (const_string "fmov")
9047 (eq_attr "alternative" "10")
9048 (const_string "imov")
9050 (const_string "ssemov")))
9051 (set (attr "prefix_data16")
9052 (if_then_else (eq_attr "alternative" "1,6")
9054 (const_string "*")))
9055 (set (attr "length_immediate")
9056 (if_then_else (eq_attr "alternative" "5")
9058 (const_string "*")))
9059 (set (attr "prefix")
9060 (cond [(eq_attr "alternative" "0")
9061 (const_string "maybe_vex")
9062 (eq_attr "alternative" "1,3,5,6")
9063 (const_string "orig")
9064 (eq_attr "alternative" "2,4,7")
9065 (const_string "maybe_evex")
9067 (const_string "*")))
9068 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
9071 [(set (match_operand:V2DF 0 "memory_operand")
9073 (match_operand:DF 1 "register_operand")
9074 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
9075 "TARGET_SSE2 && reload_completed"
9076 [(set (match_dup 0) (match_dup 1))]
9077 "operands[0] = adjust_address (operands[0], DFmode, 0);")
9079 (define_insn "sse2_movsd"
9080 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
9082 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
9083 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
9087 movsd\t{%2, %0|%0, %2}
9088 vmovsd\t{%2, %1, %0|%0, %1, %2}
9089 movlpd\t{%2, %0|%0, %q2}
9090 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
9091 %vmovlpd\t{%2, %0|%q0, %2}
9092 shufpd\t{$2, %1, %0|%0, %1, 2}
9093 movhps\t{%H1, %0|%0, %H1}
9094 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
9095 %vmovhps\t{%1, %H0|%H0, %1}"
9096 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
9099 (eq_attr "alternative" "5")
9100 (const_string "sselog")
9101 (const_string "ssemov")))
9102 (set (attr "prefix_data16")
9104 (and (eq_attr "alternative" "2,4")
9105 (not (match_test "TARGET_AVX")))
9107 (const_string "*")))
9108 (set (attr "length_immediate")
9109 (if_then_else (eq_attr "alternative" "5")
9111 (const_string "*")))
9112 (set (attr "prefix")
9113 (cond [(eq_attr "alternative" "1,3,7")
9114 (const_string "maybe_evex")
9115 (eq_attr "alternative" "4,8")
9116 (const_string "maybe_vex")
9118 (const_string "orig")))
9119 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
9121 (define_insn "vec_dupv2df<mask_name>"
9122 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
9124 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
9125 "TARGET_SSE2 && <mask_avx512vl_condition>"
9128 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
9129 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
9130 [(set_attr "isa" "noavx,sse3,avx512vl")
9131 (set_attr "type" "sselog1")
9132 (set_attr "prefix" "orig,maybe_vex,evex")
9133 (set_attr "mode" "V2DF,DF,DF")])
9135 (define_insn "vec_concatv2df"
9136 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
9138 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,xm,0,0")
9139 (match_operand:DF 2 "vector_move_operand" " x,x,v,1,1,m,m, C,x,m")))]
9141 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
9142 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
9144 unpcklpd\t{%2, %0|%0, %2}
9145 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9146 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9147 %vmovddup\t{%1, %0|%0, %1}
9148 vmovddup\t{%1, %0|%0, %1}
9149 movhpd\t{%2, %0|%0, %2}
9150 vmovhpd\t{%2, %1, %0|%0, %1, %2}
9151 %vmovq\t{%1, %0|%0, %1}
9152 movlhps\t{%2, %0|%0, %2}
9153 movhps\t{%2, %0|%0, %2}"
9155 (cond [(eq_attr "alternative" "0,5")
9156 (const_string "sse2_noavx")
9157 (eq_attr "alternative" "1,6")
9158 (const_string "avx")
9159 (eq_attr "alternative" "2,4")
9160 (const_string "avx512vl")
9161 (eq_attr "alternative" "3")
9162 (const_string "sse3")
9163 (eq_attr "alternative" "7")
9164 (const_string "sse2")
9166 (const_string "noavx")))
9169 (eq_attr "alternative" "0,1,2,3,4")
9170 (const_string "sselog")
9171 (const_string "ssemov")))
9172 (set (attr "prefix_data16")
9173 (if_then_else (eq_attr "alternative" "5")
9175 (const_string "*")))
9176 (set (attr "prefix")
9177 (cond [(eq_attr "alternative" "1,6")
9178 (const_string "vex")
9179 (eq_attr "alternative" "2,4")
9180 (const_string "evex")
9181 (eq_attr "alternative" "3,7")
9182 (const_string "maybe_vex")
9184 (const_string "orig")))
9185 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
9187 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9189 ;; Parallel integer down-conversion operations
9191 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9193 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
9194 (define_mode_attr pmov_src_mode
9195 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
9196 (define_mode_attr pmov_src_lower
9197 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
9198 (define_mode_attr pmov_suff_1
9199 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
9201 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
9202 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
9203 (any_truncate:PMOV_DST_MODE_1
9204 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
9206 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
9207 [(set_attr "type" "ssemov")
9208 (set_attr "memory" "none,store")
9209 (set_attr "prefix" "evex")
9210 (set_attr "mode" "<sseinsnmode>")])
9212 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
9213 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
9214 (vec_merge:PMOV_DST_MODE_1
9215 (any_truncate:PMOV_DST_MODE_1
9216 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
9217 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
9218 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9220 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9221 [(set_attr "type" "ssemov")
9222 (set_attr "memory" "none,store")
9223 (set_attr "prefix" "evex")
9224 (set_attr "mode" "<sseinsnmode>")])
9226 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
9227 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
9228 (vec_merge:PMOV_DST_MODE_1
9229 (any_truncate:PMOV_DST_MODE_1
9230 (match_operand:<pmov_src_mode> 1 "register_operand"))
9232 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9235 (define_insn "avx512bw_<code>v32hiv32qi2"
9236 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9238 (match_operand:V32HI 1 "register_operand" "v,v")))]
9240 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
9241 [(set_attr "type" "ssemov")
9242 (set_attr "memory" "none,store")
9243 (set_attr "prefix" "evex")
9244 (set_attr "mode" "XI")])
9246 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
9247 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9250 (match_operand:V32HI 1 "register_operand" "v,v"))
9251 (match_operand:V32QI 2 "vector_move_operand" "0C,0")
9252 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
9254 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9255 [(set_attr "type" "ssemov")
9256 (set_attr "memory" "none,store")
9257 (set_attr "prefix" "evex")
9258 (set_attr "mode" "XI")])
9260 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
9261 [(set (match_operand:V32QI 0 "nonimmediate_operand")
9264 (match_operand:V32HI 1 "register_operand"))
9266 (match_operand:SI 2 "register_operand")))]
9269 (define_mode_iterator PMOV_DST_MODE_2
9270 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
9271 (define_mode_attr pmov_suff_2
9272 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
9274 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
9275 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9276 (any_truncate:PMOV_DST_MODE_2
9277 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
9279 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
9280 [(set_attr "type" "ssemov")
9281 (set_attr "memory" "none,store")
9282 (set_attr "prefix" "evex")
9283 (set_attr "mode" "<sseinsnmode>")])
9285 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
9286 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9287 (vec_merge:PMOV_DST_MODE_2
9288 (any_truncate:PMOV_DST_MODE_2
9289 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
9290 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
9291 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9293 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9294 [(set_attr "type" "ssemov")
9295 (set_attr "memory" "none,store")
9296 (set_attr "prefix" "evex")
9297 (set_attr "mode" "<sseinsnmode>")])
9299 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
9300 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
9301 (vec_merge:PMOV_DST_MODE_2
9302 (any_truncate:PMOV_DST_MODE_2
9303 (match_operand:<ssedoublemode> 1 "register_operand"))
9305 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9308 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
9309 (define_mode_attr pmov_dst_3
9310 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
9311 (define_mode_attr pmov_dst_zeroed_3
9312 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
9313 (define_mode_attr pmov_suff_3
9314 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
9316 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
9317 [(set (match_operand:V16QI 0 "register_operand" "=v")
9319 (any_truncate:<pmov_dst_3>
9320 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
9321 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
9323 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9324 [(set_attr "type" "ssemov")
9325 (set_attr "prefix" "evex")
9326 (set_attr "mode" "TI")])
9328 (define_insn "*avx512vl_<code>v2div2qi2_store"
9329 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9332 (match_operand:V2DI 1 "register_operand" "v"))
9335 (parallel [(const_int 2) (const_int 3)
9336 (const_int 4) (const_int 5)
9337 (const_int 6) (const_int 7)
9338 (const_int 8) (const_int 9)
9339 (const_int 10) (const_int 11)
9340 (const_int 12) (const_int 13)
9341 (const_int 14) (const_int 15)]))))]
9343 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9344 [(set_attr "type" "ssemov")
9345 (set_attr "memory" "store")
9346 (set_attr "prefix" "evex")
9347 (set_attr "mode" "TI")])
9349 (define_insn "avx512vl_<code>v2div2qi2_mask"
9350 [(set (match_operand:V16QI 0 "register_operand" "=v")
9354 (match_operand:V2DI 1 "register_operand" "v"))
9356 (match_operand:V16QI 2 "vector_move_operand" "0C")
9357 (parallel [(const_int 0) (const_int 1)]))
9358 (match_operand:QI 3 "register_operand" "Yk"))
9359 (const_vector:V14QI [(const_int 0) (const_int 0)
9360 (const_int 0) (const_int 0)
9361 (const_int 0) (const_int 0)
9362 (const_int 0) (const_int 0)
9363 (const_int 0) (const_int 0)
9364 (const_int 0) (const_int 0)
9365 (const_int 0) (const_int 0)])))]
9367 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9368 [(set_attr "type" "ssemov")
9369 (set_attr "prefix" "evex")
9370 (set_attr "mode" "TI")])
9372 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
9373 [(set (match_operand:V16QI 0 "register_operand" "=v")
9377 (match_operand:V2DI 1 "register_operand" "v"))
9378 (const_vector:V2QI [(const_int 0) (const_int 0)])
9379 (match_operand:QI 2 "register_operand" "Yk"))
9380 (const_vector:V14QI [(const_int 0) (const_int 0)
9381 (const_int 0) (const_int 0)
9382 (const_int 0) (const_int 0)
9383 (const_int 0) (const_int 0)
9384 (const_int 0) (const_int 0)
9385 (const_int 0) (const_int 0)
9386 (const_int 0) (const_int 0)])))]
9388 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9389 [(set_attr "type" "ssemov")
9390 (set_attr "prefix" "evex")
9391 (set_attr "mode" "TI")])
9393 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
9394 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9398 (match_operand:V2DI 1 "register_operand" "v"))
9401 (parallel [(const_int 0) (const_int 1)]))
9402 (match_operand:QI 2 "register_operand" "Yk"))
9405 (parallel [(const_int 2) (const_int 3)
9406 (const_int 4) (const_int 5)
9407 (const_int 6) (const_int 7)
9408 (const_int 8) (const_int 9)
9409 (const_int 10) (const_int 11)
9410 (const_int 12) (const_int 13)
9411 (const_int 14) (const_int 15)]))))]
9413 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
9414 [(set_attr "type" "ssemov")
9415 (set_attr "memory" "store")
9416 (set_attr "prefix" "evex")
9417 (set_attr "mode" "TI")])
9419 (define_insn "*avx512vl_<code><mode>v4qi2_store"
9420 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9423 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9426 (parallel [(const_int 4) (const_int 5)
9427 (const_int 6) (const_int 7)
9428 (const_int 8) (const_int 9)
9429 (const_int 10) (const_int 11)
9430 (const_int 12) (const_int 13)
9431 (const_int 14) (const_int 15)]))))]
9433 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9434 [(set_attr "type" "ssemov")
9435 (set_attr "memory" "store")
9436 (set_attr "prefix" "evex")
9437 (set_attr "mode" "TI")])
9439 (define_insn "avx512vl_<code><mode>v4qi2_mask"
9440 [(set (match_operand:V16QI 0 "register_operand" "=v")
9444 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9446 (match_operand:V16QI 2 "vector_move_operand" "0C")
9447 (parallel [(const_int 0) (const_int 1)
9448 (const_int 2) (const_int 3)]))
9449 (match_operand:QI 3 "register_operand" "Yk"))
9450 (const_vector:V12QI [(const_int 0) (const_int 0)
9451 (const_int 0) (const_int 0)
9452 (const_int 0) (const_int 0)
9453 (const_int 0) (const_int 0)
9454 (const_int 0) (const_int 0)
9455 (const_int 0) (const_int 0)])))]
9457 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9458 [(set_attr "type" "ssemov")
9459 (set_attr "prefix" "evex")
9460 (set_attr "mode" "TI")])
9462 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
9463 [(set (match_operand:V16QI 0 "register_operand" "=v")
9467 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9468 (const_vector:V4QI [(const_int 0) (const_int 0)
9469 (const_int 0) (const_int 0)])
9470 (match_operand:QI 2 "register_operand" "Yk"))
9471 (const_vector:V12QI [(const_int 0) (const_int 0)
9472 (const_int 0) (const_int 0)
9473 (const_int 0) (const_int 0)
9474 (const_int 0) (const_int 0)
9475 (const_int 0) (const_int 0)
9476 (const_int 0) (const_int 0)])))]
9478 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9479 [(set_attr "type" "ssemov")
9480 (set_attr "prefix" "evex")
9481 (set_attr "mode" "TI")])
9483 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
9484 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9488 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9491 (parallel [(const_int 0) (const_int 1)
9492 (const_int 2) (const_int 3)]))
9493 (match_operand:QI 2 "register_operand" "Yk"))
9496 (parallel [(const_int 4) (const_int 5)
9497 (const_int 6) (const_int 7)
9498 (const_int 8) (const_int 9)
9499 (const_int 10) (const_int 11)
9500 (const_int 12) (const_int 13)
9501 (const_int 14) (const_int 15)]))))]
9504 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
9505 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}";
9506 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
9508 [(set_attr "type" "ssemov")
9509 (set_attr "memory" "store")
9510 (set_attr "prefix" "evex")
9511 (set_attr "mode" "TI")])
9513 (define_mode_iterator VI2_128_BW_4_256
9514 [(V8HI "TARGET_AVX512BW") V8SI])
9516 (define_insn "*avx512vl_<code><mode>v8qi2_store"
9517 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9520 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9523 (parallel [(const_int 8) (const_int 9)
9524 (const_int 10) (const_int 11)
9525 (const_int 12) (const_int 13)
9526 (const_int 14) (const_int 15)]))))]
9528 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9529 [(set_attr "type" "ssemov")
9530 (set_attr "memory" "store")
9531 (set_attr "prefix" "evex")
9532 (set_attr "mode" "TI")])
9534 (define_insn "avx512vl_<code><mode>v8qi2_mask"
9535 [(set (match_operand:V16QI 0 "register_operand" "=v")
9539 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9541 (match_operand:V16QI 2 "vector_move_operand" "0C")
9542 (parallel [(const_int 0) (const_int 1)
9543 (const_int 2) (const_int 3)
9544 (const_int 4) (const_int 5)
9545 (const_int 6) (const_int 7)]))
9546 (match_operand:QI 3 "register_operand" "Yk"))
9547 (const_vector:V8QI [(const_int 0) (const_int 0)
9548 (const_int 0) (const_int 0)
9549 (const_int 0) (const_int 0)
9550 (const_int 0) (const_int 0)])))]
9552 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9553 [(set_attr "type" "ssemov")
9554 (set_attr "prefix" "evex")
9555 (set_attr "mode" "TI")])
9557 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
9558 [(set (match_operand:V16QI 0 "register_operand" "=v")
9562 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9563 (const_vector:V8QI [(const_int 0) (const_int 0)
9564 (const_int 0) (const_int 0)
9565 (const_int 0) (const_int 0)
9566 (const_int 0) (const_int 0)])
9567 (match_operand:QI 2 "register_operand" "Yk"))
9568 (const_vector:V8QI [(const_int 0) (const_int 0)
9569 (const_int 0) (const_int 0)
9570 (const_int 0) (const_int 0)
9571 (const_int 0) (const_int 0)])))]
9573 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9574 [(set_attr "type" "ssemov")
9575 (set_attr "prefix" "evex")
9576 (set_attr "mode" "TI")])
9578 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
9579 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9583 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9586 (parallel [(const_int 0) (const_int 1)
9587 (const_int 2) (const_int 3)
9588 (const_int 4) (const_int 5)
9589 (const_int 6) (const_int 7)]))
9590 (match_operand:QI 2 "register_operand" "Yk"))
9593 (parallel [(const_int 8) (const_int 9)
9594 (const_int 10) (const_int 11)
9595 (const_int 12) (const_int 13)
9596 (const_int 14) (const_int 15)]))))]
9599 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
9600 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
9601 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
9603 [(set_attr "type" "ssemov")
9604 (set_attr "memory" "store")
9605 (set_attr "prefix" "evex")
9606 (set_attr "mode" "TI")])
9608 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
9609 (define_mode_attr pmov_dst_4
9610 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
9611 (define_mode_attr pmov_dst_zeroed_4
9612 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
9613 (define_mode_attr pmov_suff_4
9614 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
9616 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
9617 [(set (match_operand:V8HI 0 "register_operand" "=v")
9619 (any_truncate:<pmov_dst_4>
9620 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
9621 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
9623 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9624 [(set_attr "type" "ssemov")
9625 (set_attr "prefix" "evex")
9626 (set_attr "mode" "TI")])
9628 (define_insn "*avx512vl_<code><mode>v4hi2_store"
9629 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9632 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9635 (parallel [(const_int 4) (const_int 5)
9636 (const_int 6) (const_int 7)]))))]
9638 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9639 [(set_attr "type" "ssemov")
9640 (set_attr "memory" "store")
9641 (set_attr "prefix" "evex")
9642 (set_attr "mode" "TI")])
9644 (define_insn "avx512vl_<code><mode>v4hi2_mask"
9645 [(set (match_operand:V8HI 0 "register_operand" "=v")
9649 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9651 (match_operand:V8HI 2 "vector_move_operand" "0C")
9652 (parallel [(const_int 0) (const_int 1)
9653 (const_int 2) (const_int 3)]))
9654 (match_operand:QI 3 "register_operand" "Yk"))
9655 (const_vector:V4HI [(const_int 0) (const_int 0)
9656 (const_int 0) (const_int 0)])))]
9658 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9659 [(set_attr "type" "ssemov")
9660 (set_attr "prefix" "evex")
9661 (set_attr "mode" "TI")])
9663 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
9664 [(set (match_operand:V8HI 0 "register_operand" "=v")
9668 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9669 (const_vector:V4HI [(const_int 0) (const_int 0)
9670 (const_int 0) (const_int 0)])
9671 (match_operand:QI 2 "register_operand" "Yk"))
9672 (const_vector:V4HI [(const_int 0) (const_int 0)
9673 (const_int 0) (const_int 0)])))]
9675 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9676 [(set_attr "type" "ssemov")
9677 (set_attr "prefix" "evex")
9678 (set_attr "mode" "TI")])
9680 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
9681 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9685 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9688 (parallel [(const_int 0) (const_int 1)
9689 (const_int 2) (const_int 3)]))
9690 (match_operand:QI 2 "register_operand" "Yk"))
9693 (parallel [(const_int 4) (const_int 5)
9694 (const_int 6) (const_int 7)]))))]
9697 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
9698 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
9699 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
9701 [(set_attr "type" "ssemov")
9702 (set_attr "memory" "store")
9703 (set_attr "prefix" "evex")
9704 (set_attr "mode" "TI")])
9706 (define_insn "*avx512vl_<code>v2div2hi2_store"
9707 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9710 (match_operand:V2DI 1 "register_operand" "v"))
9713 (parallel [(const_int 2) (const_int 3)
9714 (const_int 4) (const_int 5)
9715 (const_int 6) (const_int 7)]))))]
9717 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
9718 [(set_attr "type" "ssemov")
9719 (set_attr "memory" "store")
9720 (set_attr "prefix" "evex")
9721 (set_attr "mode" "TI")])
9723 (define_insn "avx512vl_<code>v2div2hi2_mask"
9724 [(set (match_operand:V8HI 0 "register_operand" "=v")
9728 (match_operand:V2DI 1 "register_operand" "v"))
9730 (match_operand:V8HI 2 "vector_move_operand" "0C")
9731 (parallel [(const_int 0) (const_int 1)]))
9732 (match_operand:QI 3 "register_operand" "Yk"))
9733 (const_vector:V6HI [(const_int 0) (const_int 0)
9734 (const_int 0) (const_int 0)
9735 (const_int 0) (const_int 0)])))]
9737 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9738 [(set_attr "type" "ssemov")
9739 (set_attr "prefix" "evex")
9740 (set_attr "mode" "TI")])
9742 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
9743 [(set (match_operand:V8HI 0 "register_operand" "=v")
9747 (match_operand:V2DI 1 "register_operand" "v"))
9748 (const_vector:V2HI [(const_int 0) (const_int 0)])
9749 (match_operand:QI 2 "register_operand" "Yk"))
9750 (const_vector:V6HI [(const_int 0) (const_int 0)
9751 (const_int 0) (const_int 0)
9752 (const_int 0) (const_int 0)])))]
9754 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9755 [(set_attr "type" "ssemov")
9756 (set_attr "prefix" "evex")
9757 (set_attr "mode" "TI")])
9759 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
9760 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9764 (match_operand:V2DI 1 "register_operand" "v"))
9767 (parallel [(const_int 0) (const_int 1)]))
9768 (match_operand:QI 2 "register_operand" "Yk"))
9771 (parallel [(const_int 2) (const_int 3)
9772 (const_int 4) (const_int 5)
9773 (const_int 6) (const_int 7)]))))]
9775 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
9776 [(set_attr "type" "ssemov")
9777 (set_attr "memory" "store")
9778 (set_attr "prefix" "evex")
9779 (set_attr "mode" "TI")])
9781 (define_insn "*avx512vl_<code>v2div2si2"
9782 [(set (match_operand:V4SI 0 "register_operand" "=v")
9785 (match_operand:V2DI 1 "register_operand" "v"))
9786 (match_operand:V2SI 2 "const0_operand")))]
9788 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9789 [(set_attr "type" "ssemov")
9790 (set_attr "prefix" "evex")
9791 (set_attr "mode" "TI")])
9793 (define_insn "*avx512vl_<code>v2div2si2_store"
9794 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9797 (match_operand:V2DI 1 "register_operand" "v"))
9800 (parallel [(const_int 2) (const_int 3)]))))]
9802 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9803 [(set_attr "type" "ssemov")
9804 (set_attr "memory" "store")
9805 (set_attr "prefix" "evex")
9806 (set_attr "mode" "TI")])
9808 (define_insn "avx512vl_<code>v2div2si2_mask"
9809 [(set (match_operand:V4SI 0 "register_operand" "=v")
9813 (match_operand:V2DI 1 "register_operand" "v"))
9815 (match_operand:V4SI 2 "vector_move_operand" "0C")
9816 (parallel [(const_int 0) (const_int 1)]))
9817 (match_operand:QI 3 "register_operand" "Yk"))
9818 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9820 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9821 [(set_attr "type" "ssemov")
9822 (set_attr "prefix" "evex")
9823 (set_attr "mode" "TI")])
9825 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
9826 [(set (match_operand:V4SI 0 "register_operand" "=v")
9830 (match_operand:V2DI 1 "register_operand" "v"))
9831 (const_vector:V2SI [(const_int 0) (const_int 0)])
9832 (match_operand:QI 2 "register_operand" "Yk"))
9833 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9835 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9836 [(set_attr "type" "ssemov")
9837 (set_attr "prefix" "evex")
9838 (set_attr "mode" "TI")])
9840 (define_insn "avx512vl_<code>v2div2si2_mask_store"
9841 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9845 (match_operand:V2DI 1 "register_operand" "v"))
9848 (parallel [(const_int 0) (const_int 1)]))
9849 (match_operand:QI 2 "register_operand" "Yk"))
9852 (parallel [(const_int 2) (const_int 3)]))))]
9854 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %t1}"
9855 [(set_attr "type" "ssemov")
9856 (set_attr "memory" "store")
9857 (set_attr "prefix" "evex")
9858 (set_attr "mode" "TI")])
9860 (define_insn "*avx512f_<code>v8div16qi2"
9861 [(set (match_operand:V16QI 0 "register_operand" "=v")
9864 (match_operand:V8DI 1 "register_operand" "v"))
9865 (const_vector:V8QI [(const_int 0) (const_int 0)
9866 (const_int 0) (const_int 0)
9867 (const_int 0) (const_int 0)
9868 (const_int 0) (const_int 0)])))]
9870 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9871 [(set_attr "type" "ssemov")
9872 (set_attr "prefix" "evex")
9873 (set_attr "mode" "TI")])
9875 (define_insn "*avx512f_<code>v8div16qi2_store"
9876 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9879 (match_operand:V8DI 1 "register_operand" "v"))
9882 (parallel [(const_int 8) (const_int 9)
9883 (const_int 10) (const_int 11)
9884 (const_int 12) (const_int 13)
9885 (const_int 14) (const_int 15)]))))]
9887 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9888 [(set_attr "type" "ssemov")
9889 (set_attr "memory" "store")
9890 (set_attr "prefix" "evex")
9891 (set_attr "mode" "TI")])
9893 (define_insn "avx512f_<code>v8div16qi2_mask"
9894 [(set (match_operand:V16QI 0 "register_operand" "=v")
9898 (match_operand:V8DI 1 "register_operand" "v"))
9900 (match_operand:V16QI 2 "vector_move_operand" "0C")
9901 (parallel [(const_int 0) (const_int 1)
9902 (const_int 2) (const_int 3)
9903 (const_int 4) (const_int 5)
9904 (const_int 6) (const_int 7)]))
9905 (match_operand:QI 3 "register_operand" "Yk"))
9906 (const_vector:V8QI [(const_int 0) (const_int 0)
9907 (const_int 0) (const_int 0)
9908 (const_int 0) (const_int 0)
9909 (const_int 0) (const_int 0)])))]
9911 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9912 [(set_attr "type" "ssemov")
9913 (set_attr "prefix" "evex")
9914 (set_attr "mode" "TI")])
9916 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
9917 [(set (match_operand:V16QI 0 "register_operand" "=v")
9921 (match_operand:V8DI 1 "register_operand" "v"))
9922 (const_vector:V8QI [(const_int 0) (const_int 0)
9923 (const_int 0) (const_int 0)
9924 (const_int 0) (const_int 0)
9925 (const_int 0) (const_int 0)])
9926 (match_operand:QI 2 "register_operand" "Yk"))
9927 (const_vector:V8QI [(const_int 0) (const_int 0)
9928 (const_int 0) (const_int 0)
9929 (const_int 0) (const_int 0)
9930 (const_int 0) (const_int 0)])))]
9932 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9933 [(set_attr "type" "ssemov")
9934 (set_attr "prefix" "evex")
9935 (set_attr "mode" "TI")])
9937 (define_insn "avx512f_<code>v8div16qi2_mask_store"
9938 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9942 (match_operand:V8DI 1 "register_operand" "v"))
9945 (parallel [(const_int 0) (const_int 1)
9946 (const_int 2) (const_int 3)
9947 (const_int 4) (const_int 5)
9948 (const_int 6) (const_int 7)]))
9949 (match_operand:QI 2 "register_operand" "Yk"))
9952 (parallel [(const_int 8) (const_int 9)
9953 (const_int 10) (const_int 11)
9954 (const_int 12) (const_int 13)
9955 (const_int 14) (const_int 15)]))))]
9957 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
9958 [(set_attr "type" "ssemov")
9959 (set_attr "memory" "store")
9960 (set_attr "prefix" "evex")
9961 (set_attr "mode" "TI")])
9963 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9965 ;; Parallel integral arithmetic
9967 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9969 (define_expand "neg<mode>2"
9970 [(set (match_operand:VI_AVX2 0 "register_operand")
9973 (match_operand:VI_AVX2 1 "vector_operand")))]
9975 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
9977 (define_expand "<plusminus_insn><mode>3"
9978 [(set (match_operand:VI_AVX2 0 "register_operand")
9980 (match_operand:VI_AVX2 1 "vector_operand")
9981 (match_operand:VI_AVX2 2 "vector_operand")))]
9983 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9985 (define_expand "<plusminus_insn><mode>3_mask"
9986 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9987 (vec_merge:VI48_AVX512VL
9988 (plusminus:VI48_AVX512VL
9989 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
9990 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
9991 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
9992 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9994 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9996 (define_expand "<plusminus_insn><mode>3_mask"
9997 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
9998 (vec_merge:VI12_AVX512VL
9999 (plusminus:VI12_AVX512VL
10000 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
10001 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10002 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
10003 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10005 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10007 (define_insn "*<plusminus_insn><mode>3"
10008 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
10010 (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
10011 (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
10012 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10014 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10015 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10016 [(set_attr "isa" "noavx,avx")
10017 (set_attr "type" "sseiadd")
10018 (set_attr "prefix_data16" "1,*")
10019 (set_attr "prefix" "<mask_prefix3>")
10020 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector add/sub for 32/64-bit element modes (AVX-512F baseline;
;; the 128/256-bit modes of VI48_AVX512VL additionally require AVX-512VL).
;; The plus/minus result is merged with operand 3 under k-mask operand 4.
;; Constraint "0C" on operand 3 allows either the destination register
;; (merge-masking) or an all-zeros constant; %N3 emits "{z}" for the
;; latter (zero-masking).  <comm> makes operand 1 commutative for PLUS.
10022 (define_insn "*<plusminus_insn><mode>3_mask"
10023 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10024 (vec_merge:VI48_AVX512VL
10025 (plusminus:VI48_AVX512VL
10026 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10027 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10028 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
10029 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10030 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10031 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10032 [(set_attr "type" "sseiadd")
10033 (set_attr "prefix" "evex")
10034 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector add/sub for 8/16-bit element modes.  Same structure as
;; the VI48 pattern above, but byte/word masking is an AVX-512BW feature,
;; hence the TARGET_AVX512BW condition instead of TARGET_AVX512F.
;; Operand 3 ("0C") selects merge- vs. zero-masking; %N3 prints "{z}"
;; when it is the zero vector.
10036 (define_insn "*<plusminus_insn><mode>3_mask"
10037 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10038 (vec_merge:VI12_AVX512VL
10039 (plusminus:VI12_AVX512VL
10040 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10041 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10042 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
10043 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10044 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10045 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10046 [(set_attr "type" "sseiadd")
10047 (set_attr "prefix" "evex")
10048 (set_attr "mode" "<sseinsnmode>")])
;; Expander for saturating add/sub (PADDS/PADDUS/PSUBS/PSUBUS) on 8/16-bit
;; element vectors.  ix86_fixup_binary_operands_no_copy canonicalizes the
;; operands in place (e.g. forces at most one memory operand) so the
;; matching *insn pattern below can accept the result.  The <mask_name>
;; suffix generates the additional AVX-512 masked expander variants.
10050 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10051 [(set (match_operand:VI12_AVX2 0 "register_operand")
10052 (sat_plusminus:VI12_AVX2
10053 (match_operand:VI12_AVX2 1 "vector_operand")
10054 (match_operand:VI12_AVX2 2 "vector_operand")))]
10055 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10056 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10058 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10059 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
10060 (sat_plusminus:VI12_AVX2
10061 (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
10062 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
10063 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
10064 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10066 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10067 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10068 [(set_attr "isa" "noavx,avx")
10069 (set_attr "type" "sseiadd")
10070 (set_attr "prefix_data16" "1,*")
10071 (set_attr "prefix" "orig,maybe_evex")
10072 (set_attr "mode" "TI")])
10074 (define_expand "mul<mode>3<mask_name>"
10075 [(set (match_operand:VI1_AVX512 0 "register_operand")
10076 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
10077 (match_operand:VI1_AVX512 2 "register_operand")))]
10078 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10080 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Expander for 16-bit element vector multiply (PMULLW).  Only operand
;; canonicalization is needed here; the actual code generation is done by
;; the *mul<mode>3<mask_name> insn that follows.  Masked variants (via
;; <mask_name>) are gated on AVX-512BW by <mask_avx512bw_condition>.
10084 (define_expand "mul<mode>3<mask_name>"
10085 [(set (match_operand:VI2_AVX2 0 "register_operand")
10086 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
10087 (match_operand:VI2_AVX2 2 "vector_operand")))]
10088 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10089 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
10091 (define_insn "*mul<mode>3<mask_name>"
10092 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
10093 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
10094 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
10095 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
10096 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10098 pmullw\t{%2, %0|%0, %2}
10099 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10100 [(set_attr "isa" "noavx,avx")
10101 (set_attr "type" "sseimul")
10102 (set_attr "prefix_data16" "1,*")
10103 (set_attr "prefix" "orig,vex")
10104 (set_attr "mode" "<sseinsnmode>")])
10106 (define_expand "<s>mul<mode>3_highpart<mask_name>"
10107 [(set (match_operand:VI2_AVX2 0 "register_operand")
10109 (lshiftrt:<ssedoublemode>
10110 (mult:<ssedoublemode>
10111 (any_extend:<ssedoublemode>
10112 (match_operand:VI2_AVX2 1 "vector_operand"))
10113 (any_extend:<ssedoublemode>
10114 (match_operand:VI2_AVX2 2 "vector_operand")))
10117 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10118 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
10120 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
10121 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
10123 (lshiftrt:<ssedoublemode>
10124 (mult:<ssedoublemode>
10125 (any_extend:<ssedoublemode>
10126 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
10127 (any_extend:<ssedoublemode>
10128 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
10130 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
10131 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10133 pmulh<u>w\t{%2, %0|%0, %2}
10134 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10135 [(set_attr "isa" "noavx,avx")
10136 (set_attr "type" "sseimul")
10137 (set_attr "prefix_data16" "1,*")
10138 (set_attr "prefix" "orig,vex")
10139 (set_attr "mode" "<sseinsnmode>")])
10141 (define_expand "vec_widen_umult_even_v16si<mask_name>"
10142 [(set (match_operand:V8DI 0 "register_operand")
10146 (match_operand:V16SI 1 "nonimmediate_operand")
10147 (parallel [(const_int 0) (const_int 2)
10148 (const_int 4) (const_int 6)
10149 (const_int 8) (const_int 10)
10150 (const_int 12) (const_int 14)])))
10153 (match_operand:V16SI 2 "nonimmediate_operand")
10154 (parallel [(const_int 0) (const_int 2)
10155 (const_int 4) (const_int 6)
10156 (const_int 8) (const_int 10)
10157 (const_int 12) (const_int 14)])))))]
10159 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
10161 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
10162 [(set (match_operand:V8DI 0 "register_operand" "=v")
10166 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
10167 (parallel [(const_int 0) (const_int 2)
10168 (const_int 4) (const_int 6)
10169 (const_int 8) (const_int 10)
10170 (const_int 12) (const_int 14)])))
10173 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
10174 (parallel [(const_int 0) (const_int 2)
10175 (const_int 4) (const_int 6)
10176 (const_int 8) (const_int 10)
10177 (const_int 12) (const_int 14)])))))]
10178 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10179 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10180 [(set_attr "type" "sseimul")
10181 (set_attr "prefix_extra" "1")
10182 (set_attr "prefix" "evex")
10183 (set_attr "mode" "XI")])
10185 (define_expand "vec_widen_umult_even_v8si<mask_name>"
10186 [(set (match_operand:V4DI 0 "register_operand")
10190 (match_operand:V8SI 1 "nonimmediate_operand")
10191 (parallel [(const_int 0) (const_int 2)
10192 (const_int 4) (const_int 6)])))
10195 (match_operand:V8SI 2 "nonimmediate_operand")
10196 (parallel [(const_int 0) (const_int 2)
10197 (const_int 4) (const_int 6)])))))]
10198 "TARGET_AVX2 && <mask_avx512vl_condition>"
10199 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
10201 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
10202 [(set (match_operand:V4DI 0 "register_operand" "=v")
10206 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
10207 (parallel [(const_int 0) (const_int 2)
10208 (const_int 4) (const_int 6)])))
10211 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
10212 (parallel [(const_int 0) (const_int 2)
10213 (const_int 4) (const_int 6)])))))]
10214 "TARGET_AVX2 && <mask_avx512vl_condition>
10215 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10216 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10217 [(set_attr "type" "sseimul")
10218 (set_attr "prefix" "maybe_evex")
10219 (set_attr "mode" "OI")])
10221 (define_expand "vec_widen_umult_even_v4si<mask_name>"
10222 [(set (match_operand:V2DI 0 "register_operand")
10226 (match_operand:V4SI 1 "vector_operand")
10227 (parallel [(const_int 0) (const_int 2)])))
10230 (match_operand:V4SI 2 "vector_operand")
10231 (parallel [(const_int 0) (const_int 2)])))))]
10232 "TARGET_SSE2 && <mask_avx512vl_condition>"
10233 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
10235 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
10236 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10240 (match_operand:V4SI 1 "vector_operand" "%0,v")
10241 (parallel [(const_int 0) (const_int 2)])))
10244 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
10245 (parallel [(const_int 0) (const_int 2)])))))]
10246 "TARGET_SSE2 && <mask_avx512vl_condition>
10247 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10249 pmuludq\t{%2, %0|%0, %2}
10250 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10251 [(set_attr "isa" "noavx,avx")
10252 (set_attr "type" "sseimul")
10253 (set_attr "prefix_data16" "1,*")
10254 (set_attr "prefix" "orig,maybe_evex")
10255 (set_attr "mode" "TI")])
10257 (define_expand "vec_widen_smult_even_v16si<mask_name>"
10258 [(set (match_operand:V8DI 0 "register_operand")
10262 (match_operand:V16SI 1 "nonimmediate_operand")
10263 (parallel [(const_int 0) (const_int 2)
10264 (const_int 4) (const_int 6)
10265 (const_int 8) (const_int 10)
10266 (const_int 12) (const_int 14)])))
10269 (match_operand:V16SI 2 "nonimmediate_operand")
10270 (parallel [(const_int 0) (const_int 2)
10271 (const_int 4) (const_int 6)
10272 (const_int 8) (const_int 10)
10273 (const_int 12) (const_int 14)])))))]
10275 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
10277 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
10278 [(set (match_operand:V8DI 0 "register_operand" "=v")
10282 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
10283 (parallel [(const_int 0) (const_int 2)
10284 (const_int 4) (const_int 6)
10285 (const_int 8) (const_int 10)
10286 (const_int 12) (const_int 14)])))
10289 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
10290 (parallel [(const_int 0) (const_int 2)
10291 (const_int 4) (const_int 6)
10292 (const_int 8) (const_int 10)
10293 (const_int 12) (const_int 14)])))))]
10294 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10295 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10296 [(set_attr "type" "sseimul")
10297 (set_attr "prefix_extra" "1")
10298 (set_attr "prefix" "evex")
10299 (set_attr "mode" "XI")])
10301 (define_expand "vec_widen_smult_even_v8si<mask_name>"
10302 [(set (match_operand:V4DI 0 "register_operand")
10306 (match_operand:V8SI 1 "nonimmediate_operand")
10307 (parallel [(const_int 0) (const_int 2)
10308 (const_int 4) (const_int 6)])))
10311 (match_operand:V8SI 2 "nonimmediate_operand")
10312 (parallel [(const_int 0) (const_int 2)
10313 (const_int 4) (const_int 6)])))))]
10314 "TARGET_AVX2 && <mask_avx512vl_condition>"
10315 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
10317 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
10318 [(set (match_operand:V4DI 0 "register_operand" "=v")
10322 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
10323 (parallel [(const_int 0) (const_int 2)
10324 (const_int 4) (const_int 6)])))
10327 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
10328 (parallel [(const_int 0) (const_int 2)
10329 (const_int 4) (const_int 6)])))))]
10330 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10331 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10332 [(set_attr "type" "sseimul")
10333 (set_attr "prefix_extra" "1")
10334 (set_attr "prefix" "vex")
10335 (set_attr "mode" "OI")])
10337 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
10338 [(set (match_operand:V2DI 0 "register_operand")
10342 (match_operand:V4SI 1 "vector_operand")
10343 (parallel [(const_int 0) (const_int 2)])))
10346 (match_operand:V4SI 2 "vector_operand")
10347 (parallel [(const_int 0) (const_int 2)])))))]
10348 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
10349 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
10351 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
10352 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
10356 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
10357 (parallel [(const_int 0) (const_int 2)])))
10360 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
10361 (parallel [(const_int 0) (const_int 2)])))))]
10362 "TARGET_SSE4_1 && <mask_avx512vl_condition>
10363 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10365 pmuldq\t{%2, %0|%0, %2}
10366 pmuldq\t{%2, %0|%0, %2}
10367 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10368 [(set_attr "isa" "noavx,noavx,avx")
10369 (set_attr "type" "sseimul")
10370 (set_attr "prefix_data16" "1,1,*")
10371 (set_attr "prefix_extra" "1")
10372 (set_attr "prefix" "orig,orig,vex")
10373 (set_attr "mode" "TI")])
;; 512-bit VPMADDWD: multiply packed signed 16-bit elements of operands
;; 1 and 2 and horizontally add adjacent pairs of the 32-bit products,
;; yielding <sseunpackmode> (twice-as-wide elements, half as many).
;; Modelled as an unspec; the <mask_name> suffix adds the AVX-512
;; merge/zero-masking variants (operand 3 via <mask_operand3>).
;; Note: the stray ';' that previously followed the output-template
;; string (an accidental empty end-of-line comment) has been removed.
10375 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
10376 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
10377 (unspec:<sseunpackmode>
10378 [(match_operand:VI2_AVX2 1 "register_operand" "v")
10379 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
10380 UNSPEC_PMADDWD512))]
10381 "TARGET_AVX512BW && <mask_mode512bit_condition>"
10382 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10383 [(set_attr "type" "sseiadd")
10384 (set_attr "prefix" "evex")
10385 (set_attr "mode" "XI")])
10387 (define_expand "avx2_pmaddwd"
10388 [(set (match_operand:V8SI 0 "register_operand")
10393 (match_operand:V16HI 1 "nonimmediate_operand")
10394 (parallel [(const_int 0) (const_int 2)
10395 (const_int 4) (const_int 6)
10396 (const_int 8) (const_int 10)
10397 (const_int 12) (const_int 14)])))
10400 (match_operand:V16HI 2 "nonimmediate_operand")
10401 (parallel [(const_int 0) (const_int 2)
10402 (const_int 4) (const_int 6)
10403 (const_int 8) (const_int 10)
10404 (const_int 12) (const_int 14)]))))
10407 (vec_select:V8HI (match_dup 1)
10408 (parallel [(const_int 1) (const_int 3)
10409 (const_int 5) (const_int 7)
10410 (const_int 9) (const_int 11)
10411 (const_int 13) (const_int 15)])))
10413 (vec_select:V8HI (match_dup 2)
10414 (parallel [(const_int 1) (const_int 3)
10415 (const_int 5) (const_int 7)
10416 (const_int 9) (const_int 11)
10417 (const_int 13) (const_int 15)]))))))]
10419 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
10421 (define_insn "*avx2_pmaddwd"
10422 [(set (match_operand:V8SI 0 "register_operand" "=x,v")
10427 (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
10428 (parallel [(const_int 0) (const_int 2)
10429 (const_int 4) (const_int 6)
10430 (const_int 8) (const_int 10)
10431 (const_int 12) (const_int 14)])))
10434 (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
10435 (parallel [(const_int 0) (const_int 2)
10436 (const_int 4) (const_int 6)
10437 (const_int 8) (const_int 10)
10438 (const_int 12) (const_int 14)]))))
10441 (vec_select:V8HI (match_dup 1)
10442 (parallel [(const_int 1) (const_int 3)
10443 (const_int 5) (const_int 7)
10444 (const_int 9) (const_int 11)
10445 (const_int 13) (const_int 15)])))
10447 (vec_select:V8HI (match_dup 2)
10448 (parallel [(const_int 1) (const_int 3)
10449 (const_int 5) (const_int 7)
10450 (const_int 9) (const_int 11)
10451 (const_int 13) (const_int 15)]))))))]
10452 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10453 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10454 [(set_attr "type" "sseiadd")
10455 (set_attr "isa" "*,avx512bw")
10456 (set_attr "prefix" "vex,evex")
10457 (set_attr "mode" "OI")])
10459 (define_expand "sse2_pmaddwd"
10460 [(set (match_operand:V4SI 0 "register_operand")
10465 (match_operand:V8HI 1 "vector_operand")
10466 (parallel [(const_int 0) (const_int 2)
10467 (const_int 4) (const_int 6)])))
10470 (match_operand:V8HI 2 "vector_operand")
10471 (parallel [(const_int 0) (const_int 2)
10472 (const_int 4) (const_int 6)]))))
10475 (vec_select:V4HI (match_dup 1)
10476 (parallel [(const_int 1) (const_int 3)
10477 (const_int 5) (const_int 7)])))
10479 (vec_select:V4HI (match_dup 2)
10480 (parallel [(const_int 1) (const_int 3)
10481 (const_int 5) (const_int 7)]))))))]
10483 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
10485 (define_insn "*sse2_pmaddwd"
10486 [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
10491 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
10492 (parallel [(const_int 0) (const_int 2)
10493 (const_int 4) (const_int 6)])))
10496 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
10497 (parallel [(const_int 0) (const_int 2)
10498 (const_int 4) (const_int 6)]))))
10501 (vec_select:V4HI (match_dup 1)
10502 (parallel [(const_int 1) (const_int 3)
10503 (const_int 5) (const_int 7)])))
10505 (vec_select:V4HI (match_dup 2)
10506 (parallel [(const_int 1) (const_int 3)
10507 (const_int 5) (const_int 7)]))))))]
10508 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10510 pmaddwd\t{%2, %0|%0, %2}
10511 vpmaddwd\t{%2, %1, %0|%0, %1, %2}
10512 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10513 [(set_attr "isa" "noavx,avx,avx512bw")
10514 (set_attr "type" "sseiadd")
10515 (set_attr "atom_unit" "simul")
10516 (set_attr "prefix_data16" "1,*,*")
10517 (set_attr "prefix" "orig,vex,evex")
10518 (set_attr "mode" "TI")])
10520 (define_insn "avx512dq_mul<mode>3<mask_name>"
10521 [(set (match_operand:VI8 0 "register_operand" "=v")
10523 (match_operand:VI8 1 "register_operand" "v")
10524 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
10525 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
10526 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10527 [(set_attr "type" "sseimul")
10528 (set_attr "prefix" "evex")
10529 (set_attr "mode" "<sseinsnmode>")])
10531 (define_expand "mul<mode>3<mask_name>"
10532 [(set (match_operand:VI4_AVX512F 0 "register_operand")
10534 (match_operand:VI4_AVX512F 1 "general_vector_operand")
10535 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
10536 "TARGET_SSE2 && <mask_mode512bit_condition>"
10540 if (!vector_operand (operands[1], <MODE>mode))
10541 operands[1] = force_reg (<MODE>mode, operands[1]);
10542 if (!vector_operand (operands[2], <MODE>mode))
10543 operands[2] = force_reg (<MODE>mode, operands[2]);
10544 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
10548 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
10553 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
10554 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
10556 (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
10557 (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
10558 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
10559 && <mask_mode512bit_condition>"
10561 pmulld\t{%2, %0|%0, %2}
10562 pmulld\t{%2, %0|%0, %2}
10563 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10564 [(set_attr "isa" "noavx,noavx,avx")
10565 (set_attr "type" "sseimul")
10566 (set_attr "prefix_extra" "1")
10567 (set_attr "prefix" "<mask_prefix4>")
10568 (set_attr "btver2_decode" "vector,vector,vector")
10569 (set_attr "mode" "<sseinsnmode>")])
10571 (define_expand "mul<mode>3"
10572 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
10573 (mult:VI8_AVX2_AVX512F
10574 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
10575 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
10578 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
10582 (define_expand "vec_widen_<s>mult_hi_<mode>"
10583 [(match_operand:<sseunpackmode> 0 "register_operand")
10584 (any_extend:<sseunpackmode>
10585 (match_operand:VI124_AVX2 1 "register_operand"))
10586 (match_operand:VI124_AVX2 2 "register_operand")]
10589 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10594 (define_expand "vec_widen_<s>mult_lo_<mode>"
10595 [(match_operand:<sseunpackmode> 0 "register_operand")
10596 (any_extend:<sseunpackmode>
10597 (match_operand:VI124_AVX2 1 "register_operand"))
10598 (match_operand:VI124_AVX2 2 "register_operand")]
10601 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10606 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
10607 ;; named patterns, but signed V4SI needs special help for plain SSE2.
10608 (define_expand "vec_widen_smult_even_v4si"
10609 [(match_operand:V2DI 0 "register_operand")
10610 (match_operand:V4SI 1 "vector_operand")
10611 (match_operand:V4SI 2 "vector_operand")]
10614 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10619 (define_expand "vec_widen_<s>mult_odd_<mode>"
10620 [(match_operand:<sseunpackmode> 0 "register_operand")
10621 (any_extend:<sseunpackmode>
10622 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
10623 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
10626 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Suffix used by sdot_prod<mode> to pick the right pmaddwd generator:
;; V32HI needs the explicit "512v32hi" AVX-512BW pattern name, while the
;; V16HI/V8HI generators take no extra suffix (empty string).
10631 (define_mode_attr SDOT_PMADD_SUF
10632 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
10634 (define_expand "sdot_prod<mode>"
10635 [(match_operand:<sseunpackmode> 0 "register_operand")
10636 (match_operand:VI2_AVX2 1 "register_operand")
10637 (match_operand:VI2_AVX2 2 "register_operand")
10638 (match_operand:<sseunpackmode> 3 "register_operand")]
10641 rtx t = gen_reg_rtx (<sseunpackmode>mode);
10642 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
10643 emit_insn (gen_rtx_SET (operands[0],
10644 gen_rtx_PLUS (<sseunpackmode>mode,
10649 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
10650 ;; back together when madd is available.
10651 (define_expand "sdot_prodv4si"
10652 [(match_operand:V2DI 0 "register_operand")
10653 (match_operand:V4SI 1 "register_operand")
10654 (match_operand:V4SI 2 "register_operand")
10655 (match_operand:V2DI 3 "register_operand")]
10658 rtx t = gen_reg_rtx (V2DImode);
10659 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
10660 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
10664 (define_expand "usadv16qi"
10665 [(match_operand:V4SI 0 "register_operand")
10666 (match_operand:V16QI 1 "register_operand")
10667 (match_operand:V16QI 2 "vector_operand")
10668 (match_operand:V4SI 3 "vector_operand")]
10671 rtx t1 = gen_reg_rtx (V2DImode);
10672 rtx t2 = gen_reg_rtx (V4SImode);
10673 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
10674 convert_move (t2, t1, 0);
10675 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
10679 (define_expand "usadv32qi"
10680 [(match_operand:V8SI 0 "register_operand")
10681 (match_operand:V32QI 1 "register_operand")
10682 (match_operand:V32QI 2 "nonimmediate_operand")
10683 (match_operand:V8SI 3 "nonimmediate_operand")]
10686 rtx t1 = gen_reg_rtx (V4DImode);
10687 rtx t2 = gen_reg_rtx (V8SImode);
10688 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
10689 convert_move (t2, t1, 0);
10690 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
10694 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
10695 [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
10696 (ashiftrt:VI248_AVX512BW_1
10697 (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
10698 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10700 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10701 [(set_attr "type" "sseishft")
10702 (set (attr "length_immediate")
10703 (if_then_else (match_operand 2 "const_int_operand")
10705 (const_string "0")))
10706 (set_attr "mode" "<sseinsnmode>")])
10708 (define_insn "ashr<mode>3"
10709 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
10710 (ashiftrt:VI24_AVX2
10711 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
10712 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
10715 psra<ssemodesuffix>\t{%2, %0|%0, %2}
10716 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10717 [(set_attr "isa" "noavx,avx")
10718 (set_attr "type" "sseishft")
10719 (set (attr "length_immediate")
10720 (if_then_else (match_operand 2 "const_int_operand")
10722 (const_string "0")))
10723 (set_attr "prefix_data16" "1,*")
10724 (set_attr "prefix" "orig,vex")
10725 (set_attr "mode" "<sseinsnmode>")])
10727 (define_insn "ashr<mode>3<mask_name>"
10728 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
10729 (ashiftrt:VI248_AVX512BW_AVX512VL
10730 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
10731 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10733 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10734 [(set_attr "type" "sseishft")
10735 (set (attr "length_immediate")
10736 (if_then_else (match_operand 2 "const_int_operand")
10738 (const_string "0")))
10739 (set_attr "mode" "<sseinsnmode>")])
10741 (define_insn "<mask_codefor><shift_insn><mode>3<mask_name>"
10742 [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
10743 (any_lshift:VI248_AVX512BW_2
10744 (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
10745 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10747 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10748 [(set_attr "type" "sseishft")
10749 (set (attr "length_immediate")
10750 (if_then_else (match_operand 2 "const_int_operand")
10752 (const_string "0")))
10753 (set_attr "mode" "<sseinsnmode>")])
10755 (define_insn "<shift_insn><mode>3"
10756 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
10757 (any_lshift:VI248_AVX2
10758 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
10759 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
10762 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10763 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10764 [(set_attr "isa" "noavx,avx")
10765 (set_attr "type" "sseishft")
10766 (set (attr "length_immediate")
10767 (if_then_else (match_operand 2 "const_int_operand")
10769 (const_string "0")))
10770 (set_attr "prefix_data16" "1,*")
10771 (set_attr "prefix" "orig,vex")
10772 (set_attr "mode" "<sseinsnmode>")])
10774 (define_insn "<shift_insn><mode>3<mask_name>"
10775 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
10776 (any_lshift:VI248_AVX512BW
10777 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
10778 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
10780 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10781 [(set_attr "type" "sseishft")
10782 (set (attr "length_immediate")
10783 (if_then_else (match_operand 2 "const_int_operand")
10785 (const_string "0")))
10786 (set_attr "mode" "<sseinsnmode>")])
;; Whole-register byte shifts.
;; NOTE(review): interior lines (conditions, the shift rtx inside
;; vec_shr_<mode>, switch-case labels) are missing from this extract —
;; restore from upstream sse.md before building.

;; vec_shr_<mode>: shift the whole 128-bit vector right by a byte count.
;; The operand is recast to V1TImode (operands[1]/[3]), shifted there, and
;; the result viewed back in the original mode via gen_lowpart.
10789 (define_expand "vec_shr_<mode>"
10790 [(set (match_dup 3)
10792 (match_operand:VI_128 1 "register_operand")
10793 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10794 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10797 operands[1] = gen_lowpart (V1TImode, operands[1]);
10798 operands[3] = gen_reg_rtx (V1TImode);
10799 operands[4] = gen_lowpart (<MODE>mode, operands[3]);

;; AVX-512 whole-register byte shift (vpslldq/vpsrldq).  The immediate
;; arrives in bits and is divided by 8 to get the byte count.
10802 (define_insn "avx512bw_<shift_insn><mode>3"
10803 [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
10804 (any_lshift:VIMAX_AVX512VL
10805 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
10806 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
10809 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10810 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
10812 [(set_attr "type" "sseishft")
10813 (set_attr "length_immediate" "1")
10814 (set_attr "prefix" "maybe_evex")
10815 (set_attr "mode" "<sseinsnmode>")])

;; SSE2/AVX2 whole-register byte shift; alternative 0 is the legacy
;; two-operand form, alternative 1 the VEX three-operand form.
10817 (define_insn "<sse2_avx2>_<shift_insn><mode>3"
10818 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10819 (any_lshift:VIMAX_AVX2
10820 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10821 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10824 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10826 switch (which_alternative)
10829 return "p<vshift>dq\t{%2, %0|%0, %2}";
10831 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
10833 gcc_unreachable ();
10836 [(set_attr "isa" "noavx,avx")
10837 (set_attr "type" "sseishft")
10838 (set_attr "length_immediate" "1")
10839 (set_attr "atom_unit" "sishuf")
10840 (set_attr "prefix_data16" "1,*")
10841 (set_attr "prefix" "orig,vex")
10842 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 rotates.  NOTE(review): the insn condition lines are missing from
;; this extract — restore from upstream sse.md before building.

;; Variable rotate: per-element rotate counts come from vector operand 2
;; (vprolv*/vprorv*), with optional EVEX masking.
10844 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
10845 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10846 (any_rotate:VI48_AVX512VL
10847 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10848 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10850 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10851 [(set_attr "prefix" "evex")
10852 (set_attr "mode" "<sseinsnmode>")])

;; Immediate-count rotate (vprol*/vpror*), with optional EVEX masking.
10854 (define_insn "<avx512>_<rotate><mode><mask_name>"
10855 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10856 (any_rotate:VI48_AVX512VL
10857 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
10858 (match_operand:SI 2 "const_0_to_255_operand")))]
10860 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10861 [(set_attr "prefix" "evex")
10862 (set_attr "mode" "<sseinsnmode>")])
;; Integer min/max patterns.
;; NOTE(review): interior lines are missing throughout this extract
;; (conditions, maxmin/smaxmin/umaxmin rtx operator lines, braces and
;; DONE/FAIL statements of the C bodies) — restore from upstream sse.md.

;; Straightforward min/max expander for modes with a native instruction.
10864 (define_expand "<code><mode>3"
10865 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
10866 (maxmin:VI124_256_AVX512F_AVX512BW
10867 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
10868 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
10870 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; AVX2 256-bit min/max insn (vpmax*/vpmin*); operand 1 is commutative ("%v").
10872 (define_insn "*avx2_<code><mode>3"
10873 [(set (match_operand:VI124_256 0 "register_operand" "=v")
10875 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10876 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10877 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10878 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10879 [(set_attr "type" "sseiadd")
10880 (set_attr "prefix_extra" "1")
10881 (set_attr "prefix" "vex")
10882 (set_attr "mode" "OI")])

;; Masked min/max expander: result is merged with operand 3 under mask
;; operand 4 via vec_merge.
10884 (define_expand "<code><mode>3_mask"
10885 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10886 (vec_merge:VI48_AVX512VL
10887 (maxmin:VI48_AVX512VL
10888 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10889 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10890 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10891 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10893 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; AVX-512 min/max insn for dword/qword elements, optional masking.
10895 (define_insn "*avx512f_<code><mode>3<mask_name>"
10896 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10897 (maxmin:VI48_AVX512VL
10898 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10899 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10900 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10901 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10902 [(set_attr "type" "sseiadd")
10903 (set_attr "prefix_extra" "1")
10904 (set_attr "prefix" "maybe_evex")
10905 (set_attr "mode" "<sseinsnmode>")])

;; AVX-512 min/max insn for byte/word elements (AVX512BW), optional masking.
10907 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10908 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10909 (maxmin:VI12_AVX512VL
10910 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10911 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10913 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10914 [(set_attr "type" "sseiadd")
10915 (set_attr "prefix" "evex")
10916 (set_attr "mode" "<sseinsnmode>")])

;; 64-bit element min/max: use the native insn when available (V8DI, or
;; AVX512VL for the narrower modes); otherwise synthesize via a vector
;; compare + blend through ix86_expand_int_vcond.  For SMIN/UMIN the
;; operands are swapped so a single GT/GTU compare direction suffices.
10918 (define_expand "<code><mode>3"
10919 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
10920 (maxmin:VI8_AVX2_AVX512F
10921 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
10922 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
10926 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10927 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10930 enum rtx_code code;
10935 xops[0] = operands[0];
10937 if (<CODE> == SMAX || <CODE> == UMAX)
10939 xops[1] = operands[1];
10940 xops[2] = operands[2];
10944 xops[1] = operands[2];
10945 xops[2] = operands[1];
10948 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
10950 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10951 xops[4] = operands[1];
10952 xops[5] = operands[2];
10954 ok = ix86_expand_int_vcond (xops);

;; Signed min/max for 128-bit modes: native from SSE4.1 (or V8HI, which has
;; pmaxsw/pminsw since SSE2); otherwise fall back to compare + blend.
10960 (define_expand "<code><mode>3"
10961 [(set (match_operand:VI124_128 0 "register_operand")
10963 (match_operand:VI124_128 1 "vector_operand")
10964 (match_operand:VI124_128 2 "vector_operand")))]
10967 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10968 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10974 xops[0] = operands[0];
10975 operands[1] = force_reg (<MODE>mode, operands[1]);
10976 operands[2] = force_reg (<MODE>mode, operands[2]);
10978 if (<CODE> == SMAX)
10980 xops[1] = operands[1];
10981 xops[2] = operands[2];
10985 xops[1] = operands[2];
10986 xops[2] = operands[1];
10989 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
10990 xops[4] = operands[1];
10991 xops[5] = operands[2];
10993 ok = ix86_expand_int_vcond (xops);

;; SSE4.1 signed min/max insn for byte/dword 128-bit modes; "Yr" prefers
;; ports that avoid length-changing prefixes, "*x" is the plain SSE form,
;; "v" the EVEX-encodable form.
10999 (define_insn "*sse4_1_<code><mode>3<mask_name>"
11000 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
11002 (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
11003 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11005 && <mask_mode512bit_condition>
11006 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11008 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11009 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11010 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11011 [(set_attr "isa" "noavx,noavx,avx")
11012 (set_attr "type" "sseiadd")
11013 (set_attr "prefix_extra" "1,1,*")
11014 (set_attr "prefix" "orig,orig,vex")
11015 (set_attr "mode" "TI")])

;; V8HI min/max insn: SSE2 legacy, AVX, and AVX512BW alternatives.
11017 (define_insn "*<code>v8hi3"
11018 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
11020 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
11021 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
11022 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11024 p<maxmin_int>w\t{%2, %0|%0, %2}
11025 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
11026 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
11027 [(set_attr "isa" "noavx,avx,avx512bw")
11028 (set_attr "type" "sseiadd")
11029 (set_attr "prefix_data16" "1,*,*")
11030 (set_attr "prefix_extra" "*,1,1")
11031 (set_attr "prefix" "orig,vex,evex")
11032 (set_attr "mode" "TI")])

;; Unsigned min/max for 128-bit modes: native from SSE4.1 (or V16QI, which
;; has pmaxub/pminub since SSE2).  UMAX of V8HI is synthesized as
;; max(a,b) = (a - min-via-ussub) + b using saturating subtract; other cases
;; fall back to unsigned compare + blend.
11034 (define_expand "<code><mode>3"
11035 [(set (match_operand:VI124_128 0 "register_operand")
11037 (match_operand:VI124_128 1 "vector_operand")
11038 (match_operand:VI124_128 2 "vector_operand")))]
11041 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
11042 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11043 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
11045 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
11046 operands[1] = force_reg (<MODE>mode, operands[1]);
11047 if (rtx_equal_p (op3, op2))
11048 op3 = gen_reg_rtx (V8HImode);
11049 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
11050 emit_insn (gen_addv8hi3 (op0, op3, op2));
11058 operands[1] = force_reg (<MODE>mode, operands[1]);
11059 operands[2] = force_reg (<MODE>mode, operands[2]);
11061 xops[0] = operands[0];
11063 if (<CODE> == UMAX)
11065 xops[1] = operands[1];
11066 xops[2] = operands[2];
11070 xops[1] = operands[2];
11071 xops[2] = operands[1];
11074 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
11075 xops[4] = operands[1];
11076 xops[5] = operands[2];
11078 ok = ix86_expand_int_vcond (xops);

;; SSE4.1 unsigned min/max insn for word/dword 128-bit modes.
11084 (define_insn "*sse4_1_<code><mode>3<mask_name>"
11085 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
11087 (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
11088 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11090 && <mask_mode512bit_condition>
11091 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11093 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11094 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11095 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11096 [(set_attr "isa" "noavx,noavx,avx")
11097 (set_attr "type" "sseiadd")
11098 (set_attr "prefix_extra" "1,1,*")
11099 (set_attr "prefix" "orig,orig,vex")
11100 (set_attr "mode" "TI")])

;; V16QI min/max insn: SSE2 legacy, AVX, and AVX512BW alternatives.
11102 (define_insn "*<code>v16qi3"
11103 [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
11105 (match_operand:V16QI 1 "vector_operand" "%0,x,v")
11106 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
11107 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11109 p<maxmin_int>b\t{%2, %0|%0, %2}
11110 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
11111 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
11112 [(set_attr "isa" "noavx,avx,avx512bw")
11113 (set_attr "type" "sseiadd")
11114 (set_attr "prefix_data16" "1,*,*")
11115 (set_attr "prefix_extra" "*,1,1")
11116 (set_attr "prefix" "orig,vex,evex")
11117 (set_attr "mode" "TI")])
11119 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11121 ;; Parallel integral comparisons
11123 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer equality / greater-than comparison patterns.
;; NOTE(review): interior lines (conditions, eq:/gt: operator lines) are
;; missing from this extract — restore from upstream sse.md before building.

;; AVX2 256-bit equality expander.
11125 (define_expand "avx2_eq<mode>3"
11126 [(set (match_operand:VI_256 0 "register_operand")
11128 (match_operand:VI_256 1 "nonimmediate_operand")
11129 (match_operand:VI_256 2 "nonimmediate_operand")))]
11131 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")

;; AVX2 256-bit equality insn (vpcmpeq*), result is a vector of all-ones /
;; all-zeros elements.
11133 (define_insn "*avx2_eq<mode>3"
11134 [(set (match_operand:VI_256 0 "register_operand" "=x")
11136 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
11137 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
11138 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11139 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11140 [(set_attr "type" "ssecmp")
11141 (set_attr "prefix_extra" "1")
11142 (set_attr "prefix" "vex")
11143 (set_attr "mode" "OI")])

;; AVX-512 equality expanders producing a mask register result
;; (UNSPEC_MASKED_EQ); byte/word variant.
11145 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
11146 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
11147 (unspec:<avx512fmaskmode>
11148 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
11149 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
11150 UNSPEC_MASKED_EQ))]
11152 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")

;; Same, dword/qword variant.
11154 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
11155 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
11156 (unspec:<avx512fmaskmode>
11157 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11158 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
11159 UNSPEC_MASKED_EQ))]
11161 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")

;; Mask-producing equality insns (vpcmpeq* into a %k register), byte/word.
11163 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
11164 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11165 (unspec:<avx512fmaskmode>
11166 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "%v")
11167 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11168 UNSPEC_MASKED_EQ))]
11169 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11170 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11171 [(set_attr "type" "ssecmp")
11172 (set_attr "prefix_extra" "1")
11173 (set_attr "prefix" "evex")
11174 (set_attr "mode" "<sseinsnmode>")])

;; Same, dword/qword variant.
11176 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
11177 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11178 (unspec:<avx512fmaskmode>
11179 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
11180 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11181 UNSPEC_MASKED_EQ))]
11182 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11183 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11184 [(set_attr "type" "ssecmp")
11185 (set_attr "prefix_extra" "1")
11186 (set_attr "prefix" "evex")
11187 (set_attr "mode" "<sseinsnmode>")])

;; SSE4.1 64-bit element equality (pcmpeqq).
11189 (define_insn "*sse4_1_eqv2di3"
11190 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
11192 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
11193 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
11194 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11196 pcmpeqq\t{%2, %0|%0, %2}
11197 pcmpeqq\t{%2, %0|%0, %2}
11198 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
11199 [(set_attr "isa" "noavx,noavx,avx")
11200 (set_attr "type" "ssecmp")
11201 (set_attr "prefix_extra" "1")
11202 (set_attr "prefix" "orig,orig,vex")
11203 (set_attr "mode" "TI")])

;; SSE2 byte/word/dword equality (pcmpeq{b,w,d}); disabled under XOP, which
;; has its own comparison patterns.
11205 (define_insn "*sse2_eq<mode>3"
11206 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
11208 (match_operand:VI124_128 1 "vector_operand" "%0,x")
11209 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
11210 "TARGET_SSE2 && !TARGET_XOP
11211 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11213 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
11214 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11215 [(set_attr "isa" "noavx,avx")
11216 (set_attr "type" "ssecmp")
11217 (set_attr "prefix_data16" "1,*")
11218 (set_attr "prefix" "orig,vex")
11219 (set_attr "mode" "TI")])

;; Named expander for the SSE2 equality insn above.
11221 (define_expand "sse2_eq<mode>3"
11222 [(set (match_operand:VI124_128 0 "register_operand")
11224 (match_operand:VI124_128 1 "vector_operand")
11225 (match_operand:VI124_128 2 "vector_operand")))]
11226 "TARGET_SSE2 && !TARGET_XOP "
11227 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")

;; Named expander for the SSE4.1 pcmpeqq insn above.
11229 (define_expand "sse4_1_eqv2di3"
11230 [(set (match_operand:V2DI 0 "register_operand")
11232 (match_operand:V2DI 1 "vector_operand")
11233 (match_operand:V2DI 2 "vector_operand")))]
11235 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")

;; SSE4.2 64-bit element signed greater-than (pcmpgtq).
11237 (define_insn "sse4_2_gtv2di3"
11238 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
11240 (match_operand:V2DI 1 "register_operand" "0,0,x")
11241 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
11244 pcmpgtq\t{%2, %0|%0, %2}
11245 pcmpgtq\t{%2, %0|%0, %2}
11246 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
11247 [(set_attr "isa" "noavx,noavx,avx")
11248 (set_attr "type" "ssecmp")
11249 (set_attr "prefix_extra" "1")
11250 (set_attr "prefix" "orig,orig,vex")
11251 (set_attr "mode" "TI")])

;; AVX2 256-bit signed greater-than (vpcmpgt*).
11253 (define_insn "avx2_gt<mode>3"
11254 [(set (match_operand:VI_256 0 "register_operand" "=x")
11256 (match_operand:VI_256 1 "register_operand" "x")
11257 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
11259 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11260 [(set_attr "type" "ssecmp")
11261 (set_attr "prefix_extra" "1")
11262 (set_attr "prefix" "vex")
11263 (set_attr "mode" "OI")])

;; AVX-512 mask-producing signed greater-than (UNSPEC_MASKED_GT),
;; dword/qword variant.
11265 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
11266 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11267 (unspec:<avx512fmaskmode>
11268 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11269 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
11271 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11272 [(set_attr "type" "ssecmp")
11273 (set_attr "prefix_extra" "1")
11274 (set_attr "prefix" "evex")
11275 (set_attr "mode" "<sseinsnmode>")])

;; Same, byte/word variant.
11277 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
11278 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11279 (unspec:<avx512fmaskmode>
11280 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11281 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
11283 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11284 [(set_attr "type" "ssecmp")
11285 (set_attr "prefix_extra" "1")
11286 (set_attr "prefix" "evex")
11287 (set_attr "mode" "<sseinsnmode>")])

;; SSE2 byte/word/dword signed greater-than (pcmpgt{b,w,d}).
11289 (define_insn "sse2_gt<mode>3"
11290 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
11292 (match_operand:VI124_128 1 "register_operand" "0,x")
11293 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
11294 "TARGET_SSE2 && !TARGET_XOP"
11296 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
11297 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11298 [(set_attr "isa" "noavx,avx")
11299 (set_attr "type" "ssecmp")
11300 (set_attr "prefix_data16" "1,*")
11301 (set_attr "prefix" "orig,vex")
11302 (set_attr "mode" "TI")])
;; vcond/vcondu expanders: element-wise select operands[1]/[2] under the
;; comparison (operator 3) of operands 4 and 5; all funnel into
;; ix86_expand_int_vcond.  The data mode and comparison mode may differ but
;; must have the same number of elements.
;; NOTE(review): interior lines (conditions, FAIL/DONE statements) are
;; missing from this extract — restore from upstream sse.md before building.

;; Signed vcond, 512-bit modes.
11304 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
11305 [(set (match_operand:V_512 0 "register_operand")
11306 (if_then_else:V_512
11307 (match_operator 3 ""
11308 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
11309 (match_operand:VI_AVX512BW 5 "general_operand")])
11310 (match_operand:V_512 1)
11311 (match_operand:V_512 2)))]
11313 && (GET_MODE_NUNITS (<V_512:MODE>mode)
11314 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
11316 bool ok = ix86_expand_int_vcond (operands);

;; Signed vcond, 256-bit modes.
11321 (define_expand "vcond<V_256:mode><VI_256:mode>"
11322 [(set (match_operand:V_256 0 "register_operand")
11323 (if_then_else:V_256
11324 (match_operator 3 ""
11325 [(match_operand:VI_256 4 "nonimmediate_operand")
11326 (match_operand:VI_256 5 "general_operand")])
11327 (match_operand:V_256 1)
11328 (match_operand:V_256 2)))]
11330 && (GET_MODE_NUNITS (<V_256:MODE>mode)
11331 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11333 bool ok = ix86_expand_int_vcond (operands);

;; Signed vcond, 128-bit modes with 1/2/4-byte elements.
11338 (define_expand "vcond<V_128:mode><VI124_128:mode>"
11339 [(set (match_operand:V_128 0 "register_operand")
11340 (if_then_else:V_128
11341 (match_operator 3 ""
11342 [(match_operand:VI124_128 4 "vector_operand")
11343 (match_operand:VI124_128 5 "general_operand")])
11344 (match_operand:V_128 1)
11345 (match_operand:V_128 2)))]
11347 && (GET_MODE_NUNITS (<V_128:MODE>mode)
11348 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
11350 bool ok = ix86_expand_int_vcond (operands);

;; Signed vcond with a V2DI comparison.
11355 (define_expand "vcond<VI8F_128:mode>v2di"
11356 [(set (match_operand:VI8F_128 0 "register_operand")
11357 (if_then_else:VI8F_128
11358 (match_operator 3 ""
11359 [(match_operand:V2DI 4 "vector_operand")
11360 (match_operand:V2DI 5 "general_operand")])
11361 (match_operand:VI8F_128 1)
11362 (match_operand:VI8F_128 2)))]
11365 bool ok = ix86_expand_int_vcond (operands);

;; Unsigned vcond, 512-bit modes.
11370 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
11371 [(set (match_operand:V_512 0 "register_operand")
11372 (if_then_else:V_512
11373 (match_operator 3 ""
11374 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
11375 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
11376 (match_operand:V_512 1 "general_operand")
11377 (match_operand:V_512 2 "general_operand")))]
11379 && (GET_MODE_NUNITS (<V_512:MODE>mode)
11380 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
11382 bool ok = ix86_expand_int_vcond (operands);

;; Unsigned vcond, 256-bit modes.
11387 (define_expand "vcondu<V_256:mode><VI_256:mode>"
11388 [(set (match_operand:V_256 0 "register_operand")
11389 (if_then_else:V_256
11390 (match_operator 3 ""
11391 [(match_operand:VI_256 4 "nonimmediate_operand")
11392 (match_operand:VI_256 5 "nonimmediate_operand")])
11393 (match_operand:V_256 1 "general_operand")
11394 (match_operand:V_256 2 "general_operand")))]
11396 && (GET_MODE_NUNITS (<V_256:MODE>mode)
11397 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11399 bool ok = ix86_expand_int_vcond (operands);

;; Unsigned vcond, 128-bit modes with 1/2/4-byte elements.
11404 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
11405 [(set (match_operand:V_128 0 "register_operand")
11406 (if_then_else:V_128
11407 (match_operator 3 ""
11408 [(match_operand:VI124_128 4 "vector_operand")
11409 (match_operand:VI124_128 5 "vector_operand")])
11410 (match_operand:V_128 1 "general_operand")
11411 (match_operand:V_128 2 "general_operand")))]
11413 && (GET_MODE_NUNITS (<V_128:MODE>mode)
11414 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
11416 bool ok = ix86_expand_int_vcond (operands);

;; Unsigned vcond with a V2DI comparison.
11421 (define_expand "vcondu<VI8F_128:mode>v2di"
11422 [(set (match_operand:VI8F_128 0 "register_operand")
11423 (if_then_else:VI8F_128
11424 (match_operator 3 ""
11425 [(match_operand:V2DI 4 "vector_operand")
11426 (match_operand:V2DI 5 "vector_operand")])
11427 (match_operand:VI8F_128 1 "general_operand")
11428 (match_operand:VI8F_128 2 "general_operand")))]
11431 bool ok = ix86_expand_int_vcond (operands);

;; Equality-only vcond with a V2DI comparison.
11436 (define_expand "vcondeq<VI8F_128:mode>v2di"
11437 [(set (match_operand:VI8F_128 0 "register_operand")
11438 (if_then_else:VI8F_128
11439 (match_operator 3 ""
11440 [(match_operand:V2DI 4 "vector_operand")
11441 (match_operand:V2DI 5 "general_operand")])
11442 (match_operand:VI8F_128 1)
11443 (match_operand:VI8F_128 2)))]
11446 bool ok = ix86_expand_int_vcond (operands);
;; Vector permutation support.
;; NOTE(review): interior lines (conditions, DONE/FAIL statements) are
;; missing from this extract — restore from upstream sse.md before building.

;; Modes supported by the variable-selector vec_perm expander.
11451 (define_mode_iterator VEC_PERM_AVX2
11452 [V16QI V8HI V4SI V2DI V4SF V2DF
11453 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
11454 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
11455 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
11456 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
11457 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
11458 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])

;; Variable permute: selector in operand 3 (integer vector of the same
;; width); lowered by ix86_expand_vec_perm.
11460 (define_expand "vec_perm<mode>"
11461 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
11462 (match_operand:VEC_PERM_AVX2 1 "register_operand")
11463 (match_operand:VEC_PERM_AVX2 2 "register_operand")
11464 (match_operand:<sseintvecmode> 3 "register_operand")]
11465 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
11467 ix86_expand_vec_perm (operands);

;; Modes supported by the constant-selector vec_perm expander.
11471 (define_mode_iterator VEC_PERM_CONST
11472 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
11473 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
11474 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
11475 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
11476 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
11477 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
11478 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
11479 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
11480 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])

;; Compile-time-constant permute; ix86_expand_vec_perm_const picks the best
;; shuffle sequence for the given selector.
11482 (define_expand "vec_perm_const<mode>"
11483 [(match_operand:VEC_PERM_CONST 0 "register_operand")
11484 (match_operand:VEC_PERM_CONST 1 "register_operand")
11485 (match_operand:VEC_PERM_CONST 2 "register_operand")
11486 (match_operand:<sseintvecmode> 3)]
11489 if (ix86_expand_vec_perm_const (operands))
11495 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11497 ;; Parallel bitwise logical operations
11499 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise-logic expanders.
;; NOTE(review): interior lines (conditions, the and: rtx operator lines)
;; are missing from this extract — restore from upstream sse.md.

;; One's complement, implemented as XOR with an all-ones vector (operand 2).
11501 (define_expand "one_cmpl<mode>2"
11502 [(set (match_operand:VI 0 "register_operand")
11503 (xor:VI (match_operand:VI 1 "vector_operand")
11507 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));

;; andnot: (~op1) & op2, matching the pandn operand order.
11510 (define_expand "<sse2_avx2>_andnot<mode>3"
11511 [(set (match_operand:VI_AVX2 0 "register_operand")
11513 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
11514 (match_operand:VI_AVX2 2 "vector_operand")))]

;; Masked andnot expanders (result merged with operand 3 under mask
;; operand 4); dword/qword variant...
11517 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
11518 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11519 (vec_merge:VI48_AVX512VL
11522 (match_operand:VI48_AVX512VL 1 "register_operand"))
11523 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11524 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
11525 (match_operand:<avx512fmaskmode> 4 "register_operand")))]

;; ...and byte/word variant.
11528 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
11529 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
11530 (vec_merge:VI12_AVX512VL
11533 (match_operand:VI12_AVX512VL 1 "register_operand"))
11534 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
11535 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
11536 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; *andnot<mode>3: (~op1) & op2 for all integer vector modes.  The C body
;; builds the mnemonic at runtime: it chooses pandn/vpandn (integer domain)
;; or andnps-style float-domain forms depending on get_attr_mode, and picks
;; the element-size suffix (there is no vpandnb/vpandnw, and no plain vpandn
;; for 512-bit vectors, so "q" is used for those — see the comment kept
;; below).  The final template is assembled with snprintf into a static
;; buffer.
;; NOTE(review): this extract is missing interior lines of the C switch
;; (case labels, mode names, the tmp mnemonic assignments) — restore from
;; upstream sse.md before building.
11539 (define_insn "*andnot<mode>3"
11540 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
11542 (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
11543 (match_operand:VI 2 "vector_operand" "xBm,xm,vm")))]
11546 static char buf[64];
11549 const char *ssesuffix;
11551 switch (get_attr_mode (insn))
11554 gcc_assert (TARGET_AVX512F);
11557 gcc_assert (TARGET_AVX2);
11560 gcc_assert (TARGET_SSE2);
11562 switch (<MODE>mode)
11566 /* There is no vpandnb or vpandnw instruction, nor vpandn for
11567 512-bit vectors. Use vpandnq instead. */
11572 ssesuffix = "<ssemodesuffix>";
11578 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
11579 ? "<ssemodesuffix>" : "");
11582 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
11587 gcc_assert (TARGET_AVX512F);
11590 gcc_assert (TARGET_AVX);
11593 gcc_assert (TARGET_SSE);
11599 gcc_unreachable ();
11602 switch (which_alternative)
11605 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11609 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11612 gcc_unreachable ();
11615 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11618 [(set_attr "isa" "noavx,avx,avx")
11619 (set_attr "type" "sselog")
11620 (set (attr "prefix_data16")
11622 (and (eq_attr "alternative" "0")
11623 (eq_attr "mode" "TI"))
11625 (const_string "*")))
11626 (set_attr "prefix" "orig,vex,evex")
11628 (cond [(and (match_test "<MODE_SIZE> == 16")
11629 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11630 (const_string "<ssePSmode>")
11631 (match_test "TARGET_AVX2")
11632 (const_string "<sseinsnmode>")
11633 (match_test "TARGET_AVX")
11635 (match_test "<MODE_SIZE> > 16")
11636 (const_string "V8SF")
11637 (const_string "<sseinsnmode>"))
11638 (ior (not (match_test "TARGET_SSE2"))
11639 (match_test "optimize_function_for_size_p (cfun)"))
11640 (const_string "V4SF")
11642 (const_string "<sseinsnmode>")))])
;; Masked andnot insn: vpandn{d,q} with merge (%{%4%}) or zero (%N3) masking.
;; NOTE(review): the insn condition line is missing from this extract, and
;; the trailing ';' after the output template looks stray — check against
;; upstream sse.md.
11644 (define_insn "*andnot<mode>3_mask"
11645 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11646 (vec_merge:VI48_AVX512VL
11649 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
11650 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
11651 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
11652 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11654 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
11655 [(set_attr "type" "sselog")
11656 (set_attr "prefix" "evex")
11657 (set_attr "mode" "<sseinsnmode>")])

;; and/ior/xor expander; operands may be constant vectors, legitimized by
;; ix86_expand_vector_logical_operator.
11659 (define_expand "<code><mode>3"
11660 [(set (match_operand:VI 0 "register_operand")
11662 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
11663 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
11666 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; and/ior/xor insn for dword/qword vectors, with optional EVEX masking.
;; Like *andnot<mode>3, the C body assembles the mnemonic (p<logic> or
;; v<logic> plus a d/q suffix when masked or EVEX-encoded) with snprintf.
;; NOTE(review): interior C lines (case labels, tmp assignments, braces)
;; are missing from this extract — restore from upstream sse.md.
11670 (define_insn "<mask_codefor><code><mode>3<mask_name>"
11671 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
11672 (any_logic:VI48_AVX_AVX512F
11673 (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,x,v")
11674 (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
11675 "TARGET_SSE && <mask_mode512bit_condition>
11676 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11678 static char buf[64];
11681 const char *ssesuffix;
11683 switch (get_attr_mode (insn))
11686 gcc_assert (TARGET_AVX512F);
11689 gcc_assert (TARGET_AVX2);
11692 gcc_assert (TARGET_SSE2);
11694 switch (<MODE>mode)
11698 ssesuffix = "<ssemodesuffix>";
11704 ssesuffix = (TARGET_AVX512VL
11705 && (<mask_applied> || which_alternative == 2)
11706 ? "<ssemodesuffix>" : "");
11709 gcc_unreachable ();
11714 gcc_assert (TARGET_AVX);
11717 gcc_assert (TARGET_SSE);
11723 gcc_unreachable ();
11726 switch (which_alternative)
11729 if (<mask_applied>)
11730 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
11732 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11736 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
11739 gcc_unreachable ();
11742 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11745 [(set_attr "isa" "noavx,avx,avx")
11746 (set_attr "type" "sselog")
11747 (set (attr "prefix_data16")
11749 (and (eq_attr "alternative" "0")
11750 (eq_attr "mode" "TI"))
11752 (const_string "*")))
11753 (set_attr "prefix" "<mask_prefix3>,evex")
11755 (cond [(and (match_test "<MODE_SIZE> == 16")
11756 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11757 (const_string "<ssePSmode>")
11758 (match_test "TARGET_AVX2")
11759 (const_string "<sseinsnmode>")
11760 (match_test "TARGET_AVX")
11762 (match_test "<MODE_SIZE> > 16")
11763 (const_string "V8SF")
11764 (const_string "<sseinsnmode>"))
11765 (ior (not (match_test "TARGET_SSE2"))
11766 (match_test "optimize_function_for_size_p (cfun)"))
11767 (const_string "V4SF")
11769 (const_string "<sseinsnmode>")))])
;; and/ior/xor insn for byte/word vectors (no masking — there are no
;; byte/word forms of the EVEX logic instructions, hence the "q" suffix for
;; the EVEX alternative).  Mnemonic assembled in the C body as above.
;; NOTE(review): interior C lines (case labels, tmp assignments, braces)
;; are missing from this extract — restore from upstream sse.md.
11771 (define_insn "*<code><mode>3"
11772 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
11773 (any_logic:VI12_AVX_AVX512F
11774 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
11775 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
11776 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11778 static char buf[64];
11781 const char *ssesuffix;
11783 switch (get_attr_mode (insn))
11786 gcc_assert (TARGET_AVX512F);
11789 gcc_assert (TARGET_AVX2);
11792 gcc_assert (TARGET_SSE2);
11794 switch (<MODE>mode)
11804 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
11807 gcc_unreachable ();
11812 gcc_assert (TARGET_AVX);
11815 gcc_assert (TARGET_SSE);
11821 gcc_unreachable ();
11824 switch (which_alternative)
11827 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11831 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11834 gcc_unreachable ();
11837 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11840 [(set_attr "isa" "noavx,avx,avx")
11841 (set_attr "type" "sselog")
11842 (set (attr "prefix_data16")
11844 (and (eq_attr "alternative" "0")
11845 (eq_attr "mode" "TI"))
11847 (const_string "*")))
11848 (set_attr "prefix" "<mask_prefix3>,evex")
11850 (cond [(and (match_test "<MODE_SIZE> == 16")
11851 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11852 (const_string "<ssePSmode>")
11853 (match_test "TARGET_AVX2")
11854 (const_string "<sseinsnmode>")
11855 (match_test "TARGET_AVX")
11857 (match_test "<MODE_SIZE> > 16")
11858 (const_string "V8SF")
11859 (const_string "<sseinsnmode>"))
11860 (ior (not (match_test "TARGET_SSE2"))
11861 (match_test "optimize_function_for_size_p (cfun)"))
11862 (const_string "V4SF")
11864 (const_string "<sseinsnmode>")))])
;; vptestmb/vptestmw: produce an AVX-512 mask register ("Yk") from two
;; byte/word vector operands, optionally merged with a scalar mask.
;; NOTE(review): the unspec tag and the insn enabling condition fall on
;; lines elided from this excerpt.
11866 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11867 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11868 (unspec:<avx512fmaskmode>
11869 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11870 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11873 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11874 [(set_attr "prefix" "evex")
11875 (set_attr "mode" "<sseinsnmode>")])
;; vptestmd/vptestmq: same as above for dword/qword element vectors.
;; NOTE(review): unspec tag and condition lines are elided from this
;; excerpt.
11877 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11878 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11879 (unspec:<avx512fmaskmode>
11880 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11881 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11884 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11885 [(set_attr "prefix" "evex")
11886 (set_attr "mode" "<sseinsnmode>")])
;; vptestnmb/vptestnmw: mask-register test, negated sense relative to
;; vptestm, on byte/word vectors.  NOTE(review): unspec tag and
;; condition lines are elided from this excerpt.
11888 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11889 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11890 (unspec:<avx512fmaskmode>
11891 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11892 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11895 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11896 [(set_attr "prefix" "evex")
11897 (set_attr "mode" "<sseinsnmode>")])
;; vptestnmd/vptestnmq: negated mask test for dword/qword vectors.
;; NOTE(review): unspec tag and condition lines are elided from this
;; excerpt.
11899 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11900 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11901 (unspec:<avx512fmaskmode>
11902 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11903 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11906 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11907 [(set_attr "prefix" "evex")
11908 (set_attr "mode" "<sseinsnmode>")])
11910 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11912 ;; Parallel integral element swizzling
11914 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two wide-element vectors into one vector with
;; twice the element count: reinterpret both operands in the packed
;; mode via gen_lowpart, then keep the even-numbered elements
;; (final argument 0 selects "even") of their concatenation.
11916 (define_expand "vec_pack_trunc_<mode>"
11917 [(match_operand:<ssepackmode> 0 "register_operand")
11918 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
11919 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
11922 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
11923 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
11924 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two QImode values into one HImode value: operand 2 is
;; zero-extended and shifted into the high half (shift amount on an
;; elided line), then IORed with the zero-extended operand 1.
11928 (define_expand "vec_pack_trunc_qi"
11929 [(set (match_operand:HI 0 ("register_operand"))
11930 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 ("register_operand")))
11932 (zero_extend:HI (match_operand:QI 1 ("register_operand")))))]
;; Same shift-and-ior pack for HI/SI scalars into the double-width mask
;; mode; operand 3 (the shift count) is set to the source mode's bit
;; size in the preparation code.
11935 (define_expand "vec_pack_trunc_<mode>"
11936 [(set (match_operand:<DOUBLEMASKMODE> 0 ("register_operand"))
11937 (ior:<DOUBLEMASKMODE> (ashift:<DOUBLEMASKMODE> (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 2 ("register_operand")))
11939 (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 1 ("register_operand")))))]
11942 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
;; packsswb: signed-saturating truncation (ss_truncate) of two word
;; vectors, concatenated into one byte vector.  Alternatives:
;; legacy SSE2 two-operand, AVX, and AVX512BW EVEX forms.
11945 (define_insn "<sse2_avx2>_packsswb<mask_name>"
11946 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
11947 (vec_concat:VI1_AVX512
11948 (ss_truncate:<ssehalfvecmode>
11949 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
11950 (ss_truncate:<ssehalfvecmode>
11951 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
11952 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11954 packsswb\t{%2, %0|%0, %2}
11955 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
11956 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11957 [(set_attr "isa" "noavx,avx,avx512bw")
11958 (set_attr "type" "sselog")
11959 (set_attr "prefix_data16" "1,*,*")
11960 (set_attr "prefix" "orig,<mask_prefix>,evex")
11961 (set_attr "mode" "<sseinsnmode>")])
;; packssdw: signed-saturating truncation of two dword vectors,
;; concatenated into one word vector.  Same alternative layout as
;; packsswb above.
11963 (define_insn "<sse2_avx2>_packssdw<mask_name>"
11964 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
11965 (vec_concat:VI2_AVX2
11966 (ss_truncate:<ssehalfvecmode>
11967 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
11968 (ss_truncate:<ssehalfvecmode>
11969 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
11970 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11972 packssdw\t{%2, %0|%0, %2}
11973 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
11974 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11975 [(set_attr "isa" "noavx,avx,avx512bw")
11976 (set_attr "type" "sselog")
11977 (set_attr "prefix_data16" "1,*,*")
11978 (set_attr "prefix" "orig,<mask_prefix>,evex")
11979 (set_attr "mode" "<sseinsnmode>")])
;; packuswb: unsigned-saturating truncation (us_truncate) of two word
;; vectors, concatenated into one byte vector.
11981 (define_insn "<sse2_avx2>_packuswb<mask_name>"
11982 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
11983 (vec_concat:VI1_AVX512
11984 (us_truncate:<ssehalfvecmode>
11985 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
11986 (us_truncate:<ssehalfvecmode>
11987 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
11988 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11990 packuswb\t{%2, %0|%0, %2}
11991 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
11992 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11993 [(set_attr "isa" "noavx,avx,avx512bw")
11994 (set_attr "type" "sselog")
11995 (set_attr "prefix_data16" "1,*,*")
11996 (set_attr "prefix" "orig,<mask_prefix>,evex")
11997 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw, 512-bit: interleave the high 8 bytes of each of the four
;; 128-bit lanes of operands 1 and 2 (selector pairs op1 byte i with
;; op2 byte i+64).  The vec_select/vec_concat wrapper line is elided
;; from this excerpt.
11999 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
12000 [(set (match_operand:V64QI 0 "register_operand" "=v")
12003 (match_operand:V64QI 1 "register_operand" "v")
12004 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
12005 (parallel [(const_int 8) (const_int 72)
12006 (const_int 9) (const_int 73)
12007 (const_int 10) (const_int 74)
12008 (const_int 11) (const_int 75)
12009 (const_int 12) (const_int 76)
12010 (const_int 13) (const_int 77)
12011 (const_int 14) (const_int 78)
12012 (const_int 15) (const_int 79)
12013 (const_int 24) (const_int 88)
12014 (const_int 25) (const_int 89)
12015 (const_int 26) (const_int 90)
12016 (const_int 27) (const_int 91)
12017 (const_int 28) (const_int 92)
12018 (const_int 29) (const_int 93)
12019 (const_int 30) (const_int 94)
12020 (const_int 31) (const_int 95)
12021 (const_int 40) (const_int 104)
12022 (const_int 41) (const_int 105)
12023 (const_int 42) (const_int 106)
12024 (const_int 43) (const_int 107)
12025 (const_int 44) (const_int 108)
12026 (const_int 45) (const_int 109)
12027 (const_int 46) (const_int 110)
12028 (const_int 47) (const_int 111)
12029 (const_int 56) (const_int 120)
12030 (const_int 57) (const_int 121)
12031 (const_int 58) (const_int 122)
12032 (const_int 59) (const_int 123)
12033 (const_int 60) (const_int 124)
12034 (const_int 61) (const_int 125)
12035 (const_int 62) (const_int 126)
12036 (const_int 63) (const_int 127)])))]
12038 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12039 [(set_attr "type" "sselog")
12040 (set_attr "prefix" "evex")
12041 (set_attr "mode" "XI")])
;; vpunpckhbw, 256-bit: interleave the high 8 bytes of each 128-bit
;; lane of the two operands (op1 byte i with op2 byte i+32).
12043 (define_insn "avx2_interleave_highv32qi<mask_name>"
12044 [(set (match_operand:V32QI 0 "register_operand" "=v")
12047 (match_operand:V32QI 1 "register_operand" "v")
12048 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
12049 (parallel [(const_int 8) (const_int 40)
12050 (const_int 9) (const_int 41)
12051 (const_int 10) (const_int 42)
12052 (const_int 11) (const_int 43)
12053 (const_int 12) (const_int 44)
12054 (const_int 13) (const_int 45)
12055 (const_int 14) (const_int 46)
12056 (const_int 15) (const_int 47)
12057 (const_int 24) (const_int 56)
12058 (const_int 25) (const_int 57)
12059 (const_int 26) (const_int 58)
12060 (const_int 27) (const_int 59)
12061 (const_int 28) (const_int 60)
12062 (const_int 29) (const_int 61)
12063 (const_int 30) (const_int 62)
12064 (const_int 31) (const_int 63)])))]
12065 "TARGET_AVX2 && <mask_avx512vl_condition>"
12066 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12067 [(set_attr "type" "sselog")
12068 (set_attr "prefix" "<mask_prefix>")
12069 (set_attr "mode" "OI")])
;; punpckhbw, 128-bit: interleave bytes 8-15 of op1 with bytes 8-15 of
;; op2 (indices 24-31 of the concatenation).  SSE2 and AVX alternatives.
12071 (define_insn "vec_interleave_highv16qi<mask_name>"
12072 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
12075 (match_operand:V16QI 1 "register_operand" "0,v")
12076 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
12077 (parallel [(const_int 8) (const_int 24)
12078 (const_int 9) (const_int 25)
12079 (const_int 10) (const_int 26)
12080 (const_int 11) (const_int 27)
12081 (const_int 12) (const_int 28)
12082 (const_int 13) (const_int 29)
12083 (const_int 14) (const_int 30)
12084 (const_int 15) (const_int 31)])))]
12085 "TARGET_SSE2 && <mask_avx512vl_condition>"
12087 punpckhbw\t{%2, %0|%0, %2}
12088 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12089 [(set_attr "isa" "noavx,avx")
12090 (set_attr "type" "sselog")
12091 (set_attr "prefix_data16" "1,*")
12092 (set_attr "prefix" "orig,<mask_prefix>")
12093 (set_attr "mode" "TI")])
;; vpunpcklbw, 512-bit: interleave the low 8 bytes of each of the four
;; 128-bit lanes (op1 byte i with op2 byte i+64).
12095 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
12096 [(set (match_operand:V64QI 0 "register_operand" "=v")
12099 (match_operand:V64QI 1 "register_operand" "v")
12100 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
12101 (parallel [(const_int 0) (const_int 64)
12102 (const_int 1) (const_int 65)
12103 (const_int 2) (const_int 66)
12104 (const_int 3) (const_int 67)
12105 (const_int 4) (const_int 68)
12106 (const_int 5) (const_int 69)
12107 (const_int 6) (const_int 70)
12108 (const_int 7) (const_int 71)
12109 (const_int 16) (const_int 80)
12110 (const_int 17) (const_int 81)
12111 (const_int 18) (const_int 82)
12112 (const_int 19) (const_int 83)
12113 (const_int 20) (const_int 84)
12114 (const_int 21) (const_int 85)
12115 (const_int 22) (const_int 86)
12116 (const_int 23) (const_int 87)
12117 (const_int 32) (const_int 96)
12118 (const_int 33) (const_int 97)
12119 (const_int 34) (const_int 98)
12120 (const_int 35) (const_int 99)
12121 (const_int 36) (const_int 100)
12122 (const_int 37) (const_int 101)
12123 (const_int 38) (const_int 102)
12124 (const_int 39) (const_int 103)
12125 (const_int 48) (const_int 112)
12126 (const_int 49) (const_int 113)
12127 (const_int 50) (const_int 114)
12128 (const_int 51) (const_int 115)
12129 (const_int 52) (const_int 116)
12130 (const_int 53) (const_int 117)
12131 (const_int 54) (const_int 118)
12132 (const_int 55) (const_int 119)])))]
12134 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12135 [(set_attr "type" "sselog")
12136 (set_attr "prefix" "evex")
12137 (set_attr "mode" "XI")])
;; vpunpcklbw, 256-bit: interleave the low 8 bytes of each 128-bit lane
;; (op1 byte i with op2 byte i+32).
12139 (define_insn "avx2_interleave_lowv32qi<mask_name>"
12140 [(set (match_operand:V32QI 0 "register_operand" "=v")
12143 (match_operand:V32QI 1 "register_operand" "v")
12144 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
12145 (parallel [(const_int 0) (const_int 32)
12146 (const_int 1) (const_int 33)
12147 (const_int 2) (const_int 34)
12148 (const_int 3) (const_int 35)
12149 (const_int 4) (const_int 36)
12150 (const_int 5) (const_int 37)
12151 (const_int 6) (const_int 38)
12152 (const_int 7) (const_int 39)
12153 (const_int 16) (const_int 48)
12154 (const_int 17) (const_int 49)
12155 (const_int 18) (const_int 50)
12156 (const_int 19) (const_int 51)
12157 (const_int 20) (const_int 52)
12158 (const_int 21) (const_int 53)
12159 (const_int 22) (const_int 54)
12160 (const_int 23) (const_int 55)])))]
12161 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12162 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12163 [(set_attr "type" "sselog")
12164 (set_attr "prefix" "maybe_vex")
12165 (set_attr "mode" "OI")])
;; punpcklbw, 128-bit: interleave bytes 0-7 of op1 with bytes 0-7 of
;; op2 (indices 16-23 of the concatenation).
12167 (define_insn "vec_interleave_lowv16qi<mask_name>"
12168 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
12171 (match_operand:V16QI 1 "register_operand" "0,v")
12172 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
12173 (parallel [(const_int 0) (const_int 16)
12174 (const_int 1) (const_int 17)
12175 (const_int 2) (const_int 18)
12176 (const_int 3) (const_int 19)
12177 (const_int 4) (const_int 20)
12178 (const_int 5) (const_int 21)
12179 (const_int 6) (const_int 22)
12180 (const_int 7) (const_int 23)])))]
12181 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12183 punpcklbw\t{%2, %0|%0, %2}
12184 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12185 [(set_attr "isa" "noavx,avx")
12186 (set_attr "type" "sselog")
12187 (set_attr "prefix_data16" "1,*")
12188 (set_attr "prefix" "orig,vex")
12189 (set_attr "mode" "TI")])
;; vpunpckhwd, 512-bit: interleave the high 4 words of each of the four
;; 128-bit lanes (op1 word i with op2 word i+32).
12191 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
12192 [(set (match_operand:V32HI 0 "register_operand" "=v")
12195 (match_operand:V32HI 1 "register_operand" "v")
12196 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
12197 (parallel [(const_int 4) (const_int 36)
12198 (const_int 5) (const_int 37)
12199 (const_int 6) (const_int 38)
12200 (const_int 7) (const_int 39)
12201 (const_int 12) (const_int 44)
12202 (const_int 13) (const_int 45)
12203 (const_int 14) (const_int 46)
12204 (const_int 15) (const_int 47)
12205 (const_int 20) (const_int 52)
12206 (const_int 21) (const_int 53)
12207 (const_int 22) (const_int 54)
12208 (const_int 23) (const_int 55)
12209 (const_int 28) (const_int 60)
12210 (const_int 29) (const_int 61)
12211 (const_int 30) (const_int 62)
12212 (const_int 31) (const_int 63)])))]
12214 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12215 [(set_attr "type" "sselog")
12216 (set_attr "prefix" "evex")
12217 (set_attr "mode" "XI")])
;; vpunpckhwd, 256-bit: interleave the high 4 words of each 128-bit
;; lane (op1 word i with op2 word i+16).
12219 (define_insn "avx2_interleave_highv16hi<mask_name>"
12220 [(set (match_operand:V16HI 0 "register_operand" "=v")
12223 (match_operand:V16HI 1 "register_operand" "v")
12224 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
12225 (parallel [(const_int 4) (const_int 20)
12226 (const_int 5) (const_int 21)
12227 (const_int 6) (const_int 22)
12228 (const_int 7) (const_int 23)
12229 (const_int 12) (const_int 28)
12230 (const_int 13) (const_int 29)
12231 (const_int 14) (const_int 30)
12232 (const_int 15) (const_int 31)])))]
12233 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12234 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12235 [(set_attr "type" "sselog")
12236 (set_attr "prefix" "maybe_evex")
12237 (set_attr "mode" "OI")])
;; punpckhwd, 128-bit: interleave words 4-7 of op1 with words 4-7 of
;; op2 (indices 12-15 of the concatenation).
12239 (define_insn "vec_interleave_highv8hi<mask_name>"
12240 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
12243 (match_operand:V8HI 1 "register_operand" "0,v")
12244 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
12245 (parallel [(const_int 4) (const_int 12)
12246 (const_int 5) (const_int 13)
12247 (const_int 6) (const_int 14)
12248 (const_int 7) (const_int 15)])))]
12249 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12251 punpckhwd\t{%2, %0|%0, %2}
12252 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12253 [(set_attr "isa" "noavx,avx")
12254 (set_attr "type" "sselog")
12255 (set_attr "prefix_data16" "1,*")
12256 (set_attr "prefix" "orig,maybe_vex")
12257 (set_attr "mode" "TI")])
;; vpunpcklwd, 512-bit: interleave the low 4 words of each of the four
;; 128-bit lanes (op1 word i with op2 word i+32).
12259 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
12260 [(set (match_operand:V32HI 0 "register_operand" "=v")
12263 (match_operand:V32HI 1 "register_operand" "v")
12264 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
12265 (parallel [(const_int 0) (const_int 32)
12266 (const_int 1) (const_int 33)
12267 (const_int 2) (const_int 34)
12268 (const_int 3) (const_int 35)
12269 (const_int 8) (const_int 40)
12270 (const_int 9) (const_int 41)
12271 (const_int 10) (const_int 42)
12272 (const_int 11) (const_int 43)
12273 (const_int 16) (const_int 48)
12274 (const_int 17) (const_int 49)
12275 (const_int 18) (const_int 50)
12276 (const_int 19) (const_int 51)
12277 (const_int 24) (const_int 56)
12278 (const_int 25) (const_int 57)
12279 (const_int 26) (const_int 58)
12280 (const_int 27) (const_int 59)])))]
12282 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12283 [(set_attr "type" "sselog")
12284 (set_attr "prefix" "evex")
12285 (set_attr "mode" "XI")])
;; vpunpcklwd, 256-bit: interleave the low 4 words of each 128-bit
;; lane (op1 word i with op2 word i+16).
12287 (define_insn "avx2_interleave_lowv16hi<mask_name>"
12288 [(set (match_operand:V16HI 0 "register_operand" "=v")
12291 (match_operand:V16HI 1 "register_operand" "v")
12292 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
12293 (parallel [(const_int 0) (const_int 16)
12294 (const_int 1) (const_int 17)
12295 (const_int 2) (const_int 18)
12296 (const_int 3) (const_int 19)
12297 (const_int 8) (const_int 24)
12298 (const_int 9) (const_int 25)
12299 (const_int 10) (const_int 26)
12300 (const_int 11) (const_int 27)])))]
12301 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12302 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12303 [(set_attr "type" "sselog")
12304 (set_attr "prefix" "maybe_evex")
12305 (set_attr "mode" "OI")])
;; punpcklwd, 128-bit: interleave words 0-3 of op1 with words 0-3 of
;; op2 (indices 8-11 of the concatenation).
12307 (define_insn "vec_interleave_lowv8hi<mask_name>"
12308 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
12311 (match_operand:V8HI 1 "register_operand" "0,v")
12312 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
12313 (parallel [(const_int 0) (const_int 8)
12314 (const_int 1) (const_int 9)
12315 (const_int 2) (const_int 10)
12316 (const_int 3) (const_int 11)])))]
12317 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12319 punpcklwd\t{%2, %0|%0, %2}
12320 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12321 [(set_attr "isa" "noavx,avx")
12322 (set_attr "type" "sselog")
12323 (set_attr "prefix_data16" "1,*")
12324 (set_attr "prefix" "orig,maybe_evex")
12325 (set_attr "mode" "TI")])
;; vpunpckhdq, 256-bit: interleave the high 2 dwords of each 128-bit
;; lane (op1 dword i with op2 dword i+8).
12327 (define_insn "avx2_interleave_highv8si<mask_name>"
12328 [(set (match_operand:V8SI 0 "register_operand" "=v")
12331 (match_operand:V8SI 1 "register_operand" "v")
12332 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
12333 (parallel [(const_int 2) (const_int 10)
12334 (const_int 3) (const_int 11)
12335 (const_int 6) (const_int 14)
12336 (const_int 7) (const_int 15)])))]
12337 "TARGET_AVX2 && <mask_avx512vl_condition>"
12338 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12339 [(set_attr "type" "sselog")
12340 (set_attr "prefix" "maybe_evex")
12341 (set_attr "mode" "OI")])
;; vpunpckhdq, 512-bit: interleave the high 2 dwords of each of the
;; four 128-bit lanes (op1 dword i with op2 dword i+16).
12343 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
12344 [(set (match_operand:V16SI 0 "register_operand" "=v")
12347 (match_operand:V16SI 1 "register_operand" "v")
12348 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
12349 (parallel [(const_int 2) (const_int 18)
12350 (const_int 3) (const_int 19)
12351 (const_int 6) (const_int 22)
12352 (const_int 7) (const_int 23)
12353 (const_int 10) (const_int 26)
12354 (const_int 11) (const_int 27)
12355 (const_int 14) (const_int 30)
12356 (const_int 15) (const_int 31)])))]
12358 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12359 [(set_attr "type" "sselog")
12360 (set_attr "prefix" "evex")
12361 (set_attr "mode" "XI")])
;; punpckhdq, 128-bit: interleave dwords 2-3 of op1 with dwords 2-3 of
;; op2 (indices 6-7 of the concatenation).
12364 (define_insn "vec_interleave_highv4si<mask_name>"
12365 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
12368 (match_operand:V4SI 1 "register_operand" "0,v")
12369 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
12370 (parallel [(const_int 2) (const_int 6)
12371 (const_int 3) (const_int 7)])))]
12372 "TARGET_SSE2 && <mask_avx512vl_condition>"
12374 punpckhdq\t{%2, %0|%0, %2}
12375 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12376 [(set_attr "isa" "noavx,avx")
12377 (set_attr "type" "sselog")
12378 (set_attr "prefix_data16" "1,*")
12379 (set_attr "prefix" "orig,maybe_vex")
12380 (set_attr "mode" "TI")])
;; vpunpckldq, 256-bit: interleave the low 2 dwords of each 128-bit
;; lane (op1 dword i with op2 dword i+8).
12382 (define_insn "avx2_interleave_lowv8si<mask_name>"
12383 [(set (match_operand:V8SI 0 "register_operand" "=v")
12386 (match_operand:V8SI 1 "register_operand" "v")
12387 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
12388 (parallel [(const_int 0) (const_int 8)
12389 (const_int 1) (const_int 9)
12390 (const_int 4) (const_int 12)
12391 (const_int 5) (const_int 13)])))]
12392 "TARGET_AVX2 && <mask_avx512vl_condition>"
12393 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12394 [(set_attr "type" "sselog")
12395 (set_attr "prefix" "maybe_evex")
12396 (set_attr "mode" "OI")])
;; vpunpckldq, 512-bit: interleave the low 2 dwords of each of the four
;; 128-bit lanes (op1 dword i with op2 dword i+16).
12398 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
12399 [(set (match_operand:V16SI 0 "register_operand" "=v")
12402 (match_operand:V16SI 1 "register_operand" "v")
12403 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
12404 (parallel [(const_int 0) (const_int 16)
12405 (const_int 1) (const_int 17)
12406 (const_int 4) (const_int 20)
12407 (const_int 5) (const_int 21)
12408 (const_int 8) (const_int 24)
12409 (const_int 9) (const_int 25)
12410 (const_int 12) (const_int 28)
12411 (const_int 13) (const_int 29)])))]
12413 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12414 [(set_attr "type" "sselog")
12415 (set_attr "prefix" "evex")
12416 (set_attr "mode" "XI")])
;; punpckldq, 128-bit: interleave dwords 0-1 of op1 with dwords 0-1 of
;; op2 (indices 4-5 of the concatenation).
12418 (define_insn "vec_interleave_lowv4si<mask_name>"
12419 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
12422 (match_operand:V4SI 1 "register_operand" "0,v")
12423 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
12424 (parallel [(const_int 0) (const_int 4)
12425 (const_int 1) (const_int 5)])))]
12426 "TARGET_SSE2 && <mask_avx512vl_condition>"
12428 punpckldq\t{%2, %0|%0, %2}
12429 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12430 [(set_attr "isa" "noavx,avx")
12431 (set_attr "type" "sselog")
12432 (set_attr "prefix_data16" "1,*")
12433 (set_attr "prefix" "orig,vex")
12434 (set_attr "mode" "TI")])
;; True whole-vector interleave-high for 256-bit integer vectors: the
;; AVX2 punpck insns work per 128-bit lane, so do both lane-local
;; interleaves and then use vperm2ti with selector 0x31 (1 + (3 << 4))
;; to gather the two high 128-bit lanes into the result.
12436 (define_expand "vec_interleave_high<mode>"
12437 [(match_operand:VI_256 0 "register_operand")
12438 (match_operand:VI_256 1 "register_operand")
12439 (match_operand:VI_256 2 "nonimmediate_operand")]
12442 rtx t1 = gen_reg_rtx (<MODE>mode);
12443 rtx t2 = gen_reg_rtx (<MODE>mode);
12444 rtx t3 = gen_reg_rtx (V4DImode);
12445 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
12446 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
12447 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
12448 gen_lowpart (V4DImode, t2),
12449 GEN_INT (1 + (3 << 4))));
12450 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Whole-vector interleave-low for 256-bit integer vectors: mirror of
;; the expander above, with vperm2ti selector 0x20 (0 + (2 << 4))
;; gathering the two low 128-bit lanes.
12454 (define_expand "vec_interleave_low<mode>"
12455 [(match_operand:VI_256 0 "register_operand")
12456 (match_operand:VI_256 1 "register_operand")
12457 (match_operand:VI_256 2 "nonimmediate_operand")]
12460 rtx t1 = gen_reg_rtx (<MODE>mode);
12461 rtx t2 = gen_reg_rtx (<MODE>mode);
12462 rtx t3 = gen_reg_rtx (V4DImode);
12463 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
12464 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
12465 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
12466 gen_lowpart (V4DImode, t2),
12467 GEN_INT (0 + (2 << 4))));
12468 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
12472 ;; Modes handled by pinsr patterns.
;; PINSR_MODE: element modes the pinsr patterns handle; V8HI needs only
;; SSE2, the rest require SSE4.1 (V2DI also 64-bit mode).
;; sse2p4_1 maps each mode to the insn-name prefix of the ISA that
;; introduced its pinsr form; pinsr_evex_isa gives the AVX-512
;; sub-feature gating the EVEX-encoded alternatives.
12473 (define_mode_iterator PINSR_MODE
12474 [(V16QI "TARGET_SSE4_1") V8HI
12475 (V4SI "TARGET_SSE4_1")
12476 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
12478 (define_mode_attr sse2p4_1
12479 [(V16QI "sse4_1") (V8HI "sse2")
12480 (V4SI "sse4_1") (V2DI "sse4_1")])
12482 (define_mode_attr pinsr_evex_isa
12483 [(V16QI "avx512bw") (V8HI "avx512bw")
12484 (V4SI "avx512dq") (V2DI "avx512dq")])
12486 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; pinsrb/w/d/q: insert scalar operand 2 into vector operand 1 at the
;; element selected by operand 3.  Operand 3 arrives as a one-hot merge
;; mask; the condition checks it is a valid single-bit mask and the C
;; code converts it to an element index with exact_log2.  Sub-SImode
;; scalars use the %k (32-bit register) modifier.  NOTE(review): part
;; of the insn condition and some case labels are on lines elided from
;; this excerpt.
12487 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
12488 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
12489 (vec_merge:PINSR_MODE
12490 (vec_duplicate:PINSR_MODE
12491 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
12492 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
12493 (match_operand:SI 3 "const_int_operand")))]
12495 && ((unsigned) exact_log2 (INTVAL (operands[3]))
12496 < GET_MODE_NUNITS (<MODE>mode))"
12498 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
12500 switch (which_alternative)
12503 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
12504 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
12507 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
12510 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
12511 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
12515 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12517 gcc_unreachable ();
12520 [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
12521 (set_attr "type" "sselog")
12522 (set (attr "prefix_rex")
12524 (and (not (match_test "TARGET_AVX"))
12525 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
12527 (const_string "*")))
12528 (set (attr "prefix_data16")
12530 (and (not (match_test "TARGET_AVX"))
12531 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12533 (const_string "*")))
12534 (set (attr "prefix_extra")
12536 (and (not (match_test "TARGET_AVX"))
12537 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12539 (const_string "1")))
12540 (set_attr "length_immediate" "1")
12541 (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
12542 (set_attr "mode" "TI")])
;; Masked vinsert of a quarter-width vector: translate the quarter
;; index (operand 3, 0-3) into the vec_merge selector expected by the
;; _1 pattern — clear the 4 (32-bit units) or 2 (64-bit units) selector
;; bits covering that quarter — then emit the _1_mask insn.
12544 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
12545 [(match_operand:AVX512_VEC 0 "register_operand")
12546 (match_operand:AVX512_VEC 1 "register_operand")
12547 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
12548 (match_operand:SI 3 "const_0_to_3_operand")
12549 (match_operand:AVX512_VEC 4 "register_operand")
12550 (match_operand:<avx512fmaskmode> 5 "register_operand")]
12554 mask = INTVAL (operands[3]);
12555 selector = GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ?
12556 0xFFFF ^ (0xF000 >> mask * 4)
12557 : 0xFF ^ (0xC0 >> mask * 2);
12558 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
12559 (operands[0], operands[1], operands[2], GEN_INT (selector),
12560 operands[4], operands[5]));
;; vinsert{32x4,64x2,...}: vec_merge of the broadcast quarter operand
;; into operand 1; the C body maps the merge selector back to the
;; 2-bit quarter-index immediate (0xFFF./0x3F pairs are the 16- and
;; 8-element selector encodings).  NOTE(review): the `mask = N;`
;; assignments inside the if/else chain are on lines elided from this
;; excerpt.
12564 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
12565 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
12566 (vec_merge:AVX512_VEC
12567 (match_operand:AVX512_VEC 1 "register_operand" "v")
12568 (vec_duplicate:AVX512_VEC
12569 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
12570 (match_operand:SI 3 "const_int_operand" "n")))]
12574 int selector = INTVAL (operands[3]);
12576 if (selector == 0xFFF || selector == 0x3F)
12578 else if ( selector == 0xF0FF || selector == 0xCF)
12580 else if ( selector == 0xFF0F || selector == 0xF3)
12582 else if ( selector == 0xFFF0 || selector == 0xFC)
12585 gcc_unreachable ();
12587 operands[3] = GEN_INT (mask);
12589 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
12591 [(set_attr "type" "sselog")
12592 (set_attr "length_immediate" "1")
12593 (set_attr "prefix" "evex")
12594 (set_attr "mode" "<sseinsnmode>")])
;; Masked vinsert of a half-width vector: operand 3 selects the low (0)
;; or high (1) half and dispatches to vec_set_lo/vec_set_hi.
12596 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
12597 [(match_operand:AVX512_VEC_2 0 "register_operand")
12598 (match_operand:AVX512_VEC_2 1 "register_operand")
12599 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
12600 (match_operand:SI 3 "const_0_to_1_operand")
12601 (match_operand:AVX512_VEC_2 4 "register_operand")
12602 (match_operand:<avx512fmaskmode> 5 "register_operand")]
12605 int mask = INTVAL (operands[3]);
12607 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
12608 operands[2], operands[4],
12611 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
12612 operands[2], operands[4],
;; Replace the low 256-bit half of a 16-element vector with operand 2,
;; keeping elements 8-15 of operand 1; emits vinsert*32x8 imm 0.
12617 (define_insn "vec_set_lo_<mode><mask_name>"
12618 [(set (match_operand:V16FI 0 "register_operand" "=v")
12620 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12621 (vec_select:<ssehalfvecmode>
12622 (match_operand:V16FI 1 "register_operand" "v")
12623 (parallel [(const_int 8) (const_int 9)
12624 (const_int 10) (const_int 11)
12625 (const_int 12) (const_int 13)
12626 (const_int 14) (const_int 15)]))))]
12628 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
12629 [(set_attr "type" "sselog")
12630 (set_attr "length_immediate" "1")
12631 (set_attr "prefix" "evex")
12632 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high 256-bit half of a 16-element vector with operand 2,
;; keeping elements 0-7 of operand 1; emits vinsert*32x8 imm 1.
12634 (define_insn "vec_set_hi_<mode><mask_name>"
12635 [(set (match_operand:V16FI 0 "register_operand" "=v")
12637 (vec_select:<ssehalfvecmode>
12638 (match_operand:V16FI 1 "register_operand" "v")
12639 (parallel [(const_int 0) (const_int 1)
12640 (const_int 2) (const_int 3)
12641 (const_int 4) (const_int 5)
12642 (const_int 6) (const_int 7)]))
12643 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12645 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
12646 [(set_attr "type" "sselog")
12647 (set_attr "length_immediate" "1")
12648 (set_attr "prefix" "evex")
12649 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low 256-bit half of an 8-element (64-bit unit) vector,
;; keeping elements 4-7 of operand 1; emits vinsert*64x4 imm 0.
12651 (define_insn "vec_set_lo_<mode><mask_name>"
12652 [(set (match_operand:V8FI 0 "register_operand" "=v")
12654 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12655 (vec_select:<ssehalfvecmode>
12656 (match_operand:V8FI 1 "register_operand" "v")
12657 (parallel [(const_int 4) (const_int 5)
12658 (const_int 6) (const_int 7)]))))]
12660 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
12661 [(set_attr "type" "sselog")
12662 (set_attr "length_immediate" "1")
12663 (set_attr "prefix" "evex")
12664 (set_attr "mode" "XI")])
;; Replace the high 256-bit half of an 8-element (64-bit unit) vector,
;; keeping elements 0-3 of operand 1; emits vinsert*64x4 imm 1.
12666 (define_insn "vec_set_hi_<mode><mask_name>"
12667 [(set (match_operand:V8FI 0 "register_operand" "=v")
12669 (vec_select:<ssehalfvecmode>
12670 (match_operand:V8FI 1 "register_operand" "v")
12671 (parallel [(const_int 0) (const_int 1)
12672 (const_int 2) (const_int 3)]))
12673 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12675 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
12676 [(set_attr "type" "sselog")
12677 (set_attr "length_immediate" "1")
12678 (set_attr "prefix" "evex")
12679 (set_attr "mode" "XI")])
;; Masked 256-bit vshuf*64x2: decode the 2-bit immediate (operand 3)
;; into explicit element indices — bit 0 picks the 128-bit chunk of
;; operand 1 (elements 0-1 or 2-3), bit 1 the chunk of operand 2
;; (offset by 4 in the concatenation) — and emit the _1_mask insn.
12681 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
12682 [(match_operand:VI8F_256 0 "register_operand")
12683 (match_operand:VI8F_256 1 "register_operand")
12684 (match_operand:VI8F_256 2 "nonimmediate_operand")
12685 (match_operand:SI 3 "const_0_to_3_operand")
12686 (match_operand:VI8F_256 4 "register_operand")
12687 (match_operand:QI 5 "register_operand")]
12690 int mask = INTVAL (operands[3]);
12691 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
12692 (operands[0], operands[1], operands[2],
12693 GEN_INT (((mask >> 0) & 1) * 2 + 0),
12694 GEN_INT (((mask >> 0) & 1) * 2 + 1),
12695 GEN_INT (((mask >> 1) & 1) * 2 + 4),
12696 GEN_INT (((mask >> 1) & 1) * 2 + 5),
12697 operands[4], operands[5]));
12701 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
12702 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
12703 (vec_select:VI8F_256
12704 (vec_concat:<ssedoublemode>
12705 (match_operand:VI8F_256 1 "register_operand" "v")
12706 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
12707 (parallel [(match_operand 3 "const_0_to_3_operand")
12708 (match_operand 4 "const_0_to_3_operand")
12709 (match_operand 5 "const_4_to_7_operand")
12710 (match_operand 6 "const_4_to_7_operand")])))]
12712 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12713 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
12716 mask = INTVAL (operands[3]) / 2;
12717 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
12718 operands[3] = GEN_INT (mask);
12719 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
12721 [(set_attr "type" "sselog")
12722 (set_attr "length_immediate" "1")
12723 (set_attr "prefix" "evex")
12724 (set_attr "mode" "XI")])
;; 512-bit vshuf{f,i}64x2 with merge-masking (AVX512F).  The expander
;; decodes the 8-bit immediate (operand 3) into eight element indices:
;; each 2-bit field selects one of four 128-bit lanes; fields 0-1 pick
;; from operand 1, fields 2-3 pick from operand 2 (indices offset by 8).
12726 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
12727 [(match_operand:V8FI 0 "register_operand")
12728 (match_operand:V8FI 1 "register_operand")
12729 (match_operand:V8FI 2 "nonimmediate_operand")
12730 (match_operand:SI 3 "const_0_to_255_operand")
12731 (match_operand:V8FI 4 "register_operand")
12732 (match_operand:QI 5 "register_operand")]
12735 int mask = INTVAL (operands[3]);
12736 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
12737 (operands[0], operands[1], operands[2],
12738 GEN_INT (((mask >> 0) & 3) * 2),
12739 GEN_INT (((mask >> 0) & 3) * 2 + 1),
12740 GEN_INT (((mask >> 2) & 3) * 2),
12741 GEN_INT (((mask >> 2) & 3) * 2 + 1),
;; +8: operand 2's elements follow operand 1 in the vec_concat.
12742 GEN_INT (((mask >> 4) & 3) * 2 + 8),
12743 GEN_INT (((mask >> 4) & 3) * 2 + 9),
12744 GEN_INT (((mask >> 6) & 3) * 2 + 8),
12745 GEN_INT (((mask >> 6) & 3) * 2 + 9),
12746 operands[4], operands[5]));

;; Matcher: selects four consecutive 64-bit pairs from the 1024-bit
;; concat of operands 1 and 2, re-encoding the 8-bit immediate.
12750 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
12751 [(set (match_operand:V8FI 0 "register_operand" "=v")
12753 (vec_concat:<ssedoublemode>
12754 (match_operand:V8FI 1 "register_operand" "v")
12755 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
12756 (parallel [(match_operand 3 "const_0_to_7_operand")
12757 (match_operand 4 "const_0_to_7_operand")
12758 (match_operand 5 "const_0_to_7_operand")
12759 (match_operand 6 "const_0_to_7_operand")
12760 (match_operand 7 "const_8_to_15_operand")
12761 (match_operand 8 "const_8_to_15_operand")
12762 (match_operand 9 "const_8_to_15_operand")
12763 (match_operand 10 "const_8_to_15_operand")])))]
;; Each selected pair must be two consecutive elements (lane-aligned).
12765 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12766 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
12767 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12768 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
;; Rebuild the 8-bit immediate: 2 bits of lane number per field.
12771 mask = INTVAL (operands[3]) / 2;
12772 mask |= INTVAL (operands[5]) / 2 << 2;
12773 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
12774 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
12775 operands[3] = GEN_INT (mask);
12777 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12779 [(set_attr "type" "sselog")
12780 (set_attr "length_immediate" "1")
12781 (set_attr "prefix" "evex")
12782 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit vshuf{f,i}32x4 with merge-masking (AVX512VL).  The expander
;; decodes the 2-bit immediate into eight 32-bit element indices: bit 0
;; picks the 128-bit lane of operand 1 (elements 0..3), bit 1 picks the
;; lane of operand 2 (elements 4..7, offset by 8 in the concat).
12784 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
12785 [(match_operand:VI4F_256 0 "register_operand")
12786 (match_operand:VI4F_256 1 "register_operand")
12787 (match_operand:VI4F_256 2 "nonimmediate_operand")
12788 (match_operand:SI 3 "const_0_to_3_operand")
12789 (match_operand:VI4F_256 4 "register_operand")
12790 (match_operand:QI 5 "register_operand")]
12793 int mask = INTVAL (operands[3]);
12794 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
12795 (operands[0], operands[1], operands[2],
;; Lane bit * 4 gives the first 32-bit element of the chosen lane.
12796 GEN_INT (((mask >> 0) & 1) * 4 + 0),
12797 GEN_INT (((mask >> 0) & 1) * 4 + 1),
12798 GEN_INT (((mask >> 0) & 1) * 4 + 2),
12799 GEN_INT (((mask >> 0) & 1) * 4 + 3),
;; +8: operand 2's elements follow operand 1 in the vec_concat.
12800 GEN_INT (((mask >> 1) & 1) * 4 + 8),
12801 GEN_INT (((mask >> 1) & 1) * 4 + 9),
12802 GEN_INT (((mask >> 1) & 1) * 4 + 10),
12803 GEN_INT (((mask >> 1) & 1) * 4 + 11),
12804 operands[4], operands[5]));

;; Matcher: selects two runs of four consecutive 32-bit elements from
;; the 512-bit concat of operands 1 and 2, re-encoding the immediate.
12808 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
12809 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
12810 (vec_select:VI4F_256
12811 (vec_concat:<ssedoublemode>
12812 (match_operand:VI4F_256 1 "register_operand" "v")
12813 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
12814 (parallel [(match_operand 3 "const_0_to_7_operand")
12815 (match_operand 4 "const_0_to_7_operand")
12816 (match_operand 5 "const_0_to_7_operand")
12817 (match_operand 6 "const_0_to_7_operand")
12818 (match_operand 7 "const_8_to_15_operand")
12819 (match_operand 8 "const_8_to_15_operand")
12820 (match_operand 9 "const_8_to_15_operand")
12821 (match_operand 10 "const_8_to_15_operand")])))]
;; Indices must form two runs of four consecutive elements each.
12823 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12824 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12825 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12826 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12827 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12828 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
;; Rebuild the 2-bit immediate (one lane-select bit per source).
12831 mask = INTVAL (operands[3]) / 4;
12832 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
12833 operands[3] = GEN_INT (mask);
12835 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12837 [(set_attr "type" "sselog")
12838 (set_attr "length_immediate" "1")
12839 (set_attr "prefix" "evex")
12840 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit vshuf{f,i}32x4 with merge-masking (AVX512F).  The expander
;; decodes the 8-bit immediate into sixteen 32-bit element indices:
;; each 2-bit field selects one of four 128-bit lanes; fields 0-1 pick
;; from operand 1, fields 2-3 from operand 2 (indices offset by 16).
12842 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
12843 [(match_operand:V16FI 0 "register_operand")
12844 (match_operand:V16FI 1 "register_operand")
12845 (match_operand:V16FI 2 "nonimmediate_operand")
12846 (match_operand:SI 3 "const_0_to_255_operand")
12847 (match_operand:V16FI 4 "register_operand")
12848 (match_operand:HI 5 "register_operand")]
12851 int mask = INTVAL (operands[3]);
12852 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
12853 (operands[0], operands[1], operands[2],
;; Lane number * 4 gives the first 32-bit element of that lane.
12854 GEN_INT (((mask >> 0) & 3) * 4),
12855 GEN_INT (((mask >> 0) & 3) * 4 + 1),
12856 GEN_INT (((mask >> 0) & 3) * 4 + 2),
12857 GEN_INT (((mask >> 0) & 3) * 4 + 3),
12858 GEN_INT (((mask >> 2) & 3) * 4),
12859 GEN_INT (((mask >> 2) & 3) * 4 + 1),
12860 GEN_INT (((mask >> 2) & 3) * 4 + 2),
12861 GEN_INT (((mask >> 2) & 3) * 4 + 3),
;; +16: operand 2's elements follow operand 1 in the vec_concat.
12862 GEN_INT (((mask >> 4) & 3) * 4 + 16),
12863 GEN_INT (((mask >> 4) & 3) * 4 + 17),
12864 GEN_INT (((mask >> 4) & 3) * 4 + 18),
12865 GEN_INT (((mask >> 4) & 3) * 4 + 19),
12866 GEN_INT (((mask >> 6) & 3) * 4 + 16),
12867 GEN_INT (((mask >> 6) & 3) * 4 + 17),
12868 GEN_INT (((mask >> 6) & 3) * 4 + 18),
12869 GEN_INT (((mask >> 6) & 3) * 4 + 19),
12870 operands[4], operands[5]));

;; Matcher: selects four runs of four consecutive 32-bit elements from
;; the 1024-bit concat of operands 1 and 2, re-encoding the immediate.
12874 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
12875 [(set (match_operand:V16FI 0 "register_operand" "=v")
12877 (vec_concat:<ssedoublemode>
12878 (match_operand:V16FI 1 "register_operand" "v")
12879 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
12880 (parallel [(match_operand 3 "const_0_to_15_operand")
12881 (match_operand 4 "const_0_to_15_operand")
12882 (match_operand 5 "const_0_to_15_operand")
12883 (match_operand 6 "const_0_to_15_operand")
12884 (match_operand 7 "const_0_to_15_operand")
12885 (match_operand 8 "const_0_to_15_operand")
12886 (match_operand 9 "const_0_to_15_operand")
12887 (match_operand 10 "const_0_to_15_operand")
12888 (match_operand 11 "const_16_to_31_operand")
12889 (match_operand 12 "const_16_to_31_operand")
12890 (match_operand 13 "const_16_to_31_operand")
12891 (match_operand 14 "const_16_to_31_operand")
12892 (match_operand 15 "const_16_to_31_operand")
12893 (match_operand 16 "const_16_to_31_operand")
12894 (match_operand 17 "const_16_to_31_operand")
12895 (match_operand 18 "const_16_to_31_operand")])))]
;; Each group of four indices must be consecutive (lane-aligned).
12897 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12898 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12899 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12900 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12901 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12902 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
12903 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
12904 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
12905 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
12906 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
12907 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
12908 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
;; Rebuild the 8-bit immediate: 2 bits of lane number per field.
12911 mask = INTVAL (operands[3]) / 4;
12912 mask |= INTVAL (operands[7]) / 4 << 2;
12913 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
12914 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
12915 operands[3] = GEN_INT (mask);
12917 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
12919 [(set_attr "type" "sselog")
12920 (set_attr "length_immediate" "1")
12921 (set_attr "prefix" "evex")
12922 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit vpshufd.  The expander decodes the 8-bit immediate
;; into sixteen element indices: the same 4-element permutation is
;; applied within each of the four 128-bit lanes (+0, +4, +8, +12).
12924 (define_expand "avx512f_pshufdv3_mask"
12925 [(match_operand:V16SI 0 "register_operand")
12926 (match_operand:V16SI 1 "nonimmediate_operand")
12927 (match_operand:SI 2 "const_0_to_255_operand")
12928 (match_operand:V16SI 3 "register_operand")
12929 (match_operand:HI 4 "register_operand")]
12932 int mask = INTVAL (operands[2]);
12933 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
;; Lane 0 indices (0..3).
12934 GEN_INT ((mask >> 0) & 3),
12935 GEN_INT ((mask >> 2) & 3),
12936 GEN_INT ((mask >> 4) & 3),
12937 GEN_INT ((mask >> 6) & 3),
;; Lane 1 indices (4..7): same permutation shifted by 4.
12938 GEN_INT (((mask >> 0) & 3) + 4),
12939 GEN_INT (((mask >> 2) & 3) + 4),
12940 GEN_INT (((mask >> 4) & 3) + 4),
12941 GEN_INT (((mask >> 6) & 3) + 4),
;; Lane 2 indices (8..11).
12942 GEN_INT (((mask >> 0) & 3) + 8),
12943 GEN_INT (((mask >> 2) & 3) + 8),
12944 GEN_INT (((mask >> 4) & 3) + 8),
12945 GEN_INT (((mask >> 6) & 3) + 8),
;; Lane 3 indices (12..15).
12946 GEN_INT (((mask >> 0) & 3) + 12),
12947 GEN_INT (((mask >> 2) & 3) + 12),
12948 GEN_INT (((mask >> 4) & 3) + 12),
12949 GEN_INT (((mask >> 6) & 3) + 12),
12950 operands[3], operands[4]));

;; Matcher: only matches when every lane repeats the lane-0 permutation
;; (enforced by the +4/+8/+12 conditions), then re-encodes the 8-bit
;; vpshufd immediate from the first four indices.
12954 (define_insn "avx512f_pshufd_1<mask_name>"
12955 [(set (match_operand:V16SI 0 "register_operand" "=v")
12957 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
12958 (parallel [(match_operand 2 "const_0_to_3_operand")
12959 (match_operand 3 "const_0_to_3_operand")
12960 (match_operand 4 "const_0_to_3_operand")
12961 (match_operand 5 "const_0_to_3_operand")
12962 (match_operand 6 "const_4_to_7_operand")
12963 (match_operand 7 "const_4_to_7_operand")
12964 (match_operand 8 "const_4_to_7_operand")
12965 (match_operand 9 "const_4_to_7_operand")
12966 (match_operand 10 "const_8_to_11_operand")
12967 (match_operand 11 "const_8_to_11_operand")
12968 (match_operand 12 "const_8_to_11_operand")
12969 (match_operand 13 "const_8_to_11_operand")
12970 (match_operand 14 "const_12_to_15_operand")
12971 (match_operand 15 "const_12_to_15_operand")
12972 (match_operand 16 "const_12_to_15_operand")
12973 (match_operand 17 "const_12_to_15_operand")])))]
12975 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12976 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12977 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12978 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
12979 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
12980 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
12981 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
12982 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
12983 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
12984 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
12985 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
12986 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
;; Pack the four 2-bit selectors back into one immediate byte.
12989 mask |= INTVAL (operands[2]) << 0;
12990 mask |= INTVAL (operands[3]) << 2;
12991 mask |= INTVAL (operands[4]) << 4;
12992 mask |= INTVAL (operands[5]) << 6;
12993 operands[2] = GEN_INT (mask);
12995 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
12997 [(set_attr "type" "sselog1")
12998 (set_attr "prefix" "evex")
12999 (set_attr "length_immediate" "1")
13000 (set_attr "mode" "XI")])
;; Masked 256-bit vpshufd (AVX512VL): decode the immediate into eight
;; indices, repeating the 4-element permutation in both 128-bit lanes.
13002 (define_expand "avx512vl_pshufdv3_mask"
13003 [(match_operand:V8SI 0 "register_operand")
13004 (match_operand:V8SI 1 "nonimmediate_operand")
13005 (match_operand:SI 2 "const_0_to_255_operand")
13006 (match_operand:V8SI 3 "register_operand")
13007 (match_operand:QI 4 "register_operand")]
13010 int mask = INTVAL (operands[2]);
13011 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
13012 GEN_INT ((mask >> 0) & 3),
13013 GEN_INT ((mask >> 2) & 3),
13014 GEN_INT ((mask >> 4) & 3),
13015 GEN_INT ((mask >> 6) & 3),
;; High-lane indices: same permutation shifted by 4.
13016 GEN_INT (((mask >> 0) & 3) + 4),
13017 GEN_INT (((mask >> 2) & 3) + 4),
13018 GEN_INT (((mask >> 4) & 3) + 4),
13019 GEN_INT (((mask >> 6) & 3) + 4),
13020 operands[3], operands[4]));

;; Unmasked 256-bit vpshufd expander (AVX2): same decoding, no mask.
13024 (define_expand "avx2_pshufdv3"
13025 [(match_operand:V8SI 0 "register_operand")
13026 (match_operand:V8SI 1 "nonimmediate_operand")
13027 (match_operand:SI 2 "const_0_to_255_operand")]
13030 int mask = INTVAL (operands[2]);
13031 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
13032 GEN_INT ((mask >> 0) & 3),
13033 GEN_INT ((mask >> 2) & 3),
13034 GEN_INT ((mask >> 4) & 3),
13035 GEN_INT ((mask >> 6) & 3),
13036 GEN_INT (((mask >> 0) & 3) + 4),
13037 GEN_INT (((mask >> 2) & 3) + 4),
13038 GEN_INT (((mask >> 4) & 3) + 4),
13039 GEN_INT (((mask >> 6) & 3) + 4)));

;; Matcher: requires the high lane to mirror the low lane (+4), then
;; re-packs the low-lane selectors into the vpshufd immediate byte.
13043 (define_insn "avx2_pshufd_1<mask_name>"
13044 [(set (match_operand:V8SI 0 "register_operand" "=v")
13046 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
13047 (parallel [(match_operand 2 "const_0_to_3_operand")
13048 (match_operand 3 "const_0_to_3_operand")
13049 (match_operand 4 "const_0_to_3_operand")
13050 (match_operand 5 "const_0_to_3_operand")
13051 (match_operand 6 "const_4_to_7_operand")
13052 (match_operand 7 "const_4_to_7_operand")
13053 (match_operand 8 "const_4_to_7_operand")
13054 (match_operand 9 "const_4_to_7_operand")])))]
13056 && <mask_avx512vl_condition>
13057 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
13058 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
13059 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
13060 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
13063 mask |= INTVAL (operands[2]) << 0;
13064 mask |= INTVAL (operands[3]) << 2;
13065 mask |= INTVAL (operands[4]) << 4;
13066 mask |= INTVAL (operands[5]) << 6;
13067 operands[2] = GEN_INT (mask);
13069 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13071 [(set_attr "type" "sselog1")
13072 (set_attr "prefix" "maybe_evex")
13073 (set_attr "length_immediate" "1")
13074 (set_attr "mode" "OI")])
;; Masked 128-bit pshufd (AVX512VL): decode the immediate into four
;; 2-bit element selectors and emit the masked _1 pattern.
13076 (define_expand "avx512vl_pshufd_mask"
13077 [(match_operand:V4SI 0 "register_operand")
13078 (match_operand:V4SI 1 "nonimmediate_operand")
13079 (match_operand:SI 2 "const_0_to_255_operand")
13080 (match_operand:V4SI 3 "register_operand")
13081 (match_operand:QI 4 "register_operand")]
13084 int mask = INTVAL (operands[2]);
13085 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
13086 GEN_INT ((mask >> 0) & 3),
13087 GEN_INT ((mask >> 2) & 3),
13088 GEN_INT ((mask >> 4) & 3),
13089 GEN_INT ((mask >> 6) & 3),
13090 operands[3], operands[4]));

;; Unmasked 128-bit pshufd expander (SSE2): same immediate decoding.
13094 (define_expand "sse2_pshufd"
13095 [(match_operand:V4SI 0 "register_operand")
13096 (match_operand:V4SI 1 "vector_operand")
13097 (match_operand:SI 2 "const_int_operand")]
13100 int mask = INTVAL (operands[2]);
13101 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
13102 GEN_INT ((mask >> 0) & 3),
13103 GEN_INT ((mask >> 2) & 3),
13104 GEN_INT ((mask >> 4) & 3),
13105 GEN_INT ((mask >> 6) & 3)));

;; Matcher: packs the four selectors back into the pshufd immediate.
;; %v prefix emits the VEX form (vpshufd) when AVX is enabled.
13109 (define_insn "sse2_pshufd_1<mask_name>"
13110 [(set (match_operand:V4SI 0 "register_operand" "=v")
13112 (match_operand:V4SI 1 "vector_operand" "vBm")
13113 (parallel [(match_operand 2 "const_0_to_3_operand")
13114 (match_operand 3 "const_0_to_3_operand")
13115 (match_operand 4 "const_0_to_3_operand")
13116 (match_operand 5 "const_0_to_3_operand")])))]
13117 "TARGET_SSE2 && <mask_avx512vl_condition>"
13120 mask |= INTVAL (operands[2]) << 0;
13121 mask |= INTVAL (operands[3]) << 2;
13122 mask |= INTVAL (operands[4]) << 4;
13123 mask |= INTVAL (operands[5]) << 6;
13124 operands[2] = GEN_INT (mask);
13126 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13128 [(set_attr "type" "sselog1")
13129 (set_attr "prefix_data16" "1")
13130 (set_attr "prefix" "<mask_prefix2>")
13131 (set_attr "length_immediate" "1")
13132 (set_attr "mode" "TI")])
;; 512-bit vpshuflw (AVX512BW), kept as an unspec with the raw 8-bit
;; immediate rather than an explicit vec_select permutation.
13134 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
13135 [(set (match_operand:V32HI 0 "register_operand" "=v")
13137 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
13138 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13141 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13142 [(set_attr "type" "sselog")
13143 (set_attr "prefix" "evex")
13144 (set_attr "mode" "XI")])
;; Masked 256-bit vpshuflw (AVX512VL+BW): decode the immediate into the
;; low-quad word indices of both 128-bit lanes (+0 and +8); the high
;; quads of each lane are left in place by the _1 pattern.
13146 (define_expand "avx512vl_pshuflwv3_mask"
13147 [(match_operand:V16HI 0 "register_operand")
13148 (match_operand:V16HI 1 "nonimmediate_operand")
13149 (match_operand:SI 2 "const_0_to_255_operand")
13150 (match_operand:V16HI 3 "register_operand")
13151 (match_operand:HI 4 "register_operand")]
13152 "TARGET_AVX512VL && TARGET_AVX512BW"
13154 int mask = INTVAL (operands[2]);
13155 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
13156 GEN_INT ((mask >> 0) & 3),
13157 GEN_INT ((mask >> 2) & 3),
13158 GEN_INT ((mask >> 4) & 3),
13159 GEN_INT ((mask >> 6) & 3),
;; +8: same permutation applied to the second 128-bit lane.
13160 GEN_INT (((mask >> 0) & 3) + 8),
13161 GEN_INT (((mask >> 2) & 3) + 8),
13162 GEN_INT (((mask >> 4) & 3) + 8),
13163 GEN_INT (((mask >> 6) & 3) + 8),
13164 operands[3], operands[4]));

;; Unmasked 256-bit vpshuflw expander (AVX2): same decoding, no mask.
13168 (define_expand "avx2_pshuflwv3"
13169 [(match_operand:V16HI 0 "register_operand")
13170 (match_operand:V16HI 1 "nonimmediate_operand")
13171 (match_operand:SI 2 "const_0_to_255_operand")]
13174 int mask = INTVAL (operands[2]);
13175 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
13176 GEN_INT ((mask >> 0) & 3),
13177 GEN_INT ((mask >> 2) & 3),
13178 GEN_INT ((mask >> 4) & 3),
13179 GEN_INT ((mask >> 6) & 3),
13180 GEN_INT (((mask >> 0) & 3) + 8),
13181 GEN_INT (((mask >> 2) & 3) + 8),
13182 GEN_INT (((mask >> 4) & 3) + 8),
13183 GEN_INT (((mask >> 6) & 3) + 8)));

;; Matcher: permutes only the low four HI words of each lane (high
;; words are fixed const_ints in the parallel, elided in this excerpt);
;; requires the second lane to mirror the first (+8).
13187 (define_insn "avx2_pshuflw_1<mask_name>"
13188 [(set (match_operand:V16HI 0 "register_operand" "=v")
13190 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
13191 (parallel [(match_operand 2 "const_0_to_3_operand")
13192 (match_operand 3 "const_0_to_3_operand")
13193 (match_operand 4 "const_0_to_3_operand")
13194 (match_operand 5 "const_0_to_3_operand")
13199 (match_operand 6 "const_8_to_11_operand")
13200 (match_operand 7 "const_8_to_11_operand")
13201 (match_operand 8 "const_8_to_11_operand")
13202 (match_operand 9 "const_8_to_11_operand")
13206 (const_int 15)])))]
13208 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
13209 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13210 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13211 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13212 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
;; Pack the four selectors back into the vpshuflw immediate byte.
13215 mask |= INTVAL (operands[2]) << 0;
13216 mask |= INTVAL (operands[3]) << 2;
13217 mask |= INTVAL (operands[4]) << 4;
13218 mask |= INTVAL (operands[5]) << 6;
13219 operands[2] = GEN_INT (mask);
13221 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13223 [(set_attr "type" "sselog")
13224 (set_attr "prefix" "maybe_evex")
13225 (set_attr "length_immediate" "1")
13226 (set_attr "mode" "OI")])
;; Masked 128-bit pshuflw (AVX512VL+BW): decode the immediate into the
;; four low-quad word indices and emit the masked _1 pattern.
13228 (define_expand "avx512vl_pshuflw_mask"
13229 [(match_operand:V8HI 0 "register_operand")
13230 (match_operand:V8HI 1 "nonimmediate_operand")
13231 (match_operand:SI 2 "const_0_to_255_operand")
13232 (match_operand:V8HI 3 "register_operand")
13233 (match_operand:QI 4 "register_operand")]
13234 "TARGET_AVX512VL && TARGET_AVX512BW"
13236 int mask = INTVAL (operands[2]);
13237 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
13238 GEN_INT ((mask >> 0) & 3),
13239 GEN_INT ((mask >> 2) & 3),
13240 GEN_INT ((mask >> 4) & 3),
13241 GEN_INT ((mask >> 6) & 3),
13242 operands[3], operands[4]));

;; Unmasked 128-bit pshuflw expander (SSE2).
13246 (define_expand "sse2_pshuflw"
13247 [(match_operand:V8HI 0 "register_operand")
13248 (match_operand:V8HI 1 "vector_operand")
13249 (match_operand:SI 2 "const_int_operand")]
13252 int mask = INTVAL (operands[2]);
13253 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
13254 GEN_INT ((mask >> 0) & 3),
13255 GEN_INT ((mask >> 2) & 3),
13256 GEN_INT ((mask >> 4) & 3),
13257 GEN_INT ((mask >> 6) & 3)));

;; Matcher: permutes the low four HI words; high words stay in place
;; (their fixed const_ints are elided in this excerpt).  %v emits the
;; VEX form when AVX is enabled.
13261 (define_insn "sse2_pshuflw_1<mask_name>"
13262 [(set (match_operand:V8HI 0 "register_operand" "=v")
13264 (match_operand:V8HI 1 "vector_operand" "vBm")
13265 (parallel [(match_operand 2 "const_0_to_3_operand")
13266 (match_operand 3 "const_0_to_3_operand")
13267 (match_operand 4 "const_0_to_3_operand")
13268 (match_operand 5 "const_0_to_3_operand")
13273 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
13276 mask |= INTVAL (operands[2]) << 0;
13277 mask |= INTVAL (operands[3]) << 2;
13278 mask |= INTVAL (operands[4]) << 4;
13279 mask |= INTVAL (operands[5]) << 6;
13280 operands[2] = GEN_INT (mask);
13282 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13284 [(set_attr "type" "sselog")
13285 (set_attr "prefix_data16" "0")
13286 (set_attr "prefix_rep" "1")
13287 (set_attr "prefix" "maybe_vex")
13288 (set_attr "length_immediate" "1")
13289 (set_attr "mode" "TI")])
;; Unmasked 256-bit vpshufhw expander (AVX2): decode the immediate into
;; the high-quad word indices of both lanes (+4 and +12); the low quads
;; are left untouched by the _1 pattern.
13291 (define_expand "avx2_pshufhwv3"
13292 [(match_operand:V16HI 0 "register_operand")
13293 (match_operand:V16HI 1 "nonimmediate_operand")
13294 (match_operand:SI 2 "const_0_to_255_operand")]
13297 int mask = INTVAL (operands[2]);
13298 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
;; +4: indices of the high quad of lane 0.
13299 GEN_INT (((mask >> 0) & 3) + 4),
13300 GEN_INT (((mask >> 2) & 3) + 4),
13301 GEN_INT (((mask >> 4) & 3) + 4),
13302 GEN_INT (((mask >> 6) & 3) + 4),
;; +12: same permutation in the high quad of lane 1.
13303 GEN_INT (((mask >> 0) & 3) + 12),
13304 GEN_INT (((mask >> 2) & 3) + 12),
13305 GEN_INT (((mask >> 4) & 3) + 12),
13306 GEN_INT (((mask >> 6) & 3) + 12)));
;; 512-bit vpshufhw (AVX512BW), kept as an unspec with the raw 8-bit
;; immediate rather than an explicit vec_select permutation.
13310 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
13311 [(set (match_operand:V32HI 0 "register_operand" "=v")
13313 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
13314 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13317 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13318 [(set_attr "type" "sselog")
13319 (set_attr "prefix" "evex")
13320 (set_attr "mode" "XI")])
;; Masked 256-bit vpshufhw (AVX512VL+BW): decode the immediate into the
;; high-quad indices of both lanes (+4, +12) and emit the masked
;; _1 pattern.
13322 (define_expand "avx512vl_pshufhwv3_mask"
13323 [(match_operand:V16HI 0 "register_operand")
13324 (match_operand:V16HI 1 "nonimmediate_operand")
13325 (match_operand:SI 2 "const_0_to_255_operand")
13326 (match_operand:V16HI 3 "register_operand")
13327 (match_operand:HI 4 "register_operand")]
13328 "TARGET_AVX512VL && TARGET_AVX512BW"
13330 int mask = INTVAL (operands[2]);
13331 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
13332 GEN_INT (((mask >> 0) & 3) + 4),
13333 GEN_INT (((mask >> 2) & 3) + 4),
13334 GEN_INT (((mask >> 4) & 3) + 4),
13335 GEN_INT (((mask >> 6) & 3) + 4),
13336 GEN_INT (((mask >> 0) & 3) + 12),
13337 GEN_INT (((mask >> 2) & 3) + 12),
13338 GEN_INT (((mask >> 4) & 3) + 12),
13339 GEN_INT (((mask >> 6) & 3) + 12),
13340 operands[3], operands[4]));

;; Matcher: low quads are fixed (const_int 0.. elided in this excerpt),
;; only the high quad of each lane is permuted; the second lane must
;; mirror the first (+8).  The immediate is rebuilt from indices - 4.
13344 (define_insn "avx2_pshufhw_1<mask_name>"
13345 [(set (match_operand:V16HI 0 "register_operand" "=v")
13347 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
13348 (parallel [(const_int 0)
13352 (match_operand 2 "const_4_to_7_operand")
13353 (match_operand 3 "const_4_to_7_operand")
13354 (match_operand 4 "const_4_to_7_operand")
13355 (match_operand 5 "const_4_to_7_operand")
13360 (match_operand 6 "const_12_to_15_operand")
13361 (match_operand 7 "const_12_to_15_operand")
13362 (match_operand 8 "const_12_to_15_operand")
13363 (match_operand 9 "const_12_to_15_operand")])))]
13365 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
13366 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13367 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13368 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13369 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
;; Subtract the +4 lane offset to recover each 2-bit selector.
13372 mask |= (INTVAL (operands[2]) - 4) << 0;
13373 mask |= (INTVAL (operands[3]) - 4) << 2;
13374 mask |= (INTVAL (operands[4]) - 4) << 4;
13375 mask |= (INTVAL (operands[5]) - 4) << 6;
13376 operands[2] = GEN_INT (mask);
13378 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13380 [(set_attr "type" "sselog")
13381 (set_attr "prefix" "maybe_evex")
13382 (set_attr "length_immediate" "1")
13383 (set_attr "mode" "OI")])
;; Masked 128-bit pshufhw (AVX512VL+BW): decode the immediate into the
;; four high-quad word indices (+4) and emit the masked _1 pattern.
13385 (define_expand "avx512vl_pshufhw_mask"
13386 [(match_operand:V8HI 0 "register_operand")
13387 (match_operand:V8HI 1 "nonimmediate_operand")
13388 (match_operand:SI 2 "const_0_to_255_operand")
13389 (match_operand:V8HI 3 "register_operand")
13390 (match_operand:QI 4 "register_operand")]
13391 "TARGET_AVX512VL && TARGET_AVX512BW"
13393 int mask = INTVAL (operands[2]);
13394 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
13395 GEN_INT (((mask >> 0) & 3) + 4),
13396 GEN_INT (((mask >> 2) & 3) + 4),
13397 GEN_INT (((mask >> 4) & 3) + 4),
13398 GEN_INT (((mask >> 6) & 3) + 4),
13399 operands[3], operands[4]));

;; Unmasked 128-bit pshufhw expander (SSE2).
13403 (define_expand "sse2_pshufhw"
13404 [(match_operand:V8HI 0 "register_operand")
13405 (match_operand:V8HI 1 "vector_operand")
13406 (match_operand:SI 2 "const_int_operand")]
13409 int mask = INTVAL (operands[2]);
13410 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
13411 GEN_INT (((mask >> 0) & 3) + 4),
13412 GEN_INT (((mask >> 2) & 3) + 4),
13413 GEN_INT (((mask >> 4) & 3) + 4),
13414 GEN_INT (((mask >> 6) & 3) + 4)));

;; Matcher: low quad fixed (const_int 0.. elided in this excerpt), high
;; quad permuted; immediate rebuilt from indices - 4.  %v emits the
;; VEX form when AVX is enabled.
13418 (define_insn "sse2_pshufhw_1<mask_name>"
13419 [(set (match_operand:V8HI 0 "register_operand" "=v")
13421 (match_operand:V8HI 1 "vector_operand" "vBm")
13422 (parallel [(const_int 0)
13426 (match_operand 2 "const_4_to_7_operand")
13427 (match_operand 3 "const_4_to_7_operand")
13428 (match_operand 4 "const_4_to_7_operand")
13429 (match_operand 5 "const_4_to_7_operand")])))]
13430 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
13433 mask |= (INTVAL (operands[2]) - 4) << 0;
13434 mask |= (INTVAL (operands[3]) - 4) << 2;
13435 mask |= (INTVAL (operands[4]) - 4) << 4;
13436 mask |= (INTVAL (operands[5]) - 4) << 6;
13437 operands[2] = GEN_INT (mask);
13439 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13441 [(set_attr "type" "sselog")
13442 (set_attr "prefix_rep" "1")
13443 (set_attr "prefix_data16" "0")
13444 (set_attr "prefix" "maybe_vex")
13445 (set_attr "length_immediate" "1")
13446 (set_attr "mode" "TI")])
;; Load a 32-bit scalar into element 0 of a V4SI vector, zeroing the
;; rest: the expander supplies a zero vector as the merge source.
13448 (define_expand "sse2_loadd"
13449 [(set (match_operand:V4SI 0 "register_operand")
13451 (vec_duplicate:V4SI
13452 (match_operand:SI 1 "nonimmediate_operand"))
13456 "operands[2] = CONST0_RTX (V4SImode);")

;; Insert scalar operand 2 into element 0, merging with vector operand
;; 1 for the remaining elements.  Alternatives: movd from memory / GPR
;; (operand 1 must be zero, "C"), movss (SSE), or 3-operand vmovss.
13458 (define_insn "sse2_loadld"
13459 [(set (match_operand:V4SI 0 "register_operand" "=v,Yi,x,x,v")
13461 (vec_duplicate:V4SI
13462 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
13463 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
13467 %vmovd\t{%2, %0|%0, %2}
13468 %vmovd\t{%2, %0|%0, %2}
13469 movss\t{%2, %0|%0, %2}
13470 movss\t{%2, %0|%0, %2}
13471 vmovss\t{%2, %1, %0|%0, %1, %2}"
13472 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
13473 (set_attr "type" "ssemov")
13474 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
13475 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; QI and HI modes handled by pextr patterns.
;; V16QI needs SSE4.1 (pextrb); V8HI works with plain SSE2 (pextrw).
13478 (define_mode_iterator PEXTR_MODE12
13479 [(V16QI "TARGET_SSE4_1") V8HI]) 

;; Extract one byte/word element to a GPR or memory via pextrb/pextrw.
;; Alternatives 2-3 use the EVEX-encoded (AVX512BW) register forms.
;; %k0 selects the 32-bit low part of the destination register.
13481 (define_insn "*vec_extract<mode>"
13482 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
13483 (vec_select:<ssescalarmode>
13484 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
13486 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
13489 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13490 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
13491 vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13492 vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13493 [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
13494 (set_attr "type" "sselog1")
13495 (set_attr "prefix_data16" "1")
;; pextrw-to-register needs no 0F3A escape; other forms do.
13496 (set (attr "prefix_extra")
13498 (and (eq_attr "alternative" "0,2")
13499 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13501 (const_string "1")))
13502 (set_attr "length_immediate" "1")
13503 (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
13504 (set_attr "mode" "TI")])
;; pextrb/pextrw with the zero-extension to the full GPR made explicit,
;; so the combiner can fold an extract + zero_extend pair.
13506 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
13507 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
13509 (vec_select:<PEXTR_MODE12:ssescalarmode>
13510 (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
13512 [(match_operand:SI 2
13513 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
13516 %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13517 vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
13518 [(set_attr "isa" "*,avx512bw")
13519 (set_attr "type" "sselog1")
13520 (set_attr "prefix_data16" "1")
13521 (set (attr "prefix_extra")
13523 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
13525 (const_string "1")))
13526 (set_attr "length_immediate" "1")
13527 (set_attr "prefix" "maybe_vex")
13528 (set_attr "mode" "TI")])

;; Element extract from a vector already in memory: just a scalar load
;; (offsettable "o" constraint); no SSE instruction is emitted.
13530 (define_insn "*vec_extract<mode>_mem"
13531 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
13532 (vec_select:<ssescalarmode>
13533 (match_operand:VI12_128 1 "memory_operand" "o")
13535 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extract element 0 of an SI/DI vector: a plain scalar move (GPR,
;; vector register, or store) since element 0 is the low bits.
13539 (define_insn "*vec_extract<ssevecmodelower>_0"
13540 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,v ,m")
13542 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,vm,v")
13543 (parallel [(const_int 0)])))]
13544 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"

;; 32-bit target variant: DI element 0 extract without a DI GPR.
13547 (define_insn "*vec_extractv2di_0_sse"
13548 [(set (match_operand:DI 0 "nonimmediate_operand" "=v,m")
13550 (match_operand:V2DI 1 "nonimmediate_operand" "vm,v")
13551 (parallel [(const_int 0)])))]
13552 "TARGET_SSE && !TARGET_64BIT
13553 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"

;; After reload, rewrite an element-0 extract as a simple scalar move
;; of the vector register's low part.
;; NOTE(review): the "(define_split" opener (orig. line 13556) is
;; dropped from this excerpt.
13557 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13559 (match_operand:<ssevecmode> 1 "register_operand")
13560 (parallel [(const_int 0)])))]
13561 "TARGET_SSE && reload_completed"
13562 [(set (match_dup 0) (match_dup 1))]
13563 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
;; Element-0 extract of V4SI zero-extended to DI.  The three
;; alternatives cover GPR (x64 only), SSE and AVX512F register files.
13565 (define_insn "*vec_extractv4si_0_zext_sse4"
13566 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
13569 (match_operand:V4SI 1 "register_operand" "Yj,x,v")
13570 (parallel [(const_int 0)]))))]
13573 [(set_attr "isa" "x64,*,avx512f")])

;; GPR-only variant gated on inter-unit moves from vector registers.
13575 (define_insn "*vec_extractv4si_0_zext"
13576 [(set (match_operand:DI 0 "register_operand" "=r")
13579 (match_operand:V4SI 1 "register_operand" "x")
13580 (parallel [(const_int 0)]))))]
13581 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"

;; After reload, rewrite the zero-extended element-0 extract as a
;; zero_extend of the vector register's low SImode part.
;; NOTE(review): the "(define_split" opener (orig. line 13584) is
;; dropped from this excerpt.
13585 [(set (match_operand:DI 0 "register_operand")
13588 (match_operand:V4SI 1 "register_operand")
13589 (parallel [(const_int 0)]))))]
13590 "TARGET_SSE2 && reload_completed"
13591 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13592 "operands[1] = gen_lowpart (SImode, operands[1]);")
;; Extract an arbitrary 32-bit element of V4SI.  First two alternatives
;; use pextrd (SSE4.1 / EVEX); the rest shift the wanted element into
;; position with psrldq (byte shift, hence index * 4).
13594 (define_insn "*vec_extractv4si"
13595 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
13597 (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
13598 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
13601 switch (which_alternative)
13605 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
;; Non-AVX: shift in place (operand 0 tied to operand 1).
13609 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
13610 return "psrldq\t{%2, %0|%0, %2}";
;; AVX: 3-operand shift, no tie needed.
13614 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
13615 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
13618 gcc_unreachable ();
13621 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
13622 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
13623 (set (attr "prefix_extra")
13624 (if_then_else (eq_attr "alternative" "0,1")
13626 (const_string "*")))
13627 (set_attr "length_immediate" "1")
13628 (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
13629 (set_attr "mode" "TI")])

;; pextrd to a GPR with explicit zero-extension to DI (pextrd writing
;; a 32-bit GPR zero-extends implicitly; %k0 selects the 32-bit part).
13631 (define_insn "*vec_extractv4si_zext"
13632 [(set (match_operand:DI 0 "register_operand" "=r,r")
13635 (match_operand:V4SI 1 "register_operand" "x,v")
13636 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13637 "TARGET_64BIT && TARGET_SSE4_1"
13638 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
13639 [(set_attr "isa" "*,avx512dq")
13640 (set_attr "type" "sselog1")
13641 (set_attr "prefix_extra" "1")
13642 (set_attr "length_immediate" "1")
13643 (set_attr "prefix" "maybe_vex")
13644 (set_attr "mode" "TI")])
13646 (define_insn "*vec_extractv4si_mem"
13647 [(set (match_operand:SI 0 "register_operand" "=x,r")
13649 (match_operand:V4SI 1 "memory_operand" "o,o")
13650 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
13654 (define_insn_and_split "*vec_extractv4si_zext_mem"
13655 [(set (match_operand:DI 0 "register_operand" "=x,r")
13658 (match_operand:V4SI 1 "memory_operand" "o,o")
13659 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13660 "TARGET_64BIT && TARGET_SSE"
13662 "&& reload_completed"
13663 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13665 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
13668 (define_insn "*vec_extractv2di_1"
13669 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
13671 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
13672 (parallel [(const_int 1)])))]
13673 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13675 %vpextrq\t{$1, %1, %0|%0, %1, 1}
13676 vpextrq\t{$1, %1, %0|%0, %1, 1}
13677 %vmovhps\t{%1, %0|%0, %1}
13678 psrldq\t{$8, %0|%0, 8}
13679 vpsrldq\t{$8, %1, %0|%0, %1, 8}
13680 vpsrldq\t{$8, %1, %0|%0, %1, 8}
13681 movhlps\t{%1, %0|%0, %1}
13685 (cond [(eq_attr "alternative" "0")
13686 (const_string "x64_sse4")
13687 (eq_attr "alternative" "1")
13688 (const_string "x64_avx512dq")
13689 (eq_attr "alternative" "3")
13690 (const_string "sse2_noavx")
13691 (eq_attr "alternative" "4")
13692 (const_string "avx")
13693 (eq_attr "alternative" "5")
13694 (const_string "avx512bw")
13695 (eq_attr "alternative" "6")
13696 (const_string "noavx")
13697 (eq_attr "alternative" "8")
13698 (const_string "x64")
13700 (const_string "*")))
13702 (cond [(eq_attr "alternative" "2,6,7")
13703 (const_string "ssemov")
13704 (eq_attr "alternative" "3,4,5")
13705 (const_string "sseishft1")
13706 (eq_attr "alternative" "8")
13707 (const_string "imov")
13709 (const_string "sselog1")))
13710 (set (attr "length_immediate")
13711 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
13713 (const_string "*")))
13714 (set (attr "prefix_rex")
13715 (if_then_else (eq_attr "alternative" "0,1")
13717 (const_string "*")))
13718 (set (attr "prefix_extra")
13719 (if_then_else (eq_attr "alternative" "0,1")
13721 (const_string "*")))
13722 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
13723 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
13726 [(set (match_operand:<ssescalarmode> 0 "register_operand")
13727 (vec_select:<ssescalarmode>
13728 (match_operand:VI_128 1 "memory_operand")
13730 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13731 "TARGET_SSE && reload_completed"
13732 [(set (match_dup 0) (match_dup 1))]
13734 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
13736 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
13739 (define_insn "*vec_extractv2ti"
13740 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
13742 (match_operand:V2TI 1 "register_operand" "x,v")
13744 [(match_operand:SI 2 "const_0_to_1_operand")])))]
13747 vextract%~128\t{%2, %1, %0|%0, %1, %2}
13748 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
13749 [(set_attr "type" "sselog")
13750 (set_attr "prefix_extra" "1")
13751 (set_attr "length_immediate" "1")
13752 (set_attr "prefix" "vex,evex")
13753 (set_attr "mode" "OI")])
13755 (define_insn "*vec_extractv4ti"
13756 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
13758 (match_operand:V4TI 1 "register_operand" "v")
13760 [(match_operand:SI 2 "const_0_to_3_operand")])))]
13762 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
13763 [(set_attr "type" "sselog")
13764 (set_attr "prefix_extra" "1")
13765 (set_attr "length_immediate" "1")
13766 (set_attr "prefix" "evex")
13767 (set_attr "mode" "XI")])
;; Mode iterator for splitting an extraction of the low 128-bit (TImode)
;; lane out of a wider TImode vector: V2TI is available in this context,
;; V4TI additionally requires AVX512F (64-byte vectors).
;; NOTE(review): the original lines carried residual numeric prefixes
;; ("13769"/"13770") left over from a line-numbered extraction; they are
;; invalid tokens in a machine description and have been removed.
(define_mode_iterator VEXTRACTI128_MODE
  [(V4TI "TARGET_AVX512F") V2TI])
13773 [(set (match_operand:TI 0 "nonimmediate_operand")
13775 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
13776 (parallel [(const_int 0)])))]
13778 && reload_completed
13779 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
13780 [(set (match_dup 0) (match_dup 1))]
13781 "operands[1] = gen_lowpart (TImode, operands[1]);")
;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
;; vector modes into vec_extract*.
13786 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13787 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
13788 "can_create_pseudo_p ()
13789 && REG_P (operands[1])
13790 && VECTOR_MODE_P (GET_MODE (operands[1]))
13791 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
13792 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
13793 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
13794 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
13795 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
13796 (parallel [(const_int 0)])))]
13800 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
13803 if (<MODE>mode == SImode)
13805 tmp = gen_reg_rtx (V8SImode);
13806 emit_insn (gen_vec_extract_lo_v16si (tmp,
13807 gen_lowpart (V16SImode,
13812 tmp = gen_reg_rtx (V4DImode);
13813 emit_insn (gen_vec_extract_lo_v8di (tmp,
13814 gen_lowpart (V8DImode,
13820 tmp = gen_reg_rtx (<ssevecmode>mode);
13821 if (<MODE>mode == SImode)
13822 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
13825 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
13830 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
13835 (define_insn "*vec_concatv2si_sse4_1"
13836 [(set (match_operand:V2SI 0 "register_operand"
13837 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
13839 (match_operand:SI 1 "nonimmediate_operand"
13840 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
13841 (match_operand:SI 2 "vector_move_operand"
13842 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
13843 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13845 pinsrd\t{$1, %2, %0|%0, %2, 1}
13846 pinsrd\t{$1, %2, %0|%0, %2, 1}
13847 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
13848 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
13849 punpckldq\t{%2, %0|%0, %2}
13850 punpckldq\t{%2, %0|%0, %2}
13851 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
13852 %vmovd\t{%1, %0|%0, %1}
13853 punpckldq\t{%2, %0|%0, %2}
13854 movd\t{%1, %0|%0, %1}"
13855 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
13857 (cond [(eq_attr "alternative" "7")
13858 (const_string "ssemov")
13859 (eq_attr "alternative" "8")
13860 (const_string "mmxcvt")
13861 (eq_attr "alternative" "9")
13862 (const_string "mmxmov")
13864 (const_string "sselog")))
13865 (set (attr "prefix_extra")
13866 (if_then_else (eq_attr "alternative" "0,1,2,3")
13868 (const_string "*")))
13869 (set (attr "length_immediate")
13870 (if_then_else (eq_attr "alternative" "0,1,2,3")
13872 (const_string "*")))
13873 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
13874 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
13879 (define_insn "*vec_concatv2si"
13880 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
13882 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
13883 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
13884 "TARGET_SSE && !TARGET_SSE4_1"
13886 punpckldq\t{%2, %0|%0, %2}
13887 movd\t{%1, %0|%0, %1}
13888 movd\t{%1, %0|%0, %1}
13889 unpcklps\t{%2, %0|%0, %2}
13890 movss\t{%1, %0|%0, %1}
13891 punpckldq\t{%2, %0|%0, %2}
13892 movd\t{%1, %0|%0, %1}"
13893 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
13894 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
13895 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
13897 (define_insn "*vec_concatv4si"
13898 [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v")
13900 (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v")
13901 (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
13904 punpcklqdq\t{%2, %0|%0, %2}
13905 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
13906 movlhps\t{%2, %0|%0, %2}
13907 movhps\t{%2, %0|%0, %q2}
13908 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
13909 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
13910 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
13911 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
13912 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
;; movd instead of movq is required to handle broken assemblers.
13915 (define_insn "vec_concatv2di"
13916 [(set (match_operand:V2DI 0 "register_operand"
13917 "=Yr,*x,x ,v ,Yi,v ,x ,x,v ,x,x,v")
13919 (match_operand:DI 1 "nonimmediate_operand"
13920 " 0, 0,x ,Yv,r ,vm,?!*Yn,0,Yv,0,0,v")
13921 (match_operand:DI 2 "vector_move_operand"
13922 " rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))]
13925 pinsrq\t{$1, %2, %0|%0, %2, 1}
13926 pinsrq\t{$1, %2, %0|%0, %2, 1}
13927 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
13928 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
13929 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
13930 %vmovq\t{%1, %0|%0, %1}
13931 movq2dq\t{%1, %0|%0, %1}
13932 punpcklqdq\t{%2, %0|%0, %2}
13933 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
13934 movlhps\t{%2, %0|%0, %2}
13935 movhps\t{%2, %0|%0, %2}
13936 vmovhps\t{%2, %1, %0|%0, %1, %2}"
13938 (cond [(eq_attr "alternative" "0,1")
13939 (const_string "x64_sse4_noavx")
13940 (eq_attr "alternative" "2")
13941 (const_string "x64_avx")
13942 (eq_attr "alternative" "3")
13943 (const_string "x64_avx512dq")
13944 (eq_attr "alternative" "4")
13945 (const_string "x64")
13946 (eq_attr "alternative" "5,6")
13947 (const_string "sse2")
13948 (eq_attr "alternative" "7")
13949 (const_string "sse2_noavx")
13950 (eq_attr "alternative" "8,11")
13951 (const_string "avx")
13953 (const_string "noavx")))
13956 (eq_attr "alternative" "0,1,2,3,7,8")
13957 (const_string "sselog")
13958 (const_string "ssemov")))
13959 (set (attr "prefix_rex")
13960 (if_then_else (eq_attr "alternative" "0,1,2,3,4")
13962 (const_string "*")))
13963 (set (attr "prefix_extra")
13964 (if_then_else (eq_attr "alternative" "0,1,2,3")
13966 (const_string "*")))
13967 (set (attr "length_immediate")
13968 (if_then_else (eq_attr "alternative" "0,1,2,3")
13970 (const_string "*")))
13971 (set (attr "prefix")
13972 (cond [(eq_attr "alternative" "2")
13973 (const_string "vex")
13974 (eq_attr "alternative" "3")
13975 (const_string "evex")
13976 (eq_attr "alternative" "4,5")
13977 (const_string "maybe_vex")
13978 (eq_attr "alternative" "8,11")
13979 (const_string "maybe_evex")
13981 (const_string "orig")))
13982 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
13984 (define_expand "vec_unpacks_lo_<mode>"
13985 [(match_operand:<sseunpackmode> 0 "register_operand")
13986 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13988 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
13990 (define_expand "vec_unpacks_hi_<mode>"
13991 [(match_operand:<sseunpackmode> 0 "register_operand")
13992 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13994 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
13996 (define_expand "vec_unpacku_lo_<mode>"
13997 [(match_operand:<sseunpackmode> 0 "register_operand")
13998 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14000 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
14002 (define_expand "vec_unpacks_lo_hi"
14003 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
14004 (match_operand:HI 1 "register_operand"))]
14007 (define_expand "vec_unpacks_lo_si"
14008 [(set (match_operand:HI 0 "register_operand")
14009 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
14012 (define_expand "vec_unpacks_lo_di"
14013 [(set (match_operand:SI 0 "register_operand")
14014 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
14017 (define_expand "vec_unpacku_hi_<mode>"
14018 [(match_operand:<sseunpackmode> 0 "register_operand")
14019 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14021 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
14023 (define_expand "vec_unpacks_hi_hi"
14025 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
14026 (lshiftrt:HI (match_operand:HI 1 "register_operand")
14028 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
14031 (define_expand "vec_unpacks_hi_<mode>"
14033 [(set (subreg:SWI48x
14034 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
14035 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
14037 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
14039 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
14041 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14045 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14047 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
14048 [(set (match_operand:VI12_AVX2 0 "register_operand")
14049 (truncate:VI12_AVX2
14050 (lshiftrt:<ssedoublemode>
14051 (plus:<ssedoublemode>
14052 (plus:<ssedoublemode>
14053 (zero_extend:<ssedoublemode>
14054 (match_operand:VI12_AVX2 1 "vector_operand"))
14055 (zero_extend:<ssedoublemode>
14056 (match_operand:VI12_AVX2 2 "vector_operand")))
14057 (match_dup <mask_expand_op3>))
14059 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14062 if (<mask_applied>)
14064 operands[3] = CONST1_RTX(<MODE>mode);
14065 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
14067 if (<mask_applied>)
14069 operands[5] = operands[3];
14074 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
14075 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
14076 (truncate:VI12_AVX2
14077 (lshiftrt:<ssedoublemode>
14078 (plus:<ssedoublemode>
14079 (plus:<ssedoublemode>
14080 (zero_extend:<ssedoublemode>
14081 (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
14082 (zero_extend:<ssedoublemode>
14083 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
14084 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
14086 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14087 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14089 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
14090 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14091 [(set_attr "isa" "noavx,avx")
14092 (set_attr "type" "sseiadd")
14093 (set_attr "prefix_data16" "1,*")
14094 (set_attr "prefix" "orig,<mask_prefix>")
14095 (set_attr "mode" "<sseinsnmode>")])
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
14099 (define_insn "<sse2_avx2>_psadbw"
14100 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
14101 (unspec:VI8_AVX2_AVX512BW
14102 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
14103 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
14107 psadbw\t{%2, %0|%0, %2}
14108 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
14109 [(set_attr "isa" "noavx,avx")
14110 (set_attr "type" "sseiadd")
14111 (set_attr "atom_unit" "simul")
14112 (set_attr "prefix_data16" "1,*")
14113 (set_attr "prefix" "orig,maybe_evex")
14114 (set_attr "mode" "<sseinsnmode>")])
14116 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
14117 [(set (match_operand:SI 0 "register_operand" "=r")
14119 [(match_operand:VF_128_256 1 "register_operand" "x")]
14122 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
14123 [(set_attr "type" "ssemov")
14124 (set_attr "prefix" "maybe_vex")
14125 (set_attr "mode" "<MODE>")])
14127 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
14128 [(set (match_operand:DI 0 "register_operand" "=r")
14131 [(match_operand:VF_128_256 1 "register_operand" "x")]
14133 "TARGET_64BIT && TARGET_SSE"
14134 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
14135 [(set_attr "type" "ssemov")
14136 (set_attr "prefix" "maybe_vex")
14137 (set_attr "mode" "<MODE>")])
14139 (define_insn "<sse2_avx2>_pmovmskb"
14140 [(set (match_operand:SI 0 "register_operand" "=r")
14142 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
14145 "%vpmovmskb\t{%1, %0|%0, %1}"
14146 [(set_attr "type" "ssemov")
14147 (set (attr "prefix_data16")
14149 (match_test "TARGET_AVX")
14151 (const_string "1")))
14152 (set_attr "prefix" "maybe_vex")
14153 (set_attr "mode" "SI")])
14155 (define_insn "*<sse2_avx2>_pmovmskb_zext"
14156 [(set (match_operand:DI 0 "register_operand" "=r")
14159 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
14161 "TARGET_64BIT && TARGET_SSE2"
14162 "%vpmovmskb\t{%1, %k0|%k0, %1}"
14163 [(set_attr "type" "ssemov")
14164 (set (attr "prefix_data16")
14166 (match_test "TARGET_AVX")
14168 (const_string "1")))
14169 (set_attr "prefix" "maybe_vex")
14170 (set_attr "mode" "SI")])
14172 (define_expand "sse2_maskmovdqu"
14173 [(set (match_operand:V16QI 0 "memory_operand")
14174 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
14175 (match_operand:V16QI 2 "register_operand")
14180 (define_insn "*sse2_maskmovdqu"
14181 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
14182 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
14183 (match_operand:V16QI 2 "register_operand" "x")
14184 (mem:V16QI (match_dup 0))]
14188 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
14189 that requires %v to be at the beginning of the opcode name. */
14190 if (Pmode != word_mode)
14191 fputs ("\taddr32", asm_out_file);
14192 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
14194 [(set_attr "type" "ssemov")
14195 (set_attr "prefix_data16" "1")
14196 (set (attr "length_address")
14197 (symbol_ref ("Pmode != word_mode")))
14198 ;; The implicit %rdi operand confuses default length_vex computation.
14199 (set (attr "length_vex")
14200 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
14201 (set_attr "prefix" "maybe_vex")
14202 (set_attr "znver1_decode" "vector")
14203 (set_attr "mode" "TI")])
14205 (define_insn "sse_ldmxcsr"
14206 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
14210 [(set_attr "type" "sse")
14211 (set_attr "atom_sse_attr" "mxcsr")
14212 (set_attr "prefix" "maybe_vex")
14213 (set_attr "memory" "load")])
14215 (define_insn "sse_stmxcsr"
14216 [(set (match_operand:SI 0 "memory_operand" "=m")
14217 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
14220 [(set_attr "type" "sse")
14221 (set_attr "atom_sse_attr" "mxcsr")
14222 (set_attr "prefix" "maybe_vex")
14223 (set_attr "memory" "store")])
14225 (define_insn "sse2_clflush"
14226 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
14230 [(set_attr "type" "sse")
14231 (set_attr "atom_sse_attr" "fence")
14232 (set_attr "memory" "unknown")])
14234 ;; As per AMD and Intel ISA manuals, the first operand is extensions
14235 ;; and it goes to %ecx. The second operand received is hints and it goes
14237 (define_insn "sse3_mwait"
14238 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
14239 (match_operand:SI 1 "register_operand" "a")]
14242 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
14243 ;; Since 32bit register operands are implicitly zero extended to 64bit,
14244 ;; we only need to set up 32bit registers.
14246 [(set_attr "length" "3")])
14248 (define_insn "sse3_monitor_<mode>"
14249 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
14250 (match_operand:SI 1 "register_operand" "c")
14251 (match_operand:SI 2 "register_operand" "d")]
14254 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
14255 ;; RCX and RDX are used. Since 32bit register operands are implicitly
14256 ;; zero extended to 64bit, we only need to set up 32bit registers.
14258 [(set (attr "length")
14259 (symbol_ref ("(Pmode != word_mode) + 3")))])
14261 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14263 ;; SSSE3 instructions
14265 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the horizontal add/subtract operations handled by
;; the SSSE3/AVX2 ph<plusminus_mnemonic>w patterns below: wrapping
;; (plus/minus) and signed-saturating (ss_plus/ss_minus) forms.
;; NOTE(review): the original line carried a residual numeric prefix
;; ("14267") left over from a line-numbered extraction; it is an invalid
;; token in a machine description and has been removed.
(define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
14269 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
14270 [(set (match_operand:V16HI 0 "register_operand" "=x")
14275 (ssse3_plusminus:HI
14277 (match_operand:V16HI 1 "register_operand" "x")
14278 (parallel [(const_int 0)]))
14279 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14280 (ssse3_plusminus:HI
14281 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14282 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14284 (ssse3_plusminus:HI
14285 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
14286 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
14287 (ssse3_plusminus:HI
14288 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
14289 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
14292 (ssse3_plusminus:HI
14293 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
14294 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
14295 (ssse3_plusminus:HI
14296 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
14297 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
14299 (ssse3_plusminus:HI
14300 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
14301 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
14302 (ssse3_plusminus:HI
14303 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
14304 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
14308 (ssse3_plusminus:HI
14310 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14311 (parallel [(const_int 0)]))
14312 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14313 (ssse3_plusminus:HI
14314 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14315 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
14317 (ssse3_plusminus:HI
14318 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
14319 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
14320 (ssse3_plusminus:HI
14321 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
14322 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
14325 (ssse3_plusminus:HI
14326 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
14327 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
14328 (ssse3_plusminus:HI
14329 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
14330 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
14332 (ssse3_plusminus:HI
14333 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
14334 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
14335 (ssse3_plusminus:HI
14336 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
14337 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
14339 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
14340 [(set_attr "type" "sseiadd")
14341 (set_attr "prefix_extra" "1")
14342 (set_attr "prefix" "vex")
14343 (set_attr "mode" "OI")])
14345 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
14346 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
14350 (ssse3_plusminus:HI
14352 (match_operand:V8HI 1 "register_operand" "0,x")
14353 (parallel [(const_int 0)]))
14354 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14355 (ssse3_plusminus:HI
14356 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14357 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14359 (ssse3_plusminus:HI
14360 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
14361 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
14362 (ssse3_plusminus:HI
14363 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
14364 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
14367 (ssse3_plusminus:HI
14369 (match_operand:V8HI 2 "vector_operand" "xBm,xm")
14370 (parallel [(const_int 0)]))
14371 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14372 (ssse3_plusminus:HI
14373 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14374 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
14376 (ssse3_plusminus:HI
14377 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
14378 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
14379 (ssse3_plusminus:HI
14380 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
14381 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
14384 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
14385 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
14386 [(set_attr "isa" "noavx,avx")
14387 (set_attr "type" "sseiadd")
14388 (set_attr "atom_unit" "complex")
14389 (set_attr "prefix_data16" "1,*")
14390 (set_attr "prefix_extra" "1")
14391 (set_attr "prefix" "orig,vex")
14392 (set_attr "mode" "TI")])
14394 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
14395 [(set (match_operand:V4HI 0 "register_operand" "=y")
14398 (ssse3_plusminus:HI
14400 (match_operand:V4HI 1 "register_operand" "0")
14401 (parallel [(const_int 0)]))
14402 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14403 (ssse3_plusminus:HI
14404 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14405 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14407 (ssse3_plusminus:HI
14409 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
14410 (parallel [(const_int 0)]))
14411 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14412 (ssse3_plusminus:HI
14413 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14414 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
14416 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
14417 [(set_attr "type" "sseiadd")
14418 (set_attr "atom_unit" "complex")
14419 (set_attr "prefix_extra" "1")
14420 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14421 (set_attr "mode" "DI")])
14423 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
14424 [(set (match_operand:V8SI 0 "register_operand" "=x")
14430 (match_operand:V8SI 1 "register_operand" "x")
14431 (parallel [(const_int 0)]))
14432 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14434 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
14435 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
14438 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
14439 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
14441 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
14442 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
14447 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
14448 (parallel [(const_int 0)]))
14449 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
14451 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
14452 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
14455 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
14456 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
14458 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
14459 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
14461 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
14462 [(set_attr "type" "sseiadd")
14463 (set_attr "prefix_extra" "1")
14464 (set_attr "prefix" "vex")
14465 (set_attr "mode" "OI")])
14467 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
14468 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
14473 (match_operand:V4SI 1 "register_operand" "0,x")
14474 (parallel [(const_int 0)]))
14475 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14477 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
14478 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
14482 (match_operand:V4SI 2 "vector_operand" "xBm,xm")
14483 (parallel [(const_int 0)]))
14484 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
14486 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
14487 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
14490 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
14491 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
14492 [(set_attr "isa" "noavx,avx")
14493 (set_attr "type" "sseiadd")
14494 (set_attr "atom_unit" "complex")
14495 (set_attr "prefix_data16" "1,*")
14496 (set_attr "prefix_extra" "1")
14497 (set_attr "prefix" "orig,vex")
14498 (set_attr "mode" "TI")])
14500 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
14501 [(set (match_operand:V2SI 0 "register_operand" "=y")
14505 (match_operand:V2SI 1 "register_operand" "0")
14506 (parallel [(const_int 0)]))
14507 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14510 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
14511 (parallel [(const_int 0)]))
14512 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
14514 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
14515 [(set_attr "type" "sseiadd")
14516 (set_attr "atom_unit" "complex")
14517 (set_attr "prefix_extra" "1")
14518 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14519 (set_attr "mode" "DI")])
14521 (define_insn "avx2_pmaddubsw256"
14522 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
14527 (match_operand:V32QI 1 "register_operand" "x,v")
14528 (parallel [(const_int 0) (const_int 2)
14529 (const_int 4) (const_int 6)
14530 (const_int 8) (const_int 10)
14531 (const_int 12) (const_int 14)
14532 (const_int 16) (const_int 18)
14533 (const_int 20) (const_int 22)
14534 (const_int 24) (const_int 26)
14535 (const_int 28) (const_int 30)])))
14538 (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
14539 (parallel [(const_int 0) (const_int 2)
14540 (const_int 4) (const_int 6)
14541 (const_int 8) (const_int 10)
14542 (const_int 12) (const_int 14)
14543 (const_int 16) (const_int 18)
14544 (const_int 20) (const_int 22)
14545 (const_int 24) (const_int 26)
14546 (const_int 28) (const_int 30)]))))
14549 (vec_select:V16QI (match_dup 1)
14550 (parallel [(const_int 1) (const_int 3)
14551 (const_int 5) (const_int 7)
14552 (const_int 9) (const_int 11)
14553 (const_int 13) (const_int 15)
14554 (const_int 17) (const_int 19)
14555 (const_int 21) (const_int 23)
14556 (const_int 25) (const_int 27)
14557 (const_int 29) (const_int 31)])))
14559 (vec_select:V16QI (match_dup 2)
14560 (parallel [(const_int 1) (const_int 3)
14561 (const_int 5) (const_int 7)
14562 (const_int 9) (const_int 11)
14563 (const_int 13) (const_int 15)
14564 (const_int 17) (const_int 19)
14565 (const_int 21) (const_int 23)
14566 (const_int 25) (const_int 27)
14567 (const_int 29) (const_int 31)]))))))]
14569 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14570 [(set_attr "isa" "*,avx512bw")
14571 (set_attr "type" "sseiadd")
14572 (set_attr "prefix_extra" "1")
14573 (set_attr "prefix" "vex,evex")
14574 (set_attr "mode" "OI")])
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
14578 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
14579 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
14580 (unspec:VI2_AVX512VL
14581 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
14582 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
14583 UNSPEC_PMADDUBSW512))]
14585 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
14586 [(set_attr "type" "sseiadd")
14587 (set_attr "prefix" "evex")
14588 (set_attr "mode" "XI")])
;; 512-bit vpmulhrsw (rounded high multiply of signed words).  The "%"
;; on operand 1 marks the multiplication as commutative for reload; the
;; all-ones const_vector is the rounding addend visible in the pattern
;; (the multiply/shift RTL around it is elided in this chunk).
14590 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
14591 [(set (match_operand:V32HI 0 "register_operand" "=v")
14598 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
14600 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
14602 (const_vector:V32HI [(const_int 1) (const_int 1)
14603 (const_int 1) (const_int 1)
14604 (const_int 1) (const_int 1)
14605 (const_int 1) (const_int 1)
14606 (const_int 1) (const_int 1)
14607 (const_int 1) (const_int 1)
14608 (const_int 1) (const_int 1)
14609 (const_int 1) (const_int 1)
14610 (const_int 1) (const_int 1)
14611 (const_int 1) (const_int 1)
14612 (const_int 1) (const_int 1)
14613 (const_int 1) (const_int 1)
14614 (const_int 1) (const_int 1)
14615 (const_int 1) (const_int 1)
14616 (const_int 1) (const_int 1)
14617 (const_int 1) (const_int 1)]))
14620 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14621 [(set_attr "type" "sseimul")
14622 (set_attr "prefix" "evex")
14623 (set_attr "mode" "XI")])
;; 128-bit pmaddubsw/vpmaddubsw.  Same even/odd byte-select structure as
;; the 256-bit pattern above, over V16QI inputs.  Alternatives:
;; 0 = legacy SSE (destructive, operand 0 matches operand 1),
;; 1 = AVX VEX encoding, 2 = AVX512BW EVEX encoding.
14625 (define_insn "ssse3_pmaddubsw128"
14626 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
14631 (match_operand:V16QI 1 "register_operand" "0,x,v")
14632 (parallel [(const_int 0) (const_int 2)
14633 (const_int 4) (const_int 6)
14634 (const_int 8) (const_int 10)
14635 (const_int 12) (const_int 14)])))
14638 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
14639 (parallel [(const_int 0) (const_int 2)
14640 (const_int 4) (const_int 6)
14641 (const_int 8) (const_int 10)
14642 (const_int 12) (const_int 14)]))))
;; Odd-indexed bytes of both inputs.
14645 (vec_select:V8QI (match_dup 1)
14646 (parallel [(const_int 1) (const_int 3)
14647 (const_int 5) (const_int 7)
14648 (const_int 9) (const_int 11)
14649 (const_int 13) (const_int 15)])))
14651 (vec_select:V8QI (match_dup 2)
14652 (parallel [(const_int 1) (const_int 3)
14653 (const_int 5) (const_int 7)
14654 (const_int 9) (const_int 11)
14655 (const_int 13) (const_int 15)]))))))]
14658 pmaddubsw\t{%2, %0|%0, %2}
14659 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
14660 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14661 [(set_attr "isa" "noavx,avx,avx512bw")
14662 (set_attr "type" "sseiadd")
14663 (set_attr "atom_unit" "simul")
14664 (set_attr "prefix_data16" "1,*,*")
14665 (set_attr "prefix_extra" "1")
14666 (set_attr "prefix" "orig,vex,evex")
14667 (set_attr "mode" "TI")])
;; MMX (64-bit, "y" registers) pmaddubsw over V8QI inputs, producing
;; V4HI.  Destructive two-operand form only; prefix_rex is computed at
;; output time because extended registers need a REX prefix.
14669 (define_insn "ssse3_pmaddubsw"
14670 [(set (match_operand:V4HI 0 "register_operand" "=y")
14675 (match_operand:V8QI 1 "register_operand" "0")
14676 (parallel [(const_int 0) (const_int 2)
14677 (const_int 4) (const_int 6)])))
14680 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
14681 (parallel [(const_int 0) (const_int 2)
14682 (const_int 4) (const_int 6)]))))
14685 (vec_select:V4QI (match_dup 1)
14686 (parallel [(const_int 1) (const_int 3)
14687 (const_int 5) (const_int 7)])))
14689 (vec_select:V4QI (match_dup 2)
14690 (parallel [(const_int 1) (const_int 3)
14691 (const_int 5) (const_int 7)]))))))]
14693 "pmaddubsw\t{%2, %0|%0, %2}"
14694 [(set_attr "type" "sseiadd")
14695 (set_attr "atom_unit" "simul")
14696 (set_attr "prefix_extra" "1")
14697 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14698 (set_attr "mode" "DI")])
;; Modes for which pmulhrsw patterns are provided: MMX V4HI always,
;; SSE V8HI always, 256-bit V16HI only with AVX2.
14700 (define_mode_iterator PMULHRSW
14701 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Masked pmulhrsw expander (AVX512BW+VL).  operands[5] is created here
;; as the CONST1_RTX rounding vector matched by the insn pattern; the
;; fixup call canonicalizes the commutative multiply operands.
14703 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
14704 [(set (match_operand:PMULHRSW 0 "register_operand")
14705 (vec_merge:PMULHRSW
14707 (lshiftrt:<ssedoublemode>
14708 (plus:<ssedoublemode>
14709 (lshiftrt:<ssedoublemode>
14710 (mult:<ssedoublemode>
14711 (sign_extend:<ssedoublemode>
14712 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14713 (sign_extend:<ssedoublemode>
14714 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14718 (match_operand:PMULHRSW 3 "register_operand")
14719 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
14720 "TARGET_AVX512BW && TARGET_AVX512VL"
14722 operands[5] = CONST1_RTX(<MODE>mode);
14723 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Unmasked expander: same shape, rounding vector goes in operands[3].
14726 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
14727 [(set (match_operand:PMULHRSW 0 "register_operand")
14729 (lshiftrt:<ssedoublemode>
14730 (plus:<ssedoublemode>
14731 (lshiftrt:<ssedoublemode>
14732 (mult:<ssedoublemode>
14733 (sign_extend:<ssedoublemode>
14734 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14735 (sign_extend:<ssedoublemode>
14736 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14742 operands[3] = CONST1_RTX(<MODE>mode);
14743 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; pmulhrsw/vpmulhrsw insn matching the expanders above.  Alternatives:
;; 0 = legacy SSE (destructive), 1 = AVX, 2 = AVX512BW.  The condition
;; rejects two memory operands; "%" on operand 1 allows commutation.
14746 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
14747 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
14749 (lshiftrt:<ssedoublemode>
14750 (plus:<ssedoublemode>
14751 (lshiftrt:<ssedoublemode>
14752 (mult:<ssedoublemode>
14753 (sign_extend:<ssedoublemode>
14754 (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
14755 (sign_extend:<ssedoublemode>
14756 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
14758 (match_operand:VI2_AVX2 3 "const1_operand"))
14760 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14761 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14763 pmulhrsw\t{%2, %0|%0, %2}
14764 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
14765 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
14766 [(set_attr "isa" "noavx,avx,avx512bw")
14767 (set_attr "type" "sseimul")
14768 (set_attr "prefix_data16" "1,*,*")
14769 (set_attr "prefix_extra" "1")
14770 (set_attr "prefix" "orig,maybe_evex,evex")
14771 (set_attr "mode" "<sseinsnmode>")])
;; MMX variant of pmulhrsw (V4HI in "y" registers).
14773 (define_insn "*ssse3_pmulhrswv4hi3"
14774 [(set (match_operand:V4HI 0 "register_operand" "=y")
14781 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
14783 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
14785 (match_operand:V4HI 3 "const1_operand"))
14787 "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14788 "pmulhrsw\t{%2, %0|%0, %2}"
14789 [(set_attr "type" "sseimul")
14790 (set_attr "prefix_extra" "1")
14791 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14792 (set_attr "mode" "DI")])
;; pshufb/vpshufb, modeled as an unspec (the byte-permutation semantics
;; are not expressible compactly in RTL).  SSE/AVX/AVX512BW alternatives.
14794 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
14795 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
14797 [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
14798 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
14800 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14802 pshufb\t{%2, %0|%0, %2}
14803 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
14804 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14805 [(set_attr "isa" "noavx,avx,avx512bw")
14806 (set_attr "type" "sselog1")
14807 (set_attr "prefix_data16" "1,*,*")
14808 (set_attr "prefix_extra" "1")
14809 (set_attr "prefix" "orig,maybe_evex,evex")
14810 (set_attr "btver2_decode" "vector")
14811 (set_attr "mode" "<sseinsnmode>")])
;; MMX pshufb (V8QI in "y" registers).
14813 (define_insn "ssse3_pshufbv8qi3"
14814 [(set (match_operand:V8QI 0 "register_operand" "=y")
14815 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
14816 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
;; NOTE(review): trailing ';' after the template begins an .md comment,
;; so it is inert — likely accidental; confirm against upstream.
14819 "pshufb\t{%2, %0|%0, %2}";
14820 [(set_attr "type" "sselog1")
14821 (set_attr "prefix_extra" "1")
14822 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14823 (set_attr "mode" "DI")])
;; psignb/w/d and vpsign* (sign-apply), as an unspec.  No AVX512 form —
;; the ISA does not provide EVEX psign, hence only noavx/avx variants.
14825 (define_insn "<ssse3_avx2>_psign<mode>3"
14826 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
14828 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
14829 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
14833 psign<ssemodesuffix>\t{%2, %0|%0, %2}
14834 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14835 [(set_attr "isa" "noavx,avx")
14836 (set_attr "type" "sselog1")
14837 (set_attr "prefix_data16" "1,*")
14838 (set_attr "prefix_extra" "1")
14839 (set_attr "prefix" "orig,vex")
14840 (set_attr "mode" "<sseinsnmode>")])
;; MMX psign over all MMX integer modes.
14842 (define_insn "ssse3_psign<mode>3"
14843 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
14845 [(match_operand:MMXMODEI 1 "register_operand" "0")
14846 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
;; NOTE(review): inert trailing ';' after the template string.
14849 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
14850 [(set_attr "type" "sselog1")
14851 (set_attr "prefix_extra" "1")
14852 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14853 (set_attr "mode" "DI")])
;; Masked vpalignr (AVX512BW; VL needed below 512 bits).  The builtin
;; immediate is a bit count (multiple of 8); it is divided by 8 at
;; output time to get the byte count the instruction encodes.
14855 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
14856 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
14857 (vec_merge:VI1_AVX512
14859 [(match_operand:VI1_AVX512 1 "register_operand" "v")
14860 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
14861 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14863 (match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
14864 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
14865 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
14867 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14868 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
14870 [(set_attr "type" "sseishft")
14871 (set_attr "atom_unit" "sishuf")
14872 (set_attr "prefix_extra" "1")
14873 (set_attr "length_immediate" "1")
14874 (set_attr "prefix" "evex")
14875 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked palignr/vpalignr; alternatives 1 and 2 share one template
;; in the switch below (AVX and AVX512BW differ only in encoding).
14877 (define_insn "<ssse3_avx2>_palignr<mode>"
14878 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
14879 (unspec:SSESCALARMODE
14880 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
14881 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
14882 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
14886 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14888 switch (which_alternative)
14891 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14894 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14896 gcc_unreachable ();
14899 [(set_attr "isa" "noavx,avx,avx512bw")
14900 (set_attr "type" "sseishft")
14901 (set_attr "atom_unit" "sishuf")
14902 (set_attr "prefix_data16" "1,*,*")
14903 (set_attr "prefix_extra" "1")
14904 (set_attr "length_immediate" "1")
14905 (set_attr "prefix" "orig,vex,evex")
14906 (set_attr "mode" "<sseinsnmode>")])
;; MMX palignr on DImode.
14908 (define_insn "ssse3_palignrdi"
14909 [(set (match_operand:DI 0 "register_operand" "=y")
14910 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
14911 (match_operand:DI 2 "nonimmediate_operand" "ym")
14912 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14916 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14917 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14919 [(set_attr "type" "sseishft")
14920 (set_attr "atom_unit" "sishuf")
14921 (set_attr "prefix_extra" "1")
14922 (set_attr "length_immediate" "1")
14923 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14924 (set_attr "mode" "DI")])
14926 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
14927 ;; modes for abs instruction on pre AVX-512 targets.
14928 (define_mode_iterator VI1248_AVX512VL_AVX512BW
14929 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
14930 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
14931 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
14932 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; Unmasked pabs* over all the modes above; %v emits the "v" prefix
;; only when a VEX/EVEX encoding is in use.
14934 (define_insn "*abs<mode>2"
14935 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
14936 (abs:VI1248_AVX512VL_AVX512BW
14937 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
14939 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
14940 [(set_attr "type" "sselog1")
14941 (set_attr "prefix_data16" "1")
14942 (set_attr "prefix_extra" "1")
14943 (set_attr "prefix" "maybe_vex")
14944 (set_attr "mode" "<sseinsnmode>")])
;; Masked pabs, dword/qword element sizes (AVX512F/VL iterator).
14946 (define_insn "abs<mode>2_mask"
14947 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
14948 (vec_merge:VI48_AVX512VL
14950 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
14951 (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
14952 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14954 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14955 [(set_attr "type" "sselog1")
14956 (set_attr "prefix" "evex")
14957 (set_attr "mode" "<sseinsnmode>")])
;; Masked pabs, byte/word element sizes (separate pattern because the
;; VI12 iterator carries different target conditions than VI48).
14959 (define_insn "abs<mode>2_mask"
14960 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
14961 (vec_merge:VI12_AVX512VL
14963 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")
14964 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
14965 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14967 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14968 [(set_attr "type" "sselog1")
14969 (set_attr "prefix" "evex")
14970 (set_attr "mode" "<sseinsnmode>")])
;; Expander: when the insn above is unavailable (elided condition),
;; fall back to an open-coded SSE2 abs sequence.
14972 (define_expand "abs<mode>2"
14973 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
14974 (abs:VI1248_AVX512VL_AVX512BW
14975 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand")))]
14980 ix86_expand_sse2_abs (operands[0], operands[1]);
;; MMX pabs.
14985 (define_insn "abs<mode>2"
14986 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
14988 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
;; NOTE(review): inert trailing ';' after the template string.
14990 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
14991 [(set_attr "type" "sselog1")
14992 (set_attr "prefix_rep" "0")
14993 (set_attr "prefix_extra" "1")
14994 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14995 (set_attr "mode" "DI")])
14997 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14999 ;; AMD SSE4A instructions
15001 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movntss/movntsd: scalar non-temporal store of an SF/DF register.
15003 (define_insn "sse4a_movnt<mode>"
15004 [(set (match_operand:MODEF 0 "memory_operand" "=m")
15006 [(match_operand:MODEF 1 "register_operand" "x")]
15009 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
15010 [(set_attr "type" "ssemov")
15011 (set_attr "mode" "<MODE>")])
;; Non-temporal store of element 0 of a 128-bit FP vector.
15013 (define_insn "sse4a_vmmovnt<mode>"
15014 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
15015 (unspec:<ssescalarmode>
15016 [(vec_select:<ssescalarmode>
15017 (match_operand:VF_128 1 "register_operand" "x")
15018 (parallel [(const_int 0)]))]
15021 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
15022 [(set_attr "type" "ssemov")
15023 (set_attr "mode" "<ssescalarmode>")])
;; extrq with immediate length (operand 2) and index (operand 3);
;; two immediate bytes, hence length_immediate 2.
15025 (define_insn "sse4a_extrqi"
15026 [(set (match_operand:V2DI 0 "register_operand" "=x")
15027 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15028 (match_operand 2 "const_0_to_255_operand")
15029 (match_operand 3 "const_0_to_255_operand")]
15032 "extrq\t{%3, %2, %0|%0, %2, %3}"
15033 [(set_attr "type" "sse")
15034 (set_attr "prefix_data16" "1")
15035 (set_attr "length_immediate" "2")
15036 (set_attr "mode" "TI")])
;; extrq register form: length/index come from operand 2's low bytes.
15038 (define_insn "sse4a_extrq"
15039 [(set (match_operand:V2DI 0 "register_operand" "=x")
15040 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15041 (match_operand:V16QI 2 "register_operand" "x")]
15044 "extrq\t{%2, %0|%0, %2}"
15045 [(set_attr "type" "sse")
15046 (set_attr "prefix_data16" "1")
15047 (set_attr "mode" "TI")])
;; insertq with immediate length (operand 3) and index (operand 4).
15049 (define_insn "sse4a_insertqi"
15050 [(set (match_operand:V2DI 0 "register_operand" "=x")
15051 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15052 (match_operand:V2DI 2 "register_operand" "x")
15053 (match_operand 3 "const_0_to_255_operand")
15054 (match_operand 4 "const_0_to_255_operand")]
15057 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
15058 [(set_attr "type" "sseins")
15059 (set_attr "prefix_data16" "0")
15060 (set_attr "prefix_rep" "1")
15061 (set_attr "length_immediate" "2")
15062 (set_attr "mode" "TI")])
;; insertq register form.
15064 (define_insn "sse4a_insertq"
15065 [(set (match_operand:V2DI 0 "register_operand" "=x")
15066 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15067 (match_operand:V2DI 2 "register_operand" "x")]
15070 "insertq\t{%2, %0|%0, %2}"
15071 [(set_attr "type" "sseins")
15072 (set_attr "prefix_data16" "0")
15073 (set_attr "prefix_rep" "1")
15074 (set_attr "mode" "TI")])
15076 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15078 ;; Intel SSE4.1 instructions
15080 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15082 ;; Mapping of immediate bits for blend instructions
;; One selector bit per element, so the immediate range is
;; 2^nelts - 1 for each mode.
15083 (define_mode_attr blendbits
15084 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; blendps/blendpd with immediate mask.  Note operands 1 and 2 are
;; swapped inside the vec_merge relative to their operand numbers;
;; "Yr" prefers non-REX SSE registers for a shorter encoding.
15086 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
15087 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15088 (vec_merge:VF_128_256
15089 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15090 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
15091 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
15094 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15095 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15096 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15097 [(set_attr "isa" "noavx,noavx,avx")
15098 (set_attr "type" "ssemov")
15099 (set_attr "length_immediate" "1")
15100 (set_attr "prefix_data16" "1,1,*")
15101 (set_attr "prefix_extra" "1")
15102 (set_attr "prefix" "orig,orig,vex")
15103 (set_attr "mode" "<MODE>")])
;; blendvps/blendvpd: mask comes from a register; legacy encodings
;; require it in xmm0 (constraint "Yz").
15105 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
15106 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15108 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
15109 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15110 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
15114 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15115 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15116 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15117 [(set_attr "isa" "noavx,noavx,avx")
15118 (set_attr "type" "ssemov")
15119 (set_attr "length_immediate" "1")
15120 (set_attr "prefix_data16" "1,1,*")
15121 (set_attr "prefix_extra" "1")
15122 (set_attr "prefix" "orig,orig,vex")
15123 (set_attr "btver2_decode" "vector,vector,vector")
15124 (set_attr "mode" "<MODE>")])
;; dpps/dppd dot-product with immediate selector.
15126 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
15127 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15129 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
15130 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15131 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
15135 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15136 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15137 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15138 [(set_attr "isa" "noavx,noavx,avx")
15139 (set_attr "type" "ssemul")
15140 (set_attr "length_immediate" "1")
15141 (set_attr "prefix_data16" "1,1,*")
15142 (set_attr "prefix_extra" "1")
15143 (set_attr "prefix" "orig,orig,vex")
15144 (set_attr "btver2_decode" "vector,vector,vector")
15145 (set_attr "znver1_decode" "vector,vector,vector")
15146 (set_attr "mode" "<MODE>")])
15148 ;; Mode attribute used by `vmovntdqa' pattern
15149 (define_mode_attr vi8_sse4_1_avx2_avx512
15150 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; movntdqa: non-temporal aligned load (memory operand only).
15152 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
15153 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
15154 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
15157 "%vmovntdqa\t{%1, %0|%0, %1}"
15158 [(set_attr "isa" "noavx,noavx,avx")
15159 (set_attr "type" "ssemov")
15160 (set_attr "prefix_extra" "1,1,*")
15161 (set_attr "prefix" "orig,orig,maybe_evex")
15162 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw/vmpsadbw: multiple sums of absolute byte differences,
;; selector in immediate operand 3.
15164 (define_insn "<sse4_1_avx2>_mpsadbw"
15165 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
15167 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
15168 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
15169 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
15173 mpsadbw\t{%3, %2, %0|%0, %2, %3}
15174 mpsadbw\t{%3, %2, %0|%0, %2, %3}
15175 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15176 [(set_attr "isa" "noavx,noavx,avx")
15177 (set_attr "type" "sselog1")
15178 (set_attr "length_immediate" "1")
15179 (set_attr "prefix_extra" "1")
15180 (set_attr "prefix" "orig,orig,vex")
15181 (set_attr "btver2_decode" "vector,vector,vector")
15182 (set_attr "znver1_decode" "vector,vector,vector")
15183 (set_attr "mode" "<sseinsnmode>")])
;; packusdw: concatenation of two unsigned-saturating truncations
;; (dword -> word).  Alternatives: SSE4.1 x2 / AVX / AVX512BW.
15185 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
15186 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
15187 (vec_concat:VI2_AVX2
15188 (us_truncate:<ssehalfvecmode>
15189 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
15190 (us_truncate:<ssehalfvecmode>
15191 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
15192 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15194 packusdw\t{%2, %0|%0, %2}
15195 packusdw\t{%2, %0|%0, %2}
15196 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
15197 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15198 [(set_attr "isa" "noavx,noavx,avx,avx512bw")
15199 (set_attr "type" "sselog")
15200 (set_attr "prefix_extra" "1")
15201 (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
15202 (set_attr "mode" "<sseinsnmode>")])
;; pblendvb: byte blend under a register mask; legacy encodings need
;; the mask in xmm0 ("Yz").
15204 (define_insn "<sse4_1_avx2>_pblendvb"
15205 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
15207 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
15208 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
15209 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
15213 pblendvb\t{%3, %2, %0|%0, %2, %3}
15214 pblendvb\t{%3, %2, %0|%0, %2, %3}
15215 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15216 [(set_attr "isa" "noavx,noavx,avx")
15217 (set_attr "type" "ssemov")
15218 (set_attr "prefix_extra" "1")
15219 (set_attr "length_immediate" "*,*,1")
15220 (set_attr "prefix" "orig,orig,vex")
15221 (set_attr "btver2_decode" "vector,vector,vector")
15222 (set_attr "mode" "<sseinsnmode>")])
;; pblendw: word blend with an 8-bit immediate mask (one bit/word).
15224 (define_insn "sse4_1_pblendw"
15225 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
15227 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
15228 (match_operand:V8HI 1 "register_operand" "0,0,x")
15229 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
15232 pblendw\t{%3, %2, %0|%0, %2, %3}
15233 pblendw\t{%3, %2, %0|%0, %2, %3}
15234 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15235 [(set_attr "isa" "noavx,noavx,avx")
15236 (set_attr "type" "ssemov")
15237 (set_attr "prefix_extra" "1")
15238 (set_attr "length_immediate" "1")
15239 (set_attr "prefix" "orig,orig,vex")
15240 (set_attr "mode" "TI")])
15242 ;; The builtin uses an 8-bit immediate.  Expand that.
;; vpblendw on V16HI uses the same mask for both 128-bit lanes, so the
;; 8-bit builtin immediate is duplicated into both bytes here; the insn
;; below folds it back to the low byte when printing.
15243 (define_expand "avx2_pblendw"
15244 [(set (match_operand:V16HI 0 "register_operand")
15246 (match_operand:V16HI 2 "nonimmediate_operand")
15247 (match_operand:V16HI 1 "register_operand")
15248 (match_operand:SI 3 "const_0_to_255_operand")))]
15251 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
15252 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn for the expander above (mask already duplicated).
15255 (define_insn "*avx2_pblendw"
15256 [(set (match_operand:V16HI 0 "register_operand" "=x")
15258 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
15259 (match_operand:V16HI 1 "register_operand" "x")
15260 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
15263 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
15264 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
15266 [(set_attr "type" "ssemov")
15267 (set_attr "prefix_extra" "1")
15268 (set_attr "length_immediate" "1")
15269 (set_attr "prefix" "vex")
15270 (set_attr "mode" "OI")])
;; vpblendd: dword blend with immediate mask (AVX2, VEX only).
15272 (define_insn "avx2_pblendd<mode>"
15273 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
15274 (vec_merge:VI4_AVX2
15275 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
15276 (match_operand:VI4_AVX2 1 "register_operand" "x")
15277 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
15279 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15280 [(set_attr "type" "ssemov")
15281 (set_attr "prefix_extra" "1")
15282 (set_attr "length_immediate" "1")
15283 (set_attr "prefix" "vex")
15284 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw: horizontal minimum unsigned word + its position.
15286 (define_insn "sse4_1_phminposuw"
15287 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
15288 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
15289 UNSPEC_PHMINPOSUW))]
15291 "%vphminposuw\t{%1, %0|%0, %1}"
15292 [(set_attr "isa" "noavx,noavx,avx")
15293 (set_attr "type" "sselog1")
15294 (set_attr "prefix_extra" "1")
15295 (set_attr "prefix" "orig,orig,vex")
15296 (set_attr "mode" "TI")])
;; pmovsxbw/pmovzxbw family — byte-to-word sign/zero extension.
;; <code> iterates over sign_extend/zero_extend; <extsuffix> selects
;; the sx/zx mnemonic suffix accordingly.
;; 256-bit: whole V16QI operand widens to V16HI.
15298 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
15299 [(set (match_operand:V16HI 0 "register_operand" "=v")
15301 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
15302 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15303 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15304 [(set_attr "type" "ssemov")
15305 (set_attr "prefix_extra" "1")
15306 (set_attr "prefix" "maybe_evex")
15307 (set_attr "mode" "OI")])
;; 512-bit: V32QI -> V32HI (AVX512BW).
15309 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
15310 [(set (match_operand:V32HI 0 "register_operand" "=v")
15312 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
15314 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15315 [(set_attr "type" "ssemov")
15316 (set_attr "prefix_extra" "1")
15317 (set_attr "prefix" "evex")
15318 (set_attr "mode" "XI")])
;; 128-bit: low 8 bytes of a V16QI widen to V8HI; %q1 prints the
;; 64-bit-wide memory reference for the half-width source.
15320 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
15321 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
15324 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15325 (parallel [(const_int 0) (const_int 1)
15326 (const_int 2) (const_int 3)
15327 (const_int 4) (const_int 5)
15328 (const_int 6) (const_int 7)]))))]
15329 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15330 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15331 [(set_attr "isa" "noavx,noavx,avx")
15332 (set_attr "type" "ssemov")
15333 (set_attr "prefix_extra" "1")
15334 (set_attr "prefix" "orig,orig,maybe_evex")
15335 (set_attr "mode" "TI")])
;; pmov{sx,zx}bd family — byte-to-dword extension.
;; 512-bit: V16QI -> V16SI; %q1 prints the 128-bit source narrowly.
15337 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
15338 [(set (match_operand:V16SI 0 "register_operand" "=v")
15340 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
15342 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15343 [(set_attr "type" "ssemov")
15344 (set_attr "prefix" "evex")
15345 (set_attr "mode" "XI")])
;; 256-bit: low 8 bytes of V16QI -> V8SI.
15347 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
15348 [(set (match_operand:V8SI 0 "register_operand" "=v")
15351 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15352 (parallel [(const_int 0) (const_int 1)
15353 (const_int 2) (const_int 3)
15354 (const_int 4) (const_int 5)
15355 (const_int 6) (const_int 7)]))))]
15356 "TARGET_AVX2 && <mask_avx512vl_condition>"
15357 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15358 [(set_attr "type" "ssemov")
15359 (set_attr "prefix_extra" "1")
15360 (set_attr "prefix" "maybe_evex")
15361 (set_attr "mode" "OI")])
;; 128-bit: low 4 bytes of V16QI -> V4SI; %k1 prints a 32-bit source.
15363 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
15364 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
15367 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15368 (parallel [(const_int 0) (const_int 1)
15369 (const_int 2) (const_int 3)]))))]
15370 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15371 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15372 [(set_attr "isa" "noavx,noavx,avx")
15373 (set_attr "type" "ssemov")
15374 (set_attr "prefix_extra" "1")
15375 (set_attr "prefix" "orig,orig,maybe_evex")
15376 (set_attr "mode" "TI")])
;; pmov{sx,zx}wd family — word-to-dword extension.
;; 512-bit: V16HI -> V16SI.
15378 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
15379 [(set (match_operand:V16SI 0 "register_operand" "=v")
15381 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
15383 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15384 [(set_attr "type" "ssemov")
15385 (set_attr "prefix" "evex")
15386 (set_attr "mode" "XI")])
;; 256-bit: V8HI -> V8SI.
15388 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
15389 [(set (match_operand:V8SI 0 "register_operand" "=v")
15391 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
15392 "TARGET_AVX2 && <mask_avx512vl_condition>"
15393 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15394 [(set_attr "type" "ssemov")
15395 (set_attr "prefix_extra" "1")
15396 (set_attr "prefix" "maybe_evex")
15397 (set_attr "mode" "OI")])
;; 128-bit: low 4 words of V8HI -> V4SI; %q1 narrows the source ref.
15399 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
15400 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
15403 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15404 (parallel [(const_int 0) (const_int 1)
15405 (const_int 2) (const_int 3)]))))]
15406 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15407 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15408 [(set_attr "isa" "noavx,noavx,avx")
15409 (set_attr "type" "ssemov")
15410 (set_attr "prefix_extra" "1")
15411 (set_attr "prefix" "orig,orig,maybe_evex")
15412 (set_attr "mode" "TI")])
;; pmov{sx,zx}bq family — byte-to-qword extension.
;; 512-bit: low 8 bytes of V16QI -> V8DI; %k1 narrows the source ref.
15414 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
15415 [(set (match_operand:V8DI 0 "register_operand" "=v")
15418 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15419 (parallel [(const_int 0) (const_int 1)
15420 (const_int 2) (const_int 3)
15421 (const_int 4) (const_int 5)
15422 (const_int 6) (const_int 7)]))))]
15424 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15425 [(set_attr "type" "ssemov")
15426 (set_attr "prefix" "evex")
15427 (set_attr "mode" "XI")])
;; 256-bit: low 4 bytes -> V4DI.
15429 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
15430 [(set (match_operand:V4DI 0 "register_operand" "=v")
15433 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15434 (parallel [(const_int 0) (const_int 1)
15435 (const_int 2) (const_int 3)]))))]
15436 "TARGET_AVX2 && <mask_avx512vl_condition>"
15437 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15438 [(set_attr "type" "ssemov")
15439 (set_attr "prefix_extra" "1")
15440 (set_attr "prefix" "maybe_evex")
15441 (set_attr "mode" "OI")])
;; 128-bit: low 2 bytes -> V2DI; %w1 prints a 16-bit source ref.
15443 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
15444 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15447 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15448 (parallel [(const_int 0) (const_int 1)]))))]
15449 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15450 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
15451 [(set_attr "isa" "noavx,noavx,avx")
15452 (set_attr "type" "ssemov")
15453 (set_attr "prefix_extra" "1")
15454 (set_attr "prefix" "orig,orig,maybe_evex")
15455 (set_attr "mode" "TI")])
;; pmov{sx,zx}wq family — word-to-qword extension.
;; 512-bit: V8HI -> V8DI; %q1 narrows the source reference.
15457 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
15458 [(set (match_operand:V8DI 0 "register_operand" "=v")
15460 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
15462 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15463 [(set_attr "type" "ssemov")
15464 (set_attr "prefix" "evex")
15465 (set_attr "mode" "XI")])
;; 256-bit: low 4 words of V8HI -> V4DI.
15467 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
15468 [(set (match_operand:V4DI 0 "register_operand" "=v")
15471 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
15472 (parallel [(const_int 0) (const_int 1)
15473 (const_int 2) (const_int 3)]))))]
15474 "TARGET_AVX2 && <mask_avx512vl_condition>"
15475 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15476 [(set_attr "type" "ssemov")
15477 (set_attr "prefix_extra" "1")
15478 (set_attr "prefix" "maybe_evex")
15479 (set_attr "mode" "OI")])
;; 128-bit: low 2 words -> V2DI; %k1 prints a 32-bit source ref.
15481 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
15482 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15485 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15486 (parallel [(const_int 0) (const_int 1)]))))]
15487 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15488 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15489 [(set_attr "isa" "noavx,noavx,avx")
15490 (set_attr "type" "ssemov")
15491 (set_attr "prefix_extra" "1")
15492 (set_attr "prefix" "orig,orig,maybe_evex")
15493 (set_attr "mode" "TI")])
15495 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
15496 [(set (match_operand:V8DI 0 "register_operand" "=v")
15498 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
15500 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15501 [(set_attr "type" "ssemov")
15502 (set_attr "prefix" "evex")
15503 (set_attr "mode" "XI")])
15505 (define_insn "avx2_<code>v4siv4di2<mask_name>"
15506 [(set (match_operand:V4DI 0 "register_operand" "=v")
15508 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
15509 "TARGET_AVX2 && <mask_avx512vl_condition>"
15510 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15511 [(set_attr "type" "ssemov")
15512 (set_attr "prefix" "maybe_evex")
15513 (set_attr "prefix_extra" "1")
15514 (set_attr "mode" "OI")])
15516 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
15517 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15520 (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15521 (parallel [(const_int 0) (const_int 1)]))))]
15522 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15523 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15524 [(set_attr "isa" "noavx,noavx,avx")
15525 (set_attr "type" "ssemov")
15526 (set_attr "prefix_extra" "1")
15527 (set_attr "prefix" "orig,orig,maybe_evex")
15528 (set_attr "mode" "TI")])
15530 ;; ptestps/ptestpd are very similar to comiss and ucomiss when
15531 ;; setting FLAGS_REG.  But it is not really a compare instruction.
15532 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
15533 [(set (reg:CC FLAGS_REG)
15534 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
15535 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
15538 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
15539 [(set_attr "type" "ssecomi")
15540 (set_attr "prefix_extra" "1")
15541 (set_attr "prefix" "vex")
15542 (set_attr "mode" "<MODE>")])
15544 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
15545 ;; But it is not really a compare instruction.
15546 (define_insn "<sse4_1>_ptest<mode>"
15547 [(set (reg:CC FLAGS_REG)
15548 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
15549 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
15552 "%vptest\t{%1, %0|%0, %1}"
15553 [(set_attr "isa" "noavx,noavx,avx")
15554 (set_attr "type" "ssecomi")
15555 (set_attr "prefix_extra" "1")
15556 (set_attr "prefix" "orig,orig,vex")
15557 (set (attr "btver2_decode")
15559 (match_test "<sseinsnmode>mode==OImode")
15560 (const_string "vector")
15561 (const_string "*")))
15562 (set_attr "mode" "<sseinsnmode>")])
15564 (define_insn "ptesttf2"
15565 [(set (reg:CC FLAGS_REG)
15566 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
15567 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
15570 "%vptest\t{%1, %0|%0, %1}"
15571 [(set_attr "isa" "noavx,noavx,avx")
15572 (set_attr "type" "ssecomi")
15573 (set_attr "prefix_extra" "1")
15574 (set_attr "prefix" "orig,orig,vex")
15575 (set_attr "mode" "TI")])
15577 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
15578 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15580 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
15581 (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
15584 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15585 [(set_attr "isa" "noavx,noavx,avx")
15586 (set_attr "type" "ssecvt")
15587 (set_attr "prefix_data16" "1,1,*")
15588 (set_attr "prefix_extra" "1")
15589 (set_attr "length_immediate" "1")
15590 (set_attr "prefix" "orig,orig,vex")
15591 (set_attr "mode" "<MODE>")])
15593 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
15594 [(match_operand:<sseintvecmode> 0 "register_operand")
15595 (match_operand:VF1_128_256 1 "vector_operand")
15596 (match_operand:SI 2 "const_0_to_15_operand")]
15599 rtx tmp = gen_reg_rtx (<MODE>mode);
15602 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
15605 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
15609 (define_expand "avx512f_round<castmode>512"
15610 [(match_operand:VF_512 0 "register_operand")
15611 (match_operand:VF_512 1 "nonimmediate_operand")
15612 (match_operand:SI 2 "const_0_to_15_operand")]
15615 emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
15619 (define_expand "avx512f_roundps512_sfix"
15620 [(match_operand:V16SI 0 "register_operand")
15621 (match_operand:V16SF 1 "nonimmediate_operand")
15622 (match_operand:SI 2 "const_0_to_15_operand")]
15625 rtx tmp = gen_reg_rtx (V16SFmode);
15626 emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
15627 emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
15631 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
15632 [(match_operand:<ssepackfltmode> 0 "register_operand")
15633 (match_operand:VF2 1 "vector_operand")
15634 (match_operand:VF2 2 "vector_operand")
15635 (match_operand:SI 3 "const_0_to_15_operand")]
15640 if (<MODE>mode == V2DFmode
15641 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
15643 rtx tmp2 = gen_reg_rtx (V4DFmode);
15645 tmp0 = gen_reg_rtx (V4DFmode);
15646 tmp1 = force_reg (V2DFmode, operands[1]);
15648 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
15649 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
15650 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
15654 tmp0 = gen_reg_rtx (<MODE>mode);
15655 tmp1 = gen_reg_rtx (<MODE>mode);
15658 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
15661 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
15664 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
15669 (define_insn "sse4_1_round<ssescalarmodesuffix>"
15670 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
15673 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
15674 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
15676 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
15680 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15681 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15682 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
15683 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15684 [(set_attr "isa" "noavx,noavx,avx,avx512f")
15685 (set_attr "type" "ssecvt")
15686 (set_attr "length_immediate" "1")
15687 (set_attr "prefix_data16" "1,1,*,*")
15688 (set_attr "prefix_extra" "1")
15689 (set_attr "prefix" "orig,orig,vex,evex")
15690 (set_attr "mode" "<MODE>")])
15692 (define_expand "round<mode>2"
15693 [(set (match_dup 3)
15695 (match_operand:VF 1 "register_operand")
15697 (set (match_operand:VF 0 "register_operand")
15699 [(match_dup 3) (match_dup 4)]
15701 "TARGET_SSE4_1 && !flag_trapping_math"
15703 machine_mode scalar_mode;
15704 const struct real_format *fmt;
15705 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
15706 rtx half, vec_half;
15708 scalar_mode = GET_MODE_INNER (<MODE>mode);
15710 /* load nextafter (0.5, 0.0) */
15711 fmt = REAL_MODE_FORMAT (scalar_mode);
15712 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
15713 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
15714 half = const_double_from_real_value (pred_half, scalar_mode);
15716 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
15717 vec_half = force_reg (<MODE>mode, vec_half);
15719 operands[2] = gen_reg_rtx (<MODE>mode);
15720 emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
15722 operands[3] = gen_reg_rtx (<MODE>mode);
15723 operands[4] = GEN_INT (ROUND_TRUNC);
15726 (define_expand "round<mode>2_sfix"
15727 [(match_operand:<sseintvecmode> 0 "register_operand")
15728 (match_operand:VF1 1 "register_operand")]
15729 "TARGET_SSE4_1 && !flag_trapping_math"
15731 rtx tmp = gen_reg_rtx (<MODE>mode);
15733 emit_insn (gen_round<mode>2 (tmp, operands[1]));
15736 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
15740 (define_expand "round<mode>2_vec_pack_sfix"
15741 [(match_operand:<ssepackfltmode> 0 "register_operand")
15742 (match_operand:VF2 1 "register_operand")
15743 (match_operand:VF2 2 "register_operand")]
15744 "TARGET_SSE4_1 && !flag_trapping_math"
15748 if (<MODE>mode == V2DFmode
15749 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
15751 rtx tmp2 = gen_reg_rtx (V4DFmode);
15753 tmp0 = gen_reg_rtx (V4DFmode);
15754 tmp1 = force_reg (V2DFmode, operands[1]);
15756 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
15757 emit_insn (gen_roundv4df2 (tmp2, tmp0));
15758 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
15762 tmp0 = gen_reg_rtx (<MODE>mode);
15763 tmp1 = gen_reg_rtx (<MODE>mode);
15765 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
15766 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
15769 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
15774 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15776 ;; Intel SSE4.2 string/text processing instructions
15778 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15780 (define_insn_and_split "sse4_2_pcmpestr"
15781 [(set (match_operand:SI 0 "register_operand" "=c,c")
15783 [(match_operand:V16QI 2 "register_operand" "x,x")
15784 (match_operand:SI 3 "register_operand" "a,a")
15785 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
15786 (match_operand:SI 5 "register_operand" "d,d")
15787 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
15789 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15797 (set (reg:CC FLAGS_REG)
15806 && can_create_pseudo_p ()"
15811 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15812 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15813 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15816 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
15817 operands[3], operands[4],
15818 operands[5], operands[6]));
15820 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
15821 operands[3], operands[4],
15822 operands[5], operands[6]));
15823 if (flags && !(ecx || xmm0))
15824 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
15825 operands[2], operands[3],
15826 operands[4], operands[5],
15828 if (!(flags || ecx || xmm0))
15829 emit_note (NOTE_INSN_DELETED);
15833 [(set_attr "type" "sselog")
15834 (set_attr "prefix_data16" "1")
15835 (set_attr "prefix_extra" "1")
15836 (set_attr "length_immediate" "1")
15837 (set_attr "memory" "none,load")
15838 (set_attr "mode" "TI")])
15840 (define_insn "sse4_2_pcmpestri"
15841 [(set (match_operand:SI 0 "register_operand" "=c,c")
15843 [(match_operand:V16QI 1 "register_operand" "x,x")
15844 (match_operand:SI 2 "register_operand" "a,a")
15845 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15846 (match_operand:SI 4 "register_operand" "d,d")
15847 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15849 (set (reg:CC FLAGS_REG)
15858 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
15859 [(set_attr "type" "sselog")
15860 (set_attr "prefix_data16" "1")
15861 (set_attr "prefix_extra" "1")
15862 (set_attr "prefix" "maybe_vex")
15863 (set_attr "length_immediate" "1")
15864 (set_attr "btver2_decode" "vector")
15865 (set_attr "memory" "none,load")
15866 (set_attr "mode" "TI")])
15868 (define_insn "sse4_2_pcmpestrm"
15869 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15871 [(match_operand:V16QI 1 "register_operand" "x,x")
15872 (match_operand:SI 2 "register_operand" "a,a")
15873 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15874 (match_operand:SI 4 "register_operand" "d,d")
15875 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15877 (set (reg:CC FLAGS_REG)
15886 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
15887 [(set_attr "type" "sselog")
15888 (set_attr "prefix_data16" "1")
15889 (set_attr "prefix_extra" "1")
15890 (set_attr "length_immediate" "1")
15891 (set_attr "prefix" "maybe_vex")
15892 (set_attr "btver2_decode" "vector")
15893 (set_attr "memory" "none,load")
15894 (set_attr "mode" "TI")])
15896 (define_insn "sse4_2_pcmpestr_cconly"
15897 [(set (reg:CC FLAGS_REG)
15899 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15900 (match_operand:SI 3 "register_operand" "a,a,a,a")
15901 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
15902 (match_operand:SI 5 "register_operand" "d,d,d,d")
15903 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
15905 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15906 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
15909 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15910 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15911 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
15912 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
15913 [(set_attr "type" "sselog")
15914 (set_attr "prefix_data16" "1")
15915 (set_attr "prefix_extra" "1")
15916 (set_attr "length_immediate" "1")
15917 (set_attr "memory" "none,load,none,load")
15918 (set_attr "btver2_decode" "vector,vector,vector,vector")
15919 (set_attr "prefix" "maybe_vex")
15920 (set_attr "mode" "TI")])
15922 (define_insn_and_split "sse4_2_pcmpistr"
15923 [(set (match_operand:SI 0 "register_operand" "=c,c")
15925 [(match_operand:V16QI 2 "register_operand" "x,x")
15926 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15927 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
15929 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15935 (set (reg:CC FLAGS_REG)
15942 && can_create_pseudo_p ()"
15947 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15948 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15949 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15952 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
15953 operands[3], operands[4]));
15955 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
15956 operands[3], operands[4]));
15957 if (flags && !(ecx || xmm0))
15958 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
15959 operands[2], operands[3],
15961 if (!(flags || ecx || xmm0))
15962 emit_note (NOTE_INSN_DELETED);
15966 [(set_attr "type" "sselog")
15967 (set_attr "prefix_data16" "1")
15968 (set_attr "prefix_extra" "1")
15969 (set_attr "length_immediate" "1")
15970 (set_attr "memory" "none,load")
15971 (set_attr "mode" "TI")])
15973 (define_insn "sse4_2_pcmpistri"
15974 [(set (match_operand:SI 0 "register_operand" "=c,c")
15976 [(match_operand:V16QI 1 "register_operand" "x,x")
15977 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15978 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15980 (set (reg:CC FLAGS_REG)
15987 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
15988 [(set_attr "type" "sselog")
15989 (set_attr "prefix_data16" "1")
15990 (set_attr "prefix_extra" "1")
15991 (set_attr "length_immediate" "1")
15992 (set_attr "prefix" "maybe_vex")
15993 (set_attr "memory" "none,load")
15994 (set_attr "btver2_decode" "vector")
15995 (set_attr "mode" "TI")])
15997 (define_insn "sse4_2_pcmpistrm"
15998 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
16000 [(match_operand:V16QI 1 "register_operand" "x,x")
16001 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16002 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16004 (set (reg:CC FLAGS_REG)
16011 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
16012 [(set_attr "type" "sselog")
16013 (set_attr "prefix_data16" "1")
16014 (set_attr "prefix_extra" "1")
16015 (set_attr "length_immediate" "1")
16016 (set_attr "prefix" "maybe_vex")
16017 (set_attr "memory" "none,load")
16018 (set_attr "btver2_decode" "vector")
16019 (set_attr "mode" "TI")])
16021 (define_insn "sse4_2_pcmpistr_cconly"
16022 [(set (reg:CC FLAGS_REG)
16024 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
16025 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
16026 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
16028 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
16029 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
16032 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
16033 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
16034 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
16035 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
16036 [(set_attr "type" "sselog")
16037 (set_attr "prefix_data16" "1")
16038 (set_attr "prefix_extra" "1")
16039 (set_attr "length_immediate" "1")
16040 (set_attr "memory" "none,load,none,load")
16041 (set_attr "prefix" "maybe_vex")
16042 (set_attr "btver2_decode" "vector,vector,vector,vector")
16043 (set_attr "mode" "TI")])
16045 ;; Packed float variants
16046 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
16047 [(V8DI "V8SF") (V16SI "V16SF")])
16049 (define_expand "avx512pf_gatherpf<mode>sf"
16051 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16052 (mem:<GATHER_SCATTER_SF_MEM_MODE>
16054 [(match_operand 2 "vsib_address_operand")
16055 (match_operand:VI48_512 1 "register_operand")
16056 (match_operand:SI 3 "const1248_operand")]))
16057 (match_operand:SI 4 "const_2_to_3_operand")]
16058 UNSPEC_GATHER_PREFETCH)]
16062 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16063 operands[3]), UNSPEC_VSIBADDR);
16066 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
16068 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16069 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
16071 [(match_operand:P 2 "vsib_address_operand" "Tv")
16072 (match_operand:VI48_512 1 "register_operand" "v")
16073 (match_operand:SI 3 "const1248_operand" "n")]
16075 (match_operand:SI 4 "const_2_to_3_operand" "n")]
16076 UNSPEC_GATHER_PREFETCH)]
16079 switch (INTVAL (operands[4]))
16082 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16084 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16086 gcc_unreachable ();
16089 [(set_attr "type" "sse")
16090 (set_attr "prefix" "evex")
16091 (set_attr "mode" "XI")])
16093 ;; Packed double variants
16094 (define_expand "avx512pf_gatherpf<mode>df"
16096 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16099 [(match_operand 2 "vsib_address_operand")
16100 (match_operand:VI4_256_8_512 1 "register_operand")
16101 (match_operand:SI 3 "const1248_operand")]))
16102 (match_operand:SI 4 "const_2_to_3_operand")]
16103 UNSPEC_GATHER_PREFETCH)]
16107 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16108 operands[3]), UNSPEC_VSIBADDR);
16111 (define_insn "*avx512pf_gatherpf<mode>df_mask"
16113 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16114 (match_operator:V8DF 5 "vsib_mem_operator"
16116 [(match_operand:P 2 "vsib_address_operand" "Tv")
16117 (match_operand:VI4_256_8_512 1 "register_operand" "v")
16118 (match_operand:SI 3 "const1248_operand" "n")]
16120 (match_operand:SI 4 "const_2_to_3_operand" "n")]
16121 UNSPEC_GATHER_PREFETCH)]
16124 switch (INTVAL (operands[4]))
16127 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16129 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16131 gcc_unreachable ();
16134 [(set_attr "type" "sse")
16135 (set_attr "prefix" "evex")
16136 (set_attr "mode" "XI")])
16138 ;; Packed float variants
16139 (define_expand "avx512pf_scatterpf<mode>sf"
16141 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16142 (mem:<GATHER_SCATTER_SF_MEM_MODE>
16144 [(match_operand 2 "vsib_address_operand")
16145 (match_operand:VI48_512 1 "register_operand")
16146 (match_operand:SI 3 "const1248_operand")]))
16147 (match_operand:SI 4 "const2367_operand")]
16148 UNSPEC_SCATTER_PREFETCH)]
16152 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16153 operands[3]), UNSPEC_VSIBADDR);
16156 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
16158 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16159 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
16161 [(match_operand:P 2 "vsib_address_operand" "Tv")
16162 (match_operand:VI48_512 1 "register_operand" "v")
16163 (match_operand:SI 3 "const1248_operand" "n")]
16165 (match_operand:SI 4 "const2367_operand" "n")]
16166 UNSPEC_SCATTER_PREFETCH)]
16169 switch (INTVAL (operands[4]))
16173 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16176 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16178 gcc_unreachable ();
16181 [(set_attr "type" "sse")
16182 (set_attr "prefix" "evex")
16183 (set_attr "mode" "XI")])
16185 ;; Packed double variants
16186 (define_expand "avx512pf_scatterpf<mode>df"
16188 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16191 [(match_operand 2 "vsib_address_operand")
16192 (match_operand:VI4_256_8_512 1 "register_operand")
16193 (match_operand:SI 3 "const1248_operand")]))
16194 (match_operand:SI 4 "const2367_operand")]
16195 UNSPEC_SCATTER_PREFETCH)]
16199 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16200 operands[3]), UNSPEC_VSIBADDR);
16203 (define_insn "*avx512pf_scatterpf<mode>df_mask"
16205 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16206 (match_operator:V8DF 5 "vsib_mem_operator"
16208 [(match_operand:P 2 "vsib_address_operand" "Tv")
16209 (match_operand:VI4_256_8_512 1 "register_operand" "v")
16210 (match_operand:SI 3 "const1248_operand" "n")]
16212 (match_operand:SI 4 "const2367_operand" "n")]
16213 UNSPEC_SCATTER_PREFETCH)]
16216 switch (INTVAL (operands[4]))
16220 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16223 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16225 gcc_unreachable ();
16228 [(set_attr "type" "sse")
16229 (set_attr "prefix" "evex")
16230 (set_attr "mode" "XI")])
16232 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
16233 [(set (match_operand:VF_512 0 "register_operand" "=v")
16235 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16238 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16239 [(set_attr "prefix" "evex")
16240 (set_attr "type" "sse")
16241 (set_attr "mode" "<MODE>")])
16243 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
16244 [(set (match_operand:VF_512 0 "register_operand" "=v")
16246 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16249 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16250 [(set_attr "prefix" "evex")
16251 (set_attr "type" "sse")
16252 (set_attr "mode" "<MODE>")])
16254 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
16255 [(set (match_operand:VF_128 0 "register_operand" "=v")
16258 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16260 (match_operand:VF_128 2 "register_operand" "v")
16263 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
16264 [(set_attr "length_immediate" "1")
16265 (set_attr "prefix" "evex")
16266 (set_attr "type" "sse")
16267 (set_attr "mode" "<MODE>")])
16269 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
16270 [(set (match_operand:VF_512 0 "register_operand" "=v")
16272 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16275 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16276 [(set_attr "prefix" "evex")
16277 (set_attr "type" "sse")
16278 (set_attr "mode" "<MODE>")])
16280 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
16281 [(set (match_operand:VF_128 0 "register_operand" "=v")
16284 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16286 (match_operand:VF_128 2 "register_operand" "v")
16289 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
16290 [(set_attr "length_immediate" "1")
16291 (set_attr "type" "sse")
16292 (set_attr "prefix" "evex")
16293 (set_attr "mode" "<MODE>")])
16295 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16297 ;; XOP instructions
16299 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16301 (define_code_iterator xop_plus [plus ss_plus])
16303 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
16304 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
16306 ;; XOP parallel integer multiply/add instructions.
16308 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
16309 [(set (match_operand:VI24_128 0 "register_operand" "=x")
16312 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
16313 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
16314 (match_operand:VI24_128 3 "register_operand" "x")))]
16316 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16317 [(set_attr "type" "ssemuladd")
16318 (set_attr "mode" "TI")])
16320 (define_insn "xop_p<macs>dql"
16321 [(set (match_operand:V2DI 0 "register_operand" "=x")
16326 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
16327 (parallel [(const_int 0) (const_int 2)])))
16330 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
16331 (parallel [(const_int 0) (const_int 2)]))))
16332 (match_operand:V2DI 3 "register_operand" "x")))]
16334 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16335 [(set_attr "type" "ssemuladd")
16336 (set_attr "mode" "TI")])
16338 (define_insn "xop_p<macs>dqh"
16339 [(set (match_operand:V2DI 0 "register_operand" "=x")
16344 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
16345 (parallel [(const_int 1) (const_int 3)])))
16348 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
16349 (parallel [(const_int 1) (const_int 3)]))))
16350 (match_operand:V2DI 3 "register_operand" "x")))]
16352 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16353 [(set_attr "type" "ssemuladd")
16354 (set_attr "mode" "TI")])
16356 ;; XOP parallel integer multiply/add instructions for the intrinsics
16357 (define_insn "xop_p<macs>wd"
16358 [(set (match_operand:V4SI 0 "register_operand" "=x")
16363 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
16364 (parallel [(const_int 1) (const_int 3)
16365 (const_int 5) (const_int 7)])))
16368 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
16369 (parallel [(const_int 1) (const_int 3)
16370 (const_int 5) (const_int 7)]))))
16371 (match_operand:V4SI 3 "register_operand" "x")))]
16373 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16374 [(set_attr "type" "ssemuladd")
16375 (set_attr "mode" "TI")])
16377 (define_insn "xop_p<madcs>wd"
16378 [(set (match_operand:V4SI 0 "register_operand" "=x")
16384 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
16385 (parallel [(const_int 0) (const_int 2)
16386 (const_int 4) (const_int 6)])))
16389 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
16390 (parallel [(const_int 0) (const_int 2)
16391 (const_int 4) (const_int 6)]))))
16396 (parallel [(const_int 1) (const_int 3)
16397 (const_int 5) (const_int 7)])))
16401 (parallel [(const_int 1) (const_int 3)
16402 (const_int 5) (const_int 7)])))))
16403 (match_operand:V4SI 3 "register_operand" "x")))]
16405 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16406 [(set_attr "type" "ssemuladd")
16407 (set_attr "mode" "TI")])
16409 ;; XOP parallel XMM conditional moves
16410 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
16411 [(set (match_operand:V 0 "register_operand" "=x,x")
16413 (match_operand:V 3 "nonimmediate_operand" "x,m")
16414 (match_operand:V 1 "register_operand" "x,x")
16415 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
16417 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16418 [(set_attr "type" "sse4arg")])
16420 ;; XOP horizontal add/subtract instructions
16421 (define_insn "xop_phadd<u>bw"
16422 [(set (match_operand:V8HI 0 "register_operand" "=x")
16426 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16427 (parallel [(const_int 0) (const_int 2)
16428 (const_int 4) (const_int 6)
16429 (const_int 8) (const_int 10)
16430 (const_int 12) (const_int 14)])))
16434 (parallel [(const_int 1) (const_int 3)
16435 (const_int 5) (const_int 7)
16436 (const_int 9) (const_int 11)
16437 (const_int 13) (const_int 15)])))))]
16439 "vphadd<u>bw\t{%1, %0|%0, %1}"
16440 [(set_attr "type" "sseiadd1")])
16442 (define_insn "xop_phadd<u>bd"
16443 [(set (match_operand:V4SI 0 "register_operand" "=x")
16448 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16449 (parallel [(const_int 0) (const_int 4)
16450 (const_int 8) (const_int 12)])))
16454 (parallel [(const_int 1) (const_int 5)
16455 (const_int 9) (const_int 13)]))))
16460 (parallel [(const_int 2) (const_int 6)
16461 (const_int 10) (const_int 14)])))
16465 (parallel [(const_int 3) (const_int 7)
16466 (const_int 11) (const_int 15)]))))))]
16468 "vphadd<u>bd\t{%1, %0|%0, %1}"
16469 [(set_attr "type" "sseiadd1")])
16471 (define_insn "xop_phadd<u>bq"
16472 [(set (match_operand:V2DI 0 "register_operand" "=x")
16478 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16479 (parallel [(const_int 0) (const_int 8)])))
16483 (parallel [(const_int 1) (const_int 9)]))))
16488 (parallel [(const_int 2) (const_int 10)])))
16492 (parallel [(const_int 3) (const_int 11)])))))
16498 (parallel [(const_int 4) (const_int 12)])))
16502 (parallel [(const_int 5) (const_int 13)]))))
16507 (parallel [(const_int 6) (const_int 14)])))
16511 (parallel [(const_int 7) (const_int 15)])))))))]
16513 "vphadd<u>bq\t{%1, %0|%0, %1}"
16514 [(set_attr "type" "sseiadd1")])
16516 (define_insn "xop_phadd<u>wd"
16517 [(set (match_operand:V4SI 0 "register_operand" "=x")
16521 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16522 (parallel [(const_int 0) (const_int 2)
16523 (const_int 4) (const_int 6)])))
16527 (parallel [(const_int 1) (const_int 3)
16528 (const_int 5) (const_int 7)])))))]
16530 "vphadd<u>wd\t{%1, %0|%0, %1}"
16531 [(set_attr "type" "sseiadd1")])
16533 (define_insn "xop_phadd<u>wq"
16534 [(set (match_operand:V2DI 0 "register_operand" "=x")
16539 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16540 (parallel [(const_int 0) (const_int 4)])))
16544 (parallel [(const_int 1) (const_int 5)]))))
16549 (parallel [(const_int 2) (const_int 6)])))
16553 (parallel [(const_int 3) (const_int 7)]))))))]
16555 "vphadd<u>wq\t{%1, %0|%0, %1}"
16556 [(set_attr "type" "sseiadd1")])
16558 (define_insn "xop_phadd<u>dq"
16559 [(set (match_operand:V2DI 0 "register_operand" "=x")
16563 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
16564 (parallel [(const_int 0) (const_int 2)])))
16568 (parallel [(const_int 1) (const_int 3)])))))]
16570 "vphadd<u>dq\t{%1, %0|%0, %1}"
16571 [(set_attr "type" "sseiadd1")])
16573 (define_insn "xop_phsubbw"
16574 [(set (match_operand:V8HI 0 "register_operand" "=x")
16578 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16579 (parallel [(const_int 0) (const_int 2)
16580 (const_int 4) (const_int 6)
16581 (const_int 8) (const_int 10)
16582 (const_int 12) (const_int 14)])))
16586 (parallel [(const_int 1) (const_int 3)
16587 (const_int 5) (const_int 7)
16588 (const_int 9) (const_int 11)
16589 (const_int 13) (const_int 15)])))))]
16591 "vphsubbw\t{%1, %0|%0, %1}"
16592 [(set_attr "type" "sseiadd1")])
16594 (define_insn "xop_phsubwd"
16595 [(set (match_operand:V4SI 0 "register_operand" "=x")
16599 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16600 (parallel [(const_int 0) (const_int 2)
16601 (const_int 4) (const_int 6)])))
16605 (parallel [(const_int 1) (const_int 3)
16606 (const_int 5) (const_int 7)])))))]
16608 "vphsubwd\t{%1, %0|%0, %1}"
16609 [(set_attr "type" "sseiadd1")])
16611 (define_insn "xop_phsubdq"
16612 [(set (match_operand:V2DI 0 "register_operand" "=x")
16616 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
16617 (parallel [(const_int 0) (const_int 2)])))
16621 (parallel [(const_int 1) (const_int 3)])))))]
16623 "vphsubdq\t{%1, %0|%0, %1}"
16624 [(set_attr "type" "sseiadd1")])
16626 ;; XOP permute instructions

;; vpperm: byte-wise two-source permute.  Operand 3 is the selector
;; vector; the condition forbids both operand 2 and operand 3 being
;; memory, matching the single-memory-operand encoding of XOP.
;; The two constraint alternatives let the memory operand be either
;; operand 3 (alt 0) or operand 2 (alt 1).
16627 (define_insn "xop_pperm"
16628 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
16630 [(match_operand:V16QI 1 "register_operand" "x,x")
16631 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16632 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
16633 UNSPEC_XOP_PERMUTE))]
16634 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16635 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16636 [(set_attr "type" "sse4arg")
16637 (set_attr "mode" "TI")])

16639 ;; XOP pack instructions that combine two vectors into a smaller vector

;; Pack two V2DI sources into one V4SI via vpperm; operand 3 (the byte
;; selector, marked "use") encodes the truncating pack.
;; NOTE(review): the truncate/vec_concat wrappers are in lines missing
;; from this extract.
16640 (define_insn "xop_pperm_pack_v2di_v4si"
16641 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
16644 (match_operand:V2DI 1 "register_operand" "x,x"))
16646 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
16647 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16648 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16649 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16650 [(set_attr "type" "sse4arg")
16651 (set_attr "mode" "TI")])

;; As above, packing two V4SI sources into one V8HI.
16653 (define_insn "xop_pperm_pack_v4si_v8hi"
16654 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
16657 (match_operand:V4SI 1 "register_operand" "x,x"))
16659 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
16660 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16661 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16662 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16663 [(set_attr "type" "sse4arg")
16664 (set_attr "mode" "TI")])

;; As above, packing two V8HI sources into one V16QI.
16666 (define_insn "xop_pperm_pack_v8hi_v16qi"
16667 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
16670 (match_operand:V8HI 1 "register_operand" "x,x"))
16672 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
16673 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16674 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16675 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16676 [(set_attr "type" "sse4arg")
16677 (set_attr "mode" "TI")])
16679 ;; XOP packed rotate instructions

;; rotl<mode>3 expander: if the rotate count is not a valid immediate,
;; broadcast the scalar count into a vector register and fall back to
;; the variable-count vprot (xop_vrotl) pattern.
;; NOTE(review): the DONE/FAIL control lines and part of the C body are
;; not visible in this extract.
16680 (define_expand "rotl<mode>3"
16681 [(set (match_operand:VI_128 0 "register_operand")
16683 (match_operand:VI_128 1 "nonimmediate_operand")
16684 (match_operand:SI 2 "general_operand")))]
16687 /* If we were given a scalar, convert it to parallel */
16688 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
16690 rtvec vs = rtvec_alloc (<ssescalarnum>);
16691 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
16692 rtx reg = gen_reg_rtx (<MODE>mode);
16693 rtx op2 = operands[2];
16696 if (GET_MODE (op2) != <ssescalarmode>mode)
16698 op2 = gen_reg_rtx (<ssescalarmode>mode);
16699 convert_move (op2, operands[2], false);
16702 for (i = 0; i < <ssescalarnum>; i++)
16703 RTVEC_ELT (vs, i) = op2;
16705 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par))
16706 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));

;; rotr<mode>3 expander: same broadcast strategy as rotl, but a right
;; rotate is implemented as a left rotate by the negated count.
16711 (define_expand "rotr<mode>3"
16712 [(set (match_operand:VI_128 0 "register_operand")
16714 (match_operand:VI_128 1 "nonimmediate_operand")
16715 (match_operand:SI 2 "general_operand")))]
16718 /* If we were given a scalar, convert it to parallel */
16719 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
16721 rtvec vs = rtvec_alloc (<ssescalarnum>);
16722 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
16723 rtx neg = gen_reg_rtx (<MODE>mode);
16724 rtx reg = gen_reg_rtx (<MODE>mode);
16725 rtx op2 = operands[2];
16728 if (GET_MODE (op2) != <ssescalarmode>mode)
16730 op2 = gen_reg_rtx (<ssescalarmode>mode);
16731 convert_move (op2, operands[2], false);
16734 for (i = 0; i < <ssescalarnum>; i++)
16735 RTVEC_ELT (vs, i) = op2;
16737 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
16738 emit_insn (gen_neg<mode>2 (neg, reg));
16739 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));

;; Immediate-count left rotate: vprot{b,w,d,q} with an immediate.
16744 (define_insn "xop_rotl<mode>3"
16745 [(set (match_operand:VI_128 0 "register_operand" "=x")
16747 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
16748 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
16750 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16751 [(set_attr "type" "sseishft")
16752 (set_attr "length_immediate" "1")
16753 (set_attr "mode" "TI")])

;; Immediate-count right rotate: emitted as vprot with the complementary
;; count (element width minus the requested count), computed into a
;; synthetic operand 3 at output time.
16755 (define_insn "xop_rotr<mode>3"
16756 [(set (match_operand:VI_128 0 "register_operand" "=x")
16758 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
16759 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
16763 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
16764 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
16766 [(set_attr "type" "sseishft")
16767 (set_attr "length_immediate" "1")
16768 (set_attr "mode" "TI")])

;; Variable-count right rotate: negate the per-element counts and use
;; the variable left-rotate pattern.
16770 (define_expand "vrotr<mode>3"
16771 [(match_operand:VI_128 0 "register_operand")
16772 (match_operand:VI_128 1 "register_operand")
16773 (match_operand:VI_128 2 "register_operand")]
16776 rtx reg = gen_reg_rtx (<MODE>mode);
16777 emit_insn (gen_neg<mode>2 (reg, operands[2]));
16778 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));

;; Variable-count left rotate: thin wrapper over xop_vrotl<mode>3.
16782 (define_expand "vrotl<mode>3"
16783 [(match_operand:VI_128 0 "register_operand")
16784 (match_operand:VI_128 1 "register_operand")
16785 (match_operand:VI_128 2 "register_operand")]
16788 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));

;; vprot with a vector of per-element counts.  The if_then_else RTL
;; models vprot's semantics: a non-negative count rotates left, a
;; negative count rotates right by the negated amount (hence the
;; (neg ... (match_dup 2)) arm).  Only one of operands 1/2 may be memory.
16792 (define_insn "xop_vrotl<mode>3"
16793 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16794 (if_then_else:VI_128
16796 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16799 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16803 (neg:VI_128 (match_dup 2)))))]
16804 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16805 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16806 [(set_attr "type" "sseishft")
16807 (set_attr "prefix_data16" "0")
16808 (set_attr "prefix_extra" "2")
16809 (set_attr "mode" "TI")])
16811 ;; XOP packed shift instructions.

;; Variable logical right shift for byte/word vectors (XOP only):
;; XOP has a single variable shift (vpshl) whose per-element count is
;; signed -- negative counts shift right -- so vlshr is implemented by
;; negating the counts and emitting xop_shl.
;; NOTE(review): several condition and DONE lines are missing from this
;; extract throughout this group.
16812 (define_expand "vlshr<mode>3"
16813 [(set (match_operand:VI12_128 0 "register_operand")
16815 (match_operand:VI12_128 1 "register_operand")
16816 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16819 rtx neg = gen_reg_rtx (<MODE>mode);
16820 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16821 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));

;; Variable logical right shift for dword/qword 128-bit vectors; the
;; negate-and-shl path is the XOP fallback (AVX2 has native vpsrlv).
16825 (define_expand "vlshr<mode>3"
16826 [(set (match_operand:VI48_128 0 "register_operand")
16828 (match_operand:VI48_128 1 "register_operand")
16829 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16830 "TARGET_AVX2 || TARGET_XOP"
16834 rtx neg = gen_reg_rtx (<MODE>mode);
16835 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16836 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));

;; 512-bit variable logical right shift (AVX-512; matched directly).
16841 (define_expand "vlshr<mode>3"
16842 [(set (match_operand:VI48_512 0 "register_operand")
16844 (match_operand:VI48_512 1 "register_operand")
16845 (match_operand:VI48_512 2 "nonimmediate_operand")))]

;; 256-bit variable logical right shift.
16848 (define_expand "vlshr<mode>3"
16849 [(set (match_operand:VI48_256 0 "register_operand")
16851 (match_operand:VI48_256 1 "register_operand")
16852 (match_operand:VI48_256 2 "nonimmediate_operand")))]

;; Variable arithmetic right shift of V8HI: XOP path negates the counts
;; and uses vpshaw; AVX512BW+VL matches the pattern directly.
16855 (define_expand "vashrv8hi3<mask_name>"
16856 [(set (match_operand:V8HI 0 "register_operand")
16858 (match_operand:V8HI 1 "register_operand")
16859 (match_operand:V8HI 2 "nonimmediate_operand")))]
16860 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
16864 rtx neg = gen_reg_rtx (V8HImode);
16865 emit_insn (gen_negv8hi2 (neg, operands[2]));
16866 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));

;; Variable arithmetic right shift of V16QI via negated vpshab.
16871 (define_expand "vashrv16qi3"
16872 [(set (match_operand:V16QI 0 "register_operand")
16874 (match_operand:V16QI 1 "register_operand")
16875 (match_operand:V16QI 2 "nonimmediate_operand")))]
16878 rtx neg = gen_reg_rtx (V16QImode);
16879 emit_insn (gen_negv16qi2 (neg, operands[2]));
16880 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));

;; Variable arithmetic right shift of V2DI: XOP path via negated vpshaq
;; (SSE/AVX2 have no 64-bit arithmetic shift; AVX512VL does).
16884 (define_expand "vashrv2di3<mask_name>"
16885 [(set (match_operand:V2DI 0 "register_operand")
16887 (match_operand:V2DI 1 "register_operand")
16888 (match_operand:V2DI 2 "nonimmediate_operand")))]
16889 "TARGET_XOP || TARGET_AVX512VL"
16893 rtx neg = gen_reg_rtx (V2DImode);
16894 emit_insn (gen_negv2di2 (neg, operands[2]));
16895 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));

;; Variable arithmetic right shift of V4SI: negated vpshad on XOP,
;; direct match on AVX2 (vpsravd).
16900 (define_expand "vashrv4si3"
16901 [(set (match_operand:V4SI 0 "register_operand")
16902 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
16903 (match_operand:V4SI 2 "nonimmediate_operand")))]
16904 "TARGET_AVX2 || TARGET_XOP"
16908 rtx neg = gen_reg_rtx (V4SImode);
16909 emit_insn (gen_negv4si2 (neg, operands[2]));
16910 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));

;; 512-bit variable arithmetic right shift (AVX-512F, matched directly).
16915 (define_expand "vashrv16si3"
16916 [(set (match_operand:V16SI 0 "register_operand")
16917 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
16918 (match_operand:V16SI 2 "nonimmediate_operand")))]

;; 256-bit variable arithmetic right shift.
16921 (define_expand "vashrv8si3"
16922 [(set (match_operand:V8SI 0 "register_operand")
16923 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
16924 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; Variable left shift for byte/word vectors: positive counts shift
;; left, so this maps straight onto the XOP vpsha pattern.
;; NOTE(review): conditions/DONE lines are missing from this extract.
16927 (define_expand "vashl<mode>3"
16928 [(set (match_operand:VI12_128 0 "register_operand")
16930 (match_operand:VI12_128 1 "register_operand")
16931 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16934 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));

;; Variable left shift for dword/qword 128-bit vectors; XOP fallback
;; forces the count into a register and emits vpsha.
16938 (define_expand "vashl<mode>3"
16939 [(set (match_operand:VI48_128 0 "register_operand")
16941 (match_operand:VI48_128 1 "register_operand")
16942 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16943 "TARGET_AVX2 || TARGET_XOP"
16947 operands[2] = force_reg (<MODE>mode, operands[2]);
16948 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));

;; 512-bit variable left shift (matched directly).
16953 (define_expand "vashl<mode>3"
16954 [(set (match_operand:VI48_512 0 "register_operand")
16956 (match_operand:VI48_512 1 "register_operand")
16957 (match_operand:VI48_512 2 "nonimmediate_operand")))]

;; 256-bit variable left shift.
16960 (define_expand "vashl<mode>3"
16961 [(set (match_operand:VI48_256 0 "register_operand")
16963 (match_operand:VI48_256 1 "register_operand")
16964 (match_operand:VI48_256 2 "nonimmediate_operand")))]

;; vpsha{b,w,d,q}: per-element arithmetic shift with signed counts.
;; The if_then_else models the hardware: non-negative count => left
;; shift, negative count => arithmetic right shift by -count.
16967 (define_insn "xop_sha<mode>3"
16968 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16969 (if_then_else:VI_128
16971 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16974 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16978 (neg:VI_128 (match_dup 2)))))]
16979 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16980 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16981 [(set_attr "type" "sseishft")
16982 (set_attr "prefix_data16" "0")
16983 (set_attr "prefix_extra" "2")
16984 (set_attr "mode" "TI")])

;; vpshl{b,w,d,q}: as xop_sha but the negative-count arm is a logical
;; right shift.
16986 (define_insn "xop_shl<mode>3"
16987 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16988 (if_then_else:VI_128
16990 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16993 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16997 (neg:VI_128 (match_dup 2)))))]
16998 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16999 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17000 [(set_attr "type" "sseishft")
17001 (set_attr "prefix_data16" "0")
17002 (set_attr "prefix_extra" "2")
17003 (set_attr "mode" "TI")])
;; Byte-element shifts (no native SSE/AVX byte shift).  On XOP, the
;; scalar count is broadcast to a V16QI vector; for right shifts the
;; count is negated (CONST_INT directly, otherwise via negv16qi2) and
;; vpshlb/vpshab does the work.  Otherwise fall back to the generic
;; widening lowering in ix86_expand_vecop_qihi.
;; NOTE(review): parts of the C body (negate bookkeeping, DONE) are in
;; lines missing from this extract.
17005 (define_expand "<shift_insn><mode>3"
17006 [(set (match_operand:VI1_AVX512 0 "register_operand")
17007 (any_shift:VI1_AVX512
17008 (match_operand:VI1_AVX512 1 "register_operand")
17009 (match_operand:SI 2 "nonmemory_operand")))]
17012 if (TARGET_XOP && <MODE>mode == V16QImode)
17014 bool negate = false;
17015 rtx (*gen) (rtx, rtx, rtx);
17019 if (<CODE> != ASHIFT)
17021 if (CONST_INT_P (operands[2]))
17022 operands[2] = GEN_INT (-INTVAL (operands[2]));
17026 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
17027 for (i = 0; i < 16; i++)
17028 XVECEXP (par, 0, i) = operands[2];
17030 tmp = gen_reg_rtx (V16QImode);
17031 emit_insn (gen_vec_initv16qiqi (tmp, par));
17034 emit_insn (gen_negv16qi2 (tmp, tmp));
17036 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
17037 emit_insn (gen (operands[0], operands[1], tmp));
17040 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);

;; 64-bit arithmetic right shift of V2DI.  AVX512VL matches directly
;; (vpsraq); on XOP the scalar count is negated, broadcast to V2DI and
;; fed to vpshaq.
17044 (define_expand "ashrv2di3"
17045 [(set (match_operand:V2DI 0 "register_operand")
17047 (match_operand:V2DI 1 "register_operand")
17048 (match_operand:DI 2 "nonmemory_operand")))]
17049 "TARGET_XOP || TARGET_AVX512VL"
17051 if (!TARGET_AVX512VL)
17053 rtx reg = gen_reg_rtx (V2DImode);
17055 bool negate = false;
17058 if (CONST_INT_P (operands[2]))
17059 operands[2] = GEN_INT (-INTVAL (operands[2]));
17063 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
17064 for (i = 0; i < 2; i++)
17065 XVECEXP (par, 0, i) = operands[2];
17067 emit_insn (gen_vec_initv2didi (reg, par));
17070 emit_insn (gen_negv2di2 (reg, reg));
17072 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
17077 ;; XOP FRCZ support

;; vfrcz: extract the fractional part of each element (FMAMODE covers
;; the scalar and vector FP modes used by the FMA patterns).
17078 (define_insn "xop_frcz<mode>2"
17079 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
17081 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
17084 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
17085 [(set_attr "type" "ssecvt1")
17086 (set_attr "mode" "<MODE>")])

;; Scalar (vm*) form expander: operand 2 supplies the zero vector the
;; insn pattern merges into the untouched upper elements.
17088 (define_expand "xop_vmfrcz<mode>2"
17089 [(set (match_operand:VF_128 0 "register_operand")
17092 [(match_operand:VF_128 1 "nonimmediate_operand")]
17097 "operands[2] = CONST0_RTX (<MODE>mode);")

;; Scalar vfrczs[sd]: fractional part of element 0; operand 2 is the
;; required zero constant (const0_operand).
17099 (define_insn "*xop_vmfrcz<mode>2"
17100 [(set (match_operand:VF_128 0 "register_operand" "=x")
17103 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
17105 (match_operand:VF_128 2 "const0_operand")
17108 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
17109 [(set_attr "type" "ssecvt1")
17110 (set_attr "mode" "<MODE>")])
;; vpcom: XOP integer compares producing all-ones/all-zero element
;; masks.  %Y1 prints the comparison-code suffix of the match_operator.

;; Signed compare.
17112 (define_insn "xop_maskcmp<mode>3"
17113 [(set (match_operand:VI_128 0 "register_operand" "=x")
17114 (match_operator:VI_128 1 "ix86_comparison_int_operator"
17115 [(match_operand:VI_128 2 "register_operand" "x")
17116 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
17118 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17119 [(set_attr "type" "sse4arg")
17120 (set_attr "prefix_data16" "0")
17121 (set_attr "prefix_rep" "0")
17122 (set_attr "prefix_extra" "2")
17123 (set_attr "length_immediate" "1")
17124 (set_attr "mode" "TI")])

;; Unsigned compare (vpcom*u*).
17126 (define_insn "xop_maskcmp_uns<mode>3"
17127 [(set (match_operand:VI_128 0 "register_operand" "=x")
17128 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
17129 [(match_operand:VI_128 2 "register_operand" "x")
17130 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
17132 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17133 [(set_attr "type" "ssecmp")
17134 (set_attr "prefix_data16" "0")
17135 (set_attr "prefix_rep" "0")
17136 (set_attr "prefix_extra" "2")
17137 (set_attr "length_immediate" "1")
17138 (set_attr "mode" "TI")])

17140 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
17141 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
17142 ;; the exact instruction generated for the intrinsic.
;; The UNSPEC wrapper keeps the combiner from canonicalizing eq/ne
;; unsigned compares into their signed equivalents.
17143 (define_insn "xop_maskcmp_uns2<mode>3"
17144 [(set (match_operand:VI_128 0 "register_operand" "=x")
17146 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
17147 [(match_operand:VI_128 2 "register_operand" "x")
17148 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
17149 UNSPEC_XOP_UNSIGNED_CMP))]
17151 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17152 [(set_attr "type" "ssecmp")
17153 (set_attr "prefix_data16" "0")
17154 (set_attr "prefix_extra" "2")
17155 (set_attr "length_immediate" "1")
17156 (set_attr "mode" "TI")])
17158 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
17159 ;; being added here to be complete.
;; Operand 3 selects the variant at output time: nonzero => vpcomtrue
;; (all-ones result), zero => vpcomfalse (all-zeros result).
17160 (define_insn "xop_pcom_tf<mode>3"
17161 [(set (match_operand:VI_128 0 "register_operand" "=x")
17163 [(match_operand:VI_128 1 "register_operand" "x")
17164 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
17165 (match_operand:SI 3 "const_int_operand" "n")]
17166 UNSPEC_XOP_TRUEFALSE))]
17169 return ((INTVAL (operands[3]) != 0)
17170 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17171 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
17173 [(set_attr "type" "ssecmp")
17174 (set_attr "prefix_data16" "0")
17175 (set_attr "prefix_extra" "2")
17176 (set_attr "length_immediate" "1")
17177 (set_attr "mode" "TI")])

;; vpermil2p[sd]: two-source FP permute; operand 3 is the integer
;; selector vector, operand 4 the 2-bit immediate control.
17179 (define_insn "xop_vpermil2<mode>3"
17180 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
17182 [(match_operand:VF_128_256 1 "register_operand" "x,x")
17183 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
17184 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
17185 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
17188 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
17189 [(set_attr "type" "sse4arg")
17190 (set_attr "length_immediate" "1")
17191 (set_attr "mode" "<MODE>")])
17193 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; AES-NI patterns.  Each has a legacy SSE alternative (two-operand,
;; operand 0 tied to operand 1 via "0") and a VEX three-operand
;; alternative; the isa attribute selects noavx vs avx.

;; One AES encryption round.
17195 (define_insn "aesenc"
17196 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17197 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17198 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17202 aesenc\t{%2, %0|%0, %2}
17203 vaesenc\t{%2, %1, %0|%0, %1, %2}"
17204 [(set_attr "isa" "noavx,avx")
17205 (set_attr "type" "sselog1")
17206 (set_attr "prefix_extra" "1")
17207 (set_attr "prefix" "orig,vex")
17208 (set_attr "btver2_decode" "double,double")
17209 (set_attr "mode" "TI")])

;; Final AES encryption round.
17211 (define_insn "aesenclast"
17212 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17213 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17214 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17215 UNSPEC_AESENCLAST))]
17218 aesenclast\t{%2, %0|%0, %2}
17219 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
17220 [(set_attr "isa" "noavx,avx")
17221 (set_attr "type" "sselog1")
17222 (set_attr "prefix_extra" "1")
17223 (set_attr "prefix" "orig,vex")
17224 (set_attr "btver2_decode" "double,double")
17225 (set_attr "mode" "TI")])

;; One AES decryption round.
17227 (define_insn "aesdec"
17228 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17229 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17230 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17234 aesdec\t{%2, %0|%0, %2}
17235 vaesdec\t{%2, %1, %0|%0, %1, %2}"
17236 [(set_attr "isa" "noavx,avx")
17237 (set_attr "type" "sselog1")
17238 (set_attr "prefix_extra" "1")
17239 (set_attr "prefix" "orig,vex")
17240 (set_attr "btver2_decode" "double,double")
17241 (set_attr "mode" "TI")])

;; Final AES decryption round.
17243 (define_insn "aesdeclast"
17244 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17245 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17246 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17247 UNSPEC_AESDECLAST))]
17250 aesdeclast\t{%2, %0|%0, %2}
17251 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
17252 [(set_attr "isa" "noavx,avx")
17253 (set_attr "type" "sselog1")
17254 (set_attr "prefix_extra" "1")
17255 (set_attr "prefix" "orig,vex")
17256 (set_attr "btver2_decode" "double,double")
17257 (set_attr "mode" "TI")])

;; InvMixColumns transform; %v emits the VEX "v" prefix when AVX is on.
17259 (define_insn "aesimc"
17260 [(set (match_operand:V2DI 0 "register_operand" "=x")
17261 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
17264 "%vaesimc\t{%1, %0|%0, %1}"
17265 [(set_attr "type" "sselog1")
17266 (set_attr "prefix_extra" "1")
17267 (set_attr "prefix" "maybe_vex")
17268 (set_attr "mode" "TI")])

;; Round-key generation helper; operand 2 is the 8-bit round constant.
17270 (define_insn "aeskeygenassist"
17271 [(set (match_operand:V2DI 0 "register_operand" "=x")
17272 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
17273 (match_operand:SI 2 "const_0_to_255_operand" "n")]
17274 UNSPEC_AESKEYGENASSIST))]
17276 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
17277 [(set_attr "type" "sselog1")
17278 (set_attr "prefix_extra" "1")
17279 (set_attr "length_immediate" "1")
17280 (set_attr "prefix" "maybe_vex")
17281 (set_attr "mode" "TI")])
;; Carry-less multiply of the quadwords selected by immediate operand 3;
;; legacy SSE (tied-dest) and VEX three-operand alternatives.
17283 (define_insn "pclmulqdq"
17284 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17285 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17286 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
17287 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
17291 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
17292 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17293 [(set_attr "isa" "noavx,avx")
17294 (set_attr "type" "sselog1")
17295 (set_attr "prefix_extra" "1")
17296 (set_attr "length_immediate" "1")
17297 (set_attr "prefix" "orig,vex")
17298 (set_attr "mode" "TI")])
;; vzeroall expander: builds a parallel containing the UNSPEC_VOLATILE
;; marker plus one (set reg 0) per SSE register -- 16 regs in 64-bit
;; mode, 8 otherwise -- so dataflow knows every vector reg is clobbered.
17300 (define_expand "avx_vzeroall"
17301 [(match_par_dup 0 [(const_int 0)])]
17304 int nregs = TARGET_64BIT ? 16 : 8;
17307 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
17309 XVECEXP (operands[0], 0, 0)
17310 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
17313 for (regno = 0; regno < nregs; regno++)
17314 XVECEXP (operands[0], 0, regno + 1)
17315 = gen_rtx_SET (gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
17316 CONST0_RTX (V8SImode))

;; Matching insn for the parallel built above.
17319 (define_insn "*avx_vzeroall"
17320 [(match_parallel 0 "vzeroall_operation"
17321 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
17324 [(set_attr "type" "sse")
17325 (set_attr "modrm" "0")
17326 (set_attr "memory" "none")
17327 (set_attr "prefix" "vex")
17328 (set_attr "btver2_decode" "vector")
17329 (set_attr "mode" "OI")])

17331 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
17332 ;; if the upper 128bits are unused.
17333 (define_insn "avx_vzeroupper"
17334 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
17337 [(set_attr "type" "sse")
17338 (set_attr "modrm" "0")
17339 (set_attr "memory" "none")
17340 (set_attr "prefix" "vex")
17341 (set_attr "btver2_decode" "vector")
17342 (set_attr "mode" "OI")])
;; Maps each integer vector mode to the AVX-512 feature providing its
;; EVEX-encoded vpbroadcast (byte/word need AVX512BW, dword/qword AVX512F).
17344 (define_mode_attr pbroadcast_evex_isa
17345 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
17346 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
17347 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
17348 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])

;; vpbroadcast of element 0 of an xmm-sized source into a full vector;
;; alternative 1 allows EVEX registers under <pbroadcast_evex_isa>.
17350 (define_insn "avx2_pbroadcast<mode>"
17351 [(set (match_operand:VI 0 "register_operand" "=x,v")
17353 (vec_select:<ssescalarmode>
17354 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
17355 (parallel [(const_int 0)]))))]
17357 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
17358 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
17359 (set_attr "type" "ssemov")
17360 (set_attr "prefix_extra" "1")
17361 (set_attr "prefix" "vex,evex")
17362 (set_attr "mode" "<sseinsnmode>")])

;; As above with a 256-bit source: element 0 is broadcast to all lanes.
;; Register alternatives use %x1 to refer to the low xmm part.
17364 (define_insn "avx2_pbroadcast<mode>_1"
17365 [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
17366 (vec_duplicate:VI_256
17367 (vec_select:<ssescalarmode>
17368 (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
17369 (parallel [(const_int 0)]))))]
17372 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
17373 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
17374 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
17375 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
17376 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
17377 (set_attr "type" "ssemov")
17378 (set_attr "prefix_extra" "1")
17379 (set_attr "prefix" "vex")
17380 (set_attr "mode" "<sseinsnmode>")])
;; vperm{d,q,ps,pd} with a variable (register) index vector, optionally
;; masked.  32/64-bit elements, 256/512-bit vectors.
17382 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
17383 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
17384 (unspec:VI48F_256_512
17385 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
17386 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17388 "TARGET_AVX2 && <mask_mode512bit_condition>"
17389 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17390 [(set_attr "type" "sselog")
17391 (set_attr "prefix" "<mask_prefix2>")
17392 (set_attr "mode" "<sseinsnmode>")])

;; vpermb: byte-element variable permute (AVX512VBMI).
17394 (define_insn "<avx512>_permvar<mode><mask_name>"
17395 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17396 (unspec:VI1_AVX512VL
17397 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
17398 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17400 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
17401 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17402 [(set_attr "type" "sselog")
17403 (set_attr "prefix" "<mask_prefix2>")
17404 (set_attr "mode" "<sseinsnmode>")])

;; vpermw: word-element variable permute (AVX512BW).
17406 (define_insn "<avx512>_permvar<mode><mask_name>"
17407 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17408 (unspec:VI2_AVX512VL
17409 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
17410 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17412 "TARGET_AVX512BW && <mask_mode512bit_condition>"
17413 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17414 [(set_attr "type" "sselog")
17415 (set_attr "prefix" "<mask_prefix2>")
17416 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit-element permute with an 8-bit immediate: the expander splits
;; the control byte into four 2-bit lane selectors for the _1 insn.
17418 (define_expand "avx2_perm<mode>"
17419 [(match_operand:VI8F_256 0 "register_operand")
17420 (match_operand:VI8F_256 1 "nonimmediate_operand")
17421 (match_operand:SI 2 "const_0_to_255_operand")]
17424 int mask = INTVAL (operands[2]);
17425 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
17426 GEN_INT ((mask >> 0) & 3),
17427 GEN_INT ((mask >> 2) & 3),
17428 GEN_INT ((mask >> 4) & 3),
17429 GEN_INT ((mask >> 6) & 3)));

;; Masked variant: same splitting, forwarding merge source (op 3) and
;; mask register (op 4) to the _1_mask insn.
17433 (define_expand "avx512vl_perm<mode>_mask"
17434 [(match_operand:VI8F_256 0 "register_operand")
17435 (match_operand:VI8F_256 1 "nonimmediate_operand")
17436 (match_operand:SI 2 "const_0_to_255_operand")
17437 (match_operand:VI8F_256 3 "vector_move_operand")
17438 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17441 int mask = INTVAL (operands[2]);
17442 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
17443 GEN_INT ((mask >> 0) & 3),
17444 GEN_INT ((mask >> 2) & 3),
17445 GEN_INT ((mask >> 4) & 3),
17446 GEN_INT ((mask >> 6) & 3),
17447 operands[3], operands[4]));

;; Matching insn: reassembles the four selectors into the immediate and
;; prints vpermq/vpermpd.  Note operands[2] is reused to hold the byte.
17451 (define_insn "avx2_perm<mode>_1<mask_name>"
17452 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17453 (vec_select:VI8F_256
17454 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
17455 (parallel [(match_operand 2 "const_0_to_3_operand")
17456 (match_operand 3 "const_0_to_3_operand")
17457 (match_operand 4 "const_0_to_3_operand")
17458 (match_operand 5 "const_0_to_3_operand")])))]
17459 "TARGET_AVX2 && <mask_mode512bit_condition>"
17462 mask |= INTVAL (operands[2]) << 0;
17463 mask |= INTVAL (operands[3]) << 2;
17464 mask |= INTVAL (operands[4]) << 4;
17465 mask |= INTVAL (operands[5]) << 6;
17466 operands[2] = GEN_INT (mask);
17467 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
17469 [(set_attr "type" "sselog")
17470 (set_attr "prefix" "<mask_prefix2>")
17471 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit vpermq/vpermpd: the same 8-bit control is applied to both
;; 256-bit halves, so the expander emits eight selectors -- the low
;; four plus the same four offset by 4 for the upper half.
17473 (define_expand "avx512f_perm<mode>"
17474 [(match_operand:V8FI 0 "register_operand")
17475 (match_operand:V8FI 1 "nonimmediate_operand")
17476 (match_operand:SI 2 "const_0_to_255_operand")]
17479 int mask = INTVAL (operands[2]);
17480 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
17481 GEN_INT ((mask >> 0) & 3),
17482 GEN_INT ((mask >> 2) & 3),
17483 GEN_INT ((mask >> 4) & 3),
17484 GEN_INT ((mask >> 6) & 3),
17485 GEN_INT (((mask >> 0) & 3) + 4),
17486 GEN_INT (((mask >> 2) & 3) + 4),
17487 GEN_INT (((mask >> 4) & 3) + 4),
17488 GEN_INT (((mask >> 6) & 3) + 4)));

;; Masked 512-bit variant, forwarding merge source and mask register.
17492 (define_expand "avx512f_perm<mode>_mask"
17493 [(match_operand:V8FI 0 "register_operand")
17494 (match_operand:V8FI 1 "nonimmediate_operand")
17495 (match_operand:SI 2 "const_0_to_255_operand")
17496 (match_operand:V8FI 3 "vector_move_operand")
17497 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17500 int mask = INTVAL (operands[2]);
17501 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
17502 GEN_INT ((mask >> 0) & 3),
17503 GEN_INT ((mask >> 2) & 3),
17504 GEN_INT ((mask >> 4) & 3),
17505 GEN_INT ((mask >> 6) & 3),
17506 GEN_INT (((mask >> 0) & 3) + 4),
17507 GEN_INT (((mask >> 2) & 3) + 4),
17508 GEN_INT (((mask >> 4) & 3) + 4),
17509 GEN_INT (((mask >> 6) & 3) + 4),
17510 operands[3], operands[4]));

;; Matching insn: the condition enforces that the upper-half selectors
;; (operands 6-9) mirror the lower-half ones (+4), i.e. both halves use
;; the same control byte, then rebuilds the immediate for output.
17514 (define_insn "avx512f_perm<mode>_1<mask_name>"
17515 [(set (match_operand:V8FI 0 "register_operand" "=v")
17517 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
17518 (parallel [(match_operand 2 "const_0_to_3_operand")
17519 (match_operand 3 "const_0_to_3_operand")
17520 (match_operand 4 "const_0_to_3_operand")
17521 (match_operand 5 "const_0_to_3_operand")
17522 (match_operand 6 "const_4_to_7_operand")
17523 (match_operand 7 "const_4_to_7_operand")
17524 (match_operand 8 "const_4_to_7_operand")
17525 (match_operand 9 "const_4_to_7_operand")])))]
17526 "TARGET_AVX512F && <mask_mode512bit_condition>
17527 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
17528 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
17529 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
17530 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
17533 mask |= INTVAL (operands[2]) << 0;
17534 mask |= INTVAL (operands[3]) << 2;
17535 mask |= INTVAL (operands[4]) << 4;
17536 mask |= INTVAL (operands[5]) << 6;
17537 operands[2] = GEN_INT (mask);
17538 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
17540 [(set_attr "type" "sselog")
17541 (set_attr "prefix" "<mask_prefix2>")
17542 (set_attr "mode" "<sseinsnmode>")])
;; vperm2i128: select/permute 128-bit lanes of two V4DI sources per
;; immediate operand 3.
17544 (define_insn "avx2_permv2ti"
17545 [(set (match_operand:V4DI 0 "register_operand" "=x")
17547 [(match_operand:V4DI 1 "register_operand" "x")
17548 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
17549 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17552 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17553 [(set_attr "type" "sselog")
17554 (set_attr "prefix" "vex")
17555 (set_attr "mode" "OI")])

;; vbroadcastsd: duplicate element 0 of a V2DF register to all four
;; V4DF lanes.
17557 (define_insn "avx2_vec_dupv4df"
17558 [(set (match_operand:V4DF 0 "register_operand" "=v")
17559 (vec_duplicate:V4DF
17561 (match_operand:V2DF 1 "register_operand" "v")
17562 (parallel [(const_int 0)]))))]
17564 "vbroadcastsd\t{%1, %0|%0, %1}"
17565 [(set_attr "type" "sselog1")
17566 (set_attr "prefix" "maybe_evex")
17567 (set_attr "mode" "V4DF")])
17569 (define_insn "<avx512>_vec_dup<mode>_1"
17570 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
17571 (vec_duplicate:VI_AVX512BW
17572 (vec_select:<ssescalarmode>
17573 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
17574 (parallel [(const_int 0)]))))]
17577 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
17578 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
17579 [(set_attr "type" "ssemov")
17580 (set_attr "prefix" "evex")
17581 (set_attr "mode" "<sseinsnmode>")])
17583 (define_insn "<avx512>_vec_dup<mode><mask_name>"
17584 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
17585 (vec_duplicate:V48_AVX512VL
17586 (vec_select:<ssescalarmode>
17587 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17588 (parallel [(const_int 0)]))))]
17591 /* There is no DF broadcast (in AVX-512*) to 128b register.
17592 Mimic it with integer variant. */
17593 if (<MODE>mode == V2DFmode)
17594 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
17596 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
17597 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}";
17599 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
17601 [(set_attr "type" "ssemov")
17602 (set_attr "prefix" "evex")
17603 (set_attr "mode" "<sseinsnmode>")])
17605 (define_insn "<avx512>_vec_dup<mode><mask_name>"
17606 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
17607 (vec_duplicate:VI12_AVX512VL
17608 (vec_select:<ssescalarmode>
17609 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17610 (parallel [(const_int 0)]))))]
17612 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17613 [(set_attr "type" "ssemov")
17614 (set_attr "prefix" "evex")
17615 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a whole 128-bit lane into a 512-bit vector of 4-byte
;; elements: vshuf*32x4 for a register source, vbroadcast*32x4 for a
;; memory source.
17617 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
17618 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17619 (vec_duplicate:V16FI
17620 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
17623 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
17624 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17625 [(set_attr "type" "ssemov")
17626 (set_attr "prefix" "evex")
17627 (set_attr "mode" "<sseinsnmode>")])

;; Broadcast a 256-bit half into a 512-bit vector of 8-byte elements:
;; vshuf*64x2 (imm 0x44 keeps lane order 0,1,0,1) for registers,
;; vbroadcast*64x4 for memory.
17629 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
17630 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
17631 (vec_duplicate:V8FI
17632 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17635 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17636 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17637 [(set_attr "type" "ssemov")
17638 (set_attr "prefix" "evex")
17639 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a scalar (vector register/memory, or a GPR via %k1) into
;; a 1/2-byte-element AVX-512 vector.
17641 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
17642 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
17643 (vec_duplicate:VI12_AVX512VL
17644 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
17647 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
17648 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
17649 [(set_attr "type" "ssemov")
17650 (set_attr "prefix" "evex")
17651 (set_attr "mode" "<sseinsnmode>")])

;; Same for 4/8-byte elements.  The "enabled" attribute limits the GPR
;; alternative to integer scalar modes, and to DImode only on 64-bit
;; targets.
17653 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
17654 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
17655 (vec_duplicate:V48_AVX512VL
17656 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
17658 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17659 [(set_attr "type" "ssemov")
17660 (set_attr "prefix" "evex")
17661 (set_attr "mode" "<sseinsnmode>")
17662 (set (attr "enabled")
17663 (if_then_else (eq_attr "alternative" "1")
17664 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
17665 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
;; V4SF scalar broadcast: vshufps/vbroadcastss under AVX, shufps for
;; the legacy SSE (destructive, operand 1 tied to 0) alternative.
17668 (define_insn "vec_dupv4sf"
17669 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
17670 (vec_duplicate:V4SF
17671 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
17674 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
17675 vbroadcastss\t{%1, %0|%0, %1}
17676 shufps\t{$0, %0, %0|%0, %0, 0}"
17677 [(set_attr "isa" "avx,avx,noavx")
17678 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
17679 (set_attr "length_immediate" "1,0,1")
17680 (set_attr "prefix_extra" "0,1,*")
17681 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
17682 (set_attr "mode" "V4SF")])

;; V4SI scalar broadcast: pshufd from a register, vbroadcastss from
;; memory (same bit pattern, shorter encoding), shufps without AVX.
17684 (define_insn "*vec_dupv4si"
17685 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
17686 (vec_duplicate:V4SI
17687 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
17690 %vpshufd\t{$0, %1, %0|%0, %1, 0}
17691 vbroadcastss\t{%1, %0|%0, %1}
17692 shufps\t{$0, %0, %0|%0, %0, 0}"
17693 [(set_attr "isa" "sse2,avx,noavx")
17694 (set_attr "type" "sselog1,ssemov,sselog1")
17695 (set_attr "length_immediate" "1,0,1")
17696 (set_attr "prefix_extra" "0,1,*")
17697 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
17698 (set_attr "mode" "TI,V4SF,V4SF")])

;; V2DI scalar broadcast: punpcklqdq (SSE2), vpunpcklqdq (AVX), or
;; movddup from memory (SSE3).
17700 (define_insn "*vec_dupv2di"
17701 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
17702 (vec_duplicate:V2DI
17703 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,m,0")))]
17707 vpunpcklqdq\t{%d1, %0|%0, %d1}
17708 %vmovddup\t{%1, %0|%0, %1}
17710 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
17711 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
17712 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
17713 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Broadcast a 128-bit memory operand into both halves of a 256-bit
;; integer vector; AVX512DQ/AVX512VL alternatives use the EVEX forms.
17715 (define_insn "avx2_vbroadcasti128_<mode>"
17716 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
17718 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
17722 vbroadcasti128\t{%1, %0|%0, %1}
17723 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
17724 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
17725 [(set_attr "isa" "*,avx512dq,avx512vl")
17726 (set_attr "type" "ssemov")
17727 (set_attr "prefix_extra" "1")
17728 (set_attr "prefix" "vex,evex,evex")
17729 (set_attr "mode" "OI")])

17731 ;; Modes handled by AVX vec_dup patterns.
17732 (define_mode_iterator AVX_VEC_DUP_MODE
17733 [V8SI V8SF V4DI V4DF])
17734 ;; Modes handled by AVX2 vec_dup patterns.
17735 (define_mode_iterator AVX2_VEC_DUP_MODE
17736 [V32QI V16QI V16HI V8HI V8SI V4SI])
;; AVX2 broadcast of a scalar into a QI/HI/SI-element vector; the last
;; alternative takes the scalar in a GPR (disabled when AVX512VL can
;; broadcast from GPRs directly, per the "noavx512vl" isa attribute).
17738 (define_insn "*vec_dup<mode>"
17739 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
17740 (vec_duplicate:AVX2_VEC_DUP_MODE
17741 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
17744 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17745 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17747 [(set_attr "isa" "*,*,noavx512vl")
17748 (set_attr "type" "ssemov")
17749 (set_attr "prefix_extra" "1")
17750 (set_attr "prefix" "maybe_evex")
17751 (set_attr "mode" "<sseinsnmode>")])

;; AVX broadcast of a scalar into a 256-bit SI/SF/DI/DF vector, with
;; AVX2, plain-AVX, and AVX512F (via %g0 zmm destination) variants.
17753 (define_insn "vec_dup<mode>"
17754 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
17755 (vec_duplicate:AVX_VEC_DUP_MODE
17756 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
17759 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17760 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
17761 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17762 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
17764 [(set_attr "type" "ssemov")
17765 (set_attr "prefix_extra" "1")
17766 (set_attr "prefix" "maybe_evex")
17767 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
17768 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
;; Splitter: broadcast from a GPR without AVX512VL GPR-broadcast insns
;; — move the scalar into element 0 of an xmm, then vpbroadcast it.
17771 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
17772 (vec_duplicate:AVX2_VEC_DUP_MODE
17773 (match_operand:<ssescalarmode> 1 "register_operand")))]
17775 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
17776 available, because then we can broadcast from GPRs directly.
17777 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
17778 for V*SI mode it requires just -mavx512vl. */
17779 && !(TARGET_AVX512VL
17780 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
17781 && reload_completed && GENERAL_REG_P (operands[1])"
17784 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
17785 CONST0_RTX (V4SImode),
17786 gen_lowpart (SImode, operands[1])));
17787 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
17788 gen_lowpart (<ssexmmmode>mode,

;; Splitter: AVX-without-AVX2 broadcast — duplicate into the low
;; 128-bit half, then vec_concat the half with itself.
17794 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
17795 (vec_duplicate:AVX_VEC_DUP_MODE
17796 (match_operand:<ssescalarmode> 1 "register_operand")))]
17797 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
17798 [(set (match_dup 2)
17799 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
17801 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
17802 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
;; Broadcast a 128-bit half into a 256-bit vector.  Pairs of
;; alternatives: memory source uses vbroadcast*, register source uses
;; vinsert*/vperm2*; VEX, AVX512DQ and AVX512VL encodings.
17804 (define_insn "avx_vbroadcastf128_<mode>"
17805 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
17807 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
17811 vbroadcast<i128>\t{%1, %0|%0, %1}
17812 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
17813 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
17814 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
17815 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
17816 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
17817 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
17818 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
17819 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
17820 (set_attr "prefix_extra" "1")
17821 (set_attr "length_immediate" "0,1,1,0,1,0,1")
17822 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
17823 (set_attr "mode" "<sseinsnmode>")])

17825 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
17826 (define_mode_iterator VI4F_BRCST32x2
17827 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
17828 V16SF (V8SF "TARGET_AVX512VL")])

;; Per-mode 2-element source modes for the 64x2 / 32x2 broadcasts.
17830 (define_mode_attr 64x2mode
17831 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])

17833 (define_mode_attr 32x2mode
17834 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
17835 (V8SF "V2SF") (V4SI "V2SI")])
;; vbroadcast*32x2: duplicate a 2-element 32-bit pair (elements 0,1 of
;; an xmm source) across the destination.
17837 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
17838 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
17839 (vec_duplicate:VI4F_BRCST32x2
17840 (vec_select:<32x2mode>
17841 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17842 (parallel [(const_int 0) (const_int 1)]))))]
17844 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
17845 [(set_attr "type" "ssemov")
17846 (set_attr "prefix_extra" "1")
17847 (set_attr "prefix" "evex")
17848 (set_attr "mode" "<sseinsnmode>")])

;; AVX512VL broadcast of a 128-bit source into a 256-bit vector:
;; vshuf*32x4 for registers, vbroadcast*32x4 for memory.
17850 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
17851 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
17852 (vec_duplicate:VI4F_256
17853 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
17856 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
17857 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17858 [(set_attr "type" "ssemov")
17859 (set_attr "prefix_extra" "1")
17860 (set_attr "prefix" "evex")
17861 (set_attr "mode" "<sseinsnmode>")])

;; AVX512DQ broadcast of a 256-bit half into a 512-bit 32-bit-element
;; vector: vshuf*32x4 (imm 0x44) for registers, vbroadcast*32x8 for
;; memory.
17863 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
17864 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17865 (vec_duplicate:V16FI
17866 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17869 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17870 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17871 [(set_attr "type" "ssemov")
17872 (set_attr "prefix_extra" "1")
17873 (set_attr "prefix" "evex")
17874 (set_attr "mode" "<sseinsnmode>")])

17876 ;; For broadcast[i|f]64x2
17877 (define_mode_iterator VI8F_BRCST64x2
17878 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])

;; vbroadcast*64x2: duplicate a 128-bit pair of 64-bit elements.
17880 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
17881 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
17882 (vec_duplicate:VI8F_BRCST64x2
17883 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
17886 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
17887 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17888 [(set_attr "type" "ssemov")
17889 (set_attr "prefix_extra" "1")
17890 (set_attr "prefix" "evex")
17891 (set_attr "mode" "<sseinsnmode>")])
;; AVX512CD: broadcast a QI mask register into every 64-bit element
;; (vpbroadcastmb2q).
17893 (define_insn "avx512cd_maskb_vec_dup<mode>"
17894 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
17895 (vec_duplicate:VI8_AVX512VL
17897 (match_operand:QI 1 "register_operand" "Yk"))))]
17899 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
17900 [(set_attr "type" "mskmov")
17901 (set_attr "prefix" "evex")
17902 (set_attr "mode" "XI")])

;; AVX512CD: broadcast an HI mask register into every 32-bit element
;; (vpbroadcastmw2d).
17904 (define_insn "avx512cd_maskw_vec_dup<mode>"
17905 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
17906 (vec_duplicate:VI4_AVX512VL
17908 (match_operand:HI 1 "register_operand" "Yk"))))]
17910 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
17911 [(set_attr "type" "mskmov")
17912 (set_attr "prefix" "evex")
17913 (set_attr "mode" "XI")])
17915 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
17916 ;; If it so happens that the input is in memory, use vbroadcast.
17917 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
17918 (define_insn "*avx_vperm_broadcast_v4sf"
17919 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
17921 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
17922 (match_parallel 2 "avx_vbroadcast_operand"
17923 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17926 int elt = INTVAL (operands[3]);
17927 switch (which_alternative)
17931 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
17932 return "vbroadcastss\t{%1, %0|%0, %k1}";
17934 operands[2] = GEN_INT (elt * 0x55);
17935 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
17937 gcc_unreachable ();
17940 [(set_attr "type" "ssemov,ssemov,sselog1")
17941 (set_attr "prefix_extra" "1")
17942 (set_attr "length_immediate" "0,0,1")
17943 (set_attr "prefix" "maybe_evex")
17944 (set_attr "mode" "SF,SF,V4SF")])

;; 256-bit variant, split after reload into either a plain scalar
;; broadcast (memory source) or vpermilp + lane duplication.
17946 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
17947 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
17949 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
17950 (match_parallel 2 "avx_vbroadcast_operand"
17951 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17954 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
17955 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
17957 rtx op0 = operands[0], op1 = operands[1];
17958 int elt = INTVAL (operands[3]);
17964 if (TARGET_AVX2 && elt == 0)
17966 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
17971 /* Shuffle element we care about into all elements of the 128-bit lane.
17972 The other lane gets shuffled too, but we don't care. */
17973 if (<MODE>mode == V4DFmode)
17974 mask = (elt & 1 ? 15 : 0);
17976 mask = (elt & 3) * 0x55;
17977 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
17979 /* Shuffle the lane we care about into both lanes of the dest. */
17980 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
17981 if (EXT_REX_SSE_REG_P (op0))
17983 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
17985 gcc_assert (<MODE>mode == V8SFmode);
17986 if ((mask & 1) == 0)
17987 emit_insn (gen_avx2_vec_dupv8sf (op0,
17988 gen_lowpart (V4SFmode, op0)));
17990 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
17991 GEN_INT (4), GEN_INT (5),
17992 GEN_INT (6), GEN_INT (7),
17993 GEN_INT (12), GEN_INT (13),
17994 GEN_INT (14), GEN_INT (15)));
17998 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
18002 operands[1] = adjust_address (op1, <ssescalarmode>mode,
18003 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; vpermilpd with immediate control: expand the imm8 into an explicit
;; vec_select parallel (one control bit per element, within a pair).
18006 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
18007 [(set (match_operand:VF2 0 "register_operand")
18009 (match_operand:VF2 1 "nonimmediate_operand")
18010 (match_operand:SI 2 "const_0_to_255_operand")))]
18011 "TARGET_AVX && <mask_mode512bit_condition>"
18013 int mask = INTVAL (operands[2]);
18014 rtx perm[<ssescalarnum>];
18017 for (i = 0; i < <ssescalarnum>; i = i + 2)
18019 perm[i] = GEN_INT (((mask >> i) & 1) + i);
18020 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
18024 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));

;; vpermilps with immediate control: two control bits per element,
;; the same imm8 applied to every 4-element lane.
18027 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
18028 [(set (match_operand:VF1 0 "register_operand")
18030 (match_operand:VF1 1 "nonimmediate_operand")
18031 (match_operand:SI 2 "const_0_to_255_operand")))]
18032 "TARGET_AVX && <mask_mode512bit_condition>"
18034 int mask = INTVAL (operands[2]);
18035 rtx perm[<ssescalarnum>];
18038 for (i = 0; i < <ssescalarnum>; i = i + 4)
18040 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
18041 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
18042 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
18043 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
18047 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));

;; Matching insn: reconstruct the imm8 from the vec_select parallel
;; (avx_vpermilp_parallel returns mask+1, 0 on failure).
18050 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
18051 [(set (match_operand:VF 0 "register_operand" "=v")
18053 (match_operand:VF 1 "nonimmediate_operand" "vm")
18054 (match_parallel 2 ""
18055 [(match_operand 3 "const_int_operand")])))]
18056 "TARGET_AVX && <mask_mode512bit_condition>
18057 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
18059 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
18060 operands[2] = GEN_INT (mask);
18061 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
18063 [(set_attr "type" "sselog")
18064 (set_attr "prefix_extra" "1")
18065 (set_attr "length_immediate" "1")
18066 (set_attr "prefix" "<mask_prefix>")
18067 (set_attr "mode" "<sseinsnmode>")])

;; Variable-control vpermilps/pd (control vector in operand 2).
18069 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
18070 [(set (match_operand:VF 0 "register_operand" "=v")
18072 [(match_operand:VF 1 "register_operand" "v")
18073 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
18075 "TARGET_AVX && <mask_mode512bit_condition>"
18076 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18077 [(set_attr "type" "sselog")
18078 (set_attr "prefix_extra" "1")
18079 (set_attr "btver2_decode" "vector")
18080 (set_attr "prefix" "<mask_prefix>")
18081 (set_attr "mode" "<sseinsnmode>")])
;; Modes supporting the two-source permutes (vpermi2*/vpermt2*);
;; QI/HI element modes additionally need AVX512VBMI/AVX512BW.
18083 (define_mode_iterator VPERMI2
18084 [V16SI V16SF V8DI V8DF
18085 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
18086 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
18087 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
18088 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
18089 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
18090 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
18091 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
18092 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])

;; Integer-element subset of VPERMI2.
18094 (define_mode_iterator VPERMI2I
18096 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
18097 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
18098 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
18099 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
18100 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
18101 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])

;; Masked vpermi2var expander; operand 5 is the index operand
;; reinterpreted in the vector mode for the vec_merge fallthrough.
18103 (define_expand "<avx512>_vpermi2var<mode>3_mask"
18104 [(set (match_operand:VPERMI2 0 "register_operand")
18107 [(match_operand:<sseintvecmode> 2 "register_operand")
18108 (match_operand:VPERMI2 1 "register_operand")
18109 (match_operand:VPERMI2 3 "nonimmediate_operand")]
18112 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
18114 "operands[5] = gen_lowpart (<MODE>mode, operands[2]);")

;; Masked vpermi2 insn, integer element modes (index operand tied
;; to the destination, "0").
18116 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
18117 [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
18118 (vec_merge:VPERMI2I
18120 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
18121 (match_operand:VPERMI2I 1 "register_operand" "v")
18122 (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
18125 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18127 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18128 [(set_attr "type" "sselog")
18129 (set_attr "prefix" "evex")
18130 (set_attr "mode" "<sseinsnmode>")])

;; Masked vpermi2 insn, FP element modes (merge source is the index
;; register viewed in the FP vector mode).
18132 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
18133 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18134 (vec_merge:VF_AVX512VL
18135 (unspec:VF_AVX512VL
18136 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
18137 (match_operand:VF_AVX512VL 1 "register_operand" "v")
18138 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
18140 (subreg:VF_AVX512VL (match_dup 2) 0)
18141 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18143 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18144 [(set_attr "type" "sselog")
18145 (set_attr "prefix" "evex")
18146 (set_attr "mode" "<sseinsnmode>")])

;; Zero-masked vpermt2var: delegate to the _maskz_1 pattern with a
;; zero merge operand.
18148 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
18149 [(match_operand:VPERMI2 0 "register_operand")
18150 (match_operand:<sseintvecmode> 1 "register_operand")
18151 (match_operand:VPERMI2 2 "register_operand")
18152 (match_operand:VPERMI2 3 "nonimmediate_operand")
18153 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18156 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
18157 operands[0], operands[1], operands[2], operands[3],
18158 CONST0_RTX (<MODE>mode), operands[4]));

;; Two-source permute: emitted as vpermt2 when the data operand is
;; tied to the destination, or vpermi2 when the index operand is.
18162 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
18163 [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
18165 [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
18166 (match_operand:VPERMI2 2 "register_operand" "0,v")
18167 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
18171 vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
18172 vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
18173 [(set_attr "type" "sselog")
18174 (set_attr "prefix" "evex")
18175 (set_attr "mode" "<sseinsnmode>")])

;; Merge-masked vpermt2.
18177 (define_insn "<avx512>_vpermt2var<mode>3_mask"
18178 [(set (match_operand:VPERMI2 0 "register_operand" "=v")
18181 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18182 (match_operand:VPERMI2 2 "register_operand" "0")
18183 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
18186 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18188 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18189 [(set_attr "type" "sselog")
18190 (set_attr "prefix" "evex")
18191 (set_attr "mode" "<sseinsnmode>")])
;; vperm2f128 expander: when no lane is zeroed (imm bits 3/7 clear),
;; lower to an explicit vec_select over the concatenation of the two
;; sources so later passes can optimize it.
18193 (define_expand "avx_vperm2f128<mode>3"
18194 [(set (match_operand:AVX256MODE2P 0 "register_operand")
18195 (unspec:AVX256MODE2P
18196 [(match_operand:AVX256MODE2P 1 "register_operand")
18197 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
18198 (match_operand:SI 3 "const_0_to_255_operand")]
18199 UNSPEC_VPERMIL2F128))]
18202 int mask = INTVAL (operands[3]);
18203 if ((mask & 0x88) == 0)
18205 rtx perm[<ssescalarnum>], t1, t2;
18206 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
18208 base = (mask & 3) * nelt2;
18209 for (i = 0; i < nelt2; ++i)
18210 perm[i] = GEN_INT (base + i);
18212 base = ((mask >> 4) & 3) * nelt2;
18213 for (i = 0; i < nelt2; ++i)
18214 perm[i + nelt2] = GEN_INT (base + i);
18216 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
18217 operands[1], operands[2]);
18218 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
18219 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
18220 t2 = gen_rtx_SET (operands[0], t2);

18226 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
18227 ;; means that in order to represent this properly in rtl we'd have to
18228 ;; nest *another* vec_concat with a zero operand and do the select from
18229 ;; a 4x wide vector. That doesn't seem very nice.
18230 (define_insn "*avx_vperm2f128<mode>_full"
18231 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
18232 (unspec:AVX256MODE2P
18233 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
18234 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
18235 (match_operand:SI 3 "const_0_to_255_operand" "n")]
18236 UNSPEC_VPERMIL2F128))]
18238 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18239 [(set_attr "type" "sselog")
18240 (set_attr "prefix_extra" "1")
18241 (set_attr "length_immediate" "1")
18242 (set_attr "prefix" "vex")
18243 (set_attr "mode" "<sseinsnmode>")])

;; Non-zeroing vec_select form; special imm values degenerate into
;; cheaper vinsert forms, otherwise re-emit vperm2.
18245 (define_insn "*avx_vperm2f128<mode>_nozero"
18246 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
18247 (vec_select:AVX256MODE2P
18248 (vec_concat:<ssedoublevecmode>
18249 (match_operand:AVX256MODE2P 1 "register_operand" "x")
18250 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
18251 (match_parallel 3 ""
18252 [(match_operand 4 "const_int_operand")])))]
18254 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
18256 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
18258 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
18260 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
18261 operands[3] = GEN_INT (mask);
18262 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
18264 [(set_attr "type" "sselog")
18265 (set_attr "prefix_extra" "1")
18266 (set_attr "length_immediate" "1")
18267 (set_attr "prefix" "vex")
18268 (set_attr "mode" "<sseinsnmode>")])
;; Recognize a single-source rotate-style vec_select as [v]palignr;
;; the byte shift count is the element offset scaled by element size.
18270 (define_insn "*ssse3_palignr<mode>_perm"
18271 [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
18273 (match_operand:V_128 1 "register_operand" "0,x,v")
18274 (match_parallel 2 "palignr_operand"
18275 [(match_operand 3 "const_int_operand" "n,n,n")])))]
18278 operands[2] = (GEN_INT (INTVAL (operands[3])
18279 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
18281 switch (which_alternative)
18284 return "palignr\t{%2, %1, %0|%0, %1, %2}";
18287 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
18289 gcc_unreachable ();
18292 [(set_attr "isa" "noavx,avx,avx512bw")
18293 (set_attr "type" "sseishft")
18294 (set_attr "atom_unit" "sishuf")
18295 (set_attr "prefix_data16" "1,*,*")
18296 (set_attr "prefix_extra" "1")
18297 (set_attr "length_immediate" "1")
18298 (set_attr "prefix" "orig,vex,evex")])
;; Masked 128-bit insert into a 256-bit vector: dispatch on the
;; immediate (0 = low half, 1 = high half) to the vec_set_* patterns.
18300 (define_expand "avx512vl_vinsert<mode>"
18301 [(match_operand:VI48F_256 0 "register_operand")
18302 (match_operand:VI48F_256 1 "register_operand")
18303 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18304 (match_operand:SI 3 "const_0_to_1_operand")
18305 (match_operand:VI48F_256 4 "register_operand")
18306 (match_operand:<avx512fmaskmode> 5 "register_operand")]
18309 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18311 switch (INTVAL (operands[3]))
18314 insn = gen_vec_set_lo_<mode>_mask;
18317 insn = gen_vec_set_hi_<mode>_mask;
18320 gcc_unreachable ();
18323 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],

;; Unmasked variant, same low/high dispatch.
18328 (define_expand "avx_vinsertf128<mode>"
18329 [(match_operand:V_256 0 "register_operand")
18330 (match_operand:V_256 1 "register_operand")
18331 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18332 (match_operand:SI 3 "const_0_to_1_operand")]
18335 rtx (*insn)(rtx, rtx, rtx);
18337 switch (INTVAL (operands[3]))
18340 insn = gen_vec_set_lo_<mode>;
18343 insn = gen_vec_set_hi_<mode>;
18346 gcc_unreachable ();
18349 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low 128 bits of a 256-bit 8-byte-element vector,
;; preserving elements 2-3; DQ/VL pick the EVEX vinsert forms.
18353 (define_insn "vec_set_lo_<mode><mask_name>"
18354 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
18355 (vec_concat:VI8F_256
18356 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
18357 (vec_select:<ssehalfvecmode>
18358 (match_operand:VI8F_256 1 "register_operand" "v")
18359 (parallel [(const_int 2) (const_int 3)]))))]
18360 "TARGET_AVX && <mask_avx512dq_condition>"
18362 if (TARGET_AVX512DQ)
18363 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18364 else if (TARGET_AVX512VL)
18365 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18367 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
18369 [(set_attr "type" "sselog")
18370 (set_attr "prefix_extra" "1")
18371 (set_attr "length_immediate" "1")
18372 (set_attr "prefix" "vex")
18373 (set_attr "mode" "<sseinsnmode>")])

;; Replace the high 128 bits, preserving elements 0-1.
18375 (define_insn "vec_set_hi_<mode><mask_name>"
18376 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
18377 (vec_concat:VI8F_256
18378 (vec_select:<ssehalfvecmode>
18379 (match_operand:VI8F_256 1 "register_operand" "v")
18380 (parallel [(const_int 0) (const_int 1)]))
18381 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
18382 "TARGET_AVX && <mask_avx512dq_condition>"
18384 if (TARGET_AVX512DQ)
18385 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18386 else if (TARGET_AVX512VL)
18387 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18389 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
18391 [(set_attr "type" "sselog")
18392 (set_attr "prefix_extra" "1")
18393 (set_attr "length_immediate" "1")
18394 (set_attr "prefix" "vex")
18395 (set_attr "mode" "<sseinsnmode>")])

;; Same pair for 4-byte-element 256-bit vectors (4 elements per half).
18397 (define_insn "vec_set_lo_<mode><mask_name>"
18398 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
18399 (vec_concat:VI4F_256
18400 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
18401 (vec_select:<ssehalfvecmode>
18402 (match_operand:VI4F_256 1 "register_operand" "v")
18403 (parallel [(const_int 4) (const_int 5)
18404 (const_int 6) (const_int 7)]))))]
18407 if (TARGET_AVX512VL)
18408 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18410 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
18412 [(set_attr "type" "sselog")
18413 (set_attr "prefix_extra" "1")
18414 (set_attr "length_immediate" "1")
18415 (set_attr "prefix" "vex")
18416 (set_attr "mode" "<sseinsnmode>")])

18418 (define_insn "vec_set_hi_<mode><mask_name>"
18419 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
18420 (vec_concat:VI4F_256
18421 (vec_select:<ssehalfvecmode>
18422 (match_operand:VI4F_256 1 "register_operand" "v")
18423 (parallel [(const_int 0) (const_int 1)
18424 (const_int 2) (const_int 3)]))
18425 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
18428 if (TARGET_AVX512VL)
18429 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18431 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
18433 [(set_attr "type" "sselog")
18434 (set_attr "prefix_extra" "1")
18435 (set_attr "length_immediate" "1")
18436 (set_attr "prefix" "vex")
18437 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low 8 HI elements of a V16HI, keeping elements 8-15.
18439 (define_insn "vec_set_lo_v16hi"
18440 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
18442 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
18444 (match_operand:V16HI 1 "register_operand" "x,v")
18445 (parallel [(const_int 8) (const_int 9)
18446 (const_int 10) (const_int 11)
18447 (const_int 12) (const_int 13)
18448 (const_int 14) (const_int 15)]))))]
18451 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
18452 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
18453 [(set_attr "type" "sselog")
18454 (set_attr "prefix_extra" "1")
18455 (set_attr "length_immediate" "1")
18456 (set_attr "prefix" "vex,evex")
18457 (set_attr "mode" "OI")])

;; Replace the high 8 HI elements, keeping elements 0-7.
18459 (define_insn "vec_set_hi_v16hi"
18460 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
18463 (match_operand:V16HI 1 "register_operand" "x,v")
18464 (parallel [(const_int 0) (const_int 1)
18465 (const_int 2) (const_int 3)
18466 (const_int 4) (const_int 5)
18467 (const_int 6) (const_int 7)]))
18468 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
18471 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
18472 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
18473 [(set_attr "type" "sselog")
18474 (set_attr "prefix_extra" "1")
18475 (set_attr "length_immediate" "1")
18476 (set_attr "prefix" "vex,evex")
18477 (set_attr "mode" "OI")])
;; Replace the low 16 QI elements of a V32QI, keeping elements 16-31.
;; Fix: operand 2's EVEX-alternative constraint was "v", dropping the
;; memory option its "nonimmediate_operand" predicate allows; use "vm"
;; to match vinserti32x4's memory form and the sibling patterns
;; (vec_set_hi_v32qi, vec_set_lo_v16hi).
18479 (define_insn "vec_set_lo_v32qi"
18480 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
18482 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")
18484 (match_operand:V32QI 1 "register_operand" "x,v")
18485 (parallel [(const_int 16) (const_int 17)
18486 (const_int 18) (const_int 19)
18487 (const_int 20) (const_int 21)
18488 (const_int 22) (const_int 23)
18489 (const_int 24) (const_int 25)
18490 (const_int 26) (const_int 27)
18491 (const_int 28) (const_int 29)
18492 (const_int 30) (const_int 31)]))))]
18495 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
18496 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
18497 [(set_attr "type" "sselog")
18498 (set_attr "prefix_extra" "1")
18499 (set_attr "length_immediate" "1")
18500 (set_attr "prefix" "vex,evex")
18501 (set_attr "mode" "OI")])
;; Replace the high 16 QI elements of a V32QI, keeping elements 0-15.
18503 (define_insn "vec_set_hi_v32qi"
18504 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
18507 (match_operand:V32QI 1 "register_operand" "x,v")
18508 (parallel [(const_int 0) (const_int 1)
18509 (const_int 2) (const_int 3)
18510 (const_int 4) (const_int 5)
18511 (const_int 6) (const_int 7)
18512 (const_int 8) (const_int 9)
18513 (const_int 10) (const_int 11)
18514 (const_int 12) (const_int 13)
18515 (const_int 14) (const_int 15)]))
18516 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
18519 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
18520 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
18521 [(set_attr "type" "sselog")
18522 (set_attr "prefix_extra" "1")
18523 (set_attr "length_immediate" "1")
18524 (set_attr "prefix" "vex,evex")
18525 (set_attr "mode" "OI")])
;; AVX/AVX2 masked load: vmaskmovps/vmaskmovpd/vpmaskmovd/vpmaskmovq.
;; Operand 2 is the per-element mask (integer vector of matching width),
;; operand 1 the memory source.  Masked-off elements are zeroed; faulting
;; accesses to masked-off elements are suppressed by the hardware.
18527 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
18528 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
18530 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
18531 (match_operand:V48_AVX2 1 "memory_operand" "m")]
18534 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
18535 [(set_attr "type" "sselog1")
18536 (set_attr "prefix_extra" "1")
18537 (set_attr "prefix" "vex")
18538 (set_attr "btver2_decode" "vector")
18539 (set_attr "mode" "<sseinsnmode>")])
;; AVX/AVX2 masked store, the mirror of the load above: operand 1 is the
;; mask, operand 2 the register source, operand 0 the memory destination.
18541 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
18542 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
18544 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
18545 (match_operand:V48_AVX2 2 "register_operand" "x")
18549 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18550 [(set_attr "type" "sselog1")
18551 (set_attr "prefix_extra" "1")
18552 (set_attr "prefix" "vex")
18553 (set_attr "btver2_decode" "vector")
18554 (set_attr "mode" "<sseinsnmode>")])
;; Expanders wiring the generic maskload/maskstore optabs to the
;; target patterns.  The AVX/AVX2 variants (V48_AVX2 with an integer
;; vector mask) map onto the v*maskmov insns above; the AVX512 variants
;; use vec_merge with a k-register (<avx512fmaskmode>) mask, covering
;; 32/64-bit elements (V48_AVX512VL) and, with AVX512BW-style modes,
;; 8/16-bit elements (VI12_AVX512VL).
;; NOTE(review): the enabling conditions of these expanders were dropped
;; by the extraction (line numbers skip) — consult upstream sse.md.
18556 (define_expand "maskload<mode><sseintvecmodelower>"
18557 [(set (match_operand:V48_AVX2 0 "register_operand")
18559 [(match_operand:<sseintvecmode> 2 "register_operand")
18560 (match_operand:V48_AVX2 1 "memory_operand")]
;; AVX512F/VL masked load for 32/64-bit element vectors.
18564 (define_expand "maskload<mode><avx512fmaskmodelower>"
18565 [(set (match_operand:V48_AVX512VL 0 "register_operand")
18566 (vec_merge:V48_AVX512VL
18567 (match_operand:V48_AVX512VL 1 "memory_operand")
18569 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Masked load for 8/16-bit element vectors (VI12 modes).
18572 (define_expand "maskload<mode><avx512fmaskmodelower>"
18573 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
18574 (vec_merge:VI12_AVX512VL
18575 (match_operand:VI12_AVX512VL 1 "memory_operand")
18577 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; AVX/AVX2 masked store expander (integer-vector mask in operand 2).
18580 (define_expand "maskstore<mode><sseintvecmodelower>"
18581 [(set (match_operand:V48_AVX2 0 "memory_operand")
18583 [(match_operand:<sseintvecmode> 2 "register_operand")
18584 (match_operand:V48_AVX2 1 "register_operand")
;; AVX512F/VL masked store for 32/64-bit element vectors.
18589 (define_expand "maskstore<mode><avx512fmaskmodelower>"
18590 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
18591 (vec_merge:V48_AVX512VL
18592 (match_operand:V48_AVX512VL 1 "register_operand")
18594 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Masked store for 8/16-bit element vectors (VI12 modes).
18597 (define_expand "maskstore<mode><avx512fmaskmodelower>"
18598 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
18599 (vec_merge:VI12_AVX512VL
18600 (match_operand:VI12_AVX512VL 1 "register_operand")
18602 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Conditional branch on a whole-vector comparison: compare operands 1
;; and 2, set the flags, then branch to label operand 3 on the
;; bt_comparison_operator condition (eq/ne family).  The C body defers
;; all code generation to ix86_expand_branch.
18605 (define_expand "cbranch<mode>4"
18606 [(set (reg:CC FLAGS_REG)
18607 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
18608 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
18609 (set (pc) (if_then_else
18610 (match_operator 0 "bt_comparison_operator"
18611 [(reg:CC FLAGS_REG) (const_int 0)])
18612 (label_ref (match_operand 3))
18616 ix86_expand_branch (GET_CODE (operands[0]),
18617 operands[1], operands[2], operands[3]);
;; Cast between a 256-bit vector and its half-size vector mode
;; (vextractf128-style "cast" that is really just a move).  Kept as an
;; unspec until after reload, then split into a plain move: when the
;; destination is a register the move is narrowed to the half mode,
;; otherwise the source is widened with a lowpart subreg.  At most one
;; of the operands may be memory.
18622 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
18623 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
18624 (unspec:AVX256MODE2P
18625 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
18627 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
18629 "&& reload_completed"
18630 [(set (match_dup 0) (match_dup 1))]
18632 if (REG_P (operands[0]))
18633 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
18635 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
18636 <ssehalfvecmode>mode);
18639 ;; Modes handled by vec_init expanders.
18640 (define_mode_iterator VEC_INIT_MODE
18641 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
18642 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
18643 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
18644 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
18645 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
18646 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
18647 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
18649 ;; Likewise, but for initialization from half sized vectors.
18650 ;; Thus, these are all VEC_INIT_MODE modes except V2??.
;; (Comparing the two lists: the two-element modes V2DI, V2DF and V2TI
;; are the ones omitted, since they have no half-vector mode to
;; initialize from.)
18651 (define_mode_iterator VEC_INIT_HALF_MODE
18652 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
18653 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
18654 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
18655 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
18656 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
18657 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
18658 (V4TI "TARGET_AVX512F")])
;; vec_init from individual scalar elements; all work is done by
;; ix86_expand_vector_init.
18660 (define_expand "vec_init<mode><ssescalarmodelower>"
18661 [(match_operand:VEC_INIT_MODE 0 "register_operand")
18665 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_init from two half-sized vectors; same helper does the work.
18669 (define_expand "vec_init<mode><ssehalfvecmodelower>"
18670 [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
18674 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Per-element variable-count shifts (AVX2 / AVX512), optionally masked
;; via <mask_name>/<mask_operand3>.
;; Arithmetic right shift for 32/64-bit elements: vpsravd/vpsravq.
18678 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
18679 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
18680 (ashiftrt:VI48_AVX512F_AVX512VL
18681 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
18682 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
18683 "TARGET_AVX2 && <mask_mode512bit_condition>"
18684 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18685 [(set_attr "type" "sseishft")
18686 (set_attr "prefix" "maybe_evex")
18687 (set_attr "mode" "<sseinsnmode>")])
;; Arithmetic right shift for 16-bit elements: vpsravw (AVX512BW-era
;; VI2 modes; condition line lost in this extract).
18689 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
18690 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18691 (ashiftrt:VI2_AVX512VL
18692 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18693 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18695 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18696 [(set_attr "type" "sseishft")
18697 (set_attr "prefix" "maybe_evex")
18698 (set_attr "mode" "<sseinsnmode>")])
;; Logical left/right shift (any_lshift) for 32/64-bit elements:
;; vpsllv<d,q> / vpsrlv<d,q>, selected by <vshift>.
18700 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
18701 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
18702 (any_lshift:VI48_AVX512F
18703 (match_operand:VI48_AVX512F 1 "register_operand" "v")
18704 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
18705 "TARGET_AVX2 && <mask_mode512bit_condition>"
18706 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18707 [(set_attr "type" "sseishft")
18708 (set_attr "prefix" "maybe_evex")
18709 (set_attr "mode" "<sseinsnmode>")])
;; Logical left/right shift for 16-bit elements (vpsllvw/vpsrlvw).
18711 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
18712 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18713 (any_lshift:VI2_AVX512VL
18714 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18715 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18717 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18718 [(set_attr "type" "sseishft")
18719 (set_attr "prefix" "maybe_evex")
18720 (set_attr "mode" "<sseinsnmode>")])
;; Concatenate two half-width vectors into a 256/512-bit vector.
;; Alternatives 0/1: real insert (operand 2 is a register/memory half) —
;; emitted as vinsert{i,f}128 for 256-bit, or vinsert*32x8/64x4/64x2/32x4
;; for 512-bit depending on AVX512DQ and element size.
;; Alternatives 2/3: operand 2 is zero (constraint "C"), so the concat
;; degenerates to a zero-extending move of operand 1 into the low half,
;; emitted as vmovaps/vmovapd/vmovdqa{,32,64} per the insn's mode attr.
18722 (define_insn "avx_vec_concat<mode>"
18723 [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
18724 (vec_concat:V_256_512
18725 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,v,x,v")
18726 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,vm,C,C")))]
18729 switch (which_alternative)
18732 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18734 if (<MODE_SIZE> == 64)
18736 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
18737 return "vinsert<shuffletype>32x8\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18739 return "vinsert<shuffletype>64x4\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18743 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18744 return "vinsert<shuffletype>64x2\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18746 return "vinsert<shuffletype>32x4\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
;; Zero-high-half cases: pick a move mnemonic matching the insn mode.
;; %t0 selects the 256-bit view of the register, %x0 the 128-bit view.
18750 switch (get_attr_mode (insn))
18753 return "vmovaps\t{%1, %t0|%t0, %1}";
18755 return "vmovapd\t{%1, %t0|%t0, %1}";
18757 return "vmovaps\t{%1, %x0|%x0, %1}";
18759 return "vmovapd\t{%1, %x0|%x0, %1}";
18761 if (which_alternative == 2)
18762 return "vmovdqa\t{%1, %t0|%t0, %1}";
18763 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18764 return "vmovdqa64\t{%1, %t0|%t0, %1}";
18766 return "vmovdqa32\t{%1, %t0|%t0, %1}";
18768 if (which_alternative == 2)
18769 return "vmovdqa\t{%1, %x0|%x0, %1}";
18770 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18771 return "vmovdqa64\t{%1, %x0|%x0, %1}";
18773 return "vmovdqa32\t{%1, %x0|%x0, %1}";
18775 gcc_unreachable ();
18778 gcc_unreachable ();
18781 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
18782 (set_attr "prefix_extra" "1,1,*,*")
18783 (set_attr "length_immediate" "1,1,*,*")
18784 (set_attr "prefix" "maybe_evex")
18785 (set_attr "mode" "<sseinsnmode>")])
;; Half-precision -> single-precision conversions (F16C / AVX512VL).
;; 128-bit form: converts the low 4 halves of a V8HI register to V4SF
;; (the vec_select keeps elements 0..3 of the V8SF intermediate).
18787 (define_insn "vcvtph2ps<mask_name>"
18788 [(set (match_operand:V4SF 0 "register_operand" "=v")
18790 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
18792 (parallel [(const_int 0) (const_int 1)
18793 (const_int 2) (const_int 3)])))]
18794 "TARGET_F16C || TARGET_AVX512VL"
18795 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18796 [(set_attr "type" "ssecvt")
18797 (set_attr "prefix" "maybe_evex")
18798 (set_attr "mode" "V4SF")])
;; Same conversion with a V4HI memory source (load form).
18800 (define_insn "*vcvtph2ps_load<mask_name>"
18801 [(set (match_operand:V4SF 0 "register_operand" "=v")
18802 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
18803 UNSPEC_VCVTPH2PS))]
18804 "TARGET_F16C || TARGET_AVX512VL"
18805 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18806 [(set_attr "type" "ssecvt")
18807 (set_attr "prefix" "vex")
18808 (set_attr "mode" "V8SF")])
;; 256-bit form: 8 halves -> V8SF.
18810 (define_insn "vcvtph2ps256<mask_name>"
18811 [(set (match_operand:V8SF 0 "register_operand" "=v")
18812 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
18813 UNSPEC_VCVTPH2PS))]
18814 "TARGET_F16C || TARGET_AVX512VL"
18815 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18816 [(set_attr "type" "ssecvt")
18817 (set_attr "prefix" "vex")
18818 (set_attr "btver2_decode" "double")
18819 (set_attr "mode" "V8SF")])
;; 512-bit AVX512F form with optional masking and SAE (round_saeonly).
18821 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
18822 [(set (match_operand:V16SF 0 "register_operand" "=v")
18824 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18825 UNSPEC_VCVTPH2PS))]
18827 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18828 [(set_attr "type" "ssecvt")
18829 (set_attr "prefix" "evex")
18830 (set_attr "mode" "V16SF")])
;; Single-precision -> half-precision conversions (F16C / AVX512VL).
;; Operand 2 in all patterns is the 8-bit rounding-control immediate.
;; Masked expander: produces a V8HI whose low V4HI holds the converted
;; values; operands 3/4 are the merge source and k-mask.
18832 (define_expand "vcvtps2ph_mask"
18833 [(set (match_operand:V8HI 0 "register_operand")
18836 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
18837 (match_operand:SI 2 "const_0_to_255_operand")]
18840 (match_operand:V8HI 3 "vector_move_operand")
18841 (match_operand:QI 4 "register_operand")))]
18843 "operands[5] = CONST0_RTX (V4HImode);")
;; Unmasked expander; high V4HI of the result is zeroed.
18845 (define_expand "vcvtps2ph"
18846 [(set (match_operand:V8HI 0 "register_operand")
18848 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
18849 (match_operand:SI 2 "const_0_to_255_operand")]
18853 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination insn matching the expanders above.
18855 (define_insn "*vcvtps2ph<mask_name>"
18856 [(set (match_operand:V8HI 0 "register_operand" "=v")
18858 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
18859 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18861 (match_operand:V4HI 3 "const0_operand")))]
18862 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
18863 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
18864 [(set_attr "type" "ssecvt")
18865 (set_attr "prefix" "maybe_evex")
18866 (set_attr "mode" "V4SF")])
;; Store form: convert V4SF and write the 4 halves straight to memory.
18868 (define_insn "*vcvtps2ph_store<mask_name>"
18869 [(set (match_operand:V4HI 0 "memory_operand" "=m")
18870 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
18871 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18872 UNSPEC_VCVTPS2PH))]
18873 "TARGET_F16C || TARGET_AVX512VL"
18874 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18875 [(set_attr "type" "ssecvt")
18876 (set_attr "prefix" "maybe_evex")
18877 (set_attr "mode" "V4SF")])
;; 256-bit form: V8SF -> V8HI, register or memory destination.
18879 (define_insn "vcvtps2ph256<mask_name>"
18880 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
18881 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
18882 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18883 UNSPEC_VCVTPS2PH))]
18884 "TARGET_F16C || TARGET_AVX512VL"
18885 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18886 [(set_attr "type" "ssecvt")
18887 (set_attr "prefix" "maybe_evex")
18888 (set_attr "btver2_decode" "vector")
18889 (set_attr "mode" "V8SF")])
;; 512-bit AVX512F form: V16SF -> V16HI.
18891 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
18892 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
18894 [(match_operand:V16SF 1 "register_operand" "v")
18895 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18896 UNSPEC_VCVTPS2PH))]
18898 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18899 [(set_attr "type" "ssecvt")
18900 (set_attr "prefix" "evex")
18901 (set_attr "mode" "V16SF")])
18903 ;; For gather* insn patterns
18904 (define_mode_iterator VEC_GATHER_MODE
18905 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Index-vector mode when the index elements are 32-bit (SI).
18906 (define_mode_attr VEC_GATHER_IDXSI
18907 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
18908 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
18909 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
18910 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; Index-vector mode when the index elements are 64-bit (DI); note the
;; index vector has half as many elements for 32-bit data modes.
18912 (define_mode_attr VEC_GATHER_IDXDI
18913 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18914 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
18915 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
18916 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Source/merge operand mode for DI-indexed gathers: 32-bit data modes
;; use the half-width vector because only half the lanes are produced.
18918 (define_mode_attr VEC_GATHER_SRCDI
18919 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18920 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
18921 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
18922 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; AVX2 gather with 32-bit indices (vgatherdps/vgatherdpd/vpgatherdd/
;; vpgatherdq).  The expander builds the VSIB address unspec from the
;; base (op 2), index vector (op 3) and scale (op 5); op 1 is the merge
;; source and op 4 the per-element mask vector (clobbered by the insn).
18924 (define_expand "avx2_gathersi<mode>"
18925 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
18926 (unspec:VEC_GATHER_MODE
18927 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
18928 (mem:<ssescalarmode>
18930 [(match_operand 2 "vsib_address_operand")
18931 (match_operand:<VEC_GATHER_IDXSI>
18932 3 "register_operand")
18933 (match_operand:SI 5 "const1248_operand ")]))
18934 (mem:BLK (scratch))
18935 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
18937 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
18941 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18942 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn.  Destination is earlyclobber ("=&x") because the
;; hardware forbids dest/index/mask register overlap; the mask register
;; (operand 1/5 tie) is destroyed by the instruction.
18945 (define_insn "*avx2_gathersi<mode>"
18946 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18947 (unspec:VEC_GATHER_MODE
18948 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
18949 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18951 [(match_operand:P 3 "vsib_address_operand" "Tv")
18952 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
18953 (match_operand:SI 6 "const1248_operand" "n")]
18955 (mem:BLK (scratch))
18956 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
18958 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18960 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
18961 [(set_attr "type" "ssemov")
18962 (set_attr "prefix" "vex")
18963 (set_attr "mode" "<sseinsnmode>")])
;; Variant matched when combine drops the merge-source tie (operand
;; numbering shifts down by one accordingly).
18965 (define_insn "*avx2_gathersi<mode>_2"
18966 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18967 (unspec:VEC_GATHER_MODE
18969 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18971 [(match_operand:P 2 "vsib_address_operand" "Tv")
18972 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
18973 (match_operand:SI 5 "const1248_operand" "n")]
18975 (mem:BLK (scratch))
18976 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
18978 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18980 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
18981 [(set_attr "type" "ssemov")
18982 (set_attr "prefix" "vex")
18983 (set_attr "mode" "<sseinsnmode>")])
;; AVX2 gather with 64-bit indices (v*gatherq*).  For 32-bit data modes
;; the source/merge operands use the half-width <VEC_GATHER_SRCDI> mode
;; because a QI-indexed gather yields only half the lanes.
18985 (define_expand "avx2_gatherdi<mode>"
18986 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
18987 (unspec:VEC_GATHER_MODE
18988 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18989 (mem:<ssescalarmode>
18991 [(match_operand 2 "vsib_address_operand")
18992 (match_operand:<VEC_GATHER_IDXDI>
18993 3 "register_operand")
18994 (match_operand:SI 5 "const1248_operand ")]))
18995 (mem:BLK (scratch))
18996 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
18998 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
19002 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19003 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn; note the template writes %2 (the tied merge source in
;; half-width mode), not %0, as the assembler-visible destination.
19006 (define_insn "*avx2_gatherdi<mode>"
19007 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19008 (unspec:VEC_GATHER_MODE
19009 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
19010 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
19012 [(match_operand:P 3 "vsib_address_operand" "Tv")
19013 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
19014 (match_operand:SI 6 "const1248_operand" "n")]
19016 (mem:BLK (scratch))
19017 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
19019 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19021 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
19022 [(set_attr "type" "ssemov")
19023 (set_attr "prefix" "vex")
19024 (set_attr "mode" "<sseinsnmode>")])
;; _2: merge-source-less variant; when data mode differs from SRCDI
;; (32-bit data), print the xmm (%x0) view of the destination.
19026 (define_insn "*avx2_gatherdi<mode>_2"
19027 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19028 (unspec:VEC_GATHER_MODE
19030 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19032 [(match_operand:P 2 "vsib_address_operand" "Tv")
19033 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
19034 (match_operand:SI 5 "const1248_operand" "n")]
19036 (mem:BLK (scratch))
19037 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
19039 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19042 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
19043 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
19044 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
19046 [(set_attr "type" "ssemov")
19047 (set_attr "prefix" "vex")
19048 (set_attr "mode" "<sseinsnmode>")])
;; _3: combine-generated form where only the low half (elements 0..3)
;; of the 256-bit gather result is used.
19050 (define_insn "*avx2_gatherdi<mode>_3"
19051 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
19052 (vec_select:<VEC_GATHER_SRCDI>
19054 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
19055 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
19057 [(match_operand:P 3 "vsib_address_operand" "Tv")
19058 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
19059 (match_operand:SI 6 "const1248_operand" "n")]
19061 (mem:BLK (scratch))
19062 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
19064 (parallel [(const_int 0) (const_int 1)
19065 (const_int 2) (const_int 3)])))
19066 (clobber (match_scratch:VI4F_256 1 "=&x"))]
19068 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
19069 [(set_attr "type" "ssemov")
19070 (set_attr "prefix" "vex")
19071 (set_attr "mode" "<sseinsnmode>")])
;; _4: like _3 but without the tied merge source.
19073 (define_insn "*avx2_gatherdi<mode>_4"
19074 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
19075 (vec_select:<VEC_GATHER_SRCDI>
19078 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19080 [(match_operand:P 2 "vsib_address_operand" "Tv")
19081 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
19082 (match_operand:SI 5 "const1248_operand" "n")]
19084 (mem:BLK (scratch))
19085 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
19087 (parallel [(const_int 0) (const_int 1)
19088 (const_int 2) (const_int 3)])))
19089 (clobber (match_scratch:VI4F_256 1 "=&x"))]
19091 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
19092 [(set_attr "type" "ssemov")
19093 (set_attr "prefix" "vex")
19094 (set_attr "mode" "<sseinsnmode>")])
19096 ;; Memory operand override for -masm=intel of the v*gatherq* patterns.
;; Maps each data mode to the operand-size modifier letter used when
;; printing the VSIB memory operand in Intel syntax (x=xmmword,
;; t=ymmword, g=zmmword, q=qword).
19097 (define_mode_attr gatherq_mode
19098 [(V4SI "q") (V2DI "x") (V4SF "q") (V2DF "x")
19099 (V8SI "x") (V4DI "t") (V8SF "x") (V4DF "t")
19100 (V16SI "t") (V8DI "g") (V16SF "t") (V8DF "g")])
;; AVX512 gather with 32-bit indices.  Unlike AVX2 gathers, the mask is
;; a k-register (<avx512fmaskmode>, operand 4) and is clobbered —
;; hardware zeroes it as elements complete.
19102 (define_expand "<avx512>_gathersi<mode>"
19103 [(parallel [(set (match_operand:VI48F 0 "register_operand")
19105 [(match_operand:VI48F 1 "register_operand")
19106 (match_operand:<avx512fmaskmode> 4 "register_operand")
19107 (mem:<ssescalarmode>
19109 [(match_operand 2 "vsib_address_operand")
19110 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
19111 (match_operand:SI 5 "const1248_operand")]))]
19113 (clobber (match_scratch:<avx512fmaskmode> 7))])]
19117 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19118 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn; "%{%2%}" prints the {%k} mask annotation, and
;; %<xtg_mode>6 sizes the memory operand for Intel syntax.
19121 (define_insn "*avx512f_gathersi<mode>"
19122 [(set (match_operand:VI48F 0 "register_operand" "=&v")
19124 [(match_operand:VI48F 1 "register_operand" "0")
19125 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
19126 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19128 [(match_operand:P 4 "vsib_address_operand" "Tv")
19129 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
19130 (match_operand:SI 5 "const1248_operand" "n")]
19131 UNSPEC_VSIBADDR)])]
19133 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
19135 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %<xtg_mode>6}"
19136 [(set_attr "type" "ssemov")
19137 (set_attr "prefix" "evex")
19138 (set_attr "mode" "<sseinsnmode>")])
;; Variant without the tied merge source (operand numbering shifts).
19140 (define_insn "*avx512f_gathersi<mode>_2"
19141 [(set (match_operand:VI48F 0 "register_operand" "=&v")
19144 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
19145 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
19147 [(match_operand:P 3 "vsib_address_operand" "Tv")
19148 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
19149 (match_operand:SI 4 "const1248_operand" "n")]
19150 UNSPEC_VSIBADDR)])]
19152 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
19154 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<xtg_mode>5}"
19155 [(set_attr "type" "ssemov")
19156 (set_attr "prefix" "evex")
19157 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 gather with 64-bit indices.  The mask is passed/clobbered as
;; QI here; for 32-bit data only half the lanes are produced, hence the
;; <VEC_GATHER_SRCDI> merge-source mode.
19160 (define_expand "<avx512>_gatherdi<mode>"
19161 [(parallel [(set (match_operand:VI48F 0 "register_operand")
19163 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
19164 (match_operand:QI 4 "register_operand")
19165 (mem:<ssescalarmode>
19167 [(match_operand 2 "vsib_address_operand")
19168 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
19169 (match_operand:SI 5 "const1248_operand")]))]
19171 (clobber (match_scratch:QI 7))])]
19175 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19176 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn; the template prints %1 (half-width tied source) as the
;; visible destination and sizes memory with <gatherq_mode> for Intel
;; syntax.
19179 (define_insn "*avx512f_gatherdi<mode>"
19180 [(set (match_operand:VI48F 0 "register_operand" "=&v")
19182 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
19183 (match_operand:QI 7 "register_operand" "2")
19184 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19186 [(match_operand:P 4 "vsib_address_operand" "Tv")
19187 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
19188 (match_operand:SI 5 "const1248_operand" "n")]
19189 UNSPEC_VSIBADDR)])]
19191 (clobber (match_scratch:QI 2 "=&Yk"))]
19194 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %<gatherq_mode>6}";
19196 [(set_attr "type" "ssemov")
19197 (set_attr "prefix" "evex")
19198 (set_attr "mode" "<sseinsnmode>")])
;; Merge-source-less variant: when data mode != SRCDI mode the result
;; occupies only the low half, printed as %x0 (128-bit) or %t0 (256-bit
;; view for 512-bit modes).
19200 (define_insn "*avx512f_gatherdi<mode>_2"
19201 [(set (match_operand:VI48F 0 "register_operand" "=&v")
19204 (match_operand:QI 6 "register_operand" "1")
19205 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
19207 [(match_operand:P 3 "vsib_address_operand" "Tv")
19208 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
19209 (match_operand:SI 4 "const1248_operand" "n")]
19210 UNSPEC_VSIBADDR)])]
19212 (clobber (match_scratch:QI 1 "=&Yk"))]
19215 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
19217 if (<MODE_SIZE> != 64)
19218 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %<gatherq_mode>5}";
19220 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %t5}";
19222 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<gatherq_mode>5}";
19224 [(set_attr "type" "ssemov")
19225 (set_attr "prefix" "evex")
19226 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 scatters — the store-side mirror of the gathers above.
;; 32-bit-index form: store vector op 3 to VSIB address built from base
;; op 0, index op 2 and scale op 4, under k-mask op 1 (clobbered).
19228 (define_expand "<avx512>_scattersi<mode>"
19229 [(parallel [(set (mem:VI48F
19231 [(match_operand 0 "vsib_address_operand")
19232 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
19233 (match_operand:SI 4 "const1248_operand")]))
19235 [(match_operand:<avx512fmaskmode> 1 "register_operand")
19236 (match_operand:VI48F 3 "register_operand")]
19238 (clobber (match_scratch:<avx512fmaskmode> 6))])]
19242 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
19243 operands[4]), UNSPEC_VSIBADDR);
;; Matching insn for the 32-bit-index scatter.
19246 (define_insn "*avx512f_scattersi<mode>"
19247 [(set (match_operator:VI48F 5 "vsib_mem_operator"
19249 [(match_operand:P 0 "vsib_address_operand" "Tv")
19250 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
19251 (match_operand:SI 4 "const1248_operand" "n")]
19254 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
19255 (match_operand:VI48F 3 "register_operand" "v")]
19257 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
19259 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
19260 [(set_attr "type" "ssemov")
19261 (set_attr "prefix" "evex")
19262 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit-index scatter expander; mask handled as QI, source in the
;; possibly half-width <VEC_GATHER_SRCDI> mode.
19264 (define_expand "<avx512>_scatterdi<mode>"
19265 [(parallel [(set (mem:VI48F
19267 [(match_operand 0 "vsib_address_operand")
19268 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
19269 (match_operand:SI 4 "const1248_operand")]))
19271 [(match_operand:QI 1 "register_operand")
19272 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
19274 (clobber (match_scratch:QI 6))])]
19278 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
19279 operands[4]), UNSPEC_VSIBADDR);
;; Matching insn; for 32-bit elements the Intel-syntax memory operand is
;; printed with the %t (ymmword) size override.
19282 (define_insn "*avx512f_scatterdi<mode>"
19283 [(set (match_operator:VI48F 5 "vsib_mem_operator"
19285 [(match_operand:P 0 "vsib_address_operand" "Tv")
19286 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
19287 (match_operand:SI 4 "const1248_operand" "n")]
19290 [(match_operand:QI 6 "register_operand" "1")
19291 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
19293 (clobber (match_scratch:QI 1 "=&Yk"))]
19296 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
19297 return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}";
19298 return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%t5%{%1%}, %3}";
19300 [(set_attr "type" "ssemov")
19301 (set_attr "prefix" "evex")
19302 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 compress: pack the mask-selected elements of operand 1
;; contiguously into the destination; operand 2 ("0C") is the merge
;; source (tied register or zero), operand 3 the k-mask.  %N2 prints
;; the {z} zero-masking suffix when operand 2 is const0.
;; 32/64-bit element form (AVX512F/VL).
19304 (define_insn "<avx512>_compress<mode>_mask"
19305 [(set (match_operand:VI48F 0 "register_operand" "=v")
19307 [(match_operand:VI48F 1 "register_operand" "v")
19308 (match_operand:VI48F 2 "vector_move_operand" "0C")
19309 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
19312 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19313 [(set_attr "type" "ssemov")
19314 (set_attr "prefix" "evex")
19315 (set_attr "mode" "<sseinsnmode>")])
;; 8/16-bit element form, requires AVX512VBMI2 (vpcompressb/vpcompressw).
19317 (define_insn "compress<mode>_mask"
19318 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
19319 (unspec:VI12_AVX512VLBW
19320 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
19321 (match_operand:VI12_AVX512VLBW 2 "vector_move_operand" "0C")
19322 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
19324 "TARGET_AVX512VBMI2"
19325 "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19326 [(set_attr "type" "ssemov")
19327 (set_attr "prefix" "evex")
19328 (set_attr "mode" "<sseinsnmode>")])
;; Compress directly to memory (store form), 32/64-bit elements.
19330 (define_insn "<avx512>_compressstore<mode>_mask"
19331 [(set (match_operand:VI48F 0 "memory_operand" "=m")
19333 [(match_operand:VI48F 1 "register_operand" "x")
19335 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
19336 UNSPEC_COMPRESS_STORE))]
19338 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
19339 [(set_attr "type" "ssemov")
19340 (set_attr "prefix" "evex")
19341 (set_attr "memory" "store")
19342 (set_attr "mode" "<sseinsnmode>")])
;; Compress-store for 8/16-bit elements (AVX512VBMI2).
19344 (define_insn "compressstore<mode>_mask"
19345 [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
19346 (unspec:VI12_AVX512VLBW
19347 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
19349 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
19350 UNSPEC_COMPRESS_STORE))]
19351 "TARGET_AVX512VBMI2"
19352 "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
19353 [(set_attr "type" "ssemov")
19354 (set_attr "prefix" "evex")
19355 (set_attr "memory" "store")
19356 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 expand: the inverse of compress — spread contiguous source
;; elements into the mask-selected destination lanes.
;; Zero-masking expander for 32/64-bit elements: rewrites operand 2 to
;; the zero vector so masked-off lanes become 0.
19358 (define_expand "<avx512>_expand<mode>_maskz"
19359 [(set (match_operand:VI48F 0 "register_operand")
19361 [(match_operand:VI48F 1 "nonimmediate_operand")
19362 (match_operand:VI48F 2 "vector_move_operand")
19363 (match_operand:<avx512fmaskmode> 3 "register_operand")]
19366 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Masked expand insn, 32/64-bit elements; alternative 1 loads the
;; source from memory.
19368 (define_insn "<avx512>_expand<mode>_mask"
19369 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
19371 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
19372 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
19373 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
19376 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19377 [(set_attr "type" "ssemov")
19378 (set_attr "prefix" "evex")
19379 (set_attr "memory" "none,load")
19380 (set_attr "mode" "<sseinsnmode>")])
;; 8/16-bit element expand (AVX512VBMI2: vpexpandb/vpexpandw).
19382 (define_insn "expand<mode>_mask"
19383 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
19384 (unspec:VI12_AVX512VLBW
19385 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
19386 (match_operand:VI12_AVX512VLBW 2 "vector_move_operand" "0C,0C")
19387 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
19389 "TARGET_AVX512VBMI2"
19390 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19391 [(set_attr "type" "ssemov")
19392 (set_attr "prefix" "evex")
19393 (set_attr "memory" "none,load")
19394 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking expander for the 8/16-bit element form.
19396 (define_expand "expand<mode>_maskz"
19397 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
19398 (unspec:VI12_AVX512VLBW
19399 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
19400 (match_operand:VI12_AVX512VLBW 2 "vector_move_operand")
19401 (match_operand:<avx512fmaskmode> 3 "register_operand")]
19403 "TARGET_AVX512VBMI2"
19404 "operands[2] = CONST0_RTX (<MODE>mode);")
;; AVX512DQ vrange: min/max-style range restriction selected by the
;; 4-bit immediate (operand 3), with optional masking and SAE.
;; Packed form.
19406 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
19407 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
19408 (unspec:VF_AVX512VL
19409 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
19410 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
19411 (match_operand:SI 3 "const_0_to_15_operand")]
19413 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
19414 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
19415 [(set_attr "type" "sse")
19416 (set_attr "prefix" "evex")
19417 (set_attr "mode" "<MODE>")])
;; Scalar form (vrangess/vrangesd) on VF_128 modes.
19419 (define_insn "avx512dq_ranges<mode><round_saeonly_name>"
19420 [(set (match_operand:VF_128 0 "register_operand" "=v")
19423 [(match_operand:VF_128 1 "register_operand" "v")
19424 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
19425 (match_operand:SI 3 "const_0_to_15_operand")]
19430 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
19431 [(set_attr "type" "sse")
19432 (set_attr "prefix" "evex")
19433 (set_attr "mode" "<MODE>")])
;; AVX512DQ vfpclass: test each element against the FP-class categories
;; selected by immediate operand 2; result is a k-register mask.
;; Packed form.
19435 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
19436 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
19437 (unspec:<avx512fmaskmode>
19438 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
19439 (match_operand:QI 2 "const_0_to_255_operand" "n")]
19442 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
19443 [(set_attr "type" "sse")
19444 (set_attr "length_immediate" "1")
19445 (set_attr "prefix" "evex")
19446 (set_attr "mode" "<MODE>")])
;; Scalar form: the class test result is and-ed into the mask.
19448 (define_insn "avx512dq_vmfpclass<mode>"
19449 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
19450 (and:<avx512fmaskmode>
19451 (unspec:<avx512fmaskmode>
19452 [(match_operand:VF_128 1 "register_operand" "v")
19453 (match_operand:QI 2 "const_0_to_255_operand" "n")]
19457 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
19458 [(set_attr "type" "sse")
19459 (set_attr "length_immediate" "1")
19460 (set_attr "prefix" "evex")
19461 (set_attr "mode" "<MODE>")])
19463 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
19464 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
19465 (unspec:VF_AVX512VL
19466 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
19467 (match_operand:SI 2 "const_0_to_15_operand")]
19470 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
19471 [(set_attr "prefix" "evex")
19472 (set_attr "mode" "<MODE>")])
19474 (define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
19475 [(set (match_operand:VF_128 0 "register_operand" "=v")
19478 [(match_operand:VF_128 1 "register_operand" "v")
19479 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
19480 (match_operand:SI 3 "const_0_to_15_operand")]
19485 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2<round_saeonly_scalar_mask_op4>, %3}";
19486 [(set_attr "prefix" "evex")
19487 (set_attr "mode" "<ssescalarmode>")])
19489 ;; The correct representation for this is absolutely enormous, and
19490 ;; surely not generally useful.
;; VDBPSADBW: double-block packed SAD on byte inputs (<dbpsadbwmode>)
;; producing a word-element result; operand 3 is a const_0_to_255
;; control immediate.  Modeled as an unspec for the reason above.
19491 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
19492 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
19493 (unspec:VI2_AVX512VL
19494 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
19495 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
19496 (match_operand:SI 3 "const_0_to_255_operand")]
19499 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
19500 [(set_attr "type" "sselog1")
19501 (set_attr "length_immediate" "1")
19502 (set_attr "prefix" "evex")
19503 (set_attr "mode" "<sseinsnmode>")])
;; Per-element count-leading-zeros, emitted as VPLZCNTD/VPLZCNTQ.
;; Named clz<mode>2 so the vectorizer can use it directly.
19505 (define_insn "clz<mode>2<mask_name>"
19506 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
19508 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
19510 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19511 [(set_attr "type" "sse")
19512 (set_attr "prefix" "evex")
19513 (set_attr "mode" "<sseinsnmode>")])
;; VPCONFLICTD/VPCONFLICTQ: per-element duplicate-detection bitmap.
19515 (define_insn "<mask_codefor>conflict<mode><mask_name>"
19516 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
19517 (unspec:VI48_AVX512VL
19518 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
19521 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19522 [(set_attr "type" "sse")
19523 (set_attr "prefix" "evex")
19524 (set_attr "mode" "<sseinsnmode>")])
;; Intel SHA extension instructions.  All operate on V4SI and use the
;; destructive two-operand SSE form (operand 1 tied to "0").
;;
;; SHA1MSG1: intermediate calculation for the SHA-1 message schedule.
19526 (define_insn "sha1msg1"
19527 [(set (match_operand:V4SI 0 "register_operand" "=x")
19529 [(match_operand:V4SI 1 "register_operand" "0")
19530 (match_operand:V4SI 2 "vector_operand" "xBm")]
19533 "sha1msg1\t{%2, %0|%0, %2}"
19534 [(set_attr "type" "sselog1")
19535 (set_attr "mode" "TI")])
;; SHA1MSG2: final calculation for the SHA-1 message schedule.
19537 (define_insn "sha1msg2"
19538 [(set (match_operand:V4SI 0 "register_operand" "=x")
19540 [(match_operand:V4SI 1 "register_operand" "0")
19541 (match_operand:V4SI 2 "vector_operand" "xBm")]
19544 "sha1msg2\t{%2, %0|%0, %2}"
19545 [(set_attr "type" "sselog1")
19546 (set_attr "mode" "TI")])
;; SHA1NEXTE: calculate SHA-1 state variable E after four rounds.
19548 (define_insn "sha1nexte"
19549 [(set (match_operand:V4SI 0 "register_operand" "=x")
19551 [(match_operand:V4SI 1 "register_operand" "0")
19552 (match_operand:V4SI 2 "vector_operand" "xBm")]
19553 UNSPEC_SHA1NEXTE))]
19555 "sha1nexte\t{%2, %0|%0, %2}"
19556 [(set_attr "type" "sselog1")
19557 (set_attr "mode" "TI")])
;; SHA1RNDS4: four rounds of SHA-1; the const_0_to_3 immediate selects
;; the round function/constant group.
19559 (define_insn "sha1rnds4"
19560 [(set (match_operand:V4SI 0 "register_operand" "=x")
19562 [(match_operand:V4SI 1 "register_operand" "0")
19563 (match_operand:V4SI 2 "vector_operand" "xBm")
19564 (match_operand:SI 3 "const_0_to_3_operand" "n")]
19565 UNSPEC_SHA1RNDS4))]
19567 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
19568 [(set_attr "type" "sselog1")
19569 (set_attr "length_immediate" "1")
19570 (set_attr "mode" "TI")])
;; SHA256MSG1 / SHA256MSG2: SHA-256 message-schedule calculations.
19572 (define_insn "sha256msg1"
19573 [(set (match_operand:V4SI 0 "register_operand" "=x")
19575 [(match_operand:V4SI 1 "register_operand" "0")
19576 (match_operand:V4SI 2 "vector_operand" "xBm")]
19577 UNSPEC_SHA256MSG1))]
19579 "sha256msg1\t{%2, %0|%0, %2}"
19580 [(set_attr "type" "sselog1")
19581 (set_attr "mode" "TI")])
19583 (define_insn "sha256msg2"
19584 [(set (match_operand:V4SI 0 "register_operand" "=x")
19586 [(match_operand:V4SI 1 "register_operand" "0")
19587 (match_operand:V4SI 2 "vector_operand" "xBm")]
19588 UNSPEC_SHA256MSG2))]
19590 "sha256msg2\t{%2, %0|%0, %2}"
19591 [(set_attr "type" "sselog1")
19592 (set_attr "mode" "TI")])
;; SHA256RNDS2: two rounds of SHA-256.  Operand 3 is pinned by
;; constraint "Yz" (the fixed %xmm0 third source the ISA requires).
19594 (define_insn "sha256rnds2"
19595 [(set (match_operand:V4SI 0 "register_operand" "=x")
19597 [(match_operand:V4SI 1 "register_operand" "0")
19598 (match_operand:V4SI 2 "vector_operand" "xBm")
19599 (match_operand:V4SI 3 "register_operand" "Yz")]
19600 UNSPEC_SHA256RNDS2))]
19602 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
19603 [(set_attr "type" "sselog1")
19604 (set_attr "length_immediate" "1")
19605 (set_attr "mode" "TI")])
;; Cast between a 512-bit vector and its quarter-size (128-bit) part,
;; kept as an unspec until after reload and then split into a plain
;; move: the register case narrows operand 0 with gen_lowpart, the
;; memory case widens operand 1 with lowpart_subreg.
19607 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
19608 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
19609 (unspec:AVX512MODE2P
19610 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
19612 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
19614 "&& reload_completed"
19615 [(set (match_dup 0) (match_dup 1))]
19617 if (REG_P (operands[0]))
19618 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
19620 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
19621 <ssequartermode>mode);
;; Same as above but for the half-size (256-bit) part.
19624 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
19625 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
19626 (unspec:AVX512MODE2P
19627 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
19629 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
19631 "&& reload_completed"
19632 [(set (match_dup 0) (match_dup 1))]
19634 if (REG_P (operands[0]))
19635 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
19637 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
19638 <ssehalfvecmode>mode);
;; AVX512IFMA 52-bit integer fused multiply-add (VPMADD52LUQ /
;; VPMADD52HUQ).  NOTE(review): the pattern names below spell it
;; "vpamdd52" (transposed) while the emitted mnemonic is correctly
;; "vpmadd52"; the names are the gen_* interface used by the builtin
;; expanders, so do not rename them without updating all callers.
19641 (define_int_iterator VPMADD52
19642 [UNSPEC_VPMADD52LUQ
19643 UNSPEC_VPMADD52HUQ])
;; Maps each unspec to the "luq"/"huq" mnemonic suffix.
19645 (define_int_attr vpmadd52type
19646 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
;; Zero-masked expander for the high-52-bit form: passes an all-zero
;; merge vector to the *_maskz_1 pattern.
19648 (define_expand "vpamdd52huq<mode>_maskz"
19649 [(match_operand:VI8_AVX512VL 0 "register_operand")
19650 (match_operand:VI8_AVX512VL 1 "register_operand")
19651 (match_operand:VI8_AVX512VL 2 "register_operand")
19652 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
19653 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19654 "TARGET_AVX512IFMA"
19656 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
19657 operands[0], operands[1], operands[2], operands[3],
19658 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masked expander for the low-52-bit form, identical in shape.
19662 (define_expand "vpamdd52luq<mode>_maskz"
19663 [(match_operand:VI8_AVX512VL 0 "register_operand")
19664 (match_operand:VI8_AVX512VL 1 "register_operand")
19665 (match_operand:VI8_AVX512VL 2 "register_operand")
19666 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
19667 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19668 "TARGET_AVX512IFMA"
19670 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
19671 operands[0], operands[1], operands[2], operands[3],
19672 CONST0_RTX (<MODE>mode), operands[4]));
;; The insn itself: accumulator (operand 1) is tied to the destination
;; ("0"), matching the destructive VPMADD52 encoding.
19676 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
19677 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
19678 (unspec:VI8_AVX512VL
19679 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
19680 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
19681 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
19683 "TARGET_AVX512IFMA"
19684 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
19685 [(set_attr "type" "ssemuladd")
19686 (set_attr "prefix" "evex")
19687 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked variant: vec_merge with mask operand 4 ("Yk") selects
;; between the result and the old destination value.
19689 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
19690 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
19691 (vec_merge:VI8_AVX512VL
19692 (unspec:VI8_AVX512VL
19693 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
19694 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
19695 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
19698 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
19699 "TARGET_AVX512IFMA"
19700 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
19701 [(set_attr "type" "ssemuladd")
19702 (set_attr "prefix" "evex")
19703 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VBMI VPMULTISHIFTQB: per-qword multi-bit-field extraction,
;; with optional masking via <mask_name>.
19705 (define_insn "vpmultishiftqb<mode><mask_name>"
19706 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
19707 (unspec:VI1_AVX512VL
19708 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
19709 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
19710 UNSPEC_VPMULTISHIFT))]
19711 "TARGET_AVX512VBMI"
19712 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19713 [(set_attr "type" "sselog")
19714 (set_attr "prefix" "evex")
19715 (set_attr "mode" "<sseinsnmode>")])
;; Quadruple-512-bit modes used by the AVX5124FMAPS/AVX5124VNNIW
;; register-group operands (four consecutive zmm registers).
19717 (define_mode_iterator IMOD4
19718 [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
;; The single-register (512-bit) component mode of each IMOD4 mode.
19720 (define_mode_attr imod4_narrow
19721 [(V64SF "V16SF") (V64SI "V16SI")])
;; Standard move expander for the group modes.
19723 (define_expand "mov<mode>"
19724 [(set (match_operand:IMOD4 0 "nonimmediate_operand")
19725 (match_operand:IMOD4 1 "vector_move_operand"))]
19728 ix86_expand_vector_move (<MODE>mode, operands);
;; Group move, split after reload into four 64-byte component moves
;; (subregs at byte offsets i*64).
19732 (define_insn_and_split "*mov<mode>_internal"
19733 [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
19734 (match_operand:IMOD4 1 "vector_move_operand" " C,vm,v"))]
19736 && (register_operand (operands[0], <MODE>mode)
19737 || register_operand (operands[1], <MODE>mode))"
19739 "&& reload_completed"
19745 for (i = 0; i < 4; i++)
19747 op0 = simplify_subreg
19748 (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
19749 op1 = simplify_subreg
19750 (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
19751 emit_move_insn (op0, op1);
;; AVX5124FMAPS V4FMADDPS/V4FMADDSS: fused multiply-add using a group
;; of four source registers (V64SF operand, constraint "Yh" — TODO
;; confirm "Yh" register-group semantics in constraints.md) and a
;; 128-bit memory operand.  %g prints the zmm form, %x the xmm form.
19756 (define_insn "avx5124fmaddps_4fmaddps"
19757 [(set (match_operand:V16SF 0 "register_operand" "=v")
19759 [(match_operand:V16SF 1 "register_operand" "0")
19760 (match_operand:V64SF 2 "register_operand" "Yh")
19761 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
19762 "TARGET_AVX5124FMAPS"
19763 "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
19764 [(set_attr ("type") ("ssemuladd"))
19765 (set_attr ("prefix") ("evex"))
19766 (set_attr ("mode") ("V16SF"))])
;; Merge-masked packed form: mask register in operand 4 ("Yk").
19768 (define_insn "avx5124fmaddps_4fmaddps_mask"
19769 [(set (match_operand:V16SF 0 "register_operand" "=v")
19772 [(match_operand:V64SF 1 "register_operand" "Yh")
19773 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
19774 (match_operand:V16SF 3 "register_operand" "0")
19775 (match_operand:HI 4 "register_operand" "Yk")))]
19776 "TARGET_AVX5124FMAPS"
19777 "v4fmaddps\t{%2, %g1, %0%{%4%}|%{%4%}%0, %g1, %2}"
19778 [(set_attr ("type") ("ssemuladd"))
19779 (set_attr ("prefix") ("evex"))
19780 (set_attr ("mode") ("V16SF"))])
;; Zero-masked packed form: merge source is const0, %{z%} modifier.
19782 (define_insn "avx5124fmaddps_4fmaddps_maskz"
19783 [(set (match_operand:V16SF 0 "register_operand" "=v")
19786 [(match_operand:V16SF 1 "register_operand" "0")
19787 (match_operand:V64SF 2 "register_operand" "Yh")
19788 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
19789 (match_operand:V16SF 4 "const0_operand" "C")
19790 (match_operand:HI 5 "register_operand" "Yk")))]
19791 "TARGET_AVX5124FMAPS"
19792 "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %g2, %3}"
19793 [(set_attr ("type") ("ssemuladd"))
19794 (set_attr ("prefix") ("evex"))
19795 (set_attr ("mode") ("V16SF"))])
;; Scalar form (V4SF destination, SF mode attribute).
19797 (define_insn "avx5124fmaddps_4fmaddss"
19798 [(set (match_operand:V4SF 0 "register_operand" "=v")
19800 [(match_operand:V4SF 1 "register_operand" "0")
19801 (match_operand:V64SF 2 "register_operand" "Yh")
19802 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
19803 "TARGET_AVX5124FMAPS"
19804 "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
19805 [(set_attr ("type") ("ssemuladd"))
19806 (set_attr ("prefix") ("evex"))
19807 (set_attr ("mode") ("SF"))])
;; Merge-masked scalar form (QI mask — only the low bit is relevant).
19809 (define_insn "avx5124fmaddps_4fmaddss_mask"
19810 [(set (match_operand:V4SF 0 "register_operand" "=v")
19813 [(match_operand:V64SF 1 "register_operand" "Yh")
19814 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
19815 (match_operand:V4SF 3 "register_operand" "0")
19816 (match_operand:QI 4 "register_operand" "Yk")))]
19817 "TARGET_AVX5124FMAPS"
19818 "v4fmaddss\t{%2, %x1, %0%{%4%}|%{%4%}%0, %x1, %2}"
19819 [(set_attr ("type") ("ssemuladd"))
19820 (set_attr ("prefix") ("evex"))
19821 (set_attr ("mode") ("SF"))])
;; Zero-masked scalar form.
19823 (define_insn "avx5124fmaddps_4fmaddss_maskz"
19824 [(set (match_operand:V4SF 0 "register_operand" "=v")
19827 [(match_operand:V4SF 1 "register_operand" "0")
19828 (match_operand:V64SF 2 "register_operand" "Yh")
19829 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
19830 (match_operand:V4SF 4 "const0_operand" "C")
19831 (match_operand:QI 5 "register_operand" "Yk")))]
19832 "TARGET_AVX5124FMAPS"
19833 "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %x2, %3}"
19834 [(set_attr ("type") ("ssemuladd"))
19835 (set_attr ("prefix") ("evex"))
19836 (set_attr ("mode") ("SF"))])
;; Negated-multiply counterparts (V4FNMADDPS/V4FNMADDSS); structure
;; mirrors the v4fmadd patterns above: plain, merge-masked and
;; zero-masked variants for both the packed and scalar forms.
19838 (define_insn "avx5124fmaddps_4fnmaddps"
19839 [(set (match_operand:V16SF 0 "register_operand" "=v")
19841 [(match_operand:V16SF 1 "register_operand" "0")
19842 (match_operand:V64SF 2 "register_operand" "Yh")
19843 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
19844 "TARGET_AVX5124FMAPS"
19845 "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
19846 [(set_attr ("type") ("ssemuladd"))
19847 (set_attr ("prefix") ("evex"))
19848 (set_attr ("mode") ("V16SF"))])
;; Merge-masked packed form.
19850 (define_insn "avx5124fmaddps_4fnmaddps_mask"
19851 [(set (match_operand:V16SF 0 "register_operand" "=v")
19854 [(match_operand:V64SF 1 "register_operand" "Yh")
19855 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
19856 (match_operand:V16SF 3 "register_operand" "0")
19857 (match_operand:HI 4 "register_operand" "Yk")))]
19858 "TARGET_AVX5124FMAPS"
19859 "v4fnmaddps\t{%2, %g1, %0%{%4%}|%{%4%}%0, %g1, %2}"
19860 [(set_attr ("type") ("ssemuladd"))
19861 (set_attr ("prefix") ("evex"))
19862 (set_attr ("mode") ("V16SF"))])
;; Zero-masked packed form.
19864 (define_insn "avx5124fmaddps_4fnmaddps_maskz"
19865 [(set (match_operand:V16SF 0 "register_operand" "=v")
19868 [(match_operand:V16SF 1 "register_operand" "0")
19869 (match_operand:V64SF 2 "register_operand" "Yh")
19870 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
19871 (match_operand:V16SF 4 "const0_operand" "C")
19872 (match_operand:HI 5 "register_operand" "Yk")))]
19873 "TARGET_AVX5124FMAPS"
19874 "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %g2, %3}"
19875 [(set_attr ("type") ("ssemuladd"))
19876 (set_attr ("prefix") ("evex"))
19877 (set_attr ("mode") ("V16SF"))])
;; Scalar form.
19879 (define_insn "avx5124fmaddps_4fnmaddss"
19880 [(set (match_operand:V4SF 0 "register_operand" "=v")
19882 [(match_operand:V4SF 1 "register_operand" "0")
19883 (match_operand:V64SF 2 "register_operand" "Yh")
19884 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
19885 "TARGET_AVX5124FMAPS"
19886 "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
19887 [(set_attr ("type") ("ssemuladd"))
19888 (set_attr ("prefix") ("evex"))
19889 (set_attr ("mode") ("SF"))])
;; Merge-masked scalar form.
19891 (define_insn "avx5124fmaddps_4fnmaddss_mask"
19892 [(set (match_operand:V4SF 0 "register_operand" "=v")
19895 [(match_operand:V64SF 1 "register_operand" "Yh")
19896 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
19897 (match_operand:V4SF 3 "register_operand" "0")
19898 (match_operand:QI 4 "register_operand" "Yk")))]
19899 "TARGET_AVX5124FMAPS"
19900 "v4fnmaddss\t{%2, %x1, %0%{%4%}|%{%4%}%0, %x1, %2}"
19901 [(set_attr ("type") ("ssemuladd"))
19902 (set_attr ("prefix") ("evex"))
19903 (set_attr ("mode") ("SF"))])
;; Zero-masked scalar form.
19905 (define_insn "avx5124fmaddps_4fnmaddss_maskz"
19906 [(set (match_operand:V4SF 0 "register_operand" "=v")
19909 [(match_operand:V4SF 1 "register_operand" "0")
19910 (match_operand:V64SF 2 "register_operand" "Yh")
19911 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
19912 (match_operand:V4SF 4 "const0_operand" "C")
19913 (match_operand:QI 5 "register_operand" "Yk")))]
19914 "TARGET_AVX5124FMAPS"
19915 "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %x2, %3}"
19916 [(set_attr ("type") ("ssemuladd"))
19917 (set_attr ("prefix") ("evex"))
19918 (set_attr ("mode") ("SF"))])
;; AVX5124VNNIW VP4DPWSSD/VP4DPWSSDS: word dot-product accumulated into
;; dword lanes, using a four-register group source ("Yh") and a 128-bit
;; memory operand; same plain/mask/maskz triple as the 4FMAPS patterns.
19920 (define_insn "avx5124vnniw_vp4dpwssd"
19921 [(set (match_operand:V16SI 0 "register_operand" "=v")
19923 [(match_operand:V16SI 1 "register_operand" "0")
19924 (match_operand:V64SI 2 "register_operand" "Yh")
19925 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
19926 "TARGET_AVX5124VNNIW"
19927 "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
19928 [(set_attr ("type") ("ssemuladd"))
19929 (set_attr ("prefix") ("evex"))
19930 (set_attr ("mode") ("TI"))])
;; Merge-masked form.
19932 (define_insn "avx5124vnniw_vp4dpwssd_mask"
19933 [(set (match_operand:V16SI 0 "register_operand" "=v")
19936 [(match_operand:V64SI 1 "register_operand" "Yh")
19937 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
19938 (match_operand:V16SI 3 "register_operand" "0")
19939 (match_operand:HI 4 "register_operand" "Yk")))]
19940 "TARGET_AVX5124VNNIW"
19941 "vp4dpwssd\t{%2, %g1, %0%{%4%}|%{%4%}%0, %g1, %2}"
19942 [(set_attr ("type") ("ssemuladd"))
19943 (set_attr ("prefix") ("evex"))
19944 (set_attr ("mode") ("TI"))])
;; Zero-masked form.
19946 (define_insn "avx5124vnniw_vp4dpwssd_maskz"
19947 [(set (match_operand:V16SI 0 "register_operand" "=v")
19950 [(match_operand:V16SI 1 "register_operand" "0")
19951 (match_operand:V64SI 2 "register_operand" "Yh")
19952 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
19953 (match_operand:V16SI 4 "const0_operand" "C")
19954 (match_operand:HI 5 "register_operand" "Yk")))]
19955 "TARGET_AVX5124VNNIW"
19956 "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %g2, %3}"
19957 [(set_attr ("type") ("ssemuladd"))
19958 (set_attr ("prefix") ("evex"))
19959 (set_attr ("mode") ("TI"))])
;; Saturating variant (VP4DPWSSDS).
19961 (define_insn "avx5124vnniw_vp4dpwssds"
19962 [(set (match_operand:V16SI 0 "register_operand" "=v")
19964 [(match_operand:V16SI 1 "register_operand" "0")
19965 (match_operand:V64SI 2 "register_operand" "Yh")
19966 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
19967 "TARGET_AVX5124VNNIW"
19968 "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
19969 [(set_attr ("type") ("ssemuladd"))
19970 (set_attr ("prefix") ("evex"))
19971 (set_attr ("mode") ("TI"))])
;; Merge-masked saturating form.
19973 (define_insn "avx5124vnniw_vp4dpwssds_mask"
19974 [(set (match_operand:V16SI 0 "register_operand" "=v")
19977 [(match_operand:V64SI 1 "register_operand" "Yh")
19978 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
19979 (match_operand:V16SI 3 "register_operand" "0")
19980 (match_operand:HI 4 "register_operand" "Yk")))]
19981 "TARGET_AVX5124VNNIW"
19982 "vp4dpwssds\t{%2, %g1, %0%{%4%}|%{%4%}%0, %g1, %2}"
19983 [(set_attr ("type") ("ssemuladd"))
19984 (set_attr ("prefix") ("evex"))
19985 (set_attr ("mode") ("TI"))])
;; Zero-masked saturating form.
19987 (define_insn "avx5124vnniw_vp4dpwssds_maskz"
19988 [(set (match_operand:V16SI 0 "register_operand" "=v")
19991 [(match_operand:V16SI 1 "register_operand" "0")
19992 (match_operand:V64SI 2 "register_operand" "Yh")
19993 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
19994 (match_operand:V16SI 4 "const0_operand" "C")
19995 (match_operand:HI 5 "register_operand" "Yk")))]
19996 "TARGET_AVX5124VNNIW"
19997 "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %g2, %3}"
19998 [(set_attr ("type") ("ssemuladd"))
19999 (set_attr ("prefix") ("evex"))
20000 (set_attr ("mode") ("TI"))])
;; AVX512VPOPCNTDQ per-element population count (VPOPCNTD/VPOPCNTQ),
;; with optional masking.  Note: no attribute list on this pattern.
20002 (define_insn "vpopcount<mode><mask_name>"
20003 [(set (match_operand:VI48_512 0 "register_operand" "=v")
20005 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
20006 "TARGET_AVX512VPOPCNTDQ"
20007 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
20009 ;; Save multiple registers out-of-line.
;; match_parallel patterns for the out-of-line register save/restore
;; stubs; operand 1 is the symbol of the stub routine.  64-bit only.
20010 (define_insn "save_multiple<mode>"
20011 [(match_parallel 0 "save_multiple"
20012 [(use (match_operand:P 1 "symbol_operand"))])]
20013 "TARGET_SSE && TARGET_64BIT"
20016 ;; Restore multiple registers out-of-line.
20017 (define_insn "restore_multiple<mode>"
20018 [(match_parallel 0 "restore_multiple"
20019 [(use (match_operand:P 1 "symbol_operand"))])]
20020 "TARGET_SSE && TARGET_64BIT"
20023 ;; Restore multiple registers out-of-line and return.
;; Also restores %rsp from %r10 as part of the parallel.
20024 (define_insn "restore_multiple_and_return<mode>"
20025 [(match_parallel 0 "restore_multiple"
20027 (use (match_operand:P 1 "symbol_operand"))
20028 (set (reg:DI SP_REG) (reg:DI R10_REG))
20030 "TARGET_SSE && TARGET_64BIT"
20033 ;; Restore multiple registers out-of-line when hard frame pointer is used,
20034 ;; perform the leave operation prior to returning (from the function).
;; The parallel models the leave: %rsp = %rbp + 8 and %rbp reloaded
;; from the stack, plus a memory-blockage clobber.
20035 (define_insn "restore_multiple_leave_return<mode>"
20036 [(match_parallel 0 "restore_multiple"
20038 (use (match_operand:P 1 "symbol_operand"))
20039 (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
20040 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
20041 (clobber (mem:BLK (scratch)))
20043 "TARGET_SSE && TARGET_64BIT"
;; GFNI GF(2^8) affine inverse transform.  Three alternatives:
;; legacy SSE (noavx, destructive, operand 1 tied to "0"), VEX (avx)
;; and EVEX (avx512bw), hence the gf2p8.../vgf2p8... template split.
20046 (define_insn "vgf2p8affineinvqb_<mode><mask_name>"
20047 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
20048 (unspec:VI1_AVX512F [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
20049 (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
20050 (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
20051 UNSPEC_GF2P8AFFINEINV))]
20054 gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}
20055 vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}
20056 vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
20057 [(set_attr "isa" "noavx,avx,avx512bw")
20058 (set_attr "prefix_data16" "1,*,*")
20059 (set_attr "prefix_extra" "1")
20060 (set_attr "prefix" "orig,maybe_evex,evex")
20061 (set_attr "mode" "<sseinsnmode>")])
;; GF(2^8) affine transform (no inverse); same alternative structure.
20063 (define_insn "vgf2p8affineqb_<mode><mask_name>"
20064 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
20065 (unspec:VI1_AVX512F [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
20066 (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
20067 (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
20068 UNSPEC_GF2P8AFFINE))]
20071 gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}
20072 vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}
20073 vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
20074 [(set_attr "isa" "noavx,avx,avx512bw")
20075 (set_attr "prefix_data16" "1,*,*")
20076 (set_attr "prefix_extra" "1")
20077 (set_attr "prefix" "orig,maybe_evex,evex")
20078 (set_attr "mode" "<sseinsnmode>")])
;; GF(2^8) multiply; no immediate operand, otherwise the same shape.
20080 (define_insn "vgf2p8mulb_<mode><mask_name>"
20081 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
20082 (unspec:VI1_AVX512F [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
20083 (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")]
20087 gf2p8mulb\t{%2, %0| %0, %2}
20088 vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}
20089 vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}"
20090 [(set_attr "isa" "noavx,avx,avx512bw")
20091 (set_attr "prefix_data16" "1,*,*")
20092 (set_attr "prefix_extra" "1")
20093 (set_attr "prefix" "orig,maybe_evex,evex")
20094 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VBMI2 VPSHRD: concatenated funnel shift right by an immediate
;; (operand 3, 0..255).  NOTE(review): both templates below have a
;; stray space in "%3 }" before the closing brace — harmless to the
;; mnemonic but worth confirming against the intended output.
20096 (define_insn "vpshrd_<mode><mask_name>"
20097 [(set (match_operand:VI248_VLBW 0 "register_operand" "=v")
20099 [(match_operand:VI248_VLBW 1 "register_operand" "v")
20100 (match_operand:VI248_VLBW 2 "nonimmediate_operand" "vm")
20101 (match_operand:SI 3 "const_0_to_255_operand" "n")
20103 "TARGET_AVX512VBMI2"
20104 "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
20105 [(set_attr ("prefix") ("evex"))])
;; VPSHLD: concatenated funnel shift left by an immediate.
20107 (define_insn "vpshld_<mode><mask_name>"
20108 [(set (match_operand:VI248_VLBW 0 "register_operand" "=v")
20110 [(match_operand:VI248_VLBW 1 "register_operand" "v")
20111 (match_operand:VI248_VLBW 2 "nonimmediate_operand" "vm")
20112 (match_operand:SI 3 "const_0_to_255_operand" "n")
20114 "TARGET_AVX512VBMI2"
20115 "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
20116 [(set_attr ("prefix") ("evex"))])