1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2019 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
20 (define_c_enum "unspec" [
21 ;; SSE
22 UNSPEC_MOVNT
23
24 ;; SSE2
25 UNSPEC_MOVDI_TO_SSE
26
27 ;; SSE3
28 UNSPEC_LDDQU
29
30 ;; SSSE3
31 UNSPEC_PSHUFB
32 UNSPEC_PSIGN
33 UNSPEC_PALIGNR
34
35 ;; For SSE4A support
36 UNSPEC_EXTRQI
37 UNSPEC_EXTRQ
38 UNSPEC_INSERTQI
39 UNSPEC_INSERTQ
40
41 ;; For SSE4.1 support
42 UNSPEC_BLENDV
43 UNSPEC_INSERTPS
44 UNSPEC_DP
45 UNSPEC_MOVNTDQA
46 UNSPEC_MPSADBW
47 UNSPEC_PHMINPOSUW
48 UNSPEC_PTEST
49
50 ;; For SSE4.2 support
51 UNSPEC_PCMPESTR
52 UNSPEC_PCMPISTR
53
54 ;; For FMA4 support
55 UNSPEC_FMADDSUB
56 UNSPEC_XOP_UNSIGNED_CMP
57 UNSPEC_XOP_TRUEFALSE
58 UNSPEC_XOP_PERMUTE
59 UNSPEC_FRCZ
60
61 ;; For AES support
62 UNSPEC_AESENC
63 UNSPEC_AESENCLAST
64 UNSPEC_AESDEC
65 UNSPEC_AESDECLAST
66 UNSPEC_AESIMC
67 UNSPEC_AESKEYGENASSIST
68
69 ;; For PCLMUL support
70 UNSPEC_PCLMUL
71
72 ;; For AVX support
73 UNSPEC_PCMP
74 UNSPEC_VPERMIL
75 UNSPEC_VPERMIL2
76 UNSPEC_VPERMIL2F128
77 UNSPEC_CAST
78 UNSPEC_VTESTP
79 UNSPEC_VCVTPH2PS
80 UNSPEC_VCVTPS2PH
81
82 ;; For AVX2 support
83 UNSPEC_VPERMVAR
84 UNSPEC_VPERMTI
85 UNSPEC_GATHER
86 UNSPEC_VSIBADDR
87
88 ;; For AVX512F support
89 UNSPEC_VPERMT2
90 UNSPEC_UNSIGNED_FIX_NOTRUNC
91 UNSPEC_UNSIGNED_PCMP
92 UNSPEC_TESTM
93 UNSPEC_TESTNM
94 UNSPEC_SCATTER
95 UNSPEC_RCP14
96 UNSPEC_RSQRT14
97 UNSPEC_FIXUPIMM
98 UNSPEC_SCALEF
99 UNSPEC_VTERNLOG
100 UNSPEC_GETEXP
101 UNSPEC_GETMANT
102 UNSPEC_ALIGN
103 UNSPEC_CONFLICT
104 UNSPEC_COMPRESS
105 UNSPEC_COMPRESS_STORE
106 UNSPEC_EXPAND
107 UNSPEC_MASKED_EQ
108 UNSPEC_MASKED_GT
109
110 ;; Mask operations
111 UNSPEC_MASKOP
112 UNSPEC_KORTEST
113 UNSPEC_KTEST
114
115 ;; For embed. rounding feature
116 UNSPEC_EMBEDDED_ROUNDING
117
118 ;; For AVX512PF support
119 UNSPEC_GATHER_PREFETCH
120 UNSPEC_SCATTER_PREFETCH
121
122 ;; For AVX512ER support
123 UNSPEC_EXP2
124 UNSPEC_RCP28
125 UNSPEC_RSQRT28
126
127 ;; For SHA support
128 UNSPEC_SHA1MSG1
129 UNSPEC_SHA1MSG2
130 UNSPEC_SHA1NEXTE
131 UNSPEC_SHA1RNDS4
132 UNSPEC_SHA256MSG1
133 UNSPEC_SHA256MSG2
134 UNSPEC_SHA256RNDS2
135
136 ;; For AVX512BW support
137 UNSPEC_DBPSADBW
138 UNSPEC_PMADDUBSW512
139 UNSPEC_PMADDWD512
140 UNSPEC_PSHUFHW
141 UNSPEC_PSHUFLW
142 UNSPEC_CVTINT2MASK
143
144 ;; For AVX512DQ support
145 UNSPEC_REDUCE
146 UNSPEC_FPCLASS
147 UNSPEC_RANGE
148
149 ;; For AVX512IFMA support
150 UNSPEC_VPMADD52LUQ
151 UNSPEC_VPMADD52HUQ
152
153 ;; For AVX512VBMI support
154 UNSPEC_VPMULTISHIFT
155
156 ;; For AVX5124FMAPS/AVX5124VNNIW support
157 UNSPEC_VP4FMADD
158 UNSPEC_VP4FNMADD
159 UNSPEC_VP4DPWSSD
160 UNSPEC_VP4DPWSSDS
161
162 ;; For GFNI support
163 UNSPEC_GF2P8AFFINEINV
164 UNSPEC_GF2P8AFFINE
165 UNSPEC_GF2P8MUL
166
167 ;; For AVX512VBMI2 support
168 UNSPEC_VPSHLD
169 UNSPEC_VPSHRD
170 UNSPEC_VPSHRDV
171 UNSPEC_VPSHLDV
172
173 ;; For AVX512VNNI support
174 UNSPEC_VPMADDUBSWACCD
175 UNSPEC_VPMADDUBSWACCSSD
176 UNSPEC_VPMADDWDACCD
177 UNSPEC_VPMADDWDACCSSD
178
179 ;; For VAES support
180 UNSPEC_VAESDEC
181 UNSPEC_VAESDECLAST
182 UNSPEC_VAESENC
183 UNSPEC_VAESENCLAST
184
185 ;; For VPCLMULQDQ support
186 UNSPEC_VPCLMULQDQ
187
188 ;; For AVX512BITALG support
189 UNSPEC_VPSHUFBIT
190
191 ;; For AVX512BF16 support
192 UNSPEC_VCVTNE2PS2BF16
193 UNSPEC_VCVTNEPS2BF16
194 UNSPEC_VDPBF16PS
195 ])
196
197 (define_c_enum "unspecv" [
198 UNSPECV_LDMXCSR
199 UNSPECV_STMXCSR
200 UNSPECV_CLFLUSH
201 UNSPECV_MONITOR
202 UNSPECV_MWAIT
203 UNSPECV_VZEROALL
204 UNSPECV_VZEROUPPER
205 ])
206
207 ;; All vector modes including V?TImode, used in move patterns.
208 (define_mode_iterator VMOVE
209 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
210 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
211 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
212 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
213 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
214 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
215 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
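;; Illustrative note (not from the original sources): an iterator entry such
;; as (V32QI "TARGET_AVX") instantiates a pattern for V32QImode but guards it
;; with the extra TARGET_AVX condition, while a bare mode like V16QI is
;; limited only by the pattern's own condition (TARGET_SSE for the move
;; patterns below).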
216
217 ;; All AVX-512{F,VL} vector modes.  Assumes TARGET_AVX512F as baseline.
218 (define_mode_iterator V48_AVX512VL
219 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
220 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
221 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
222 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
223
224 ;; 1- and 2-byte AVX-512{BW,VL} vector modes.  Assumes TARGET_AVX512BW as baseline.
225 (define_mode_iterator VI12_AVX512VL
226 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
227 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
228
229 ;; Same iterator, but without the assumed TARGET_AVX512BW baseline
230 (define_mode_iterator VI12_AVX512VLBW
231 [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
232 (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
233 (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
234
235 (define_mode_iterator VI1_AVX512VL
236 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
237
238 ;; All vector modes
239 (define_mode_iterator V
240 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
241 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
242 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
243 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
244 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
245 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
246
247 ;; All 128bit vector modes
248 (define_mode_iterator V_128
249 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
250
251 ;; All 256bit vector modes
252 (define_mode_iterator V_256
253 [V32QI V16HI V8SI V4DI V8SF V4DF])
254
255 ;; All 128bit and 256bit vector modes
256 (define_mode_iterator V_128_256
257 [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
258
259 ;; All 512bit vector modes
260 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
261
262 ;; All 256bit and 512bit vector modes
263 (define_mode_iterator V_256_512
264 [V32QI V16HI V8SI V4DI V8SF V4DF
265 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
266 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
267
268 ;; All vector float modes
269 (define_mode_iterator VF
270 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
271 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
272
273 ;; 128- and 256-bit float vector modes
274 (define_mode_iterator VF_128_256
275 [(V8SF "TARGET_AVX") V4SF
276 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
277
278 ;; All SFmode vector float modes
279 (define_mode_iterator VF1
280 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
281
282 ;; 128- and 256-bit SF vector modes
283 (define_mode_iterator VF1_128_256
284 [(V8SF "TARGET_AVX") V4SF])
285
286 (define_mode_iterator VF1_128_256VL
287 [V8SF (V4SF "TARGET_AVX512VL")])
288
289 ;; All DFmode vector float modes
290 (define_mode_iterator VF2
291 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
292
293 ;; 128- and 256-bit DF vector modes
294 (define_mode_iterator VF2_128_256
295 [(V4DF "TARGET_AVX") V2DF])
296
297 (define_mode_iterator VF2_512_256
298 [(V8DF "TARGET_AVX512F") V4DF])
299
300 (define_mode_iterator VF2_512_256VL
301 [V8DF (V4DF "TARGET_AVX512VL")])
302
303 ;; All 128bit vector float modes
304 (define_mode_iterator VF_128
305 [V4SF (V2DF "TARGET_SSE2")])
306
307 ;; All 256bit vector float modes
308 (define_mode_iterator VF_256
309 [V8SF V4DF])
310
311 ;; All 512bit vector float modes
312 (define_mode_iterator VF_512
313 [V16SF V8DF])
314
315 (define_mode_iterator VI48_AVX512VL
316 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
317 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
318
319 (define_mode_iterator VF_AVX512VL
320 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
321 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
322
323 (define_mode_iterator VF2_AVX512VL
324 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
325
326 (define_mode_iterator VF1_AVX512VL
327 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
328
329 ;; All vector integer modes
330 (define_mode_iterator VI
331 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
332 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
333 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
334 (V8SI "TARGET_AVX") V4SI
335 (V4DI "TARGET_AVX") V2DI])
336
337 (define_mode_iterator VI_AVX2
338 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
339 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
340 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
341 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
342
343 ;; All QImode vector integer modes
344 (define_mode_iterator VI1
345 [(V32QI "TARGET_AVX") V16QI])
346
347 ;; All 128-bit vector modes, plus the 256-bit modes for TARGET_AVX
348 (define_mode_iterator V_AVX
349 [V16QI V8HI V4SI V2DI V4SF V2DF
350 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
351 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
352 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")])
353
354 (define_mode_iterator VI48_AVX
355 [V4SI V2DI
356 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
357
358 (define_mode_iterator VI8
359 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
360
361 (define_mode_iterator VI8_FVL
362 [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
363
364 (define_mode_iterator VI8_AVX512VL
365 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
366
367 (define_mode_iterator VI8_256_512
368 [V8DI (V4DI "TARGET_AVX512VL")])
369
370 (define_mode_iterator VI1_AVX2
371 [(V32QI "TARGET_AVX2") V16QI])
372
373 (define_mode_iterator VI1_AVX512
374 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
375
376 (define_mode_iterator VI1_AVX512F
377 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
378
379 (define_mode_iterator VI2_AVX2
380 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
381
382 (define_mode_iterator VI2_AVX512F
383 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
384
385 (define_mode_iterator VI4_AVX
386 [(V8SI "TARGET_AVX") V4SI])
387
388 (define_mode_iterator VI4_AVX2
389 [(V8SI "TARGET_AVX2") V4SI])
390
391 (define_mode_iterator VI4_AVX512F
392 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
393
394 (define_mode_iterator VI4_AVX512VL
395 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
396
397 (define_mode_iterator VI48_AVX512F_AVX512VL
398 [V4SI V8SI (V16SI "TARGET_AVX512F")
399 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
400
401 (define_mode_iterator VI2_AVX512VL
402 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
403
404 (define_mode_iterator VI1_AVX512VL_F
405 [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
406
407 (define_mode_iterator VI8_AVX2_AVX512BW
408 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
409
410 (define_mode_iterator VI8_AVX2
411 [(V4DI "TARGET_AVX2") V2DI])
412
413 (define_mode_iterator VI8_AVX2_AVX512F
414 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
415
416 (define_mode_iterator VI8_AVX_AVX512F
417 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
418
419 (define_mode_iterator VI4_128_8_256
420 [V4SI V4DI])
421
422 ;; All V8D* modes
423 (define_mode_iterator V8FI
424 [V8DF V8DI])
425
426 ;; All V16S* modes
427 (define_mode_iterator V16FI
428 [V16SF V16SI])
429
430 ;; ??? We should probably use TImode instead.
431 (define_mode_iterator VIMAX_AVX2_AVX512BW
432 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
433
434 ;; Assumes TARGET_AVX512BW as baseline
435 (define_mode_iterator VIMAX_AVX512VL
436 [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
437
438 (define_mode_iterator VIMAX_AVX2
439 [(V2TI "TARGET_AVX2") V1TI])
440
441 ;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
442 (define_mode_iterator SSESCALARMODE
443 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
444
445 (define_mode_iterator VI12_AVX2
446 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
447 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
448
449 (define_mode_iterator VI24_AVX2
450 [(V16HI "TARGET_AVX2") V8HI
451 (V8SI "TARGET_AVX2") V4SI])
452
453 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
454 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
455 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
456 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
457
458 (define_mode_iterator VI124_AVX2
459 [(V32QI "TARGET_AVX2") V16QI
460 (V16HI "TARGET_AVX2") V8HI
461 (V8SI "TARGET_AVX2") V4SI])
462
463 (define_mode_iterator VI2_AVX2_AVX512BW
464 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
465
466 (define_mode_iterator VI248_AVX512VL
467 [V32HI V16SI V8DI
468 (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
469 (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
470 (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
471
472 (define_mode_iterator VI48_AVX2
473 [(V8SI "TARGET_AVX2") V4SI
474 (V4DI "TARGET_AVX2") V2DI])
475
476 (define_mode_iterator VI248_AVX2
477 [(V16HI "TARGET_AVX2") V8HI
478 (V8SI "TARGET_AVX2") V4SI
479 (V4DI "TARGET_AVX2") V2DI])
480
481 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
482 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
483 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
484 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
485
486 (define_mode_iterator VI248_AVX512BW
487 [(V32HI "TARGET_AVX512BW") V16SI V8DI])
488
489 (define_mode_iterator VI248_AVX512BW_AVX512VL
490 [(V32HI "TARGET_AVX512BW")
491 (V4DI "TARGET_AVX512VL") V16SI V8DI])
492
493 ;; Assumes TARGET_AVX512VL as baseline
494 (define_mode_iterator VI248_AVX512BW_1
495 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
496 V8SI V4SI
497 V2DI])
498
499 (define_mode_iterator VI248_AVX512BW_2
500 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
501 V8SI V4SI
502 V4DI V2DI])
503
504 (define_mode_iterator VI48_AVX512F
505 [(V16SI "TARGET_AVX512F") V8SI V4SI
506 (V8DI "TARGET_AVX512F") V4DI V2DI])
507
508 (define_mode_iterator VI48_AVX_AVX512F
509 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
510 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
511
512 (define_mode_iterator VI12_AVX_AVX512F
513 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
514 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
515
516 (define_mode_iterator V48_AVX2
517 [V4SF V2DF
518 V8SF V4DF
519 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
520 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
521
522 (define_mode_iterator VI1_AVX512VLBW
523 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL")
524 (V16QI "TARGET_AVX512VL")])
525
526 (define_mode_attr avx512
527 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
528 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
529 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
530 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
531 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
532 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
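;; Illustrative note: mode attributes such as the one above can appear in
;; pattern names and templates.  For example, <avx512>_load<mode>_mask later
;; in this file becomes an insn named avx512vl_loadv4si_mask when
;; instantiated for V4SImode, since this table maps V4SI to "avx512vl".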
533
534 (define_mode_attr sse2_avx_avx512f
535 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
536 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
537 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
538 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
539 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
540 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
541
542 (define_mode_attr sse2_avx2
543 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
544 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
545 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
546 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
547 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
548
549 (define_mode_attr ssse3_avx2
550 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
551 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
552 (V4SI "ssse3") (V8SI "avx2")
553 (V2DI "ssse3") (V4DI "avx2")
554 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
555
556 (define_mode_attr sse4_1_avx2
557 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
558 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
559 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
560 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
561
562 (define_mode_attr avx_avx2
563 [(V4SF "avx") (V2DF "avx")
564 (V8SF "avx") (V4DF "avx")
565 (V4SI "avx2") (V2DI "avx2")
566 (V8SI "avx2") (V4DI "avx2")])
567
568 (define_mode_attr vec_avx2
569 [(V16QI "vec") (V32QI "avx2")
570 (V8HI "vec") (V16HI "avx2")
571 (V4SI "vec") (V8SI "avx2")
572 (V2DI "vec") (V4DI "avx2")])
573
574 (define_mode_attr avx2_avx512
575 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
576 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
577 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
578 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
579 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
580
581 (define_mode_attr shuffletype
582 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
583 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
584 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
585 (V32HI "i") (V16HI "i") (V8HI "i")
586 (V64QI "i") (V32QI "i") (V16QI "i")
587 (V4TI "i") (V2TI "i") (V1TI "i")])
588
589 (define_mode_attr ssequartermode
590 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
591
592 (define_mode_attr ssequarterinsnmode
593 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")])
594
595 (define_mode_attr ssedoublemodelower
596 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
597 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
598 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
599
600 (define_mode_attr ssedoublemode
601 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
602 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
603 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
604 (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
605 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
606 (V4DI "V8DI") (V8DI "V16DI")])
607
608 (define_mode_attr ssebytemode
609 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
610 (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
611
612 ;; All 128bit vector integer modes
613 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
614
615 ;; All 256bit vector integer modes
616 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
617
618 ;; Various 128bit vector integer mode combinations
619 (define_mode_iterator VI12_128 [V16QI V8HI])
620 (define_mode_iterator VI14_128 [V16QI V4SI])
621 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
622 (define_mode_iterator VI24_128 [V8HI V4SI])
623 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
624 (define_mode_iterator VI48_128 [V4SI V2DI])
625
626 ;; Various 256bit and 512bit vector integer mode combinations
627 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
628 (define_mode_iterator VI124_256_AVX512F_AVX512BW
629 [V32QI V16HI V8SI
630 (V64QI "TARGET_AVX512BW")
631 (V32HI "TARGET_AVX512BW")
632 (V16SI "TARGET_AVX512F")])
633 (define_mode_iterator VI48_256 [V8SI V4DI])
634 (define_mode_iterator VI48_512 [V16SI V8DI])
635 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
636 (define_mode_iterator VI_AVX512BW
637 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
638
639 ;; Int-float size matches
640 (define_mode_iterator VI4F_128 [V4SI V4SF])
641 (define_mode_iterator VI8F_128 [V2DI V2DF])
642 (define_mode_iterator VI4F_256 [V8SI V8SF])
643 (define_mode_iterator VI8F_256 [V4DI V4DF])
644 (define_mode_iterator VI4F_256_512
645 [V8SI V8SF
646 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
647 (define_mode_iterator VI48F_256_512
648 [V8SI V8SF
649 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
650 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
651 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
652 (define_mode_iterator VF48_I1248
653 [V16SI V16SF V8DI V8DF V32HI V64QI])
654 (define_mode_iterator VI48F
655 [V16SI V16SF V8DI V8DF
656 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
657 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
658 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
659 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
660 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
661
662 (define_mode_iterator VF_AVX512
663 [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
664 (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
665 V16SF V8DF])
666
667 (define_mode_attr avx512bcst
668 [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
669 (V8SI "%{1to8%}") (V4DI "%{1to4%}")
670 (V16SI "%{1to16%}") (V8DI "%{1to8%}")
671 (V4SF "%{1to4%}") (V2DF "%{1to2%}")
672 (V8SF "%{1to8%}") (V4DF "%{1to4%}")
673 (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
674
675 ;; Mapping from float mode to required SSE level
676 (define_mode_attr sse
677 [(SF "sse") (DF "sse2")
678 (V4SF "sse") (V2DF "sse2")
679 (V16SF "avx512f") (V8SF "avx")
680 (V8DF "avx512f") (V4DF "avx")])
681
682 (define_mode_attr sse2
683 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
684 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
685
686 (define_mode_attr sse3
687 [(V16QI "sse3") (V32QI "avx")])
688
689 (define_mode_attr sse4_1
690 [(V4SF "sse4_1") (V2DF "sse4_1")
691 (V8SF "avx") (V4DF "avx")
692 (V8DF "avx512f")
693 (V4DI "avx") (V2DI "sse4_1")
694 (V8SI "avx") (V4SI "sse4_1")
695 (V16QI "sse4_1") (V32QI "avx")
696 (V8HI "sse4_1") (V16HI "avx")])
697
698 (define_mode_attr avxsizesuffix
699 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
700 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
701 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
702 (V16SF "512") (V8DF "512")
703 (V8SF "256") (V4DF "256")
704 (V4SF "") (V2DF "")])
705
706 ;; SSE instruction mode
707 (define_mode_attr sseinsnmode
708 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
709 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
710 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
711 (V16SF "V16SF") (V8DF "V8DF")
712 (V8SF "V8SF") (V4DF "V4DF")
713 (V4SF "V4SF") (V2DF "V2DF")
714 (TI "TI")])
715
716 ;; Mapping of vector modes to the corresponding mask mode
717 (define_mode_attr avx512fmaskmode
718 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
719 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
720 (V16SI "HI") (V8SI "QI") (V4SI "QI")
721 (V8DI "QI") (V4DI "QI") (V2DI "QI")
722 (V16SF "HI") (V8SF "QI") (V4SF "QI")
723 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
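;; Illustrative note: the mask mode supplies one bit per vector element,
;; e.g. V16SF (16 elements) maps to HImode (16 bits) and V8DF (8 elements)
;; maps to QImode (8 bits).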
724
725 ;; Mapping of vector modes to the corresponding mask mode, in lowercase
726 (define_mode_attr avx512fmaskmodelower
727 [(V64QI "di") (V32QI "si") (V16QI "hi")
728 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
729 (V16SI "hi") (V8SI "qi") (V4SI "qi")
730 (V8DI "qi") (V4DI "qi") (V2DI "qi")
731 (V16SF "hi") (V8SF "qi") (V4SF "qi")
732 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
733
734 ;; Mapping of vector modes to corresponding mask half size
735 (define_mode_attr avx512fmaskhalfmode
736 [(V64QI "SI") (V32QI "HI") (V16QI "QI")
737 (V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
738 (V16SI "QI") (V8SI "QI") (V4SI "QI")
739 (V8DI "QI") (V4DI "QI") (V2DI "QI")
740 (V16SF "QI") (V8SF "QI") (V4SF "QI")
741 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
742
743 ;; Mapping of vector float modes to an integer mode of the same size
744 (define_mode_attr sseintvecmode
745 [(V16SF "V16SI") (V8DF "V8DI")
746 (V8SF "V8SI") (V4DF "V4DI")
747 (V4SF "V4SI") (V2DF "V2DI")
748 (V16SI "V16SI") (V8DI "V8DI")
749 (V8SI "V8SI") (V4DI "V4DI")
750 (V4SI "V4SI") (V2DI "V2DI")
751 (V16HI "V16HI") (V8HI "V8HI")
752 (V32HI "V32HI") (V64QI "V64QI")
753 (V32QI "V32QI") (V16QI "V16QI")])
754
755 (define_mode_attr sseintvecmode2
756 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
757 (V8SF "OI") (V4SF "TI")])
758
759 (define_mode_attr sseintvecmodelower
760 [(V16SF "v16si") (V8DF "v8di")
761 (V8SF "v8si") (V4DF "v4di")
762 (V4SF "v4si") (V2DF "v2di")
763 (V8SI "v8si") (V4DI "v4di")
764 (V4SI "v4si") (V2DI "v2di")
765 (V16HI "v16hi") (V8HI "v8hi")
766 (V32QI "v32qi") (V16QI "v16qi")])
767
768 ;; Mapping of vector modes to a vector mode of double size
769 (define_mode_attr ssedoublevecmode
770 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
771 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
772 (V8SF "V16SF") (V4DF "V8DF")
773 (V4SF "V8SF") (V2DF "V4DF")])
774
775 ;; Mapping of vector modes to a vector mode of half size
776 (define_mode_attr ssehalfvecmode
777 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
778 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
779 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
780 (V16SF "V8SF") (V8DF "V4DF")
781 (V8SF "V4SF") (V4DF "V2DF")
782 (V4SF "V2SF")])
783
784 (define_mode_attr ssehalfvecmodelower
785 [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
786 (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
787 (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
788 (V16SF "v8sf") (V8DF "v4df")
789 (V8SF "v4sf") (V4DF "v2df")
790 (V4SF "v2sf")])
791
792 ;; Mapping of vector modes to packed single mode of the same size
793 (define_mode_attr ssePSmode
794 [(V16SI "V16SF") (V8DF "V16SF")
795 (V16SF "V16SF") (V8DI "V16SF")
796 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
797 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
798 (V8SI "V8SF") (V4SI "V4SF")
799 (V4DI "V8SF") (V2DI "V4SF")
800 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
801 (V8SF "V8SF") (V4SF "V4SF")
802 (V4DF "V8SF") (V2DF "V4SF")])
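;; Illustrative note: this attribute is what lets mov<mode>_internal below
;; retype an access as packed single of the same total size when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or TARGET_SSE_TYPELESS_STORES
;; prefers movaps/movups over the type-specific move.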
803
804 (define_mode_attr ssePSmode2
805 [(V8DI "V8SF") (V4DI "V4SF")])
806
807 ;; Mapping of vector modes back to the scalar modes
808 (define_mode_attr ssescalarmode
809 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
810 (V32HI "HI") (V16HI "HI") (V8HI "HI")
811 (V16SI "SI") (V8SI "SI") (V4SI "SI")
812 (V8DI "DI") (V4DI "DI") (V2DI "DI")
813 (V16SF "SF") (V8SF "SF") (V4SF "SF")
814 (V8DF "DF") (V4DF "DF") (V2DF "DF")
815 (V4TI "TI") (V2TI "TI")])
816
817 ;; Mapping of vector modes back to the scalar modes, in lowercase
818 (define_mode_attr ssescalarmodelower
819 [(V64QI "qi") (V32QI "qi") (V16QI "qi")
820 (V32HI "hi") (V16HI "hi") (V8HI "hi")
821 (V16SI "si") (V8SI "si") (V4SI "si")
822 (V8DI "di") (V4DI "di") (V2DI "di")
823 (V16SF "sf") (V8SF "sf") (V4SF "sf")
824 (V8DF "df") (V4DF "df") (V2DF "df")
825 (V4TI "ti") (V2TI "ti")])
826
827 ;; Mapping of vector modes to the 128bit modes
828 (define_mode_attr ssexmmmode
829 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
830 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
831 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
832 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
833 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
834 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
835
836 ;; Pointer size override for scalar modes (Intel asm dialect)
837 (define_mode_attr iptr
838 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
839 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
840 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
841 (V16SF "k") (V8DF "q")
842 (V8SF "k") (V4DF "q")
843 (V4SF "k") (V2DF "q")
844 (SF "k") (DF "q")])
845
846 ;; Number of scalar elements in each vector type
847 (define_mode_attr ssescalarnum
848 [(V64QI "64") (V16SI "16") (V8DI "8")
849 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
850 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
851 (V16SF "16") (V8DF "8")
852 (V8SF "8") (V4DF "4")
853 (V4SF "4") (V2DF "2")])
854
855 ;; Element index mask (number of scalar elements minus one) in each vector type
856 (define_mode_attr ssescalarnummask
857 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
858 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
859 (V8SF "7") (V4DF "3")
860 (V4SF "3") (V2DF "1")])
861
862 (define_mode_attr ssescalarsize
863 [(V4TI "64") (V2TI "64") (V1TI "64")
864 (V8DI "64") (V4DI "64") (V2DI "64")
865 (V64QI "8") (V32QI "8") (V16QI "8")
866 (V32HI "16") (V16HI "16") (V8HI "16")
867 (V16SI "32") (V8SI "32") (V4SI "32")
868 (V16SF "32") (V8SF "32") (V4SF "32")
869 (V8DF "64") (V4DF "64") (V2DF "64")])
870
871 ;; SSE prefix for integer vector modes
872 (define_mode_attr sseintprefix
873 [(V2DI "p") (V2DF "")
874 (V4DI "p") (V4DF "")
875 (V8DI "p") (V8DF "")
876 (V4SI "p") (V4SF "")
877 (V8SI "p") (V8SF "")
878 (V16SI "p") (V16SF "")
879 (V16QI "p") (V8HI "p")
880 (V32QI "p") (V16HI "p")
881 (V64QI "p") (V32HI "p")])
882
883 ;; SSE scalar suffix for vector modes
884 (define_mode_attr ssescalarmodesuffix
885 [(SF "ss") (DF "sd")
886 (V16SF "ss") (V8DF "sd")
887 (V8SF "ss") (V4DF "sd")
888 (V4SF "ss") (V2DF "sd")
889 (V16SI "d") (V8DI "q")
890 (V8SI "d") (V4DI "q")
891 (V4SI "d") (V2DI "q")])
892
893 ;; Pack/unpack vector modes
894 (define_mode_attr sseunpackmode
895 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
896 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
897 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
898
899 (define_mode_attr ssepackmode
900 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
901 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
902 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
903
904 ;; Mapping of vector modes to the maximum rotate count allowed by the xop rotate immediate constraint
905 (define_mode_attr sserotatemax
906 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
907
908 ;; Mapping of mode to cast intrinsic name
909 (define_mode_attr castmode
910 [(V8SI "si") (V8SF "ps") (V4DF "pd")
911 (V16SI "si") (V16SF "ps") (V8DF "pd")])
912
913 ;; Instruction suffix for sign and zero extensions.
914 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
915
916 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
917 ;; i64x4 or f64x4 for 512bit modes.
918 (define_mode_attr i128
919 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
920 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
921 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
922
923 ;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
924 ;; i32x4, f32x4, i64x2 or f64x2 suffixes.
925 (define_mode_attr i128vldq
926 [(V8SF "f32x4") (V4DF "f64x2")
927 (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
928
929 ;; Mix-n-match
930 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
931 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
932
933 ;; Mapping for dbpsadbw modes
934 (define_mode_attr dbpsadbwmode
935 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
936
937 ;; Mapping suffixes for broadcast
938 (define_mode_attr bcstscalarsuff
939 [(V64QI "b") (V32QI "b") (V16QI "b")
940 (V32HI "w") (V16HI "w") (V8HI "w")
941 (V16SI "d") (V8SI "d") (V4SI "d")
942 (V8DI "q") (V4DI "q") (V2DI "q")
943 (V16SF "ss") (V8SF "ss") (V4SF "ss")
944 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
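;; Illustrative note (usage assumed from the later broadcast patterns): these
;; suffixes complete broadcast mnemonics, e.g. vpbroadcast<bcstscalarsuff>
;; yields vpbroadcastd for V16SI and vbroadcast<bcstscalarsuff> yields
;; vbroadcastsd for V8DF.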
945
946 ;; Tie mode of assembler operand to mode iterator
947 (define_mode_attr xtg_mode
948 [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
949 (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
950 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
951
952 ;; Half mask mode for unpacks
953 (define_mode_attr HALFMASKMODE
954 [(DI "SI") (SI "HI")])
955
956 ;; Double mask mode for packs
957 (define_mode_attr DOUBLEMASKMODE
958 [(HI "SI") (SI "DI")])
959
960
961 ;; Include define_subst patterns for instructions with mask
962 (include "subst.md")
963
964 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
965
966 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
967 ;;
968 ;; Move patterns
969 ;;
970 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
971
972 ;; All of these patterns are enabled for SSE1 as well as SSE2.
973 ;; This is essential for maintaining stable calling conventions.
974
975 (define_expand "mov<mode>"
976 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
977 (match_operand:VMOVE 1 "nonimmediate_operand"))]
978 "TARGET_SSE"
979 {
980 ix86_expand_vector_move (<MODE>mode, operands);
981 DONE;
982 })
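;; Note (a summary of the helper, not a specification of it): broadly,
;; ix86_expand_vector_move legitimizes the operands, e.g. copying one memory
;; operand into a register for mem-to-mem moves and handling non-trivial
;; vector constants, and then emits a move that matches mov<mode>_internal
;; below.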
983
984 (define_insn "mov<mode>_internal"
985 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
986 "=v,v ,v ,m")
987 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
988 " C,BC,vm,v"))]
989 "TARGET_SSE
990 && (register_operand (operands[0], <MODE>mode)
991 || register_operand (operands[1], <MODE>mode))"
992 {
993 switch (get_attr_type (insn))
994 {
995 case TYPE_SSELOG1:
996 return standard_sse_constant_opcode (insn, operands);
997
998 case TYPE_SSEMOV:
999 /* AVX512F provides no EVEX-encoded vmov* for sizes smaller than
1000 64 bytes, so workarounds are needed to access SSE registers 16-31,
1001 which are EVEX-only.  With AVX512VL no workarounds are needed.  */
1002 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
1003 && (EXT_REX_SSE_REG_P (operands[0])
1004 || EXT_REX_SSE_REG_P (operands[1])))
1005 {
1006 if (memory_operand (operands[0], <MODE>mode))
1007 {
1008 if (<MODE_SIZE> == 32)
1009 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
1010 else if (<MODE_SIZE> == 16)
1011 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
1012 else
1013 gcc_unreachable ();
1014 }
1015 else if (memory_operand (operands[1], <MODE>mode))
1016 {
1017 if (<MODE_SIZE> == 32)
1018 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
1019 else if (<MODE_SIZE> == 16)
1020 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
1021 else
1022 gcc_unreachable ();
1023 }
1024 else
1025 /* Reg -> reg move is always aligned. Just use wider move. */
1026 switch (get_attr_mode (insn))
1027 {
1028 case MODE_V8SF:
1029 case MODE_V4SF:
1030 return "vmovaps\t{%g1, %g0|%g0, %g1}";
1031 case MODE_V4DF:
1032 case MODE_V2DF:
1033 return "vmovapd\t{%g1, %g0|%g0, %g1}";
1034 case MODE_OI:
1035 case MODE_TI:
1036 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
1037 default:
1038 gcc_unreachable ();
1039 }
1040 }
1041
1042 switch (get_attr_mode (insn))
1043 {
1044 case MODE_V16SF:
1045 case MODE_V8SF:
1046 case MODE_V4SF:
1047 if (misaligned_operand (operands[0], <MODE>mode)
1048 || misaligned_operand (operands[1], <MODE>mode))
1049 return "%vmovups\t{%1, %0|%0, %1}";
1050 else
1051 return "%vmovaps\t{%1, %0|%0, %1}";
1052
1053 case MODE_V8DF:
1054 case MODE_V4DF:
1055 case MODE_V2DF:
1056 if (misaligned_operand (operands[0], <MODE>mode)
1057 || misaligned_operand (operands[1], <MODE>mode))
1058 return "%vmovupd\t{%1, %0|%0, %1}";
1059 else
1060 return "%vmovapd\t{%1, %0|%0, %1}";
1061
1062 case MODE_OI:
1063 case MODE_TI:
1064 if (misaligned_operand (operands[0], <MODE>mode)
1065 || misaligned_operand (operands[1], <MODE>mode))
1066 return TARGET_AVX512VL
1067 && (<MODE>mode == V4SImode
1068 || <MODE>mode == V2DImode
1069 || <MODE>mode == V8SImode
1070 || <MODE>mode == V4DImode
1071 || TARGET_AVX512BW)
1072 ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1073 : "%vmovdqu\t{%1, %0|%0, %1}";
1074 else
1075 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
1076 : "%vmovdqa\t{%1, %0|%0, %1}";
1077 case MODE_XI:
1078 if (misaligned_operand (operands[0], <MODE>mode)
1079 || misaligned_operand (operands[1], <MODE>mode))
1080 return (<MODE>mode == V16SImode
1081 || <MODE>mode == V8DImode
1082 || TARGET_AVX512BW)
1083 ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1084 : "vmovdqu64\t{%1, %0|%0, %1}";
1085 else
1086 return "vmovdqa64\t{%1, %0|%0, %1}";
1087
1088 default:
1089 gcc_unreachable ();
1090 }
1091
1092 default:
1093 gcc_unreachable ();
1094 }
1095 }
1096 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1097 (set_attr "prefix" "maybe_vex")
1098 (set (attr "mode")
1099 (cond [(and (eq_attr "alternative" "1")
1100 (match_test "TARGET_AVX512VL"))
1101 (const_string "<sseinsnmode>")
1102 (and (match_test "<MODE_SIZE> == 16")
1103 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1104 (and (eq_attr "alternative" "3")
1105 (match_test "TARGET_SSE_TYPELESS_STORES"))))
1106 (const_string "<ssePSmode>")
1107 (match_test "TARGET_AVX")
1108 (const_string "<sseinsnmode>")
1109 (ior (not (match_test "TARGET_SSE2"))
1110 (match_test "optimize_function_for_size_p (cfun)"))
1111 (const_string "V4SF")
1112 (and (eq_attr "alternative" "0")
1113 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1114 (const_string "TI")
1115 ]
1116 (const_string "<sseinsnmode>")))
1117 (set (attr "enabled")
1118 (cond [(and (match_test "<MODE_SIZE> == 16")
1119 (eq_attr "alternative" "1"))
1120 (symbol_ref "TARGET_SSE2")
1121 (and (match_test "<MODE_SIZE> == 32")
1122 (eq_attr "alternative" "1"))
1123 (symbol_ref "TARGET_AVX2")
1124 ]
1125 (symbol_ref "true")))])
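;; Illustration of the AVX512F-without-AVX512VL workaround above (operands
;; assumed for the example): a V4SF store from %xmm16 is emitted with the
;; zmm name, e.g.
;;   vextractf32x4 $0x0, %zmm16, (%rax)
;; and a V4SF register copy between extended registers as
;;   vmovaps %zmm17, %zmm16
;; since EVEX-encoded xmm/ymm moves are unavailable without AVX512VL.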
1126
1127 (define_insn "<avx512>_load<mode>_mask"
1128 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1129 (vec_merge:V48_AVX512VL
1130 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
1131 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
1132 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1133 "TARGET_AVX512F"
1134 {
1135 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1136 {
1137 if (misaligned_operand (operands[1], <MODE>mode))
1138 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1139 else
1140 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1141 }
1142 else
1143 {
1144 if (misaligned_operand (operands[1], <MODE>mode))
1145 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1146 else
1147 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1148 }
1149 }
1150 [(set_attr "type" "ssemov")
1151 (set_attr "prefix" "evex")
1152 (set_attr "memory" "none,load")
1153 (set_attr "mode" "<sseinsnmode>")])
1154
1155 (define_insn "<avx512>_load<mode>_mask"
1156 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1157 (vec_merge:VI12_AVX512VL
1158 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
1159 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
1160 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1161 "TARGET_AVX512BW"
1162 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1163 [(set_attr "type" "ssemov")
1164 (set_attr "prefix" "evex")
1165 (set_attr "memory" "none,load")
1166 (set_attr "mode" "<sseinsnmode>")])
1167
1168 (define_insn "avx512f_mov<ssescalarmodelower>_mask"
1169 [(set (match_operand:VF_128 0 "register_operand" "=v")
1170 (vec_merge:VF_128
1171 (vec_merge:VF_128
1172 (match_operand:VF_128 2 "register_operand" "v")
1173 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1174 (match_operand:QI 4 "register_operand" "Yk"))
1175 (match_operand:VF_128 1 "register_operand" "v")
1176 (const_int 1)))]
1177 "TARGET_AVX512F"
1178 "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
1179 [(set_attr "type" "ssemov")
1180 (set_attr "prefix" "evex")
1181 (set_attr "mode" "<ssescalarmode>")])
1182
1183 (define_expand "avx512f_load<mode>_mask"
1184 [(set (match_operand:<ssevecmode> 0 "register_operand")
1185 (vec_merge:<ssevecmode>
1186 (vec_merge:<ssevecmode>
1187 (vec_duplicate:<ssevecmode>
1188 (match_operand:MODEF 1 "memory_operand"))
1189 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
1190 (match_operand:QI 3 "register_operand"))
1191 (match_dup 4)
1192 (const_int 1)))]
1193 "TARGET_AVX512F"
1194 "operands[4] = CONST0_RTX (<ssevecmode>mode);")
1195
1196 (define_insn "*avx512f_load<mode>_mask"
1197 [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
1198 (vec_merge:<ssevecmode>
1199 (vec_merge:<ssevecmode>
1200 (vec_duplicate:<ssevecmode>
1201 (match_operand:MODEF 1 "memory_operand" "m"))
1202 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
1203 (match_operand:QI 3 "register_operand" "Yk"))
1204 (match_operand:<ssevecmode> 4 "const0_operand" "C")
1205 (const_int 1)))]
1206 "TARGET_AVX512F"
1207 "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{3%}%N2, %1}"
1208 [(set_attr "type" "ssemov")
1209 (set_attr "prefix" "evex")
1210 (set_attr "memory" "load")
1211 (set_attr "mode" "<MODE>")])
1212
1213 (define_insn "avx512f_store<mode>_mask"
1214 [(set (match_operand:MODEF 0 "memory_operand" "=m")
1215 (if_then_else:MODEF
1216 (and:QI (match_operand:QI 2 "register_operand" "Yk")
1217 (const_int 1))
1218 (vec_select:MODEF
1219 (match_operand:<ssevecmode> 1 "register_operand" "v")
1220 (parallel [(const_int 0)]))
1221 (match_dup 0)))]
1222 "TARGET_AVX512F"
1223 "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1224 [(set_attr "type" "ssemov")
1225 (set_attr "prefix" "evex")
1226 (set_attr "memory" "store")
1227 (set_attr "mode" "<MODE>")])
1228
1229 (define_insn "<avx512>_blendm<mode>"
1230 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1231 (vec_merge:V48_AVX512VL
1232 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1233 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1234 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1235 "TARGET_AVX512F"
1236 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1237 [(set_attr "type" "ssemov")
1238 (set_attr "prefix" "evex")
1239 (set_attr "mode" "<sseinsnmode>")])
1240
1241 (define_insn "<avx512>_blendm<mode>"
1242 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1243 (vec_merge:VI12_AVX512VL
1244 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1245 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1246 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1247 "TARGET_AVX512BW"
1248 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1249 [(set_attr "type" "ssemov")
1250 (set_attr "prefix" "evex")
1251 (set_attr "mode" "<sseinsnmode>")])
1252
1253 (define_insn "<avx512>_store<mode>_mask"
1254 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1255 (vec_merge:V48_AVX512VL
1256 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1257 (match_dup 0)
1258 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1259 "TARGET_AVX512F"
1260 {
1261 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1262 {
1263 if (misaligned_operand (operands[0], <MODE>mode))
1264 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1265 else
1266 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1267 }
1268 else
1269 {
1270 if (misaligned_operand (operands[0], <MODE>mode))
1271 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1272 else
1273 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1274 }
1275 }
1276 [(set_attr "type" "ssemov")
1277 (set_attr "prefix" "evex")
1278 (set_attr "memory" "store")
1279 (set_attr "mode" "<sseinsnmode>")])
1280
1281 (define_insn "<avx512>_store<mode>_mask"
1282 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1283 (vec_merge:VI12_AVX512VL
1284 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1285 (match_dup 0)
1286 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1287 "TARGET_AVX512BW"
1288 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1289 [(set_attr "type" "ssemov")
1290 (set_attr "prefix" "evex")
1291 (set_attr "memory" "store")
1292 (set_attr "mode" "<sseinsnmode>")])
1293
1294 (define_insn "sse2_movq128"
1295 [(set (match_operand:V2DI 0 "register_operand" "=v")
1296 (vec_concat:V2DI
1297 (vec_select:DI
1298 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1299 (parallel [(const_int 0)]))
1300 (const_int 0)))]
1301 "TARGET_SSE2"
1302 "%vmovq\t{%1, %0|%0, %q1}"
1303 [(set_attr "type" "ssemov")
1304 (set_attr "prefix" "maybe_vex")
1305 (set_attr "mode" "TI")])
1306
1307 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1308 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1309 ;; from memory, we'd prefer to load the memory directly into the %xmm
1310 ;; register. To facilitate this happy circumstance, this pattern won't
1311 ;; split until after register allocation. If the 64-bit value didn't
1312 ;; come from memory, this is the best we can do. This is much better
1313 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1314 ;; from there.
1315
1316 (define_insn_and_split "movdi_to_sse"
1317 [(set (match_operand:V4SI 0 "register_operand" "=x,x,?x")
1318 (unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m,r")]
1319 UNSPEC_MOVDI_TO_SSE))
1320 (clobber (match_scratch:V4SI 2 "=X,X,&x"))]
1321 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1322 "#"
1323 "&& reload_completed"
1324 [(const_int 0)]
1325 {
1326 if (register_operand (operands[1], DImode))
1327 {
1328 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1329 Assemble the 64-bit DImode value in an xmm register. */
1330 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1331 gen_lowpart (SImode, operands[1])));
1332 if (TARGET_SSE4_1)
1333 emit_insn (gen_sse4_1_pinsrd (operands[0], operands[0],
1334 gen_highpart (SImode, operands[1]),
1335 GEN_INT (2)));
1336 else
1337 {
1338 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1339 gen_highpart (SImode, operands[1])));
1340 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1341 operands[2]));
1342 }
1343 }
1344 else if (memory_operand (operands[1], DImode))
1345 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
1346 operands[1], const0_rtx));
1347 else
1348 gcc_unreachable ();
1349 DONE;
1350 }
1351 [(set_attr "isa" "sse4,*,*")])
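;; Illustration (register names assumed): for a DImode value in %edx:%eax
;; with SSE4.1 available, the split above emits roughly
;;   movd   %eax, %xmm0
;;   pinsrd $1, %edx, %xmm0
;; while without SSE4.1 the high half is loaded into the scratch register
;; and merged with punpckldq.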
1352
1353 (define_split
1354 [(set (match_operand:V4SF 0 "register_operand")
1355 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1356 "TARGET_SSE && reload_completed"
1357 [(set (match_dup 0)
1358 (vec_merge:V4SF
1359 (vec_duplicate:V4SF (match_dup 1))
1360 (match_dup 2)
1361 (const_int 1)))]
1362 {
1363 operands[1] = gen_lowpart (SFmode, operands[1]);
1364 operands[2] = CONST0_RTX (V4SFmode);
1365 })
1366
1367 (define_split
1368 [(set (match_operand:V2DF 0 "register_operand")
1369 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1370 "TARGET_SSE2 && reload_completed"
1371 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1372 {
1373 operands[1] = gen_lowpart (DFmode, operands[1]);
1374 operands[2] = CONST0_RTX (DFmode);
1375 })
1376
1377 (define_expand "movmisalign<mode>"
1378 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1379 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1380 "TARGET_SSE"
1381 {
1382 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1383 DONE;
1384 })
1385
1386 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
1387 (define_peephole2
1388 [(set (match_operand:V2DF 0 "sse_reg_operand")
1389 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1390 (match_operand:DF 4 "const0_operand")))
1391 (set (match_operand:V2DF 2 "sse_reg_operand")
1392 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1393 (parallel [(const_int 0)]))
1394 (match_operand:DF 3 "memory_operand")))]
1395 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1396 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1397 [(set (match_dup 2) (match_dup 5))]
1398 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1399
1400 (define_peephole2
1401 [(set (match_operand:DF 0 "sse_reg_operand")
1402 (match_operand:DF 1 "memory_operand"))
1403 (set (match_operand:V2DF 2 "sse_reg_operand")
1404 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1405 (match_operand:DF 3 "memory_operand")))]
1406 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1407 && REGNO (operands[4]) == REGNO (operands[2])
1408 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1409 [(set (match_dup 2) (match_dup 5))]
1410 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1411
1412 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
1413 (define_peephole2
1414 [(set (match_operand:DF 0 "memory_operand")
1415 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1416 (parallel [(const_int 0)])))
1417 (set (match_operand:DF 2 "memory_operand")
1418 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1419 (parallel [(const_int 1)])))]
1420 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1421 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1422 [(set (match_dup 4) (match_dup 1))]
1423 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1424
1425 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1426 [(set (match_operand:VI1 0 "register_operand" "=x")
1427 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1428 UNSPEC_LDDQU))]
1429 "TARGET_SSE3"
1430 "%vlddqu\t{%1, %0|%0, %1}"
1431 [(set_attr "type" "ssemov")
1432 (set_attr "movu" "1")
1433 (set (attr "prefix_data16")
1434 (if_then_else
1435 (match_test "TARGET_AVX")
1436 (const_string "*")
1437 (const_string "0")))
1438 (set (attr "prefix_rep")
1439 (if_then_else
1440 (match_test "TARGET_AVX")
1441 (const_string "*")
1442 (const_string "1")))
1443 (set_attr "prefix" "maybe_vex")
1444 (set_attr "mode" "<sseinsnmode>")])
1445
1446 (define_insn "sse2_movnti<mode>"
1447 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1448 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1449 UNSPEC_MOVNT))]
1450 "TARGET_SSE2"
1451 "movnti\t{%1, %0|%0, %1}"
1452 [(set_attr "type" "ssemov")
1453 (set_attr "prefix_data16" "0")
1454 (set_attr "mode" "<MODE>")])
1455
1456 (define_insn "<sse>_movnt<mode>"
1457 [(set (match_operand:VF 0 "memory_operand" "=m")
1458 (unspec:VF
1459 [(match_operand:VF 1 "register_operand" "v")]
1460 UNSPEC_MOVNT))]
1461 "TARGET_SSE"
1462 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1463 [(set_attr "type" "ssemov")
1464 (set_attr "prefix" "maybe_vex")
1465 (set_attr "mode" "<MODE>")])
1466
1467 (define_insn "<sse2>_movnt<mode>"
1468 [(set (match_operand:VI8 0 "memory_operand" "=m")
1469 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1470 UNSPEC_MOVNT))]
1471 "TARGET_SSE2"
1472 "%vmovntdq\t{%1, %0|%0, %1}"
1473 [(set_attr "type" "ssecvt")
1474 (set (attr "prefix_data16")
1475 (if_then_else
1476 (match_test "TARGET_AVX")
1477 (const_string "*")
1478 (const_string "1")))
1479 (set_attr "prefix" "maybe_vex")
1480 (set_attr "mode" "<sseinsnmode>")])
1481
1482 ; Expand patterns for non-temporal stores. At the moment, only those
1483 ; that directly map to insns are defined; it would be possible to
1484 ; define patterns for other modes that would expand to several insns.
1485
1486 ;; Modes handled by storent patterns.
1487 (define_mode_iterator STORENT_MODE
1488 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1489 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1490 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1491 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1492 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1493
1494 (define_expand "storent<mode>"
1495 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1496 (unspec:STORENT_MODE
1497 [(match_operand:STORENT_MODE 1 "register_operand")]
1498 UNSPEC_MOVNT))]
1499 "TARGET_SSE")
1500
1501 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1502 ;;
1503 ;; Mask operations
1504 ;;
1505 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1506
1507 ;; All integer modes with AVX512BW/DQ.
1508 (define_mode_iterator SWI1248_AVX512BWDQ
1509 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1510
1511 ;; All integer modes with AVX512BW, where HImode operation
1512 ;; can be used instead of QImode.
1513 (define_mode_iterator SWI1248_AVX512BW
1514 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1515
1516 ;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
1517 (define_mode_iterator SWI1248_AVX512BWDQ2
1518 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1519 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1520
1521 (define_expand "kmov<mskmodesuffix>"
1522 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1523 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1524 "TARGET_AVX512F
1525 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
1526
1527 (define_insn "k<code><mode>"
1528 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1529 (any_logic:SWI1248_AVX512BW
1530 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1531 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1532 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1533 "TARGET_AVX512F"
1534 {
1535 if (get_attr_mode (insn) == MODE_HI)
1536 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1537 else
1538 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1539 }
1540 [(set_attr "type" "msklog")
1541 (set_attr "prefix" "vex")
1542 (set (attr "mode")
1543 (cond [(and (match_test "<MODE>mode == QImode")
1544 (not (match_test "TARGET_AVX512DQ")))
1545 (const_string "HI")
1546 ]
1547 (const_string "<MODE>")))])
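;; Note: the byte forms of the mask logic instructions (kandb, korb, kxorb,
;; ...) require AVX512DQ, so without it the QImode operations above fall back
;; to the word form (e.g. kandw); this is safe because only the low 8 mask
;; bits are significant for the narrower vector modes.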
1548
1549 (define_insn "kandn<mode>"
1550 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1551 (and:SWI1248_AVX512BW
1552 (not:SWI1248_AVX512BW
1553 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1554 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1555 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1556 "TARGET_AVX512F"
1557 {
1558 if (get_attr_mode (insn) == MODE_HI)
1559 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1560 else
1561 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1562 }
1563 [(set_attr "type" "msklog")
1564 (set_attr "prefix" "vex")
1565 (set (attr "mode")
1566 (cond [(and (match_test "<MODE>mode == QImode")
1567 (not (match_test "TARGET_AVX512DQ")))
1568 (const_string "HI")
1569 ]
1570 (const_string "<MODE>")))])
1571
1572 (define_insn "kxnor<mode>"
1573 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1574 (not:SWI1248_AVX512BW
1575 (xor:SWI1248_AVX512BW
1576 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1577 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1578 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1579 "TARGET_AVX512F"
1580 {
1581 if (get_attr_mode (insn) == MODE_HI)
1582 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1583 else
1584 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1585 }
1586 [(set_attr "type" "msklog")
1587 (set_attr "prefix" "vex")
1588 (set (attr "mode")
1589 (cond [(and (match_test "<MODE>mode == QImode")
1590 (not (match_test "TARGET_AVX512DQ")))
1591 (const_string "HI")
1592 ]
1593 (const_string "<MODE>")))])
1594
1595 (define_insn "knot<mode>"
1596 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1597 (not:SWI1248_AVX512BW
1598 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1599 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1600 "TARGET_AVX512F"
1601 {
1602 if (get_attr_mode (insn) == MODE_HI)
1603 return "knotw\t{%1, %0|%0, %1}";
1604 else
1605 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1606 }
1607 [(set_attr "type" "msklog")
1608 (set_attr "prefix" "vex")
1609 (set (attr "mode")
1610 (cond [(and (match_test "<MODE>mode == QImode")
1611 (not (match_test "TARGET_AVX512DQ")))
1612 (const_string "HI")
1613 ]
1614 (const_string "<MODE>")))])
1615
1616 (define_insn "kadd<mode>"
1617 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1618 (plus:SWI1248_AVX512BWDQ2
1619 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1620 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1621 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1622 "TARGET_AVX512F"
1623 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1624 [(set_attr "type" "msklog")
1625 (set_attr "prefix" "vex")
1626 (set_attr "mode" "<MODE>")])
1627
1628 ;; Mask variant shift mnemonics
1629 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
1630
1631 (define_insn "k<code><mode>"
1632 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1633 (any_lshift:SWI1248_AVX512BWDQ
1634 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1635 (match_operand:QI 2 "immediate_operand" "n")))
1636 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1637 "TARGET_AVX512F"
1638 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1639 [(set_attr "type" "msklog")
1640 (set_attr "prefix" "vex")
1641 (set_attr "mode" "<MODE>")])
1642
1643 (define_insn "ktest<mode>"
1644 [(set (reg:CC FLAGS_REG)
1645 (unspec:CC
1646 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1647 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1648 UNSPEC_KTEST))]
1649 "TARGET_AVX512F"
1650 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1651 [(set_attr "mode" "<MODE>")
1652 (set_attr "type" "msklog")
1653 (set_attr "prefix" "vex")])
1654
1655 (define_insn "kortest<mode>"
1656 [(set (reg:CC FLAGS_REG)
1657 (unspec:CC
1658 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1659 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1660 UNSPEC_KORTEST))]
1661 "TARGET_AVX512F"
1662 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1663 [(set_attr "mode" "<MODE>")
1664 (set_attr "type" "msklog")
1665 (set_attr "prefix" "vex")])
1666
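;; The kunpck patterns concatenate two narrower mask registers: operand 1
;; provides the high half of the result and operand 2 the low half, as the
;; zero_extend/ashift/ior RTL below spells out.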
1667 (define_insn "kunpckhi"
1668 [(set (match_operand:HI 0 "register_operand" "=k")
1669 (ior:HI
1670 (ashift:HI
1671 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1672 (const_int 8))
1673 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1674 "TARGET_AVX512F"
1675 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1676 [(set_attr "mode" "HI")
1677 (set_attr "type" "msklog")
1678 (set_attr "prefix" "vex")])
1679
1680 (define_insn "kunpcksi"
1681 [(set (match_operand:SI 0 "register_operand" "=k")
1682 (ior:SI
1683 (ashift:SI
1684 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1685 (const_int 16))
1686 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1687 "TARGET_AVX512BW"
1688 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1689 [(set_attr "mode" "SI")])
1690
1691 (define_insn "kunpckdi"
1692 [(set (match_operand:DI 0 "register_operand" "=k")
1693 (ior:DI
1694 (ashift:DI
1695 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1696 (const_int 32))
1697 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1698 "TARGET_AVX512BW"
1699 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1700 [(set_attr "mode" "DI")])
1701
1702
1703 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1704 ;;
1705 ;; Parallel floating point arithmetic
1706 ;;
1707 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1708
1709 (define_expand "<code><mode>2"
1710 [(set (match_operand:VF 0 "register_operand")
1711 (absneg:VF
1712 (match_operand:VF 1 "register_operand")))]
1713 "TARGET_SSE"
1714 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1715
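;; Absolute value and negation are carried out as bit operations on the sign
;; bit: abs clears it with AND, neg flips it with XOR.  Operand 2 is the
;; constant sign-bit mask set up by the expander above.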
1716 (define_insn_and_split "*absneg<mode>2"
1717 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1718 (match_operator:VF 3 "absneg_operator"
1719 [(match_operand:VF 1 "vector_operand" "0, xBm,v, m")]))
1720 (use (match_operand:VF 2 "vector_operand" "xBm,0, vm,v"))]
1721 "TARGET_SSE"
1722 "#"
1723 "&& reload_completed"
1724 [(const_int 0)]
1725 {
1726 enum rtx_code absneg_op;
1727 rtx op1, op2;
1728 rtx t;
1729
1730 if (TARGET_AVX)
1731 {
1732 if (MEM_P (operands[1]))
1733 op1 = operands[2], op2 = operands[1];
1734 else
1735 op1 = operands[1], op2 = operands[2];
1736 }
1737 else
1738 {
1739 op1 = operands[0];
1740 if (rtx_equal_p (operands[0], operands[1]))
1741 op2 = operands[2];
1742 else
1743 op2 = operands[1];
1744 }
1745
1746 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1747 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1748 t = gen_rtx_SET (operands[0], t);
1749 emit_insn (t);
1750 DONE;
1751 }
1752 [(set_attr "isa" "noavx,noavx,avx,avx")])
1753
1754 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1755 [(set (match_operand:VF 0 "register_operand")
1756 (plusminus:VF
1757 (match_operand:VF 1 "<round_nimm_predicate>")
1758 (match_operand:VF 2 "<round_nimm_predicate>")))]
1759 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1760 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1761
1762 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1763 [(set (match_operand:VF 0 "register_operand" "=x,v")
1764 (plusminus:VF
1765 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1766 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1767 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1768 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1769 "@
1770 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1771 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1772 [(set_attr "isa" "noavx,avx")
1773 (set_attr "type" "sseadd")
1774 (set_attr "prefix" "<mask_prefix3>")
1775 (set_attr "mode" "<MODE>")])
1776
1777 (define_insn "*sub<mode>3<mask_name>_bcst"
1778 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1779 (minus:VF_AVX512
1780 (match_operand:VF_AVX512 1 "register_operand" "v")
1781 (vec_duplicate:VF_AVX512
1782 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
1783 "TARGET_AVX512F
1784 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
1785 && <mask_mode512bit_condition>"
1786 "vsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<avx512bcst>}"
1787 [(set_attr "prefix" "evex")
1788 (set_attr "type" "sseadd")
1789 (set_attr "mode" "<MODE>")])
1790
1791 (define_insn "*add<mode>3<mask_name>_bcst"
1792 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1793 (plus:VF_AVX512
1794 (vec_duplicate:VF_AVX512
1795 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
1796 (match_operand:VF_AVX512 2 "register_operand" "v")))]
1797 "TARGET_AVX512F
1798 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
1799 && <mask_mode512bit_condition>"
1800 "vadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
1801 [(set_attr "prefix" "evex")
1802 (set_attr "type" "sseadd")
1803 (set_attr "mode" "<MODE>")])
1804
1805 (define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
1806 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1807 (vec_merge:VF_128
1808 (plusminus:VF_128
1809 (match_operand:VF_128 1 "register_operand" "0,v")
1810 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1811 (match_dup 1)
1812 (const_int 1)))]
1813 "TARGET_SSE"
1814 "@
1815 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1816 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1817 [(set_attr "isa" "noavx,avx")
1818 (set_attr "type" "sseadd")
1819 (set_attr "prefix" "<round_scalar_prefix>")
1820 (set_attr "mode" "<ssescalarmode>")])
1821
1822 (define_expand "mul<mode>3<mask_name><round_name>"
1823 [(set (match_operand:VF 0 "register_operand")
1824 (mult:VF
1825 (match_operand:VF 1 "<round_nimm_predicate>")
1826 (match_operand:VF 2 "<round_nimm_predicate>")))]
1827 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1828 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1829
1830 (define_insn "*mul<mode>3<mask_name><round_name>"
1831 [(set (match_operand:VF 0 "register_operand" "=x,v")
1832 (mult:VF
1833 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1834 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1835 "TARGET_SSE
1836 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
1837 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1838 "@
1839 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1840 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1841 [(set_attr "isa" "noavx,avx")
1842 (set_attr "type" "ssemul")
1843 (set_attr "prefix" "<mask_prefix3>")
1844 (set_attr "btver2_decode" "direct,double")
1845 (set_attr "mode" "<MODE>")])
1846
1847 (define_insn "*mul<mode>3<mask_name>_bcst"
1848 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1849 (mult:VF_AVX512
1850 (vec_duplicate:VF_AVX512
1851 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
1852 (match_operand:VF_AVX512 2 "register_operand" "v")))]
1853 "TARGET_AVX512F && <mask_mode512bit_condition>"
1854 "vmul<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
1855 [(set_attr "prefix" "evex")
1856 (set_attr "type" "ssemul")
1857 (set_attr "mode" "<MODE>")])
1858
1859 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1860 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1861 (vec_merge:VF_128
1862 (multdiv:VF_128
1863 (match_operand:VF_128 1 "register_operand" "0,v")
1864 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1865 (match_dup 1)
1866 (const_int 1)))]
1867 "TARGET_SSE"
1868 "@
1869 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1870 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1871 [(set_attr "isa" "noavx,avx")
1872 (set_attr "type" "sse<multdiv_mnemonic>")
1873 (set_attr "prefix" "<round_scalar_prefix>")
1874 (set_attr "btver2_decode" "direct,double")
1875 (set_attr "mode" "<ssescalarmode>")])
1876
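;; Vector division.  The single-precision variant below may, when the unsafe
;; math flags and TARGET_RECIP_VEC_DIV permit it, be expanded as
;; multiplication by a reciprocal estimate instead (see ix86_emit_swdivsf).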
1877 (define_expand "div<mode>3"
1878 [(set (match_operand:VF2 0 "register_operand")
1879 (div:VF2 (match_operand:VF2 1 "register_operand")
1880 (match_operand:VF2 2 "vector_operand")))]
1881 "TARGET_SSE2"
1882 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1883
1884 (define_expand "div<mode>3"
1885 [(set (match_operand:VF1 0 "register_operand")
1886 (div:VF1 (match_operand:VF1 1 "register_operand")
1887 (match_operand:VF1 2 "vector_operand")))]
1888 "TARGET_SSE"
1889 {
1890 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1891
1892 if (TARGET_SSE_MATH
1893 && TARGET_RECIP_VEC_DIV
1894 && !optimize_insn_for_size_p ()
1895 && flag_finite_math_only && !flag_trapping_math
1896 && flag_unsafe_math_optimizations)
1897 {
1898 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1899 DONE;
1900 }
1901 })
1902
1903 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1904 [(set (match_operand:VF 0 "register_operand" "=x,v")
1905 (div:VF
1906 (match_operand:VF 1 "register_operand" "0,v")
1907 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1908 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1909 "@
1910 div<ssemodesuffix>\t{%2, %0|%0, %2}
1911 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1912 [(set_attr "isa" "noavx,avx")
1913 (set_attr "type" "ssediv")
1914 (set_attr "prefix" "<mask_prefix3>")
1915 (set_attr "mode" "<MODE>")])
1916
1917 (define_insn "*<avx512>_div<mode>3<mask_name>_bcst"
1918 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1919 (div:VF_AVX512
1920 (match_operand:VF_AVX512 1 "register_operand" "v")
1921 (vec_duplicate:VF_AVX512
1922 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
1923 "TARGET_AVX512F && <mask_mode512bit_condition>"
1924 "vdiv<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<avx512bcst>}"
1925 [(set_attr "prefix" "evex")
1926 (set_attr "type" "ssediv")
1927 (set_attr "mode" "<MODE>")])
1928
1929 (define_insn "<sse>_rcp<mode>2"
1930 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1931 (unspec:VF1_128_256
1932 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
1933 "TARGET_SSE"
1934 "%vrcpps\t{%1, %0|%0, %1}"
1935 [(set_attr "type" "sse")
1936 (set_attr "atom_sse_attr" "rcp")
1937 (set_attr "btver2_sse_attr" "rcp")
1938 (set_attr "prefix" "maybe_vex")
1939 (set_attr "mode" "<MODE>")])
1940
1941 (define_insn "sse_vmrcpv4sf2"
1942 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1943 (vec_merge:V4SF
1944 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1945 UNSPEC_RCP)
1946 (match_operand:V4SF 2 "register_operand" "0,x")
1947 (const_int 1)))]
1948 "TARGET_SSE"
1949 "@
1950 rcpss\t{%1, %0|%0, %k1}
1951 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1952 [(set_attr "isa" "noavx,avx")
1953 (set_attr "type" "sse")
1954 (set_attr "atom_sse_attr" "rcp")
1955 (set_attr "btver2_sse_attr" "rcp")
1956 (set_attr "prefix" "orig,vex")
1957 (set_attr "mode" "SF")])
1958
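;; The AVX-512 VRCP14* instructions return a reciprocal approximation with a
;; relative error below 2^-14.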
1959 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1960 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1961 (unspec:VF_AVX512VL
1962 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1963 UNSPEC_RCP14))]
1964 "TARGET_AVX512F"
1965 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1966 [(set_attr "type" "sse")
1967 (set_attr "prefix" "evex")
1968 (set_attr "mode" "<MODE>")])
1969
1970 (define_insn "srcp14<mode>"
1971 [(set (match_operand:VF_128 0 "register_operand" "=v")
1972 (vec_merge:VF_128
1973 (unspec:VF_128
1974 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1975 UNSPEC_RCP14)
1976 (match_operand:VF_128 2 "register_operand" "v")
1977 (const_int 1)))]
1978 "TARGET_AVX512F"
1979 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1980 [(set_attr "type" "sse")
1981 (set_attr "prefix" "evex")
1982 (set_attr "mode" "<MODE>")])
1983
1984 (define_insn "srcp14<mode>_mask"
1985 [(set (match_operand:VF_128 0 "register_operand" "=v")
1986 (vec_merge:VF_128
1987 (vec_merge:VF_128
1988 (unspec:VF_128
1989 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1990 UNSPEC_RCP14)
1991 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1992 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1993 (match_operand:VF_128 2 "register_operand" "v")
1994 (const_int 1)))]
1995 "TARGET_AVX512F"
1996 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1997 [(set_attr "type" "sse")
1998 (set_attr "prefix" "evex")
1999 (set_attr "mode" "<MODE>")])
2000
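;; Vector square root.  As with division, the single-precision expander may
;; use a reciprocal-square-root based approximation (ix86_emit_swsqrtsf) when
;; the unsafe math flags and TARGET_RECIP_VEC_SQRT allow it.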
2001 (define_expand "sqrt<mode>2"
2002 [(set (match_operand:VF2 0 "register_operand")
2003 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
2004 "TARGET_SSE2")
2005
2006 (define_expand "sqrt<mode>2"
2007 [(set (match_operand:VF1 0 "register_operand")
2008 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
2009 "TARGET_SSE"
2010 {
2011 if (TARGET_SSE_MATH
2012 && TARGET_RECIP_VEC_SQRT
2013 && !optimize_insn_for_size_p ()
2014 && flag_finite_math_only && !flag_trapping_math
2015 && flag_unsafe_math_optimizations)
2016 {
2017 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
2018 DONE;
2019 }
2020 })
2021
2022 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
2023 [(set (match_operand:VF 0 "register_operand" "=x,v")
2024 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
2025 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2026 "@
2027 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
2028 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
2029 [(set_attr "isa" "noavx,avx")
2030 (set_attr "type" "sse")
2031 (set_attr "atom_sse_attr" "sqrt")
2032 (set_attr "btver2_sse_attr" "sqrt")
2033 (set_attr "prefix" "maybe_vex")
2034 (set_attr "mode" "<MODE>")])
2035
2036 (define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2037 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2038 (vec_merge:VF_128
2039 (sqrt:VF_128
2040 (match_operand:VF_128 1 "vector_operand" "xBm,<round_scalar_constraint>"))
2041 (match_operand:VF_128 2 "register_operand" "0,v")
2042 (const_int 1)))]
2043 "TARGET_SSE"
2044 "@
2045 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
2046 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
2047 [(set_attr "isa" "noavx,avx")
2048 (set_attr "type" "sse")
2049 (set_attr "atom_sse_attr" "sqrt")
2050 (set_attr "prefix" "<round_scalar_prefix>")
2051 (set_attr "btver2_sse_attr" "sqrt")
2052 (set_attr "mode" "<ssescalarmode>")])
2053
2054 (define_expand "rsqrt<mode>2"
2055 [(set (match_operand:VF1_128_256 0 "register_operand")
2056 (unspec:VF1_128_256
2057 [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
2058 "TARGET_SSE && TARGET_SSE_MATH"
2059 {
2060 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
2061 DONE;
2062 })
2063
2064 (define_expand "rsqrtv16sf2"
2065 [(set (match_operand:V16SF 0 "register_operand")
2066 (unspec:V16SF
2067 [(match_operand:V16SF 1 "vector_operand")]
2068 UNSPEC_RSQRT28))]
2069 "TARGET_AVX512ER && TARGET_SSE_MATH"
2070 {
2071 ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
2072 DONE;
2073 })
2074
2075 (define_insn "<sse>_rsqrt<mode>2"
2076 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2077 (unspec:VF1_128_256
2078 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
2079 "TARGET_SSE"
2080 "%vrsqrtps\t{%1, %0|%0, %1}"
2081 [(set_attr "type" "sse")
2082 (set_attr "prefix" "maybe_vex")
2083 (set_attr "mode" "<MODE>")])
2084
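;; VRSQRT14* likewise approximates 1/sqrt(x) with a relative error below
;; 2^-14.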
2085 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
2086 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2087 (unspec:VF_AVX512VL
2088 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2089 UNSPEC_RSQRT14))]
2090 "TARGET_AVX512F"
2091 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2092 [(set_attr "type" "sse")
2093 (set_attr "prefix" "evex")
2094 (set_attr "mode" "<MODE>")])
2095
2096 (define_insn "rsqrt14<mode>"
2097 [(set (match_operand:VF_128 0 "register_operand" "=v")
2098 (vec_merge:VF_128
2099 (unspec:VF_128
2100 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2101 UNSPEC_RSQRT14)
2102 (match_operand:VF_128 2 "register_operand" "v")
2103 (const_int 1)))]
2104 "TARGET_AVX512F"
2105 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2106 [(set_attr "type" "sse")
2107 (set_attr "prefix" "evex")
2108 (set_attr "mode" "<MODE>")])
2109
2110 (define_insn "rsqrt14_<mode>_mask"
2111 [(set (match_operand:VF_128 0 "register_operand" "=v")
2112 (vec_merge:VF_128
2113 (vec_merge:VF_128
2114 (unspec:VF_128
2115 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2116 UNSPEC_RSQRT14)
2117 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2118 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2119 (match_operand:VF_128 2 "register_operand" "v")
2120 (const_int 1)))]
2121 "TARGET_AVX512F"
2122 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2123 [(set_attr "type" "sse")
2124 (set_attr "prefix" "evex")
2125 (set_attr "mode" "<MODE>")])
2126
2127 (define_insn "sse_vmrsqrtv4sf2"
2128 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2129 (vec_merge:V4SF
2130 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2131 UNSPEC_RSQRT)
2132 (match_operand:V4SF 2 "register_operand" "0,x")
2133 (const_int 1)))]
2134 "TARGET_SSE"
2135 "@
2136 rsqrtss\t{%1, %0|%0, %k1}
2137 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
2138 [(set_attr "isa" "noavx,avx")
2139 (set_attr "type" "sse")
2140 (set_attr "prefix" "orig,vex")
2141 (set_attr "mode" "SF")])
2142
2143 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
2144 [(set (match_operand:VF 0 "register_operand")
2145 (smaxmin:VF
2146 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
2147 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
2148 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2149 {
2150 if (!flag_finite_math_only || flag_signed_zeros)
2151 {
2152 operands[1] = force_reg (<MODE>mode, operands[1]);
2153 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2154 (operands[0], operands[1], operands[2]
2155 <mask_operand_arg34>
2156 <round_saeonly_mask_arg3>));
2157 DONE;
2158 }
2159 else
2160 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
2161 })
2162
2163 ;; These versions of the min/max patterns are intentionally ignorant of
2164 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
2165 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
2166 ;; are undefined in this condition, we're certain this is correct.
2167
2168 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2169 [(set (match_operand:VF 0 "register_operand" "=x,v")
2170 (smaxmin:VF
2171 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
2172 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2173 "TARGET_SSE
2174 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2175 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2176 "@
2177 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2178 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2179 [(set_attr "isa" "noavx,avx")
2180 (set_attr "type" "sseadd")
2181 (set_attr "btver2_sse_attr" "maxmin")
2182 (set_attr "prefix" "<mask_prefix3>")
2183 (set_attr "mode" "<MODE>")])
2184
2185 ;; These versions of the min/max patterns implement exactly the operations
2186 ;; min = (op1 < op2 ? op1 : op2)
2187 ;; max = (!(op1 < op2) ? op1 : op2)
2188 ;; Their operands are not commutative, and thus they may be used in the
2189 ;; presence of -0.0 and NaN.
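;; This matches the hardware behavior of (v)min/max: when either input is a
;; NaN, or both inputs are zero (of either sign), the second source operand
;; is returned.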
2190
2191 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2192 [(set (match_operand:VF 0 "register_operand" "=x,v")
2193 (unspec:VF
2194 [(match_operand:VF 1 "register_operand" "0,v")
2195 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2196 IEEE_MAXMIN))]
2197 "TARGET_SSE
2198 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2199 "@
2200 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2201 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2202 [(set_attr "isa" "noavx,avx")
2203 (set_attr "type" "sseadd")
2204 (set_attr "btver2_sse_attr" "maxmin")
2205 (set_attr "prefix" "<mask_prefix3>")
2206 (set_attr "mode" "<MODE>")])
2207
2208 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2209 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2210 (vec_merge:VF_128
2211 (smaxmin:VF_128
2212 (match_operand:VF_128 1 "register_operand" "0,v")
2213 (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_scalar_constraint>"))
2214 (match_dup 1)
2215 (const_int 1)))]
2216 "TARGET_SSE"
2217 "@
2218 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2219 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2220 [(set_attr "isa" "noavx,avx")
2221 (set_attr "type" "sse")
2222 (set_attr "btver2_sse_attr" "maxmin")
2223 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2224 (set_attr "mode" "<ssescalarmode>")])
2225
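;; The addsub patterns blend a parallel subtraction and addition: the
;; vec_merge mask selects the minus result for the even-numbered elements and
;; the plus result for the odd ones, hence masks of 1 (V2DF), 5 (V4DF/V4SF)
;; and 85 (V8SF).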
2226 (define_insn "avx_addsubv4df3"
2227 [(set (match_operand:V4DF 0 "register_operand" "=x")
2228 (vec_merge:V4DF
2229 (minus:V4DF
2230 (match_operand:V4DF 1 "register_operand" "x")
2231 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2232 (plus:V4DF (match_dup 1) (match_dup 2))
2233 (const_int 5)))]
2234 "TARGET_AVX"
2235 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2236 [(set_attr "type" "sseadd")
2237 (set_attr "prefix" "vex")
2238 (set_attr "mode" "V4DF")])
2239
2240 (define_insn "sse3_addsubv2df3"
2241 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2242 (vec_merge:V2DF
2243 (minus:V2DF
2244 (match_operand:V2DF 1 "register_operand" "0,x")
2245 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2246 (plus:V2DF (match_dup 1) (match_dup 2))
2247 (const_int 1)))]
2248 "TARGET_SSE3"
2249 "@
2250 addsubpd\t{%2, %0|%0, %2}
2251 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2252 [(set_attr "isa" "noavx,avx")
2253 (set_attr "type" "sseadd")
2254 (set_attr "atom_unit" "complex")
2255 (set_attr "prefix" "orig,vex")
2256 (set_attr "mode" "V2DF")])
2257
2258 (define_insn "avx_addsubv8sf3"
2259 [(set (match_operand:V8SF 0 "register_operand" "=x")
2260 (vec_merge:V8SF
2261 (minus:V8SF
2262 (match_operand:V8SF 1 "register_operand" "x")
2263 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2264 (plus:V8SF (match_dup 1) (match_dup 2))
2265 (const_int 85)))]
2266 "TARGET_AVX"
2267 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2268 [(set_attr "type" "sseadd")
2269 (set_attr "prefix" "vex")
2270 (set_attr "mode" "V8SF")])
2271
2272 (define_insn "sse3_addsubv4sf3"
2273 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2274 (vec_merge:V4SF
2275 (minus:V4SF
2276 (match_operand:V4SF 1 "register_operand" "0,x")
2277 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2278 (plus:V4SF (match_dup 1) (match_dup 2))
2279 (const_int 5)))]
2280 "TARGET_SSE3"
2281 "@
2282 addsubps\t{%2, %0|%0, %2}
2283 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2284 [(set_attr "isa" "noavx,avx")
2285 (set_attr "type" "sseadd")
2286 (set_attr "prefix" "orig,vex")
2287 (set_attr "prefix_rep" "1,*")
2288 (set_attr "mode" "V4SF")])
2289
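;; These splitters recognize blends of an element-wise MINUS and PLUS of the
;; same two operands and rewrite them into the canonical vec_merge form used
;; by the addsub insns above, adjusting the merge mask when the MINUS and
;; PLUS arms appear swapped.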
2290 (define_split
2291 [(set (match_operand:VF_128_256 0 "register_operand")
2292 (match_operator:VF_128_256 6 "addsub_vm_operator"
2293 [(minus:VF_128_256
2294 (match_operand:VF_128_256 1 "register_operand")
2295 (match_operand:VF_128_256 2 "vector_operand"))
2296 (plus:VF_128_256
2297 (match_operand:VF_128_256 3 "vector_operand")
2298 (match_operand:VF_128_256 4 "vector_operand"))
2299 (match_operand 5 "const_int_operand")]))]
2300 "TARGET_SSE3
2301 && can_create_pseudo_p ()
2302 && ((rtx_equal_p (operands[1], operands[3])
2303 && rtx_equal_p (operands[2], operands[4]))
2304 || (rtx_equal_p (operands[1], operands[4])
2305 && rtx_equal_p (operands[2], operands[3])))"
2306 [(set (match_dup 0)
2307 (vec_merge:VF_128_256
2308 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2309 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2310 (match_dup 5)))])
2311
2312 (define_split
2313 [(set (match_operand:VF_128_256 0 "register_operand")
2314 (match_operator:VF_128_256 6 "addsub_vm_operator"
2315 [(plus:VF_128_256
2316 (match_operand:VF_128_256 1 "vector_operand")
2317 (match_operand:VF_128_256 2 "vector_operand"))
2318 (minus:VF_128_256
2319 (match_operand:VF_128_256 3 "register_operand")
2320 (match_operand:VF_128_256 4 "vector_operand"))
2321 (match_operand 5 "const_int_operand")]))]
2322 "TARGET_SSE3
2323 && can_create_pseudo_p ()
2324 && ((rtx_equal_p (operands[1], operands[3])
2325 && rtx_equal_p (operands[2], operands[4]))
2326 || (rtx_equal_p (operands[1], operands[4])
2327 && rtx_equal_p (operands[2], operands[3])))"
2328 [(set (match_dup 0)
2329 (vec_merge:VF_128_256
2330 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2331 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2332 (match_dup 5)))]
2333 {
2334 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
2335 operands[5]
2336 = GEN_INT (~INTVAL (operands[5])
2337 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
2338 })
2339
2340 (define_split
2341 [(set (match_operand:VF_128_256 0 "register_operand")
2342 (match_operator:VF_128_256 7 "addsub_vs_operator"
2343 [(vec_concat:<ssedoublemode>
2344 (minus:VF_128_256
2345 (match_operand:VF_128_256 1 "register_operand")
2346 (match_operand:VF_128_256 2 "vector_operand"))
2347 (plus:VF_128_256
2348 (match_operand:VF_128_256 3 "vector_operand")
2349 (match_operand:VF_128_256 4 "vector_operand")))
2350 (match_parallel 5 "addsub_vs_parallel"
2351 [(match_operand 6 "const_int_operand")])]))]
2352 "TARGET_SSE3
2353 && can_create_pseudo_p ()
2354 && ((rtx_equal_p (operands[1], operands[3])
2355 && rtx_equal_p (operands[2], operands[4]))
2356 || (rtx_equal_p (operands[1], operands[4])
2357 && rtx_equal_p (operands[2], operands[3])))"
2358 [(set (match_dup 0)
2359 (vec_merge:VF_128_256
2360 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2361 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2362 (match_dup 5)))]
2363 {
2364 int i, nelt = XVECLEN (operands[5], 0);
2365 HOST_WIDE_INT ival = 0;
2366
2367 for (i = 0; i < nelt; i++)
2368 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2369 ival |= HOST_WIDE_INT_1 << i;
2370
2371 operands[5] = GEN_INT (ival);
2372 })
2373
2374 (define_split
2375 [(set (match_operand:VF_128_256 0 "register_operand")
2376 (match_operator:VF_128_256 7 "addsub_vs_operator"
2377 [(vec_concat:<ssedoublemode>
2378 (plus:VF_128_256
2379 (match_operand:VF_128_256 1 "vector_operand")
2380 (match_operand:VF_128_256 2 "vector_operand"))
2381 (minus:VF_128_256
2382 (match_operand:VF_128_256 3 "register_operand")
2383 (match_operand:VF_128_256 4 "vector_operand")))
2384 (match_parallel 5 "addsub_vs_parallel"
2385 [(match_operand 6 "const_int_operand")])]))]
2386 "TARGET_SSE3
2387 && can_create_pseudo_p ()
2388 && ((rtx_equal_p (operands[1], operands[3])
2389 && rtx_equal_p (operands[2], operands[4]))
2390 || (rtx_equal_p (operands[1], operands[4])
2391 && rtx_equal_p (operands[2], operands[3])))"
2392 [(set (match_dup 0)
2393 (vec_merge:VF_128_256
2394 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2395 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2396 (match_dup 5)))]
2397 {
2398 int i, nelt = XVECLEN (operands[5], 0);
2399 HOST_WIDE_INT ival = 0;
2400
2401 for (i = 0; i < nelt; i++)
2402 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2403 ival |= HOST_WIDE_INT_1 << i;
2404
2405 operands[5] = GEN_INT (ival);
2406 })
2407
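;; Horizontal add/subtract.  The 256-bit vh(add|sub)p[sd] forms operate
;; within 128-bit lanes, interleaving pairwise results from operand 1 and
;; operand 2 lane by lane, as the vec_select indices below show.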
2408 (define_insn "avx_h<plusminus_insn>v4df3"
2409 [(set (match_operand:V4DF 0 "register_operand" "=x")
2410 (vec_concat:V4DF
2411 (vec_concat:V2DF
2412 (plusminus:DF
2413 (vec_select:DF
2414 (match_operand:V4DF 1 "register_operand" "x")
2415 (parallel [(const_int 0)]))
2416 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2417 (plusminus:DF
2418 (vec_select:DF
2419 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2420 (parallel [(const_int 0)]))
2421 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2422 (vec_concat:V2DF
2423 (plusminus:DF
2424 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2425 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2426 (plusminus:DF
2427 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2428 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2429 "TARGET_AVX"
2430 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2431 [(set_attr "type" "sseadd")
2432 (set_attr "prefix" "vex")
2433 (set_attr "mode" "V4DF")])
2434
2435 (define_expand "sse3_haddv2df3"
2436 [(set (match_operand:V2DF 0 "register_operand")
2437 (vec_concat:V2DF
2438 (plus:DF
2439 (vec_select:DF
2440 (match_operand:V2DF 1 "register_operand")
2441 (parallel [(const_int 0)]))
2442 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2443 (plus:DF
2444 (vec_select:DF
2445 (match_operand:V2DF 2 "vector_operand")
2446 (parallel [(const_int 0)]))
2447 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2448 "TARGET_SSE3")
2449
2450 (define_insn "*sse3_haddv2df3"
2451 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2452 (vec_concat:V2DF
2453 (plus:DF
2454 (vec_select:DF
2455 (match_operand:V2DF 1 "register_operand" "0,x")
2456 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2457 (vec_select:DF
2458 (match_dup 1)
2459 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2460 (plus:DF
2461 (vec_select:DF
2462 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2463 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2464 (vec_select:DF
2465 (match_dup 2)
2466 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2467 "TARGET_SSE3
2468 && INTVAL (operands[3]) != INTVAL (operands[4])
2469 && INTVAL (operands[5]) != INTVAL (operands[6])"
2470 "@
2471 haddpd\t{%2, %0|%0, %2}
2472 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2473 [(set_attr "isa" "noavx,avx")
2474 (set_attr "type" "sseadd")
2475 (set_attr "prefix" "orig,vex")
2476 (set_attr "mode" "V2DF")])
2477
2478 (define_insn "sse3_hsubv2df3"
2479 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2480 (vec_concat:V2DF
2481 (minus:DF
2482 (vec_select:DF
2483 (match_operand:V2DF 1 "register_operand" "0,x")
2484 (parallel [(const_int 0)]))
2485 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2486 (minus:DF
2487 (vec_select:DF
2488 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2489 (parallel [(const_int 0)]))
2490 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2491 "TARGET_SSE3"
2492 "@
2493 hsubpd\t{%2, %0|%0, %2}
2494 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2495 [(set_attr "isa" "noavx,avx")
2496 (set_attr "type" "sseadd")
2497 (set_attr "prefix" "orig,vex")
2498 (set_attr "mode" "V2DF")])
2499
2500 (define_insn "*sse3_haddv2df3_low"
2501 [(set (match_operand:DF 0 "register_operand" "=x,x")
2502 (plus:DF
2503 (vec_select:DF
2504 (match_operand:V2DF 1 "register_operand" "0,x")
2505 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2506 (vec_select:DF
2507 (match_dup 1)
2508 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2509 "TARGET_SSE3
2510 && INTVAL (operands[2]) != INTVAL (operands[3])"
2511 "@
2512 haddpd\t{%0, %0|%0, %0}
2513 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2514 [(set_attr "isa" "noavx,avx")
2515 (set_attr "type" "sseadd1")
2516 (set_attr "prefix" "orig,vex")
2517 (set_attr "mode" "V2DF")])
2518
2519 (define_insn "*sse3_hsubv2df3_low"
2520 [(set (match_operand:DF 0 "register_operand" "=x,x")
2521 (minus:DF
2522 (vec_select:DF
2523 (match_operand:V2DF 1 "register_operand" "0,x")
2524 (parallel [(const_int 0)]))
2525 (vec_select:DF
2526 (match_dup 1)
2527 (parallel [(const_int 1)]))))]
2528 "TARGET_SSE3"
2529 "@
2530 hsubpd\t{%0, %0|%0, %0}
2531 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2532 [(set_attr "isa" "noavx,avx")
2533 (set_attr "type" "sseadd1")
2534 (set_attr "prefix" "orig,vex")
2535 (set_attr "mode" "V2DF")])
2536
2537 (define_insn "avx_h<plusminus_insn>v8sf3"
2538 [(set (match_operand:V8SF 0 "register_operand" "=x")
2539 (vec_concat:V8SF
2540 (vec_concat:V4SF
2541 (vec_concat:V2SF
2542 (plusminus:SF
2543 (vec_select:SF
2544 (match_operand:V8SF 1 "register_operand" "x")
2545 (parallel [(const_int 0)]))
2546 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2547 (plusminus:SF
2548 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2549 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2550 (vec_concat:V2SF
2551 (plusminus:SF
2552 (vec_select:SF
2553 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2554 (parallel [(const_int 0)]))
2555 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2556 (plusminus:SF
2557 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2558 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2559 (vec_concat:V4SF
2560 (vec_concat:V2SF
2561 (plusminus:SF
2562 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2563 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2564 (plusminus:SF
2565 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2566 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2567 (vec_concat:V2SF
2568 (plusminus:SF
2569 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2570 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2571 (plusminus:SF
2572 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2573 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2574 "TARGET_AVX"
2575 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2576 [(set_attr "type" "sseadd")
2577 (set_attr "prefix" "vex")
2578 (set_attr "mode" "V8SF")])
2579
2580 (define_insn "sse3_h<plusminus_insn>v4sf3"
2581 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2582 (vec_concat:V4SF
2583 (vec_concat:V2SF
2584 (plusminus:SF
2585 (vec_select:SF
2586 (match_operand:V4SF 1 "register_operand" "0,x")
2587 (parallel [(const_int 0)]))
2588 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2589 (plusminus:SF
2590 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2591 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2592 (vec_concat:V2SF
2593 (plusminus:SF
2594 (vec_select:SF
2595 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2596 (parallel [(const_int 0)]))
2597 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2598 (plusminus:SF
2599 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2600 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2601 "TARGET_SSE3"
2602 "@
2603 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2604 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2605 [(set_attr "isa" "noavx,avx")
2606 (set_attr "type" "sseadd")
2607 (set_attr "atom_unit" "complex")
2608 (set_attr "prefix" "orig,vex")
2609 (set_attr "prefix_rep" "1,*")
2610 (set_attr "mode" "V4SF")])
2611
2612 (define_mode_iterator REDUC_SSE_PLUS_MODE
2613 [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")])
2614
2615 (define_expand "reduc_plus_scal_<mode>"
2616 [(plus:REDUC_SSE_PLUS_MODE
2617 (match_operand:<ssescalarmode> 0 "register_operand")
2618 (match_operand:REDUC_SSE_PLUS_MODE 1 "register_operand"))]
2619 ""
2620 {
2621 rtx tmp = gen_reg_rtx (<MODE>mode);
2622 ix86_expand_reduc (gen_add<mode>3, tmp, operands[1]);
2623 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2624 const0_rtx));
2625 DONE;
2626 })
2627
2628 (define_mode_iterator REDUC_PLUS_MODE
2629 [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
2630 (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
2631
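;; For the wider modes the reduction proceeds by halving: extract the high
;; half, add it to the low half, then recurse on the half-width reduction
;; until the 128-bit patterns above finish the job.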
2632 (define_expand "reduc_plus_scal_<mode>"
2633 [(plus:REDUC_PLUS_MODE
2634 (match_operand:<ssescalarmode> 0 "register_operand")
2635 (match_operand:REDUC_PLUS_MODE 1 "register_operand"))]
2636 ""
2637 {
2638 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2639 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2640 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2641 emit_insn (gen_add<ssehalfvecmodelower>3
2642 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2643 emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2644 DONE;
2645 })
2646
2647 ;; Modes handled by reduc_sm{in,ax}* patterns.
2648 (define_mode_iterator REDUC_SSE_SMINMAX_MODE
2649 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
2650 (V2DI "TARGET_SSE") (V4SI "TARGET_SSE") (V8HI "TARGET_SSE")
2651 (V16QI "TARGET_SSE")])
2652
2653 (define_expand "reduc_<code>_scal_<mode>"
2654 [(smaxmin:REDUC_SSE_SMINMAX_MODE
2655 (match_operand:<ssescalarmode> 0 "register_operand")
2656 (match_operand:REDUC_SSE_SMINMAX_MODE 1 "register_operand"))]
2657 ""
2658 {
2659 rtx tmp = gen_reg_rtx (<MODE>mode);
2660 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2661 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2662 const0_rtx));
2663 DONE;
2664 })
2665
2666 (define_mode_iterator REDUC_SMINMAX_MODE
2667 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2668 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2669 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2670 (V64QI "TARGET_AVX512BW")
2671 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2672 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2673 (V8DF "TARGET_AVX512F")])
2674
2675 (define_expand "reduc_<code>_scal_<mode>"
2676 [(smaxmin:REDUC_SMINMAX_MODE
2677 (match_operand:<ssescalarmode> 0 "register_operand")
2678 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2679 ""
2680 {
2681 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2682 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2683 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2684 emit_insn (gen_<code><ssehalfvecmodelower>3
2685 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2686 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2687 DONE;
2688 })
2689
2690 (define_expand "reduc_<code>_scal_<mode>"
2691 [(umaxmin:VI_AVX512BW
2692 (match_operand:<ssescalarmode> 0 "register_operand")
2693 (match_operand:VI_AVX512BW 1 "register_operand"))]
2694 "TARGET_AVX512F"
2695 {
2696 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2697 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2698 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2699 emit_insn (gen_<code><ssehalfvecmodelower>3
2700 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2701 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2702 DONE;
2703 })
2704
2705 (define_expand "reduc_<code>_scal_<mode>"
2706 [(umaxmin:VI_256
2707 (match_operand:<ssescalarmode> 0 "register_operand")
2708 (match_operand:VI_256 1 "register_operand"))]
2709 "TARGET_AVX2"
2710 {
2711 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2712 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2713 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2714 emit_insn (gen_<code><ssehalfvecmodelower>3
2715 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2716 rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
2717 ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2);
2718 emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower>
2719 (operands[0], tmp3, const0_rtx));
2720 DONE;
2721 })
2722
2723 (define_expand "reduc_umin_scal_v8hi"
2724 [(umin:V8HI
2725 (match_operand:HI 0 "register_operand")
2726 (match_operand:V8HI 1 "register_operand"))]
2727 "TARGET_SSE4_1"
2728 {
2729 rtx tmp = gen_reg_rtx (V8HImode);
2730 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2731 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
2732 DONE;
2733 })
2734
2735 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2736 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2737 (unspec:VF_AVX512VL
2738 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2739 (match_operand:SI 2 "const_0_to_255_operand")]
2740 UNSPEC_REDUCE))]
2741 "TARGET_AVX512DQ"
2742 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2743 [(set_attr "type" "sse")
2744 (set_attr "prefix" "evex")
2745 (set_attr "mode" "<MODE>")])
2746
2747 (define_insn "reduces<mode><mask_scalar_name>"
2748 [(set (match_operand:VF_128 0 "register_operand" "=v")
2749 (vec_merge:VF_128
2750 (unspec:VF_128
2751 [(match_operand:VF_128 1 "register_operand" "v")
2752 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2753 (match_operand:SI 3 "const_0_to_255_operand")]
2754 UNSPEC_REDUCE)
2755 (match_dup 1)
2756 (const_int 1)))]
2757 "TARGET_AVX512DQ"
2758 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2, %3}"
2759 [(set_attr "type" "sse")
2760 (set_attr "prefix" "evex")
2761 (set_attr "mode" "<MODE>")])
2762
2763 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2764 ;;
2765 ;; Parallel floating point comparisons
2766 ;;
2767 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2768
2769 (define_insn "avx_cmp<mode>3"
2770 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2771 (unspec:VF_128_256
2772 [(match_operand:VF_128_256 1 "register_operand" "x")
2773 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2774 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2775 UNSPEC_PCMP))]
2776 "TARGET_AVX"
2777 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2778 [(set_attr "type" "ssecmp")
2779 (set_attr "length_immediate" "1")
2780 (set_attr "prefix" "vex")
2781 (set_attr "mode" "<MODE>")])
2782
2783 (define_insn "avx_vmcmp<mode>3"
2784 [(set (match_operand:VF_128 0 "register_operand" "=x")
2785 (vec_merge:VF_128
2786 (unspec:VF_128
2787 [(match_operand:VF_128 1 "register_operand" "x")
2788 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2789 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2790 UNSPEC_PCMP)
2791 (match_dup 1)
2792 (const_int 1)))]
2793 "TARGET_AVX"
2794 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2795 [(set_attr "type" "ssecmp")
2796 (set_attr "length_immediate" "1")
2797 (set_attr "prefix" "vex")
2798 (set_attr "mode" "<ssescalarmode>")])
2799
2800 (define_insn "*<sse>_maskcmp<mode>3_comm"
2801 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2802 (match_operator:VF_128_256 3 "sse_comparison_operator"
2803 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2804 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2805 "TARGET_SSE
2806 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2807 "@
2808 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2809 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2810 [(set_attr "isa" "noavx,avx")
2811 (set_attr "type" "ssecmp")
2812 (set_attr "length_immediate" "1")
2813 (set_attr "prefix" "orig,vex")
2814 (set_attr "mode" "<MODE>")])
2815
2816 (define_insn "<sse>_maskcmp<mode>3"
2817 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2818 (match_operator:VF_128_256 3 "sse_comparison_operator"
2819 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2820 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2821 "TARGET_SSE"
2822 "@
2823 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2824 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2825 [(set_attr "isa" "noavx,avx")
2826 (set_attr "type" "ssecmp")
2827 (set_attr "length_immediate" "1")
2828 (set_attr "prefix" "orig,vex")
2829 (set_attr "mode" "<MODE>")])
2830
2831 (define_insn "<sse>_vmmaskcmp<mode>3"
2832 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2833 (vec_merge:VF_128
2834 (match_operator:VF_128 3 "sse_comparison_operator"
2835 [(match_operand:VF_128 1 "register_operand" "0,x")
2836 (match_operand:VF_128 2 "vector_operand" "xBm,xm")])
2837 (match_dup 1)
2838 (const_int 1)))]
2839 "TARGET_SSE"
2840 "@
2841 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2842 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2843 [(set_attr "isa" "noavx,avx")
2844 (set_attr "type" "ssecmp")
2845 (set_attr "length_immediate" "1,*")
2846 (set_attr "prefix" "orig,vex")
2847 (set_attr "mode" "<ssescalarmode>")])
2848
2849 (define_mode_attr cmp_imm_predicate
2850 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2851 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2852 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2853 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2854 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2855 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2856 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2857 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2858 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
2859
2860 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2861 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2862 (unspec:<avx512fmaskmode>
2863 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2864 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2865 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2866 UNSPEC_PCMP))]
2867 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2868 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2869 [(set_attr "type" "ssecmp")
2870 (set_attr "length_immediate" "1")
2871 (set_attr "prefix" "evex")
2872 (set_attr "mode" "<sseinsnmode>")])
2873
2874 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2875 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2876 (unspec:<avx512fmaskmode>
2877 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2878 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2879 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2880 UNSPEC_PCMP))]
2881 "TARGET_AVX512BW"
2882 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2883 [(set_attr "type" "ssecmp")
2884 (set_attr "length_immediate" "1")
2885 (set_attr "prefix" "evex")
2886 (set_attr "mode" "<sseinsnmode>")])
2887
2888 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2889 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2890 (unspec:<avx512fmaskmode>
2891 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2892 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2893 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2894 UNSPEC_UNSIGNED_PCMP))]
2895 "TARGET_AVX512BW"
2896 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2897 [(set_attr "type" "ssecmp")
2898 (set_attr "length_immediate" "1")
2899 (set_attr "prefix" "evex")
2900 (set_attr "mode" "<sseinsnmode>")])
2901
2902 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2903 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2904 (unspec:<avx512fmaskmode>
2905 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2906 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2907 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2908 UNSPEC_UNSIGNED_PCMP))]
2909 "TARGET_AVX512F"
2910 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2911 [(set_attr "type" "ssecmp")
2912 (set_attr "length_immediate" "1")
2913 (set_attr "prefix" "evex")
2914 (set_attr "mode" "<sseinsnmode>")])
2915
2916 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2917 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2918 (and:<avx512fmaskmode>
2919 (unspec:<avx512fmaskmode>
2920 [(match_operand:VF_128 1 "register_operand" "v")
2921 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2922 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2923 UNSPEC_PCMP)
2924 (const_int 1)))]
2925 "TARGET_AVX512F"
2926 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
2927 [(set_attr "type" "ssecmp")
2928 (set_attr "length_immediate" "1")
2929 (set_attr "prefix" "evex")
2930 (set_attr "mode" "<ssescalarmode>")])
2931
2932 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2933 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2934 (and:<avx512fmaskmode>
2935 (unspec:<avx512fmaskmode>
2936 [(match_operand:VF_128 1 "register_operand" "v")
2937 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2938 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2939 UNSPEC_PCMP)
2940 (and:<avx512fmaskmode>
2941 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2942 (const_int 1))))]
2943 "TARGET_AVX512F"
2944 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
2945 [(set_attr "type" "ssecmp")
2946 (set_attr "length_immediate" "1")
2947 (set_attr "prefix" "evex")
2948 (set_attr "mode" "<ssescalarmode>")])
2949
2950 (define_insn "avx512f_maskcmp<mode>3"
2951 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2952 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2953 [(match_operand:VF 1 "register_operand" "v")
2954 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2955 "TARGET_AVX512F"
2956 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2957 [(set_attr "type" "ssecmp")
2958 (set_attr "length_immediate" "1")
2959 (set_attr "prefix" "evex")
2960 (set_attr "mode" "<sseinsnmode>")])
2961
2962 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
2963 [(set (reg:CCFP FLAGS_REG)
2964 (compare:CCFP
2965 (vec_select:MODEF
2966 (match_operand:<ssevecmode> 0 "register_operand" "v")
2967 (parallel [(const_int 0)]))
2968 (vec_select:MODEF
2969 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2970 (parallel [(const_int 0)]))))]
2971 "SSE_FLOAT_MODE_P (<MODE>mode)"
2972 "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2973 [(set_attr "type" "ssecomi")
2974 (set_attr "prefix" "maybe_vex")
2975 (set_attr "prefix_rep" "0")
2976 (set (attr "prefix_data16")
2977 (if_then_else (eq_attr "mode" "DF")
2978 (const_string "1")
2979 (const_string "0")))
2980 (set_attr "mode" "<MODE>")])
2981
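;; vec_cmp/vec_cmpu expanders.  For the AVX-512 (and VL) modes the comparison
;; result is an <avx512fmaskmode> mask register; for the older vector modes
;; it is a same-width integer vector of all-ones/all-zeros elements.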
2982 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2983 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2984 (match_operator:<avx512fmaskmode> 1 ""
2985 [(match_operand:V48_AVX512VL 2 "register_operand")
2986 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
2987 "TARGET_AVX512F"
2988 {
2989 bool ok = ix86_expand_mask_vec_cmp (operands);
2990 gcc_assert (ok);
2991 DONE;
2992 })
2993
2994 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2995 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2996 (match_operator:<avx512fmaskmode> 1 ""
2997 [(match_operand:VI12_AVX512VL 2 "register_operand")
2998 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2999 "TARGET_AVX512BW"
3000 {
3001 bool ok = ix86_expand_mask_vec_cmp (operands);
3002 gcc_assert (ok);
3003 DONE;
3004 })
3005
3006 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3007 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3008 (match_operator:<sseintvecmode> 1 ""
3009 [(match_operand:VI_256 2 "register_operand")
3010 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3011 "TARGET_AVX2"
3012 {
3013 bool ok = ix86_expand_int_vec_cmp (operands);
3014 gcc_assert (ok);
3015 DONE;
3016 })
3017
3018 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3019 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3020 (match_operator:<sseintvecmode> 1 ""
3021 [(match_operand:VI124_128 2 "register_operand")
3022 (match_operand:VI124_128 3 "vector_operand")]))]
3023 "TARGET_SSE2"
3024 {
3025 bool ok = ix86_expand_int_vec_cmp (operands);
3026 gcc_assert (ok);
3027 DONE;
3028 })
3029
3030 (define_expand "vec_cmpv2div2di"
3031 [(set (match_operand:V2DI 0 "register_operand")
3032 (match_operator:V2DI 1 ""
3033 [(match_operand:V2DI 2 "register_operand")
3034 (match_operand:V2DI 3 "vector_operand")]))]
3035 "TARGET_SSE4_2"
3036 {
3037 bool ok = ix86_expand_int_vec_cmp (operands);
3038 gcc_assert (ok);
3039 DONE;
3040 })
3041
3042 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3043 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3044 (match_operator:<sseintvecmode> 1 ""
3045 [(match_operand:VF_256 2 "register_operand")
3046 (match_operand:VF_256 3 "nonimmediate_operand")]))]
3047 "TARGET_AVX"
3048 {
3049 bool ok = ix86_expand_fp_vec_cmp (operands);
3050 gcc_assert (ok);
3051 DONE;
3052 })
3053
3054 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3055 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3056 (match_operator:<sseintvecmode> 1 ""
3057 [(match_operand:VF_128 2 "register_operand")
3058 (match_operand:VF_128 3 "vector_operand")]))]
3059 "TARGET_SSE"
3060 {
3061 bool ok = ix86_expand_fp_vec_cmp (operands);
3062 gcc_assert (ok);
3063 DONE;
3064 })
3065
3066 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3067 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3068 (match_operator:<avx512fmaskmode> 1 ""
3069 [(match_operand:VI48_AVX512VL 2 "register_operand")
3070 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
3071 "TARGET_AVX512F"
3072 {
3073 bool ok = ix86_expand_mask_vec_cmp (operands);
3074 gcc_assert (ok);
3075 DONE;
3076 })
3077
3078 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3079 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3080 (match_operator:<avx512fmaskmode> 1 ""
3081 [(match_operand:VI12_AVX512VL 2 "register_operand")
3082 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3083 "TARGET_AVX512BW"
3084 {
3085 bool ok = ix86_expand_mask_vec_cmp (operands);
3086 gcc_assert (ok);
3087 DONE;
3088 })
3089
3090 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3091 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3092 (match_operator:<sseintvecmode> 1 ""
3093 [(match_operand:VI_256 2 "register_operand")
3094 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3095 "TARGET_AVX2"
3096 {
3097 bool ok = ix86_expand_int_vec_cmp (operands);
3098 gcc_assert (ok);
3099 DONE;
3100 })
3101
3102 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3103 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3104 (match_operator:<sseintvecmode> 1 ""
3105 [(match_operand:VI124_128 2 "register_operand")
3106 (match_operand:VI124_128 3 "vector_operand")]))]
3107 "TARGET_SSE2"
3108 {
3109 bool ok = ix86_expand_int_vec_cmp (operands);
3110 gcc_assert (ok);
3111 DONE;
3112 })
3113
3114 (define_expand "vec_cmpuv2div2di"
3115 [(set (match_operand:V2DI 0 "register_operand")
3116 (match_operator:V2DI 1 ""
3117 [(match_operand:V2DI 2 "register_operand")
3118 (match_operand:V2DI 3 "vector_operand")]))]
3119 "TARGET_SSE4_2"
3120 {
3121 bool ok = ix86_expand_int_vec_cmp (operands);
3122 gcc_assert (ok);
3123 DONE;
3124 })
3125
3126 (define_expand "vec_cmpeqv2div2di"
3127 [(set (match_operand:V2DI 0 "register_operand")
3128 (match_operator:V2DI 1 ""
3129 [(match_operand:V2DI 2 "register_operand")
3130 (match_operand:V2DI 3 "vector_operand")]))]
3131 "TARGET_SSE4_1"
3132 {
3133 bool ok = ix86_expand_int_vec_cmp (operands);
3134 gcc_assert (ok);
3135 DONE;
3136 })
3137
3138 (define_expand "vcond<V_512:mode><VF_512:mode>"
3139 [(set (match_operand:V_512 0 "register_operand")
3140 (if_then_else:V_512
3141 (match_operator 3 ""
3142 [(match_operand:VF_512 4 "nonimmediate_operand")
3143 (match_operand:VF_512 5 "nonimmediate_operand")])
3144 (match_operand:V_512 1 "general_operand")
3145 (match_operand:V_512 2 "general_operand")))]
3146 "TARGET_AVX512F
3147 && (GET_MODE_NUNITS (<V_512:MODE>mode)
3148 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3149 {
3150 bool ok = ix86_expand_fp_vcond (operands);
3151 gcc_assert (ok);
3152 DONE;
3153 })
3154
3155 (define_expand "vcond<V_256:mode><VF_256:mode>"
3156 [(set (match_operand:V_256 0 "register_operand")
3157 (if_then_else:V_256
3158 (match_operator 3 ""
3159 [(match_operand:VF_256 4 "nonimmediate_operand")
3160 (match_operand:VF_256 5 "nonimmediate_operand")])
3161 (match_operand:V_256 1 "general_operand")
3162 (match_operand:V_256 2 "general_operand")))]
3163 "TARGET_AVX
3164 && (GET_MODE_NUNITS (<V_256:MODE>mode)
3165 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3166 {
3167 bool ok = ix86_expand_fp_vcond (operands);
3168 gcc_assert (ok);
3169 DONE;
3170 })
3171
3172 (define_expand "vcond<V_128:mode><VF_128:mode>"
3173 [(set (match_operand:V_128 0 "register_operand")
3174 (if_then_else:V_128
3175 (match_operator 3 ""
3176 [(match_operand:VF_128 4 "vector_operand")
3177 (match_operand:VF_128 5 "vector_operand")])
3178 (match_operand:V_128 1 "general_operand")
3179 (match_operand:V_128 2 "general_operand")))]
3180 "TARGET_SSE
3181 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3182 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3183 {
3184 bool ok = ix86_expand_fp_vcond (operands);
3185 gcc_assert (ok);
3186 DONE;
3187 })
3188
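;; The vcond_mask_* expanders below blend operands 1 and 2 under the
;; control of mask operand 3.  For the AVX512 forms the mask is a mask
;; register and the vec_merge is matched directly by the masked-move
;; patterns; for the SSE/AVX forms the mask is a vector of -1/0 elements
;; and the blend is emitted by ix86_expand_sse_movcc.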
3189 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3190 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3191 (vec_merge:V48_AVX512VL
3192 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3193 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
3194 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3195 "TARGET_AVX512F")
3196
3197 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3198 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3199 (vec_merge:VI12_AVX512VL
3200 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3201 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
3202 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3203 "TARGET_AVX512BW")
3204
3205 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3206 [(set (match_operand:VI_256 0 "register_operand")
3207 (vec_merge:VI_256
3208 (match_operand:VI_256 1 "nonimmediate_operand")
3209 (match_operand:VI_256 2 "nonimm_or_0_operand")
3210 (match_operand:<sseintvecmode> 3 "register_operand")))]
3211 "TARGET_AVX2"
3212 {
3213 ix86_expand_sse_movcc (operands[0], operands[3],
3214 operands[1], operands[2]);
3215 DONE;
3216 })
3217
3218 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3219 [(set (match_operand:VI124_128 0 "register_operand")
3220 (vec_merge:VI124_128
3221 (match_operand:VI124_128 1 "vector_operand")
3222 (match_operand:VI124_128 2 "nonimm_or_0_operand")
3223 (match_operand:<sseintvecmode> 3 "register_operand")))]
3224 "TARGET_SSE2"
3225 {
3226 ix86_expand_sse_movcc (operands[0], operands[3],
3227 operands[1], operands[2]);
3228 DONE;
3229 })
3230
3231 (define_expand "vcond_mask_v2div2di"
3232 [(set (match_operand:V2DI 0 "register_operand")
3233 (vec_merge:V2DI
3234 (match_operand:V2DI 1 "vector_operand")
3235 (match_operand:V2DI 2 "nonimm_or_0_operand")
3236 (match_operand:V2DI 3 "register_operand")))]
3237 "TARGET_SSE4_2"
3238 {
3239 ix86_expand_sse_movcc (operands[0], operands[3],
3240 operands[1], operands[2]);
3241 DONE;
3242 })
3243
3244 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3245 [(set (match_operand:VF_256 0 "register_operand")
3246 (vec_merge:VF_256
3247 (match_operand:VF_256 1 "nonimmediate_operand")
3248 (match_operand:VF_256 2 "nonimm_or_0_operand")
3249 (match_operand:<sseintvecmode> 3 "register_operand")))]
3250 "TARGET_AVX"
3251 {
3252 ix86_expand_sse_movcc (operands[0], operands[3],
3253 operands[1], operands[2]);
3254 DONE;
3255 })
3256
3257 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3258 [(set (match_operand:VF_128 0 "register_operand")
3259 (vec_merge:VF_128
3260 (match_operand:VF_128 1 "vector_operand")
3261 (match_operand:VF_128 2 "nonimm_or_0_operand")
3262 (match_operand:<sseintvecmode> 3 "register_operand")))]
3263 "TARGET_SSE"
3264 {
3265 ix86_expand_sse_movcc (operands[0], operands[3],
3266 operands[1], operands[2]);
3267 DONE;
3268 })
3269
3270 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3271 ;;
3272 ;; Parallel floating point logical operations
3273 ;;
3274 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3275
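;; The andnot patterns below compute ~op1 & op2.  A hedged C sketch of the
;; classic use, clearing the sign bits to get a packed abs (the function
;; name is illustrative only, not taken from this file):
;;
;;   #include <immintrin.h>
;;
;;   __m128 abs_ps (__m128 x)
;;   {
;;     /* Only the sign bit of each element is set in the mask.  */
;;     const __m128 signmask = _mm_set1_ps (-0.0f);
;;     /* andnps: ~signmask & x clears the sign bits.  */
;;     return _mm_andnot_ps (signmask, x);
;;   }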
3276 (define_insn "<sse>_andnot<mode>3<mask_name>"
3277 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3278 (and:VF_128_256
3279 (not:VF_128_256
3280 (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3281 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3282 "TARGET_SSE && <mask_avx512vl_condition>"
3283 {
3284 char buf[128];
3285 const char *ops;
3286 const char *suffix;
3287
3288 switch (which_alternative)
3289 {
3290 case 0:
3291 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3292 break;
3293 case 1:
3294 case 2:
3295 case 3:
3296 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3297 break;
3298 default:
3299 gcc_unreachable ();
3300 }
3301
3302 switch (get_attr_mode (insn))
3303 {
3304 case MODE_V8SF:
3305 case MODE_V4SF:
3306 suffix = "ps";
3307 break;
3308 case MODE_OI:
3309 case MODE_TI:
3310 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3311 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3312 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3313 break;
3314 default:
3315 suffix = "<ssemodesuffix>";
3316 }
3317
3318 snprintf (buf, sizeof (buf), ops, suffix);
3319 output_asm_insn (buf, operands);
3320 return "";
3321 }
3322 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3323 (set_attr "type" "sselog")
3324 (set_attr "prefix" "orig,maybe_vex,evex,evex")
3325 (set (attr "mode")
3326 (cond [(and (match_test "<mask_applied>")
3327 (and (eq_attr "alternative" "1")
3328 (match_test "!TARGET_AVX512DQ")))
3329 (const_string "<sseintvecmode2>")
3330 (eq_attr "alternative" "3")
3331 (const_string "<sseintvecmode2>")
3332 (and (match_test "<MODE_SIZE> == 16")
3333 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3334 (const_string "<ssePSmode>")
3335 (match_test "TARGET_AVX")
3336 (const_string "<MODE>")
3337 (match_test "optimize_function_for_size_p (cfun)")
3338 (const_string "V4SF")
3339 ]
3340 (const_string "<MODE>")))])
3341
3342
3343 (define_insn "<sse>_andnot<mode>3<mask_name>"
3344 [(set (match_operand:VF_512 0 "register_operand" "=v")
3345 (and:VF_512
3346 (not:VF_512
3347 (match_operand:VF_512 1 "register_operand" "v"))
3348 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3349 "TARGET_AVX512F"
3350 {
3351 char buf[128];
3352 const char *ops;
3353 const char *suffix;
3354
3355 suffix = "<ssemodesuffix>";
3356 ops = "";
3357
3358 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3359 if (!TARGET_AVX512DQ)
3360 {
3361 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3362 ops = "p";
3363 }
3364
3365 snprintf (buf, sizeof (buf),
3366 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3367 ops, suffix);
3368 output_asm_insn (buf, operands);
3369 return "";
3370 }
3371 [(set_attr "type" "sselog")
3372 (set_attr "prefix" "evex")
3373 (set (attr "mode")
3374 (if_then_else (match_test "TARGET_AVX512DQ")
3375 (const_string "<sseinsnmode>")
3376 (const_string "XI")))])
3377
3378 (define_expand "<code><mode>3<mask_name>"
3379 [(set (match_operand:VF_128_256 0 "register_operand")
3380 (any_logic:VF_128_256
3381 (match_operand:VF_128_256 1 "vector_operand")
3382 (match_operand:VF_128_256 2 "vector_operand")))]
3383 "TARGET_SSE && <mask_avx512vl_condition>"
3384 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3385
3386 (define_expand "<code><mode>3<mask_name>"
3387 [(set (match_operand:VF_512 0 "register_operand")
3388 (any_logic:VF_512
3389 (match_operand:VF_512 1 "nonimmediate_operand")
3390 (match_operand:VF_512 2 "nonimmediate_operand")))]
3391 "TARGET_AVX512F"
3392 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3393
3394 (define_insn "*<code><mode>3<mask_name>"
3395 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3396 (any_logic:VF_128_256
3397 (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3398 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3399 "TARGET_SSE && <mask_avx512vl_condition>
3400 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3401 {
3402 char buf[128];
3403 const char *ops;
3404 const char *suffix;
3405
3406 switch (which_alternative)
3407 {
3408 case 0:
3409 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3410 break;
3411 case 1:
3412 case 2:
3413 case 3:
3414 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3415 break;
3416 default:
3417 gcc_unreachable ();
3418 }
3419
3420 switch (get_attr_mode (insn))
3421 {
3422 case MODE_V8SF:
3423 case MODE_V4SF:
3424 suffix = "ps";
3425 break;
3426 case MODE_OI:
3427 case MODE_TI:
3428 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
3429 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3430 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3431 break;
3432 default:
3433 suffix = "<ssemodesuffix>";
3434 }
3435
3436 snprintf (buf, sizeof (buf), ops, suffix);
3437 output_asm_insn (buf, operands);
3438 return "";
3439 }
3440 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3441 (set_attr "type" "sselog")
3442 (set_attr "prefix" "orig,maybe_evex,evex,evex")
3443 (set (attr "mode")
3444 (cond [(and (match_test "<mask_applied>")
3445 (and (eq_attr "alternative" "1")
3446 (match_test "!TARGET_AVX512DQ")))
3447 (const_string "<sseintvecmode2>")
3448 (eq_attr "alternative" "3")
3449 (const_string "<sseintvecmode2>")
3450 (and (match_test "<MODE_SIZE> == 16")
3451 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3452 (const_string "<ssePSmode>")
3453 (match_test "TARGET_AVX")
3454 (const_string "<MODE>")
3455 (match_test "optimize_function_for_size_p (cfun)")
3456 (const_string "V4SF")
3457 ]
3458 (const_string "<MODE>")))])
3459
3460 (define_insn "*<code><mode>3<mask_name>"
3461 [(set (match_operand:VF_512 0 "register_operand" "=v")
3462 (any_logic:VF_512
3463 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3464 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3465 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3466 {
3467 char buf[128];
3468 const char *ops;
3469 const char *suffix;
3470
3471 suffix = "<ssemodesuffix>";
3472 ops = "";
3473
3474 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3475 if (!TARGET_AVX512DQ)
3476 {
3477 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3478 ops = "p";
3479 }
3480
3481 snprintf (buf, sizeof (buf),
3482 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3483 ops, suffix);
3484 output_asm_insn (buf, operands);
3485 return "";
3486 }
3487 [(set_attr "type" "sselog")
3488 (set_attr "prefix" "evex")
3489 (set (attr "mode")
3490 (if_then_else (match_test "TARGET_AVX512DQ")
3491 (const_string "<sseinsnmode>")
3492 (const_string "XI")))])
3493
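;; copysign and xorsign are expanded via sign-bit masks built by
;; ix86_build_signbit_mask: copysign (x, y) keeps the magnitude of x and
;; the sign of y, while xorsign (x, y) flips the sign of x when the sign
;; bit of y is set.  A hedged C sketch of the scalar double case
;; (illustration only, the function name is not part of this file):
;;
;;   #include <string.h>
;;
;;   double copysign_sketch (double x, double y)
;;   {
;;     const unsigned long long signmask = 0x8000000000000000ULL;
;;     unsigned long long xb, yb, rb;
;;     memcpy (&xb, &x, sizeof xb);
;;     memcpy (&yb, &y, sizeof yb);
;;     rb = (xb & ~signmask) | (yb & signmask);  /* magnitude of x, sign of y */
;;     memcpy (&x, &rb, sizeof x);
;;     return x;
;;   }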
3494 (define_expand "copysign<mode>3"
3495 [(set (match_dup 4)
3496 (and:VF
3497 (not:VF (match_dup 3))
3498 (match_operand:VF 1 "vector_operand")))
3499 (set (match_dup 5)
3500 (and:VF (match_dup 3)
3501 (match_operand:VF 2 "vector_operand")))
3502 (set (match_operand:VF 0 "register_operand")
3503 (ior:VF (match_dup 4) (match_dup 5)))]
3504 "TARGET_SSE"
3505 {
3506 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3507
3508 operands[4] = gen_reg_rtx (<MODE>mode);
3509 operands[5] = gen_reg_rtx (<MODE>mode);
3510 })
3511
3512 (define_expand "xorsign<mode>3"
3513 [(set (match_dup 4)
3514 (and:VF (match_dup 3)
3515 (match_operand:VF 2 "vector_operand")))
3516 (set (match_operand:VF 0 "register_operand")
3517 (xor:VF (match_dup 4)
3518 (match_operand:VF 1 "vector_operand")))]
3519 "TARGET_SSE"
3520 {
3521 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3522
3523 operands[4] = gen_reg_rtx (<MODE>mode);
3524 })
3525
3526 ;; Also define scalar versions. These are used for abs, neg, and
3527 ;; conditional move. Using subregs into vector modes causes register
3528 ;; allocation lossage. These patterns do not allow memory operands
3529 ;; because the native instructions read the full 128 bits.
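;; For instance (a hedged illustration, not taken from this file), with
;; -O2 -msse2 a scalar negation such as
;;
;;   double negate (double x) { return -x; }
;;
;; is typically emitted as an xorpd of %xmm0 with a constant sign-bit
;; mask, i.e. one of the vector-mode logical patterns below applied to a
;; scalar value.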
3530
3531 (define_insn "*andnot<mode>3"
3532 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3533 (and:MODEF
3534 (not:MODEF
3535 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
3536 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3537 "SSE_FLOAT_MODE_P (<MODE>mode)"
3538 {
3539 char buf[128];
3540 const char *ops;
3541 const char *suffix
3542 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3543
3544 switch (which_alternative)
3545 {
3546 case 0:
3547 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3548 break;
3549 case 1:
3550 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3551 break;
3552 case 2:
3553 if (TARGET_AVX512DQ)
3554 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3555 else
3556 {
3557 suffix = <MODE>mode == DFmode ? "q" : "d";
3558 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3559 }
3560 break;
3561 case 3:
3562 if (TARGET_AVX512DQ)
3563 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3564 else
3565 {
3566 suffix = <MODE>mode == DFmode ? "q" : "d";
3567 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3568 }
3569 break;
3570 default:
3571 gcc_unreachable ();
3572 }
3573
3574 snprintf (buf, sizeof (buf), ops, suffix);
3575 output_asm_insn (buf, operands);
3576 return "";
3577 }
3578 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3579 (set_attr "type" "sselog")
3580 (set_attr "prefix" "orig,vex,evex,evex")
3581 (set (attr "mode")
3582 (cond [(eq_attr "alternative" "2")
3583 (if_then_else (match_test "TARGET_AVX512DQ")
3584 (const_string "<ssevecmode>")
3585 (const_string "TI"))
3586 (eq_attr "alternative" "3")
3587 (if_then_else (match_test "TARGET_AVX512DQ")
3588 (const_string "<avx512fvecmode>")
3589 (const_string "XI"))
3590 (and (match_test "<MODE_SIZE> == 16")
3591 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3592 (const_string "V4SF")
3593 (match_test "TARGET_AVX")
3594 (const_string "<ssevecmode>")
3595 (match_test "optimize_function_for_size_p (cfun)")
3596 (const_string "V4SF")
3597 ]
3598 (const_string "<ssevecmode>")))])
3599
3600 (define_insn "*andnottf3"
3601 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3602 (and:TF
3603 (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
3604 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3605 "TARGET_SSE"
3606 {
3607 char buf[128];
3608 const char *ops;
3609 const char *tmp
3610 = (which_alternative >= 2 ? "pandnq"
3611 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
3612
3613 switch (which_alternative)
3614 {
3615 case 0:
3616 ops = "%s\t{%%2, %%0|%%0, %%2}";
3617 break;
3618 case 1:
3619 case 2:
3620 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3621 break;
3622 case 3:
3623 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3624 break;
3625 default:
3626 gcc_unreachable ();
3627 }
3628
3629 snprintf (buf, sizeof (buf), ops, tmp);
3630 output_asm_insn (buf, operands);
3631 return "";
3632 }
3633 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3634 (set_attr "type" "sselog")
3635 (set (attr "prefix_data16")
3636 (if_then_else
3637 (and (eq_attr "alternative" "0")
3638 (eq_attr "mode" "TI"))
3639 (const_string "1")
3640 (const_string "*")))
3641 (set_attr "prefix" "orig,vex,evex,evex")
3642 (set (attr "mode")
3643 (cond [(eq_attr "alternative" "2")
3644 (const_string "TI")
3645 (eq_attr "alternative" "3")
3646 (const_string "XI")
3647 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3648 (const_string "V4SF")
3649 (match_test "TARGET_AVX")
3650 (const_string "TI")
3651 (ior (not (match_test "TARGET_SSE2"))
3652 (match_test "optimize_function_for_size_p (cfun)"))
3653 (const_string "V4SF")
3654 ]
3655 (const_string "TI")))])
3656
3657 (define_insn "*<code><mode>3"
3658 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3659 (any_logic:MODEF
3660 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
3661 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3662 "SSE_FLOAT_MODE_P (<MODE>mode)"
3663 {
3664 char buf[128];
3665 const char *ops;
3666 const char *suffix
3667 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3668
3669 switch (which_alternative)
3670 {
3671 case 0:
3672 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3673 break;
3674 case 2:
3675 if (!TARGET_AVX512DQ)
3676 {
3677 suffix = <MODE>mode == DFmode ? "q" : "d";
3678 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3679 break;
3680 }
3681 /* FALLTHRU */
3682 case 1:
3683 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3684 break;
3685 case 3:
3686 if (TARGET_AVX512DQ)
3687 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3688 else
3689 {
3690 suffix = <MODE>mode == DFmode ? "q" : "d";
3691 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3692 }
3693 break;
3694 default:
3695 gcc_unreachable ();
3696 }
3697
3698 snprintf (buf, sizeof (buf), ops, suffix);
3699 output_asm_insn (buf, operands);
3700 return "";
3701 }
3702 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3703 (set_attr "type" "sselog")
3704 (set_attr "prefix" "orig,vex,evex,evex")
3705 (set (attr "mode")
3706 (cond [(eq_attr "alternative" "2")
3707 (if_then_else (match_test "TARGET_AVX512DQ")
3708 (const_string "<ssevecmode>")
3709 (const_string "TI"))
3710 (eq_attr "alternative" "3")
3711 (if_then_else (match_test "TARGET_AVX512DQ")
3712 (const_string "<avx512fvecmode>")
3713 (const_string "XI"))
3714 (and (match_test "<MODE_SIZE> == 16")
3715 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3716 (const_string "V4SF")
3717 (match_test "TARGET_AVX")
3718 (const_string "<ssevecmode>")
3719 (match_test "optimize_function_for_size_p (cfun)")
3720 (const_string "V4SF")
3721 ]
3722 (const_string "<ssevecmode>")))])
3723
3724 (define_expand "<code>tf3"
3725 [(set (match_operand:TF 0 "register_operand")
3726 (any_logic:TF
3727 (match_operand:TF 1 "vector_operand")
3728 (match_operand:TF 2 "vector_operand")))]
3729 "TARGET_SSE"
3730 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3731
3732 (define_insn "*<code>tf3"
3733 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3734 (any_logic:TF
3735 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
3736 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3737 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3738 {
3739 char buf[128];
3740 const char *ops;
3741 const char *tmp
3742 = (which_alternative >= 2 ? "p<logic>q"
3743 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
3744
3745 switch (which_alternative)
3746 {
3747 case 0:
3748 ops = "%s\t{%%2, %%0|%%0, %%2}";
3749 break;
3750 case 1:
3751 case 2:
3752 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3753 break;
3754 case 3:
3755 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3756 break;
3757 default:
3758 gcc_unreachable ();
3759 }
3760
3761 snprintf (buf, sizeof (buf), ops, tmp);
3762 output_asm_insn (buf, operands);
3763 return "";
3764 }
3765 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3766 (set_attr "type" "sselog")
3767 (set (attr "prefix_data16")
3768 (if_then_else
3769 (and (eq_attr "alternative" "0")
3770 (eq_attr "mode" "TI"))
3771 (const_string "1")
3772 (const_string "*")))
3773 (set_attr "prefix" "orig,vex,evex,evex")
3774 (set (attr "mode")
3775 (cond [(eq_attr "alternative" "2")
3776 (const_string "TI")
3777 (eq_attr "alternative" "3")
3778 (const_string "XI")
3779 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3780 (const_string "V4SF")
3781 (match_test "TARGET_AVX")
3782 (const_string "TI")
3783 (ior (not (match_test "TARGET_SSE2"))
3784 (match_test "optimize_function_for_size_p (cfun)"))
3785 (const_string "V4SF")
3786 ]
3787 (const_string "TI")))])
3788
3789 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3790 ;;
3791 ;; FMA floating point multiply/accumulate instructions. These include
3792 ;; scalar versions of the instructions as well as vector versions.
3793 ;;
3794 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3795
3796 ;; The standard names for scalar FMA are only available with SSE math enabled.
3797 ;; The AVX512F CPUID bit enables EVEX-encoded scalar and 512-bit FMA and does
3798 ;; not depend on the FMA bit, so we enable fma for TARGET_AVX512F even when
3799 ;; TARGET_FMA and TARGET_FMA4 are both false.
3800 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3801 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3802 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3803 ;; that supports AVX512F also supports FMA so we can ignore this for now.
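;; As a hedged illustration (not part of this file): with -O2 -mfma,
;;
;;   #include <math.h>
;;   double mac (double a, double b, double c) { return fma (a, b, c); }
;;
;; goes through the fma<mode>4 standard name below and assembles to a
;; single vfmadd132sd/vfmadd213sd, since SSE math is the default for
;; x86-64.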
3804 (define_mode_iterator FMAMODEM
3805 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3806 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3807 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3808 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3809 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3810 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3811 (V16SF "TARGET_AVX512F")
3812 (V8DF "TARGET_AVX512F")])
3813
3814 (define_expand "fma<mode>4"
3815 [(set (match_operand:FMAMODEM 0 "register_operand")
3816 (fma:FMAMODEM
3817 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3818 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3819 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3820
3821 (define_expand "fms<mode>4"
3822 [(set (match_operand:FMAMODEM 0 "register_operand")
3823 (fma:FMAMODEM
3824 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3825 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3826 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3827
3828 (define_expand "fnma<mode>4"
3829 [(set (match_operand:FMAMODEM 0 "register_operand")
3830 (fma:FMAMODEM
3831 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3832 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3833 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3834
3835 (define_expand "fnms<mode>4"
3836 [(set (match_operand:FMAMODEM 0 "register_operand")
3837 (fma:FMAMODEM
3838 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3839 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3840 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3841
3842 ;; The builtins for intrinsics are not constrained by SSE math enabled.
3843 (define_mode_iterator FMAMODE_AVX512
3844 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3845 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3846 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3847 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3848 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3849 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3850 (V16SF "TARGET_AVX512F")
3851 (V8DF "TARGET_AVX512F")])
3852
3853 (define_mode_iterator FMAMODE
3854 [SF DF V4SF V2DF V8SF V4DF])
3855
3856 (define_expand "fma4i_fmadd_<mode>"
3857 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3858 (fma:FMAMODE_AVX512
3859 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3860 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3861 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3862
3863 (define_expand "fma4i_fmsub_<mode>"
3864 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3865 (fma:FMAMODE_AVX512
3866 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3867 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3868 (neg:FMAMODE_AVX512
3869 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
3870
3871 (define_expand "fma4i_fnmadd_<mode>"
3872 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3873 (fma:FMAMODE_AVX512
3874 (neg:FMAMODE_AVX512
3875 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
3876 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3877 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3878
3879 (define_expand "fma4i_fnmsub_<mode>"
3880 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3881 (fma:FMAMODE_AVX512
3882 (neg:FMAMODE_AVX512
3883 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
3884 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3885 (neg:FMAMODE_AVX512
3886 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
3887
3888 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3889 [(match_operand:VF_AVX512VL 0 "register_operand")
3890 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3891 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3892 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3893 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3894 "TARGET_AVX512F && <round_mode512bit_condition>"
3895 {
3896 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3897 operands[0], operands[1], operands[2], operands[3],
3898 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3899 DONE;
3900 })
3901
3902 (define_insn "*fma_fmadd_<mode>"
3903 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3904 (fma:FMAMODE
3905 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3906 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3907 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3908 "TARGET_FMA || TARGET_FMA4"
3909 "@
3910 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3911 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3912 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3913 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3914 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3915 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3916 (set_attr "type" "ssemuladd")
3917 (set_attr "mode" "<MODE>")])
3918
3919 ;; Assume AVX-512F as the baseline; the 128/256-bit modes additionally
3920 ;; require AVX512VL.
3920 (define_mode_iterator VF_SF_AVX512VL
3921 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3922 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3923
3924 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3925 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3926 (fma:VF_SF_AVX512VL
3927 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3928 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3929 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3930 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3931 "@
3932 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3933 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3934 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3935 [(set_attr "type" "ssemuladd")
3936 (set_attr "mode" "<MODE>")])
3937
3938 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1"
3939 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
3940 (fma:VF_AVX512
3941 (match_operand:VF_AVX512 1 "register_operand" "0,v")
3942 (match_operand:VF_AVX512 2 "register_operand" "v,0")
3943 (vec_duplicate:VF_AVX512
3944 (match_operand:<ssescalarmode> 3 "memory_operand" "m,m"))))]
3945 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3946 "vfmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
3947 [(set_attr "type" "ssemuladd")
3948 (set_attr "mode" "<MODE>")])
3949
3950 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2"
3951 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
3952 (fma:VF_AVX512
3953 (vec_duplicate:VF_AVX512
3954 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
3955 (match_operand:VF_AVX512 2 "register_operand" "0,v")
3956 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
3957 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3958 "@
3959 vfmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
3960 vfmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
3961 [(set_attr "type" "ssemuladd")
3962 (set_attr "mode" "<MODE>")])
3963
3964 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3"
3965 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
3966 (fma:VF_AVX512
3967 (match_operand:VF_AVX512 1 "register_operand" "0,v")
3968 (vec_duplicate:VF_AVX512
3969 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
3970 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
3971 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3972 "@
3973 vfmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
3974 vfmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
3975 [(set_attr "type" "ssemuladd")
3976 (set_attr "mode" "<MODE>")])
3977
3978 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3979 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3980 (vec_merge:VF_AVX512VL
3981 (fma:VF_AVX512VL
3982 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3983 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
3984 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3985 (match_dup 1)
3986 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3987 "TARGET_AVX512F && <round_mode512bit_condition>"
3988 "@
3989 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3990 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3991 [(set_attr "type" "ssemuladd")
3992 (set_attr "mode" "<MODE>")])
3993
3994 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3995 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3996 (vec_merge:VF_AVX512VL
3997 (fma:VF_AVX512VL
3998 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
3999 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4000 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4001 (match_dup 3)
4002 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4003 "TARGET_AVX512F"
4004 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4005 [(set_attr "type" "ssemuladd")
4006 (set_attr "mode" "<MODE>")])
4007
4008 (define_insn "*fma_fmsub_<mode>"
4009 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4010 (fma:FMAMODE
4011 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4012 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4013 (neg:FMAMODE
4014 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4015 "TARGET_FMA || TARGET_FMA4"
4016 "@
4017 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4018 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4019 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4020 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4021 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4022 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4023 (set_attr "type" "ssemuladd")
4024 (set_attr "mode" "<MODE>")])
4025
4026 (define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
4027 [(match_operand:VF_AVX512VL 0 "register_operand")
4028 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4029 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4030 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4031 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4032 "TARGET_AVX512F && <round_mode512bit_condition>"
4033 {
4034 emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
4035 operands[0], operands[1], operands[2], operands[3],
4036 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4037 DONE;
4038 })
4039
4040 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
4041 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4042 (fma:VF_SF_AVX512VL
4043 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4044 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4045 (neg:VF_SF_AVX512VL
4046 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
4047 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4048 "@
4049 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4050 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4051 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4052 [(set_attr "type" "ssemuladd")
4053 (set_attr "mode" "<MODE>")])
4054
4055 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1"
4056 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4057 (fma:VF_AVX512
4058 (match_operand:VF_AVX512 1 "register_operand" "0,v")
4059 (match_operand:VF_AVX512 2 "register_operand" "v,0")
4060 (neg:VF_AVX512
4061 (vec_duplicate:VF_AVX512
4062 (match_operand:<ssescalarmode> 3 "memory_operand" "m,m")))))]
4063 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4064 "vfmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4065 [(set_attr "type" "ssemuladd")
4066 (set_attr "mode" "<MODE>")])
4067
4068 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2"
4069 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4070 (fma:VF_AVX512
4071 (vec_duplicate:VF_AVX512
4072 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
4073 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4074 (neg:VF_AVX512
4075 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4076 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4077 "@
4078 vfmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4079 vfmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4080 [(set_attr "type" "ssemuladd")
4081 (set_attr "mode" "<MODE>")])
4082
4083 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3"
4084 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4085 (fma:VF_AVX512
4086 (match_operand:VF_AVX512 1 "register_operand" "0,v")
4087 (vec_duplicate:VF_AVX512
4088 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4089 (neg:VF_AVX512
4090 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4091 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4092 "@
4093 vfmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4094 vfmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4095 [(set_attr "type" "ssemuladd")
4096 (set_attr "mode" "<MODE>")])
4097
4098 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
4099 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4100 (vec_merge:VF_AVX512VL
4101 (fma:VF_AVX512VL
4102 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4103 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4104 (neg:VF_AVX512VL
4105 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4106 (match_dup 1)
4107 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4108 "TARGET_AVX512F"
4109 "@
4110 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4111 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4112 [(set_attr "type" "ssemuladd")
4113 (set_attr "mode" "<MODE>")])
4114
4115 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
4116 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4117 (vec_merge:VF_AVX512VL
4118 (fma:VF_AVX512VL
4119 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4120 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4121 (neg:VF_AVX512VL
4122 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4123 (match_dup 3)
4124 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4125 "TARGET_AVX512F && <round_mode512bit_condition>"
4126 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4127 [(set_attr "type" "ssemuladd")
4128 (set_attr "mode" "<MODE>")])
4129
4130 (define_insn "*fma_fnmadd_<mode>"
4131 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4132 (fma:FMAMODE
4133 (neg:FMAMODE
4134 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4135 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4136 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4137 "TARGET_FMA || TARGET_FMA4"
4138 "@
4139 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4140 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4141 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4142 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4143 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4144 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4145 (set_attr "type" "ssemuladd")
4146 (set_attr "mode" "<MODE>")])
4147
4148 (define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>"
4149 [(match_operand:VF_AVX512VL 0 "register_operand")
4150 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4151 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4152 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4153 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4154 "TARGET_AVX512F && <round_mode512bit_condition>"
4155 {
4156 emit_insn (gen_fma_fnmadd_<mode>_maskz_1<round_expand_name> (
4157 operands[0], operands[1], operands[2], operands[3],
4158 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4159 DONE;
4160 })
4161
4162 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
4163 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4164 (fma:VF_SF_AVX512VL
4165 (neg:VF_SF_AVX512VL
4166 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
4167 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4168 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
4169 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4170 "@
4171 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4172 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4173 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4174 [(set_attr "type" "ssemuladd")
4175 (set_attr "mode" "<MODE>")])
4176
4177 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_1"
4178 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4179 (fma:VF_AVX512
4180 (neg:VF_AVX512
4181 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4182 (match_operand:VF_AVX512 2 "register_operand" "v,0")
4183 (vec_duplicate:VF_AVX512
4184 (match_operand:<ssescalarmode> 3 "memory_operand" "m,m"))))]
4185 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4186 "vfnmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4187 [(set_attr "type" "ssemuladd")
4188 (set_attr "mode" "<MODE>")])
4189
4190 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_2"
4191 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4192 (fma:VF_AVX512
4193 (neg:VF_AVX512
4194 (vec_duplicate:VF_AVX512
4195 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
4196 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4197 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4198 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4199 "@
4200 vfnmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4201 vfnmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4202 [(set_attr "type" "ssemuladd")
4203 (set_attr "mode" "<MODE>")])
4204
4205 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_3"
4206 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4207 (fma:VF_AVX512
4208 (neg:VF_AVX512
4209 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4210 (vec_duplicate:VF_AVX512
4211 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4212 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4213 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4214 "@
4215 vfnmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4216 vfnmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4217 [(set_attr "type" "ssemuladd")
4218 (set_attr "mode" "<MODE>")])
4219
4220 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
4221 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4222 (vec_merge:VF_AVX512VL
4223 (fma:VF_AVX512VL
4224 (neg:VF_AVX512VL
4225 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4226 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4227 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4228 (match_dup 1)
4229 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4230 "TARGET_AVX512F && <round_mode512bit_condition>"
4231 "@
4232 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4233 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4234 [(set_attr "type" "ssemuladd")
4235 (set_attr "mode" "<MODE>")])
4236
4237 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
4238 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4239 (vec_merge:VF_AVX512VL
4240 (fma:VF_AVX512VL
4241 (neg:VF_AVX512VL
4242 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4243 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4244 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4245 (match_dup 3)
4246 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4247 "TARGET_AVX512F && <round_mode512bit_condition>"
4248 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4249 [(set_attr "type" "ssemuladd")
4250 (set_attr "mode" "<MODE>")])
4251
4252 (define_insn "*fma_fnmsub_<mode>"
4253 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4254 (fma:FMAMODE
4255 (neg:FMAMODE
4256 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4257 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4258 (neg:FMAMODE
4259 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4260 "TARGET_FMA || TARGET_FMA4"
4261 "@
4262 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4263 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4264 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4265 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4266 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4267 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4268 (set_attr "type" "ssemuladd")
4269 (set_attr "mode" "<MODE>")])
4270
4271 (define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
4272 [(match_operand:VF_AVX512VL 0 "register_operand")
4273 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4274 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4275 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4276 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4277 "TARGET_AVX512F && <round_mode512bit_condition>"
4278 {
4279 emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
4280 operands[0], operands[1], operands[2], operands[3],
4281 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4282 DONE;
4283 })
4284
4285 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
4286 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4287 (fma:VF_SF_AVX512VL
4288 (neg:VF_SF_AVX512VL
4289 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
4290 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4291 (neg:VF_SF_AVX512VL
4292 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
4293 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4294 "@
4295 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4296 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4297 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4298 [(set_attr "type" "ssemuladd")
4299 (set_attr "mode" "<MODE>")])
4300
4301 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1"
4302 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4303 (fma:VF_AVX512
4304 (neg:VF_AVX512
4305 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4306 (match_operand:VF_AVX512 2 "register_operand" "v,0")
4307 (neg:VF_AVX512
4308 (vec_duplicate:VF_AVX512
4309 (match_operand:<ssescalarmode> 3 "memory_operand" "m,m")))))]
4310 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4311 "vfnmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4312 [(set_attr "type" "ssemuladd")
4313 (set_attr "mode" "<MODE>")])
4314
4315 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2"
4316 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4317 (fma:VF_AVX512
4318 (neg:VF_AVX512
4319 (vec_duplicate:VF_AVX512
4320 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
4321 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4322 (neg:VF_AVX512
4323 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4324 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4325 "@
4326 vfnmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4327 vfnmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4328 [(set_attr "type" "ssemuladd")
4329 (set_attr "mode" "<MODE>")])
4330
4331 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3"
4332 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4333 (fma:VF_AVX512
4334 (neg:VF_AVX512
4335 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4336 (vec_duplicate:VF_AVX512
4337 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4338 (neg:VF_AVX512
4339 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4340 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4341 "@
4342 vfnmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4343 vfnmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4344 [(set_attr "type" "ssemuladd")
4345 (set_attr "mode" "<MODE>")])
4346
4347 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
4348 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4349 (vec_merge:VF_AVX512VL
4350 (fma:VF_AVX512VL
4351 (neg:VF_AVX512VL
4352 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4353 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4354 (neg:VF_AVX512VL
4355 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4356 (match_dup 1)
4357 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4358 "TARGET_AVX512F && <round_mode512bit_condition>"
4359 "@
4360 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4361 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4362 [(set_attr "type" "ssemuladd")
4363 (set_attr "mode" "<MODE>")])
4364
4365 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
4366 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4367 (vec_merge:VF_AVX512VL
4368 (fma:VF_AVX512VL
4369 (neg:VF_AVX512VL
4370 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4371 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4372 (neg:VF_AVX512VL
4373 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4374 (match_dup 3)
4375 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4376 "TARGET_AVX512F"
4377 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4378 [(set_attr "type" "ssemuladd")
4379 (set_attr "mode" "<MODE>")])
4380
4381 ;; FMA parallel floating point multiply-addsub and multiply-subadd operations.
4382
4383 ;; It would be possible to represent these without the UNSPEC as
4384 ;;
4385 ;; (vec_merge
4386 ;; (fma op1 op2 op3)
4387 ;; (fma op1 op2 (neg op3))
4388 ;; (merge-const))
4389 ;;
4390 ;; But this doesn't seem useful in practice.
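;; Element-wise (a sketch of the semantics, not new RTL), UNSPEC_FMADDSUB
;; with a plain operand 3 computes
;;
;;   result[i] = op1[i] * op2[i] - op3[i]   /* even i */
;;   result[i] = op1[i] * op2[i] + op3[i]   /* odd i  */
;;
;; and wrapping operand 3 in (neg ...) swaps the two, giving the
;; fmsubadd forms.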
4391
4392 (define_expand "fmaddsub_<mode>"
4393 [(set (match_operand:VF 0 "register_operand")
4394 (unspec:VF
4395 [(match_operand:VF 1 "nonimmediate_operand")
4396 (match_operand:VF 2 "nonimmediate_operand")
4397 (match_operand:VF 3 "nonimmediate_operand")]
4398 UNSPEC_FMADDSUB))]
4399 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4400
4401 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
4402 [(match_operand:VF_AVX512VL 0 "register_operand")
4403 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4404 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4405 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4406 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4407 "TARGET_AVX512F"
4408 {
4409 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
4410 operands[0], operands[1], operands[2], operands[3],
4411 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4412 DONE;
4413 })
4414
4415 (define_insn "*fma_fmaddsub_<mode>"
4416 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4417 (unspec:VF_128_256
4418 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4419 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4420 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
4421 UNSPEC_FMADDSUB))]
4422 "TARGET_FMA || TARGET_FMA4"
4423 "@
4424 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4425 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4426 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4427 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4428 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4429 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4430 (set_attr "type" "ssemuladd")
4431 (set_attr "mode" "<MODE>")])
4432
4433 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
4434 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4435 (unspec:VF_SF_AVX512VL
4436 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4437 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4438 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
4439 UNSPEC_FMADDSUB))]
4440 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4441 "@
4442 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4443 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4444 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4445 [(set_attr "type" "ssemuladd")
4446 (set_attr "mode" "<MODE>")])
4447
4448 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4449 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4450 (vec_merge:VF_AVX512VL
4451 (unspec:VF_AVX512VL
4452 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4453 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4454 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")]
4455 UNSPEC_FMADDSUB)
4456 (match_dup 1)
4457 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4458 "TARGET_AVX512F"
4459 "@
4460 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4461 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4462 [(set_attr "type" "ssemuladd")
4463 (set_attr "mode" "<MODE>")])
4464
4465 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4466 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4467 (vec_merge:VF_AVX512VL
4468 (unspec:VF_AVX512VL
4469 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4470 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4471 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4472 UNSPEC_FMADDSUB)
4473 (match_dup 3)
4474 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4475 "TARGET_AVX512F"
4476 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4477 [(set_attr "type" "ssemuladd")
4478 (set_attr "mode" "<MODE>")])
4479
4480 (define_insn "*fma_fmsubadd_<mode>"
4481 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4482 (unspec:VF_128_256
4483 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4484 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4485 (neg:VF_128_256
4486 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4487 UNSPEC_FMADDSUB))]
4488 "TARGET_FMA || TARGET_FMA4"
4489 "@
4490 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4491 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4492 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4493 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4494 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4495 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4496 (set_attr "type" "ssemuladd")
4497 (set_attr "mode" "<MODE>")])
4498
4499 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4500 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4501 (unspec:VF_SF_AVX512VL
4502 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4503 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4504 (neg:VF_SF_AVX512VL
4505 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4506 UNSPEC_FMADDSUB))]
4507 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4508 "@
4509 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4510 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4511 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4512 [(set_attr "type" "ssemuladd")
4513 (set_attr "mode" "<MODE>")])
4514
4515 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4516 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4517 (vec_merge:VF_AVX512VL
4518 (unspec:VF_AVX512VL
4519 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4520 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4521 (neg:VF_AVX512VL
4522 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
4523 UNSPEC_FMADDSUB)
4524 (match_dup 1)
4525 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4526 "TARGET_AVX512F"
4527 "@
4528 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4529 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4530 [(set_attr "type" "ssemuladd")
4531 (set_attr "mode" "<MODE>")])
4532
4533 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4534 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4535 (vec_merge:VF_AVX512VL
4536 (unspec:VF_AVX512VL
4537 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4538 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4539 (neg:VF_AVX512VL
4540 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4541 UNSPEC_FMADDSUB)
4542 (match_dup 3)
4543 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4544 "TARGET_AVX512F"
4545 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4546 [(set_attr "type" "ssemuladd")
4547 (set_attr "mode" "<MODE>")])
4548
4549 ;; FMA3 floating point scalar intrinsics. These merge the result with
4550 ;; the high-order elements of the destination register.
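;;
;; As a sketch of the intended semantics (derived from the vec_merge/fma
;; RTL in the patterns below; shown as hypothetical C for the V4SF case,
;; not as an authoritative statement of the intrinsic mapping):
;;
;;   r[0] = op1[0] * op2[0] + op3[0];
;;   r[1] = op1[1];  r[2] = op1[2];  r[3] = op1[3];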
4551
4552 (define_expand "fmai_vmfmadd_<mode><round_name>"
4553 [(set (match_operand:VF_128 0 "register_operand")
4554 (vec_merge:VF_128
4555 (fma:VF_128
4556 (match_operand:VF_128 1 "register_operand")
4557 (match_operand:VF_128 2 "<round_nimm_predicate>")
4558 (match_operand:VF_128 3 "<round_nimm_predicate>"))
4559 (match_dup 1)
4560 (const_int 1)))]
4561 "TARGET_FMA")
4562
4563 (define_expand "fmai_vmfmsub_<mode><round_name>"
4564 [(set (match_operand:VF_128 0 "register_operand")
4565 (vec_merge:VF_128
4566 (fma:VF_128
4567 (match_operand:VF_128 1 "register_operand")
4568 (match_operand:VF_128 2 "<round_nimm_predicate>")
4569 (neg:VF_128
4570 (match_operand:VF_128 3 "<round_nimm_predicate>")))
4571 (match_dup 1)
4572 (const_int 1)))]
4573 "TARGET_FMA")
4574
4575 (define_expand "fmai_vmfnmadd_<mode><round_name>"
4576 [(set (match_operand:VF_128 0 "register_operand")
4577 (vec_merge:VF_128
4578 (fma:VF_128
4579 (neg:VF_128
4580 (match_operand:VF_128 2 "<round_nimm_predicate>"))
4581 (match_operand:VF_128 1 "register_operand")
4582 (match_operand:VF_128 3 "<round_nimm_predicate>"))
4583 (match_dup 1)
4584 (const_int 1)))]
4585 "TARGET_FMA")
4586
4587 (define_expand "fmai_vmfnmsub_<mode><round_name>"
4588 [(set (match_operand:VF_128 0 "register_operand")
4589 (vec_merge:VF_128
4590 (fma:VF_128
4591 (neg:VF_128
4592 (match_operand:VF_128 2 "<round_nimm_predicate>"))
4593 (match_operand:VF_128 1 "register_operand")
4594 (neg:VF_128
4595 (match_operand:VF_128 3 "<round_nimm_predicate>")))
4596 (match_dup 1)
4597 (const_int 1)))]
4598 "TARGET_FMA")
4599
4600 (define_insn "*fmai_fmadd_<mode>"
4601 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4602 (vec_merge:VF_128
4603 (fma:VF_128
4604 (match_operand:VF_128 1 "register_operand" "0,0")
4605 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
4606 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4607 (match_dup 1)
4608 (const_int 1)))]
4609 "TARGET_FMA || TARGET_AVX512F"
4610 "@
4611 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4612 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4613 [(set_attr "type" "ssemuladd")
4614 (set_attr "mode" "<MODE>")])
4615
4616 (define_insn "*fmai_fmsub_<mode>"
4617 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4618 (vec_merge:VF_128
4619 (fma:VF_128
4620 (match_operand:VF_128 1 "register_operand" "0,0")
4621 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4622 (neg:VF_128
4623 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4624 (match_dup 1)
4625 (const_int 1)))]
4626 "TARGET_FMA || TARGET_AVX512F"
4627 "@
4628 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4629 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4630 [(set_attr "type" "ssemuladd")
4631 (set_attr "mode" "<MODE>")])
4632
4633 (define_insn "*fmai_fnmadd_<mode><round_name>"
4634 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4635 (vec_merge:VF_128
4636 (fma:VF_128
4637 (neg:VF_128
4638 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4639 (match_operand:VF_128 1 "register_operand" "0,0")
4640 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4641 (match_dup 1)
4642 (const_int 1)))]
4643 "TARGET_FMA || TARGET_AVX512F"
4644 "@
4645 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4646 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4647 [(set_attr "type" "ssemuladd")
4648 (set_attr "mode" "<MODE>")])
4649
4650 (define_insn "*fmai_fnmsub_<mode><round_name>"
4651 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4652 (vec_merge:VF_128
4653 (fma:VF_128
4654 (neg:VF_128
4655 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4656 (match_operand:VF_128 1 "register_operand" "0,0")
4657 (neg:VF_128
4658 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4659 (match_dup 1)
4660 (const_int 1)))]
4661 "TARGET_FMA || TARGET_AVX512F"
4662 "@
4663 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4664 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4665 [(set_attr "type" "ssemuladd")
4666 (set_attr "mode" "<MODE>")])
4667
4668 (define_insn "avx512f_vmfmadd_<mode>_mask<round_name>"
4669 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4670 (vec_merge:VF_128
4671 (vec_merge:VF_128
4672 (fma:VF_128
4673 (match_operand:VF_128 1 "register_operand" "0,0")
4674 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4675 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4676 (match_dup 1)
4677 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4678 (match_dup 1)
4679 (const_int 1)))]
4680 "TARGET_AVX512F"
4681 "@
4682 vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4683 vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4684 [(set_attr "type" "ssemuladd")
4685 (set_attr "mode" "<MODE>")])
4686
4687 (define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>"
4688 [(set (match_operand:VF_128 0 "register_operand" "=v")
4689 (vec_merge:VF_128
4690 (vec_merge:VF_128
4691 (fma:VF_128
4692 (match_operand:VF_128 1 "<round_nimm_predicate>" "%v")
4693 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")
4694 (match_operand:VF_128 3 "register_operand" "0"))
4695 (match_dup 3)
4696 (match_operand:QI 4 "register_operand" "Yk"))
4697 (match_dup 3)
4698 (const_int 1)))]
4699 "TARGET_AVX512F"
4700 "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4701 [(set_attr "type" "ssemuladd")
4702 (set_attr "mode" "<MODE>")])
4703
4704 (define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>"
4705 [(match_operand:VF_128 0 "register_operand")
4706 (match_operand:VF_128 1 "<round_expand_nimm_predicate>")
4707 (match_operand:VF_128 2 "<round_expand_nimm_predicate>")
4708 (match_operand:VF_128 3 "<round_expand_nimm_predicate>")
4709 (match_operand:QI 4 "register_operand")]
4710 "TARGET_AVX512F"
4711 {
4712 emit_insn (gen_avx512f_vmfmadd_<mode>_maskz_1<round_expand_name> (
4713 operands[0], operands[1], operands[2], operands[3],
4714 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4715 DONE;
4716 })
4717
4718 (define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>"
4719 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4720 (vec_merge:VF_128
4721 (vec_merge:VF_128
4722 (fma:VF_128
4723 (match_operand:VF_128 1 "register_operand" "0,0")
4724 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4725 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4726 (match_operand:VF_128 4 "const0_operand" "C,C")
4727 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4728 (match_dup 1)
4729 (const_int 1)))]
4730 "TARGET_AVX512F"
4731 "@
4732 vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4733 vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4734 [(set_attr "type" "ssemuladd")
4735 (set_attr "mode" "<MODE>")])
4736
4737 (define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>"
4738 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4739 (vec_merge:VF_128
4740 (vec_merge:VF_128
4741 (fma:VF_128
4742 (match_operand:VF_128 1 "register_operand" "0,0")
4743 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4744 (neg:VF_128
4745 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4746 (match_dup 1)
4747 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4748 (match_dup 1)
4749 (const_int 1)))]
4750 "TARGET_AVX512F"
4751 "@
4752 vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4753 vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4754 [(set_attr "type" "ssemuladd")
4755 (set_attr "mode" "<MODE>")])
4756
4757 (define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>"
4758 [(set (match_operand:VF_128 0 "register_operand" "=v")
4759 (vec_merge:VF_128
4760 (vec_merge:VF_128
4761 (fma:VF_128
4762 (match_operand:VF_128 1 "<round_nimm_predicate>" "%v")
4763 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")
4764 (neg:VF_128
4765 (match_operand:VF_128 3 "register_operand" "0")))
4766 (match_dup 3)
4767 (match_operand:QI 4 "register_operand" "Yk"))
4768 (match_dup 3)
4769 (const_int 1)))]
4770 "TARGET_AVX512F"
4771 "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4772 [(set_attr "type" "ssemuladd")
4773 (set_attr "mode" "<MODE>")])
4774
4775 (define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>"
4776 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4777 (vec_merge:VF_128
4778 (vec_merge:VF_128
4779 (fma:VF_128
4780 (match_operand:VF_128 1 "register_operand" "0,0")
4781 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4782 (neg:VF_128
4783 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4784 (match_operand:VF_128 4 "const0_operand" "C,C")
4785 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4786 (match_dup 1)
4787 (const_int 1)))]
4788 "TARGET_AVX512F"
4789 "@
4790 vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4791 vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4792 [(set_attr "type" "ssemuladd")
4793 (set_attr "mode" "<MODE>")])
4794
4795 (define_insn "*avx512f_vmfnmadd_<mode>_mask<round_name>"
4796 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4797 (vec_merge:VF_128
4798 (vec_merge:VF_128
4799 (fma:VF_128
4800 (neg:VF_128
4801 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4802 (match_operand:VF_128 1 "register_operand" "0,0")
4803 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4804 (match_dup 1)
4805 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4806 (match_dup 1)
4807 (const_int 1)))]
4808 "TARGET_AVX512F"
4809 "@
4810 vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4811 vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4812 [(set_attr "type" "ssemuladd")
4813 (set_attr "mode" "<MODE>")])
4814
4815 (define_insn "*avx512f_vmfnmadd_<mode>_mask3<round_name>"
4816 [(set (match_operand:VF_128 0 "register_operand" "=v")
4817 (vec_merge:VF_128
4818 (vec_merge:VF_128
4819 (fma:VF_128
4820 (neg:VF_128
4821 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>"))
4822 (match_operand:VF_128 1 "<round_nimm_predicate>" "%v")
4823 (match_operand:VF_128 3 "register_operand" "0"))
4824 (match_dup 3)
4825 (match_operand:QI 4 "register_operand" "Yk"))
4826 (match_dup 3)
4827 (const_int 1)))]
4828 "TARGET_AVX512F"
4829 "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4830 [(set_attr "type" "ssemuladd")
4831 (set_attr "mode" "<MODE>")])
4832
4833 (define_insn "*avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
4834 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4835 (vec_merge:VF_128
4836 (vec_merge:VF_128
4837 (fma:VF_128
4838 (neg:VF_128
4839 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4840 (match_operand:VF_128 1 "register_operand" "0,0")
4841 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4842 (match_operand:VF_128 4 "const0_operand" "C,C")
4843 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4844 (match_dup 1)
4845 (const_int 1)))]
4846 "TARGET_AVX512F"
4847 "@
4848 vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4849 vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4850 [(set_attr "type" "ssemuladd")
4851 (set_attr "mode" "<MODE>")])
4852
4853 (define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>"
4854 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4855 (vec_merge:VF_128
4856 (vec_merge:VF_128
4857 (fma:VF_128
4858 (neg:VF_128
4859 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4860 (match_operand:VF_128 1 "register_operand" "0,0")
4861 (neg:VF_128
4862 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4863 (match_dup 1)
4864 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4865 (match_dup 1)
4866 (const_int 1)))]
4867 "TARGET_AVX512F"
4868 "@
4869 vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4870 vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4871 [(set_attr "type" "ssemuladd")
4872 (set_attr "mode" "<MODE>")])
4873
4874 (define_insn "*avx512f_vmfnmsub_<mode>_mask3<round_name>"
4875 [(set (match_operand:VF_128 0 "register_operand" "=v")
4876 (vec_merge:VF_128
4877 (vec_merge:VF_128
4878 (fma:VF_128
4879 (neg:VF_128
4880 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>"))
4881 (match_operand:VF_128 1 "<round_nimm_predicate>" "%v")
4882 (neg:VF_128
4883 (match_operand:VF_128 3 "register_operand" "0")))
4884 (match_dup 3)
4885 (match_operand:QI 4 "register_operand" "Yk"))
4886 (match_dup 3)
4887 (const_int 1)))]
4888 "TARGET_AVX512F"
4889 "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4890 [(set_attr "type" "ssemuladd")
4891 (set_attr "mode" "<MODE>")])
4892
4893 (define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>"
4894 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4895 (vec_merge:VF_128
4896 (vec_merge:VF_128
4897 (fma:VF_128
4898 (neg:VF_128
4899 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4900 (match_operand:VF_128 1 "register_operand" "0,0")
4901 (neg:VF_128
4902 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4903 (match_operand:VF_128 4 "const0_operand" "C,C")
4904 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4905 (match_dup 1)
4906 (const_int 1)))]
4907 "TARGET_AVX512F"
4908 "@
4909 vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4910 vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4911 [(set_attr "type" "ssemuladd")
4912 (set_attr "mode" "<MODE>")])
4913
4914 ;; FMA4 floating point scalar intrinsics. These write the
4915 ;; entire destination register, with the high-order elements zeroed.
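;;
;; Sketch of the difference from the FMA3 scalar patterns above (derived
;; from the vec_merge against a zero vector in the RTL below; hypothetical
;; C for the V4SF case):
;;
;;   r[0] = op1[0] * op2[0] + op3[0];
;;   r[1] = r[2] = r[3] = 0.0f;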
4916
4917 (define_expand "fma4i_vmfmadd_<mode>"
4918 [(set (match_operand:VF_128 0 "register_operand")
4919 (vec_merge:VF_128
4920 (fma:VF_128
4921 (match_operand:VF_128 1 "nonimmediate_operand")
4922 (match_operand:VF_128 2 "nonimmediate_operand")
4923 (match_operand:VF_128 3 "nonimmediate_operand"))
4924 (match_dup 4)
4925 (const_int 1)))]
4926 "TARGET_FMA4"
4927 "operands[4] = CONST0_RTX (<MODE>mode);")
4928
4929 (define_insn "*fma4i_vmfmadd_<mode>"
4930 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4931 (vec_merge:VF_128
4932 (fma:VF_128
4933 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4934 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4935 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4936 (match_operand:VF_128 4 "const0_operand")
4937 (const_int 1)))]
4938 "TARGET_FMA4"
4939 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4940 [(set_attr "type" "ssemuladd")
4941 (set_attr "mode" "<MODE>")])
4942
4943 (define_insn "*fma4i_vmfmsub_<mode>"
4944 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4945 (vec_merge:VF_128
4946 (fma:VF_128
4947 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4948 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4949 (neg:VF_128
4950 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4951 (match_operand:VF_128 4 "const0_operand")
4952 (const_int 1)))]
4953 "TARGET_FMA4"
4954 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4955 [(set_attr "type" "ssemuladd")
4956 (set_attr "mode" "<MODE>")])
4957
4958 (define_insn "*fma4i_vmfnmadd_<mode>"
4959 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4960 (vec_merge:VF_128
4961 (fma:VF_128
4962 (neg:VF_128
4963 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4964 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4965 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4966 (match_operand:VF_128 4 "const0_operand")
4967 (const_int 1)))]
4968 "TARGET_FMA4"
4969 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4970 [(set_attr "type" "ssemuladd")
4971 (set_attr "mode" "<MODE>")])
4972
4973 (define_insn "*fma4i_vmfnmsub_<mode>"
4974 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4975 (vec_merge:VF_128
4976 (fma:VF_128
4977 (neg:VF_128
4978 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4979 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4980 (neg:VF_128
4981 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4982 (match_operand:VF_128 4 "const0_operand")
4983 (const_int 1)))]
4984 "TARGET_FMA4"
4985 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4986 [(set_attr "type" "ssemuladd")
4987 (set_attr "mode" "<MODE>")])
4988
4989 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4990 ;;
4991 ;; Parallel single-precision floating point conversion operations
4992 ;;
4993 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4994
4995 (define_insn_and_split "sse_cvtpi2ps"
4996 [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
4997 (vec_merge:V4SF
4998 (vec_duplicate:V4SF
4999 (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")))
5000 (match_operand:V4SF 1 "register_operand" "0,0,Yv")
5001 (const_int 3)))
5002 (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
5003 "TARGET_SSE || TARGET_MMX_WITH_SSE"
5004 "@
5005 cvtpi2ps\t{%2, %0|%0, %2}
5006 #
5007 #"
5008 "TARGET_MMX_WITH_SSE && reload_completed"
5009 [(const_int 0)]
5010 {
5011 rtx op2 = lowpart_subreg (V4SImode, operands[2],
5012 GET_MODE (operands[2]));
5013 /* Generate SSE2 cvtdq2ps. */
5014 rtx insn = gen_floatv4siv4sf2 (operands[3], op2);
5015 emit_insn (insn);
5016
5017 /* Merge operands[3] with operands[0]. */
5018 rtx mask, op1;
5019 if (TARGET_AVX)
5020 {
5021 mask = gen_rtx_PARALLEL (VOIDmode,
5022 gen_rtvec (4, GEN_INT (0), GEN_INT (1),
5023 GEN_INT (6), GEN_INT (7)));
5024 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
5025 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5026 insn = gen_rtx_SET (operands[0], op2);
5027 }
5028 else
5029 {
5030 /* NB: SSE can only concatenate OP0 and OP3 to OP0. */
5031 mask = gen_rtx_PARALLEL (VOIDmode,
5032 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5033 GEN_INT (4), GEN_INT (5)));
5034 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
5035 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5036 insn = gen_rtx_SET (operands[0], op2);
5037 emit_insn (insn);
5038
5039 /* Swap bits 0:63 with bits 64:127. */
5040 mask = gen_rtx_PARALLEL (VOIDmode,
5041 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5042 GEN_INT (0), GEN_INT (1)));
5043 rtx dest = lowpart_subreg (V4SImode, operands[0],
5044 GET_MODE (operands[0]));
5045 op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
5046 insn = gen_rtx_SET (dest, op1);
5047 }
5048 emit_insn (insn);
5049 DONE;
5050 }
5051 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
5052 (set_attr "type" "ssecvt")
5053 (set_attr "mode" "V4SF")])
5054
5055 (define_insn "sse_cvtps2pi"
5056 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5057 (vec_select:V2SI
5058 (unspec:V4SI [(match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm")]
5059 UNSPEC_FIX_NOTRUNC)
5060 (parallel [(const_int 0) (const_int 1)])))]
5061 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5062 "@
5063 cvtps2pi\t{%1, %0|%0, %q1}
5064 %vcvtps2dq\t{%1, %0|%0, %1}"
5065 [(set_attr "mmx_isa" "native,x64")
5066 (set_attr "type" "ssecvt")
5067 (set_attr "unit" "mmx,*")
5068 (set_attr "mode" "DI")])
5069
5070 (define_insn "sse_cvttps2pi"
5071 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5072 (vec_select:V2SI
5073 (fix:V4SI (match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm"))
5074 (parallel [(const_int 0) (const_int 1)])))]
5075 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5076 "@
5077 cvttps2pi\t{%1, %0|%0, %q1}
5078 %vcvttps2dq\t{%1, %0|%0, %1}"
5079 [(set_attr "mmx_isa" "native,x64")
5080 (set_attr "type" "ssecvt")
5081 (set_attr "unit" "mmx,*")
5082 (set_attr "prefix_rep" "0")
5083 (set_attr "mode" "SF")])
5084
5085 (define_insn "sse_cvtsi2ss<rex64namesuffix><round_name>"
5086 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5087 (vec_merge:V4SF
5088 (vec_duplicate:V4SF
5089 (float:SF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5090 (match_operand:V4SF 1 "register_operand" "0,0,v")
5091 (const_int 1)))]
5092 "TARGET_SSE"
5093 "@
5094 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5095 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5096 vcvtsi2ss<rex64suffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5097 [(set_attr "isa" "noavx,noavx,avx")
5098 (set_attr "type" "sseicvt")
5099 (set_attr "athlon_decode" "vector,double,*")
5100 (set_attr "amdfam10_decode" "vector,double,*")
5101 (set_attr "bdver1_decode" "double,direct,*")
5102 (set_attr "btver2_decode" "double,double,double")
5103 (set_attr "znver1_decode" "double,double,double")
5104 (set (attr "length_vex")
5105 (if_then_else
5106 (and (match_test "<MODE>mode == DImode")
5107 (eq_attr "alternative" "2"))
5108 (const_string "4")
5109 (const_string "*")))
5110 (set (attr "prefix_rex")
5111 (if_then_else
5112 (and (match_test "<MODE>mode == DImode")
5113 (eq_attr "alternative" "0,1"))
5114 (const_string "1")
5115 (const_string "*")))
5116 (set_attr "prefix" "orig,orig,maybe_evex")
5117 (set_attr "mode" "SF")])
5118
5119 (define_insn "sse_cvtss2si<rex64namesuffix><round_name>"
5120 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5121 (unspec:SWI48
5122 [(vec_select:SF
5123 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5124 (parallel [(const_int 0)]))]
5125 UNSPEC_FIX_NOTRUNC))]
5126 "TARGET_SSE"
5127 "%vcvtss2si<rex64suffix>\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5128 [(set_attr "type" "sseicvt")
5129 (set_attr "athlon_decode" "double,vector")
5130 (set_attr "bdver1_decode" "double,double")
5131 (set_attr "prefix_rep" "1")
5132 (set_attr "prefix" "maybe_vex")
5133 (set_attr "mode" "<MODE>")])
5134
5135 (define_insn "sse_cvtss2si<rex64namesuffix>_2"
5136 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5137 (unspec:SWI48 [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
5138 UNSPEC_FIX_NOTRUNC))]
5139 "TARGET_SSE"
5140 "%vcvtss2si<rex64suffix>\t{%1, %0|%0, %k1}"
5141 [(set_attr "type" "sseicvt")
5142 (set_attr "athlon_decode" "double,vector")
5143 (set_attr "amdfam10_decode" "double,double")
5144 (set_attr "bdver1_decode" "double,double")
5145 (set_attr "prefix_rep" "1")
5146 (set_attr "prefix" "maybe_vex")
5147 (set_attr "mode" "<MODE>")])
5148
5149 (define_insn "sse_cvttss2si<rex64namesuffix><round_saeonly_name>"
5150 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5151 (fix:SWI48
5152 (vec_select:SF
5153 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
5154 (parallel [(const_int 0)]))))]
5155 "TARGET_SSE"
5156 "%vcvttss2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5157 [(set_attr "type" "sseicvt")
5158 (set_attr "athlon_decode" "double,vector")
5159 (set_attr "amdfam10_decode" "double,double")
5160 (set_attr "bdver1_decode" "double,double")
5161 (set_attr "prefix_rep" "1")
5162 (set_attr "prefix" "maybe_vex")
5163 (set_attr "mode" "<MODE>")])
5164
5165 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
5166 [(set (match_operand:VF_128 0 "register_operand" "=v")
5167 (vec_merge:VF_128
5168 (vec_duplicate:VF_128
5169 (unsigned_float:<ssescalarmode>
5170 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
5171 (match_operand:VF_128 1 "register_operand" "v")
5172 (const_int 1)))]
5173 "TARGET_AVX512F && <round_modev4sf_condition>"
5174 "vcvtusi2<ssescalarmodesuffix>{l}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5175 [(set_attr "type" "sseicvt")
5176 (set_attr "prefix" "evex")
5177 (set_attr "mode" "<ssescalarmode>")])
5178
5179 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
5180 [(set (match_operand:VF_128 0 "register_operand" "=v")
5181 (vec_merge:VF_128
5182 (vec_duplicate:VF_128
5183 (unsigned_float:<ssescalarmode>
5184 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
5185 (match_operand:VF_128 1 "register_operand" "v")
5186 (const_int 1)))]
5187 "TARGET_AVX512F && TARGET_64BIT"
5188 "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5189 [(set_attr "type" "sseicvt")
5190 (set_attr "prefix" "evex")
5191 (set_attr "mode" "<ssescalarmode>")])
5192
5193 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
5194 [(set (match_operand:VF1 0 "register_operand" "=x,v")
5195 (float:VF1
5196 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
5197 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
5198 "@
5199 cvtdq2ps\t{%1, %0|%0, %1}
5200 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5201 [(set_attr "isa" "noavx,avx")
5202 (set_attr "type" "ssecvt")
5203 (set_attr "prefix" "maybe_vex")
5204 (set_attr "mode" "<sseinsnmode>")])
5205
5206 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
5207 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
5208 (unsigned_float:VF1_AVX512VL
5209 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5210 "TARGET_AVX512F"
5211 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5212 [(set_attr "type" "ssecvt")
5213 (set_attr "prefix" "evex")
5214 (set_attr "mode" "<MODE>")])
5215
5216 (define_expand "floatuns<sseintvecmodelower><mode>2"
5217 [(match_operand:VF1 0 "register_operand")
5218 (match_operand:<sseintvecmode> 1 "register_operand")]
5219 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
5220 {
5221 if (<MODE>mode == V16SFmode)
5222 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
5223 else
5224 if (TARGET_AVX512VL)
5225 {
5226 if (<MODE>mode == V4SFmode)
5227 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
5228 else
5229 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
5230 }
5231 else
5232 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
5233
5234 DONE;
5235 })
5236
5237
5238 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
5239 (define_mode_attr sf2simodelower
5240 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
5241
5242 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
5243 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
5244 (unspec:VI4_AVX
5245 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
5246 UNSPEC_FIX_NOTRUNC))]
5247 "TARGET_SSE2 && <mask_mode512bit_condition>"
5248 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5249 [(set_attr "type" "ssecvt")
5250 (set (attr "prefix_data16")
5251 (if_then_else
5252 (match_test "TARGET_AVX")
5253 (const_string "*")
5254 (const_string "1")))
5255 (set_attr "prefix" "maybe_vex")
5256 (set_attr "mode" "<sseinsnmode>")])
5257
5258 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
5259 [(set (match_operand:V16SI 0 "register_operand" "=v")
5260 (unspec:V16SI
5261 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
5262 UNSPEC_FIX_NOTRUNC))]
5263 "TARGET_AVX512F"
5264 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5265 [(set_attr "type" "ssecvt")
5266 (set_attr "prefix" "evex")
5267 (set_attr "mode" "XI")])
5268
5269 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
5270 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
5271 (unspec:VI4_AVX512VL
5272 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
5273 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5274 "TARGET_AVX512F"
5275 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5276 [(set_attr "type" "ssecvt")
5277 (set_attr "prefix" "evex")
5278 (set_attr "mode" "<sseinsnmode>")])
5279
5280 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
5281 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5282 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5283 UNSPEC_FIX_NOTRUNC))]
5284 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5285 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5286 [(set_attr "type" "ssecvt")
5287 (set_attr "prefix" "evex")
5288 (set_attr "mode" "<sseinsnmode>")])
5289
5290 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
5291 [(set (match_operand:V2DI 0 "register_operand" "=v")
5292 (unspec:V2DI
5293 [(vec_select:V2SF
5294 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5295 (parallel [(const_int 0) (const_int 1)]))]
5296 UNSPEC_FIX_NOTRUNC))]
5297 "TARGET_AVX512DQ && TARGET_AVX512VL"
5298 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5299 [(set_attr "type" "ssecvt")
5300 (set_attr "prefix" "evex")
5301 (set_attr "mode" "TI")])
5302
5303 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
5304 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5305 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5306 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5307 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5308 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5309 [(set_attr "type" "ssecvt")
5310 (set_attr "prefix" "evex")
5311 (set_attr "mode" "<sseinsnmode>")])
5312
5313 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
5314 [(set (match_operand:V2DI 0 "register_operand" "=v")
5315 (unspec:V2DI
5316 [(vec_select:V2SF
5317 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5318 (parallel [(const_int 0) (const_int 1)]))]
5319 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5320 "TARGET_AVX512DQ && TARGET_AVX512VL"
5321 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5322 [(set_attr "type" "ssecvt")
5323 (set_attr "prefix" "evex")
5324 (set_attr "mode" "TI")])
5325
5326 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
5327 [(set (match_operand:V16SI 0 "register_operand" "=v")
5328 (any_fix:V16SI
5329 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5330 "TARGET_AVX512F"
5331 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5332 [(set_attr "type" "ssecvt")
5333 (set_attr "prefix" "evex")
5334 (set_attr "mode" "XI")])
5335
5336 (define_insn "fix_truncv8sfv8si2<mask_name>"
5337 [(set (match_operand:V8SI 0 "register_operand" "=v")
5338 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
5339 "TARGET_AVX && <mask_avx512vl_condition>"
5340 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5341 [(set_attr "type" "ssecvt")
5342 (set_attr "prefix" "<mask_prefix>")
5343 (set_attr "mode" "OI")])
5344
5345 (define_insn "fix_truncv4sfv4si2<mask_name>"
5346 [(set (match_operand:V4SI 0 "register_operand" "=v")
5347 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
5348 "TARGET_SSE2 && <mask_avx512vl_condition>"
5349 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5350 [(set_attr "type" "ssecvt")
5351 (set (attr "prefix_rep")
5352 (if_then_else
5353 (match_test "TARGET_AVX")
5354 (const_string "*")
5355 (const_string "1")))
5356 (set (attr "prefix_data16")
5357 (if_then_else
5358 (match_test "TARGET_AVX")
5359 (const_string "*")
5360 (const_string "0")))
5361 (set_attr "prefix_data16" "0")
5362 (set_attr "prefix" "<mask_prefix2>")
5363 (set_attr "mode" "TI")])
5364
5365 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
5366 [(match_operand:<sseintvecmode> 0 "register_operand")
5367 (match_operand:VF1 1 "register_operand")]
5368 "TARGET_SSE2"
5369 {
5370 if (<MODE>mode == V16SFmode)
5371 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
5372 operands[1]));
5373 else
5374 {
5375 rtx tmp[3];
5376 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5377 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
5378 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
5379 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
5380 }
5381 DONE;
5382 })
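
;; The expander above implements the unsigned truncation in terms of the
;; signed one. Sketch of the idea per lane (an assumption about the exact
;; behaviour of ix86_expand_adjust_ufix_to_sfix_si, not taken from this
;; file):
;;
;;   if (x >= 0x1p31f)
;;     result = (int) (x - 0x1p31f) ^ 0x80000000;   /* mask from tmp[2] */
;;   else
;;     result = (int) x;                            /* mask is 0 */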
5383
5384 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5385 ;;
5386 ;; Parallel double-precision floating point conversion operations
5387 ;;
5388 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5389
5390 (define_insn "sse2_cvtpi2pd"
5391 [(set (match_operand:V2DF 0 "register_operand" "=v,x")
5392 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "vBm,?!y")))]
5393 "TARGET_SSE2"
5394 "@
5395 %vcvtdq2pd\t{%1, %0|%0, %1}
5396 cvtpi2pd\t{%1, %0|%0, %1}"
5397 [(set_attr "mmx_isa" "*,native")
5398 (set_attr "type" "ssecvt")
5399 (set_attr "unit" "*,mmx")
5400 (set_attr "prefix_data16" "*,1")
5401 (set_attr "prefix" "maybe_vex,*")
5402 (set_attr "mode" "V2DF")])
5403
5404 (define_insn "sse2_cvtpd2pi"
5405 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5406 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vBm,xm")]
5407 UNSPEC_FIX_NOTRUNC))]
5408 "TARGET_SSE2"
5409 "@
5410 * return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvtpd2dq\t{%1, %0|%0, %1}\";
5411 cvtpd2pi\t{%1, %0|%0, %1}"
5412 [(set_attr "mmx_isa" "*,native")
5413 (set_attr "type" "ssecvt")
5414 (set_attr "unit" "*,mmx")
5415 (set_attr "amdfam10_decode" "double")
5416 (set_attr "athlon_decode" "vector")
5417 (set_attr "bdver1_decode" "double")
5418 (set_attr "prefix_data16" "*,1")
5419 (set_attr "prefix" "maybe_vex,*")
5420 (set_attr "mode" "TI")])
5421
5422 (define_insn "sse2_cvttpd2pi"
5423 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5424 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vBm,xm")))]
5425 "TARGET_SSE2"
5426 "@
5427 * return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvttpd2dq\t{%1, %0|%0, %1}\";
5428 cvttpd2pi\t{%1, %0|%0, %1}"
5429 [(set_attr "mmx_isa" "*,native")
5430 (set_attr "type" "ssecvt")
5431 (set_attr "unit" "*,mmx")
5432 (set_attr "amdfam10_decode" "double")
5433 (set_attr "athlon_decode" "vector")
5434 (set_attr "bdver1_decode" "double")
5435 (set_attr "prefix_data16" "*,1")
5436 (set_attr "prefix" "maybe_vex,*")
5437 (set_attr "mode" "TI")])
5438
5439 (define_insn "sse2_cvtsi2sd"
5440 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5441 (vec_merge:V2DF
5442 (vec_duplicate:V2DF
5443 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
5444 (match_operand:V2DF 1 "register_operand" "0,0,v")
5445 (const_int 1)))]
5446 "TARGET_SSE2"
5447 "@
5448 cvtsi2sd{l}\t{%2, %0|%0, %2}
5449 cvtsi2sd{l}\t{%2, %0|%0, %2}
5450 vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
5451 [(set_attr "isa" "noavx,noavx,avx")
5452 (set_attr "type" "sseicvt")
5453 (set_attr "athlon_decode" "double,direct,*")
5454 (set_attr "amdfam10_decode" "vector,double,*")
5455 (set_attr "bdver1_decode" "double,direct,*")
5456 (set_attr "btver2_decode" "double,double,double")
5457 (set_attr "znver1_decode" "double,double,double")
5458 (set_attr "prefix" "orig,orig,maybe_evex")
5459 (set_attr "mode" "DF")])
5460
5461 (define_insn "sse2_cvtsi2sdq<round_name>"
5462 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5463 (vec_merge:V2DF
5464 (vec_duplicate:V2DF
5465 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5466 (match_operand:V2DF 1 "register_operand" "0,0,v")
5467 (const_int 1)))]
5468 "TARGET_SSE2 && TARGET_64BIT"
5469 "@
5470 cvtsi2sd{q}\t{%2, %0|%0, %2}
5471 cvtsi2sd{q}\t{%2, %0|%0, %2}
5472 vcvtsi2sd{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5473 [(set_attr "isa" "noavx,noavx,avx")
5474 (set_attr "type" "sseicvt")
5475 (set_attr "athlon_decode" "double,direct,*")
5476 (set_attr "amdfam10_decode" "vector,double,*")
5477 (set_attr "bdver1_decode" "double,direct,*")
5478 (set_attr "length_vex" "*,*,4")
5479 (set_attr "prefix_rex" "1,1,*")
5480 (set_attr "prefix" "orig,orig,maybe_evex")
5481 (set_attr "mode" "DF")])
5482
5483 (define_insn "avx512f_vcvtss2usi<rex64namesuffix><round_name>"
5484 [(set (match_operand:SWI48 0 "register_operand" "=r")
5485 (unspec:SWI48
5486 [(vec_select:SF
5487 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
5488 (parallel [(const_int 0)]))]
5489 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5490 "TARGET_AVX512F"
5491 "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5492 [(set_attr "type" "sseicvt")
5493 (set_attr "prefix" "evex")
5494 (set_attr "mode" "<MODE>")])
5495
5496 (define_insn "avx512f_vcvttss2usi<rex64namesuffix><round_saeonly_name>"
5497 [(set (match_operand:SWI48 0 "register_operand" "=r")
5498 (unsigned_fix:SWI48
5499 (vec_select:SF
5500 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5501 (parallel [(const_int 0)]))))]
5502 "TARGET_AVX512F"
5503 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5504 [(set_attr "type" "sseicvt")
5505 (set_attr "prefix" "evex")
5506 (set_attr "mode" "<MODE>")])
5507
5508 (define_insn "avx512f_vcvtsd2usi<rex64namesuffix><round_name>"
5509 [(set (match_operand:SWI48 0 "register_operand" "=r")
5510 (unspec:SWI48
5511 [(vec_select:DF
5512 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
5513 (parallel [(const_int 0)]))]
5514 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5515 "TARGET_AVX512F"
5516 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5517 [(set_attr "type" "sseicvt")
5518 (set_attr "prefix" "evex")
5519 (set_attr "mode" "<MODE>")])
5520
5521 (define_insn "avx512f_vcvttsd2usi<rex64namesuffix><round_saeonly_name>"
5522 [(set (match_operand:SWI48 0 "register_operand" "=r")
5523 (unsigned_fix:SWI48
5524 (vec_select:DF
5525 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5526 (parallel [(const_int 0)]))))]
5527 "TARGET_AVX512F"
5528 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5529 [(set_attr "type" "sseicvt")
5530 (set_attr "prefix" "evex")
5531 (set_attr "mode" "<MODE>")])
5532
5533 (define_insn "sse2_cvtsd2si<rex64namesuffix><round_name>"
5534 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5535 (unspec:SWI48
5536 [(vec_select:DF
5537 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5538 (parallel [(const_int 0)]))]
5539 UNSPEC_FIX_NOTRUNC))]
5540 "TARGET_SSE2"
5541 "%vcvtsd2si<rex64suffix>\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5542 [(set_attr "type" "sseicvt")
5543 (set_attr "athlon_decode" "double,vector")
5544 (set_attr "bdver1_decode" "double,double")
5545 (set_attr "btver2_decode" "double,double")
5546 (set_attr "prefix_rep" "1")
5547 (set_attr "prefix" "maybe_vex")
5548 (set_attr "mode" "<MODE>")])
5549
5550 (define_insn "sse2_cvtsd2si<rex64namesuffix>_2"
5551 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5552 (unspec:SWI48 [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
5553 UNSPEC_FIX_NOTRUNC))]
5554 "TARGET_SSE2"
5555 "%vcvtsd2si<rex64suffix>\t{%1, %0|%0, %q1}"
5556 [(set_attr "type" "sseicvt")
5557 (set_attr "athlon_decode" "double,vector")
5558 (set_attr "amdfam10_decode" "double,double")
5559 (set_attr "bdver1_decode" "double,double")
5560 (set_attr "prefix_rep" "1")
5561 (set_attr "prefix" "maybe_vex")
5562 (set_attr "mode" "<MODE>")])
5563
5564 (define_insn "sse2_cvttsd2si<rex64namesuffix><round_saeonly_name>"
5565 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5566 (fix:SWI48
5567 (vec_select:DF
5568 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
5569 (parallel [(const_int 0)]))))]
5570 "TARGET_SSE2"
5571 "%vcvttsd2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5572 [(set_attr "type" "sseicvt")
5573 (set_attr "athlon_decode" "double,vector")
5574 (set_attr "amdfam10_decode" "double,double")
5575 (set_attr "bdver1_decode" "double,double")
5576 (set_attr "btver2_decode" "double,double")
5577 (set_attr "prefix_rep" "1")
5578 (set_attr "prefix" "maybe_vex")
5579 (set_attr "mode" "<MODE>")])
5580
5581 ;; For float<si2dfmode><mode>2 insn pattern
5582 (define_mode_attr si2dfmode
5583 [(V8DF "V8SI") (V4DF "V4SI")])
5584 (define_mode_attr si2dfmodelower
5585 [(V8DF "v8si") (V4DF "v4si")])
5586
5587 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
5588 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5589 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5590 "TARGET_AVX && <mask_mode512bit_condition>"
5591 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5592 [(set_attr "type" "ssecvt")
5593 (set_attr "prefix" "maybe_vex")
5594 (set_attr "mode" "<MODE>")])
5595
5596 (define_insn "float<floatunssuffix><sseintvecmodelower><mode>2<mask_name><round_name>"
5597 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
5598 (any_float:VF2_AVX512VL
5599 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5600 "TARGET_AVX512DQ"
5601 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5602 [(set_attr "type" "ssecvt")
5603 (set_attr "prefix" "evex")
5604 (set_attr "mode" "<MODE>")])
5605
5606 ;; For float<floatunssuffix><sselongvecmodelower><mode>2 insn patterns
5607 (define_mode_attr qq2pssuff
5608 [(V8SF "") (V4SF "{y}")])
5609
5610 (define_mode_attr sselongvecmode
5611 [(V8SF "V8DI") (V4SF "V4DI")])
5612
5613 (define_mode_attr sselongvecmodelower
5614 [(V8SF "v8di") (V4SF "v4di")])
5615
5616 (define_mode_attr sseintvecmode3
5617 [(V8SF "XI") (V4SF "OI")
5618 (V8DF "OI") (V4DF "TI")])
5619
5620 (define_insn "float<floatunssuffix><sselongvecmodelower><mode>2<mask_name><round_name>"
5621 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
5622 (any_float:VF1_128_256VL
5623 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5624 "TARGET_AVX512DQ && <round_modev8sf_condition>"
5625 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5626 [(set_attr "type" "ssecvt")
5627 (set_attr "prefix" "evex")
5628 (set_attr "mode" "<MODE>")])
5629
5630 (define_expand "float<floatunssuffix>v2div2sf2"
5631 [(set (match_operand:V4SF 0 "register_operand" "=v")
5632 (vec_concat:V4SF
5633 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5634 (match_dup 2)))]
5635 "TARGET_AVX512DQ && TARGET_AVX512VL"
5636 "operands[2] = CONST0_RTX (V2SFmode);")
5637
5638 (define_insn "*float<floatunssuffix>v2div2sf2"
5639 [(set (match_operand:V4SF 0 "register_operand" "=v")
5640 (vec_concat:V4SF
5641 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5642 (match_operand:V2SF 2 "const0_operand" "C")))]
5643 "TARGET_AVX512DQ && TARGET_AVX512VL"
5644 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
5645 [(set_attr "type" "ssecvt")
5646 (set_attr "prefix" "evex")
5647 (set_attr "mode" "V4SF")])
5648
5649 (define_mode_attr vpckfloat_concat_mode
5650 [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
5651 (define_mode_attr vpckfloat_temp_mode
5652 [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
5653 (define_mode_attr vpckfloat_op_mode
5654 [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])
5655
5656 (define_expand "vec_pack<floatprefix>_float_<mode>"
5657 [(match_operand:<ssePSmode> 0 "register_operand")
5658 (any_float:<ssePSmode>
5659 (match_operand:VI8_AVX512VL 1 "register_operand"))
5660 (match_operand:VI8_AVX512VL 2 "register_operand")]
5661 "TARGET_AVX512DQ"
5662 {
5663 rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5664 rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5665 rtx (*gen) (rtx, rtx) = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
5666 emit_insn (gen (r1, operands[1]));
5667 emit_insn (gen (r2, operands[2]));
5668 if (<MODE>mode == V2DImode)
5669 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
5670 else
5671 emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],
5672 r1, r2));
5673 DONE;
5674 })
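
;; Sketch of the element flow in the expander above (V4DI case, derived
;; directly from the generated insns):
;;
;;   r1 = { (float) op1[0..3] }         /* V4SF temporary */
;;   r2 = { (float) op2[0..3] }
;;   operands[0] = concat (r1, r2)      /* V8SF */
;;
;; For V2DI each V4SF temporary carries only two live elements, so the
;; two low halves are combined with movlhps instead of a 256-bit concat.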
5675
5676 (define_expand "float<floatunssuffix>v2div2sf2_mask"
5677 [(set (match_operand:V4SF 0 "register_operand" "=v")
5678 (vec_concat:V4SF
5679 (vec_merge:V2SF
5680 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5681 (vec_select:V2SF
5682 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5683 (parallel [(const_int 0) (const_int 1)]))
5684 (match_operand:QI 3 "register_operand" "Yk"))
5685 (match_dup 4)))]
5686 "TARGET_AVX512DQ && TARGET_AVX512VL"
5687 "operands[4] = CONST0_RTX (V2SFmode);")
5688
5689 (define_insn "*float<floatunssuffix>v2div2sf2_mask"
5690 [(set (match_operand:V4SF 0 "register_operand" "=v")
5691 (vec_concat:V4SF
5692 (vec_merge:V2SF
5693 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5694 (vec_select:V2SF
5695 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5696 (parallel [(const_int 0) (const_int 1)]))
5697 (match_operand:QI 3 "register_operand" "Yk"))
5698 (match_operand:V2SF 4 "const0_operand" "C")))]
5699 "TARGET_AVX512DQ && TARGET_AVX512VL"
5700 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5701 [(set_attr "type" "ssecvt")
5702 (set_attr "prefix" "evex")
5703 (set_attr "mode" "V4SF")])
5704
5705 (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
5706 [(set (match_operand:V4SF 0 "register_operand" "=v")
5707 (vec_concat:V4SF
5708 (vec_merge:V2SF
5709 (any_float:V2SF (match_operand:V2DI 1
5710 "nonimmediate_operand" "vm"))
5711 (match_operand:V2SF 3 "const0_operand" "C")
5712 (match_operand:QI 2 "register_operand" "Yk"))
5713 (match_operand:V2SF 4 "const0_operand" "C")))]
5714 "TARGET_AVX512DQ && TARGET_AVX512VL"
5715 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
5716 [(set_attr "type" "ssecvt")
5717 (set_attr "prefix" "evex")
5718 (set_attr "mode" "V4SF")])
5719
5720 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
5721 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
5722 (unsigned_float:VF2_512_256VL
5723 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5724 "TARGET_AVX512F"
5725 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5726 [(set_attr "type" "ssecvt")
5727 (set_attr "prefix" "evex")
5728 (set_attr "mode" "<MODE>")])
5729
5730 (define_insn "ufloatv2siv2df2<mask_name>"
5731 [(set (match_operand:V2DF 0 "register_operand" "=v")
5732 (unsigned_float:V2DF
5733 (vec_select:V2SI
5734 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5735 (parallel [(const_int 0) (const_int 1)]))))]
5736 "TARGET_AVX512VL"
5737 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5738 [(set_attr "type" "ssecvt")
5739 (set_attr "prefix" "evex")
5740 (set_attr "mode" "V2DF")])
5741
5742 (define_insn "avx512f_cvtdq2pd512_2"
5743 [(set (match_operand:V8DF 0 "register_operand" "=v")
5744 (float:V8DF
5745 (vec_select:V8SI
5746 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5747 (parallel [(const_int 0) (const_int 1)
5748 (const_int 2) (const_int 3)
5749 (const_int 4) (const_int 5)
5750 (const_int 6) (const_int 7)]))))]
5751 "TARGET_AVX512F"
5752 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5753 [(set_attr "type" "ssecvt")
5754 (set_attr "prefix" "evex")
5755 (set_attr "mode" "V8DF")])
5756
5757 (define_insn "avx_cvtdq2pd256_2"
5758 [(set (match_operand:V4DF 0 "register_operand" "=v")
5759 (float:V4DF
5760 (vec_select:V4SI
5761 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
5762 (parallel [(const_int 0) (const_int 1)
5763 (const_int 2) (const_int 3)]))))]
5764 "TARGET_AVX"
5765 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5766 [(set_attr "type" "ssecvt")
5767 (set_attr "prefix" "maybe_evex")
5768 (set_attr "mode" "V4DF")])
5769
5770 (define_insn "sse2_cvtdq2pd<mask_name>"
5771 [(set (match_operand:V2DF 0 "register_operand" "=v")
5772 (float:V2DF
5773 (vec_select:V2SI
5774 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5775 (parallel [(const_int 0) (const_int 1)]))))]
5776 "TARGET_SSE2 && <mask_avx512vl_condition>"
5777 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5778 [(set_attr "type" "ssecvt")
5779 (set_attr "prefix" "maybe_vex")
5780 (set_attr "mode" "V2DF")])
5781
5782 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
5783 [(set (match_operand:V8SI 0 "register_operand" "=v")
5784 (unspec:V8SI
5785 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5786 UNSPEC_FIX_NOTRUNC))]
5787 "TARGET_AVX512F"
5788 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5789 [(set_attr "type" "ssecvt")
5790 (set_attr "prefix" "evex")
5791 (set_attr "mode" "OI")])
5792
5793 (define_insn "avx_cvtpd2dq256<mask_name>"
5794 [(set (match_operand:V4SI 0 "register_operand" "=v")
5795 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5796 UNSPEC_FIX_NOTRUNC))]
5797 "TARGET_AVX && <mask_avx512vl_condition>"
5798 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5799 [(set_attr "type" "ssecvt")
5800 (set_attr "prefix" "<mask_prefix>")
5801 (set_attr "mode" "OI")])
5802
5803 (define_expand "avx_cvtpd2dq256_2"
5804 [(set (match_operand:V8SI 0 "register_operand")
5805 (vec_concat:V8SI
5806 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
5807 UNSPEC_FIX_NOTRUNC)
5808 (match_dup 2)))]
5809 "TARGET_AVX"
5810 "operands[2] = CONST0_RTX (V4SImode);")
5811
5812 (define_insn "*avx_cvtpd2dq256_2"
5813 [(set (match_operand:V8SI 0 "register_operand" "=v")
5814 (vec_concat:V8SI
5815 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5816 UNSPEC_FIX_NOTRUNC)
5817 (match_operand:V4SI 2 "const0_operand")))]
5818 "TARGET_AVX"
5819 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5820 [(set_attr "type" "ssecvt")
5821 (set_attr "prefix" "vex")
5822 (set_attr "btver2_decode" "vector")
5823 (set_attr "mode" "OI")])
5824
5825 (define_insn "sse2_cvtpd2dq<mask_name>"
5826 [(set (match_operand:V4SI 0 "register_operand" "=v")
5827 (vec_concat:V4SI
5828 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
5829 UNSPEC_FIX_NOTRUNC)
5830 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5831 "TARGET_SSE2 && <mask_avx512vl_condition>"
5832 {
5833 if (TARGET_AVX)
5834 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5835 else
5836 return "cvtpd2dq\t{%1, %0|%0, %1}";
5837 }
5838 [(set_attr "type" "ssecvt")
5839 (set_attr "prefix_rep" "1")
5840 (set_attr "prefix_data16" "0")
5841 (set_attr "prefix" "maybe_vex")
5842 (set_attr "mode" "TI")
5843 (set_attr "amdfam10_decode" "double")
5844 (set_attr "athlon_decode" "vector")
5845 (set_attr "bdver1_decode" "double")])
5846
5847 ;; For ufix_notrunc* insn patterns
5848 (define_mode_attr pd2udqsuff
5849 [(V8DF "") (V4DF "{y}")])
5850
5851 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
5852 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
5853 (unspec:<si2dfmode>
5854 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
5855 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5856 "TARGET_AVX512F"
5857 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5858 [(set_attr "type" "ssecvt")
5859 (set_attr "prefix" "evex")
5860 (set_attr "mode" "<sseinsnmode>")])
5861
5862 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
5863 [(set (match_operand:V4SI 0 "register_operand" "=v")
5864 (vec_concat:V4SI
5865 (unspec:V2SI
5866 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5867 UNSPEC_UNSIGNED_FIX_NOTRUNC)
5868 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5869 "TARGET_AVX512VL"
5870 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5871 [(set_attr "type" "ssecvt")
5872 (set_attr "prefix" "evex")
5873 (set_attr "mode" "TI")])
5874
5875 (define_insn "fix<fixunssuffix>_truncv8dfv8si2<mask_name><round_saeonly_name>"
5876 [(set (match_operand:V8SI 0 "register_operand" "=v")
5877 (any_fix:V8SI
5878 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5879 "TARGET_AVX512F"
5880 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5881 [(set_attr "type" "ssecvt")
5882 (set_attr "prefix" "evex")
5883 (set_attr "mode" "OI")])
5884
5885 (define_insn "ufix_truncv2dfv2si2<mask_name>"
5886 [(set (match_operand:V4SI 0 "register_operand" "=v")
5887 (vec_concat:V4SI
5888 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5889 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5890 "TARGET_AVX512VL"
5891 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5892 [(set_attr "type" "ssecvt")
5893 (set_attr "prefix" "evex")
5894 (set_attr "mode" "TI")])
5895
5896 (define_insn "fix_truncv4dfv4si2<mask_name>"
5897 [(set (match_operand:V4SI 0 "register_operand" "=v")
5898 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5899 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
5900 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5901 [(set_attr "type" "ssecvt")
5902 (set_attr "prefix" "maybe_evex")
5903 (set_attr "mode" "OI")])
5904
5905 (define_insn "ufix_truncv4dfv4si2<mask_name>"
5906 [(set (match_operand:V4SI 0 "register_operand" "=v")
5907 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5908 "TARGET_AVX512VL && TARGET_AVX512F"
5909 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5910 [(set_attr "type" "ssecvt")
5911 (set_attr "prefix" "maybe_evex")
5912 (set_attr "mode" "OI")])
5913
5914 (define_insn "fix<fixunssuffix>_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
5915 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5916 (any_fix:<sseintvecmode>
5917 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5918 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
5919 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5920 [(set_attr "type" "ssecvt")
5921 (set_attr "prefix" "evex")
5922 (set_attr "mode" "<sseintvecmode2>")])
5923
5924 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5925 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5926 (unspec:<sseintvecmode>
5927 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5928 UNSPEC_FIX_NOTRUNC))]
5929 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5930 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5931 [(set_attr "type" "ssecvt")
5932 (set_attr "prefix" "evex")
5933 (set_attr "mode" "<sseintvecmode2>")])
5934
5935 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5936 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5937 (unspec:<sseintvecmode>
5938 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
5939 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5940 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5941 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5942 [(set_attr "type" "ssecvt")
5943 (set_attr "prefix" "evex")
5944 (set_attr "mode" "<sseintvecmode2>")])
5945
5946 (define_insn "fix<fixunssuffix>_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
5947 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
5948 (any_fix:<sselongvecmode>
5949 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5950 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
5951 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5952 [(set_attr "type" "ssecvt")
5953 (set_attr "prefix" "evex")
5954 (set_attr "mode" "<sseintvecmode3>")])
5955
5956 (define_insn "fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
5957 [(set (match_operand:V2DI 0 "register_operand" "=v")
5958 (any_fix:V2DI
5959 (vec_select:V2SF
5960 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5961 (parallel [(const_int 0) (const_int 1)]))))]
5962 "TARGET_AVX512DQ && TARGET_AVX512VL"
5963 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5964 [(set_attr "type" "ssecvt")
5965 (set_attr "prefix" "evex")
5966 (set_attr "mode" "TI")])
5967
5968 (define_mode_attr vunpckfixt_mode
5969 [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
5970 (define_mode_attr vunpckfixt_model
5971 [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
5972 (define_mode_attr vunpckfixt_extract_mode
5973 [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])
5974
5975 (define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
5976 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
5977 (any_fix:<vunpckfixt_mode>
5978 (match_operand:VF1_AVX512VL 1 "register_operand"))]
5979 "TARGET_AVX512DQ"
5980 {
5981 rtx tem = operands[1];
5982 if (<MODE>mode != V4SFmode)
5983 {
5984 tem = gen_reg_rtx (<ssehalfvecmode>mode);
5985 emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
5986 operands[1]));
5987 }
5988 rtx (*gen) (rtx, rtx)
5989 = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
5990 emit_insn (gen (operands[0], tem));
5991 DONE;
5992 })
5993
5994 (define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
5995 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
5996 (any_fix:<vunpckfixt_mode>
5997 (match_operand:VF1_AVX512VL 1 "register_operand"))]
5998 "TARGET_AVX512DQ"
5999 {
6000 rtx tem;
6001 if (<MODE>mode != V4SFmode)
6002 {
6003 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6004 emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
6005 operands[1]));
6006 }
6007 else
6008 {
6009 tem = gen_reg_rtx (V4SFmode);
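/* Swap the 64-bit halves of operand 1: selector 0x4e picks elements
   2,3,0,1, so the high pair lands in the low positions of TEM.  */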
6010 emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
6011 }
6012 rtx (*gen) (rtx, rtx)
6013 = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6014 emit_insn (gen (operands[0], tem));
6015 DONE;
6016 })
6017
6018 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
6019 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6020 (unsigned_fix:<sseintvecmode>
6021 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
6022 "TARGET_AVX512VL"
6023 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6024 [(set_attr "type" "ssecvt")
6025 (set_attr "prefix" "evex")
6026 (set_attr "mode" "<sseintvecmode2>")])
6027
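;; cvttpd2dq produces only half a vector of results; the vec_concat with a
;; zero vector in the patterns below makes the zeroed upper half of the
;; destination explicit.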
6028 (define_expand "avx_cvttpd2dq256_2"
6029 [(set (match_operand:V8SI 0 "register_operand")
6030 (vec_concat:V8SI
6031 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
6032 (match_dup 2)))]
6033 "TARGET_AVX"
6034 "operands[2] = CONST0_RTX (V4SImode);")
6035
6036 (define_insn "sse2_cvttpd2dq<mask_name>"
6037 [(set (match_operand:V4SI 0 "register_operand" "=v")
6038 (vec_concat:V4SI
6039 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
6040 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6041 "TARGET_SSE2 && <mask_avx512vl_condition>"
6042 {
6043 if (TARGET_AVX)
6044 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
6045 else
6046 return "cvttpd2dq\t{%1, %0|%0, %1}";
6047 }
6048 [(set_attr "type" "ssecvt")
6049 (set_attr "amdfam10_decode" "double")
6050 (set_attr "athlon_decode" "vector")
6051 (set_attr "bdver1_decode" "double")
6052 (set_attr "prefix" "maybe_vex")
6053 (set_attr "mode" "TI")])
6054
6055 (define_insn "sse2_cvtsd2ss<round_name>"
6056 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6057 (vec_merge:V4SF
6058 (vec_duplicate:V4SF
6059 (float_truncate:V2SF
6060 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
6061 (match_operand:V4SF 1 "register_operand" "0,0,v")
6062 (const_int 1)))]
6063 "TARGET_SSE2"
6064 "@
6065 cvtsd2ss\t{%2, %0|%0, %2}
6066 cvtsd2ss\t{%2, %0|%0, %q2}
6067 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
6068 [(set_attr "isa" "noavx,noavx,avx")
6069 (set_attr "type" "ssecvt")
6070 (set_attr "athlon_decode" "vector,double,*")
6071 (set_attr "amdfam10_decode" "vector,double,*")
6072 (set_attr "bdver1_decode" "direct,direct,*")
6073 (set_attr "btver2_decode" "double,double,double")
6074 (set_attr "prefix" "orig,orig,<round_prefix>")
6075 (set_attr "mode" "SF")])
6076
6077 (define_insn "*sse2_vd_cvtsd2ss"
6078 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6079 (vec_merge:V4SF
6080 (vec_duplicate:V4SF
6081 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
6082 (match_operand:V4SF 1 "register_operand" "0,0,v")
6083 (const_int 1)))]
6084 "TARGET_SSE2"
6085 "@
6086 cvtsd2ss\t{%2, %0|%0, %2}
6087 cvtsd2ss\t{%2, %0|%0, %2}
6088 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
6089 [(set_attr "isa" "noavx,noavx,avx")
6090 (set_attr "type" "ssecvt")
6091 (set_attr "athlon_decode" "vector,double,*")
6092 (set_attr "amdfam10_decode" "vector,double,*")
6093 (set_attr "bdver1_decode" "direct,direct,*")
6094 (set_attr "btver2_decode" "double,double,double")
6095 (set_attr "prefix" "orig,orig,vex")
6096 (set_attr "mode" "SF")])
6097
6098 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
6099 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6100 (vec_merge:V2DF
6101 (float_extend:V2DF
6102 (vec_select:V2SF
6103 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
6104 (parallel [(const_int 0) (const_int 1)])))
6105 (match_operand:V2DF 1 "register_operand" "0,0,v")
6106 (const_int 1)))]
6107 "TARGET_SSE2"
6108 "@
6109 cvtss2sd\t{%2, %0|%0, %2}
6110 cvtss2sd\t{%2, %0|%0, %k2}
6111 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
6112 [(set_attr "isa" "noavx,noavx,avx")
6113 (set_attr "type" "ssecvt")
6114 (set_attr "amdfam10_decode" "vector,double,*")
6115 (set_attr "athlon_decode" "direct,direct,*")
6116 (set_attr "bdver1_decode" "direct,direct,*")
6117 (set_attr "btver2_decode" "double,double,double")
6118 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
6119 (set_attr "mode" "DF")])
6120
6121 (define_insn "*sse2_vd_cvtss2sd"
6122 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6123 (vec_merge:V2DF
6124 (vec_duplicate:V2DF
6125 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
6126 (match_operand:V2DF 1 "register_operand" "0,0,v")
6127 (const_int 1)))]
6128 "TARGET_SSE2"
6129 "@
6130 cvtss2sd\t{%2, %0|%0, %2}
6131 cvtss2sd\t{%2, %0|%0, %2}
6132 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
6133 [(set_attr "isa" "noavx,noavx,avx")
6134 (set_attr "type" "ssecvt")
6135 (set_attr "amdfam10_decode" "vector,double,*")
6136 (set_attr "athlon_decode" "direct,direct,*")
6137 (set_attr "bdver1_decode" "direct,direct,*")
6138 (set_attr "btver2_decode" "double,double,double")
6139 (set_attr "prefix" "orig,orig,vex")
6140 (set_attr "mode" "DF")])
6141
6142 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
6143 [(set (match_operand:V8SF 0 "register_operand" "=v")
6144 (float_truncate:V8SF
6145 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
6146 "TARGET_AVX512F"
6147 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6148 [(set_attr "type" "ssecvt")
6149 (set_attr "prefix" "evex")
6150 (set_attr "mode" "V8SF")])
6151
6152 (define_insn "avx_cvtpd2ps256<mask_name>"
6153 [(set (match_operand:V4SF 0 "register_operand" "=v")
6154 (float_truncate:V4SF
6155 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6156 "TARGET_AVX && <mask_avx512vl_condition>"
6157 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6158 [(set_attr "type" "ssecvt")
6159 (set_attr "prefix" "maybe_evex")
6160 (set_attr "btver2_decode" "vector")
6161 (set_attr "mode" "V4SF")])
6162
6163 (define_expand "sse2_cvtpd2ps"
6164 [(set (match_operand:V4SF 0 "register_operand")
6165 (vec_concat:V4SF
6166 (float_truncate:V2SF
6167 (match_operand:V2DF 1 "vector_operand"))
6168 (match_dup 2)))]
6169 "TARGET_SSE2"
6170 "operands[2] = CONST0_RTX (V2SFmode);")
6171
6172 (define_expand "sse2_cvtpd2ps_mask"
6173 [(set (match_operand:V4SF 0 "register_operand")
6174 (vec_merge:V4SF
6175 (vec_concat:V4SF
6176 (float_truncate:V2SF
6177 (match_operand:V2DF 1 "vector_operand"))
6178 (match_dup 4))
6179 (match_operand:V4SF 2 "register_operand")
6180 (match_operand:QI 3 "register_operand")))]
6181 "TARGET_SSE2"
6182 "operands[4] = CONST0_RTX (V2SFmode);")
6183
6184 (define_insn "*sse2_cvtpd2ps<mask_name>"
6185 [(set (match_operand:V4SF 0 "register_operand" "=v")
6186 (vec_concat:V4SF
6187 (float_truncate:V2SF
6188 (match_operand:V2DF 1 "vector_operand" "vBm"))
6189 (match_operand:V2SF 2 "const0_operand")))]
6190 "TARGET_SSE2 && <mask_avx512vl_condition>"
6191 {
6192 if (TARGET_AVX)
6193 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
6194 else
6195 return "cvtpd2ps\t{%1, %0|%0, %1}";
6196 }
6197 [(set_attr "type" "ssecvt")
6198 (set_attr "amdfam10_decode" "double")
6199 (set_attr "athlon_decode" "vector")
6200 (set_attr "bdver1_decode" "double")
6201 (set_attr "prefix_data16" "1")
6202 (set_attr "prefix" "maybe_vex")
6203 (set_attr "mode" "V4SF")])
6204
6205 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
6206 (define_mode_attr sf2dfmode
6207 [(V8DF "V8SF") (V4DF "V4SF")])
6208
6209 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
6210 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
6211 (float_extend:VF2_512_256
6212 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6213 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
6214 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6215 [(set_attr "type" "ssecvt")
6216 (set_attr "prefix" "maybe_vex")
6217 (set_attr "mode" "<MODE>")])
6218
6219 (define_insn "*avx_cvtps2pd256_2"
6220 [(set (match_operand:V4DF 0 "register_operand" "=v")
6221 (float_extend:V4DF
6222 (vec_select:V4SF
6223 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6224 (parallel [(const_int 0) (const_int 1)
6225 (const_int 2) (const_int 3)]))))]
6226 "TARGET_AVX"
6227 "vcvtps2pd\t{%x1, %0|%0, %x1}"
6228 [(set_attr "type" "ssecvt")
6229 (set_attr "prefix" "vex")
6230 (set_attr "mode" "V4DF")])
6231
6232 (define_insn "vec_unpacks_lo_v16sf"
6233 [(set (match_operand:V8DF 0 "register_operand" "=v")
6234 (float_extend:V8DF
6235 (vec_select:V8SF
6236 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6237 (parallel [(const_int 0) (const_int 1)
6238 (const_int 2) (const_int 3)
6239 (const_int 4) (const_int 5)
6240 (const_int 6) (const_int 7)]))))]
6241 "TARGET_AVX512F"
6242 "vcvtps2pd\t{%t1, %0|%0, %t1}"
6243 [(set_attr "type" "ssecvt")
6244 (set_attr "prefix" "evex")
6245 (set_attr "mode" "V8DF")])
6246
6247 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6248 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6249 (unspec:<avx512fmaskmode>
6250 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
6251 UNSPEC_CVTINT2MASK))]
6252 "TARGET_AVX512BW"
6253 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6254 [(set_attr "prefix" "evex")
6255 (set_attr "mode" "<sseinsnmode>")])
6256
6257 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6258 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6259 (unspec:<avx512fmaskmode>
6260 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
6261 UNSPEC_CVTINT2MASK))]
6262 "TARGET_AVX512DQ"
6263 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6264 [(set_attr "prefix" "evex")
6265 (set_attr "mode" "<sseinsnmode>")])
6266
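;; The cvtmask2 patterns below expand a mask register into a vector whose
;; elements are all-ones where the corresponding mask bit is set and zero
;; elsewhere (vpmovm2{b,w,d,q}); the expanders just supply the constant -1
;; and 0 vectors that vec_merge combines under the mask.  Roughly, per
;; element I:  dest[I] = (k >> I) & 1 ? -1 : 0;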
6267 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6268 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
6269 (vec_merge:VI12_AVX512VL
6270 (match_dup 2)
6271 (match_dup 3)
6272 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6273 "TARGET_AVX512BW"
6274 {
6275 operands[2] = CONSTM1_RTX (<MODE>mode);
6276 operands[3] = CONST0_RTX (<MODE>mode);
6277 })
6278
6279 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6280 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
6281 (vec_merge:VI12_AVX512VL
6282 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
6283 (match_operand:VI12_AVX512VL 3 "const0_operand")
6284 (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
6285 "TARGET_AVX512BW"
6286 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
6287 [(set_attr "prefix" "evex")
6288 (set_attr "mode" "<sseinsnmode>")])
6289
6290 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6291 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
6292 (vec_merge:VI48_AVX512VL
6293 (match_dup 2)
6294 (match_dup 3)
6295 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6296 "TARGET_AVX512DQ"
6297 "{
6298 operands[2] = CONSTM1_RTX (<MODE>mode);
6299 operands[3] = CONST0_RTX (<MODE>mode);
6300 }")
6301
6302 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6303 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
6304 (vec_merge:VI48_AVX512VL
6305 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
6306 (match_operand:VI48_AVX512VL 3 "const0_operand")
6307 (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
6308 "TARGET_AVX512DQ"
6309 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
6310 [(set_attr "prefix" "evex")
6311 (set_attr "mode" "<sseinsnmode>")])
6312
6313 (define_insn "sse2_cvtps2pd<mask_name>"
6314 [(set (match_operand:V2DF 0 "register_operand" "=v")
6315 (float_extend:V2DF
6316 (vec_select:V2SF
6317 (match_operand:V4SF 1 "vector_operand" "vm")
6318 (parallel [(const_int 0) (const_int 1)]))))]
6319 "TARGET_SSE2 && <mask_avx512vl_condition>"
6320 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6321 [(set_attr "type" "ssecvt")
6322 (set_attr "amdfam10_decode" "direct")
6323 (set_attr "athlon_decode" "double")
6324 (set_attr "bdver1_decode" "double")
6325 (set_attr "prefix_data16" "0")
6326 (set_attr "prefix" "maybe_vex")
6327 (set_attr "mode" "V2DF")])
6328
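;; The first set below only needs operand 1's high pair in the low half of
;; the temporary: the vec_concat's other input is the (still undefined)
;; temporary itself, whose elements end up in the don't-care upper half.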
6329 (define_expand "vec_unpacks_hi_v4sf"
6330 [(set (match_dup 2)
6331 (vec_select:V4SF
6332 (vec_concat:V8SF
6333 (match_dup 2)
6334 (match_operand:V4SF 1 "vector_operand"))
6335 (parallel [(const_int 6) (const_int 7)
6336 (const_int 2) (const_int 3)])))
6337 (set (match_operand:V2DF 0 "register_operand")
6338 (float_extend:V2DF
6339 (vec_select:V2SF
6340 (match_dup 2)
6341 (parallel [(const_int 0) (const_int 1)]))))]
6342 "TARGET_SSE2"
6343 "operands[2] = gen_reg_rtx (V4SFmode);")
6344
6345 (define_expand "vec_unpacks_hi_v8sf"
6346 [(set (match_dup 2)
6347 (vec_select:V4SF
6348 (match_operand:V8SF 1 "register_operand")
6349 (parallel [(const_int 4) (const_int 5)
6350 (const_int 6) (const_int 7)])))
6351 (set (match_operand:V4DF 0 "register_operand")
6352 (float_extend:V4DF
6353 (match_dup 2)))]
6354 "TARGET_AVX"
6355 "operands[2] = gen_reg_rtx (V4SFmode);")
6356
6357 (define_expand "vec_unpacks_hi_v16sf"
6358 [(set (match_dup 2)
6359 (vec_select:V8SF
6360 (match_operand:V16SF 1 "register_operand")
6361 (parallel [(const_int 8) (const_int 9)
6362 (const_int 10) (const_int 11)
6363 (const_int 12) (const_int 13)
6364 (const_int 14) (const_int 15)])))
6365 (set (match_operand:V8DF 0 "register_operand")
6366 (float_extend:V8DF
6367 (match_dup 2)))]
6368 "TARGET_AVX512F"
6369 "operands[2] = gen_reg_rtx (V8SFmode);")
6370
6371 (define_expand "vec_unpacks_lo_v4sf"
6372 [(set (match_operand:V2DF 0 "register_operand")
6373 (float_extend:V2DF
6374 (vec_select:V2SF
6375 (match_operand:V4SF 1 "vector_operand")
6376 (parallel [(const_int 0) (const_int 1)]))))]
6377 "TARGET_SSE2")
6378
6379 (define_expand "vec_unpacks_lo_v8sf"
6380 [(set (match_operand:V4DF 0 "register_operand")
6381 (float_extend:V4DF
6382 (vec_select:V4SF
6383 (match_operand:V8SF 1 "nonimmediate_operand")
6384 (parallel [(const_int 0) (const_int 1)
6385 (const_int 2) (const_int 3)]))))]
6386 "TARGET_AVX")
6387
6388 (define_mode_attr sseunpackfltmode
6389 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
6390 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
6391
6392 (define_expand "vec_unpacks_float_hi_<mode>"
6393 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6394 (match_operand:VI2_AVX512F 1 "register_operand")]
6395 "TARGET_SSE2"
6396 {
6397 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6398
6399 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
6400 emit_insn (gen_rtx_SET (operands[0],
6401 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6402 DONE;
6403 })
6404
6405 (define_expand "vec_unpacks_float_lo_<mode>"
6406 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6407 (match_operand:VI2_AVX512F 1 "register_operand")]
6408 "TARGET_SSE2"
6409 {
6410 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6411
6412 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
6413 emit_insn (gen_rtx_SET (operands[0],
6414 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6415 DONE;
6416 })
6417
6418 (define_expand "vec_unpacku_float_hi_<mode>"
6419 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6420 (match_operand:VI2_AVX512F 1 "register_operand")]
6421 "TARGET_SSE2"
6422 {
6423 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6424
6425 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
6426 emit_insn (gen_rtx_SET (operands[0],
6427 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6428 DONE;
6429 })
6430
6431 (define_expand "vec_unpacku_float_lo_<mode>"
6432 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6433 (match_operand:VI2_AVX512F 1 "register_operand")]
6434 "TARGET_SSE2"
6435 {
6436 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6437
6438 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
6439 emit_insn (gen_rtx_SET (operands[0],
6440 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6441 DONE;
6442 })
6443
6444 (define_expand "vec_unpacks_float_hi_v4si"
6445 [(set (match_dup 2)
6446 (vec_select:V4SI
6447 (match_operand:V4SI 1 "vector_operand")
6448 (parallel [(const_int 2) (const_int 3)
6449 (const_int 2) (const_int 3)])))
6450 (set (match_operand:V2DF 0 "register_operand")
6451 (float:V2DF
6452 (vec_select:V2SI
6453 (match_dup 2)
6454 (parallel [(const_int 0) (const_int 1)]))))]
6455 "TARGET_SSE2"
6456 "operands[2] = gen_reg_rtx (V4SImode);")
6457
6458 (define_expand "vec_unpacks_float_lo_v4si"
6459 [(set (match_operand:V2DF 0 "register_operand")
6460 (float:V2DF
6461 (vec_select:V2SI
6462 (match_operand:V4SI 1 "vector_operand")
6463 (parallel [(const_int 0) (const_int 1)]))))]
6464 "TARGET_SSE2")
6465
6466 (define_expand "vec_unpacks_float_hi_v8si"
6467 [(set (match_dup 2)
6468 (vec_select:V4SI
6469 (match_operand:V8SI 1 "vector_operand")
6470 (parallel [(const_int 4) (const_int 5)
6471 (const_int 6) (const_int 7)])))
6472 (set (match_operand:V4DF 0 "register_operand")
6473 (float:V4DF
6474 (match_dup 2)))]
6475 "TARGET_AVX"
6476 "operands[2] = gen_reg_rtx (V4SImode);")
6477
6478 (define_expand "vec_unpacks_float_lo_v8si"
6479 [(set (match_operand:V4DF 0 "register_operand")
6480 (float:V4DF
6481 (vec_select:V4SI
6482 (match_operand:V8SI 1 "nonimmediate_operand")
6483 (parallel [(const_int 0) (const_int 1)
6484 (const_int 2) (const_int 3)]))))]
6485 "TARGET_AVX")
6486
6487 (define_expand "vec_unpacks_float_hi_v16si"
6488 [(set (match_dup 2)
6489 (vec_select:V8SI
6490 (match_operand:V16SI 1 "nonimmediate_operand")
6491 (parallel [(const_int 8) (const_int 9)
6492 (const_int 10) (const_int 11)
6493 (const_int 12) (const_int 13)
6494 (const_int 14) (const_int 15)])))
6495 (set (match_operand:V8DF 0 "register_operand")
6496 (float:V8DF
6497 (match_dup 2)))]
6498 "TARGET_AVX512F"
6499 "operands[2] = gen_reg_rtx (V8SImode);")
6500
6501 (define_expand "vec_unpacks_float_lo_v16si"
6502 [(set (match_operand:V8DF 0 "register_operand")
6503 (float:V8DF
6504 (vec_select:V8SI
6505 (match_operand:V16SI 1 "nonimmediate_operand")
6506 (parallel [(const_int 0) (const_int 1)
6507 (const_int 2) (const_int 3)
6508 (const_int 4) (const_int 5)
6509 (const_int 6) (const_int 7)]))))]
6510 "TARGET_AVX512F")
6511
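;; SSE2/AVX have no unsigned integer to double conversion, so the
;; vec_unpacku_float_* expanders below convert as signed and then add 2^32
;; wherever the signed result is negative (i.e. the source element had its
;; top bit set).  A scalar sketch of the per-element computation, assuming
;; <stdint.h>:
;;   double u32_to_double (uint32_t u)
;;   {
;;     double d = (double) (int32_t) u;  /* signed conversion */
;;     if (d < 0.0)                      /* top bit of U was set */
;;       d += 0x1p32;                    /* correct by 2^32 */
;;     return d;
;;   }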
6512 (define_expand "vec_unpacku_float_hi_v4si"
6513 [(set (match_dup 5)
6514 (vec_select:V4SI
6515 (match_operand:V4SI 1 "vector_operand")
6516 (parallel [(const_int 2) (const_int 3)
6517 (const_int 2) (const_int 3)])))
6518 (set (match_dup 6)
6519 (float:V2DF
6520 (vec_select:V2SI
6521 (match_dup 5)
6522 (parallel [(const_int 0) (const_int 1)]))))
6523 (set (match_dup 7)
6524 (lt:V2DF (match_dup 6) (match_dup 3)))
6525 (set (match_dup 8)
6526 (and:V2DF (match_dup 7) (match_dup 4)))
6527 (set (match_operand:V2DF 0 "register_operand")
6528 (plus:V2DF (match_dup 6) (match_dup 8)))]
6529 "TARGET_SSE2"
6530 {
6531 REAL_VALUE_TYPE TWO32r;
6532 rtx x;
6533 int i;
6534
6535 real_ldexp (&TWO32r, &dconst1, 32);
6536 x = const_double_from_real_value (TWO32r, DFmode);
6537
6538 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6539 operands[4] = force_reg (V2DFmode,
6540 ix86_build_const_vector (V2DFmode, 1, x));
6541
6542 operands[5] = gen_reg_rtx (V4SImode);
6543
6544 for (i = 6; i < 9; i++)
6545 operands[i] = gen_reg_rtx (V2DFmode);
6546 })
6547
6548 (define_expand "vec_unpacku_float_lo_v4si"
6549 [(set (match_dup 5)
6550 (float:V2DF
6551 (vec_select:V2SI
6552 (match_operand:V4SI 1 "vector_operand")
6553 (parallel [(const_int 0) (const_int 1)]))))
6554 (set (match_dup 6)
6555 (lt:V2DF (match_dup 5) (match_dup 3)))
6556 (set (match_dup 7)
6557 (and:V2DF (match_dup 6) (match_dup 4)))
6558 (set (match_operand:V2DF 0 "register_operand")
6559 (plus:V2DF (match_dup 5) (match_dup 7)))]
6560 "TARGET_SSE2"
6561 {
6562 REAL_VALUE_TYPE TWO32r;
6563 rtx x;
6564 int i;
6565
6566 real_ldexp (&TWO32r, &dconst1, 32);
6567 x = const_double_from_real_value (TWO32r, DFmode);
6568
6569 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6570 operands[4] = force_reg (V2DFmode,
6571 ix86_build_const_vector (V2DFmode, 1, x));
6572
6573 for (i = 5; i < 8; i++)
6574 operands[i] = gen_reg_rtx (V2DFmode);
6575 })
6576
6577 (define_expand "vec_unpacku_float_hi_v8si"
6578 [(match_operand:V4DF 0 "register_operand")
6579 (match_operand:V8SI 1 "register_operand")]
6580 "TARGET_AVX"
6581 {
6582 REAL_VALUE_TYPE TWO32r;
6583 rtx x, tmp[6];
6584 int i;
6585
6586 real_ldexp (&TWO32r, &dconst1, 32);
6587 x = const_double_from_real_value (TWO32r, DFmode);
6588
6589 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
6590 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
6591 tmp[5] = gen_reg_rtx (V4SImode);
6592
6593 for (i = 2; i < 5; i++)
6594 tmp[i] = gen_reg_rtx (V4DFmode);
6595 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
6596 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
6597 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
6598 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
6599 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
6600 DONE;
6601 })
6602
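;; The AVX512F variant uses the same 2^32 fixup, but with the compare going
;; into a mask register and a masked add applying the correction.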
6603 (define_expand "vec_unpacku_float_hi_v16si"
6604 [(match_operand:V8DF 0 "register_operand")
6605 (match_operand:V16SI 1 "register_operand")]
6606 "TARGET_AVX512F"
6607 {
6608 REAL_VALUE_TYPE TWO32r;
6609 rtx k, x, tmp[4];
6610
6611 real_ldexp (&TWO32r, &dconst1, 32);
6612 x = const_double_from_real_value (TWO32r, DFmode);
6613
6614 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
6615 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
6616 tmp[2] = gen_reg_rtx (V8DFmode);
6617 tmp[3] = gen_reg_rtx (V8SImode);
6618 k = gen_reg_rtx (QImode);
6619
6620 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
6621 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
6622 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
6623 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
6624 emit_move_insn (operands[0], tmp[2]);
6625 DONE;
6626 })
6627
6628 (define_expand "vec_unpacku_float_lo_v8si"
6629 [(match_operand:V4DF 0 "register_operand")
6630 (match_operand:V8SI 1 "nonimmediate_operand")]
6631 "TARGET_AVX"
6632 {
6633 REAL_VALUE_TYPE TWO32r;
6634 rtx x, tmp[5];
6635 int i;
6636
6637 real_ldexp (&TWO32r, &dconst1, 32);
6638 x = const_double_from_real_value (TWO32r, DFmode);
6639
6640 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
6641 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
6642
6643 for (i = 2; i < 5; i++)
6644 tmp[i] = gen_reg_rtx (V4DFmode);
6645 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
6646 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
6647 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
6648 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
6649 DONE;
6650 })
6651
6652 (define_expand "vec_unpacku_float_lo_v16si"
6653 [(match_operand:V8DF 0 "register_operand")
6654 (match_operand:V16SI 1 "nonimmediate_operand")]
6655 "TARGET_AVX512F"
6656 {
6657 REAL_VALUE_TYPE TWO32r;
6658 rtx k, x, tmp[3];
6659
6660 real_ldexp (&TWO32r, &dconst1, 32);
6661 x = const_double_from_real_value (TWO32r, DFmode);
6662
6663 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
6664 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
6665 tmp[2] = gen_reg_rtx (V8DFmode);
6666 k = gen_reg_rtx (QImode);
6667
6668 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
6669 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
6670 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
6671 emit_move_insn (operands[0], tmp[2]);
6672 DONE;
6673 })
6674
6675 (define_expand "vec_pack_trunc_<mode>"
6676 [(set (match_dup 3)
6677 (float_truncate:<sf2dfmode>
6678 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
6679 (set (match_dup 4)
6680 (float_truncate:<sf2dfmode>
6681 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
6682 (set (match_operand:<ssePSmode> 0 "register_operand")
6683 (vec_concat:<ssePSmode>
6684 (match_dup 3)
6685 (match_dup 4)))]
6686 "TARGET_AVX"
6687 {
6688 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
6689 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
6690 })
6691
6692 (define_expand "vec_pack_trunc_v2df"
6693 [(match_operand:V4SF 0 "register_operand")
6694 (match_operand:V2DF 1 "vector_operand")
6695 (match_operand:V2DF 2 "vector_operand")]
6696 "TARGET_SSE2"
6697 {
6698 rtx tmp0, tmp1;
6699
6700 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6701 {
6702 tmp0 = gen_reg_rtx (V4DFmode);
6703 tmp1 = force_reg (V2DFmode, operands[1]);
6704
6705 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6706 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
6707 }
6708 else
6709 {
6710 tmp0 = gen_reg_rtx (V4SFmode);
6711 tmp1 = gen_reg_rtx (V4SFmode);
6712
6713 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
6714 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
6715 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
6716 }
6717 DONE;
6718 })
6719
6720 (define_expand "vec_pack_sfix_trunc_v8df"
6721 [(match_operand:V16SI 0 "register_operand")
6722 (match_operand:V8DF 1 "nonimmediate_operand")
6723 (match_operand:V8DF 2 "nonimmediate_operand")]
6724 "TARGET_AVX512F"
6725 {
6726 rtx r1, r2;
6727
6728 r1 = gen_reg_rtx (V8SImode);
6729 r2 = gen_reg_rtx (V8SImode);
6730
6731 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
6732 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
6733 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6734 DONE;
6735 })
6736
6737 (define_expand "vec_pack_sfix_trunc_v4df"
6738 [(match_operand:V8SI 0 "register_operand")
6739 (match_operand:V4DF 1 "nonimmediate_operand")
6740 (match_operand:V4DF 2 "nonimmediate_operand")]
6741 "TARGET_AVX"
6742 {
6743 rtx r1, r2;
6744
6745 r1 = gen_reg_rtx (V4SImode);
6746 r2 = gen_reg_rtx (V4SImode);
6747
6748 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
6749 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
6750 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6751 DONE;
6752 })
6753
6754 (define_expand "vec_pack_sfix_trunc_v2df"
6755 [(match_operand:V4SI 0 "register_operand")
6756 (match_operand:V2DF 1 "vector_operand")
6757 (match_operand:V2DF 2 "vector_operand")]
6758 "TARGET_SSE2"
6759 {
6760 rtx tmp0, tmp1, tmp2;
6761
6762 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6763 {
6764 tmp0 = gen_reg_rtx (V4DFmode);
6765 tmp1 = force_reg (V2DFmode, operands[1]);
6766
6767 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6768 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
6769 }
6770 else
6771 {
6772 tmp0 = gen_reg_rtx (V4SImode);
6773 tmp1 = gen_reg_rtx (V4SImode);
6774 tmp2 = gen_reg_rtx (V2DImode);
6775
6776 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
6777 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
6778 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6779 gen_lowpart (V2DImode, tmp0),
6780 gen_lowpart (V2DImode, tmp1)));
6781 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6782 }
6783 DONE;
6784 })
6785
6786 (define_mode_attr ssepackfltmode
6787 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
6788
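;; Without AVX512F there is no unsigned variant of the packed truncation,
;; so ix86_expand_adjust_ufix_to_sfix_si first biases each input into
;; signed range and returns the matching per-element correction, which is
;; XORed into the packed signed result at the end.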
6789 (define_expand "vec_pack_ufix_trunc_<mode>"
6790 [(match_operand:<ssepackfltmode> 0 "register_operand")
6791 (match_operand:VF2 1 "register_operand")
6792 (match_operand:VF2 2 "register_operand")]
6793 "TARGET_SSE2"
6794 {
6795 if (<MODE>mode == V8DFmode)
6796 {
6797 rtx r1, r2;
6798
6799 r1 = gen_reg_rtx (V8SImode);
6800 r2 = gen_reg_rtx (V8SImode);
6801
6802 emit_insn (gen_fixuns_truncv8dfv8si2 (r1, operands[1]));
6803 emit_insn (gen_fixuns_truncv8dfv8si2 (r2, operands[2]));
6804 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6805 }
6806 else
6807 {
6808 rtx tmp[7];
6809 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
6810 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
6811 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
6812 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
6813 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
6814 {
6815 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
6816 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
6817 }
6818 else
6819 {
6820 tmp[5] = gen_reg_rtx (V8SFmode);
6821 ix86_expand_vec_extract_even_odd (tmp[5],
6822 gen_lowpart (V8SFmode, tmp[2]),
6823 gen_lowpart (V8SFmode, tmp[3]), 0);
6824 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
6825 }
6826 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
6827 operands[0], 0, OPTAB_DIRECT);
6828 if (tmp[6] != operands[0])
6829 emit_move_insn (operands[0], tmp[6]);
6830 }
6831
6832 DONE;
6833 })
6834
6835 (define_expand "avx512f_vec_pack_sfix_v8df"
6836 [(match_operand:V16SI 0 "register_operand")
6837 (match_operand:V8DF 1 "nonimmediate_operand")
6838 (match_operand:V8DF 2 "nonimmediate_operand")]
6839 "TARGET_AVX512F"
6840 {
6841 rtx r1, r2;
6842
6843 r1 = gen_reg_rtx (V8SImode);
6844 r2 = gen_reg_rtx (V8SImode);
6845
6846 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
6847 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
6848 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6849 DONE;
6850 })
6851
6852 (define_expand "vec_pack_sfix_v4df"
6853 [(match_operand:V8SI 0 "register_operand")
6854 (match_operand:V4DF 1 "nonimmediate_operand")
6855 (match_operand:V4DF 2 "nonimmediate_operand")]
6856 "TARGET_AVX"
6857 {
6858 rtx r1, r2;
6859
6860 r1 = gen_reg_rtx (V4SImode);
6861 r2 = gen_reg_rtx (V4SImode);
6862
6863 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
6864 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
6865 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6866 DONE;
6867 })
6868
6869 (define_expand "vec_pack_sfix_v2df"
6870 [(match_operand:V4SI 0 "register_operand")
6871 (match_operand:V2DF 1 "vector_operand")
6872 (match_operand:V2DF 2 "vector_operand")]
6873 "TARGET_SSE2"
6874 {
6875 rtx tmp0, tmp1, tmp2;
6876
6877 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6878 {
6879 tmp0 = gen_reg_rtx (V4DFmode);
6880 tmp1 = force_reg (V2DFmode, operands[1]);
6881
6882 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6883 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
6884 }
6885 else
6886 {
6887 tmp0 = gen_reg_rtx (V4SImode);
6888 tmp1 = gen_reg_rtx (V4SImode);
6889 tmp2 = gen_reg_rtx (V2DImode);
6890
6891 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
6892 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
6893 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6894 gen_lowpart (V2DImode, tmp0),
6895 gen_lowpart (V2DImode, tmp1)));
6896 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6897 }
6898 DONE;
6899 })
6900
6901 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6902 ;;
6903 ;; Parallel single-precision floating point element swizzling
6904 ;;
6905 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6906
6907 (define_expand "sse_movhlps_exp"
6908 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6909 (vec_select:V4SF
6910 (vec_concat:V8SF
6911 (match_operand:V4SF 1 "nonimmediate_operand")
6912 (match_operand:V4SF 2 "nonimmediate_operand"))
6913 (parallel [(const_int 6)
6914 (const_int 7)
6915 (const_int 2)
6916 (const_int 3)])))]
6917 "TARGET_SSE"
6918 {
6919 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6920
6921 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
6922
6923 /* Fix up the destination if needed. */
6924 if (dst != operands[0])
6925 emit_move_insn (operands[0], dst);
6926
6927 DONE;
6928 })
6929
6930 (define_insn "sse_movhlps"
6931 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
6932 (vec_select:V4SF
6933 (vec_concat:V8SF
6934 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6935 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
6936 (parallel [(const_int 6)
6937 (const_int 7)
6938 (const_int 2)
6939 (const_int 3)])))]
6940 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6941 "@
6942 movhlps\t{%2, %0|%0, %2}
6943 vmovhlps\t{%2, %1, %0|%0, %1, %2}
6944 movlps\t{%H2, %0|%0, %H2}
6945 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
6946 %vmovhps\t{%2, %0|%q0, %2}"
6947 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6948 (set_attr "type" "ssemov")
6949 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6950 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6951
6952 (define_expand "sse_movlhps_exp"
6953 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6954 (vec_select:V4SF
6955 (vec_concat:V8SF
6956 (match_operand:V4SF 1 "nonimmediate_operand")
6957 (match_operand:V4SF 2 "nonimmediate_operand"))
6958 (parallel [(const_int 0)
6959 (const_int 1)
6960 (const_int 4)
6961 (const_int 5)])))]
6962 "TARGET_SSE"
6963 {
6964 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6965
6966 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
6967
6968 /* Fix up the destination if needed. */
6969 if (dst != operands[0])
6970 emit_move_insn (operands[0], dst);
6971
6972 DONE;
6973 })
6974
6975 (define_insn "sse_movlhps"
6976 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
6977 (vec_select:V4SF
6978 (vec_concat:V8SF
6979 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6980 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
6981 (parallel [(const_int 0)
6982 (const_int 1)
6983 (const_int 4)
6984 (const_int 5)])))]
6985 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
6986 "@
6987 movlhps\t{%2, %0|%0, %2}
6988 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6989 movhps\t{%2, %0|%0, %q2}
6990 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6991 %vmovlps\t{%2, %H0|%H0, %2}"
6992 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6993 (set_attr "type" "ssemov")
6994 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6995 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6996
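;; Like their 128- and 256-bit forms, the 512-bit unpck insns interleave
;; only within 128-bit lanes; the explicit index lists below spell that out.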
6997 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
6998 [(set (match_operand:V16SF 0 "register_operand" "=v")
6999 (vec_select:V16SF
7000 (vec_concat:V32SF
7001 (match_operand:V16SF 1 "register_operand" "v")
7002 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7003 (parallel [(const_int 2) (const_int 18)
7004 (const_int 3) (const_int 19)
7005 (const_int 6) (const_int 22)
7006 (const_int 7) (const_int 23)
7007 (const_int 10) (const_int 26)
7008 (const_int 11) (const_int 27)
7009 (const_int 14) (const_int 30)
7010 (const_int 15) (const_int 31)])))]
7011 "TARGET_AVX512F"
7012 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7013 [(set_attr "type" "sselog")
7014 (set_attr "prefix" "evex")
7015 (set_attr "mode" "V16SF")])
7016
7017 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7018 (define_insn "avx_unpckhps256<mask_name>"
7019 [(set (match_operand:V8SF 0 "register_operand" "=v")
7020 (vec_select:V8SF
7021 (vec_concat:V16SF
7022 (match_operand:V8SF 1 "register_operand" "v")
7023 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7024 (parallel [(const_int 2) (const_int 10)
7025 (const_int 3) (const_int 11)
7026 (const_int 6) (const_int 14)
7027 (const_int 7) (const_int 15)])))]
7028 "TARGET_AVX && <mask_avx512vl_condition>"
7029 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7030 [(set_attr "type" "sselog")
7031 (set_attr "prefix" "vex")
7032 (set_attr "mode" "V8SF")])
7033
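;; vunpck{l,h}ps interleave only within 128-bit lanes, so a full V8SF
;; interleave is built from both in-lane unpacks followed by a lane permute
;; that pairs their upper (for the high case) or lower halves.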
7034 (define_expand "vec_interleave_highv8sf"
7035 [(set (match_dup 3)
7036 (vec_select:V8SF
7037 (vec_concat:V16SF
7038 (match_operand:V8SF 1 "register_operand")
7039 (match_operand:V8SF 2 "nonimmediate_operand"))
7040 (parallel [(const_int 0) (const_int 8)
7041 (const_int 1) (const_int 9)
7042 (const_int 4) (const_int 12)
7043 (const_int 5) (const_int 13)])))
7044 (set (match_dup 4)
7045 (vec_select:V8SF
7046 (vec_concat:V16SF
7047 (match_dup 1)
7048 (match_dup 2))
7049 (parallel [(const_int 2) (const_int 10)
7050 (const_int 3) (const_int 11)
7051 (const_int 6) (const_int 14)
7052 (const_int 7) (const_int 15)])))
7053 (set (match_operand:V8SF 0 "register_operand")
7054 (vec_select:V8SF
7055 (vec_concat:V16SF
7056 (match_dup 3)
7057 (match_dup 4))
7058 (parallel [(const_int 4) (const_int 5)
7059 (const_int 6) (const_int 7)
7060 (const_int 12) (const_int 13)
7061 (const_int 14) (const_int 15)])))]
7062 "TARGET_AVX"
7063 {
7064 operands[3] = gen_reg_rtx (V8SFmode);
7065 operands[4] = gen_reg_rtx (V8SFmode);
7066 })
7067
7068 (define_insn "vec_interleave_highv4sf<mask_name>"
7069 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7070 (vec_select:V4SF
7071 (vec_concat:V8SF
7072 (match_operand:V4SF 1 "register_operand" "0,v")
7073 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7074 (parallel [(const_int 2) (const_int 6)
7075 (const_int 3) (const_int 7)])))]
7076 "TARGET_SSE && <mask_avx512vl_condition>"
7077 "@
7078 unpckhps\t{%2, %0|%0, %2}
7079 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7080 [(set_attr "isa" "noavx,avx")
7081 (set_attr "type" "sselog")
7082 (set_attr "prefix" "orig,vex")
7083 (set_attr "mode" "V4SF")])
7084
7085 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
7086 [(set (match_operand:V16SF 0 "register_operand" "=v")
7087 (vec_select:V16SF
7088 (vec_concat:V32SF
7089 (match_operand:V16SF 1 "register_operand" "v")
7090 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7091 (parallel [(const_int 0) (const_int 16)
7092 (const_int 1) (const_int 17)
7093 (const_int 4) (const_int 20)
7094 (const_int 5) (const_int 21)
7095 (const_int 8) (const_int 24)
7096 (const_int 9) (const_int 25)
7097 (const_int 12) (const_int 28)
7098 (const_int 13) (const_int 29)])))]
7099 "TARGET_AVX512F"
7100 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7101 [(set_attr "type" "sselog")
7102 (set_attr "prefix" "evex")
7103 (set_attr "mode" "V16SF")])
7104
7105 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7106 (define_insn "avx_unpcklps256<mask_name>"
7107 [(set (match_operand:V8SF 0 "register_operand" "=v")
7108 (vec_select:V8SF
7109 (vec_concat:V16SF
7110 (match_operand:V8SF 1 "register_operand" "v")
7111 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7112 (parallel [(const_int 0) (const_int 8)
7113 (const_int 1) (const_int 9)
7114 (const_int 4) (const_int 12)
7115 (const_int 5) (const_int 13)])))]
7116 "TARGET_AVX && <mask_avx512vl_condition>"
7117 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7118 [(set_attr "type" "sselog")
7119 (set_attr "prefix" "vex")
7120 (set_attr "mode" "V8SF")])
7121
7122 (define_insn "unpcklps128_mask"
7123 [(set (match_operand:V4SF 0 "register_operand" "=v")
7124 (vec_merge:V4SF
7125 (vec_select:V4SF
7126 (vec_concat:V8SF
7127 (match_operand:V4SF 1 "register_operand" "v")
7128 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7129 (parallel [(const_int 0) (const_int 4)
7130 (const_int 1) (const_int 5)]))
7131 (match_operand:V4SF 3 "nonimm_or_0_operand" "0C")
7132 (match_operand:QI 4 "register_operand" "Yk")))]
7133 "TARGET_AVX512VL"
7134 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7135 [(set_attr "type" "sselog")
7136 (set_attr "prefix" "evex")
7137 (set_attr "mode" "V4SF")])
7138
7139 (define_expand "vec_interleave_lowv8sf"
7140 [(set (match_dup 3)
7141 (vec_select:V8SF
7142 (vec_concat:V16SF
7143 (match_operand:V8SF 1 "register_operand")
7144 (match_operand:V8SF 2 "nonimmediate_operand"))
7145 (parallel [(const_int 0) (const_int 8)
7146 (const_int 1) (const_int 9)
7147 (const_int 4) (const_int 12)
7148 (const_int 5) (const_int 13)])))
7149 (set (match_dup 4)
7150 (vec_select:V8SF
7151 (vec_concat:V16SF
7152 (match_dup 1)
7153 (match_dup 2))
7154 (parallel [(const_int 2) (const_int 10)
7155 (const_int 3) (const_int 11)
7156 (const_int 6) (const_int 14)
7157 (const_int 7) (const_int 15)])))
7158 (set (match_operand:V8SF 0 "register_operand")
7159 (vec_select:V8SF
7160 (vec_concat:V16SF
7161 (match_dup 3)
7162 (match_dup 4))
7163 (parallel [(const_int 0) (const_int 1)
7164 (const_int 2) (const_int 3)
7165 (const_int 8) (const_int 9)
7166 (const_int 10) (const_int 11)])))]
7167 "TARGET_AVX"
7168 {
7169 operands[3] = gen_reg_rtx (V8SFmode);
7170 operands[4] = gen_reg_rtx (V8SFmode);
7171 })
7172
7173 (define_insn "vec_interleave_lowv4sf"
7174 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7175 (vec_select:V4SF
7176 (vec_concat:V8SF
7177 (match_operand:V4SF 1 "register_operand" "0,v")
7178 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7179 (parallel [(const_int 0) (const_int 4)
7180 (const_int 1) (const_int 5)])))]
7181 "TARGET_SSE"
7182 "@
7183 unpcklps\t{%2, %0|%0, %2}
7184 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
7185 [(set_attr "isa" "noavx,avx")
7186 (set_attr "type" "sselog")
7187 (set_attr "prefix" "orig,maybe_evex")
7188 (set_attr "mode" "V4SF")])
7189
7190 ;; These are modeled with the same vec_concat as the others so that we
7191 ;; capture users of shufps that can use the new instructions.
7192 (define_insn "avx_movshdup256<mask_name>"
7193 [(set (match_operand:V8SF 0 "register_operand" "=v")
7194 (vec_select:V8SF
7195 (vec_concat:V16SF
7196 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7197 (match_dup 1))
7198 (parallel [(const_int 1) (const_int 1)
7199 (const_int 3) (const_int 3)
7200 (const_int 5) (const_int 5)
7201 (const_int 7) (const_int 7)])))]
7202 "TARGET_AVX && <mask_avx512vl_condition>"
7203 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7204 [(set_attr "type" "sse")
7205 (set_attr "prefix" "vex")
7206 (set_attr "mode" "V8SF")])
7207
7208 (define_insn "sse3_movshdup<mask_name>"
7209 [(set (match_operand:V4SF 0 "register_operand" "=v")
7210 (vec_select:V4SF
7211 (vec_concat:V8SF
7212 (match_operand:V4SF 1 "vector_operand" "vBm")
7213 (match_dup 1))
7214 (parallel [(const_int 1)
7215 (const_int 1)
7216 (const_int 7)
7217 (const_int 7)])))]
7218 "TARGET_SSE3 && <mask_avx512vl_condition>"
7219 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7220 [(set_attr "type" "sse")
7221 (set_attr "prefix_rep" "1")
7222 (set_attr "prefix" "maybe_vex")
7223 (set_attr "mode" "V4SF")])
7224
7225 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
7226 [(set (match_operand:V16SF 0 "register_operand" "=v")
7227 (vec_select:V16SF
7228 (vec_concat:V32SF
7229 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7230 (match_dup 1))
7231 (parallel [(const_int 1) (const_int 1)
7232 (const_int 3) (const_int 3)
7233 (const_int 5) (const_int 5)
7234 (const_int 7) (const_int 7)
7235 (const_int 9) (const_int 9)
7236 (const_int 11) (const_int 11)
7237 (const_int 13) (const_int 13)
7238 (const_int 15) (const_int 15)])))]
7239 "TARGET_AVX512F"
7240 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7241 [(set_attr "type" "sse")
7242 (set_attr "prefix" "evex")
7243 (set_attr "mode" "V16SF")])
7244
7245 (define_insn "avx_movsldup256<mask_name>"
7246 [(set (match_operand:V8SF 0 "register_operand" "=v")
7247 (vec_select:V8SF
7248 (vec_concat:V16SF
7249 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7250 (match_dup 1))
7251 (parallel [(const_int 0) (const_int 0)
7252 (const_int 2) (const_int 2)
7253 (const_int 4) (const_int 4)
7254 (const_int 6) (const_int 6)])))]
7255 "TARGET_AVX && <mask_avx512vl_condition>"
7256 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7257 [(set_attr "type" "sse")
7258 (set_attr "prefix" "vex")
7259 (set_attr "mode" "V8SF")])
7260
7261 (define_insn "sse3_movsldup<mask_name>"
7262 [(set (match_operand:V4SF 0 "register_operand" "=v")
7263 (vec_select:V4SF
7264 (vec_concat:V8SF
7265 (match_operand:V4SF 1 "vector_operand" "vBm")
7266 (match_dup 1))
7267 (parallel [(const_int 0)
7268 (const_int 0)
7269 (const_int 6)
7270 (const_int 6)])))]
7271 "TARGET_SSE3 && <mask_avx512vl_condition>"
7272 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7273 [(set_attr "type" "sse")
7274 (set_attr "prefix_rep" "1")
7275 (set_attr "prefix" "maybe_vex")
7276 (set_attr "mode" "V4SF")])
7277
7278 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
7279 [(set (match_operand:V16SF 0 "register_operand" "=v")
7280 (vec_select:V16SF
7281 (vec_concat:V32SF
7282 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7283 (match_dup 1))
7284 (parallel [(const_int 0) (const_int 0)
7285 (const_int 2) (const_int 2)
7286 (const_int 4) (const_int 4)
7287 (const_int 6) (const_int 6)
7288 (const_int 8) (const_int 8)
7289 (const_int 10) (const_int 10)
7290 (const_int 12) (const_int 12)
7291 (const_int 14) (const_int 14)])))]
7292 "TARGET_AVX512F"
7293 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7294 [(set_attr "type" "sse")
7295 (set_attr "prefix" "evex")
7296 (set_attr "mode" "V16SF")])
7297
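;; vshufps applies its four 2-bit immediate fields separately to each
;; 128-bit lane: the first two fields select from operand 1, the last two
;; from operand 2.  In the V16SF vec_concat used below, operand 1's upper
;; lane occupies indices 4-7 and operand 2's lanes indices 8-11 and 12-15,
;; hence the +4, +8 and +12 offsets in the expansion.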
7298 (define_expand "avx_shufps256<mask_expand4_name>"
7299 [(match_operand:V8SF 0 "register_operand")
7300 (match_operand:V8SF 1 "register_operand")
7301 (match_operand:V8SF 2 "nonimmediate_operand")
7302 (match_operand:SI 3 "const_int_operand")]
7303 "TARGET_AVX"
7304 {
7305 int mask = INTVAL (operands[3]);
7306 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
7307 operands[1],
7308 operands[2],
7309 GEN_INT ((mask >> 0) & 3),
7310 GEN_INT ((mask >> 2) & 3),
7311 GEN_INT (((mask >> 4) & 3) + 8),
7312 GEN_INT (((mask >> 6) & 3) + 8),
7313 GEN_INT (((mask >> 0) & 3) + 4),
7314 GEN_INT (((mask >> 2) & 3) + 4),
7315 GEN_INT (((mask >> 4) & 3) + 12),
7316 GEN_INT (((mask >> 6) & 3) + 12)
7317 <mask_expand4_args>));
7318 DONE;
7319 })
7320
7321 ;; Each 2-bit mask field is applied to both 128-bit lanes, so one field selects 2 elements.
7322 (define_insn "avx_shufps256_1<mask_name>"
7323 [(set (match_operand:V8SF 0 "register_operand" "=v")
7324 (vec_select:V8SF
7325 (vec_concat:V16SF
7326 (match_operand:V8SF 1 "register_operand" "v")
7327 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7328 (parallel [(match_operand 3 "const_0_to_3_operand" )
7329 (match_operand 4 "const_0_to_3_operand" )
7330 (match_operand 5 "const_8_to_11_operand" )
7331 (match_operand 6 "const_8_to_11_operand" )
7332 (match_operand 7 "const_4_to_7_operand" )
7333 (match_operand 8 "const_4_to_7_operand" )
7334 (match_operand 9 "const_12_to_15_operand")
7335 (match_operand 10 "const_12_to_15_operand")])))]
7336 "TARGET_AVX
7337 && <mask_avx512vl_condition>
7338 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7339 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7340 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7341 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
7342 {
7343 int mask;
7344 mask = INTVAL (operands[3]);
7345 mask |= INTVAL (operands[4]) << 2;
7346 mask |= (INTVAL (operands[5]) - 8) << 4;
7347 mask |= (INTVAL (operands[6]) - 8) << 6;
7348 operands[3] = GEN_INT (mask);
7349
7350 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7351 }
7352 [(set_attr "type" "sseshuf")
7353 (set_attr "length_immediate" "1")
7354 (set_attr "prefix" "<mask_prefix>")
7355 (set_attr "mode" "V8SF")])
7356
7357 (define_expand "sse_shufps<mask_expand4_name>"
7358 [(match_operand:V4SF 0 "register_operand")
7359 (match_operand:V4SF 1 "register_operand")
7360 (match_operand:V4SF 2 "vector_operand")
7361 (match_operand:SI 3 "const_int_operand")]
7362 "TARGET_SSE"
7363 {
7364 int mask = INTVAL (operands[3]);
7365 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
7366 operands[1],
7367 operands[2],
7368 GEN_INT ((mask >> 0) & 3),
7369 GEN_INT ((mask >> 2) & 3),
7370 GEN_INT (((mask >> 4) & 3) + 4),
7371 GEN_INT (((mask >> 6) & 3) + 4)
7372 <mask_expand4_args>));
7373 DONE;
7374 })
7375
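;; The shufps insn patterns below do the reverse mapping: operand 2's
;; elements are numbered 4-7 in the V8SF vec_concat, so 4 is subtracted
;; before the four 2-bit fields are packed back into the immediate.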
7376 (define_insn "sse_shufps_v4sf_mask"
7377 [(set (match_operand:V4SF 0 "register_operand" "=v")
7378 (vec_merge:V4SF
7379 (vec_select:V4SF
7380 (vec_concat:V8SF
7381 (match_operand:V4SF 1 "register_operand" "v")
7382 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7383 (parallel [(match_operand 3 "const_0_to_3_operand")
7384 (match_operand 4 "const_0_to_3_operand")
7385 (match_operand 5 "const_4_to_7_operand")
7386 (match_operand 6 "const_4_to_7_operand")]))
7387 (match_operand:V4SF 7 "nonimm_or_0_operand" "0C")
7388 (match_operand:QI 8 "register_operand" "Yk")))]
7389 "TARGET_AVX512VL"
7390 {
7391 int mask = 0;
7392 mask |= INTVAL (operands[3]) << 0;
7393 mask |= INTVAL (operands[4]) << 2;
7394 mask |= (INTVAL (operands[5]) - 4) << 4;
7395 mask |= (INTVAL (operands[6]) - 4) << 6;
7396 operands[3] = GEN_INT (mask);
7397
7398 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
7399 }
7400 [(set_attr "type" "sseshuf")
7401 (set_attr "length_immediate" "1")
7402 (set_attr "prefix" "evex")
7403 (set_attr "mode" "V4SF")])
7404
7405 (define_insn "sse_shufps_<mode>"
7406 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
7407 (vec_select:VI4F_128
7408 (vec_concat:<ssedoublevecmode>
7409 (match_operand:VI4F_128 1 "register_operand" "0,v")
7410 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
7411 (parallel [(match_operand 3 "const_0_to_3_operand")
7412 (match_operand 4 "const_0_to_3_operand")
7413 (match_operand 5 "const_4_to_7_operand")
7414 (match_operand 6 "const_4_to_7_operand")])))]
7415 "TARGET_SSE"
7416 {
7417 int mask = 0;
7418 mask |= INTVAL (operands[3]) << 0;
7419 mask |= INTVAL (operands[4]) << 2;
7420 mask |= (INTVAL (operands[5]) - 4) << 4;
7421 mask |= (INTVAL (operands[6]) - 4) << 6;
7422 operands[3] = GEN_INT (mask);
7423
7424 switch (which_alternative)
7425 {
7426 case 0:
7427 return "shufps\t{%3, %2, %0|%0, %2, %3}";
7428 case 1:
7429 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7430 default:
7431 gcc_unreachable ();
7432 }
7433 }
7434 [(set_attr "isa" "noavx,avx")
7435 (set_attr "type" "sseshuf")
7436 (set_attr "length_immediate" "1")
7437 (set_attr "prefix" "orig,maybe_evex")
7438 (set_attr "mode" "V4SF")])
7439
7440 (define_insn "sse_storehps"
7441 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
7442 (vec_select:V2SF
7443 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
7444 (parallel [(const_int 2) (const_int 3)])))]
7445 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7446 "@
7447 %vmovhps\t{%1, %0|%q0, %1}
7448 %vmovhlps\t{%1, %d0|%d0, %1}
7449 %vmovlps\t{%H1, %d0|%d0, %H1}"
7450 [(set_attr "type" "ssemov")
7451 (set_attr "prefix" "maybe_vex")
7452 (set_attr "mode" "V2SF,V4SF,V2SF")])
7453
7454 (define_expand "sse_loadhps_exp"
7455 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7456 (vec_concat:V4SF
7457 (vec_select:V2SF
7458 (match_operand:V4SF 1 "nonimmediate_operand")
7459 (parallel [(const_int 0) (const_int 1)]))
7460 (match_operand:V2SF 2 "nonimmediate_operand")))]
7461 "TARGET_SSE"
7462 {
7463 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7464
7465 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
7466
7467 /* Fix up the destination if needed. */
7468 if (dst != operands[0])
7469 emit_move_insn (operands[0], dst);
7470
7471 DONE;
7472 })
7473
7474 (define_insn "sse_loadhps"
7475 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
7476 (vec_concat:V4SF
7477 (vec_select:V2SF
7478 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7479 (parallel [(const_int 0) (const_int 1)]))
7480 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
7481 "TARGET_SSE"
7482 "@
7483 movhps\t{%2, %0|%0, %q2}
7484 vmovhps\t{%2, %1, %0|%0, %1, %q2}
7485 movlhps\t{%2, %0|%0, %2}
7486 vmovlhps\t{%2, %1, %0|%0, %1, %2}
7487 %vmovlps\t{%2, %H0|%H0, %2}"
7488 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7489 (set_attr "type" "ssemov")
7490 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7491 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
7492
7493 (define_insn "sse_storelps"
7494 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
7495 (vec_select:V2SF
7496 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
7497 (parallel [(const_int 0) (const_int 1)])))]
7498 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7499 "@
7500 %vmovlps\t{%1, %0|%q0, %1}
7501 %vmovaps\t{%1, %0|%0, %1}
7502 %vmovlps\t{%1, %d0|%d0, %q1}"
7503 [(set_attr "type" "ssemov")
7504 (set_attr "prefix" "maybe_vex")
7505 (set_attr "mode" "V2SF,V4SF,V2SF")])
7506
7507 (define_expand "sse_loadlps_exp"
7508 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7509 (vec_concat:V4SF
7510 (match_operand:V2SF 2 "nonimmediate_operand")
7511 (vec_select:V2SF
7512 (match_operand:V4SF 1 "nonimmediate_operand")
7513 (parallel [(const_int 2) (const_int 3)]))))]
7514 "TARGET_SSE"
7515 {
7516 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7517
7518 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
7519
7520 /* Fix up the destination if needed. */
7521 if (dst != operands[0])
7522 emit_move_insn (operands[0], dst);
7523
7524 DONE;
7525 })
7526
7527 (define_insn "sse_loadlps"
7528 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
7529 (vec_concat:V4SF
7530 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
7531 (vec_select:V2SF
7532 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
7533 (parallel [(const_int 2) (const_int 3)]))))]
7534 "TARGET_SSE"
7535 "@
7536 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
7537 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
7538 movlps\t{%2, %0|%0, %q2}
7539 vmovlps\t{%2, %1, %0|%0, %1, %q2}
7540 %vmovlps\t{%2, %0|%q0, %2}"
7541 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7542 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
7543 (set (attr "length_immediate")
7544 (if_then_else (eq_attr "alternative" "0,1")
7545 (const_string "1")
7546 (const_string "*")))
7547 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7548 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
7549
7550 (define_insn "sse_movss"
7551 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7552 (vec_merge:V4SF
7553 (match_operand:V4SF 2 "register_operand" " x,v")
7554 (match_operand:V4SF 1 "register_operand" " 0,v")
7555 (const_int 1)))]
7556 "TARGET_SSE"
7557 "@
7558 movss\t{%2, %0|%0, %2}
7559 vmovss\t{%2, %1, %0|%0, %1, %2}"
7560 [(set_attr "isa" "noavx,avx")
7561 (set_attr "type" "ssemov")
7562 (set_attr "prefix" "orig,maybe_evex")
7563 (set_attr "mode" "SF")])
7564
7565 (define_insn "avx2_vec_dup<mode>"
7566 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
7567 (vec_duplicate:VF1_128_256
7568 (vec_select:SF
7569 (match_operand:V4SF 1 "register_operand" "v")
7570 (parallel [(const_int 0)]))))]
7571 "TARGET_AVX2"
7572 "vbroadcastss\t{%1, %0|%0, %1}"
7573 [(set_attr "type" "sselog1")
7574 (set_attr "prefix" "maybe_evex")
7575 (set_attr "mode" "<MODE>")])
7576
7577 (define_insn "avx2_vec_dupv8sf_1"
7578 [(set (match_operand:V8SF 0 "register_operand" "=v")
7579 (vec_duplicate:V8SF
7580 (vec_select:SF
7581 (match_operand:V8SF 1 "register_operand" "v")
7582 (parallel [(const_int 0)]))))]
7583 "TARGET_AVX2"
7584 "vbroadcastss\t{%x1, %0|%0, %x1}"
7585 [(set_attr "type" "sselog1")
7586 (set_attr "prefix" "maybe_evex")
7587 (set_attr "mode" "V8SF")])
7588
7589 (define_insn "avx512f_vec_dup<mode>_1"
7590 [(set (match_operand:VF_512 0 "register_operand" "=v")
7591 (vec_duplicate:VF_512
7592 (vec_select:<ssescalarmode>
7593 (match_operand:VF_512 1 "register_operand" "v")
7594 (parallel [(const_int 0)]))))]
7595 "TARGET_AVX512F"
7596 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
7597 [(set_attr "type" "sselog1")
7598 (set_attr "prefix" "evex")
7599 (set_attr "mode" "<MODE>")])
7600
7601 ;; Although insertps takes register source, we prefer
7602 ;; unpcklps with register source since it is shorter.
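;; (unpcklps is a short 0F-map encoding with no immediate, while insertps
;; needs the longer 0F 3A escape plus an immediate byte.)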
7603 (define_insn "*vec_concatv2sf_sse4_1"
7604 [(set (match_operand:V2SF 0 "register_operand"
7605 "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
7606 (vec_concat:V2SF
7607 (match_operand:SF 1 "nonimmediate_operand"
7608 " 0, 0,Yv, 0,0, v,m, 0 , m")
7609 (match_operand:SF 2 "nonimm_or_0_operand"
7610 " Yr,*x,Yv, m,m, m,C,*ym, C")))]
7611 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7612 "@
7613 unpcklps\t{%2, %0|%0, %2}
7614 unpcklps\t{%2, %0|%0, %2}
7615 vunpcklps\t{%2, %1, %0|%0, %1, %2}
7616 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
7617 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
7618 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
7619 %vmovss\t{%1, %0|%0, %1}
7620 punpckldq\t{%2, %0|%0, %2}
7621 movd\t{%1, %0|%0, %1}"
7622 [(set (attr "isa")
7623 (cond [(eq_attr "alternative" "0,1,3,4")
7624 (const_string "noavx")
7625 (eq_attr "alternative" "2,5")
7626 (const_string "avx")
7627 ]
7628 (const_string "*")))
7629 (set (attr "type")
7630 (cond [(eq_attr "alternative" "6")
7631 (const_string "ssemov")
7632 (eq_attr "alternative" "7")
7633 (const_string "mmxcvt")
7634 (eq_attr "alternative" "8")
7635 (const_string "mmxmov")
7636 ]
7637 (const_string "sselog")))
7638 (set (attr "mmx_isa")
7639 (if_then_else (eq_attr "alternative" "7,8")
7640 (const_string "native")
7641 (const_string "*")))
7642 (set (attr "prefix_data16")
7643 (if_then_else (eq_attr "alternative" "3,4")
7644 (const_string "1")
7645 (const_string "*")))
7646 (set (attr "prefix_extra")
7647 (if_then_else (eq_attr "alternative" "3,4,5")
7648 (const_string "1")
7649 (const_string "*")))
7650 (set (attr "length_immediate")
7651 (if_then_else (eq_attr "alternative" "3,4,5")
7652 (const_string "1")
7653 (const_string "*")))
7654 (set (attr "prefix")
7655 (cond [(eq_attr "alternative" "2,5")
7656 (const_string "maybe_evex")
7657 (eq_attr "alternative" "6")
7658 (const_string "maybe_vex")
7659 ]
7660 (const_string "orig")))
7661 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
7662
7663 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7664 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
7665 ;; alternatives pretty much forces the MMX alternative to be chosen.
7666 (define_insn "*vec_concatv2sf_sse"
7667 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
7668 (vec_concat:V2SF
7669 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
7670 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
7671 "TARGET_SSE"
7672 "@
7673 unpcklps\t{%2, %0|%0, %2}
7674 movss\t{%1, %0|%0, %1}
7675 punpckldq\t{%2, %0|%0, %2}
7676 movd\t{%1, %0|%0, %1}"
7677 [(set_attr "mmx_isa" "*,*,native,native")
7678 (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7679 (set_attr "mode" "V4SF,SF,DI,DI")])
7680
7681 (define_insn "*vec_concatv4sf"
7682 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
7683 (vec_concat:V4SF
7684 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
7685 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
7686 "TARGET_SSE"
7687 "@
7688 movlhps\t{%2, %0|%0, %2}
7689 vmovlhps\t{%2, %1, %0|%0, %1, %2}
7690 movhps\t{%2, %0|%0, %q2}
7691 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
7692 [(set_attr "isa" "noavx,avx,noavx,avx")
7693 (set_attr "type" "ssemov")
7694 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
7695 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
7696
7697 (define_insn "*vec_concatv4sf_0"
7698 [(set (match_operand:V4SF 0 "register_operand" "=v")
7699 (vec_concat:V4SF
7700 (match_operand:V2SF 1 "nonimmediate_operand" "xm")
7701 (match_operand:V2SF 2 "const0_operand" " C")))]
7702 "TARGET_SSE2"
7703 "%vmovq\t{%1, %0|%0, %1}"
7704 [(set_attr "type" "ssemov")
7705 (set_attr "prefix" "maybe_vex")
7706 (set_attr "mode" "DF")])
7707
7708 ;; Avoid combining registers from different units in a single alternative,
7709 ;; see comment above inline_secondary_memory_needed function in i386.c
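;; The insertps alternatives below use the immediate 0xe: source and
;; destination element 0 with a zero-mask of 0b1110, so the scalar lands
;; in element 0 and elements 1-3 of the destination are cleared.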
7710 (define_insn "vec_set<mode>_0"
7711 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
7712 "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x ,m ,m ,m")
7713 (vec_merge:VI4F_128
7714 (vec_duplicate:VI4F_128
7715 (match_operand:<ssescalarmode> 2 "general_operand"
7716 " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
7717 (match_operand:VI4F_128 1 "nonimm_or_0_operand"
7718 " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
7719 (const_int 1)))]
7720 "TARGET_SSE"
7721 "@
7722 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
7723 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
7724 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
7725 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
7726 %vmovd\t{%2, %0|%0, %2}
7727 movss\t{%2, %0|%0, %2}
7728 movss\t{%2, %0|%0, %2}
7729 vmovss\t{%2, %1, %0|%0, %1, %2}
7730 pinsrd\t{$0, %2, %0|%0, %2, 0}
7731 pinsrd\t{$0, %2, %0|%0, %2, 0}
7732 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
7733 #
7734 #
7735 #"
7736 [(set (attr "isa")
7737 (cond [(eq_attr "alternative" "0,1,8,9")
7738 (const_string "sse4_noavx")
7739 (eq_attr "alternative" "2,7,10")
7740 (const_string "avx")
7741 (eq_attr "alternative" "3,4")
7742 (const_string "sse2")
7743 (eq_attr "alternative" "5,6")
7744 (const_string "noavx")
7745 ]
7746 (const_string "*")))
7747 (set (attr "type")
7748 (cond [(eq_attr "alternative" "0,1,2,8,9,10")
7749 (const_string "sselog")
7750 (eq_attr "alternative" "12")
7751 (const_string "imov")
7752 (eq_attr "alternative" "13")
7753 (const_string "fmov")
7754 ]
7755 (const_string "ssemov")))
7756 (set (attr "prefix_extra")
7757 (if_then_else (eq_attr "alternative" "8,9,10")
7758 (const_string "1")
7759 (const_string "*")))
7760 (set (attr "length_immediate")
7761 (if_then_else (eq_attr "alternative" "8,9,10")
7762 (const_string "1")
7763 (const_string "*")))
7764 (set (attr "prefix")
7765 (cond [(eq_attr "alternative" "0,1,5,6,8,9")
7766 (const_string "orig")
7767 (eq_attr "alternative" "2")
7768 (const_string "maybe_evex")
7769 (eq_attr "alternative" "3,4")
7770 (const_string "maybe_vex")
7771 (eq_attr "alternative" "7,10")
7772 (const_string "vex")
7773 ]
7774 (const_string "*")))
7775 (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
7776 (set (attr "preferred_for_speed")
7777 (cond [(eq_attr "alternative" "4")
7778 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
7779 ]
7780 (symbol_ref "true")))])
7781
7782 ;; A subset is vec_setv4sf.
7783 (define_insn "*vec_setv4sf_sse4_1"
7784 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7785 (vec_merge:V4SF
7786 (vec_duplicate:V4SF
7787 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
7788 (match_operand:V4SF 1 "register_operand" "0,0,v")
7789 (match_operand:SI 3 "const_int_operand")))]
7790 "TARGET_SSE4_1
7791 && ((unsigned) exact_log2 (INTVAL (operands[3]))
7792 < GET_MODE_NUNITS (V4SFmode))"
7793 {
7794 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
7795 switch (which_alternative)
7796 {
7797 case 0:
7798 case 1:
7799 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7800 case 2:
7801 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7802 default:
7803 gcc_unreachable ();
7804 }
7805 }
7806 [(set_attr "isa" "noavx,noavx,avx")
7807 (set_attr "type" "sselog")
7808 (set_attr "prefix_data16" "1,1,*")
7809 (set_attr "prefix_extra" "1")
7810 (set_attr "length_immediate" "1")
7811 (set_attr "prefix" "orig,orig,maybe_evex")
7812 (set_attr "mode" "V4SF")])
7813
7814 ;; All of vinsertps, vmovss and vmovd also clear the higher bits.
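;; That lets the 256/512-bit vec_set<mode>_0 below operate only on the low
;; 128 bits and require operand 1 to be zero.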
7815 (define_insn "vec_set<mode>_0"
7816 [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,v")
7817 (vec_merge:VI4F_256_512
7818 (vec_duplicate:VI4F_256_512
7819 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "v,m,r"))
7820 (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
7821 (const_int 1)))]
7822 "TARGET_AVX"
7823 "@
7824 vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
7825 vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
7826 vmovd\t{%2, %x0|%x0, %2}"
7827 [(set (attr "type")
7828 (if_then_else (eq_attr "alternative" "0")
7829 (const_string "sselog")
7830 (const_string "ssemov")))
7831 (set_attr "prefix" "maybe_evex")
7832 (set_attr "mode" "SF,<ssescalarmode>,SI")
7833 (set (attr "preferred_for_speed")
7834 (cond [(eq_attr "alternative" "2")
7835 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
7836 ]
7837 (symbol_ref "true")))])
7838
7839 (define_insn "sse4_1_insertps"
7840 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7841 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
7842 (match_operand:V4SF 1 "register_operand" "0,0,v")
7843 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
7844 UNSPEC_INSERTPS))]
7845 "TARGET_SSE4_1"
7846 {
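/* If the source is a memory operand, insertps loads a single SFmode
   element, so fold the COUNT_S selector bits into the address and clear
   them from the immediate.  */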
7847 if (MEM_P (operands[2]))
7848 {
7849 unsigned count_s = INTVAL (operands[3]) >> 6;
7850 if (count_s)
7851 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
7852 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
7853 }
7854 switch (which_alternative)
7855 {
7856 case 0:
7857 case 1:
7858 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7859 case 2:
7860 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7861 default:
7862 gcc_unreachable ();
7863 }
7864 }
7865 [(set_attr "isa" "noavx,noavx,avx")
7866 (set_attr "type" "sselog")
7867 (set_attr "prefix_data16" "1,1,*")
7868 (set_attr "prefix_extra" "1")
7869 (set_attr "length_immediate" "1")
7870 (set_attr "prefix" "orig,orig,maybe_evex")
7871 (set_attr "mode" "V4SF")])
7872
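;; If the destination lives in memory, setting element 0 is just a scalar
;; store of the new value at offset 0.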
7873 (define_split
7874 [(set (match_operand:VI4F_128 0 "memory_operand")
7875 (vec_merge:VI4F_128
7876 (vec_duplicate:VI4F_128
7877 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
7878 (match_dup 0)
7879 (const_int 1)))]
7880 "TARGET_SSE && reload_completed"
7881 [(set (match_dup 0) (match_dup 1))]
7882 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
7883
7884 (define_expand "vec_set<mode>"
7885 [(match_operand:V 0 "register_operand")
7886 (match_operand:<ssescalarmode> 1 "register_operand")
7887 (match_operand 2 "const_int_operand")]
7888 "TARGET_SSE"
7889 {
7890 ix86_expand_vector_set (false, operands[0], operands[1],
7891 INTVAL (operands[2]));
7892 DONE;
7893 })
7894
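;; Extracting element 0 only needs the low 32 bits of the source, so it
;; splits into a plain SFmode move after reload.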
7895 (define_insn_and_split "*vec_extractv4sf_0"
7896 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
7897 (vec_select:SF
7898 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
7899 (parallel [(const_int 0)])))]
7900 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7901 "#"
7902 "&& reload_completed"
7903 [(set (match_dup 0) (match_dup 1))]
7904 "operands[1] = gen_lowpart (SFmode, operands[1]);")
7905
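;; extractps can only put the selected element into a GPR or memory.  When
;; the result is wanted in an SSE register, split into a shuffle that moves
;; the selected element into position 0 instead.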
7906 (define_insn_and_split "*sse4_1_extractps"
7907 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
7908 (vec_select:SF
7909 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
7910 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
7911 "TARGET_SSE4_1"
7912 "@
7913 extractps\t{%2, %1, %0|%0, %1, %2}
7914 extractps\t{%2, %1, %0|%0, %1, %2}
7915 vextractps\t{%2, %1, %0|%0, %1, %2}
7916 #
7917 #"
7918 "&& reload_completed && SSE_REG_P (operands[0])"
7919 [(const_int 0)]
7920 {
7921 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
7922 switch (INTVAL (operands[2]))
7923 {
7924 case 1:
7925 case 3:
7926 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
7927 operands[2], operands[2],
7928 GEN_INT (INTVAL (operands[2]) + 4),
7929 GEN_INT (INTVAL (operands[2]) + 4)));
7930 break;
7931 case 2:
7932 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
7933 break;
7934 default:
7935 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
7936 gcc_unreachable ();
7937 }
7938 DONE;
7939 }
7940 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
7941 (set_attr "type" "sselog,sselog,sselog,*,*")
7942 (set_attr "prefix_data16" "1,1,1,*,*")
7943 (set_attr "prefix_extra" "1,1,1,*,*")
7944 (set_attr "length_immediate" "1,1,1,*,*")
7945 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
7946 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
7947
7948 (define_insn_and_split "*vec_extractv4sf_mem"
7949 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
7950 (vec_select:SF
7951 (match_operand:V4SF 1 "memory_operand" "o,o,o")
7952 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
7953 "TARGET_SSE"
7954 "#"
7955 "&& reload_completed"
7956 [(set (match_dup 0) (match_dup 1))]
7957 {
7958 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
7959 })
7960
7961 (define_mode_attr extract_type
7962 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
7963
7964 (define_mode_attr extract_suf
7965 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
7966
7967 (define_mode_iterator AVX512_VEC
7968 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
7969
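;; Operand 2 selects the 128-bit quarter to extract: the vec_select indices
;; are mask * 4 .. mask * 4 + 3 for the 32x4 forms and mask * 2, mask * 2 + 1
;; for the 64x2 forms.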
7970 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
7971 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
7972 (match_operand:AVX512_VEC 1 "register_operand")
7973 (match_operand:SI 2 "const_0_to_3_operand")
7974 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
7975 (match_operand:QI 4 "register_operand")]
7976 "TARGET_AVX512F"
7977 {
7978 int mask;
7979 mask = INTVAL (operands[2]);
7980 rtx dest = operands[0];
7981
7982 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
7983 dest = gen_reg_rtx (<ssequartermode>mode);
7984
7985 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
7986 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
7987 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
7988 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
7989 operands[4]));
7990 else
7991 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
7992 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
7993 operands[4]));
7994 if (dest != operands[0])
7995 emit_move_insn (operands[0], dest);
7996 DONE;
7997 })
7998
7999 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
8000 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
8001 (vec_merge:<ssequartermode>
8002 (vec_select:<ssequartermode>
8003 (match_operand:V8FI 1 "register_operand" "v")
8004 (parallel [(match_operand 2 "const_0_to_7_operand")
8005 (match_operand 3 "const_0_to_7_operand")]))
8006 (match_operand:<ssequartermode> 4 "memory_operand" "0")
8007 (match_operand:QI 5 "register_operand" "Yk")))]
8008 "TARGET_AVX512DQ
8009 && INTVAL (operands[2]) % 2 == 0
8010 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8011 && rtx_equal_p (operands[4], operands[0])"
8012 {
8013 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
8014 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
8015 }
8016 [(set_attr "type" "sselog")
8017 (set_attr "prefix_extra" "1")
8018 (set_attr "length_immediate" "1")
8019 (set_attr "memory" "store")
8020 (set_attr "prefix" "evex")
8021 (set_attr "mode" "<sseinsnmode>")])
8022
8023 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
8024 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
8025 (vec_merge:<ssequartermode>
8026 (vec_select:<ssequartermode>
8027 (match_operand:V16FI 1 "register_operand" "v")
8028 (parallel [(match_operand 2 "const_0_to_15_operand")
8029 (match_operand 3 "const_0_to_15_operand")
8030 (match_operand 4 "const_0_to_15_operand")
8031 (match_operand 5 "const_0_to_15_operand")]))
8032 (match_operand:<ssequartermode> 6 "memory_operand" "0")
8033 (match_operand:QI 7 "register_operand" "Yk")))]
8034 "TARGET_AVX512F
8035 && INTVAL (operands[2]) % 4 == 0
8036 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8037 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8038 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
8039 && rtx_equal_p (operands[6], operands[0])"
8040 {
8041 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8042 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
8043 }
8044 [(set_attr "type" "sselog")
8045 (set_attr "prefix_extra" "1")
8046 (set_attr "length_immediate" "1")
8047 (set_attr "memory" "store")
8048 (set_attr "prefix" "evex")
8049 (set_attr "mode" "<sseinsnmode>")])
8050
8051 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
8052 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
8053 (vec_select:<ssequartermode>
8054 (match_operand:V8FI 1 "register_operand" "v")
8055 (parallel [(match_operand 2 "const_0_to_7_operand")
8056 (match_operand 3 "const_0_to_7_operand")])))]
8057 "TARGET_AVX512DQ
8058 && INTVAL (operands[2]) % 2 == 0
8059 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
8060 {
8061 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8062 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
8063 }
8064 [(set_attr "type" "sselog1")
8065 (set_attr "prefix_extra" "1")
8066 (set_attr "length_immediate" "1")
8067 (set_attr "prefix" "evex")
8068 (set_attr "mode" "<sseinsnmode>")])
8069
8070 (define_split
8071 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8072 (vec_select:<ssequartermode>
8073 (match_operand:V8FI 1 "register_operand")
8074 (parallel [(const_int 0) (const_int 1)])))]
8075 "TARGET_AVX512DQ
8076 && reload_completed
8077 && (TARGET_AVX512VL
8078 || REG_P (operands[0])
8079 || !EXT_REX_SSE_REG_P (operands[1]))"
8080 [(set (match_dup 0) (match_dup 1))]
8081 {
8082 if (!TARGET_AVX512VL
8083 && REG_P (operands[0])
8084 && EXT_REX_SSE_REG_P (operands[1]))
8085 operands[0]
8086 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8087 else
8088 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
8089 })
8090
8091 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
8092 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
8093 (vec_select:<ssequartermode>
8094 (match_operand:V16FI 1 "register_operand" "v")
8095 (parallel [(match_operand 2 "const_0_to_15_operand")
8096 (match_operand 3 "const_0_to_15_operand")
8097 (match_operand 4 "const_0_to_15_operand")
8098 (match_operand 5 "const_0_to_15_operand")])))]
8099 "TARGET_AVX512F
8100 && INTVAL (operands[2]) % 4 == 0
8101 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8102 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8103 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
8104 {
8105 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8106 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
8107 }
8108 [(set_attr "type" "sselog1")
8109 (set_attr "prefix_extra" "1")
8110 (set_attr "length_immediate" "1")
8111 (set_attr "prefix" "evex")
8112 (set_attr "mode" "<sseinsnmode>")])
8113
8114 (define_split
8115 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8116 (vec_select:<ssequartermode>
8117 (match_operand:V16FI 1 "register_operand")
8118 (parallel [(const_int 0) (const_int 1)
8119 (const_int 2) (const_int 3)])))]
8120 "TARGET_AVX512F
8121 && reload_completed
8122 && (TARGET_AVX512VL
8123 || REG_P (operands[0])
8124 || !EXT_REX_SSE_REG_P (operands[1]))"
8125 [(set (match_dup 0) (match_dup 1))]
8126 {
8127 if (!TARGET_AVX512VL
8128 && REG_P (operands[0])
8129 && EXT_REX_SSE_REG_P (operands[1]))
8130 operands[0]
8131 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8132 else
8133 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
8134 })
8135
8136 (define_mode_attr extract_type_2
8137 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
8138
8139 (define_mode_attr extract_suf_2
8140 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
8141
8142 (define_mode_iterator AVX512_VEC_2
8143 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
8144
8145 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
8146 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8147 (match_operand:AVX512_VEC_2 1 "register_operand")
8148 (match_operand:SI 2 "const_0_to_1_operand")
8149 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
8150 (match_operand:QI 4 "register_operand")]
8151 "TARGET_AVX512F"
8152 {
8153 rtx (*insn)(rtx, rtx, rtx, rtx);
8154 rtx dest = operands[0];
8155
8156 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
8157 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8158
8159 switch (INTVAL (operands[2]))
8160 {
8161 case 0:
8162 insn = gen_vec_extract_lo_<mode>_mask;
8163 break;
8164 case 1:
8165 insn = gen_vec_extract_hi_<mode>_mask;
8166 break;
8167 default:
8168 gcc_unreachable ();
8169 }
8170
8171 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8172 if (dest != operands[0])
8173 emit_move_insn (operands[0], dest);
8174 DONE;
8175 })
8176
8177 (define_split
8178 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8179 (vec_select:<ssehalfvecmode>
8180 (match_operand:V8FI 1 "nonimmediate_operand")
8181 (parallel [(const_int 0) (const_int 1)
8182 (const_int 2) (const_int 3)])))]
8183 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8184 && reload_completed
8185 && (TARGET_AVX512VL
8186 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
8187 [(set (match_dup 0) (match_dup 1))]
8188 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8189
8190 (define_insn "vec_extract_lo_<mode>_maskm"
8191 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8192 (vec_merge:<ssehalfvecmode>
8193 (vec_select:<ssehalfvecmode>
8194 (match_operand:V8FI 1 "register_operand" "v")
8195 (parallel [(const_int 0) (const_int 1)
8196 (const_int 2) (const_int 3)]))
8197 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8198 (match_operand:QI 3 "register_operand" "Yk")))]
8199 "TARGET_AVX512F
8200 && rtx_equal_p (operands[2], operands[0])"
8201 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
8202 [(set_attr "type" "sselog1")
8203 (set_attr "prefix_extra" "1")
8204 (set_attr "length_immediate" "1")
8205 (set_attr "prefix" "evex")
8206 (set_attr "mode" "<sseinsnmode>")])
8207
8208 (define_insn "vec_extract_lo_<mode><mask_name>"
8209 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>,v")
8210 (vec_select:<ssehalfvecmode>
8211 (match_operand:V8FI 1 "<store_mask_predicate>" "v,v,<store_mask_constraint>")
8212 (parallel [(const_int 0) (const_int 1)
8213 (const_int 2) (const_int 3)])))]
8214 "TARGET_AVX512F
8215 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8216 {
8217 if (<mask_applied> || (!TARGET_AVX512VL && !MEM_P (operands[1])))
8218 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
8219 else
8220 return "#";
8221 }
8222 [(set_attr "type" "sselog1")
8223 (set_attr "prefix_extra" "1")
8224 (set_attr "length_immediate" "1")
8225 (set_attr "memory" "none,store,load")
8226 (set_attr "prefix" "evex")
8227 (set_attr "mode" "<sseinsnmode>")])
8228
8229 (define_insn "vec_extract_hi_<mode>_maskm"
8230 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8231 (vec_merge:<ssehalfvecmode>
8232 (vec_select:<ssehalfvecmode>
8233 (match_operand:V8FI 1 "register_operand" "v")
8234 (parallel [(const_int 4) (const_int 5)
8235 (const_int 6) (const_int 7)]))
8236 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8237 (match_operand:QI 3 "register_operand" "Yk")))]
8238 "TARGET_AVX512F
8239 && rtx_equal_p (operands[2], operands[0])"
8240 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
8241 [(set_attr "type" "sselog")
8242 (set_attr "prefix_extra" "1")
8243 (set_attr "length_immediate" "1")
8244 (set_attr "memory" "store")
8245 (set_attr "prefix" "evex")
8246 (set_attr "mode" "<sseinsnmode>")])
8247
8248 (define_insn "vec_extract_hi_<mode><mask_name>"
8249 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
8250 (vec_select:<ssehalfvecmode>
8251 (match_operand:V8FI 1 "register_operand" "v")
8252 (parallel [(const_int 4) (const_int 5)
8253 (const_int 6) (const_int 7)])))]
8254 "TARGET_AVX512F"
8255 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
8256 [(set_attr "type" "sselog1")
8257 (set_attr "prefix_extra" "1")
8258 (set_attr "length_immediate" "1")
8259 (set_attr "prefix" "evex")
8260 (set_attr "mode" "<sseinsnmode>")])
8261
8262 (define_insn "vec_extract_hi_<mode>_maskm"
8263 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8264 (vec_merge:<ssehalfvecmode>
8265 (vec_select:<ssehalfvecmode>
8266 (match_operand:V16FI 1 "register_operand" "v")
8267 (parallel [(const_int 8) (const_int 9)
8268 (const_int 10) (const_int 11)
8269 (const_int 12) (const_int 13)
8270 (const_int 14) (const_int 15)]))
8271 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8272 (match_operand:QI 3 "register_operand" "Yk")))]
8273 "TARGET_AVX512DQ
8274 && rtx_equal_p (operands[2], operands[0])"
8275 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
8276 [(set_attr "type" "sselog1")
8277 (set_attr "prefix_extra" "1")
8278 (set_attr "length_immediate" "1")
8279 (set_attr "prefix" "evex")
8280 (set_attr "mode" "<sseinsnmode>")])
8281
8282 (define_insn "vec_extract_hi_<mode><mask_name>"
8283 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
8284 (vec_select:<ssehalfvecmode>
8285 (match_operand:V16FI 1 "register_operand" "v,v")
8286 (parallel [(const_int 8) (const_int 9)
8287 (const_int 10) (const_int 11)
8288 (const_int 12) (const_int 13)
8289 (const_int 14) (const_int 15)])))]
8290 "TARGET_AVX512F && <mask_avx512dq_condition>"
8291 "@
8292 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
8293 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8294 [(set_attr "type" "sselog1")
8295 (set_attr "prefix_extra" "1")
8296 (set_attr "isa" "avx512dq,noavx512dq")
8297 (set_attr "length_immediate" "1")
8298 (set_attr "prefix" "evex")
8299 (set_attr "mode" "<sseinsnmode>")])
8300
8301 (define_expand "avx512vl_vextractf128<mode>"
8302 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8303 (match_operand:VI48F_256 1 "register_operand")
8304 (match_operand:SI 2 "const_0_to_1_operand")
8305 (match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand")
8306 (match_operand:QI 4 "register_operand")]
8307 "TARGET_AVX512DQ && TARGET_AVX512VL"
8308 {
8309 rtx (*insn)(rtx, rtx, rtx, rtx);
8310 rtx dest = operands[0];
8311
8312 if (MEM_P (dest)
8313 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
8314 /* For V8S[IF]mode there are maskm insns with =m and 0
8315 constraints. */
8316 ? !rtx_equal_p (dest, operands[3])
8317 /* For V4D[IF]mode, hi insns don't allow memory, and
8318 lo insns have =m and 0C constraints. */
8319 : (operands[2] != const0_rtx
8320 || (!rtx_equal_p (dest, operands[3])
8321 && GET_CODE (operands[3]) != CONST_VECTOR))))
8322 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8323 switch (INTVAL (operands[2]))
8324 {
8325 case 0:
8326 insn = gen_vec_extract_lo_<mode>_mask;
8327 break;
8328 case 1:
8329 insn = gen_vec_extract_hi_<mode>_mask;
8330 break;
8331 default:
8332 gcc_unreachable ();
8333 }
8334
8335 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8336 if (dest != operands[0])
8337 emit_move_insn (operands[0], dest);
8338 DONE;
8339 })
8340
8341 (define_expand "avx_vextractf128<mode>"
8342 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8343 (match_operand:V_256 1 "register_operand")
8344 (match_operand:SI 2 "const_0_to_1_operand")]
8345 "TARGET_AVX"
8346 {
8347 rtx (*insn)(rtx, rtx);
8348
8349 switch (INTVAL (operands[2]))
8350 {
8351 case 0:
8352 insn = gen_vec_extract_lo_<mode>;
8353 break;
8354 case 1:
8355 insn = gen_vec_extract_hi_<mode>;
8356 break;
8357 default:
8358 gcc_unreachable ();
8359 }
8360
8361 emit_insn (insn (operands[0], operands[1]));
8362 DONE;
8363 })
8364
8365 (define_insn "vec_extract_lo_<mode><mask_name>"
8366 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
8367 (vec_select:<ssehalfvecmode>
8368 (match_operand:V16FI 1 "<store_mask_predicate>"
8369 "v,<store_mask_constraint>,v")
8370 (parallel [(const_int 0) (const_int 1)
8371 (const_int 2) (const_int 3)
8372 (const_int 4) (const_int 5)
8373 (const_int 6) (const_int 7)])))]
8374 "TARGET_AVX512F
8375 && <mask_mode512bit_condition>
8376 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8377 {
8378 if (<mask_applied>
8379 || (!TARGET_AVX512VL
8380 && !REG_P (operands[0])
8381 && EXT_REX_SSE_REG_P (operands[1])))
8382 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
8383 else
8384 return "#";
8385 }
8386 [(set_attr "type" "sselog1")
8387 (set_attr "prefix_extra" "1")
8388 (set_attr "length_immediate" "1")
8389 (set_attr "memory" "none,load,store")
8390 (set_attr "prefix" "evex")
8391 (set_attr "mode" "<sseinsnmode>")])
8392
8393 (define_split
8394 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8395 (vec_select:<ssehalfvecmode>
8396 (match_operand:V16FI 1 "nonimmediate_operand")
8397 (parallel [(const_int 0) (const_int 1)
8398 (const_int 2) (const_int 3)
8399 (const_int 4) (const_int 5)
8400 (const_int 6) (const_int 7)])))]
8401 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8402 && reload_completed
8403 && (TARGET_AVX512VL
8404 || REG_P (operands[0])
8405 || !EXT_REX_SSE_REG_P (operands[1]))"
8406 [(set (match_dup 0) (match_dup 1))]
8407 {
8408 if (!TARGET_AVX512VL
8409 && REG_P (operands[0])
8410 && EXT_REX_SSE_REG_P (operands[1]))
8411 operands[0]
8412 = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
8413 else
8414 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
8415 })
8416
8417 (define_insn "vec_extract_lo_<mode><mask_name>"
8418 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,v,m")
8419 (vec_select:<ssehalfvecmode>
8420 (match_operand:VI8F_256 1 "<store_mask_predicate>"
8421 "v,<store_mask_constraint>,v")
8422 (parallel [(const_int 0) (const_int 1)])))]
8423 "TARGET_AVX
8424 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
8425 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8426 {
8427 if (<mask_applied>)
8428 return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
8429 else
8430 return "#";
8431 }
8432 [(set_attr "type" "sselog1")
8433 (set_attr "prefix_extra" "1")
8434 (set_attr "length_immediate" "1")
8435 (set_attr "memory" "none,load,store")
8436 (set_attr "prefix" "evex")
8437 (set_attr "mode" "XI")])
8438
8439 (define_split
8440 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8441 (vec_select:<ssehalfvecmode>
8442 (match_operand:VI8F_256 1 "nonimmediate_operand")
8443 (parallel [(const_int 0) (const_int 1)])))]
8444 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8445 && reload_completed"
8446 [(set (match_dup 0) (match_dup 1))]
8447 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8448
8449 (define_insn "vec_extract_hi_<mode><mask_name>"
8450 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
8451 (vec_select:<ssehalfvecmode>
8452 (match_operand:VI8F_256 1 "register_operand" "v,v")
8453 (parallel [(const_int 2) (const_int 3)])))]
8454 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
8455 {
8456 if (TARGET_AVX512VL)
8457 {
8458 if (TARGET_AVX512DQ)
8459 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
8460 else
8461 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
8462 }
8463 else
8464 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
8465 }
8466 [(set_attr "type" "sselog1")
8467 (set_attr "prefix_extra" "1")
8468 (set_attr "length_immediate" "1")
8469 (set_attr "prefix" "vex")
8470 (set_attr "mode" "<sseinsnmode>")])
8471
8472 (define_split
8473 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8474 (vec_select:<ssehalfvecmode>
8475 (match_operand:VI4F_256 1 "nonimmediate_operand")
8476 (parallel [(const_int 0) (const_int 1)
8477 (const_int 2) (const_int 3)])))]
8478 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8479 && reload_completed"
8480 [(set (match_dup 0) (match_dup 1))]
8481 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8482
8483 (define_insn "vec_extract_lo_<mode><mask_name>"
8484 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
8485 "=<store_mask_constraint>,v")
8486 (vec_select:<ssehalfvecmode>
8487 (match_operand:VI4F_256 1 "<store_mask_predicate>"
8488 "v,<store_mask_constraint>")
8489 (parallel [(const_int 0) (const_int 1)
8490 (const_int 2) (const_int 3)])))]
8491 "TARGET_AVX
8492 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
8493 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8494 {
8495 if (<mask_applied>)
8496 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
8497 else
8498 return "#";
8499 }
8500 [(set_attr "type" "sselog1")
8501 (set_attr "prefix_extra" "1")
8502 (set_attr "length_immediate" "1")
8503 (set_attr "prefix" "evex")
8504 (set_attr "mode" "<sseinsnmode>")])
8505
8506 (define_insn "vec_extract_lo_<mode>_maskm"
8507 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8508 (vec_merge:<ssehalfvecmode>
8509 (vec_select:<ssehalfvecmode>
8510 (match_operand:VI4F_256 1 "register_operand" "v")
8511 (parallel [(const_int 0) (const_int 1)
8512 (const_int 2) (const_int 3)]))
8513 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8514 (match_operand:QI 3 "register_operand" "Yk")))]
8515 "TARGET_AVX512VL && TARGET_AVX512F
8516 && rtx_equal_p (operands[2], operands[0])"
8517 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
8518 [(set_attr "type" "sselog1")
8519 (set_attr "prefix_extra" "1")
8520 (set_attr "length_immediate" "1")
8521 (set_attr "prefix" "evex")
8522 (set_attr "mode" "<sseinsnmode>")])
8523
8524 (define_insn "vec_extract_hi_<mode>_maskm"
8525 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8526 (vec_merge:<ssehalfvecmode>
8527 (vec_select:<ssehalfvecmode>
8528 (match_operand:VI4F_256 1 "register_operand" "v")
8529 (parallel [(const_int 4) (const_int 5)
8530 (const_int 6) (const_int 7)]))
8531 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8532 (match_operand:QI 3 "register_operand" "Yk")))]
8533 "TARGET_AVX512F && TARGET_AVX512VL
8534 && rtx_equal_p (operands[2], operands[0])"
8535 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
8536 [(set_attr "type" "sselog1")
8537 (set_attr "length_immediate" "1")
8538 (set_attr "prefix" "evex")
8539 (set_attr "mode" "<sseinsnmode>")])
8540
8541 (define_insn "vec_extract_hi_<mode>_mask"
8542 [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
8543 (vec_merge:<ssehalfvecmode>
8544 (vec_select:<ssehalfvecmode>
8545 (match_operand:VI4F_256 1 "register_operand" "v")
8546 (parallel [(const_int 4) (const_int 5)
8547 (const_int 6) (const_int 7)]))
8548 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C")
8549 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
8550 "TARGET_AVX512VL"
8551 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8552 [(set_attr "type" "sselog1")
8553 (set_attr "length_immediate" "1")
8554 (set_attr "prefix" "evex")
8555 (set_attr "mode" "<sseinsnmode>")])
8556
8557 (define_insn "vec_extract_hi_<mode>"
8558 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
8559 (vec_select:<ssehalfvecmode>
8560 (match_operand:VI4F_256 1 "register_operand" "x, v")
8561 (parallel [(const_int 4) (const_int 5)
8562 (const_int 6) (const_int 7)])))]
8563 "TARGET_AVX"
8564 "@
8565 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
8566 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8567 [(set_attr "isa" "*, avx512vl")
8568 (set_attr "prefix" "vex, evex")
8569 (set_attr "type" "sselog1")
8570 (set_attr "length_immediate" "1")
8571 (set_attr "mode" "<sseinsnmode>")])
8572
8573 (define_insn_and_split "vec_extract_lo_v32hi"
8574 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,v,m")
8575 (vec_select:V16HI
8576 (match_operand:V32HI 1 "nonimmediate_operand" "v,m,v")
8577 (parallel [(const_int 0) (const_int 1)
8578 (const_int 2) (const_int 3)
8579 (const_int 4) (const_int 5)
8580 (const_int 6) (const_int 7)
8581 (const_int 8) (const_int 9)
8582 (const_int 10) (const_int 11)
8583 (const_int 12) (const_int 13)
8584 (const_int 14) (const_int 15)])))]
8585 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8586 {
8587 if (TARGET_AVX512VL
8588 || REG_P (operands[0])
8589 || !EXT_REX_SSE_REG_P (operands[1]))
8590 return "#";
8591 else
8592 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8593 }
8594 "&& reload_completed
8595 && (TARGET_AVX512VL
8596 || REG_P (operands[0])
8597 || !EXT_REX_SSE_REG_P (operands[1]))"
8598 [(set (match_dup 0) (match_dup 1))]
8599 {
8600 if (!TARGET_AVX512VL
8601 && REG_P (operands[0])
8602 && EXT_REX_SSE_REG_P (operands[1]))
8603 operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
8604 else
8605 operands[1] = gen_lowpart (V16HImode, operands[1]);
8606 }
8607 [(set_attr "type" "sselog1")
8608 (set_attr "prefix_extra" "1")
8609 (set_attr "length_immediate" "1")
8610 (set_attr "memory" "none,load,store")
8611 (set_attr "prefix" "evex")
8612 (set_attr "mode" "XI")])
8613
8614 (define_insn "vec_extract_hi_v32hi"
8615 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
8616 (vec_select:V16HI
8617 (match_operand:V32HI 1 "register_operand" "v")
8618 (parallel [(const_int 16) (const_int 17)
8619 (const_int 18) (const_int 19)
8620 (const_int 20) (const_int 21)
8621 (const_int 22) (const_int 23)
8622 (const_int 24) (const_int 25)
8623 (const_int 26) (const_int 27)
8624 (const_int 28) (const_int 29)
8625 (const_int 30) (const_int 31)])))]
8626 "TARGET_AVX512F"
8627 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8628 [(set_attr "type" "sselog1")
8629 (set_attr "prefix_extra" "1")
8630 (set_attr "length_immediate" "1")
8631 (set_attr "prefix" "evex")
8632 (set_attr "mode" "XI")])
8633
8634 (define_insn_and_split "vec_extract_lo_v16hi"
8635 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
8636 (vec_select:V8HI
8637 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
8638 (parallel [(const_int 0) (const_int 1)
8639 (const_int 2) (const_int 3)
8640 (const_int 4) (const_int 5)
8641 (const_int 6) (const_int 7)])))]
8642 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8643 "#"
8644 "&& reload_completed"
8645 [(set (match_dup 0) (match_dup 1))]
8646 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
8647
8648 (define_insn "vec_extract_hi_v16hi"
8649 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm,vm,vm")
8650 (vec_select:V8HI
8651 (match_operand:V16HI 1 "register_operand" "x,v,v")
8652 (parallel [(const_int 8) (const_int 9)
8653 (const_int 10) (const_int 11)
8654 (const_int 12) (const_int 13)
8655 (const_int 14) (const_int 15)])))]
8656 "TARGET_AVX"
8657 "@
8658 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
8659 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
8660 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
8661 [(set_attr "type" "sselog1")
8662 (set_attr "prefix_extra" "1")
8663 (set_attr "length_immediate" "1")
8664 (set_attr "isa" "*,avx512dq,avx512f")
8665 (set_attr "prefix" "vex,evex,evex")
8666 (set_attr "mode" "OI")])
8667
8668 (define_insn_and_split "vec_extract_lo_v64qi"
8669 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
8670 (vec_select:V32QI
8671 (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
8672 (parallel [(const_int 0) (const_int 1)
8673 (const_int 2) (const_int 3)
8674 (const_int 4) (const_int 5)
8675 (const_int 6) (const_int 7)
8676 (const_int 8) (const_int 9)
8677 (const_int 10) (const_int 11)
8678 (const_int 12) (const_int 13)
8679 (const_int 14) (const_int 15)
8680 (const_int 16) (const_int 17)
8681 (const_int 18) (const_int 19)
8682 (const_int 20) (const_int 21)
8683 (const_int 22) (const_int 23)
8684 (const_int 24) (const_int 25)
8685 (const_int 26) (const_int 27)
8686 (const_int 28) (const_int 29)
8687 (const_int 30) (const_int 31)])))]
8688 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8689 {
8690 if (TARGET_AVX512VL
8691 || REG_P (operands[0])
8692 || !EXT_REX_SSE_REG_P (operands[1]))
8693 return "#";
8694 else
8695 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8696 }
8697 "&& reload_completed
8698 && (TARGET_AVX512VL
8699 || REG_P (operands[0])
8700 || !EXT_REX_SSE_REG_P (operands[1]))"
8701 [(set (match_dup 0) (match_dup 1))]
8702 {
8703 if (!TARGET_AVX512VL
8704 && REG_P (operands[0])
8705 && EXT_REX_SSE_REG_P (operands[1]))
8706 operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
8707 else
8708 operands[1] = gen_lowpart (V32QImode, operands[1]);
8709 }
8710 [(set_attr "type" "sselog1")
8711 (set_attr "prefix_extra" "1")
8712 (set_attr "length_immediate" "1")
8713 (set_attr "memory" "none,load,store")
8714 (set_attr "prefix" "evex")
8715 (set_attr "mode" "XI")])
8716
8717 (define_insn "vec_extract_hi_v64qi"
8718 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
8719 (vec_select:V32QI
8720 (match_operand:V64QI 1 "register_operand" "v")
8721 (parallel [(const_int 32) (const_int 33)
8722 (const_int 34) (const_int 35)
8723 (const_int 36) (const_int 37)
8724 (const_int 38) (const_int 39)
8725 (const_int 40) (const_int 41)
8726 (const_int 42) (const_int 43)
8727 (const_int 44) (const_int 45)
8728 (const_int 46) (const_int 47)
8729 (const_int 48) (const_int 49)
8730 (const_int 50) (const_int 51)
8731 (const_int 52) (const_int 53)
8732 (const_int 54) (const_int 55)
8733 (const_int 56) (const_int 57)
8734 (const_int 58) (const_int 59)
8735 (const_int 60) (const_int 61)
8736 (const_int 62) (const_int 63)])))]
8737 "TARGET_AVX512F"
8738 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8739 [(set_attr "type" "sselog1")
8740 (set_attr "prefix_extra" "1")
8741 (set_attr "length_immediate" "1")
8742 (set_attr "prefix" "evex")
8743 (set_attr "mode" "XI")])
8744
8745 (define_insn_and_split "vec_extract_lo_v32qi"
8746 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
8747 (vec_select:V16QI
8748 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
8749 (parallel [(const_int 0) (const_int 1)
8750 (const_int 2) (const_int 3)
8751 (const_int 4) (const_int 5)
8752 (const_int 6) (const_int 7)
8753 (const_int 8) (const_int 9)
8754 (const_int 10) (const_int 11)
8755 (const_int 12) (const_int 13)
8756 (const_int 14) (const_int 15)])))]
8757 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8758 "#"
8759 "&& reload_completed"
8760 [(set (match_dup 0) (match_dup 1))]
8761 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
8762
8763 (define_insn "vec_extract_hi_v32qi"
8764 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
8765 (vec_select:V16QI
8766 (match_operand:V32QI 1 "register_operand" "x,v,v")
8767 (parallel [(const_int 16) (const_int 17)
8768 (const_int 18) (const_int 19)
8769 (const_int 20) (const_int 21)
8770 (const_int 22) (const_int 23)
8771 (const_int 24) (const_int 25)
8772 (const_int 26) (const_int 27)
8773 (const_int 28) (const_int 29)
8774 (const_int 30) (const_int 31)])))]
8775 "TARGET_AVX"
8776 "@
8777 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
8778 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
8779 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
8780 [(set_attr "type" "sselog1")
8781 (set_attr "prefix_extra" "1")
8782 (set_attr "length_immediate" "1")
8783 (set_attr "isa" "*,avx512dq,avx512f")
8784 (set_attr "prefix" "vex,evex,evex")
8785 (set_attr "mode" "OI")])
8786
8787 ;; Modes handled by vec_extract patterns.
8788 (define_mode_iterator VEC_EXTRACT_MODE
8789 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
8790 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
8791 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
8792 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
8793 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
8794 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
8795 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
8796
8797 (define_expand "vec_extract<mode><ssescalarmodelower>"
8798 [(match_operand:<ssescalarmode> 0 "register_operand")
8799 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
8800 (match_operand 2 "const_int_operand")]
8801 "TARGET_SSE"
8802 {
8803 ix86_expand_vector_extract (false, operands[0], operands[1],
8804 INTVAL (operands[2]));
8805 DONE;
8806 })
8807
8808 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
8809 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8810 (match_operand:V_256_512 1 "register_operand")
8811 (match_operand 2 "const_0_to_1_operand")]
8812 "TARGET_AVX"
8813 {
8814 if (INTVAL (operands[2]))
8815 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
8816 else
8817 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
8818 DONE;
8819 })
8820
8821 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8822 ;;
8823 ;; Parallel double-precision floating point element swizzling
8824 ;;
8825 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8826
8827 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
8828 [(set (match_operand:V8DF 0 "register_operand" "=v")
8829 (vec_select:V8DF
8830 (vec_concat:V16DF
8831 (match_operand:V8DF 1 "register_operand" "v")
8832 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8833 (parallel [(const_int 1) (const_int 9)
8834 (const_int 3) (const_int 11)
8835 (const_int 5) (const_int 13)
8836 (const_int 7) (const_int 15)])))]
8837 "TARGET_AVX512F"
8838 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8839 [(set_attr "type" "sselog")
8840 (set_attr "prefix" "evex")
8841 (set_attr "mode" "V8DF")])
8842
8843 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
8844 (define_insn "avx_unpckhpd256<mask_name>"
8845 [(set (match_operand:V4DF 0 "register_operand" "=v")
8846 (vec_select:V4DF
8847 (vec_concat:V8DF
8848 (match_operand:V4DF 1 "register_operand" "v")
8849 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8850 (parallel [(const_int 1) (const_int 5)
8851 (const_int 3) (const_int 7)])))]
8852 "TARGET_AVX && <mask_avx512vl_condition>"
8853 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8854 [(set_attr "type" "sselog")
8855 (set_attr "prefix" "vex")
8856 (set_attr "mode" "V4DF")])
8857
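;; A full V4DF high interleave therefore takes three steps: an in-lane
;; unpcklpd, an in-lane unpckhpd, and a final lane merge that picks
;; elements {2, 3, 6, 7} of the two intermediate results.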
8858 (define_expand "vec_interleave_highv4df"
8859 [(set (match_dup 3)
8860 (vec_select:V4DF
8861 (vec_concat:V8DF
8862 (match_operand:V4DF 1 "register_operand")
8863 (match_operand:V4DF 2 "nonimmediate_operand"))
8864 (parallel [(const_int 0) (const_int 4)
8865 (const_int 2) (const_int 6)])))
8866 (set (match_dup 4)
8867 (vec_select:V4DF
8868 (vec_concat:V8DF
8869 (match_dup 1)
8870 (match_dup 2))
8871 (parallel [(const_int 1) (const_int 5)
8872 (const_int 3) (const_int 7)])))
8873 (set (match_operand:V4DF 0 "register_operand")
8874 (vec_select:V4DF
8875 (vec_concat:V8DF
8876 (match_dup 3)
8877 (match_dup 4))
8878 (parallel [(const_int 2) (const_int 3)
8879 (const_int 6) (const_int 7)])))]
8880 "TARGET_AVX"
8881 {
8882 operands[3] = gen_reg_rtx (V4DFmode);
8883 operands[4] = gen_reg_rtx (V4DFmode);
8884 })
8885
8886
8887 (define_insn "avx512vl_unpckhpd128_mask"
8888 [(set (match_operand:V2DF 0 "register_operand" "=v")
8889 (vec_merge:V2DF
8890 (vec_select:V2DF
8891 (vec_concat:V4DF
8892 (match_operand:V2DF 1 "register_operand" "v")
8893 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8894 (parallel [(const_int 1) (const_int 3)]))
8895 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
8896 (match_operand:QI 4 "register_operand" "Yk")))]
8897 "TARGET_AVX512VL"
8898 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8899 [(set_attr "type" "sselog")
8900 (set_attr "prefix" "evex")
8901 (set_attr "mode" "V2DF")])
8902
8903 (define_expand "vec_interleave_highv2df"
8904 [(set (match_operand:V2DF 0 "register_operand")
8905 (vec_select:V2DF
8906 (vec_concat:V4DF
8907 (match_operand:V2DF 1 "nonimmediate_operand")
8908 (match_operand:V2DF 2 "nonimmediate_operand"))
8909 (parallel [(const_int 1)
8910 (const_int 3)])))]
8911 "TARGET_SSE2"
8912 {
8913 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
8914 operands[2] = force_reg (V2DFmode, operands[2]);
8915 })
8916
8917 (define_insn "*vec_interleave_highv2df"
8918 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
8919 (vec_select:V2DF
8920 (vec_concat:V4DF
8921 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
8922 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
8923 (parallel [(const_int 1)
8924 (const_int 3)])))]
8925 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
8926 "@
8927 unpckhpd\t{%2, %0|%0, %2}
8928 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
8929 %vmovddup\t{%H1, %0|%0, %H1}
8930 movlpd\t{%H1, %0|%0, %H1}
8931 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
8932 %vmovhpd\t{%1, %0|%q0, %1}"
8933 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8934 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8935 (set (attr "prefix_data16")
8936 (if_then_else (eq_attr "alternative" "3,5")
8937 (const_string "1")
8938 (const_string "*")))
8939 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8940 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
8941
8942 (define_expand "avx512f_movddup512<mask_name>"
8943 [(set (match_operand:V8DF 0 "register_operand")
8944 (vec_select:V8DF
8945 (vec_concat:V16DF
8946 (match_operand:V8DF 1 "nonimmediate_operand")
8947 (match_dup 1))
8948 (parallel [(const_int 0) (const_int 8)
8949 (const_int 2) (const_int 10)
8950 (const_int 4) (const_int 12)
8951 (const_int 6) (const_int 14)])))]
8952 "TARGET_AVX512F")
8953
8954 (define_expand "avx512f_unpcklpd512<mask_name>"
8955 [(set (match_operand:V8DF 0 "register_operand")
8956 (vec_select:V8DF
8957 (vec_concat:V16DF
8958 (match_operand:V8DF 1 "register_operand")
8959 (match_operand:V8DF 2 "nonimmediate_operand"))
8960 (parallel [(const_int 0) (const_int 8)
8961 (const_int 2) (const_int 10)
8962 (const_int 4) (const_int 12)
8963 (const_int 6) (const_int 14)])))]
8964 "TARGET_AVX512F")
8965
8966 (define_insn "*avx512f_unpcklpd512<mask_name>"
8967 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
8968 (vec_select:V8DF
8969 (vec_concat:V16DF
8970 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
8971 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
8972 (parallel [(const_int 0) (const_int 8)
8973 (const_int 2) (const_int 10)
8974 (const_int 4) (const_int 12)
8975 (const_int 6) (const_int 14)])))]
8976 "TARGET_AVX512F"
8977 "@
8978 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
8979 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8980 [(set_attr "type" "sselog")
8981 (set_attr "prefix" "evex")
8982 (set_attr "mode" "V8DF")])
8983
8984 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
8985 (define_expand "avx_movddup256<mask_name>"
8986 [(set (match_operand:V4DF 0 "register_operand")
8987 (vec_select:V4DF
8988 (vec_concat:V8DF
8989 (match_operand:V4DF 1 "nonimmediate_operand")
8990 (match_dup 1))
8991 (parallel [(const_int 0) (const_int 4)
8992 (const_int 2) (const_int 6)])))]
8993 "TARGET_AVX && <mask_avx512vl_condition>")
8994
8995 (define_expand "avx_unpcklpd256<mask_name>"
8996 [(set (match_operand:V4DF 0 "register_operand")
8997 (vec_select:V4DF
8998 (vec_concat:V8DF
8999 (match_operand:V4DF 1 "register_operand")
9000 (match_operand:V4DF 2 "nonimmediate_operand"))
9001 (parallel [(const_int 0) (const_int 4)
9002 (const_int 2) (const_int 6)])))]
9003 "TARGET_AVX && <mask_avx512vl_condition>")
9004
9005 (define_insn "*avx_unpcklpd256<mask_name>"
9006 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
9007 (vec_select:V4DF
9008 (vec_concat:V8DF
9009 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
9010 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
9011 (parallel [(const_int 0) (const_int 4)
9012 (const_int 2) (const_int 6)])))]
9013 "TARGET_AVX && <mask_avx512vl_condition>"
9014 "@
9015 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
9016 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
9017 [(set_attr "type" "sselog")
9018 (set_attr "prefix" "vex")
9019 (set_attr "mode" "V4DF")])
9020
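;; Likewise the low interleave is built from the same two in-lane unpck
;; results, keeping elements {0, 1, 4, 5}.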
9021 (define_expand "vec_interleave_lowv4df"
9022 [(set (match_dup 3)
9023 (vec_select:V4DF
9024 (vec_concat:V8DF
9025 (match_operand:V4DF 1 "register_operand")
9026 (match_operand:V4DF 2 "nonimmediate_operand"))
9027 (parallel [(const_int 0) (const_int 4)
9028 (const_int 2) (const_int 6)])))
9029 (set (match_dup 4)
9030 (vec_select:V4DF
9031 (vec_concat:V8DF
9032 (match_dup 1)
9033 (match_dup 2))
9034 (parallel [(const_int 1) (const_int 5)
9035 (const_int 3) (const_int 7)])))
9036 (set (match_operand:V4DF 0 "register_operand")
9037 (vec_select:V4DF
9038 (vec_concat:V8DF
9039 (match_dup 3)
9040 (match_dup 4))
9041 (parallel [(const_int 0) (const_int 1)
9042 (const_int 4) (const_int 5)])))]
9043 "TARGET_AVX"
9044 {
9045 operands[3] = gen_reg_rtx (V4DFmode);
9046 operands[4] = gen_reg_rtx (V4DFmode);
9047 })
9048
9049 (define_insn "avx512vl_unpcklpd128_mask"
9050 [(set (match_operand:V2DF 0 "register_operand" "=v")
9051 (vec_merge:V2DF
9052 (vec_select:V2DF
9053 (vec_concat:V4DF
9054 (match_operand:V2DF 1 "register_operand" "v")
9055 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9056 (parallel [(const_int 0) (const_int 2)]))
9057 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9058 (match_operand:QI 4 "register_operand" "Yk")))]
9059 "TARGET_AVX512VL"
9060 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9061 [(set_attr "type" "sselog")
9062 (set_attr "prefix" "evex")
9063 (set_attr "mode" "V2DF")])
9064
9065 (define_expand "vec_interleave_lowv2df"
9066 [(set (match_operand:V2DF 0 "register_operand")
9067 (vec_select:V2DF
9068 (vec_concat:V4DF
9069 (match_operand:V2DF 1 "nonimmediate_operand")
9070 (match_operand:V2DF 2 "nonimmediate_operand"))
9071 (parallel [(const_int 0)
9072 (const_int 2)])))]
9073 "TARGET_SSE2"
9074 {
9075 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
9076 operands[1] = force_reg (V2DFmode, operands[1]);
9077 })
9078
9079 (define_insn "*vec_interleave_lowv2df"
9080 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
9081 (vec_select:V2DF
9082 (vec_concat:V4DF
9083 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
9084 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
9085 (parallel [(const_int 0)
9086 (const_int 2)])))]
9087 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
9088 "@
9089 unpcklpd\t{%2, %0|%0, %2}
9090 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9091 %vmovddup\t{%1, %0|%0, %q1}
9092 movhpd\t{%2, %0|%0, %q2}
9093 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
9094 %vmovlpd\t{%2, %H0|%H0, %2}"
9095 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9096 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9097 (set (attr "prefix_data16")
9098 (if_then_else (eq_attr "alternative" "3,5")
9099 (const_string "1")
9100 (const_string "*")))
9101 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9102 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
9103
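;; A register duplicated into both elements of a memory destination is
;; split after reload into two scalar DFmode stores of the same value.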
9104 (define_split
9105 [(set (match_operand:V2DF 0 "memory_operand")
9106 (vec_select:V2DF
9107 (vec_concat:V4DF
9108 (match_operand:V2DF 1 "register_operand")
9109 (match_dup 1))
9110 (parallel [(const_int 0)
9111 (const_int 2)])))]
9112 "TARGET_SSE3 && reload_completed"
9113 [(const_int 0)]
9114 {
9115 rtx low = gen_lowpart (DFmode, operands[1]);
9116
9117 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
9118 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
9119 DONE;
9120 })
9121
9122 (define_split
9123 [(set (match_operand:V2DF 0 "register_operand")
9124 (vec_select:V2DF
9125 (vec_concat:V4DF
9126 (match_operand:V2DF 1 "memory_operand")
9127 (match_dup 1))
9128 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
9129 (match_operand:SI 3 "const_int_operand")])))]
9130 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
9131 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
9132 {
9133 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
9134 })
9135
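;; VSCALEF computes operand 1 * 2^floor(operand 2) elementwise (per the
;; ISA definition); the scalar form below merges the result into the
;; low element of operand 1 and leaves the upper elements unchanged.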
9136 (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
9137 [(set (match_operand:VF_128 0 "register_operand" "=v")
9138 (vec_merge:VF_128
9139 (unspec:VF_128
9140 [(match_operand:VF_128 1 "register_operand" "v")
9141 (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
9142 UNSPEC_SCALEF)
9143 (match_dup 1)
9144 (const_int 1)))]
9145 "TARGET_AVX512F"
9146 "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
9147 [(set_attr "prefix" "evex")
9148 (set_attr "mode" "<ssescalarmode>")])
9149
9150 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
9151 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9152 (unspec:VF_AVX512VL
9153 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
9154 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
9155 UNSPEC_SCALEF))]
9156 "TARGET_AVX512F"
9157 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
9158 [(set_attr "prefix" "evex")
9159 (set_attr "mode" "<MODE>")])
9160
9161 (define_expand "<avx512>_vternlog<mode>_maskz"
9162 [(match_operand:VI48_AVX512VL 0 "register_operand")
9163 (match_operand:VI48_AVX512VL 1 "register_operand")
9164 (match_operand:VI48_AVX512VL 2 "register_operand")
9165 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
9166 (match_operand:SI 4 "const_0_to_255_operand")
9167 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9168 "TARGET_AVX512F"
9169 {
9170 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
9171 operands[0], operands[1], operands[2], operands[3],
9172 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
9173 DONE;
9174 })
9175
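;; The vpternlog immediate is an 8-entry truth table over the three
;; sources: bit (a << 2 | b << 1 | c) of the immediate supplies the
;; result for input bits a, b and c, so e.g. 0x96 yields a three-way
;; XOR and 0xfe a three-way OR.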
9176 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
9177 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9178 (unspec:VI48_AVX512VL
9179 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9180 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9181 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9182 (match_operand:SI 4 "const_0_to_255_operand")]
9183 UNSPEC_VTERNLOG))]
9184 "TARGET_AVX512F"
9185 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
9186 [(set_attr "type" "sselog")
9187 (set_attr "prefix" "evex")
9188 (set_attr "mode" "<sseinsnmode>")])
9189
9190 (define_insn "<avx512>_vternlog<mode>_mask"
9191 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9192 (vec_merge:VI48_AVX512VL
9193 (unspec:VI48_AVX512VL
9194 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9195 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9196 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9197 (match_operand:SI 4 "const_0_to_255_operand")]
9198 UNSPEC_VTERNLOG)
9199 (match_dup 1)
9200 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9201 "TARGET_AVX512F"
9202 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
9203 [(set_attr "type" "sselog")
9204 (set_attr "prefix" "evex")
9205 (set_attr "mode" "<sseinsnmode>")])
9206
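;; VGETEXP extracts the exponent of each element as a floating-point
;; value, i.e. roughly floor(log2(|x|)).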
9207 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
9208 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9209 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
9210 UNSPEC_GETEXP))]
9211 "TARGET_AVX512F"
9212 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
9213 [(set_attr "prefix" "evex")
9214 (set_attr "mode" "<MODE>")])
9215
9216 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
9217 [(set (match_operand:VF_128 0 "register_operand" "=v")
9218 (vec_merge:VF_128
9219 (unspec:VF_128
9220 [(match_operand:VF_128 1 "register_operand" "v")
9221 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
9222 UNSPEC_GETEXP)
9223 (match_dup 1)
9224 (const_int 1)))]
9225 "TARGET_AVX512F"
9226 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}";
9227 [(set_attr "prefix" "evex")
9228 (set_attr "mode" "<ssescalarmode>")])
9229
9230 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
9231 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9232 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
9233 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
9234 (match_operand:SI 3 "const_0_to_255_operand")]
9235 UNSPEC_ALIGN))]
9236 "TARGET_AVX512F"
9237 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9238 [(set_attr "prefix" "evex")
9239 (set_attr "mode" "<sseinsnmode>")])
9240
9241 (define_expand "avx512f_shufps512_mask"
9242 [(match_operand:V16SF 0 "register_operand")
9243 (match_operand:V16SF 1 "register_operand")
9244 (match_operand:V16SF 2 "nonimmediate_operand")
9245 (match_operand:SI 3 "const_0_to_255_operand")
9246 (match_operand:V16SF 4 "register_operand")
9247 (match_operand:HI 5 "register_operand")]
9248 "TARGET_AVX512F"
9249 {
9250 int mask = INTVAL (operands[3]);
9251 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
9252 GEN_INT ((mask >> 0) & 3),
9253 GEN_INT ((mask >> 2) & 3),
9254 GEN_INT (((mask >> 4) & 3) + 16),
9255 GEN_INT (((mask >> 6) & 3) + 16),
9256 GEN_INT (((mask >> 0) & 3) + 4),
9257 GEN_INT (((mask >> 2) & 3) + 4),
9258 GEN_INT (((mask >> 4) & 3) + 20),
9259 GEN_INT (((mask >> 6) & 3) + 20),
9260 GEN_INT (((mask >> 0) & 3) + 8),
9261 GEN_INT (((mask >> 2) & 3) + 8),
9262 GEN_INT (((mask >> 4) & 3) + 24),
9263 GEN_INT (((mask >> 6) & 3) + 24),
9264 GEN_INT (((mask >> 0) & 3) + 12),
9265 GEN_INT (((mask >> 2) & 3) + 12),
9266 GEN_INT (((mask >> 4) & 3) + 28),
9267 GEN_INT (((mask >> 6) & 3) + 28),
9268 operands[4], operands[5]));
9269 DONE;
9270 })
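;; As an illustration of the expansion above, imm8 = 0x1b (two-bit
;; fields 3, 2, 1, 0 from the low bits upwards) yields the selectors
;;   { 3, 2, 17, 16,  7, 6, 21, 20,  11, 10, 25, 24,  15, 14, 29, 28 },
;; i.e. the same two-from-each-source pattern repeated in every
;; 128-bit lane with a per-lane offset of 4.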
9271
9272
9273 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
9274 [(match_operand:VF_AVX512VL 0 "register_operand")
9275 (match_operand:VF_AVX512VL 1 "register_operand")
9276 (match_operand:VF_AVX512VL 2 "register_operand")
9277 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9278 (match_operand:SI 4 "const_0_to_255_operand")
9279 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9280 "TARGET_AVX512F"
9281 {
9282 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9283 operands[0], operands[1], operands[2], operands[3],
9284 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9285 <round_saeonly_expand_operand6>));
9286 DONE;
9287 })
9288
9289 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
9290 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9291 (unspec:VF_AVX512VL
9292 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9293 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9294 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9295 (match_operand:SI 4 "const_0_to_255_operand")]
9296 UNSPEC_FIXUPIMM))]
9297 "TARGET_AVX512F"
9298 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
9299 [(set_attr "prefix" "evex")
9300 (set_attr "mode" "<MODE>")])
9301
9302 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
9303 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9304 (vec_merge:VF_AVX512VL
9305 (unspec:VF_AVX512VL
9306 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9307 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9308 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9309 (match_operand:SI 4 "const_0_to_255_operand")]
9310 UNSPEC_FIXUPIMM)
9311 (match_dup 1)
9312 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9313 "TARGET_AVX512F"
9314 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
9315 [(set_attr "prefix" "evex")
9316 (set_attr "mode" "<MODE>")])
9317
9318 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
9319 [(match_operand:VF_128 0 "register_operand")
9320 (match_operand:VF_128 1 "register_operand")
9321 (match_operand:VF_128 2 "register_operand")
9322 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9323 (match_operand:SI 4 "const_0_to_255_operand")
9324 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9325 "TARGET_AVX512F"
9326 {
9327 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9328 operands[0], operands[1], operands[2], operands[3],
9329 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9330 <round_saeonly_expand_operand6>));
9331 DONE;
9332 })
9333
9334 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
9335 [(set (match_operand:VF_128 0 "register_operand" "=v")
9336 (vec_merge:VF_128
9337 (unspec:VF_128
9338 [(match_operand:VF_128 1 "register_operand" "0")
9339 (match_operand:VF_128 2 "register_operand" "v")
9340 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9341 (match_operand:SI 4 "const_0_to_255_operand")]
9342 UNSPEC_FIXUPIMM)
9343 (match_dup 1)
9344 (const_int 1)))]
9345 "TARGET_AVX512F"
9346 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %<iptr>3<round_saeonly_sd_mask_op5>, %4}";
9347 [(set_attr "prefix" "evex")
9348 (set_attr "mode" "<ssescalarmode>")])
9349
9350 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
9351 [(set (match_operand:VF_128 0 "register_operand" "=v")
9352 (vec_merge:VF_128
9353 (vec_merge:VF_128
9354 (unspec:VF_128
9355 [(match_operand:VF_128 1 "register_operand" "0")
9356 (match_operand:VF_128 2 "register_operand" "v")
9357 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9358 (match_operand:SI 4 "const_0_to_255_operand")]
9359 UNSPEC_FIXUPIMM)
9360 (match_dup 1)
9361 (const_int 1))
9362 (match_dup 1)
9363 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9364 "TARGET_AVX512F"
9365 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %<iptr>3<round_saeonly_op6>, %4}";
9366 [(set_attr "prefix" "evex")
9367 (set_attr "mode" "<ssescalarmode>")])
9368
9369 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
9370 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9371 (unspec:VF_AVX512VL
9372 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
9373 (match_operand:SI 2 "const_0_to_255_operand")]
9374 UNSPEC_ROUND))]
9375 "TARGET_AVX512F"
9376 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
9377 [(set_attr "length_immediate" "1")
9378 (set_attr "prefix" "evex")
9379 (set_attr "mode" "<MODE>")])
9380
9381 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
9382 [(set (match_operand:VF_128 0 "register_operand" "=v")
9383 (vec_merge:VF_128
9384 (unspec:VF_128
9385 [(match_operand:VF_128 1 "register_operand" "v")
9386 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9387 (match_operand:SI 3 "const_0_to_255_operand")]
9388 UNSPEC_ROUND)
9389 (match_dup 1)
9390 (const_int 1)))]
9391 "TARGET_AVX512F"
9392 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
9393 [(set_attr "length_immediate" "1")
9394 (set_attr "prefix" "evex")
9395 (set_attr "mode" "<MODE>")])
9396
9397 ;; One bit in mask selects 2 elements.
9398 (define_insn "avx512f_shufps512_1<mask_name>"
9399 [(set (match_operand:V16SF 0 "register_operand" "=v")
9400 (vec_select:V16SF
9401 (vec_concat:V32SF
9402 (match_operand:V16SF 1 "register_operand" "v")
9403 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
9404 (parallel [(match_operand 3 "const_0_to_3_operand")
9405 (match_operand 4 "const_0_to_3_operand")
9406 (match_operand 5 "const_16_to_19_operand")
9407 (match_operand 6 "const_16_to_19_operand")
9408 (match_operand 7 "const_4_to_7_operand")
9409 (match_operand 8 "const_4_to_7_operand")
9410 (match_operand 9 "const_20_to_23_operand")
9411 (match_operand 10 "const_20_to_23_operand")
9412 (match_operand 11 "const_8_to_11_operand")
9413 (match_operand 12 "const_8_to_11_operand")
9414 (match_operand 13 "const_24_to_27_operand")
9415 (match_operand 14 "const_24_to_27_operand")
9416 (match_operand 15 "const_12_to_15_operand")
9417 (match_operand 16 "const_12_to_15_operand")
9418 (match_operand 17 "const_28_to_31_operand")
9419 (match_operand 18 "const_28_to_31_operand")])))]
9420 "TARGET_AVX512F
9421 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
9422 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
9423 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
9424 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
9425 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
9426 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
9427 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
9428 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
9429 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
9430 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
9431 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
9432 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
9433 {
9434 int mask;
9435 mask = INTVAL (operands[3]);
9436 mask |= INTVAL (operands[4]) << 2;
9437 mask |= (INTVAL (operands[5]) - 16) << 4;
9438 mask |= (INTVAL (operands[6]) - 16) << 6;
9439 operands[3] = GEN_INT (mask);
9440
9441 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
9442 }
9443 [(set_attr "type" "sselog")
9444 (set_attr "length_immediate" "1")
9445 (set_attr "prefix" "evex")
9446 (set_attr "mode" "V16SF")])
9447
9448 (define_expand "avx512f_shufpd512_mask"
9449 [(match_operand:V8DF 0 "register_operand")
9450 (match_operand:V8DF 1 "register_operand")
9451 (match_operand:V8DF 2 "nonimmediate_operand")
9452 (match_operand:SI 3 "const_0_to_255_operand")
9453 (match_operand:V8DF 4 "register_operand")
9454 (match_operand:QI 5 "register_operand")]
9455 "TARGET_AVX512F"
9456 {
9457 int mask = INTVAL (operands[3]);
9458 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
9459 GEN_INT (mask & 1),
9460 GEN_INT (mask & 2 ? 9 : 8),
9461 GEN_INT (mask & 4 ? 3 : 2),
9462 GEN_INT (mask & 8 ? 11 : 10),
9463 GEN_INT (mask & 16 ? 5 : 4),
9464 GEN_INT (mask & 32 ? 13 : 12),
9465 GEN_INT (mask & 64 ? 7 : 6),
9466 GEN_INT (mask & 128 ? 15 : 14),
9467 operands[4], operands[5]));
9468 DONE;
9469 })
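;; For example, imm8 = 0x05 in the expander above produces the selector
;; constants { 1, 8, 3, 10, 4, 12, 6, 14 } into the V16DF concatenation
;; of the two sources: one immediate bit chooses the odd or even element
;; for each result position, matching the per-lane semantics of vshufpd.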
9470
9471 (define_insn "avx512f_shufpd512_1<mask_name>"
9472 [(set (match_operand:V8DF 0 "register_operand" "=v")
9473 (vec_select:V8DF
9474 (vec_concat:V16DF
9475 (match_operand:V8DF 1 "register_operand" "v")
9476 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9477 (parallel [(match_operand 3 "const_0_to_1_operand")
9478 (match_operand 4 "const_8_to_9_operand")
9479 (match_operand 5 "const_2_to_3_operand")
9480 (match_operand 6 "const_10_to_11_operand")
9481 (match_operand 7 "const_4_to_5_operand")
9482 (match_operand 8 "const_12_to_13_operand")
9483 (match_operand 9 "const_6_to_7_operand")
9484 (match_operand 10 "const_14_to_15_operand")])))]
9485 "TARGET_AVX512F"
9486 {
9487 int mask;
9488 mask = INTVAL (operands[3]);
9489 mask |= (INTVAL (operands[4]) - 8) << 1;
9490 mask |= (INTVAL (operands[5]) - 2) << 2;
9491 mask |= (INTVAL (operands[6]) - 10) << 3;
9492 mask |= (INTVAL (operands[7]) - 4) << 4;
9493 mask |= (INTVAL (operands[8]) - 12) << 5;
9494 mask |= (INTVAL (operands[9]) - 6) << 6;
9495 mask |= (INTVAL (operands[10]) - 14) << 7;
9496 operands[3] = GEN_INT (mask);
9497
9498 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9499 }
9500 [(set_attr "type" "sselog")
9501 (set_attr "length_immediate" "1")
9502 (set_attr "prefix" "evex")
9503 (set_attr "mode" "V8DF")])
9504
9505 (define_expand "avx_shufpd256<mask_expand4_name>"
9506 [(match_operand:V4DF 0 "register_operand")
9507 (match_operand:V4DF 1 "register_operand")
9508 (match_operand:V4DF 2 "nonimmediate_operand")
9509 (match_operand:SI 3 "const_int_operand")]
9510 "TARGET_AVX"
9511 {
9512 int mask = INTVAL (operands[3]);
9513 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
9514 operands[1],
9515 operands[2],
9516 GEN_INT (mask & 1),
9517 GEN_INT (mask & 2 ? 5 : 4),
9518 GEN_INT (mask & 4 ? 3 : 2),
9519 GEN_INT (mask & 8 ? 7 : 6)
9520 <mask_expand4_args>));
9521 DONE;
9522 })
9523
9524 (define_insn "avx_shufpd256_1<mask_name>"
9525 [(set (match_operand:V4DF 0 "register_operand" "=v")
9526 (vec_select:V4DF
9527 (vec_concat:V8DF
9528 (match_operand:V4DF 1 "register_operand" "v")
9529 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9530 (parallel [(match_operand 3 "const_0_to_1_operand")
9531 (match_operand 4 "const_4_to_5_operand")
9532 (match_operand 5 "const_2_to_3_operand")
9533 (match_operand 6 "const_6_to_7_operand")])))]
9534 "TARGET_AVX && <mask_avx512vl_condition>"
9535 {
9536 int mask;
9537 mask = INTVAL (operands[3]);
9538 mask |= (INTVAL (operands[4]) - 4) << 1;
9539 mask |= (INTVAL (operands[5]) - 2) << 2;
9540 mask |= (INTVAL (operands[6]) - 6) << 3;
9541 operands[3] = GEN_INT (mask);
9542
9543 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
9544 }
9545 [(set_attr "type" "sseshuf")
9546 (set_attr "length_immediate" "1")
9547 (set_attr "prefix" "vex")
9548 (set_attr "mode" "V4DF")])
9549
9550 (define_expand "sse2_shufpd<mask_expand4_name>"
9551 [(match_operand:V2DF 0 "register_operand")
9552 (match_operand:V2DF 1 "register_operand")
9553 (match_operand:V2DF 2 "vector_operand")
9554 (match_operand:SI 3 "const_int_operand")]
9555 "TARGET_SSE2"
9556 {
9557 int mask = INTVAL (operands[3]);
9558 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
9559 operands[2], GEN_INT (mask & 1),
9560 GEN_INT (mask & 2 ? 3 : 2)
9561 <mask_expand4_args>));
9562 DONE;
9563 })
9564
9565 (define_insn "sse2_shufpd_v2df_mask"
9566 [(set (match_operand:V2DF 0 "register_operand" "=v")
9567 (vec_merge:V2DF
9568 (vec_select:V2DF
9569 (vec_concat:V4DF
9570 (match_operand:V2DF 1 "register_operand" "v")
9571 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9572 (parallel [(match_operand 3 "const_0_to_1_operand")
9573 (match_operand 4 "const_2_to_3_operand")]))
9574 (match_operand:V2DF 5 "nonimm_or_0_operand" "0C")
9575 (match_operand:QI 6 "register_operand" "Yk")))]
9576 "TARGET_AVX512VL"
9577 {
9578 int mask;
9579 mask = INTVAL (operands[3]);
9580 mask |= (INTVAL (operands[4]) - 2) << 1;
9581 operands[3] = GEN_INT (mask);
9582
9583 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
9584 }
9585 [(set_attr "type" "sseshuf")
9586 (set_attr "length_immediate" "1")
9587 (set_attr "prefix" "evex")
9588 (set_attr "mode" "V2DF")])
9589
9590 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
9591 (define_insn "avx2_interleave_highv4di<mask_name>"
9592 [(set (match_operand:V4DI 0 "register_operand" "=v")
9593 (vec_select:V4DI
9594 (vec_concat:V8DI
9595 (match_operand:V4DI 1 "register_operand" "v")
9596 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
9597 (parallel [(const_int 1)
9598 (const_int 5)
9599 (const_int 3)
9600 (const_int 7)])))]
9601 "TARGET_AVX2 && <mask_avx512vl_condition>"
9602 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9603 [(set_attr "type" "sselog")
9604 (set_attr "prefix" "vex")
9605 (set_attr "mode" "OI")])
9606
9607 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
9608 [(set (match_operand:V8DI 0 "register_operand" "=v")
9609 (vec_select:V8DI
9610 (vec_concat:V16DI
9611 (match_operand:V8DI 1 "register_operand" "v")
9612 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
9613 (parallel [(const_int 1) (const_int 9)
9614 (const_int 3) (const_int 11)
9615 (const_int 5) (const_int 13)
9616 (const_int 7) (const_int 15)])))]
9617 "TARGET_AVX512F"
9618 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9619 [(set_attr "type" "sselog")
9620 (set_attr "prefix" "evex")
9621 (set_attr "mode" "XI")])
9622
9623 (define_insn "vec_interleave_highv2di<mask_name>"
9624 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9625 (vec_select:V2DI
9626 (vec_concat:V4DI
9627 (match_operand:V2DI 1 "register_operand" "0,v")
9628 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
9629 (parallel [(const_int 1)
9630 (const_int 3)])))]
9631 "TARGET_SSE2 && <mask_avx512vl_condition>"
9632 "@
9633 punpckhqdq\t{%2, %0|%0, %2}
9634 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9635 [(set_attr "isa" "noavx,avx")
9636 (set_attr "type" "sselog")
9637 (set_attr "prefix_data16" "1,*")
9638 (set_attr "prefix" "orig,<mask_prefix>")
9639 (set_attr "mode" "TI")])
9640
9641 (define_insn "avx2_interleave_lowv4di<mask_name>"
9642 [(set (match_operand:V4DI 0 "register_operand" "=v")
9643 (vec_select:V4DI
9644 (vec_concat:V8DI
9645 (match_operand:V4DI 1 "register_operand" "v")
9646 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
9647 (parallel [(const_int 0)
9648 (const_int 4)
9649 (const_int 2)
9650 (const_int 6)])))]
9651 "TARGET_AVX2 && <mask_avx512vl_condition>"
9652 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9653 [(set_attr "type" "sselog")
9654 (set_attr "prefix" "vex")
9655 (set_attr "mode" "OI")])
9656
9657 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
9658 [(set (match_operand:V8DI 0 "register_operand" "=v")
9659 (vec_select:V8DI
9660 (vec_concat:V16DI
9661 (match_operand:V8DI 1 "register_operand" "v")
9662 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
9663 (parallel [(const_int 0) (const_int 8)
9664 (const_int 2) (const_int 10)
9665 (const_int 4) (const_int 12)
9666 (const_int 6) (const_int 14)])))]
9667 "TARGET_AVX512F"
9668 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9669 [(set_attr "type" "sselog")
9670 (set_attr "prefix" "evex")
9671 (set_attr "mode" "XI")])
9672
9673 (define_insn "vec_interleave_lowv2di<mask_name>"
9674 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9675 (vec_select:V2DI
9676 (vec_concat:V4DI
9677 (match_operand:V2DI 1 "register_operand" "0,v")
9678 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
9679 (parallel [(const_int 0)
9680 (const_int 2)])))]
9681 "TARGET_SSE2 && <mask_avx512vl_condition>"
9682 "@
9683 punpcklqdq\t{%2, %0|%0, %2}
9684 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9685 [(set_attr "isa" "noavx,avx")
9686 (set_attr "type" "sselog")
9687 (set_attr "prefix_data16" "1,*")
9688 (set_attr "prefix" "orig,vex")
9689 (set_attr "mode" "TI")])
9690
9691 (define_insn "sse2_shufpd_<mode>"
9692 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
9693 (vec_select:VI8F_128
9694 (vec_concat:<ssedoublevecmode>
9695 (match_operand:VI8F_128 1 "register_operand" "0,v")
9696 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
9697 (parallel [(match_operand 3 "const_0_to_1_operand")
9698 (match_operand 4 "const_2_to_3_operand")])))]
9699 "TARGET_SSE2"
9700 {
9701 int mask;
9702 mask = INTVAL (operands[3]);
9703 mask |= (INTVAL (operands[4]) - 2) << 1;
9704 operands[3] = GEN_INT (mask);
9705
9706 switch (which_alternative)
9707 {
9708 case 0:
9709 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
9710 case 1:
9711 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9712 default:
9713 gcc_unreachable ();
9714 }
9715 }
9716 [(set_attr "isa" "noavx,avx")
9717 (set_attr "type" "sseshuf")
9718 (set_attr "length_immediate" "1")
9719 (set_attr "prefix" "orig,maybe_evex")
9720 (set_attr "mode" "V2DF")])
9721
9722 ;; Avoid combining registers from different units in a single alternative,
9723 ;; see comment above inline_secondary_memory_needed function in i386.c
9724 (define_insn "sse2_storehpd"
9725 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
9726 (vec_select:DF
9727 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
9728 (parallel [(const_int 1)])))]
9729 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9730 "@
9731 %vmovhpd\t{%1, %0|%0, %1}
9732 unpckhpd\t%0, %0
9733 vunpckhpd\t{%d1, %0|%0, %d1}
9734 #
9735 #
9736 #"
9737 [(set_attr "isa" "*,noavx,avx,*,*,*")
9738 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
9739 (set (attr "prefix_data16")
9740 (if_then_else
9741 (and (eq_attr "alternative" "0")
9742 (not (match_test "TARGET_AVX")))
9743 (const_string "1")
9744 (const_string "*")))
9745 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
9746 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
9747
9748 (define_split
9749 [(set (match_operand:DF 0 "register_operand")
9750 (vec_select:DF
9751 (match_operand:V2DF 1 "memory_operand")
9752 (parallel [(const_int 1)])))]
9753 "TARGET_SSE2 && reload_completed"
9754 [(set (match_dup 0) (match_dup 1))]
9755 "operands[1] = adjust_address (operands[1], DFmode, 8);")
9756
9757 (define_insn "*vec_extractv2df_1_sse"
9758 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
9759 (vec_select:DF
9760 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
9761 (parallel [(const_int 1)])))]
9762 "!TARGET_SSE2 && TARGET_SSE
9763 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9764 "@
9765 movhps\t{%1, %0|%q0, %1}
9766 movhlps\t{%1, %0|%0, %1}
9767 movlps\t{%H1, %0|%0, %H1}"
9768 [(set_attr "type" "ssemov")
9769 (set_attr "mode" "V2SF,V4SF,V2SF")])
9770
9771 ;; Avoid combining registers from different units in a single alternative,
9772 ;; see comment above inline_secondary_memory_needed function in i386.c
9773 (define_insn "sse2_storelpd"
9774 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
9775 (vec_select:DF
9776 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
9777 (parallel [(const_int 0)])))]
9778 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9779 "@
9780 %vmovlpd\t{%1, %0|%0, %1}
9781 #
9782 #
9783 #
9784 #"
9785 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
9786 (set (attr "prefix_data16")
9787 (if_then_else (eq_attr "alternative" "0")
9788 (const_string "1")
9789 (const_string "*")))
9790 (set_attr "prefix" "maybe_vex")
9791 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
9792
9793 (define_split
9794 [(set (match_operand:DF 0 "register_operand")
9795 (vec_select:DF
9796 (match_operand:V2DF 1 "nonimmediate_operand")
9797 (parallel [(const_int 0)])))]
9798 "TARGET_SSE2 && reload_completed"
9799 [(set (match_dup 0) (match_dup 1))]
9800 "operands[1] = gen_lowpart (DFmode, operands[1]);")
9801
9802 (define_insn "*vec_extractv2df_0_sse"
9803 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
9804 (vec_select:DF
9805 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
9806 (parallel [(const_int 0)])))]
9807 "!TARGET_SSE2 && TARGET_SSE
9808 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9809 "@
9810 movlps\t{%1, %0|%0, %1}
9811 movaps\t{%1, %0|%0, %1}
9812 movlps\t{%1, %0|%0, %q1}"
9813 [(set_attr "type" "ssemov")
9814 (set_attr "mode" "V2SF,V4SF,V2SF")])
9815
9816 (define_expand "sse2_loadhpd_exp"
9817 [(set (match_operand:V2DF 0 "nonimmediate_operand")
9818 (vec_concat:V2DF
9819 (vec_select:DF
9820 (match_operand:V2DF 1 "nonimmediate_operand")
9821 (parallel [(const_int 0)]))
9822 (match_operand:DF 2 "nonimmediate_operand")))]
9823 "TARGET_SSE2"
9824 {
9825 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9826
9827 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
9828
9829 /* Fix up the destination if needed. */
9830 if (dst != operands[0])
9831 emit_move_insn (operands[0], dst);
9832
9833 DONE;
9834 })
9835
9836 ;; Avoid combining registers from different units in a single alternative,
9837 ;; see comment above inline_secondary_memory_needed function in i386.c
9838 (define_insn "sse2_loadhpd"
9839 [(set (match_operand:V2DF 0 "nonimmediate_operand"
9840 "=x,v,x,v ,o,o ,o")
9841 (vec_concat:V2DF
9842 (vec_select:DF
9843 (match_operand:V2DF 1 "nonimmediate_operand"
9844 " 0,v,0,v ,0,0 ,0")
9845 (parallel [(const_int 0)]))
9846 (match_operand:DF 2 "nonimmediate_operand"
9847 " m,m,x,Yv,x,*f,r")))]
9848 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9849 "@
9850 movhpd\t{%2, %0|%0, %2}
9851 vmovhpd\t{%2, %1, %0|%0, %1, %2}
9852 unpcklpd\t{%2, %0|%0, %2}
9853 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9854 #
9855 #
9856 #"
9857 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
9858 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
9859 (set (attr "prefix_data16")
9860 (if_then_else (eq_attr "alternative" "0")
9861 (const_string "1")
9862 (const_string "*")))
9863 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
9864 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
9865
9866 (define_split
9867 [(set (match_operand:V2DF 0 "memory_operand")
9868 (vec_concat:V2DF
9869 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
9870 (match_operand:DF 1 "register_operand")))]
9871 "TARGET_SSE2 && reload_completed"
9872 [(set (match_dup 0) (match_dup 1))]
9873 "operands[0] = adjust_address (operands[0], DFmode, 8);")
9874
9875 (define_expand "sse2_loadlpd_exp"
9876 [(set (match_operand:V2DF 0 "nonimmediate_operand")
9877 (vec_concat:V2DF
9878 (match_operand:DF 2 "nonimmediate_operand")
9879 (vec_select:DF
9880 (match_operand:V2DF 1 "nonimmediate_operand")
9881 (parallel [(const_int 1)]))))]
9882 "TARGET_SSE2"
9883 {
9884 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9885
9886 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
9887
9888 /* Fix up the destination if needed. */
9889 if (dst != operands[0])
9890 emit_move_insn (operands[0], dst);
9891
9892 DONE;
9893 })
9894
9895 ;; Avoid combining registers from different units in a single alternative,
9896 ;; see comment above inline_secondary_memory_needed function in i386.c
9897 (define_insn "sse2_loadlpd"
9898 [(set (match_operand:V2DF 0 "nonimmediate_operand"
9899 "=v,x,v,x,v,x,x,v,m,m ,m")
9900 (vec_concat:V2DF
9901 (match_operand:DF 2 "nonimmediate_operand"
9902 "vm,m,m,x,v,0,0,v,x,*f,r")
9903 (vec_select:DF
9904 (match_operand:V2DF 1 "nonimm_or_0_operand"
9905 " C,0,v,0,v,x,o,o,0,0 ,0")
9906 (parallel [(const_int 1)]))))]
9907 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9908 "@
9909 %vmovq\t{%2, %0|%0, %2}
9910 movlpd\t{%2, %0|%0, %2}
9911 vmovlpd\t{%2, %1, %0|%0, %1, %2}
9912 movsd\t{%2, %0|%0, %2}
9913 vmovsd\t{%2, %1, %0|%0, %1, %2}
9914 shufpd\t{$2, %1, %0|%0, %1, 2}
9915 movhpd\t{%H1, %0|%0, %H1}
9916 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
9917 #
9918 #
9919 #"
9920 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
9921 (set (attr "type")
9922 (cond [(eq_attr "alternative" "5")
9923 (const_string "sselog")
9924 (eq_attr "alternative" "9")
9925 (const_string "fmov")
9926 (eq_attr "alternative" "10")
9927 (const_string "imov")
9928 ]
9929 (const_string "ssemov")))
9930 (set (attr "prefix_data16")
9931 (if_then_else (eq_attr "alternative" "1,6")
9932 (const_string "1")
9933 (const_string "*")))
9934 (set (attr "length_immediate")
9935 (if_then_else (eq_attr "alternative" "5")
9936 (const_string "1")
9937 (const_string "*")))
9938 (set (attr "prefix")
9939 (cond [(eq_attr "alternative" "0")
9940 (const_string "maybe_vex")
9941 (eq_attr "alternative" "1,3,5,6")
9942 (const_string "orig")
9943 (eq_attr "alternative" "2,4,7")
9944 (const_string "maybe_evex")
9945 ]
9946 (const_string "*")))
9947 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
9948
9949 (define_split
9950 [(set (match_operand:V2DF 0 "memory_operand")
9951 (vec_concat:V2DF
9952 (match_operand:DF 1 "register_operand")
9953 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
9954 "TARGET_SSE2 && reload_completed"
9955 [(set (match_dup 0) (match_dup 1))]
9956 "operands[0] = adjust_address (operands[0], DFmode, 0);")
9957
9958 (define_insn "sse2_movsd"
9959 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
9960 (vec_merge:V2DF
9961 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
9962 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
9963 (const_int 1)))]
9964 "TARGET_SSE2"
9965 "@
9966 movsd\t{%2, %0|%0, %2}
9967 vmovsd\t{%2, %1, %0|%0, %1, %2}
9968 movlpd\t{%2, %0|%0, %q2}
9969 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
9970 %vmovlpd\t{%2, %0|%q0, %2}
9971 shufpd\t{$2, %1, %0|%0, %1, 2}
9972 movhps\t{%H1, %0|%0, %H1}
9973 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
9974 %vmovhps\t{%1, %H0|%H0, %1}"
9975 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
9976 (set (attr "type")
9977 (if_then_else
9978 (eq_attr "alternative" "5")
9979 (const_string "sselog")
9980 (const_string "ssemov")))
9981 (set (attr "prefix_data16")
9982 (if_then_else
9983 (and (eq_attr "alternative" "2,4")
9984 (not (match_test "TARGET_AVX")))
9985 (const_string "1")
9986 (const_string "*")))
9987 (set (attr "length_immediate")
9988 (if_then_else (eq_attr "alternative" "5")
9989 (const_string "1")
9990 (const_string "*")))
9991 (set (attr "prefix")
9992 (cond [(eq_attr "alternative" "1,3,7")
9993 (const_string "maybe_evex")
9994 (eq_attr "alternative" "4,8")
9995 (const_string "maybe_vex")
9996 ]
9997 (const_string "orig")))
9998 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
9999
10000 (define_insn "vec_dupv2df<mask_name>"
10001 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
10002 (vec_duplicate:V2DF
10003 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
10004 "TARGET_SSE2 && <mask_avx512vl_condition>"
10005 "@
10006 unpcklpd\t%0, %0
10007 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
10008 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10009 [(set_attr "isa" "noavx,sse3,avx512vl")
10010 (set_attr "type" "sselog1")
10011 (set_attr "prefix" "orig,maybe_vex,evex")
10012 (set_attr "mode" "V2DF,DF,DF")])
10013
10014 (define_insn "vec_concatv2df"
10015 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
10016 (vec_concat:V2DF
10017 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,vm,0,0")
10018 (match_operand:DF 2 "nonimm_or_0_operand" " x,x,v,1,1,m,m, C,x,m")))]
10019 "TARGET_SSE
10020 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
10021 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
10022 "@
10023 unpcklpd\t{%2, %0|%0, %2}
10024 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10025 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10026 %vmovddup\t{%1, %0|%0, %1}
10027 vmovddup\t{%1, %0|%0, %1}
10028 movhpd\t{%2, %0|%0, %2}
10029 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10030 %vmovq\t{%1, %0|%0, %1}
10031 movlhps\t{%2, %0|%0, %2}
10032 movhps\t{%2, %0|%0, %2}"
10033 [(set (attr "isa")
10034 (cond [(eq_attr "alternative" "0,5")
10035 (const_string "sse2_noavx")
10036 (eq_attr "alternative" "1,6")
10037 (const_string "avx")
10038 (eq_attr "alternative" "2,4")
10039 (const_string "avx512vl")
10040 (eq_attr "alternative" "3")
10041 (const_string "sse3")
10042 (eq_attr "alternative" "7")
10043 (const_string "sse2")
10044 ]
10045 (const_string "noavx")))
10046 (set (attr "type")
10047 (if_then_else
10048 (eq_attr "alternative" "0,1,2,3,4")
10049 (const_string "sselog")
10050 (const_string "ssemov")))
10051 (set (attr "prefix_data16")
10052 (if_then_else (eq_attr "alternative" "5")
10053 (const_string "1")
10054 (const_string "*")))
10055 (set (attr "prefix")
10056 (cond [(eq_attr "alternative" "1,6")
10057 (const_string "vex")
10058 (eq_attr "alternative" "2,4")
10059 (const_string "evex")
10060 (eq_attr "alternative" "3,7")
10061 (const_string "maybe_vex")
10062 ]
10063 (const_string "orig")))
10064 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
10065
10066 ;; vmovq also clears the higher bits of the destination.
10067 (define_insn "vec_set<mode>_0"
10068 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
10069 (vec_merge:VF2_512_256
10070 (vec_duplicate:VF2_512_256
10071 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm"))
10072 (match_operand:VF2_512_256 1 "const0_operand" "C")
10073 (const_int 1)))]
10074 "TARGET_AVX"
10075 "vmovq\t{%2, %x0|%x0, %2}"
10076 [(set_attr "type" "ssemov")
10077 (set_attr "prefix" "maybe_evex")
10078 (set_attr "mode" "DF")])
10079
10080 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10081 ;;
10082 ;; Parallel integer down-conversion operations
10083 ;;
10084 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10085
10086 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
10087 (define_mode_attr pmov_src_mode
10088 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
10089 (define_mode_attr pmov_src_lower
10090 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
10091 (define_mode_attr pmov_suff_1
10092 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
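;; For instance, a V16QI destination pairs with a V16SI source and the
;; "db" suffix, so the pattern below emits vpmovdb, with <trunsuffix>
;; supplying the "s"/"us" saturating variants for ss_truncate and
;; us_truncate.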
10093
10094 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
10095 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10096 (any_truncate:PMOV_DST_MODE_1
10097 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
10098 "TARGET_AVX512F"
10099 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
10100 [(set_attr "type" "ssemov")
10101 (set_attr "memory" "none,store")
10102 (set_attr "prefix" "evex")
10103 (set_attr "mode" "<sseinsnmode>")])
10104
10105 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
10106 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10107 (vec_merge:PMOV_DST_MODE_1
10108 (any_truncate:PMOV_DST_MODE_1
10109 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
10110 (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
10111 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10112 "TARGET_AVX512F"
10113 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10114 [(set_attr "type" "ssemov")
10115 (set_attr "memory" "none,store")
10116 (set_attr "prefix" "evex")
10117 (set_attr "mode" "<sseinsnmode>")])
10118
10119 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
10120 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
10121 (vec_merge:PMOV_DST_MODE_1
10122 (any_truncate:PMOV_DST_MODE_1
10123 (match_operand:<pmov_src_mode> 1 "register_operand"))
10124 (match_dup 0)
10125 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
10126 "TARGET_AVX512F")
10127
10128 (define_insn "avx512bw_<code>v32hiv32qi2"
10129 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10130 (any_truncate:V32QI
10131 (match_operand:V32HI 1 "register_operand" "v,v")))]
10132 "TARGET_AVX512BW"
10133 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
10134 [(set_attr "type" "ssemov")
10135 (set_attr "memory" "none,store")
10136 (set_attr "prefix" "evex")
10137 (set_attr "mode" "XI")])
10138
10139 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
10140 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10141 (vec_merge:V32QI
10142 (any_truncate:V32QI
10143 (match_operand:V32HI 1 "register_operand" "v,v"))
10144 (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
10145 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
10146 "TARGET_AVX512BW"
10147 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10148 [(set_attr "type" "ssemov")
10149 (set_attr "memory" "none,store")
10150 (set_attr "prefix" "evex")
10151 (set_attr "mode" "XI")])
10152
10153 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
10154 [(set (match_operand:V32QI 0 "nonimmediate_operand")
10155 (vec_merge:V32QI
10156 (any_truncate:V32QI
10157 (match_operand:V32HI 1 "register_operand"))
10158 (match_dup 0)
10159 (match_operand:SI 2 "register_operand")))]
10160 "TARGET_AVX512BW")
10161
10162 (define_mode_iterator PMOV_DST_MODE_2
10163 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
10164 (define_mode_attr pmov_suff_2
10165 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
10166
10167 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
10168 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10169 (any_truncate:PMOV_DST_MODE_2
10170 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
10171 "TARGET_AVX512VL"
10172 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
10173 [(set_attr "type" "ssemov")
10174 (set_attr "memory" "none,store")
10175 (set_attr "prefix" "evex")
10176 (set_attr "mode" "<sseinsnmode>")])
10177
10178 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
10179 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10180 (vec_merge:PMOV_DST_MODE_2
10181 (any_truncate:PMOV_DST_MODE_2
10182 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
10183 (match_operand:PMOV_DST_MODE_2 2 "nonimm_or_0_operand" "0C,0")
10184 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10185 "TARGET_AVX512VL"
10186 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10187 [(set_attr "type" "ssemov")
10188 (set_attr "memory" "none,store")
10189 (set_attr "prefix" "evex")
10190 (set_attr "mode" "<sseinsnmode>")])
10191
10192 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
10193 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
10194 (vec_merge:PMOV_DST_MODE_2
10195 (any_truncate:PMOV_DST_MODE_2
10196 (match_operand:<ssedoublemode> 1 "register_operand"))
10197 (match_dup 0)
10198 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
10199 "TARGET_AVX512VL")
10200
10201 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
10202 (define_mode_attr pmov_dst_3
10203 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
10204 (define_mode_attr pmov_dst_zeroed_3
10205 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
10206 (define_mode_attr pmov_suff_3
10207 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
10208
10209 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
10210 [(set (match_operand:V16QI 0 "register_operand" "=v")
10211 (vec_concat:V16QI
10212 (any_truncate:<pmov_dst_3>
10213 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
10214 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
10215 "TARGET_AVX512VL"
10216 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
10217 [(set_attr "type" "ssemov")
10218 (set_attr "prefix" "evex")
10219 (set_attr "mode" "TI")])
10220
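;; In the partial-store forms below, the Intel-syntax operand modifiers
;; %w0, %k0 and %q0 print the destination as a word, dword or qword
;; memory reference, matching the 2, 4 or 8 bytes actually written.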
10221 (define_insn "*avx512vl_<code>v2div2qi2_store"
10222 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10223 (vec_concat:V16QI
10224 (any_truncate:V2QI
10225 (match_operand:V2DI 1 "register_operand" "v"))
10226 (vec_select:V14QI
10227 (match_dup 0)
10228 (parallel [(const_int 2) (const_int 3)
10229 (const_int 4) (const_int 5)
10230 (const_int 6) (const_int 7)
10231 (const_int 8) (const_int 9)
10232 (const_int 10) (const_int 11)
10233 (const_int 12) (const_int 13)
10234 (const_int 14) (const_int 15)]))))]
10235 "TARGET_AVX512VL"
10236 "vpmov<trunsuffix>qb\t{%1, %0|%w0, %1}"
10237 [(set_attr "type" "ssemov")
10238 (set_attr "memory" "store")
10239 (set_attr "prefix" "evex")
10240 (set_attr "mode" "TI")])
10241
10242 (define_insn "avx512vl_<code>v2div2qi2_mask"
10243 [(set (match_operand:V16QI 0 "register_operand" "=v")
10244 (vec_concat:V16QI
10245 (vec_merge:V2QI
10246 (any_truncate:V2QI
10247 (match_operand:V2DI 1 "register_operand" "v"))
10248 (vec_select:V2QI
10249 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10250 (parallel [(const_int 0) (const_int 1)]))
10251 (match_operand:QI 3 "register_operand" "Yk"))
10252 (const_vector:V14QI [(const_int 0) (const_int 0)
10253 (const_int 0) (const_int 0)
10254 (const_int 0) (const_int 0)
10255 (const_int 0) (const_int 0)
10256 (const_int 0) (const_int 0)
10257 (const_int 0) (const_int 0)
10258 (const_int 0) (const_int 0)])))]
10259 "TARGET_AVX512VL"
10260 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10261 [(set_attr "type" "ssemov")
10262 (set_attr "prefix" "evex")
10263 (set_attr "mode" "TI")])
10264
10265 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
10266 [(set (match_operand:V16QI 0 "register_operand" "=v")
10267 (vec_concat:V16QI
10268 (vec_merge:V2QI
10269 (any_truncate:V2QI
10270 (match_operand:V2DI 1 "register_operand" "v"))
10271 (const_vector:V2QI [(const_int 0) (const_int 0)])
10272 (match_operand:QI 2 "register_operand" "Yk"))
10273 (const_vector:V14QI [(const_int 0) (const_int 0)
10274 (const_int 0) (const_int 0)
10275 (const_int 0) (const_int 0)
10276 (const_int 0) (const_int 0)
10277 (const_int 0) (const_int 0)
10278 (const_int 0) (const_int 0)
10279 (const_int 0) (const_int 0)])))]
10280 "TARGET_AVX512VL"
10281 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10282 [(set_attr "type" "ssemov")
10283 (set_attr "prefix" "evex")
10284 (set_attr "mode" "TI")])
10285
10286 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
10287 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10288 (vec_concat:V16QI
10289 (vec_merge:V2QI
10290 (any_truncate:V2QI
10291 (match_operand:V2DI 1 "register_operand" "v"))
10292 (vec_select:V2QI
10293 (match_dup 0)
10294 (parallel [(const_int 0) (const_int 1)]))
10295 (match_operand:QI 2 "register_operand" "Yk"))
10296 (vec_select:V14QI
10297 (match_dup 0)
10298 (parallel [(const_int 2) (const_int 3)
10299 (const_int 4) (const_int 5)
10300 (const_int 6) (const_int 7)
10301 (const_int 8) (const_int 9)
10302 (const_int 10) (const_int 11)
10303 (const_int 12) (const_int 13)
10304 (const_int 14) (const_int 15)]))))]
10305 "TARGET_AVX512VL"
10306 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
10307 [(set_attr "type" "ssemov")
10308 (set_attr "memory" "store")
10309 (set_attr "prefix" "evex")
10310 (set_attr "mode" "TI")])
10311
10312 (define_insn "*avx512vl_<code><mode>v4qi2_store"
10313 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10314 (vec_concat:V16QI
10315 (any_truncate:V4QI
10316 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10317 (vec_select:V12QI
10318 (match_dup 0)
10319 (parallel [(const_int 4) (const_int 5)
10320 (const_int 6) (const_int 7)
10321 (const_int 8) (const_int 9)
10322 (const_int 10) (const_int 11)
10323 (const_int 12) (const_int 13)
10324 (const_int 14) (const_int 15)]))))]
10325 "TARGET_AVX512VL"
10326 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%k0, %1}"
10327 [(set_attr "type" "ssemov")
10328 (set_attr "memory" "store")
10329 (set_attr "prefix" "evex")
10330 (set_attr "mode" "TI")])
10331
10332 (define_insn "avx512vl_<code><mode>v4qi2_mask"
10333 [(set (match_operand:V16QI 0 "register_operand" "=v")
10334 (vec_concat:V16QI
10335 (vec_merge:V4QI
10336 (any_truncate:V4QI
10337 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10338 (vec_select:V4QI
10339 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10340 (parallel [(const_int 0) (const_int 1)
10341 (const_int 2) (const_int 3)]))
10342 (match_operand:QI 3 "register_operand" "Yk"))
10343 (const_vector:V12QI [(const_int 0) (const_int 0)
10344 (const_int 0) (const_int 0)
10345 (const_int 0) (const_int 0)
10346 (const_int 0) (const_int 0)
10347 (const_int 0) (const_int 0)
10348 (const_int 0) (const_int 0)])))]
10349 "TARGET_AVX512VL"
10350 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10351 [(set_attr "type" "ssemov")
10352 (set_attr "prefix" "evex")
10353 (set_attr "mode" "TI")])
10354
10355 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
10356 [(set (match_operand:V16QI 0 "register_operand" "=v")
10357 (vec_concat:V16QI
10358 (vec_merge:V4QI
10359 (any_truncate:V4QI
10360 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10361 (const_vector:V4QI [(const_int 0) (const_int 0)
10362 (const_int 0) (const_int 0)])
10363 (match_operand:QI 2 "register_operand" "Yk"))
10364 (const_vector:V12QI [(const_int 0) (const_int 0)
10365 (const_int 0) (const_int 0)
10366 (const_int 0) (const_int 0)
10367 (const_int 0) (const_int 0)
10368 (const_int 0) (const_int 0)
10369 (const_int 0) (const_int 0)])))]
10370 "TARGET_AVX512VL"
10371 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10372 [(set_attr "type" "ssemov")
10373 (set_attr "prefix" "evex")
10374 (set_attr "mode" "TI")])
10375
10376 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
10377 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10378 (vec_concat:V16QI
10379 (vec_merge:V4QI
10380 (any_truncate:V4QI
10381 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10382 (vec_select:V4QI
10383 (match_dup 0)
10384 (parallel [(const_int 0) (const_int 1)
10385 (const_int 2) (const_int 3)]))
10386 (match_operand:QI 2 "register_operand" "Yk"))
10387 (vec_select:V12QI
10388 (match_dup 0)
10389 (parallel [(const_int 4) (const_int 5)
10390 (const_int 6) (const_int 7)
10391 (const_int 8) (const_int 9)
10392 (const_int 10) (const_int 11)
10393 (const_int 12) (const_int 13)
10394 (const_int 14) (const_int 15)]))))]
10395 "TARGET_AVX512VL"
10396 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}"
10397 [(set_attr "type" "ssemov")
10398 (set_attr "memory" "store")
10399 (set_attr "prefix" "evex")
10400 (set_attr "mode" "TI")])
10401
10402 (define_mode_iterator VI2_128_BW_4_256
10403 [(V8HI "TARGET_AVX512BW") V8SI])
10404
10405 (define_insn "*avx512vl_<code><mode>v8qi2_store"
10406 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10407 (vec_concat:V16QI
10408 (any_truncate:V8QI
10409 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10410 (vec_select:V8QI
10411 (match_dup 0)
10412 (parallel [(const_int 8) (const_int 9)
10413 (const_int 10) (const_int 11)
10414 (const_int 12) (const_int 13)
10415 (const_int 14) (const_int 15)]))))]
10416 "TARGET_AVX512VL"
10417 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%q0, %1}"
10418 [(set_attr "type" "ssemov")
10419 (set_attr "memory" "store")
10420 (set_attr "prefix" "evex")
10421 (set_attr "mode" "TI")])
10422
10423 (define_insn "avx512vl_<code><mode>v8qi2_mask"
10424 [(set (match_operand:V16QI 0 "register_operand" "=v")
10425 (vec_concat:V16QI
10426 (vec_merge:V8QI
10427 (any_truncate:V8QI
10428 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10429 (vec_select:V8QI
10430 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10431 (parallel [(const_int 0) (const_int 1)
10432 (const_int 2) (const_int 3)
10433 (const_int 4) (const_int 5)
10434 (const_int 6) (const_int 7)]))
10435 (match_operand:QI 3 "register_operand" "Yk"))
10436 (const_vector:V8QI [(const_int 0) (const_int 0)
10437 (const_int 0) (const_int 0)
10438 (const_int 0) (const_int 0)
10439 (const_int 0) (const_int 0)])))]
10440 "TARGET_AVX512VL"
10441 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10442 [(set_attr "type" "ssemov")
10443 (set_attr "prefix" "evex")
10444 (set_attr "mode" "TI")])
10445
10446 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
10447 [(set (match_operand:V16QI 0 "register_operand" "=v")
10448 (vec_concat:V16QI
10449 (vec_merge:V8QI
10450 (any_truncate:V8QI
10451 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10452 (const_vector:V8QI [(const_int 0) (const_int 0)
10453 (const_int 0) (const_int 0)
10454 (const_int 0) (const_int 0)
10455 (const_int 0) (const_int 0)])
10456 (match_operand:QI 2 "register_operand" "Yk"))
10457 (const_vector:V8QI [(const_int 0) (const_int 0)
10458 (const_int 0) (const_int 0)
10459 (const_int 0) (const_int 0)
10460 (const_int 0) (const_int 0)])))]
10461 "TARGET_AVX512VL"
10462 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10463 [(set_attr "type" "ssemov")
10464 (set_attr "prefix" "evex")
10465 (set_attr "mode" "TI")])
10466
10467 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
10468 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10469 (vec_concat:V16QI
10470 (vec_merge:V8QI
10471 (any_truncate:V8QI
10472 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10473 (vec_select:V8QI
10474 (match_dup 0)
10475 (parallel [(const_int 0) (const_int 1)
10476 (const_int 2) (const_int 3)
10477 (const_int 4) (const_int 5)
10478 (const_int 6) (const_int 7)]))
10479 (match_operand:QI 2 "register_operand" "Yk"))
10480 (vec_select:V8QI
10481 (match_dup 0)
10482 (parallel [(const_int 8) (const_int 9)
10483 (const_int 10) (const_int 11)
10484 (const_int 12) (const_int 13)
10485 (const_int 14) (const_int 15)]))))]
10486 "TARGET_AVX512VL"
10487 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
10488 [(set_attr "type" "ssemov")
10489 (set_attr "memory" "store")
10490 (set_attr "prefix" "evex")
10491 (set_attr "mode" "TI")])
10492
10493 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
10494 (define_mode_attr pmov_dst_4
10495 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
10496 (define_mode_attr pmov_dst_zeroed_4
10497 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
10498 (define_mode_attr pmov_suff_4
10499 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
10500
10501 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
10502 [(set (match_operand:V8HI 0 "register_operand" "=v")
10503 (vec_concat:V8HI
10504 (any_truncate:<pmov_dst_4>
10505 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
10506 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
10507 "TARGET_AVX512VL"
10508 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
10509 [(set_attr "type" "ssemov")
10510 (set_attr "prefix" "evex")
10511 (set_attr "mode" "TI")])
10512
10513 (define_insn "*avx512vl_<code><mode>v4hi2_store"
10514 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10515 (vec_concat:V8HI
10516 (any_truncate:V4HI
10517 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10518 (vec_select:V4HI
10519 (match_dup 0)
10520 (parallel [(const_int 4) (const_int 5)
10521 (const_int 6) (const_int 7)]))))]
10522 "TARGET_AVX512VL"
10523 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
10524 [(set_attr "type" "ssemov")
10525 (set_attr "memory" "store")
10526 (set_attr "prefix" "evex")
10527 (set_attr "mode" "TI")])
10528
10529 (define_insn "avx512vl_<code><mode>v4hi2_mask"
10530 [(set (match_operand:V8HI 0 "register_operand" "=v")
10531 (vec_concat:V8HI
10532 (vec_merge:V4HI
10533 (any_truncate:V4HI
10534 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10535 (vec_select:V4HI
10536 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
10537 (parallel [(const_int 0) (const_int 1)
10538 (const_int 2) (const_int 3)]))
10539 (match_operand:QI 3 "register_operand" "Yk"))
10540 (const_vector:V4HI [(const_int 0) (const_int 0)
10541 (const_int 0) (const_int 0)])))]
10542 "TARGET_AVX512VL"
10543 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10544 [(set_attr "type" "ssemov")
10545 (set_attr "prefix" "evex")
10546 (set_attr "mode" "TI")])
10547
10548 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
10549 [(set (match_operand:V8HI 0 "register_operand" "=v")
10550 (vec_concat:V8HI
10551 (vec_merge:V4HI
10552 (any_truncate:V4HI
10553 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10554 (const_vector:V4HI [(const_int 0) (const_int 0)
10555 (const_int 0) (const_int 0)])
10556 (match_operand:QI 2 "register_operand" "Yk"))
10557 (const_vector:V4HI [(const_int 0) (const_int 0)
10558 (const_int 0) (const_int 0)])))]
10559 "TARGET_AVX512VL"
10560 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10561 [(set_attr "type" "ssemov")
10562 (set_attr "prefix" "evex")
10563 (set_attr "mode" "TI")])
10564
10565 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
10566 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10567 (vec_concat:V8HI
10568 (vec_merge:V4HI
10569 (any_truncate:V4HI
10570 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10571 (vec_select:V4HI
10572 (match_dup 0)
10573 (parallel [(const_int 0) (const_int 1)
10574 (const_int 2) (const_int 3)]))
10575 (match_operand:QI 2 "register_operand" "Yk"))
10576 (vec_select:V4HI
10577 (match_dup 0)
10578 (parallel [(const_int 4) (const_int 5)
10579 (const_int 6) (const_int 7)]))))]
10580 "TARGET_AVX512VL"
10581 {
10582 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
10583 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
10584 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
10585 }
10586 [(set_attr "type" "ssemov")
10587 (set_attr "memory" "store")
10588 (set_attr "prefix" "evex")
10589 (set_attr "mode" "TI")])
10590
10591 (define_insn "*avx512vl_<code>v2div2hi2_store"
10592 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10593 (vec_concat:V8HI
10594 (any_truncate:V2HI
10595 (match_operand:V2DI 1 "register_operand" "v"))
10596 (vec_select:V6HI
10597 (match_dup 0)
10598 (parallel [(const_int 2) (const_int 3)
10599 (const_int 4) (const_int 5)
10600 (const_int 6) (const_int 7)]))))]
10601 "TARGET_AVX512VL"
10602 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
10603 [(set_attr "type" "ssemov")
10604 (set_attr "memory" "store")
10605 (set_attr "prefix" "evex")
10606 (set_attr "mode" "TI")])
10607
10608 (define_insn "avx512vl_<code>v2div2hi2_mask"
10609 [(set (match_operand:V8HI 0 "register_operand" "=v")
10610 (vec_concat:V8HI
10611 (vec_merge:V2HI
10612 (any_truncate:V2HI
10613 (match_operand:V2DI 1 "register_operand" "v"))
10614 (vec_select:V2HI
10615 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
10616 (parallel [(const_int 0) (const_int 1)]))
10617 (match_operand:QI 3 "register_operand" "Yk"))
10618 (const_vector:V6HI [(const_int 0) (const_int 0)
10619 (const_int 0) (const_int 0)
10620 (const_int 0) (const_int 0)])))]
10621 "TARGET_AVX512VL"
10622 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10623 [(set_attr "type" "ssemov")
10624 (set_attr "prefix" "evex")
10625 (set_attr "mode" "TI")])
10626
10627 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
10628 [(set (match_operand:V8HI 0 "register_operand" "=v")
10629 (vec_concat:V8HI
10630 (vec_merge:V2HI
10631 (any_truncate:V2HI
10632 (match_operand:V2DI 1 "register_operand" "v"))
10633 (const_vector:V2HI [(const_int 0) (const_int 0)])
10634 (match_operand:QI 2 "register_operand" "Yk"))
10635 (const_vector:V6HI [(const_int 0) (const_int 0)
10636 (const_int 0) (const_int 0)
10637 (const_int 0) (const_int 0)])))]
10638 "TARGET_AVX512VL"
10639 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10640 [(set_attr "type" "ssemov")
10641 (set_attr "prefix" "evex")
10642 (set_attr "mode" "TI")])
10643
10644 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
10645 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10646 (vec_concat:V8HI
10647 (vec_merge:V2HI
10648 (any_truncate:V2HI
10649 (match_operand:V2DI 1 "register_operand" "v"))
10650 (vec_select:V2HI
10651 (match_dup 0)
10652 (parallel [(const_int 0) (const_int 1)]))
10653 (match_operand:QI 2 "register_operand" "Yk"))
10654 (vec_select:V6HI
10655 (match_dup 0)
10656 (parallel [(const_int 2) (const_int 3)
10657 (const_int 4) (const_int 5)
10658 (const_int 6) (const_int 7)]))))]
10659 "TARGET_AVX512VL"
10660 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
10661 [(set_attr "type" "ssemov")
10662 (set_attr "memory" "store")
10663 (set_attr "prefix" "evex")
10664 (set_attr "mode" "TI")])
10665
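;; V2DI -> V2SI narrowing (vpmov{,s,us}qd with a 128-bit source), with the
;; same register/store/mask structure as above; roughly
;; __m128i r = _mm_cvtepi64_epi32 (x) at the intrinsics level
;; (illustration only).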
10666 (define_insn "*avx512vl_<code>v2div2si2"
10667 [(set (match_operand:V4SI 0 "register_operand" "=v")
10668 (vec_concat:V4SI
10669 (any_truncate:V2SI
10670 (match_operand:V2DI 1 "register_operand" "v"))
10671 (match_operand:V2SI 2 "const0_operand")))]
10672 "TARGET_AVX512VL"
10673 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
10674 [(set_attr "type" "ssemov")
10675 (set_attr "prefix" "evex")
10676 (set_attr "mode" "TI")])
10677
10678 (define_insn "*avx512vl_<code>v2div2si2_store"
10679 [(set (match_operand:V4SI 0 "memory_operand" "=m")
10680 (vec_concat:V4SI
10681 (any_truncate:V2SI
10682 (match_operand:V2DI 1 "register_operand" "v"))
10683 (vec_select:V2SI
10684 (match_dup 0)
10685 (parallel [(const_int 2) (const_int 3)]))))]
10686 "TARGET_AVX512VL"
10687 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
10688 [(set_attr "type" "ssemov")
10689 (set_attr "memory" "store")
10690 (set_attr "prefix" "evex")
10691 (set_attr "mode" "TI")])
10692
10693 (define_insn "avx512vl_<code>v2div2si2_mask"
10694 [(set (match_operand:V4SI 0 "register_operand" "=v")
10695 (vec_concat:V4SI
10696 (vec_merge:V2SI
10697 (any_truncate:V2SI
10698 (match_operand:V2DI 1 "register_operand" "v"))
10699 (vec_select:V2SI
10700 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
10701 (parallel [(const_int 0) (const_int 1)]))
10702 (match_operand:QI 3 "register_operand" "Yk"))
10703 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
10704 "TARGET_AVX512VL"
10705 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10706 [(set_attr "type" "ssemov")
10707 (set_attr "prefix" "evex")
10708 (set_attr "mode" "TI")])
10709
10710 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
10711 [(set (match_operand:V4SI 0 "register_operand" "=v")
10712 (vec_concat:V4SI
10713 (vec_merge:V2SI
10714 (any_truncate:V2SI
10715 (match_operand:V2DI 1 "register_operand" "v"))
10716 (const_vector:V2SI [(const_int 0) (const_int 0)])
10717 (match_operand:QI 2 "register_operand" "Yk"))
10718 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
10719 "TARGET_AVX512VL"
10720 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10721 [(set_attr "type" "ssemov")
10722 (set_attr "prefix" "evex")
10723 (set_attr "mode" "TI")])
10724
10725 (define_insn "avx512vl_<code>v2div2si2_mask_store"
10726 [(set (match_operand:V4SI 0 "memory_operand" "=m")
10727 (vec_concat:V4SI
10728 (vec_merge:V2SI
10729 (any_truncate:V2SI
10730 (match_operand:V2DI 1 "register_operand" "v"))
10731 (vec_select:V2SI
10732 (match_dup 0)
10733 (parallel [(const_int 0) (const_int 1)]))
10734 (match_operand:QI 2 "register_operand" "Yk"))
10735 (vec_select:V2SI
10736 (match_dup 0)
10737 (parallel [(const_int 2) (const_int 3)]))))]
10738 "TARGET_AVX512VL"
10739 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %t1}"
10740 [(set_attr "type" "ssemov")
10741 (set_attr "memory" "store")
10742 (set_attr "prefix" "evex")
10743 (set_attr "mode" "TI")])
10744
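;; 512-bit vpmov{,s,us}qb: eight quadwords narrowed to bytes in the low
;; 64 bits of an XMM destination (roughly _mm512_cvtepi64_epi8 at the
;; intrinsics level; illustration only).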
10745 (define_insn "*avx512f_<code>v8div16qi2"
10746 [(set (match_operand:V16QI 0 "register_operand" "=v")
10747 (vec_concat:V16QI
10748 (any_truncate:V8QI
10749 (match_operand:V8DI 1 "register_operand" "v"))
10750 (const_vector:V8QI [(const_int 0) (const_int 0)
10751 (const_int 0) (const_int 0)
10752 (const_int 0) (const_int 0)
10753 (const_int 0) (const_int 0)])))]
10754 "TARGET_AVX512F"
10755 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
10756 [(set_attr "type" "ssemov")
10757 (set_attr "prefix" "evex")
10758 (set_attr "mode" "TI")])
10759
10760 (define_insn "*avx512f_<code>v8div16qi2_store"
10761 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10762 (vec_concat:V16QI
10763 (any_truncate:V8QI
10764 (match_operand:V8DI 1 "register_operand" "v"))
10765 (vec_select:V8QI
10766 (match_dup 0)
10767 (parallel [(const_int 8) (const_int 9)
10768 (const_int 10) (const_int 11)
10769 (const_int 12) (const_int 13)
10770 (const_int 14) (const_int 15)]))))]
10771 "TARGET_AVX512F"
10772 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
10773 [(set_attr "type" "ssemov")
10774 (set_attr "memory" "store")
10775 (set_attr "prefix" "evex")
10776 (set_attr "mode" "TI")])
10777
10778 (define_insn "avx512f_<code>v8div16qi2_mask"
10779 [(set (match_operand:V16QI 0 "register_operand" "=v")
10780 (vec_concat:V16QI
10781 (vec_merge:V8QI
10782 (any_truncate:V8QI
10783 (match_operand:V8DI 1 "register_operand" "v"))
10784 (vec_select:V8QI
10785 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10786 (parallel [(const_int 0) (const_int 1)
10787 (const_int 2) (const_int 3)
10788 (const_int 4) (const_int 5)
10789 (const_int 6) (const_int 7)]))
10790 (match_operand:QI 3 "register_operand" "Yk"))
10791 (const_vector:V8QI [(const_int 0) (const_int 0)
10792 (const_int 0) (const_int 0)
10793 (const_int 0) (const_int 0)
10794 (const_int 0) (const_int 0)])))]
10795 "TARGET_AVX512F"
10796 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10797 [(set_attr "type" "ssemov")
10798 (set_attr "prefix" "evex")
10799 (set_attr "mode" "TI")])
10800
10801 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
10802 [(set (match_operand:V16QI 0 "register_operand" "=v")
10803 (vec_concat:V16QI
10804 (vec_merge:V8QI
10805 (any_truncate:V8QI
10806 (match_operand:V8DI 1 "register_operand" "v"))
10807 (const_vector:V8QI [(const_int 0) (const_int 0)
10808 (const_int 0) (const_int 0)
10809 (const_int 0) (const_int 0)
10810 (const_int 0) (const_int 0)])
10811 (match_operand:QI 2 "register_operand" "Yk"))
10812 (const_vector:V8QI [(const_int 0) (const_int 0)
10813 (const_int 0) (const_int 0)
10814 (const_int 0) (const_int 0)
10815 (const_int 0) (const_int 0)])))]
10816 "TARGET_AVX512F"
10817 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10818 [(set_attr "type" "ssemov")
10819 (set_attr "prefix" "evex")
10820 (set_attr "mode" "TI")])
10821
10822 (define_insn "avx512f_<code>v8div16qi2_mask_store"
10823 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10824 (vec_concat:V16QI
10825 (vec_merge:V8QI
10826 (any_truncate:V8QI
10827 (match_operand:V8DI 1 "register_operand" "v"))
10828 (vec_select:V8QI
10829 (match_dup 0)
10830 (parallel [(const_int 0) (const_int 1)
10831 (const_int 2) (const_int 3)
10832 (const_int 4) (const_int 5)
10833 (const_int 6) (const_int 7)]))
10834 (match_operand:QI 2 "register_operand" "Yk"))
10835 (vec_select:V8QI
10836 (match_dup 0)
10837 (parallel [(const_int 8) (const_int 9)
10838 (const_int 10) (const_int 11)
10839 (const_int 12) (const_int 13)
10840 (const_int 14) (const_int 15)]))))]
10841 "TARGET_AVX512F"
10842 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
10843 [(set_attr "type" "ssemov")
10844 (set_attr "memory" "store")
10845 (set_attr "prefix" "evex")
10846 (set_attr "mode" "TI")])
10847
10848 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10849 ;;
10850 ;; Parallel integral arithmetic
10851 ;;
10852 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10853
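;; There is no vector negation instruction; expand it as a subtraction
;; from zero.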
10854 (define_expand "neg<mode>2"
10855 [(set (match_operand:VI_AVX2 0 "register_operand")
10856 (minus:VI_AVX2
10857 (match_dup 2)
10858 (match_operand:VI_AVX2 1 "vector_operand")))]
10859 "TARGET_SSE2"
10860 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
10861
10862 (define_expand "<plusminus_insn><mode>3"
10863 [(set (match_operand:VI_AVX2 0 "register_operand")
10864 (plusminus:VI_AVX2
10865 (match_operand:VI_AVX2 1 "vector_operand")
10866 (match_operand:VI_AVX2 2 "vector_operand")))]
10867 "TARGET_SSE2"
10868 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10869
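;; Masked add/sub needs two expanders: 32/64-bit elements only require
;; AVX512F (plus VL for the narrower vectors), while 8/16-bit element
;; masking requires AVX512BW.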
10870 (define_expand "<plusminus_insn><mode>3_mask"
10871 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10872 (vec_merge:VI48_AVX512VL
10873 (plusminus:VI48_AVX512VL
10874 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10875 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10876 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
10877 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10878 "TARGET_AVX512F"
10879 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10880
10881 (define_expand "<plusminus_insn><mode>3_mask"
10882 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
10883 (vec_merge:VI12_AVX512VL
10884 (plusminus:VI12_AVX512VL
10885 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
10886 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10887 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
10888 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10889 "TARGET_AVX512BW"
10890 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10891
10892 (define_insn "*<plusminus_insn><mode>3"
10893 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
10894 (plusminus:VI_AVX2
10895 (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
10896 (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
10897 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10898 "@
10899 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10900 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10901 [(set_attr "isa" "noavx,avx")
10902 (set_attr "type" "sseiadd")
10903 (set_attr "prefix_data16" "1,*")
10904 (set_attr "prefix" "orig,vex")
10905 (set_attr "mode" "<sseinsnmode>")])
10906
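;; The "_bcst" patterns fold a scalar memory operand broadcast to all
;; elements directly into the arithmetic instruction via the EVEX
;; embedded-broadcast form, e.g. (Intel syntax, illustration only)
;;   vpaddd zmm0, zmm1, DWORD PTR [rax]{1to16}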
10907 (define_insn "*sub<mode>3_bcst"
10908 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10909 (minus:VI48_AVX512VL
10910 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10911 (vec_duplicate:VI48_AVX512VL
10912 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
10913 "TARGET_AVX512F && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
10914 "vpsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
10915 [(set_attr "type" "sseiadd")
10916 (set_attr "prefix" "evex")
10917 (set_attr "mode" "<sseinsnmode>")])
10918
10919 (define_insn "*add<mode>3_bcst"
10920 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10921 (plus:VI48_AVX512VL
10922 (vec_duplicate:VI48_AVX512VL
10923 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
10924 (match_operand:VI48_AVX512VL 2 "register_operand" "v")))]
10925 "TARGET_AVX512F && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
10926 "vpadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0|%0, %2, %1<avx512bcst>}"
10927 [(set_attr "type" "sseiadd")
10928 (set_attr "prefix" "evex")
10929 (set_attr "mode" "<sseinsnmode>")])
10930
10931 (define_insn "*<plusminus_insn><mode>3_mask"
10932 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10933 (vec_merge:VI48_AVX512VL
10934 (plusminus:VI48_AVX512VL
10935 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10936 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10937 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
10938 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10939 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10940 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10941 [(set_attr "type" "sseiadd")
10942 (set_attr "prefix" "evex")
10943 (set_attr "mode" "<sseinsnmode>")])
10944
10945 (define_insn "*<plusminus_insn><mode>3_mask"
10946 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10947 (vec_merge:VI12_AVX512VL
10948 (plusminus:VI12_AVX512VL
10949 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10950 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10951 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand" "0C")
10952 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10953 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10954 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10955 [(set_attr "type" "sseiadd")
10956 (set_attr "prefix" "evex")
10957 (set_attr "mode" "<sseinsnmode>")])
10958
10959 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10960 [(set (match_operand:VI12_AVX2 0 "register_operand")
10961 (sat_plusminus:VI12_AVX2
10962 (match_operand:VI12_AVX2 1 "vector_operand")
10963 (match_operand:VI12_AVX2 2 "vector_operand")))]
10964 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10965 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10966
10967 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10968 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
10969 (sat_plusminus:VI12_AVX2
10970 (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
10971 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
10972 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
10973 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10974 "@
10975 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10976 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10977 [(set_attr "isa" "noavx,avx")
10978 (set_attr "type" "sseiadd")
10979 (set_attr "prefix_data16" "1,*")
10980 (set_attr "prefix" "orig,maybe_evex")
10981 (set_attr "mode" "TI")])
10982
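;; There is no byte-element vector multiply instruction;
;; ix86_expand_vecop_qihi synthesizes it from word-sized multiplies
;; (unpack to HImode, multiply, repack).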
10983 (define_expand "mul<mode>3<mask_name>"
10984 [(set (match_operand:VI1_AVX512 0 "register_operand")
10985 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
10986 (match_operand:VI1_AVX512 2 "register_operand")))]
10987 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10988 {
10989 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
10990 DONE;
10991 })
10992
10993 (define_expand "mul<mode>3<mask_name>"
10994 [(set (match_operand:VI2_AVX2 0 "register_operand")
10995 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
10996 (match_operand:VI2_AVX2 2 "vector_operand")))]
10997 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10998 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
10999
11000 (define_insn "*mul<mode>3<mask_name>"
11001 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11002 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
11003 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
11004 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11005 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11006 "@
11007 pmullw\t{%2, %0|%0, %2}
11008 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11009 [(set_attr "isa" "noavx,avx")
11010 (set_attr "type" "sseimul")
11011 (set_attr "prefix_data16" "1,*")
11012 (set_attr "prefix" "orig,vex")
11013 (set_attr "mode" "<sseinsnmode>")])
11014
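;; High-part multiply: pmulhw/pmulhuw return bits [31:16] of the 32-bit
;; product of each pair of 16-bit elements, modelled here as a widening
;; multiply followed by a right shift by 16 and a truncation.  Roughly
;; _mm_mulhi_epi16 / _mm_mulhi_epu16 at the intrinsics level
;; (illustration only).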
11015 (define_expand "<s>mul<mode>3_highpart<mask_name>"
11016 [(set (match_operand:VI2_AVX2 0 "register_operand")
11017 (truncate:VI2_AVX2
11018 (lshiftrt:<ssedoublemode>
11019 (mult:<ssedoublemode>
11020 (any_extend:<ssedoublemode>
11021 (match_operand:VI2_AVX2 1 "vector_operand"))
11022 (any_extend:<ssedoublemode>
11023 (match_operand:VI2_AVX2 2 "vector_operand")))
11024 (const_int 16))))]
11025 "TARGET_SSE2
11026 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11027 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
11028
11029 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
11030 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11031 (truncate:VI2_AVX2
11032 (lshiftrt:<ssedoublemode>
11033 (mult:<ssedoublemode>
11034 (any_extend:<ssedoublemode>
11035 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
11036 (any_extend:<ssedoublemode>
11037 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
11038 (const_int 16))))]
11039 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11040 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11041 "@
11042 pmulh<u>w\t{%2, %0|%0, %2}
11043 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11044 [(set_attr "isa" "noavx,avx")
11045 (set_attr "type" "sseimul")
11046 (set_attr "prefix_data16" "1,*")
11047 (set_attr "prefix" "orig,vex")
11048 (set_attr "mode" "<sseinsnmode>")])
11049
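;; Widening "even" multiplies map to vpmuludq/vpmuldq, which multiply the
;; even-numbered 32-bit elements and produce full 64-bit products (roughly
;; _mm_mul_epu32 / _mm_mul_epi32; the signed form needs SSE4.1).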
11050 (define_expand "vec_widen_umult_even_v16si<mask_name>"
11051 [(set (match_operand:V8DI 0 "register_operand")
11052 (mult:V8DI
11053 (zero_extend:V8DI
11054 (vec_select:V8SI
11055 (match_operand:V16SI 1 "nonimmediate_operand")
11056 (parallel [(const_int 0) (const_int 2)
11057 (const_int 4) (const_int 6)
11058 (const_int 8) (const_int 10)
11059 (const_int 12) (const_int 14)])))
11060 (zero_extend:V8DI
11061 (vec_select:V8SI
11062 (match_operand:V16SI 2 "nonimmediate_operand")
11063 (parallel [(const_int 0) (const_int 2)
11064 (const_int 4) (const_int 6)
11065 (const_int 8) (const_int 10)
11066 (const_int 12) (const_int 14)])))))]
11067 "TARGET_AVX512F"
11068 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
11069
11070 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
11071 [(set (match_operand:V8DI 0 "register_operand" "=v")
11072 (mult:V8DI
11073 (zero_extend:V8DI
11074 (vec_select:V8SI
11075 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11076 (parallel [(const_int 0) (const_int 2)
11077 (const_int 4) (const_int 6)
11078 (const_int 8) (const_int 10)
11079 (const_int 12) (const_int 14)])))
11080 (zero_extend:V8DI
11081 (vec_select:V8SI
11082 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11083 (parallel [(const_int 0) (const_int 2)
11084 (const_int 4) (const_int 6)
11085 (const_int 8) (const_int 10)
11086 (const_int 12) (const_int 14)])))))]
11087 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11088 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11089 [(set_attr "type" "sseimul")
11090 (set_attr "prefix_extra" "1")
11091 (set_attr "prefix" "evex")
11092 (set_attr "mode" "XI")])
11093
11094 (define_expand "vec_widen_umult_even_v8si<mask_name>"
11095 [(set (match_operand:V4DI 0 "register_operand")
11096 (mult:V4DI
11097 (zero_extend:V4DI
11098 (vec_select:V4SI
11099 (match_operand:V8SI 1 "nonimmediate_operand")
11100 (parallel [(const_int 0) (const_int 2)
11101 (const_int 4) (const_int 6)])))
11102 (zero_extend:V4DI
11103 (vec_select:V4SI
11104 (match_operand:V8SI 2 "nonimmediate_operand")
11105 (parallel [(const_int 0) (const_int 2)
11106 (const_int 4) (const_int 6)])))))]
11107 "TARGET_AVX2 && <mask_avx512vl_condition>"
11108 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
11109
11110 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
11111 [(set (match_operand:V4DI 0 "register_operand" "=v")
11112 (mult:V4DI
11113 (zero_extend:V4DI
11114 (vec_select:V4SI
11115 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11116 (parallel [(const_int 0) (const_int 2)
11117 (const_int 4) (const_int 6)])))
11118 (zero_extend:V4DI
11119 (vec_select:V4SI
11120 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11121 (parallel [(const_int 0) (const_int 2)
11122 (const_int 4) (const_int 6)])))))]
11123 "TARGET_AVX2 && <mask_avx512vl_condition>
11124 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11125 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11126 [(set_attr "type" "sseimul")
11127 (set_attr "prefix" "maybe_evex")
11128 (set_attr "mode" "OI")])
11129
11130 (define_expand "vec_widen_umult_even_v4si<mask_name>"
11131 [(set (match_operand:V2DI 0 "register_operand")
11132 (mult:V2DI
11133 (zero_extend:V2DI
11134 (vec_select:V2SI
11135 (match_operand:V4SI 1 "vector_operand")
11136 (parallel [(const_int 0) (const_int 2)])))
11137 (zero_extend:V2DI
11138 (vec_select:V2SI
11139 (match_operand:V4SI 2 "vector_operand")
11140 (parallel [(const_int 0) (const_int 2)])))))]
11141 "TARGET_SSE2 && <mask_avx512vl_condition>"
11142 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
11143
11144 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
11145 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
11146 (mult:V2DI
11147 (zero_extend:V2DI
11148 (vec_select:V2SI
11149 (match_operand:V4SI 1 "vector_operand" "%0,v")
11150 (parallel [(const_int 0) (const_int 2)])))
11151 (zero_extend:V2DI
11152 (vec_select:V2SI
11153 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
11154 (parallel [(const_int 0) (const_int 2)])))))]
11155 "TARGET_SSE2 && <mask_avx512vl_condition>
11156 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11157 "@
11158 pmuludq\t{%2, %0|%0, %2}
11159 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11160 [(set_attr "isa" "noavx,avx")
11161 (set_attr "type" "sseimul")
11162 (set_attr "prefix_data16" "1,*")
11163 (set_attr "prefix" "orig,maybe_evex")
11164 (set_attr "mode" "TI")])
11165
11166 (define_expand "vec_widen_smult_even_v16si<mask_name>"
11167 [(set (match_operand:V8DI 0 "register_operand")
11168 (mult:V8DI
11169 (sign_extend:V8DI
11170 (vec_select:V8SI
11171 (match_operand:V16SI 1 "nonimmediate_operand")
11172 (parallel [(const_int 0) (const_int 2)
11173 (const_int 4) (const_int 6)
11174 (const_int 8) (const_int 10)
11175 (const_int 12) (const_int 14)])))
11176 (sign_extend:V8DI
11177 (vec_select:V8SI
11178 (match_operand:V16SI 2 "nonimmediate_operand")
11179 (parallel [(const_int 0) (const_int 2)
11180 (const_int 4) (const_int 6)
11181 (const_int 8) (const_int 10)
11182 (const_int 12) (const_int 14)])))))]
11183 "TARGET_AVX512F"
11184 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
11185
11186 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
11187 [(set (match_operand:V8DI 0 "register_operand" "=v")
11188 (mult:V8DI
11189 (sign_extend:V8DI
11190 (vec_select:V8SI
11191 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11192 (parallel [(const_int 0) (const_int 2)
11193 (const_int 4) (const_int 6)
11194 (const_int 8) (const_int 10)
11195 (const_int 12) (const_int 14)])))
11196 (sign_extend:V8DI
11197 (vec_select:V8SI
11198 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11199 (parallel [(const_int 0) (const_int 2)
11200 (const_int 4) (const_int 6)
11201 (const_int 8) (const_int 10)
11202 (const_int 12) (const_int 14)])))))]
11203 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11204 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11205 [(set_attr "type" "sseimul")
11206 (set_attr "prefix_extra" "1")
11207 (set_attr "prefix" "evex")
11208 (set_attr "mode" "XI")])
11209
11210 (define_expand "vec_widen_smult_even_v8si<mask_name>"
11211 [(set (match_operand:V4DI 0 "register_operand")
11212 (mult:V4DI
11213 (sign_extend:V4DI
11214 (vec_select:V4SI
11215 (match_operand:V8SI 1 "nonimmediate_operand")
11216 (parallel [(const_int 0) (const_int 2)
11217 (const_int 4) (const_int 6)])))
11218 (sign_extend:V4DI
11219 (vec_select:V4SI
11220 (match_operand:V8SI 2 "nonimmediate_operand")
11221 (parallel [(const_int 0) (const_int 2)
11222 (const_int 4) (const_int 6)])))))]
11223 "TARGET_AVX2 && <mask_avx512vl_condition>"
11224 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
11225
11226 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
11227 [(set (match_operand:V4DI 0 "register_operand" "=v")
11228 (mult:V4DI
11229 (sign_extend:V4DI
11230 (vec_select:V4SI
11231 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11232 (parallel [(const_int 0) (const_int 2)
11233 (const_int 4) (const_int 6)])))
11234 (sign_extend:V4DI
11235 (vec_select:V4SI
11236 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11237 (parallel [(const_int 0) (const_int 2)
11238 (const_int 4) (const_int 6)])))))]
11239 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11240 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11241 [(set_attr "type" "sseimul")
11242 (set_attr "prefix_extra" "1")
11243 (set_attr "prefix" "vex")
11244 (set_attr "mode" "OI")])
11245
11246 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
11247 [(set (match_operand:V2DI 0 "register_operand")
11248 (mult:V2DI
11249 (sign_extend:V2DI
11250 (vec_select:V2SI
11251 (match_operand:V4SI 1 "vector_operand")
11252 (parallel [(const_int 0) (const_int 2)])))
11253 (sign_extend:V2DI
11254 (vec_select:V2SI
11255 (match_operand:V4SI 2 "vector_operand")
11256 (parallel [(const_int 0) (const_int 2)])))))]
11257 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
11258 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
11259
11260 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
11261 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
11262 (mult:V2DI
11263 (sign_extend:V2DI
11264 (vec_select:V2SI
11265 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
11266 (parallel [(const_int 0) (const_int 2)])))
11267 (sign_extend:V2DI
11268 (vec_select:V2SI
11269 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
11270 (parallel [(const_int 0) (const_int 2)])))))]
11271 "TARGET_SSE4_1 && <mask_avx512vl_condition>
11272 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11273 "@
11274 pmuldq\t{%2, %0|%0, %2}
11275 pmuldq\t{%2, %0|%0, %2}
11276 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11277 [(set_attr "isa" "noavx,noavx,avx")
11278 (set_attr "type" "sseimul")
11279 (set_attr "prefix_data16" "1,1,*")
11280 (set_attr "prefix_extra" "1")
11281 (set_attr "prefix" "orig,orig,vex")
11282 (set_attr "mode" "TI")])
11283
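;; pmaddwd multiplies corresponding signed 16-bit elements and adds each
;; adjacent pair of 32-bit products, giving one 32-bit result per pair
;; (roughly _mm_madd_epi16 at the intrinsics level); the sdot_prod
;; expanders below build dot products on top of it.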
11284 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
11285 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
11286 (unspec:<sseunpackmode>
11287 [(match_operand:VI2_AVX2 1 "register_operand" "v")
11288 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
11289 UNSPEC_PMADDWD512))]
11290 "TARGET_AVX512BW && <mask_mode512bit_condition>"
11291 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
11292 [(set_attr "type" "sseiadd")
11293 (set_attr "prefix" "evex")
11294 (set_attr "mode" "XI")])
11295
11296 (define_expand "avx2_pmaddwd"
11297 [(set (match_operand:V8SI 0 "register_operand")
11298 (plus:V8SI
11299 (mult:V8SI
11300 (sign_extend:V8SI
11301 (vec_select:V8HI
11302 (match_operand:V16HI 1 "nonimmediate_operand")
11303 (parallel [(const_int 0) (const_int 2)
11304 (const_int 4) (const_int 6)
11305 (const_int 8) (const_int 10)
11306 (const_int 12) (const_int 14)])))
11307 (sign_extend:V8SI
11308 (vec_select:V8HI
11309 (match_operand:V16HI 2 "nonimmediate_operand")
11310 (parallel [(const_int 0) (const_int 2)
11311 (const_int 4) (const_int 6)
11312 (const_int 8) (const_int 10)
11313 (const_int 12) (const_int 14)]))))
11314 (mult:V8SI
11315 (sign_extend:V8SI
11316 (vec_select:V8HI (match_dup 1)
11317 (parallel [(const_int 1) (const_int 3)
11318 (const_int 5) (const_int 7)
11319 (const_int 9) (const_int 11)
11320 (const_int 13) (const_int 15)])))
11321 (sign_extend:V8SI
11322 (vec_select:V8HI (match_dup 2)
11323 (parallel [(const_int 1) (const_int 3)
11324 (const_int 5) (const_int 7)
11325 (const_int 9) (const_int 11)
11326 (const_int 13) (const_int 15)]))))))]
11327 "TARGET_AVX2"
11328 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
11329
11330 (define_insn "*avx2_pmaddwd"
11331 [(set (match_operand:V8SI 0 "register_operand" "=x,v")
11332 (plus:V8SI
11333 (mult:V8SI
11334 (sign_extend:V8SI
11335 (vec_select:V8HI
11336 (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
11337 (parallel [(const_int 0) (const_int 2)
11338 (const_int 4) (const_int 6)
11339 (const_int 8) (const_int 10)
11340 (const_int 12) (const_int 14)])))
11341 (sign_extend:V8SI
11342 (vec_select:V8HI
11343 (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
11344 (parallel [(const_int 0) (const_int 2)
11345 (const_int 4) (const_int 6)
11346 (const_int 8) (const_int 10)
11347 (const_int 12) (const_int 14)]))))
11348 (mult:V8SI
11349 (sign_extend:V8SI
11350 (vec_select:V8HI (match_dup 1)
11351 (parallel [(const_int 1) (const_int 3)
11352 (const_int 5) (const_int 7)
11353 (const_int 9) (const_int 11)
11354 (const_int 13) (const_int 15)])))
11355 (sign_extend:V8SI
11356 (vec_select:V8HI (match_dup 2)
11357 (parallel [(const_int 1) (const_int 3)
11358 (const_int 5) (const_int 7)
11359 (const_int 9) (const_int 11)
11360 (const_int 13) (const_int 15)]))))))]
11361 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11362 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
11363 [(set_attr "type" "sseiadd")
11364 (set_attr "isa" "*,avx512bw")
11365 (set_attr "prefix" "vex,evex")
11366 (set_attr "mode" "OI")])
11367
11368 (define_expand "sse2_pmaddwd"
11369 [(set (match_operand:V4SI 0 "register_operand")
11370 (plus:V4SI
11371 (mult:V4SI
11372 (sign_extend:V4SI
11373 (vec_select:V4HI
11374 (match_operand:V8HI 1 "vector_operand")
11375 (parallel [(const_int 0) (const_int 2)
11376 (const_int 4) (const_int 6)])))
11377 (sign_extend:V4SI
11378 (vec_select:V4HI
11379 (match_operand:V8HI 2 "vector_operand")
11380 (parallel [(const_int 0) (const_int 2)
11381 (const_int 4) (const_int 6)]))))
11382 (mult:V4SI
11383 (sign_extend:V4SI
11384 (vec_select:V4HI (match_dup 1)
11385 (parallel [(const_int 1) (const_int 3)
11386 (const_int 5) (const_int 7)])))
11387 (sign_extend:V4SI
11388 (vec_select:V4HI (match_dup 2)
11389 (parallel [(const_int 1) (const_int 3)
11390 (const_int 5) (const_int 7)]))))))]
11391 "TARGET_SSE2"
11392 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
11393
11394 (define_insn "*sse2_pmaddwd"
11395 [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
11396 (plus:V4SI
11397 (mult:V4SI
11398 (sign_extend:V4SI
11399 (vec_select:V4HI
11400 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
11401 (parallel [(const_int 0) (const_int 2)
11402 (const_int 4) (const_int 6)])))
11403 (sign_extend:V4SI
11404 (vec_select:V4HI
11405 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
11406 (parallel [(const_int 0) (const_int 2)
11407 (const_int 4) (const_int 6)]))))
11408 (mult:V4SI
11409 (sign_extend:V4SI
11410 (vec_select:V4HI (match_dup 1)
11411 (parallel [(const_int 1) (const_int 3)
11412 (const_int 5) (const_int 7)])))
11413 (sign_extend:V4SI
11414 (vec_select:V4HI (match_dup 2)
11415 (parallel [(const_int 1) (const_int 3)
11416 (const_int 5) (const_int 7)]))))))]
11417 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11418 "@
11419 pmaddwd\t{%2, %0|%0, %2}
11420 vpmaddwd\t{%2, %1, %0|%0, %1, %2}
11421 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
11422 [(set_attr "isa" "noavx,avx,avx512bw")
11423 (set_attr "type" "sseiadd")
11424 (set_attr "atom_unit" "simul")
11425 (set_attr "prefix_data16" "1,*,*")
11426 (set_attr "prefix" "orig,vex,evex")
11427 (set_attr "mode" "TI")])
11428
11429 (define_insn "avx512dq_mul<mode>3<mask_name>"
11430 [(set (match_operand:VI8 0 "register_operand" "=v")
11431 (mult:VI8
11432 (match_operand:VI8 1 "register_operand" "v")
11433 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
11434 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
11435 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11436 [(set_attr "type" "sseimul")
11437 (set_attr "prefix" "evex")
11438 (set_attr "mode" "<sseinsnmode>")])
11439
11440 (define_expand "mul<mode>3<mask_name>"
11441 [(set (match_operand:VI4_AVX512F 0 "register_operand")
11442 (mult:VI4_AVX512F
11443 (match_operand:VI4_AVX512F 1 "general_vector_operand")
11444 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
11445 "TARGET_SSE2 && <mask_mode512bit_condition>"
11446 {
11447 if (TARGET_SSE4_1)
11448 {
11449 if (!vector_operand (operands[1], <MODE>mode))
11450 operands[1] = force_reg (<MODE>mode, operands[1]);
11451 if (!vector_operand (operands[2], <MODE>mode))
11452 operands[2] = force_reg (<MODE>mode, operands[2]);
11453 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
11454 }
11455 else
11456 {
11457 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
11458 DONE;
11459 }
11460 })
11461
11462 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
11463 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
11464 (mult:VI4_AVX512F
11465 (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
11466 (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
11467 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11468 && <mask_mode512bit_condition>"
11469 "@
11470 pmulld\t{%2, %0|%0, %2}
11471 pmulld\t{%2, %0|%0, %2}
11472 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11473 [(set_attr "isa" "noavx,noavx,avx")
11474 (set_attr "type" "sseimul")
11475 (set_attr "prefix_extra" "1")
11476 (set_attr "prefix" "<mask_prefix4>")
11477 (set_attr "btver2_decode" "vector,vector,vector")
11478 (set_attr "mode" "<sseinsnmode>")])
11479
11480 (define_expand "mul<mode>3"
11481 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
11482 (mult:VI8_AVX2_AVX512F
11483 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
11484 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
11485 "TARGET_SSE2"
11486 {
11487 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
11488 DONE;
11489 })
11490
11491 (define_expand "vec_widen_<s>mult_hi_<mode>"
11492 [(match_operand:<sseunpackmode> 0 "register_operand")
11493 (any_extend:<sseunpackmode>
11494 (match_operand:VI124_AVX2 1 "register_operand"))
11495 (match_operand:VI124_AVX2 2 "register_operand")]
11496 "TARGET_SSE2"
11497 {
11498 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
11499 <u_bool>, true);
11500 DONE;
11501 })
11502
11503 (define_expand "vec_widen_<s>mult_lo_<mode>"
11504 [(match_operand:<sseunpackmode> 0 "register_operand")
11505 (any_extend:<sseunpackmode>
11506 (match_operand:VI124_AVX2 1 "register_operand"))
11507 (match_operand:VI124_AVX2 2 "register_operand")]
11508 "TARGET_SSE2"
11509 {
11510 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
11511 <u_bool>, false);
11512 DONE;
11513 })
11514
11515 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
11516 ;; named patterns, but signed V4SI needs special help for plain SSE2.
11517 (define_expand "vec_widen_smult_even_v4si"
11518 [(match_operand:V2DI 0 "register_operand")
11519 (match_operand:V4SI 1 "vector_operand")
11520 (match_operand:V4SI 2 "vector_operand")]
11521 "TARGET_SSE2"
11522 {
11523 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
11524 false, false);
11525 DONE;
11526 })
11527
11528 (define_expand "vec_widen_<s>mult_odd_<mode>"
11529 [(match_operand:<sseunpackmode> 0 "register_operand")
11530 (any_extend:<sseunpackmode>
11531 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
11532 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
11533 "TARGET_SSE2"
11534 {
11535 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
11536 <u_bool>, true);
11537 DONE;
11538 })
11539
11540 (define_mode_attr SDOT_PMADD_SUF
11541 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
11542
11543 (define_expand "sdot_prod<mode>"
11544 [(match_operand:<sseunpackmode> 0 "register_operand")
11545 (match_operand:VI2_AVX2 1 "register_operand")
11546 (match_operand:VI2_AVX2 2 "register_operand")
11547 (match_operand:<sseunpackmode> 3 "register_operand")]
11548 "TARGET_SSE2"
11549 {
11550 rtx t = gen_reg_rtx (<sseunpackmode>mode);
11551 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
11552 emit_insn (gen_rtx_SET (operands[0],
11553 gen_rtx_PLUS (<sseunpackmode>mode,
11554 operands[3], t)));
11555 DONE;
11556 })
11557
11558 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
11559 ;; back together when madd is available.
11560 (define_expand "sdot_prodv4si"
11561 [(match_operand:V2DI 0 "register_operand")
11562 (match_operand:V4SI 1 "register_operand")
11563 (match_operand:V4SI 2 "register_operand")
11564 (match_operand:V2DI 3 "register_operand")]
11565 "TARGET_XOP"
11566 {
11567 rtx t = gen_reg_rtx (V2DImode);
11568 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
11569 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
11570 DONE;
11571 })
11572
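;; Unsigned rounding average: (a + b + 1) >> 1, matching pavgb/pavgw
;; (roughly _mm_avg_epu8 / _mm_avg_epu16 at the intrinsics level).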
11573 (define_expand "uavg<mode>3_ceil"
11574 [(set (match_operand:VI12_AVX2 0 "register_operand")
11575 (truncate:VI12_AVX2
11576 (lshiftrt:<ssedoublemode>
11577 (plus:<ssedoublemode>
11578 (plus:<ssedoublemode>
11579 (zero_extend:<ssedoublemode>
11580 (match_operand:VI12_AVX2 1 "vector_operand"))
11581 (zero_extend:<ssedoublemode>
11582 (match_operand:VI12_AVX2 2 "vector_operand")))
11583 (match_dup 3))
11584 (const_int 1))))]
11585 "TARGET_SSE2"
11586 {
11587 operands[3] = CONST1_RTX(<MODE>mode);
11588 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
11589 })
11590
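;; Unsigned sum of absolute differences: psadbw sums the absolute byte
;; differences within each 64-bit half into a 16-bit value zero-extended
;; to 64 bits; that result is then reinterpreted as a vector of 32-bit
;; elements and added to the accumulator operand.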
11591 (define_expand "usadv16qi"
11592 [(match_operand:V4SI 0 "register_operand")
11593 (match_operand:V16QI 1 "register_operand")
11594 (match_operand:V16QI 2 "vector_operand")
11595 (match_operand:V4SI 3 "vector_operand")]
11596 "TARGET_SSE2"
11597 {
11598 rtx t1 = gen_reg_rtx (V2DImode);
11599 rtx t2 = gen_reg_rtx (V4SImode);
11600 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
11601 convert_move (t2, t1, 0);
11602 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
11603 DONE;
11604 })
11605
11606 (define_expand "usadv32qi"
11607 [(match_operand:V8SI 0 "register_operand")
11608 (match_operand:V32QI 1 "register_operand")
11609 (match_operand:V32QI 2 "nonimmediate_operand")
11610 (match_operand:V8SI 3 "nonimmediate_operand")]
11611 "TARGET_AVX2"
11612 {
11613 rtx t1 = gen_reg_rtx (V4DImode);
11614 rtx t2 = gen_reg_rtx (V8SImode);
11615 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
11616 convert_move (t2, t1, 0);
11617 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
11618 DONE;
11619 })
11620
11621 (define_expand "usadv64qi"
11622 [(match_operand:V16SI 0 "register_operand")
11623 (match_operand:V64QI 1 "register_operand")
11624 (match_operand:V64QI 2 "nonimmediate_operand")
11625 (match_operand:V16SI 3 "nonimmediate_operand")]
11626 "TARGET_AVX512BW"
11627 {
11628 rtx t1 = gen_reg_rtx (V8DImode);
11629 rtx t2 = gen_reg_rtx (V16SImode);
11630 emit_insn (gen_avx512f_psadbw (t1, operands[1], operands[2]));
11631 convert_move (t2, t1, 0);
11632 emit_insn (gen_addv16si3 (operands[0], t2, operands[3]));
11633 DONE;
11634 })
11635
11636 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
11637 [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
11638 (ashiftrt:VI248_AVX512BW_1
11639 (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
11640 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
11641 "TARGET_AVX512VL"
11642 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11643 [(set_attr "type" "sseishft")
11644 (set (attr "length_immediate")
11645 (if_then_else (match_operand 2 "const_int_operand")
11646 (const_string "1")
11647 (const_string "0")))
11648 (set_attr "mode" "<sseinsnmode>")])
11649
11650 (define_insn "ashr<mode>3"
11651 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
11652 (ashiftrt:VI24_AVX2
11653 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
11654 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
11655 "TARGET_SSE2"
11656 "@
11657 psra<ssemodesuffix>\t{%2, %0|%0, %2}
11658 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11659 [(set_attr "isa" "noavx,avx")
11660 (set_attr "type" "sseishft")
11661 (set (attr "length_immediate")
11662 (if_then_else (match_operand 2 "const_int_operand")
11663 (const_string "1")
11664 (const_string "0")))
11665 (set_attr "prefix_data16" "1,*")
11666 (set_attr "prefix" "orig,vex")
11667 (set_attr "mode" "<sseinsnmode>")])
11668
11669 (define_insn "ashr<mode>3<mask_name>"
11670 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
11671 (ashiftrt:VI248_AVX512BW_AVX512VL
11672 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
11673 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
11674 "TARGET_AVX512F"
11675 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11676 [(set_attr "type" "sseishft")
11677 (set (attr "length_immediate")
11678 (if_then_else (match_operand 2 "const_int_operand")
11679 (const_string "1")
11680 (const_string "0")))
11681 (set_attr "mode" "<sseinsnmode>")])
11682
11683 (define_insn "<mask_codefor><shift_insn><mode>3<mask_name>"
11684 [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
11685 (any_lshift:VI248_AVX512BW_2
11686 (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
11687 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
11688 "TARGET_AVX512VL"
11689 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11690 [(set_attr "type" "sseishft")
11691 (set (attr "length_immediate")
11692 (if_then_else (match_operand 2 "const_int_operand")
11693 (const_string "1")
11694 (const_string "0")))
11695 (set_attr "mode" "<sseinsnmode>")])
11696
11697 (define_insn "<shift_insn><mode>3"
11698 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
11699 (any_lshift:VI248_AVX2
11700 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
11701 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
11702 "TARGET_SSE2"
11703 "@
11704 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
11705 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11706 [(set_attr "isa" "noavx,avx")
11707 (set_attr "type" "sseishft")
11708 (set (attr "length_immediate")
11709 (if_then_else (match_operand 2 "const_int_operand")
11710 (const_string "1")
11711 (const_string "0")))
11712 (set_attr "prefix_data16" "1,*")
11713 (set_attr "prefix" "orig,vex")
11714 (set_attr "mode" "<sseinsnmode>")])
11715
11716 (define_insn "<shift_insn><mode>3<mask_name>"
11717 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
11718 (any_lshift:VI248_AVX512BW
11719 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
11720 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
11721 "TARGET_AVX512F"
11722 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11723 [(set_attr "type" "sseishft")
11724 (set (attr "length_immediate")
11725 (if_then_else (match_operand 2 "const_int_operand")
11726 (const_string "1")
11727 (const_string "0")))
11728 (set_attr "mode" "<sseinsnmode>")])
11729
11730
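;; vec_shr_<mode>: view the 128-bit operand as a single V1TI value and do a
;; whole-vector logical right shift (psrldq); operand 2 is a bit count that
;; must be a multiple of 8.  The insns below handle both shift directions
;; and convert the bit count to a byte count.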
11731 (define_expand "vec_shr_<mode>"
11732 [(set (match_dup 3)
11733 (lshiftrt:V1TI
11734 (match_operand:VI_128 1 "register_operand")
11735 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
11736 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
11737 "TARGET_SSE2"
11738 {
11739 operands[1] = gen_lowpart (V1TImode, operands[1]);
11740 operands[3] = gen_reg_rtx (V1TImode);
11741 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
11742 })
11743
11744 (define_insn "avx512bw_<shift_insn><mode>3"
11745 [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
11746 (any_lshift:VIMAX_AVX512VL
11747 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
11748 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
11749 "TARGET_AVX512BW"
11750 {
11751 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
11752 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
11753 }
11754 [(set_attr "type" "sseishft")
11755 (set_attr "length_immediate" "1")
11756 (set_attr "prefix" "maybe_evex")
11757 (set_attr "mode" "<sseinsnmode>")])
11758
11759 (define_insn "<sse2_avx2>_<shift_insn><mode>3"
11760 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
11761 (any_lshift:VIMAX_AVX2
11762 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
11763 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
11764 "TARGET_SSE2"
11765 {
11766 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
11767
11768 switch (which_alternative)
11769 {
11770 case 0:
11771 return "p<vshift>dq\t{%2, %0|%0, %2}";
11772 case 1:
11773 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
11774 default:
11775 gcc_unreachable ();
11776 }
11777 }
11778 [(set_attr "isa" "noavx,avx")
11779 (set_attr "type" "sseishft")
11780 (set_attr "length_immediate" "1")
11781 (set_attr "atom_unit" "sishuf")
11782 (set_attr "prefix_data16" "1,*")
11783 (set_attr "prefix" "orig,vex")
11784 (set_attr "mode" "<sseinsnmode>")])
11785
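;; AVX512F element rotates: the "v" forms take a per-element count vector
;; (vprolv/vprorv), the others an immediate count (vprol/vpror); roughly
;; _mm512_rolv_epi32 and friends at the intrinsics level (illustration
;; only).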
11786 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
11787 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11788 (any_rotate:VI48_AVX512VL
11789 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
11790 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
11791 "TARGET_AVX512F"
11792 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11793 [(set_attr "prefix" "evex")
11794 (set_attr "mode" "<sseinsnmode>")])
11795
11796 (define_insn "<avx512>_<rotate><mode><mask_name>"
11797 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11798 (any_rotate:VI48_AVX512VL
11799 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
11800 (match_operand:SI 2 "const_0_to_255_operand")))]
11801 "TARGET_AVX512F"
11802 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11803 [(set_attr "prefix" "evex")
11804 (set_attr "mode" "<sseinsnmode>")])
11805
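;; Integer min/max.  AVX2 and AVX512 provide direct instructions for the
;; supported element sizes; 64-bit elements below AVX512 and the
;; pre-SSE4.1 cases have no instruction and are expanded into a compare
;; followed by a blend via ix86_expand_int_vcond.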
11806 (define_expand "<code><mode>3"
11807 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
11808 (maxmin:VI124_256_AVX512F_AVX512BW
11809 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
11810 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
11811 "TARGET_AVX2"
11812 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11813
11814 (define_insn "*avx2_<code><mode>3"
11815 [(set (match_operand:VI124_256 0 "register_operand" "=v")
11816 (maxmin:VI124_256
11817 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
11818 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
11819 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11820 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11821 [(set_attr "type" "sseiadd")
11822 (set_attr "prefix_extra" "1")
11823 (set_attr "prefix" "vex")
11824 (set_attr "mode" "OI")])
11825
11826 (define_expand "<code><mode>3_mask"
11827 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11828 (vec_merge:VI48_AVX512VL
11829 (maxmin:VI48_AVX512VL
11830 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11831 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11832 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
11833 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11834 "TARGET_AVX512F"
11835 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11836
11837 (define_insn "*avx512f_<code><mode>3<mask_name>"
11838 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11839 (maxmin:VI48_AVX512VL
11840 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
11841 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
11842 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11843 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11844 [(set_attr "type" "sseiadd")
11845 (set_attr "prefix_extra" "1")
11846 (set_attr "prefix" "maybe_evex")
11847 (set_attr "mode" "<sseinsnmode>")])
11848
11849 (define_insn "<mask_codefor><code><mode>3<mask_name>"
11850 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
11851 (maxmin:VI12_AVX512VL
11852 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
11853 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
11854 "TARGET_AVX512BW"
11855 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11856 [(set_attr "type" "sseiadd")
11857 (set_attr "prefix" "evex")
11858 (set_attr "mode" "<sseinsnmode>")])
11859
11860 (define_expand "<code><mode>3"
11861 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
11862 (maxmin:VI8_AVX2_AVX512F
11863 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
11864 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
11865 "TARGET_SSE4_2"
11866 {
11867 if (TARGET_AVX512F
11868 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
11869 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11870 else
11871 {
11872 enum rtx_code code;
11873 rtx xops[6];
11874 bool ok;
11875
11876
11877 xops[0] = operands[0];
11878
11879 if (<CODE> == SMAX || <CODE> == UMAX)
11880 {
11881 xops[1] = operands[1];
11882 xops[2] = operands[2];
11883 }
11884 else
11885 {
11886 xops[1] = operands[2];
11887 xops[2] = operands[1];
11888 }
11889
11890 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
11891
11892 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
11893 xops[4] = operands[1];
11894 xops[5] = operands[2];
11895
11896 ok = ix86_expand_int_vcond (xops);
11897 gcc_assert (ok);
11898 DONE;
11899 }
11900 })
11901
11902 (define_expand "<code><mode>3"
11903 [(set (match_operand:VI124_128 0 "register_operand")
11904 (smaxmin:VI124_128
11905 (match_operand:VI124_128 1 "vector_operand")
11906 (match_operand:VI124_128 2 "vector_operand")))]
11907 "TARGET_SSE2"
11908 {
11909 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
11910 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11911 else
11912 {
11913 rtx xops[6];
11914 bool ok;
11915
11916 xops[0] = operands[0];
11917 operands[1] = force_reg (<MODE>mode, operands[1]);
11918 operands[2] = force_reg (<MODE>mode, operands[2]);
11919
11920 if (<CODE> == SMAX)
11921 {
11922 xops[1] = operands[1];
11923 xops[2] = operands[2];
11924 }
11925 else
11926 {
11927 xops[1] = operands[2];
11928 xops[2] = operands[1];
11929 }
11930
11931 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
11932 xops[4] = operands[1];
11933 xops[5] = operands[2];
11934
11935 ok = ix86_expand_int_vcond (xops);
11936 gcc_assert (ok);
11937 DONE;
11938 }
11939 })
11940
11941 (define_insn "*sse4_1_<code><mode>3<mask_name>"
11942 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
11943 (smaxmin:VI14_128
11944 (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
11945 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11946 "TARGET_SSE4_1
11947 && <mask_mode512bit_condition>
11948 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11949 "@
11950 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11951 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11952 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11953 [(set_attr "isa" "noavx,noavx,avx")
11954 (set_attr "type" "sseiadd")
11955 (set_attr "prefix_extra" "1,1,*")
11956 (set_attr "prefix" "orig,orig,vex")
11957 (set_attr "mode" "TI")])
11958
11959 (define_insn "*<code>v8hi3"
11960 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
11961 (smaxmin:V8HI
11962 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
11963 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
11964 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11965 "@
11966 p<maxmin_int>w\t{%2, %0|%0, %2}
11967 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
11968 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
11969 [(set_attr "isa" "noavx,avx,avx512bw")
11970 (set_attr "type" "sseiadd")
11971 (set_attr "prefix_data16" "1,*,*")
11972 (set_attr "prefix_extra" "*,1,1")
11973 (set_attr "prefix" "orig,vex,evex")
11974 (set_attr "mode" "TI")])
11975
11976 (define_expand "<code><mode>3"
11977 [(set (match_operand:VI124_128 0 "register_operand")
11978 (umaxmin:VI124_128
11979 (match_operand:VI124_128 1 "vector_operand")
11980 (match_operand:VI124_128 2 "vector_operand")))]
11981 "TARGET_SSE2"
11982 {
11983 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
11984 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11985 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
11986 {
11987 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
11988 operands[1] = force_reg (<MODE>mode, operands[1]);
11989 if (rtx_equal_p (op3, op2))
11990 op3 = gen_reg_rtx (V8HImode);
11991 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
11992 emit_insn (gen_addv8hi3 (op0, op3, op2));
11993 DONE;
11994 }
11995 else
11996 {
11997 rtx xops[6];
11998 bool ok;
11999
12000 operands[1] = force_reg (<MODE>mode, operands[1]);
12001 operands[2] = force_reg (<MODE>mode, operands[2]);
12002
12003 xops[0] = operands[0];
12004
12005 if (<CODE> == UMAX)
12006 {
12007 xops[1] = operands[1];
12008 xops[2] = operands[2];
12009 }
12010 else
12011 {
12012 xops[1] = operands[2];
12013 xops[2] = operands[1];
12014 }
12015
12016 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
12017 xops[4] = operands[1];
12018 xops[5] = operands[2];
12019
12020 ok = ix86_expand_int_vcond (xops);
12021 gcc_assert (ok);
12022 DONE;
12023 }
12024 })
12025
12026 (define_insn "*sse4_1_<code><mode>3<mask_name>"
12027 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
12028 (umaxmin:VI24_128
12029 (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
12030 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
12031 "TARGET_SSE4_1
12032 && <mask_mode512bit_condition>
12033 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12034 "@
12035 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12036 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12037 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12038 [(set_attr "isa" "noavx,noavx,avx")
12039 (set_attr "type" "sseiadd")
12040 (set_attr "prefix_extra" "1,1,*")
12041 (set_attr "prefix" "orig,orig,vex")
12042 (set_attr "mode" "TI")])
12043
12044 (define_insn "*<code>v16qi3"
12045 [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
12046 (umaxmin:V16QI
12047 (match_operand:V16QI 1 "vector_operand" "%0,x,v")
12048 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
12049 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12050 "@
12051 p<maxmin_int>b\t{%2, %0|%0, %2}
12052 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
12053 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
12054 [(set_attr "isa" "noavx,avx,avx512bw")
12055 (set_attr "type" "sseiadd")
12056 (set_attr "prefix_data16" "1,*,*")
12057 (set_attr "prefix_extra" "*,1,1")
12058 (set_attr "prefix" "orig,vex,evex")
12059 (set_attr "mode" "TI")])
12060
12061 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12062 ;;
12063 ;; Parallel integral comparisons
12064 ;;
12065 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12066
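;; Equality comparisons.  The AVX2 forms produce a vector of -1/0 results;
;; the <avx512>_eq* forms produce a mask register instead, and their "_1"
;; variants recognize a comparison against zero and emit vptestnm rather
;; than vpcmpeq.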
12067 (define_expand "avx2_eq<mode>3"
12068 [(set (match_operand:VI_256 0 "register_operand")
12069 (eq:VI_256
12070 (match_operand:VI_256 1 "nonimmediate_operand")
12071 (match_operand:VI_256 2 "nonimmediate_operand")))]
12072 "TARGET_AVX2"
12073 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12074
12075 (define_insn "*avx2_eq<mode>3"
12076 [(set (match_operand:VI_256 0 "register_operand" "=x")
12077 (eq:VI_256
12078 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
12079 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12080 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12081 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12082 [(set_attr "type" "ssecmp")
12083 (set_attr "prefix_extra" "1")
12084 (set_attr "prefix" "vex")
12085 (set_attr "mode" "OI")])
12086
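;; The AVX512 equality comparisons write a mask register, so the result
;; is wrapped in UNSPEC_MASKED_EQ rather than a vector-mode (eq ...) rtx.
;; Byte/word element modes need AVX512BW; dword/qword need only AVX512F.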
12087 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12088 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12089 (unspec:<avx512fmaskmode>
12090 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
12091 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
12092 UNSPEC_MASKED_EQ))]
12093 "TARGET_AVX512BW"
12094 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12095
12096 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12097 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12098 (unspec:<avx512fmaskmode>
12099 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
12100 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
12101 UNSPEC_MASKED_EQ))]
12102 "TARGET_AVX512F"
12103 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12104
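;; The "C" alternative below matches a comparison against the zero
;; vector and is emitted as vptestnm, which sets a mask bit exactly when
;; an element ANDed with itself is zero, i.e. when the element is zero.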
12105 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
12106 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
12107 (unspec:<avx512fmaskmode>
12108 [(match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
12109 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
12110 UNSPEC_MASKED_EQ))]
12111 "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12112 "@
12113 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
12114 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
12115 [(set_attr "type" "ssecmp")
12116 (set_attr "prefix_extra" "1")
12117 (set_attr "prefix" "evex")
12118 (set_attr "mode" "<sseinsnmode>")])
12119
12120 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
12121 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
12122 (unspec:<avx512fmaskmode>
12123 [(match_operand:VI48_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
12124 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
12125 UNSPEC_MASKED_EQ))]
12126 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12127 "@
12128 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
12129 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
12130 [(set_attr "type" "ssecmp")
12131 (set_attr "prefix_extra" "1")
12132 (set_attr "prefix" "evex")
12133 (set_attr "mode" "<sseinsnmode>")])
12134
12135 (define_insn "*sse4_1_eqv2di3"
12136 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
12137 (eq:V2DI
12138 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
12139 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
12140 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12141 "@
12142 pcmpeqq\t{%2, %0|%0, %2}
12143 pcmpeqq\t{%2, %0|%0, %2}
12144 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
12145 [(set_attr "isa" "noavx,noavx,avx")
12146 (set_attr "type" "ssecmp")
12147 (set_attr "prefix_extra" "1")
12148 (set_attr "prefix" "orig,orig,vex")
12149 (set_attr "mode" "TI")])
12150
12151 (define_insn "*sse2_eq<mode>3"
12152 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
12153 (eq:VI124_128
12154 (match_operand:VI124_128 1 "vector_operand" "%0,x")
12155 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
12156 "TARGET_SSE2 && !TARGET_XOP
12157 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12158 "@
12159 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
12160 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12161 [(set_attr "isa" "noavx,avx")
12162 (set_attr "type" "ssecmp")
12163 (set_attr "prefix_data16" "1,*")
12164 (set_attr "prefix" "orig,vex")
12165 (set_attr "mode" "TI")])
12166
12167 (define_expand "sse2_eq<mode>3"
12168 [(set (match_operand:VI124_128 0 "register_operand")
12169 (eq:VI124_128
12170 (match_operand:VI124_128 1 "vector_operand")
12171 (match_operand:VI124_128 2 "vector_operand")))]
12172 "TARGET_SSE2 && !TARGET_XOP "
12173 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12174
12175 (define_expand "sse4_1_eqv2di3"
12176 [(set (match_operand:V2DI 0 "register_operand")
12177 (eq:V2DI
12178 (match_operand:V2DI 1 "vector_operand")
12179 (match_operand:V2DI 2 "vector_operand")))]
12180 "TARGET_SSE4_1"
12181 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
12182
12183 (define_insn "sse4_2_gtv2di3"
12184 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
12185 (gt:V2DI
12186 (match_operand:V2DI 1 "register_operand" "0,0,x")
12187 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
12188 "TARGET_SSE4_2"
12189 "@
12190 pcmpgtq\t{%2, %0|%0, %2}
12191 pcmpgtq\t{%2, %0|%0, %2}
12192 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
12193 [(set_attr "isa" "noavx,noavx,avx")
12194 (set_attr "type" "ssecmp")
12195 (set_attr "prefix_extra" "1")
12196 (set_attr "prefix" "orig,orig,vex")
12197 (set_attr "mode" "TI")])
12198
12199 (define_insn "avx2_gt<mode>3"
12200 [(set (match_operand:VI_256 0 "register_operand" "=x")
12201 (gt:VI_256
12202 (match_operand:VI_256 1 "register_operand" "x")
12203 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12204 "TARGET_AVX2"
12205 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12206 [(set_attr "type" "ssecmp")
12207 (set_attr "prefix_extra" "1")
12208 (set_attr "prefix" "vex")
12209 (set_attr "mode" "OI")])
12210
12211 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
12212 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12213 (unspec:<avx512fmaskmode>
12214 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
12215 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
12216 "TARGET_AVX512F"
12217 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12218 [(set_attr "type" "ssecmp")
12219 (set_attr "prefix_extra" "1")
12220 (set_attr "prefix" "evex")
12221 (set_attr "mode" "<sseinsnmode>")])
12222
12223 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
12224 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12225 (unspec:<avx512fmaskmode>
12226 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
12227 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
12228 "TARGET_AVX512BW"
12229 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12230 [(set_attr "type" "ssecmp")
12231 (set_attr "prefix_extra" "1")
12232 (set_attr "prefix" "evex")
12233 (set_attr "mode" "<sseinsnmode>")])
12234
12235 (define_insn "sse2_gt<mode>3"
12236 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
12237 (gt:VI124_128
12238 (match_operand:VI124_128 1 "register_operand" "0,x")
12239 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
12240 "TARGET_SSE2 && !TARGET_XOP"
12241 "@
12242 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
12243 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12244 [(set_attr "isa" "noavx,avx")
12245 (set_attr "type" "ssecmp")
12246 (set_attr "prefix_data16" "1,*")
12247 (set_attr "prefix" "orig,vex")
12248 (set_attr "mode" "TI")])
12249
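;; The vcond/vcondu standard-name expanders below all defer to
;; ix86_expand_int_vcond; the only restriction encoded here is that the
;; data and comparison vectors must have the same number of elements.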
12250 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
12251 [(set (match_operand:V_512 0 "register_operand")
12252 (if_then_else:V_512
12253 (match_operator 3 ""
12254 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
12255 (match_operand:VI_AVX512BW 5 "general_operand")])
12256 (match_operand:V_512 1)
12257 (match_operand:V_512 2)))]
12258 "TARGET_AVX512F
12259 && (GET_MODE_NUNITS (<V_512:MODE>mode)
12260 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
12261 {
12262 bool ok = ix86_expand_int_vcond (operands);
12263 gcc_assert (ok);
12264 DONE;
12265 })
12266
12267 (define_expand "vcond<V_256:mode><VI_256:mode>"
12268 [(set (match_operand:V_256 0 "register_operand")
12269 (if_then_else:V_256
12270 (match_operator 3 ""
12271 [(match_operand:VI_256 4 "nonimmediate_operand")
12272 (match_operand:VI_256 5 "general_operand")])
12273 (match_operand:V_256 1)
12274 (match_operand:V_256 2)))]
12275 "TARGET_AVX2
12276 && (GET_MODE_NUNITS (<V_256:MODE>mode)
12277 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
12278 {
12279 bool ok = ix86_expand_int_vcond (operands);
12280 gcc_assert (ok);
12281 DONE;
12282 })
12283
12284 (define_expand "vcond<V_128:mode><VI124_128:mode>"
12285 [(set (match_operand:V_128 0 "register_operand")
12286 (if_then_else:V_128
12287 (match_operator 3 ""
12288 [(match_operand:VI124_128 4 "vector_operand")
12289 (match_operand:VI124_128 5 "general_operand")])
12290 (match_operand:V_128 1)
12291 (match_operand:V_128 2)))]
12292 "TARGET_SSE2
12293 && (GET_MODE_NUNITS (<V_128:MODE>mode)
12294 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
12295 {
12296 bool ok = ix86_expand_int_vcond (operands);
12297 gcc_assert (ok);
12298 DONE;
12299 })
12300
12301 (define_expand "vcond<VI8F_128:mode>v2di"
12302 [(set (match_operand:VI8F_128 0 "register_operand")
12303 (if_then_else:VI8F_128
12304 (match_operator 3 ""
12305 [(match_operand:V2DI 4 "vector_operand")
12306 (match_operand:V2DI 5 "general_operand")])
12307 (match_operand:VI8F_128 1)
12308 (match_operand:VI8F_128 2)))]
12309 "TARGET_SSE4_2"
12310 {
12311 bool ok = ix86_expand_int_vcond (operands);
12312 gcc_assert (ok);
12313 DONE;
12314 })
12315
12316 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
12317 [(set (match_operand:V_512 0 "register_operand")
12318 (if_then_else:V_512
12319 (match_operator 3 ""
12320 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
12321 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
12322 (match_operand:V_512 1 "general_operand")
12323 (match_operand:V_512 2 "general_operand")))]
12324 "TARGET_AVX512F
12325 && (GET_MODE_NUNITS (<V_512:MODE>mode)
12326 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
12327 {
12328 bool ok = ix86_expand_int_vcond (operands);
12329 gcc_assert (ok);
12330 DONE;
12331 })
12332
12333 (define_expand "vcondu<V_256:mode><VI_256:mode>"
12334 [(set (match_operand:V_256 0 "register_operand")
12335 (if_then_else:V_256
12336 (match_operator 3 ""
12337 [(match_operand:VI_256 4 "nonimmediate_operand")
12338 (match_operand:VI_256 5 "nonimmediate_operand")])
12339 (match_operand:V_256 1 "general_operand")
12340 (match_operand:V_256 2 "general_operand")))]
12341 "TARGET_AVX2
12342 && (GET_MODE_NUNITS (<V_256:MODE>mode)
12343 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
12344 {
12345 bool ok = ix86_expand_int_vcond (operands);
12346 gcc_assert (ok);
12347 DONE;
12348 })
12349
12350 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
12351 [(set (match_operand:V_128 0 "register_operand")
12352 (if_then_else:V_128
12353 (match_operator 3 ""
12354 [(match_operand:VI124_128 4 "vector_operand")
12355 (match_operand:VI124_128 5 "vector_operand")])
12356 (match_operand:V_128 1 "general_operand")
12357 (match_operand:V_128 2 "general_operand")))]
12358 "TARGET_SSE2
12359 && (GET_MODE_NUNITS (<V_128:MODE>mode)
12360 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
12361 {
12362 bool ok = ix86_expand_int_vcond (operands);
12363 gcc_assert (ok);
12364 DONE;
12365 })
12366
12367 (define_expand "vcondu<VI8F_128:mode>v2di"
12368 [(set (match_operand:VI8F_128 0 "register_operand")
12369 (if_then_else:VI8F_128
12370 (match_operator 3 ""
12371 [(match_operand:V2DI 4 "vector_operand")
12372 (match_operand:V2DI 5 "vector_operand")])
12373 (match_operand:VI8F_128 1 "general_operand")
12374 (match_operand:VI8F_128 2 "general_operand")))]
12375 "TARGET_SSE4_2"
12376 {
12377 bool ok = ix86_expand_int_vcond (operands);
12378 gcc_assert (ok);
12379 DONE;
12380 })
12381
12382 (define_expand "vcondeq<VI8F_128:mode>v2di"
12383 [(set (match_operand:VI8F_128 0 "register_operand")
12384 (if_then_else:VI8F_128
12385 (match_operator 3 ""
12386 [(match_operand:V2DI 4 "vector_operand")
12387 (match_operand:V2DI 5 "general_operand")])
12388 (match_operand:VI8F_128 1)
12389 (match_operand:VI8F_128 2)))]
12390 "TARGET_SSE4_1"
12391 {
12392 bool ok = ix86_expand_int_vcond (operands);
12393 gcc_assert (ok);
12394 DONE;
12395 })
12396
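;; Variable permutes.  A single vec_perm expander serves every mode in
;; VEC_PERM_AVX2 and hands the operands to ix86_expand_vec_perm, which
;; selects a target-specific shuffle sequence.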
12397 (define_mode_iterator VEC_PERM_AVX2
12398 [V16QI V8HI V4SI V2DI V4SF V2DF
12399 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
12400 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
12401 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
12402 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
12403 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
12404 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
12405
12406 (define_expand "vec_perm<mode>"
12407 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
12408 (match_operand:VEC_PERM_AVX2 1 "register_operand")
12409 (match_operand:VEC_PERM_AVX2 2 "register_operand")
12410 (match_operand:<sseintvecmode> 3 "register_operand")]
12411 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
12412 {
12413 ix86_expand_vec_perm (operands);
12414 DONE;
12415 })
12416
12417 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12418 ;;
12419 ;; Parallel bitwise logical operations
12420 ;;
12421 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12422
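;; One's complement is expanded as an XOR with an all-ones vector so
;; that the ordinary pxor/vpxor patterns can be reused.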
12423 (define_expand "one_cmpl<mode>2"
12424 [(set (match_operand:VI 0 "register_operand")
12425 (xor:VI (match_operand:VI 1 "vector_operand")
12426 (match_dup 2)))]
12427 "TARGET_SSE"
12428 {
12429 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
12430 })
12431
12432 (define_expand "<sse2_avx2>_andnot<mode>3"
12433 [(set (match_operand:VI_AVX2 0 "register_operand")
12434 (and:VI_AVX2
12435 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
12436 (match_operand:VI_AVX2 2 "vector_operand")))]
12437 "TARGET_SSE2")
12438
12439 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
12440 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
12441 (vec_merge:VI48_AVX512VL
12442 (and:VI48_AVX512VL
12443 (not:VI48_AVX512VL
12444 (match_operand:VI48_AVX512VL 1 "register_operand"))
12445 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
12446 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
12447 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12448 "TARGET_AVX512F")
12449
12450 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
12451 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
12452 (vec_merge:VI12_AVX512VL
12453 (and:VI12_AVX512VL
12454 (not:VI12_AVX512VL
12455 (match_operand:VI12_AVX512VL 1 "register_operand"))
12456 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
12457 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
12458 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12459 "TARGET_AVX512BW")
12460
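;; The and-not output template is assembled at output time: the value of
;; get_attr_mode decides between the integer pandn and the float andnps
;; mnemonics, and the element-size suffix is appended when the EVEX
;; (AVX512VL) encoding requires it, always for 512-bit vectors.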
12461 (define_insn "*andnot<mode>3"
12462 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
12463 (and:VI
12464 (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
12465 (match_operand:VI 2 "vector_operand" "xBm,xm,vm")))]
12466 "TARGET_SSE"
12467 {
12468 char buf[64];
12469 const char *ops;
12470 const char *tmp;
12471 const char *ssesuffix;
12472
12473 switch (get_attr_mode (insn))
12474 {
12475 case MODE_XI:
12476 gcc_assert (TARGET_AVX512F);
12477 /* FALLTHRU */
12478 case MODE_OI:
12479 gcc_assert (TARGET_AVX2);
12480 /* FALLTHRU */
12481 case MODE_TI:
12482 gcc_assert (TARGET_SSE2);
12483 tmp = "pandn";
12484 switch (<MODE>mode)
12485 {
12486 case E_V64QImode:
12487 case E_V32HImode:
12488 /* There is no vpandnb or vpandnw instruction, nor vpandn for
12489 512-bit vectors. Use vpandnq instead. */
12490 ssesuffix = "q";
12491 break;
12492 case E_V16SImode:
12493 case E_V8DImode:
12494 ssesuffix = "<ssemodesuffix>";
12495 break;
12496 case E_V8SImode:
12497 case E_V4DImode:
12498 case E_V4SImode:
12499 case E_V2DImode:
12500 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
12501 ? "<ssemodesuffix>" : "");
12502 break;
12503 default:
12504 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
12505 }
12506 break;
12507
12508 case MODE_V16SF:
12509 gcc_assert (TARGET_AVX512F);
12510 /* FALLTHRU */
12511 case MODE_V8SF:
12512 gcc_assert (TARGET_AVX);
12513 /* FALLTHRU */
12514 case MODE_V4SF:
12515 gcc_assert (TARGET_SSE);
12516 tmp = "andn";
12517 ssesuffix = "ps";
12518 break;
12519
12520 default:
12521 gcc_unreachable ();
12522 }
12523
12524 switch (which_alternative)
12525 {
12526 case 0:
12527 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
12528 break;
12529 case 1:
12530 case 2:
12531 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
12532 break;
12533 default:
12534 gcc_unreachable ();
12535 }
12536
12537 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12538 output_asm_insn (buf, operands);
12539 return "";
12540 }
12541 [(set_attr "isa" "noavx,avx,avx")
12542 (set_attr "type" "sselog")
12543 (set (attr "prefix_data16")
12544 (if_then_else
12545 (and (eq_attr "alternative" "0")
12546 (eq_attr "mode" "TI"))
12547 (const_string "1")
12548 (const_string "*")))
12549 (set_attr "prefix" "orig,vex,evex")
12550 (set (attr "mode")
12551 (cond [(and (match_test "<MODE_SIZE> == 16")
12552 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
12553 (const_string "<ssePSmode>")
12554 (match_test "TARGET_AVX2")
12555 (const_string "<sseinsnmode>")
12556 (match_test "TARGET_AVX")
12557 (if_then_else
12558 (match_test "<MODE_SIZE> > 16")
12559 (const_string "V8SF")
12560 (const_string "<sseinsnmode>"))
12561 (ior (not (match_test "TARGET_SSE2"))
12562 (match_test "optimize_function_for_size_p (cfun)"))
12563 (const_string "V4SF")
12564 ]
12565 (const_string "<sseinsnmode>")))])
12566
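;; EVEX embedded-broadcast variant: operand 2 is a scalar memory operand
;; that the instruction broadcasts to the full vector width.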
12567 (define_insn "*andnot<mode>3_bcst"
12568 [(set (match_operand:VI 0 "register_operand" "=v")
12569 (and:VI
12570 (not:VI48_AVX512VL
12571 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
12572 (vec_duplicate:VI48_AVX512VL
12573 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
12574 "TARGET_AVX512F"
12575 "vpandn<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
12576 [(set_attr "type" "sselog")
12577 (set_attr "prefix" "evex")
12578 (set_attr "mode" "<sseinsnmode>")])
12579
12580 (define_insn "*andnot<mode>3_mask"
12581 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12582 (vec_merge:VI48_AVX512VL
12583 (and:VI48_AVX512VL
12584 (not:VI48_AVX512VL
12585 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
12586 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
12587 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
12588 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
12589 "TARGET_AVX512F"
12590 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
12591 [(set_attr "type" "sselog")
12592 (set_attr "prefix" "evex")
12593 (set_attr "mode" "<sseinsnmode>")])
12594
12595 (define_expand "<code><mode>3"
12596 [(set (match_operand:VI 0 "register_operand")
12597 (any_logic:VI
12598 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
12599 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
12600 "TARGET_SSE"
12601 {
12602 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
12603 DONE;
12604 })
12605
12606 (define_insn "<mask_codefor><code><mode>3<mask_name>"
12607 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
12608 (any_logic:VI48_AVX_AVX512F
12609 (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,x,v")
12610 (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
12611 "TARGET_SSE && <mask_mode512bit_condition>
12612 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12613 {
12614 char buf[64];
12615 const char *ops;
12616 const char *tmp;
12617 const char *ssesuffix;
12618
12619 switch (get_attr_mode (insn))
12620 {
12621 case MODE_XI:
12622 gcc_assert (TARGET_AVX512F);
12623 /* FALLTHRU */
12624 case MODE_OI:
12625 gcc_assert (TARGET_AVX2);
12626 /* FALLTHRU */
12627 case MODE_TI:
12628 gcc_assert (TARGET_SSE2);
12629 tmp = "p<logic>";
12630 switch (<MODE>mode)
12631 {
12632 case E_V16SImode:
12633 case E_V8DImode:
12634 ssesuffix = "<ssemodesuffix>";
12635 break;
12636 case E_V8SImode:
12637 case E_V4DImode:
12638 case E_V4SImode:
12639 case E_V2DImode:
12640 ssesuffix = (TARGET_AVX512VL
12641 && (<mask_applied> || which_alternative == 2)
12642 ? "<ssemodesuffix>" : "");
12643 break;
12644 default:
12645 gcc_unreachable ();
12646 }
12647 break;
12648
12649 case MODE_V8SF:
12650 gcc_assert (TARGET_AVX);
12651 /* FALLTHRU */
12652 case MODE_V4SF:
12653 gcc_assert (TARGET_SSE);
12654 tmp = "<logic>";
12655 ssesuffix = "ps";
12656 break;
12657
12658 default:
12659 gcc_unreachable ();
12660 }
12661
12662 switch (which_alternative)
12663 {
12664 case 0:
12665 if (<mask_applied>)
12666 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
12667 else
12668 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
12669 break;
12670 case 1:
12671 case 2:
12672 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
12673 break;
12674 default:
12675 gcc_unreachable ();
12676 }
12677
12678 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12679 output_asm_insn (buf, operands);
12680 return "";
12681 }
12682 [(set_attr "isa" "noavx,avx,avx")
12683 (set_attr "type" "sselog")
12684 (set (attr "prefix_data16")
12685 (if_then_else
12686 (and (eq_attr "alternative" "0")
12687 (eq_attr "mode" "TI"))
12688 (const_string "1")
12689 (const_string "*")))
12690 (set_attr "prefix" "<mask_prefix3>,evex")
12691 (set (attr "mode")
12692 (cond [(and (match_test "<MODE_SIZE> == 16")
12693 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
12694 (const_string "<ssePSmode>")
12695 (match_test "TARGET_AVX2")
12696 (const_string "<sseinsnmode>")
12697 (match_test "TARGET_AVX")
12698 (if_then_else
12699 (match_test "<MODE_SIZE> > 16")
12700 (const_string "V8SF")
12701 (const_string "<sseinsnmode>"))
12702 (ior (not (match_test "TARGET_SSE2"))
12703 (match_test "optimize_function_for_size_p (cfun)"))
12704 (const_string "V4SF")
12705 ]
12706 (const_string "<sseinsnmode>")))])
12707
12708 (define_insn "*<code><mode>3"
12709 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
12710 (any_logic:VI12_AVX_AVX512F
12711 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
12712 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
12713 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12714 {
12715 char buf[64];
12716 const char *ops;
12717 const char *tmp;
12718 const char *ssesuffix;
12719
12720 switch (get_attr_mode (insn))
12721 {
12722 case MODE_XI:
12723 gcc_assert (TARGET_AVX512F);
12724 /* FALLTHRU */
12725 case MODE_OI:
12726 gcc_assert (TARGET_AVX2);
12727 /* FALLTHRU */
12728 case MODE_TI:
12729 gcc_assert (TARGET_SSE2);
12730 tmp = "p<logic>";
12731 switch (<MODE>mode)
12732 {
12733 case E_V64QImode:
12734 case E_V32HImode:
12735 ssesuffix = "q";
12736 break;
12737 case E_V32QImode:
12738 case E_V16HImode:
12739 case E_V16QImode:
12740 case E_V8HImode:
12741 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
12742 break;
12743 default:
12744 gcc_unreachable ();
12745 }
12746 break;
12747
12748 case MODE_V8SF:
12749 gcc_assert (TARGET_AVX);
12750 /* FALLTHRU */
12751 case MODE_V4SF:
12752 gcc_assert (TARGET_SSE);
12753 tmp = "<logic>";
12754 ssesuffix = "ps";
12755 break;
12756
12757 default:
12758 gcc_unreachable ();
12759 }
12760
12761 switch (which_alternative)
12762 {
12763 case 0:
12764 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
12765 break;
12766 case 1:
12767 case 2:
12768 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
12769 break;
12770 default:
12771 gcc_unreachable ();
12772 }
12773
12774 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12775 output_asm_insn (buf, operands);
12776 return "";
12777 }
12778 [(set_attr "isa" "noavx,avx,avx")
12779 (set_attr "type" "sselog")
12780 (set (attr "prefix_data16")
12781 (if_then_else
12782 (and (eq_attr "alternative" "0")
12783 (eq_attr "mode" "TI"))
12784 (const_string "1")
12785 (const_string "*")))
12786 (set_attr "prefix" "orig,vex,evex")
12787 (set (attr "mode")
12788 (cond [(and (match_test "<MODE_SIZE> == 16")
12789 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
12790 (const_string "<ssePSmode>")
12791 (match_test "TARGET_AVX2")
12792 (const_string "<sseinsnmode>")
12793 (match_test "TARGET_AVX")
12794 (if_then_else
12795 (match_test "<MODE_SIZE> > 16")
12796 (const_string "V8SF")
12797 (const_string "<sseinsnmode>"))
12798 (ior (not (match_test "TARGET_SSE2"))
12799 (match_test "optimize_function_for_size_p (cfun)"))
12800 (const_string "V4SF")
12801 ]
12802 (const_string "<sseinsnmode>")))])
12803
12804 (define_insn "*<code><mode>3_bcst"
12805 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12806 (any_logic:VI48_AVX512VL
12807 (vec_duplicate:VI48_AVX512VL
12808 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
12809 (match_operand:VI48_AVX512VL 2 "register_operand" "v")))]
12810 "TARGET_AVX512F && <mask_avx512vl_condition>"
12811 "vp<logic><ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
12812 [(set_attr "type" "sseiadd")
12813 (set_attr "prefix" "evex")
12814 (set_attr "mode" "<sseinsnmode>")])
12815
12816 (define_mode_iterator VI1248_AVX512VLBW
12817 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
12818 (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
12819 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
12820 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
12821 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
12822 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
12823
12824 (define_mode_iterator AVX512ZEXTMASK
12825 [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
12826
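;; vptestm sets a mask bit when the AND of the corresponding source
;; elements is non-zero; vptestnm sets it when that AND is zero.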
12827 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
12828 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12829 (unspec:<avx512fmaskmode>
12830 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12831 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12832 UNSPEC_TESTM))]
12833 "TARGET_AVX512F"
12834 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12835 [(set_attr "prefix" "evex")
12836 (set_attr "mode" "<sseinsnmode>")])
12837
12838 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
12839 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12840 (unspec:<avx512fmaskmode>
12841 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12842 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12843 UNSPEC_TESTNM))]
12844 "TARGET_AVX512F"
12845 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12846 [(set_attr "prefix" "evex")
12847 (set_attr "mode" "<sseinsnmode>")])
12848
12849 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
12850 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
12851 (zero_extend:AVX512ZEXTMASK
12852 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12853 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12854 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12855 UNSPEC_TESTM)))]
12856 "TARGET_AVX512BW
12857 && (<AVX512ZEXTMASK:MODE_SIZE>
12858 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12859 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12860 [(set_attr "prefix" "evex")
12861 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
12862
12863 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
12864 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
12865 (zero_extend:AVX512ZEXTMASK
12866 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
12867 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12868 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12869 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12870 UNSPEC_TESTM)
12871 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
12872 "TARGET_AVX512BW
12873 && (<AVX512ZEXTMASK:MODE_SIZE>
12874 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12875 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
12876 [(set_attr "prefix" "evex")
12877 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
12878
12879 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
12880 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
12881 (zero_extend:AVX512ZEXTMASK
12882 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12883 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12884 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12885 UNSPEC_TESTNM)))]
12886 "TARGET_AVX512BW
12887 && (<AVX512ZEXTMASK:MODE_SIZE>
12888 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12889 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12890 [(set_attr "prefix" "evex")
12891 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
12892
12893 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
12894 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
12895 (zero_extend:AVX512ZEXTMASK
12896 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
12897 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12898 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12899 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12900 UNSPEC_TESTNM)
12901 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
12902 "TARGET_AVX512BW
12903 && (<AVX512ZEXTMASK:MODE_SIZE>
12904 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12905 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
12906 [(set_attr "prefix" "evex")
12907 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
12908
12909 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12910 ;;
12911 ;; Parallel integral element swizzling
12912 ;;
12913 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12914
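;; Pack-with-truncation reinterprets both inputs in the narrower vector
;; mode and keeps the even-numbered elements of their concatenation,
;; i.e. the low half of every wide element.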
12915 (define_expand "vec_pack_trunc_<mode>"
12916 [(match_operand:<ssepackmode> 0 "register_operand")
12917 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
12918 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
12919 "TARGET_SSE2"
12920 {
12921 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
12922 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
12923 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
12924 DONE;
12925 })
12926
12927 (define_expand "vec_pack_trunc_qi"
12928 [(set (match_operand:HI 0 "register_operand")
12929 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
12930 (const_int 8))
12931 (zero_extend:HI (match_operand:QI 1 "register_operand"))))]
12932 "TARGET_AVX512F")
12933
12934 (define_expand "vec_pack_trunc_<mode>"
12935 [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
12936 (ior:<DOUBLEMASKMODE>
12937 (ashift:<DOUBLEMASKMODE>
12938 (zero_extend:<DOUBLEMASKMODE>
12939 (match_operand:SWI24 2 "register_operand"))
12940 (match_dup 3))
12941 (zero_extend:<DOUBLEMASKMODE>
12942 (match_operand:SWI24 1 "register_operand"))))]
12943 "TARGET_AVX512BW"
12944 {
12945 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
12946 })
12947
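;; Concatenate two nunits/2-bit mask operands into one QImode mask:
;; mask off the low bits of operand 1, shift operand 2 left by nunits/2
;; (via an HImode kshift when the AVX512DQ byte shift is unavailable)
;; and OR the two halves together.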
12948 (define_expand "vec_pack_sbool_trunc_qi"
12949 [(match_operand:QI 0 "register_operand")
12950 (match_operand:QI 1 "register_operand")
12951 (match_operand:QI 2 "register_operand")
12952 (match_operand:QI 3 "const_int_operand")]
12953 "TARGET_AVX512F"
12954 {
12955 HOST_WIDE_INT nunits = INTVAL (operands[3]);
12956 rtx mask, tem1, tem2;
12957 if (nunits != 8 && nunits != 4)
12958 FAIL;
12959 mask = gen_reg_rtx (QImode);
12960 emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1));
12961 tem1 = gen_reg_rtx (QImode);
12962 emit_insn (gen_kandqi (tem1, operands[1], mask));
12963 if (TARGET_AVX512DQ)
12964 {
12965 tem2 = gen_reg_rtx (QImode);
12966 emit_insn (gen_kashiftqi (tem2, operands[2],
12967 GEN_INT (nunits / 2)));
12968 }
12969 else
12970 {
12971 tem2 = gen_reg_rtx (HImode);
12972 emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2],
12973 QImode),
12974 GEN_INT (nunits / 2)));
12975 tem2 = lowpart_subreg (QImode, tem2, HImode);
12976 }
12977 emit_insn (gen_kiorqi (operands[0], tem1, tem2));
12978 DONE;
12979 })
12980
12981 (define_insn "<sse2_avx2>_packsswb<mask_name>"
12982 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
12983 (vec_concat:VI1_AVX512
12984 (ss_truncate:<ssehalfvecmode>
12985 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
12986 (ss_truncate:<ssehalfvecmode>
12987 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
12988 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12989 "@
12990 packsswb\t{%2, %0|%0, %2}
12991 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
12992 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12993 [(set_attr "isa" "noavx,avx,avx512bw")
12994 (set_attr "type" "sselog")
12995 (set_attr "prefix_data16" "1,*,*")
12996 (set_attr "prefix" "orig,<mask_prefix>,evex")
12997 (set_attr "mode" "<sseinsnmode>")])
12998
12999 (define_insn "<sse2_avx2>_packssdw<mask_name>"
13000 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
13001 (vec_concat:VI2_AVX2
13002 (ss_truncate:<ssehalfvecmode>
13003 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13004 (ss_truncate:<ssehalfvecmode>
13005 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13006 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13007 "@
13008 packssdw\t{%2, %0|%0, %2}
13009 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13010 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13011 [(set_attr "isa" "noavx,avx,avx512bw")
13012 (set_attr "type" "sselog")
13013 (set_attr "prefix_data16" "1,*,*")
13014 (set_attr "prefix" "orig,<mask_prefix>,evex")
13015 (set_attr "mode" "<sseinsnmode>")])
13016
13017 (define_insn "<sse2_avx2>_packuswb<mask_name>"
13018 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13019 (vec_concat:VI1_AVX512
13020 (us_truncate:<ssehalfvecmode>
13021 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13022 (us_truncate:<ssehalfvecmode>
13023 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13024 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13025 "@
13026 packuswb\t{%2, %0|%0, %2}
13027 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13028 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13029 [(set_attr "isa" "noavx,avx,avx512bw")
13030 (set_attr "type" "sselog")
13031 (set_attr "prefix_data16" "1,*,*")
13032 (set_attr "prefix" "orig,<mask_prefix>,evex")
13033 (set_attr "mode" "<sseinsnmode>")])
13034
13035 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
13036 [(set (match_operand:V64QI 0 "register_operand" "=v")
13037 (vec_select:V64QI
13038 (vec_concat:V128QI
13039 (match_operand:V64QI 1 "register_operand" "v")
13040 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13041 (parallel [(const_int 8) (const_int 72)
13042 (const_int 9) (const_int 73)
13043 (const_int 10) (const_int 74)
13044 (const_int 11) (const_int 75)
13045 (const_int 12) (const_int 76)
13046 (const_int 13) (const_int 77)
13047 (const_int 14) (const_int 78)
13048 (const_int 15) (const_int 79)
13049 (const_int 24) (const_int 88)
13050 (const_int 25) (const_int 89)
13051 (const_int 26) (const_int 90)
13052 (const_int 27) (const_int 91)
13053 (const_int 28) (const_int 92)
13054 (const_int 29) (const_int 93)
13055 (const_int 30) (const_int 94)
13056 (const_int 31) (const_int 95)
13057 (const_int 40) (const_int 104)
13058 (const_int 41) (const_int 105)
13059 (const_int 42) (const_int 106)
13060 (const_int 43) (const_int 107)
13061 (const_int 44) (const_int 108)
13062 (const_int 45) (const_int 109)
13063 (const_int 46) (const_int 110)
13064 (const_int 47) (const_int 111)
13065 (const_int 56) (const_int 120)
13066 (const_int 57) (const_int 121)
13067 (const_int 58) (const_int 122)
13068 (const_int 59) (const_int 123)
13069 (const_int 60) (const_int 124)
13070 (const_int 61) (const_int 125)
13071 (const_int 62) (const_int 126)
13072 (const_int 63) (const_int 127)])))]
13073 "TARGET_AVX512BW"
13074 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13075 [(set_attr "type" "sselog")
13076 (set_attr "prefix" "evex")
13077 (set_attr "mode" "XI")])
13078
13079 (define_insn "avx2_interleave_highv32qi<mask_name>"
13080 [(set (match_operand:V32QI 0 "register_operand" "=v")
13081 (vec_select:V32QI
13082 (vec_concat:V64QI
13083 (match_operand:V32QI 1 "register_operand" "v")
13084 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
13085 (parallel [(const_int 8) (const_int 40)
13086 (const_int 9) (const_int 41)
13087 (const_int 10) (const_int 42)
13088 (const_int 11) (const_int 43)
13089 (const_int 12) (const_int 44)
13090 (const_int 13) (const_int 45)
13091 (const_int 14) (const_int 46)
13092 (const_int 15) (const_int 47)
13093 (const_int 24) (const_int 56)
13094 (const_int 25) (const_int 57)
13095 (const_int 26) (const_int 58)
13096 (const_int 27) (const_int 59)
13097 (const_int 28) (const_int 60)
13098 (const_int 29) (const_int 61)
13099 (const_int 30) (const_int 62)
13100 (const_int 31) (const_int 63)])))]
13101 "TARGET_AVX2 && <mask_avx512vl_condition>"
13102 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13103 [(set_attr "type" "sselog")
13104 (set_attr "prefix" "<mask_prefix>")
13105 (set_attr "mode" "OI")])
13106
13107 (define_insn "vec_interleave_highv16qi<mask_name>"
13108 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
13109 (vec_select:V16QI
13110 (vec_concat:V32QI
13111 (match_operand:V16QI 1 "register_operand" "0,v")
13112 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
13113 (parallel [(const_int 8) (const_int 24)
13114 (const_int 9) (const_int 25)
13115 (const_int 10) (const_int 26)
13116 (const_int 11) (const_int 27)
13117 (const_int 12) (const_int 28)
13118 (const_int 13) (const_int 29)
13119 (const_int 14) (const_int 30)
13120 (const_int 15) (const_int 31)])))]
13121 "TARGET_SSE2 && <mask_avx512vl_condition>"
13122 "@
13123 punpckhbw\t{%2, %0|%0, %2}
13124 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13125 [(set_attr "isa" "noavx,avx")
13126 (set_attr "type" "sselog")
13127 (set_attr "prefix_data16" "1,*")
13128 (set_attr "prefix" "orig,<mask_prefix>")
13129 (set_attr "mode" "TI")])
13130
13131 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
13132 [(set (match_operand:V64QI 0 "register_operand" "=v")
13133 (vec_select:V64QI
13134 (vec_concat:V128QI
13135 (match_operand:V64QI 1 "register_operand" "v")
13136 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13137 (parallel [(const_int 0) (const_int 64)
13138 (const_int 1) (const_int 65)
13139 (const_int 2) (const_int 66)
13140 (const_int 3) (const_int 67)
13141 (const_int 4) (const_int 68)
13142 (const_int 5) (const_int 69)
13143 (const_int 6) (const_int 70)
13144 (const_int 7) (const_int 71)
13145 (const_int 16) (const_int 80)
13146 (const_int 17) (const_int 81)
13147 (const_int 18) (const_int 82)
13148 (const_int 19) (const_int 83)
13149 (const_int 20) (const_int 84)
13150 (const_int 21) (const_int 85)
13151 (const_int 22) (const_int 86)
13152 (const_int 23) (const_int 87)
13153 (const_int 32) (const_int 96)
13154 (const_int 33) (const_int 97)
13155 (const_int 34) (const_int 98)
13156 (const_int 35) (const_int 99)
13157 (const_int 36) (const_int 100)
13158 (const_int 37) (const_int 101)
13159 (const_int 38) (const_int 102)
13160 (const_int 39) (const_int 103)
13161 (const_int 48) (const_int 112)
13162 (const_int 49) (const_int 113)
13163 (const_int 50) (const_int 114)
13164 (const_int 51) (const_int 115)
13165 (const_int 52) (const_int 116)
13166 (const_int 53) (const_int 117)
13167 (const_int 54) (const_int 118)
13168 (const_int 55) (const_int 119)])))]
13169 "TARGET_AVX512BW"
13170 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13171 [(set_attr "type" "sselog")
13172 (set_attr "prefix" "evex")
13173 (set_attr "mode" "XI")])
13174
13175 (define_insn "avx2_interleave_lowv32qi<mask_name>"
13176 [(set (match_operand:V32QI 0 "register_operand" "=v")
13177 (vec_select:V32QI
13178 (vec_concat:V64QI
13179 (match_operand:V32QI 1 "register_operand" "v")
13180 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
13181 (parallel [(const_int 0) (const_int 32)
13182 (const_int 1) (const_int 33)
13183 (const_int 2) (const_int 34)
13184 (const_int 3) (const_int 35)
13185 (const_int 4) (const_int 36)
13186 (const_int 5) (const_int 37)
13187 (const_int 6) (const_int 38)
13188 (const_int 7) (const_int 39)
13189 (const_int 16) (const_int 48)
13190 (const_int 17) (const_int 49)
13191 (const_int 18) (const_int 50)
13192 (const_int 19) (const_int 51)
13193 (const_int 20) (const_int 52)
13194 (const_int 21) (const_int 53)
13195 (const_int 22) (const_int 54)
13196 (const_int 23) (const_int 55)])))]
13197 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13198 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13199 [(set_attr "type" "sselog")
13200 (set_attr "prefix" "maybe_vex")
13201 (set_attr "mode" "OI")])
13202
13203 (define_insn "vec_interleave_lowv16qi<mask_name>"
13204 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
13205 (vec_select:V16QI
13206 (vec_concat:V32QI
13207 (match_operand:V16QI 1 "register_operand" "0,v")
13208 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
13209 (parallel [(const_int 0) (const_int 16)
13210 (const_int 1) (const_int 17)
13211 (const_int 2) (const_int 18)
13212 (const_int 3) (const_int 19)
13213 (const_int 4) (const_int 20)
13214 (const_int 5) (const_int 21)
13215 (const_int 6) (const_int 22)
13216 (const_int 7) (const_int 23)])))]
13217 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13218 "@
13219 punpcklbw\t{%2, %0|%0, %2}
13220 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13221 [(set_attr "isa" "noavx,avx")
13222 (set_attr "type" "sselog")
13223 (set_attr "prefix_data16" "1,*")
13224 (set_attr "prefix" "orig,vex")
13225 (set_attr "mode" "TI")])
13226
13227 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
13228 [(set (match_operand:V32HI 0 "register_operand" "=v")
13229 (vec_select:V32HI
13230 (vec_concat:V64HI
13231 (match_operand:V32HI 1 "register_operand" "v")
13232 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
13233 (parallel [(const_int 4) (const_int 36)
13234 (const_int 5) (const_int 37)
13235 (const_int 6) (const_int 38)
13236 (const_int 7) (const_int 39)
13237 (const_int 12) (const_int 44)
13238 (const_int 13) (const_int 45)
13239 (const_int 14) (const_int 46)
13240 (const_int 15) (const_int 47)
13241 (const_int 20) (const_int 52)
13242 (const_int 21) (const_int 53)
13243 (const_int 22) (const_int 54)
13244 (const_int 23) (const_int 55)
13245 (const_int 28) (const_int 60)
13246 (const_int 29) (const_int 61)
13247 (const_int 30) (const_int 62)
13248 (const_int 31) (const_int 63)])))]
13249 "TARGET_AVX512BW"
13250 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13251 [(set_attr "type" "sselog")
13252 (set_attr "prefix" "evex")
13253 (set_attr "mode" "XI")])
13254
13255 (define_insn "avx2_interleave_highv16hi<mask_name>"
13256 [(set (match_operand:V16HI 0 "register_operand" "=v")
13257 (vec_select:V16HI
13258 (vec_concat:V32HI
13259 (match_operand:V16HI 1 "register_operand" "v")
13260 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
13261 (parallel [(const_int 4) (const_int 20)
13262 (const_int 5) (const_int 21)
13263 (const_int 6) (const_int 22)
13264 (const_int 7) (const_int 23)
13265 (const_int 12) (const_int 28)
13266 (const_int 13) (const_int 29)
13267 (const_int 14) (const_int 30)
13268 (const_int 15) (const_int 31)])))]
13269 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13270 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13271 [(set_attr "type" "sselog")
13272 (set_attr "prefix" "maybe_evex")
13273 (set_attr "mode" "OI")])
13274
13275 (define_insn "vec_interleave_highv8hi<mask_name>"
13276 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
13277 (vec_select:V8HI
13278 (vec_concat:V16HI
13279 (match_operand:V8HI 1 "register_operand" "0,v")
13280 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
13281 (parallel [(const_int 4) (const_int 12)
13282 (const_int 5) (const_int 13)
13283 (const_int 6) (const_int 14)
13284 (const_int 7) (const_int 15)])))]
13285 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13286 "@
13287 punpckhwd\t{%2, %0|%0, %2}
13288 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13289 [(set_attr "isa" "noavx,avx")
13290 (set_attr "type" "sselog")
13291 (set_attr "prefix_data16" "1,*")
13292 (set_attr "prefix" "orig,maybe_vex")
13293 (set_attr "mode" "TI")])
13294
13295 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
13296 [(set (match_operand:V32HI 0 "register_operand" "=v")
13297 (vec_select:V32HI
13298 (vec_concat:V64HI
13299 (match_operand:V32HI 1 "register_operand" "v")
13300 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
13301 (parallel [(const_int 0) (const_int 32)
13302 (const_int 1) (const_int 33)
13303 (const_int 2) (const_int 34)
13304 (const_int 3) (const_int 35)
13305 (const_int 8) (const_int 40)
13306 (const_int 9) (const_int 41)
13307 (const_int 10) (const_int 42)
13308 (const_int 11) (const_int 43)
13309 (const_int 16) (const_int 48)
13310 (const_int 17) (const_int 49)
13311 (const_int 18) (const_int 50)
13312 (const_int 19) (const_int 51)
13313 (const_int 24) (const_int 56)
13314 (const_int 25) (const_int 57)
13315 (const_int 26) (const_int 58)
13316 (const_int 27) (const_int 59)])))]
13317 "TARGET_AVX512BW"
13318 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13319 [(set_attr "type" "sselog")
13320 (set_attr "prefix" "evex")
13321 (set_attr "mode" "XI")])
13322
13323 (define_insn "avx2_interleave_lowv16hi<mask_name>"
13324 [(set (match_operand:V16HI 0 "register_operand" "=v")
13325 (vec_select:V16HI
13326 (vec_concat:V32HI
13327 (match_operand:V16HI 1 "register_operand" "v")
13328 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
13329 (parallel [(const_int 0) (const_int 16)
13330 (const_int 1) (const_int 17)
13331 (const_int 2) (const_int 18)
13332 (const_int 3) (const_int 19)
13333 (const_int 8) (const_int 24)
13334 (const_int 9) (const_int 25)
13335 (const_int 10) (const_int 26)
13336 (const_int 11) (const_int 27)])))]
13337 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13338 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13339 [(set_attr "type" "sselog")
13340 (set_attr "prefix" "maybe_evex")
13341 (set_attr "mode" "OI")])
13342
13343 (define_insn "vec_interleave_lowv8hi<mask_name>"
13344 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
13345 (vec_select:V8HI
13346 (vec_concat:V16HI
13347 (match_operand:V8HI 1 "register_operand" "0,v")
13348 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
13349 (parallel [(const_int 0) (const_int 8)
13350 (const_int 1) (const_int 9)
13351 (const_int 2) (const_int 10)
13352 (const_int 3) (const_int 11)])))]
13353 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13354 "@
13355 punpcklwd\t{%2, %0|%0, %2}
13356 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13357 [(set_attr "isa" "noavx,avx")
13358 (set_attr "type" "sselog")
13359 (set_attr "prefix_data16" "1,*")
13360 (set_attr "prefix" "orig,maybe_evex")
13361 (set_attr "mode" "TI")])
13362
13363 (define_insn "avx2_interleave_highv8si<mask_name>"
13364 [(set (match_operand:V8SI 0 "register_operand" "=v")
13365 (vec_select:V8SI
13366 (vec_concat:V16SI
13367 (match_operand:V8SI 1 "register_operand" "v")
13368 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
13369 (parallel [(const_int 2) (const_int 10)
13370 (const_int 3) (const_int 11)
13371 (const_int 6) (const_int 14)
13372 (const_int 7) (const_int 15)])))]
13373 "TARGET_AVX2 && <mask_avx512vl_condition>"
13374 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13375 [(set_attr "type" "sselog")
13376 (set_attr "prefix" "maybe_evex")
13377 (set_attr "mode" "OI")])
13378
13379 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
13380 [(set (match_operand:V16SI 0 "register_operand" "=v")
13381 (vec_select:V16SI
13382 (vec_concat:V32SI
13383 (match_operand:V16SI 1 "register_operand" "v")
13384 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
13385 (parallel [(const_int 2) (const_int 18)
13386 (const_int 3) (const_int 19)
13387 (const_int 6) (const_int 22)
13388 (const_int 7) (const_int 23)
13389 (const_int 10) (const_int 26)
13390 (const_int 11) (const_int 27)
13391 (const_int 14) (const_int 30)
13392 (const_int 15) (const_int 31)])))]
13393 "TARGET_AVX512F"
13394 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13395 [(set_attr "type" "sselog")
13396 (set_attr "prefix" "evex")
13397 (set_attr "mode" "XI")])
13398
13399
13400 (define_insn "vec_interleave_highv4si<mask_name>"
13401 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
13402 (vec_select:V4SI
13403 (vec_concat:V8SI
13404 (match_operand:V4SI 1 "register_operand" "0,v")
13405 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
13406 (parallel [(const_int 2) (const_int 6)
13407 (const_int 3) (const_int 7)])))]
13408 "TARGET_SSE2 && <mask_avx512vl_condition>"
13409 "@
13410 punpckhdq\t{%2, %0|%0, %2}
13411 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13412 [(set_attr "isa" "noavx,avx")
13413 (set_attr "type" "sselog")
13414 (set_attr "prefix_data16" "1,*")
13415 (set_attr "prefix" "orig,maybe_vex")
13416 (set_attr "mode" "TI")])
13417
13418 (define_insn "avx2_interleave_lowv8si<mask_name>"
13419 [(set (match_operand:V8SI 0 "register_operand" "=v")
13420 (vec_select:V8SI
13421 (vec_concat:V16SI
13422 (match_operand:V8SI 1 "register_operand" "v")
13423 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
13424 (parallel [(const_int 0) (const_int 8)
13425 (const_int 1) (const_int 9)
13426 (const_int 4) (const_int 12)
13427 (const_int 5) (const_int 13)])))]
13428 "TARGET_AVX2 && <mask_avx512vl_condition>"
13429 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13430 [(set_attr "type" "sselog")
13431 (set_attr "prefix" "maybe_evex")
13432 (set_attr "mode" "OI")])
13433
13434 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
13435 [(set (match_operand:V16SI 0 "register_operand" "=v")
13436 (vec_select:V16SI
13437 (vec_concat:V32SI
13438 (match_operand:V16SI 1 "register_operand" "v")
13439 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
13440 (parallel [(const_int 0) (const_int 16)
13441 (const_int 1) (const_int 17)
13442 (const_int 4) (const_int 20)
13443 (const_int 5) (const_int 21)
13444 (const_int 8) (const_int 24)
13445 (const_int 9) (const_int 25)
13446 (const_int 12) (const_int 28)
13447 (const_int 13) (const_int 29)])))]
13448 "TARGET_AVX512F"
13449 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13450 [(set_attr "type" "sselog")
13451 (set_attr "prefix" "evex")
13452 (set_attr "mode" "XI")])
13453
13454 (define_insn "vec_interleave_lowv4si<mask_name>"
13455 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
13456 (vec_select:V4SI
13457 (vec_concat:V8SI
13458 (match_operand:V4SI 1 "register_operand" "0,v")
13459 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
13460 (parallel [(const_int 0) (const_int 4)
13461 (const_int 1) (const_int 5)])))]
13462 "TARGET_SSE2 && <mask_avx512vl_condition>"
13463 "@
13464 punpckldq\t{%2, %0|%0, %2}
13465 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13466 [(set_attr "isa" "noavx,avx")
13467 (set_attr "type" "sselog")
13468 (set_attr "prefix_data16" "1,*")
13469 (set_attr "prefix" "orig,vex")
13470 (set_attr "mode" "TI")])
13471
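;; AVX2 vpunpckl/h interleave within each 128-bit lane, so a full-width
;; interleave is built from the in-lane low and high interleaves plus a
;; vperm2i128 that picks the appropriate lane from each temporary.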
13472 (define_expand "vec_interleave_high<mode>"
13473 [(match_operand:VI_256 0 "register_operand")
13474 (match_operand:VI_256 1 "register_operand")
13475 (match_operand:VI_256 2 "nonimmediate_operand")]
13476 "TARGET_AVX2"
13477 {
13478 rtx t1 = gen_reg_rtx (<MODE>mode);
13479 rtx t2 = gen_reg_rtx (<MODE>mode);
13480 rtx t3 = gen_reg_rtx (V4DImode);
13481 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
13482 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
13483 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
13484 gen_lowpart (V4DImode, t2),
13485 GEN_INT (1 + (3 << 4))));
13486 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
13487 DONE;
13488 })
13489
13490 (define_expand "vec_interleave_low<mode>"
13491 [(match_operand:VI_256 0 "register_operand")
13492 (match_operand:VI_256 1 "register_operand")
13493 (match_operand:VI_256 2 "nonimmediate_operand")]
13494 "TARGET_AVX2"
13495 {
13496 rtx t1 = gen_reg_rtx (<MODE>mode);
13497 rtx t2 = gen_reg_rtx (<MODE>mode);
13498 rtx t3 = gen_reg_rtx (V4DImode);
13499 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
13500 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
13501 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
13502 gen_lowpart (V4DImode, t2),
13503 GEN_INT (0 + (2 << 4))));
13504 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
13505 DONE;
13506 })
13507
13508 ;; Modes handled by pinsr patterns.
13509 (define_mode_iterator PINSR_MODE
13510 [(V16QI "TARGET_SSE4_1") V8HI
13511 (V4SI "TARGET_SSE4_1")
13512 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
13513
13514 (define_mode_attr sse2p4_1
13515 [(V16QI "sse4_1") (V8HI "sse2")
13516 (V4SI "sse4_1") (V2DI "sse4_1")])
13517
13518 (define_mode_attr pinsr_evex_isa
13519 [(V16QI "avx512bw") (V8HI "avx512bw")
13520 (V4SI "avx512dq") (V2DI "avx512dq")])
13521
13522 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
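;; Operand 3 arrives as a single-bit merge mask; exact_log2 turns it
;; into the element index expected by the pinsr immediate.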
13523 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
13524 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
13525 (vec_merge:PINSR_MODE
13526 (vec_duplicate:PINSR_MODE
13527 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
13528 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
13529 (match_operand:SI 3 "const_int_operand")))]
13530 "TARGET_SSE2
13531 && ((unsigned) exact_log2 (INTVAL (operands[3]))
13532 < GET_MODE_NUNITS (<MODE>mode))"
13533 {
13534 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
13535
13536 switch (which_alternative)
13537 {
13538 case 0:
13539 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
13540 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
13541 /* FALLTHRU */
13542 case 1:
13543 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
13544 case 2:
13545 case 4:
13546 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
13547 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
13548 /* FALLTHRU */
13549 case 3:
13550 case 5:
13551 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13552 default:
13553 gcc_unreachable ();
13554 }
13555 }
13556 [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
13557 (set_attr "type" "sselog")
13558 (set (attr "prefix_rex")
13559 (if_then_else
13560 (and (not (match_test "TARGET_AVX"))
13561 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
13562 (const_string "1")
13563 (const_string "*")))
13564 (set (attr "prefix_data16")
13565 (if_then_else
13566 (and (not (match_test "TARGET_AVX"))
13567 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13568 (const_string "1")
13569 (const_string "*")))
13570 (set (attr "prefix_extra")
13571 (if_then_else
13572 (and (not (match_test "TARGET_AVX"))
13573 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13574 (const_string "*")
13575 (const_string "1")))
13576 (set_attr "length_immediate" "1")
13577 (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
13578 (set_attr "mode" "TI")])
13579
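;; Operand 3 selects which 128-bit quarter of the destination is
;; replaced; it is rewritten as a vec_merge selector with the four
;; dword (or two qword) bits of that quarter cleared.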
13580 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
13581 [(match_operand:AVX512_VEC 0 "register_operand")
13582 (match_operand:AVX512_VEC 1 "register_operand")
13583 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
13584 (match_operand:SI 3 "const_0_to_3_operand")
13585 (match_operand:AVX512_VEC 4 "register_operand")
13586 (match_operand:<avx512fmaskmode> 5 "register_operand")]
13587 "TARGET_AVX512F"
13588 {
13589 int mask, selector;
13590 mask = INTVAL (operands[3]);
13591 selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
13592 ? 0xFFFF ^ (0x000F << mask * 4)
13593 : 0xFF ^ (0x03 << mask * 2));
13594 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
13595 (operands[0], operands[1], operands[2], GEN_INT (selector),
13596 operands[4], operands[5]));
13597 DONE;
13598 })
13599
13600 (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
13601 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
13602 (vec_merge:AVX512_VEC
13603 (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
13604 (vec_duplicate:AVX512_VEC
13605 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
13606 (match_operand:SI 3 "const_int_operand" "n,n,n")))]
13607 "TARGET_AVX512F
13608 && (INTVAL (operands[3])
13609 == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
13610 {
13611 if (which_alternative == 0)
13612 return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
13613 switch (<MODE>mode)
13614 {
13615 case E_V8DFmode:
13616 return "vmovapd\t{%2, %x0|%x0, %2}";
13617 case E_V16SFmode:
13618 return "vmovaps\t{%2, %x0|%x0, %2}";
13619 case E_V8DImode:
13620 return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
13621 : "vmovdqa\t{%2, %x0|%x0, %2}";
13622 case E_V16SImode:
13623 return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
13624 : "vmovdqa\t{%2, %x0|%x0, %2}";
13625 default:
13626 gcc_unreachable ();
13627 }
13628 }
13629 [(set_attr "type" "sselog,ssemov,ssemov")
13630 (set_attr "length_immediate" "1,0,0")
13631 (set_attr "prefix" "evex,vex,evex")
13632 (set_attr "mode" "<sseinsnmode>,<ssequarterinsnmode>,<ssequarterinsnmode>")])
13633
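;; The *_1 insn matches the full vec_merge selector and recovers the quarter
;; index from it, which becomes the immediate of the emitted vinsert.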
13634 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
13635 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
13636 (vec_merge:AVX512_VEC
13637 (match_operand:AVX512_VEC 1 "register_operand" "v")
13638 (vec_duplicate:AVX512_VEC
13639 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
13640 (match_operand:SI 3 "const_int_operand" "n")))]
13641 "TARGET_AVX512F"
13642 {
13643 int mask;
13644 int selector = INTVAL (operands[3]);
13645
13646 if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))
13647 mask = 0;
13648 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3))
13649 mask = 1;
13650 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF))
13651 mask = 2;
13652 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0x0FFF : 0x3F))
13653 mask = 3;
13654 else
13655 gcc_unreachable ();
13656
13657 operands[3] = GEN_INT (mask);
13658
13659 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
13660 }
13661 [(set_attr "type" "sselog")
13662 (set_attr "length_immediate" "1")
13663 (set_attr "prefix" "evex")
13664 (set_attr "mode" "<sseinsnmode>")])
13665
13666 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
13667 [(match_operand:AVX512_VEC_2 0 "register_operand")
13668 (match_operand:AVX512_VEC_2 1 "register_operand")
13669 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
13670 (match_operand:SI 3 "const_0_to_1_operand")
13671 (match_operand:AVX512_VEC_2 4 "register_operand")
13672 (match_operand:<avx512fmaskmode> 5 "register_operand")]
13673 "TARGET_AVX512F"
13674 {
13675 int mask = INTVAL (operands[3]);
13676 if (mask == 0)
13677 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
13678 operands[2], operands[4],
13679 operands[5]));
13680 else
13681 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
13682 operands[2], operands[4],
13683 operands[5]));
13684 DONE;
13685 })
13686
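;; vec_set_lo/vec_set_hi insert a half-width operand into the low or high
;; half of the destination vector; the vinsert immediate is 0 for the low
;; half and 1 for the high half.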
13687 (define_insn "vec_set_lo_<mode><mask_name>"
13688 [(set (match_operand:V16FI 0 "register_operand" "=v")
13689 (vec_concat:V16FI
13690 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
13691 (vec_select:<ssehalfvecmode>
13692 (match_operand:V16FI 1 "register_operand" "v")
13693 (parallel [(const_int 8) (const_int 9)
13694 (const_int 10) (const_int 11)
13695 (const_int 12) (const_int 13)
13696 (const_int 14) (const_int 15)]))))]
13697 "TARGET_AVX512DQ"
13698 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
13699 [(set_attr "type" "sselog")
13700 (set_attr "length_immediate" "1")
13701 (set_attr "prefix" "evex")
13702 (set_attr "mode" "<sseinsnmode>")])
13703
13704 (define_insn "vec_set_hi_<mode><mask_name>"
13705 [(set (match_operand:V16FI 0 "register_operand" "=v")
13706 (vec_concat:V16FI
13707 (vec_select:<ssehalfvecmode>
13708 (match_operand:V16FI 1 "register_operand" "v")
13709 (parallel [(const_int 0) (const_int 1)
13710 (const_int 2) (const_int 3)
13711 (const_int 4) (const_int 5)
13712 (const_int 6) (const_int 7)]))
13713 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
13714 "TARGET_AVX512DQ"
13715 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
13716 [(set_attr "type" "sselog")
13717 (set_attr "length_immediate" "1")
13718 (set_attr "prefix" "evex")
13719 (set_attr "mode" "<sseinsnmode>")])
13720
13721 (define_insn "vec_set_lo_<mode><mask_name>"
13722 [(set (match_operand:V8FI 0 "register_operand" "=v")
13723 (vec_concat:V8FI
13724 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
13725 (vec_select:<ssehalfvecmode>
13726 (match_operand:V8FI 1 "register_operand" "v")
13727 (parallel [(const_int 4) (const_int 5)
13728 (const_int 6) (const_int 7)]))))]
13729 "TARGET_AVX512F"
13730 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
13731 [(set_attr "type" "sselog")
13732 (set_attr "length_immediate" "1")
13733 (set_attr "prefix" "evex")
13734 (set_attr "mode" "XI")])
13735
13736 (define_insn "vec_set_hi_<mode><mask_name>"
13737 [(set (match_operand:V8FI 0 "register_operand" "=v")
13738 (vec_concat:V8FI
13739 (vec_select:<ssehalfvecmode>
13740 (match_operand:V8FI 1 "register_operand" "v")
13741 (parallel [(const_int 0) (const_int 1)
13742 (const_int 2) (const_int 3)]))
13743 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
13744 "TARGET_AVX512F"
13745 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
13746 [(set_attr "type" "sselog")
13747 (set_attr "length_immediate" "1")
13748 (set_attr "prefix" "evex")
13749 (set_attr "mode" "XI")])
13750
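;; The shuf_<shuffletype>NxM expanders decompose the imm8 lane-selection mask
;; into explicit element indices of a vec_select over the concatenation of
;; the two sources; the matching *_1 insns check that each index group forms
;; a whole 128-bit lane and rebuild the immediate from the indices.  E.g. for
;; the 256-bit 64x2 form, imm 1 picks the high lane of operand 1 and the low
;; lane of operand 2.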
13751 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
13752 [(match_operand:VI8F_256 0 "register_operand")
13753 (match_operand:VI8F_256 1 "register_operand")
13754 (match_operand:VI8F_256 2 "nonimmediate_operand")
13755 (match_operand:SI 3 "const_0_to_3_operand")
13756 (match_operand:VI8F_256 4 "register_operand")
13757 (match_operand:QI 5 "register_operand")]
13758 "TARGET_AVX512DQ"
13759 {
13760 int mask = INTVAL (operands[3]);
13761 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
13762 (operands[0], operands[1], operands[2],
13763 GEN_INT (((mask >> 0) & 1) * 2 + 0),
13764 GEN_INT (((mask >> 0) & 1) * 2 + 1),
13765 GEN_INT (((mask >> 1) & 1) * 2 + 4),
13766 GEN_INT (((mask >> 1) & 1) * 2 + 5),
13767 operands[4], operands[5]));
13768 DONE;
13769 })
13770
13771 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
13772 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
13773 (vec_select:VI8F_256
13774 (vec_concat:<ssedoublemode>
13775 (match_operand:VI8F_256 1 "register_operand" "v")
13776 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
13777 (parallel [(match_operand 3 "const_0_to_3_operand")
13778 (match_operand 4 "const_0_to_3_operand")
13779 (match_operand 5 "const_4_to_7_operand")
13780 (match_operand 6 "const_4_to_7_operand")])))]
13781 "TARGET_AVX512VL
13782 && (INTVAL (operands[3]) & 1) == 0
13783 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
13784 && (INTVAL (operands[5]) & 1) == 0
13785 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
13786 {
13787 int mask;
13788 mask = INTVAL (operands[3]) / 2;
13789 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
13790 operands[3] = GEN_INT (mask);
13791 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
13792 }
13793 [(set_attr "type" "sselog")
13794 (set_attr "length_immediate" "1")
13795 (set_attr "prefix" "evex")
13796 (set_attr "mode" "XI")])
13797
13798 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
13799 [(match_operand:V8FI 0 "register_operand")
13800 (match_operand:V8FI 1 "register_operand")
13801 (match_operand:V8FI 2 "nonimmediate_operand")
13802 (match_operand:SI 3 "const_0_to_255_operand")
13803 (match_operand:V8FI 4 "register_operand")
13804 (match_operand:QI 5 "register_operand")]
13805 "TARGET_AVX512F"
13806 {
13807 int mask = INTVAL (operands[3]);
13808 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
13809 (operands[0], operands[1], operands[2],
13810 GEN_INT (((mask >> 0) & 3) * 2),
13811 GEN_INT (((mask >> 0) & 3) * 2 + 1),
13812 GEN_INT (((mask >> 2) & 3) * 2),
13813 GEN_INT (((mask >> 2) & 3) * 2 + 1),
13814 GEN_INT (((mask >> 4) & 3) * 2 + 8),
13815 GEN_INT (((mask >> 4) & 3) * 2 + 9),
13816 GEN_INT (((mask >> 6) & 3) * 2 + 8),
13817 GEN_INT (((mask >> 6) & 3) * 2 + 9),
13818 operands[4], operands[5]));
13819 DONE;
13820 })
13821
13822 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
13823 [(set (match_operand:V8FI 0 "register_operand" "=v")
13824 (vec_select:V8FI
13825 (vec_concat:<ssedoublemode>
13826 (match_operand:V8FI 1 "register_operand" "v")
13827 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
13828 (parallel [(match_operand 3 "const_0_to_7_operand")
13829 (match_operand 4 "const_0_to_7_operand")
13830 (match_operand 5 "const_0_to_7_operand")
13831 (match_operand 6 "const_0_to_7_operand")
13832 (match_operand 7 "const_8_to_15_operand")
13833 (match_operand 8 "const_8_to_15_operand")
13834 (match_operand 9 "const_8_to_15_operand")
13835 (match_operand 10 "const_8_to_15_operand")])))]
13836 "TARGET_AVX512F
13837 && (INTVAL (operands[3]) & 1) == 0
13838 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
13839 && (INTVAL (operands[5]) & 1) == 0
13840 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
13841 && (INTVAL (operands[7]) & 1) == 0
13842 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
13843 && (INTVAL (operands[9]) & 1) == 0
13844 && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
13845 {
13846 int mask;
13847 mask = INTVAL (operands[3]) / 2;
13848 mask |= INTVAL (operands[5]) / 2 << 2;
13849 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
13850 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
13851 operands[3] = GEN_INT (mask);
13852
13853 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
13854 }
13855 [(set_attr "type" "sselog")
13856 (set_attr "length_immediate" "1")
13857 (set_attr "prefix" "evex")
13858 (set_attr "mode" "<sseinsnmode>")])
13859
13860 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
13861 [(match_operand:VI4F_256 0 "register_operand")
13862 (match_operand:VI4F_256 1 "register_operand")
13863 (match_operand:VI4F_256 2 "nonimmediate_operand")
13864 (match_operand:SI 3 "const_0_to_3_operand")
13865 (match_operand:VI4F_256 4 "register_operand")
13866 (match_operand:QI 5 "register_operand")]
13867 "TARGET_AVX512VL"
13868 {
13869 int mask = INTVAL (operands[3]);
13870 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
13871 (operands[0], operands[1], operands[2],
13872 GEN_INT (((mask >> 0) & 1) * 4 + 0),
13873 GEN_INT (((mask >> 0) & 1) * 4 + 1),
13874 GEN_INT (((mask >> 0) & 1) * 4 + 2),
13875 GEN_INT (((mask >> 0) & 1) * 4 + 3),
13876 GEN_INT (((mask >> 1) & 1) * 4 + 8),
13877 GEN_INT (((mask >> 1) & 1) * 4 + 9),
13878 GEN_INT (((mask >> 1) & 1) * 4 + 10),
13879 GEN_INT (((mask >> 1) & 1) * 4 + 11),
13880 operands[4], operands[5]));
13881 DONE;
13882 })
13883
13884 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
13885 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
13886 (vec_select:VI4F_256
13887 (vec_concat:<ssedoublemode>
13888 (match_operand:VI4F_256 1 "register_operand" "v")
13889 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
13890 (parallel [(match_operand 3 "const_0_to_7_operand")
13891 (match_operand 4 "const_0_to_7_operand")
13892 (match_operand 5 "const_0_to_7_operand")
13893 (match_operand 6 "const_0_to_7_operand")
13894 (match_operand 7 "const_8_to_15_operand")
13895 (match_operand 8 "const_8_to_15_operand")
13896 (match_operand 9 "const_8_to_15_operand")
13897 (match_operand 10 "const_8_to_15_operand")])))]
13898 "TARGET_AVX512VL
13899 && (INTVAL (operands[3]) & 3) == 0
13900 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
13901 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
13902 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
13903 && (INTVAL (operands[7]) & 3) == 0
13904 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
13905 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
13906 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
13907 {
13908 int mask;
13909 mask = INTVAL (operands[3]) / 4;
13910 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
13911 operands[3] = GEN_INT (mask);
13912
13913 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
13914 }
13915 [(set_attr "type" "sselog")
13916 (set_attr "length_immediate" "1")
13917 (set_attr "prefix" "evex")
13918 (set_attr "mode" "<sseinsnmode>")])
13919
13920 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
13921 [(match_operand:V16FI 0 "register_operand")
13922 (match_operand:V16FI 1 "register_operand")
13923 (match_operand:V16FI 2 "nonimmediate_operand")
13924 (match_operand:SI 3 "const_0_to_255_operand")
13925 (match_operand:V16FI 4 "register_operand")
13926 (match_operand:HI 5 "register_operand")]
13927 "TARGET_AVX512F"
13928 {
13929 int mask = INTVAL (operands[3]);
13930 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
13931 (operands[0], operands[1], operands[2],
13932 GEN_INT (((mask >> 0) & 3) * 4),
13933 GEN_INT (((mask >> 0) & 3) * 4 + 1),
13934 GEN_INT (((mask >> 0) & 3) * 4 + 2),
13935 GEN_INT (((mask >> 0) & 3) * 4 + 3),
13936 GEN_INT (((mask >> 2) & 3) * 4),
13937 GEN_INT (((mask >> 2) & 3) * 4 + 1),
13938 GEN_INT (((mask >> 2) & 3) * 4 + 2),
13939 GEN_INT (((mask >> 2) & 3) * 4 + 3),
13940 GEN_INT (((mask >> 4) & 3) * 4 + 16),
13941 GEN_INT (((mask >> 4) & 3) * 4 + 17),
13942 GEN_INT (((mask >> 4) & 3) * 4 + 18),
13943 GEN_INT (((mask >> 4) & 3) * 4 + 19),
13944 GEN_INT (((mask >> 6) & 3) * 4 + 16),
13945 GEN_INT (((mask >> 6) & 3) * 4 + 17),
13946 GEN_INT (((mask >> 6) & 3) * 4 + 18),
13947 GEN_INT (((mask >> 6) & 3) * 4 + 19),
13948 operands[4], operands[5]));
13949 DONE;
13950 })
13951
13952 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
13953 [(set (match_operand:V16FI 0 "register_operand" "=v")
13954 (vec_select:V16FI
13955 (vec_concat:<ssedoublemode>
13956 (match_operand:V16FI 1 "register_operand" "v")
13957 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
13958 (parallel [(match_operand 3 "const_0_to_15_operand")
13959 (match_operand 4 "const_0_to_15_operand")
13960 (match_operand 5 "const_0_to_15_operand")
13961 (match_operand 6 "const_0_to_15_operand")
13962 (match_operand 7 "const_0_to_15_operand")
13963 (match_operand 8 "const_0_to_15_operand")
13964 (match_operand 9 "const_0_to_15_operand")
13965 (match_operand 10 "const_0_to_15_operand")
13966 (match_operand 11 "const_16_to_31_operand")
13967 (match_operand 12 "const_16_to_31_operand")
13968 (match_operand 13 "const_16_to_31_operand")
13969 (match_operand 14 "const_16_to_31_operand")
13970 (match_operand 15 "const_16_to_31_operand")
13971 (match_operand 16 "const_16_to_31_operand")
13972 (match_operand 17 "const_16_to_31_operand")
13973 (match_operand 18 "const_16_to_31_operand")])))]
13974 "TARGET_AVX512F
13975 && (INTVAL (operands[3]) & 3) == 0
13976 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
13977 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
13978 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
13979 && (INTVAL (operands[7]) & 3) == 0
13980 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
13981 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
13982 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
13983 && (INTVAL (operands[11]) & 3) == 0
13984 && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
13985 && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
13986 && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
13987 && (INTVAL (operands[15]) & 3) == 0
13988 && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
13989 && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
13990 && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
13991 {
13992 int mask;
13993 mask = INTVAL (operands[3]) / 4;
13994 mask |= INTVAL (operands[7]) / 4 << 2;
13995 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
13996 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
13997 operands[3] = GEN_INT (mask);
13998
13999 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
14000 }
14001 [(set_attr "type" "sselog")
14002 (set_attr "length_immediate" "1")
14003 (set_attr "prefix" "evex")
14004 (set_attr "mode" "<sseinsnmode>")])
14005
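;; The pshufd expanders split the imm8 into four 2-bit element selectors and
;; replicate them across each 128-bit lane (+4, +8, +12 for V16SI); the *_1
;; insns verify the replication and reassemble the immediate.  E.g. imm 0x1B
;; selects elements 3,2,1,0, reversing each lane.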
14006 (define_expand "avx512f_pshufdv3_mask"
14007 [(match_operand:V16SI 0 "register_operand")
14008 (match_operand:V16SI 1 "nonimmediate_operand")
14009 (match_operand:SI 2 "const_0_to_255_operand")
14010 (match_operand:V16SI 3 "register_operand")
14011 (match_operand:HI 4 "register_operand")]
14012 "TARGET_AVX512F"
14013 {
14014 int mask = INTVAL (operands[2]);
14015 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
14016 GEN_INT ((mask >> 0) & 3),
14017 GEN_INT ((mask >> 2) & 3),
14018 GEN_INT ((mask >> 4) & 3),
14019 GEN_INT ((mask >> 6) & 3),
14020 GEN_INT (((mask >> 0) & 3) + 4),
14021 GEN_INT (((mask >> 2) & 3) + 4),
14022 GEN_INT (((mask >> 4) & 3) + 4),
14023 GEN_INT (((mask >> 6) & 3) + 4),
14024 GEN_INT (((mask >> 0) & 3) + 8),
14025 GEN_INT (((mask >> 2) & 3) + 8),
14026 GEN_INT (((mask >> 4) & 3) + 8),
14027 GEN_INT (((mask >> 6) & 3) + 8),
14028 GEN_INT (((mask >> 0) & 3) + 12),
14029 GEN_INT (((mask >> 2) & 3) + 12),
14030 GEN_INT (((mask >> 4) & 3) + 12),
14031 GEN_INT (((mask >> 6) & 3) + 12),
14032 operands[3], operands[4]));
14033 DONE;
14034 })
14035
14036 (define_insn "avx512f_pshufd_1<mask_name>"
14037 [(set (match_operand:V16SI 0 "register_operand" "=v")
14038 (vec_select:V16SI
14039 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
14040 (parallel [(match_operand 2 "const_0_to_3_operand")
14041 (match_operand 3 "const_0_to_3_operand")
14042 (match_operand 4 "const_0_to_3_operand")
14043 (match_operand 5 "const_0_to_3_operand")
14044 (match_operand 6 "const_4_to_7_operand")
14045 (match_operand 7 "const_4_to_7_operand")
14046 (match_operand 8 "const_4_to_7_operand")
14047 (match_operand 9 "const_4_to_7_operand")
14048 (match_operand 10 "const_8_to_11_operand")
14049 (match_operand 11 "const_8_to_11_operand")
14050 (match_operand 12 "const_8_to_11_operand")
14051 (match_operand 13 "const_8_to_11_operand")
14052 (match_operand 14 "const_12_to_15_operand")
14053 (match_operand 15 "const_12_to_15_operand")
14054 (match_operand 16 "const_12_to_15_operand")
14055 (match_operand 17 "const_12_to_15_operand")])))]
14056 "TARGET_AVX512F
14057 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
14058 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
14059 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
14060 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
14061 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
14062 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
14063 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
14064 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
14065 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
14066 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
14067 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
14068 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
14069 {
14070 int mask = 0;
14071 mask |= INTVAL (operands[2]) << 0;
14072 mask |= INTVAL (operands[3]) << 2;
14073 mask |= INTVAL (operands[4]) << 4;
14074 mask |= INTVAL (operands[5]) << 6;
14075 operands[2] = GEN_INT (mask);
14076
14077 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
14078 }
14079 [(set_attr "type" "sselog1")
14080 (set_attr "prefix" "evex")
14081 (set_attr "length_immediate" "1")
14082 (set_attr "mode" "XI")])
14083
14084 (define_expand "avx512vl_pshufdv3_mask"
14085 [(match_operand:V8SI 0 "register_operand")
14086 (match_operand:V8SI 1 "nonimmediate_operand")
14087 (match_operand:SI 2 "const_0_to_255_operand")
14088 (match_operand:V8SI 3 "register_operand")
14089 (match_operand:QI 4 "register_operand")]
14090 "TARGET_AVX512VL"
14091 {
14092 int mask = INTVAL (operands[2]);
14093 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
14094 GEN_INT ((mask >> 0) & 3),
14095 GEN_INT ((mask >> 2) & 3),
14096 GEN_INT ((mask >> 4) & 3),
14097 GEN_INT ((mask >> 6) & 3),
14098 GEN_INT (((mask >> 0) & 3) + 4),
14099 GEN_INT (((mask >> 2) & 3) + 4),
14100 GEN_INT (((mask >> 4) & 3) + 4),
14101 GEN_INT (((mask >> 6) & 3) + 4),
14102 operands[3], operands[4]));
14103 DONE;
14104 })
14105
14106 (define_expand "avx2_pshufdv3"
14107 [(match_operand:V8SI 0 "register_operand")
14108 (match_operand:V8SI 1 "nonimmediate_operand")
14109 (match_operand:SI 2 "const_0_to_255_operand")]
14110 "TARGET_AVX2"
14111 {
14112 int mask = INTVAL (operands[2]);
14113 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
14114 GEN_INT ((mask >> 0) & 3),
14115 GEN_INT ((mask >> 2) & 3),
14116 GEN_INT ((mask >> 4) & 3),
14117 GEN_INT ((mask >> 6) & 3),
14118 GEN_INT (((mask >> 0) & 3) + 4),
14119 GEN_INT (((mask >> 2) & 3) + 4),
14120 GEN_INT (((mask >> 4) & 3) + 4),
14121 GEN_INT (((mask >> 6) & 3) + 4)));
14122 DONE;
14123 })
14124
14125 (define_insn "avx2_pshufd_1<mask_name>"
14126 [(set (match_operand:V8SI 0 "register_operand" "=v")
14127 (vec_select:V8SI
14128 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
14129 (parallel [(match_operand 2 "const_0_to_3_operand")
14130 (match_operand 3 "const_0_to_3_operand")
14131 (match_operand 4 "const_0_to_3_operand")
14132 (match_operand 5 "const_0_to_3_operand")
14133 (match_operand 6 "const_4_to_7_operand")
14134 (match_operand 7 "const_4_to_7_operand")
14135 (match_operand 8 "const_4_to_7_operand")
14136 (match_operand 9 "const_4_to_7_operand")])))]
14137 "TARGET_AVX2
14138 && <mask_avx512vl_condition>
14139 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
14140 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
14141 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
14142 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
14143 {
14144 int mask = 0;
14145 mask |= INTVAL (operands[2]) << 0;
14146 mask |= INTVAL (operands[3]) << 2;
14147 mask |= INTVAL (operands[4]) << 4;
14148 mask |= INTVAL (operands[5]) << 6;
14149 operands[2] = GEN_INT (mask);
14150
14151 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14152 }
14153 [(set_attr "type" "sselog1")
14154 (set_attr "prefix" "maybe_evex")
14155 (set_attr "length_immediate" "1")
14156 (set_attr "mode" "OI")])
14157
14158 (define_expand "avx512vl_pshufd_mask"
14159 [(match_operand:V4SI 0 "register_operand")
14160 (match_operand:V4SI 1 "nonimmediate_operand")
14161 (match_operand:SI 2 "const_0_to_255_operand")
14162 (match_operand:V4SI 3 "register_operand")
14163 (match_operand:QI 4 "register_operand")]
14164 "TARGET_AVX512VL"
14165 {
14166 int mask = INTVAL (operands[2]);
14167 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
14168 GEN_INT ((mask >> 0) & 3),
14169 GEN_INT ((mask >> 2) & 3),
14170 GEN_INT ((mask >> 4) & 3),
14171 GEN_INT ((mask >> 6) & 3),
14172 operands[3], operands[4]));
14173 DONE;
14174 })
14175
14176 (define_expand "sse2_pshufd"
14177 [(match_operand:V4SI 0 "register_operand")
14178 (match_operand:V4SI 1 "vector_operand")
14179 (match_operand:SI 2 "const_int_operand")]
14180 "TARGET_SSE2"
14181 {
14182 int mask = INTVAL (operands[2]);
14183 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
14184 GEN_INT ((mask >> 0) & 3),
14185 GEN_INT ((mask >> 2) & 3),
14186 GEN_INT ((mask >> 4) & 3),
14187 GEN_INT ((mask >> 6) & 3)));
14188 DONE;
14189 })
14190
14191 (define_insn "sse2_pshufd_1<mask_name>"
14192 [(set (match_operand:V4SI 0 "register_operand" "=v")
14193 (vec_select:V4SI
14194 (match_operand:V4SI 1 "vector_operand" "vBm")
14195 (parallel [(match_operand 2 "const_0_to_3_operand")
14196 (match_operand 3 "const_0_to_3_operand")
14197 (match_operand 4 "const_0_to_3_operand")
14198 (match_operand 5 "const_0_to_3_operand")])))]
14199 "TARGET_SSE2 && <mask_avx512vl_condition>"
14200 {
14201 int mask = 0;
14202 mask |= INTVAL (operands[2]) << 0;
14203 mask |= INTVAL (operands[3]) << 2;
14204 mask |= INTVAL (operands[4]) << 4;
14205 mask |= INTVAL (operands[5]) << 6;
14206 operands[2] = GEN_INT (mask);
14207
14208 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14209 }
14210 [(set_attr "type" "sselog1")
14211 (set_attr "prefix_data16" "1")
14212 (set_attr "prefix" "<mask_prefix2>")
14213 (set_attr "length_immediate" "1")
14214 (set_attr "mode" "TI")])
14215
14216 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
14217 [(set (match_operand:V32HI 0 "register_operand" "=v")
14218 (unspec:V32HI
14219 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
14220 (match_operand:SI 2 "const_0_to_255_operand" "n")]
14221 UNSPEC_PSHUFLW))]
14222 "TARGET_AVX512BW"
14223 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14224 [(set_attr "type" "sselog")
14225 (set_attr "prefix" "evex")
14226 (set_attr "mode" "XI")])
14227
14228 (define_expand "avx512vl_pshuflwv3_mask"
14229 [(match_operand:V16HI 0 "register_operand")
14230 (match_operand:V16HI 1 "nonimmediate_operand")
14231 (match_operand:SI 2 "const_0_to_255_operand")
14232 (match_operand:V16HI 3 "register_operand")
14233 (match_operand:HI 4 "register_operand")]
14234 "TARGET_AVX512VL && TARGET_AVX512BW"
14235 {
14236 int mask = INTVAL (operands[2]);
14237 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
14238 GEN_INT ((mask >> 0) & 3),
14239 GEN_INT ((mask >> 2) & 3),
14240 GEN_INT ((mask >> 4) & 3),
14241 GEN_INT ((mask >> 6) & 3),
14242 GEN_INT (((mask >> 0) & 3) + 8),
14243 GEN_INT (((mask >> 2) & 3) + 8),
14244 GEN_INT (((mask >> 4) & 3) + 8),
14245 GEN_INT (((mask >> 6) & 3) + 8),
14246 operands[3], operands[4]));
14247 DONE;
14248 })
14249
14250 (define_expand "avx2_pshuflwv3"
14251 [(match_operand:V16HI 0 "register_operand")
14252 (match_operand:V16HI 1 "nonimmediate_operand")
14253 (match_operand:SI 2 "const_0_to_255_operand")]
14254 "TARGET_AVX2"
14255 {
14256 int mask = INTVAL (operands[2]);
14257 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
14258 GEN_INT ((mask >> 0) & 3),
14259 GEN_INT ((mask >> 2) & 3),
14260 GEN_INT ((mask >> 4) & 3),
14261 GEN_INT ((mask >> 6) & 3),
14262 GEN_INT (((mask >> 0) & 3) + 8),
14263 GEN_INT (((mask >> 2) & 3) + 8),
14264 GEN_INT (((mask >> 4) & 3) + 8),
14265 GEN_INT (((mask >> 6) & 3) + 8)));
14266 DONE;
14267 })
14268
14269 (define_insn "avx2_pshuflw_1<mask_name>"
14270 [(set (match_operand:V16HI 0 "register_operand" "=v")
14271 (vec_select:V16HI
14272 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
14273 (parallel [(match_operand 2 "const_0_to_3_operand")
14274 (match_operand 3 "const_0_to_3_operand")
14275 (match_operand 4 "const_0_to_3_operand")
14276 (match_operand 5 "const_0_to_3_operand")
14277 (const_int 4)
14278 (const_int 5)
14279 (const_int 6)
14280 (const_int 7)
14281 (match_operand 6 "const_8_to_11_operand")
14282 (match_operand 7 "const_8_to_11_operand")
14283 (match_operand 8 "const_8_to_11_operand")
14284 (match_operand 9 "const_8_to_11_operand")
14285 (const_int 12)
14286 (const_int 13)
14287 (const_int 14)
14288 (const_int 15)])))]
14289 "TARGET_AVX2
14290 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
14291 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
14292 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
14293 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
14294 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
14295 {
14296 int mask = 0;
14297 mask |= INTVAL (operands[2]) << 0;
14298 mask |= INTVAL (operands[3]) << 2;
14299 mask |= INTVAL (operands[4]) << 4;
14300 mask |= INTVAL (operands[5]) << 6;
14301 operands[2] = GEN_INT (mask);
14302
14303 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14304 }
14305 [(set_attr "type" "sselog")
14306 (set_attr "prefix" "maybe_evex")
14307 (set_attr "length_immediate" "1")
14308 (set_attr "mode" "OI")])
14309
14310 (define_expand "avx512vl_pshuflw_mask"
14311 [(match_operand:V8HI 0 "register_operand")
14312 (match_operand:V8HI 1 "nonimmediate_operand")
14313 (match_operand:SI 2 "const_0_to_255_operand")
14314 (match_operand:V8HI 3 "register_operand")
14315 (match_operand:QI 4 "register_operand")]
14316 "TARGET_AVX512VL && TARGET_AVX512BW"
14317 {
14318 int mask = INTVAL (operands[2]);
14319 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
14320 GEN_INT ((mask >> 0) & 3),
14321 GEN_INT ((mask >> 2) & 3),
14322 GEN_INT ((mask >> 4) & 3),
14323 GEN_INT ((mask >> 6) & 3),
14324 operands[3], operands[4]));
14325 DONE;
14326 })
14327
14328 (define_expand "sse2_pshuflw"
14329 [(match_operand:V8HI 0 "register_operand")
14330 (match_operand:V8HI 1 "vector_operand")
14331 (match_operand:SI 2 "const_int_operand")]
14332 "TARGET_SSE2"
14333 {
14334 int mask = INTVAL (operands[2]);
14335 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
14336 GEN_INT ((mask >> 0) & 3),
14337 GEN_INT ((mask >> 2) & 3),
14338 GEN_INT ((mask >> 4) & 3),
14339 GEN_INT ((mask >> 6) & 3)));
14340 DONE;
14341 })
14342
14343 (define_insn "sse2_pshuflw_1<mask_name>"
14344 [(set (match_operand:V8HI 0 "register_operand" "=v")
14345 (vec_select:V8HI
14346 (match_operand:V8HI 1 "vector_operand" "vBm")
14347 (parallel [(match_operand 2 "const_0_to_3_operand")
14348 (match_operand 3 "const_0_to_3_operand")
14349 (match_operand 4 "const_0_to_3_operand")
14350 (match_operand 5 "const_0_to_3_operand")
14351 (const_int 4)
14352 (const_int 5)
14353 (const_int 6)
14354 (const_int 7)])))]
14355 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14356 {
14357 int mask = 0;
14358 mask |= INTVAL (operands[2]) << 0;
14359 mask |= INTVAL (operands[3]) << 2;
14360 mask |= INTVAL (operands[4]) << 4;
14361 mask |= INTVAL (operands[5]) << 6;
14362 operands[2] = GEN_INT (mask);
14363
14364 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14365 }
14366 [(set_attr "type" "sselog")
14367 (set_attr "prefix_data16" "0")
14368 (set_attr "prefix_rep" "1")
14369 (set_attr "prefix" "maybe_vex")
14370 (set_attr "length_immediate" "1")
14371 (set_attr "mode" "TI")])
14372
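;; pshufhw shuffles the high four words of each 128-bit lane, so the
;; expanders bias the selectors by +4 (and +12 for the upper lane) and the
;; *_1 insns subtract 4 again when rebuilding the immediate; the identity
;; selection (4,5,6,7) therefore maps back to imm 0xE4.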
14373 (define_expand "avx2_pshufhwv3"
14374 [(match_operand:V16HI 0 "register_operand")
14375 (match_operand:V16HI 1 "nonimmediate_operand")
14376 (match_operand:SI 2 "const_0_to_255_operand")]
14377 "TARGET_AVX2"
14378 {
14379 int mask = INTVAL (operands[2]);
14380 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
14381 GEN_INT (((mask >> 0) & 3) + 4),
14382 GEN_INT (((mask >> 2) & 3) + 4),
14383 GEN_INT (((mask >> 4) & 3) + 4),
14384 GEN_INT (((mask >> 6) & 3) + 4),
14385 GEN_INT (((mask >> 0) & 3) + 12),
14386 GEN_INT (((mask >> 2) & 3) + 12),
14387 GEN_INT (((mask >> 4) & 3) + 12),
14388 GEN_INT (((mask >> 6) & 3) + 12)));
14389 DONE;
14390 })
14391
14392 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
14393 [(set (match_operand:V32HI 0 "register_operand" "=v")
14394 (unspec:V32HI
14395 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
14396 (match_operand:SI 2 "const_0_to_255_operand" "n")]
14397 UNSPEC_PSHUFHW))]
14398 "TARGET_AVX512BW"
14399 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14400 [(set_attr "type" "sselog")
14401 (set_attr "prefix" "evex")
14402 (set_attr "mode" "XI")])
14403
14404 (define_expand "avx512vl_pshufhwv3_mask"
14405 [(match_operand:V16HI 0 "register_operand")
14406 (match_operand:V16HI 1 "nonimmediate_operand")
14407 (match_operand:SI 2 "const_0_to_255_operand")
14408 (match_operand:V16HI 3 "register_operand")
14409 (match_operand:HI 4 "register_operand")]
14410 "TARGET_AVX512VL && TARGET_AVX512BW"
14411 {
14412 int mask = INTVAL (operands[2]);
14413 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
14414 GEN_INT (((mask >> 0) & 3) + 4),
14415 GEN_INT (((mask >> 2) & 3) + 4),
14416 GEN_INT (((mask >> 4) & 3) + 4),
14417 GEN_INT (((mask >> 6) & 3) + 4),
14418 GEN_INT (((mask >> 0) & 3) + 12),
14419 GEN_INT (((mask >> 2) & 3) + 12),
14420 GEN_INT (((mask >> 4) & 3) + 12),
14421 GEN_INT (((mask >> 6) & 3) + 12),
14422 operands[3], operands[4]));
14423 DONE;
14424 })
14425
14426 (define_insn "avx2_pshufhw_1<mask_name>"
14427 [(set (match_operand:V16HI 0 "register_operand" "=v")
14428 (vec_select:V16HI
14429 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
14430 (parallel [(const_int 0)
14431 (const_int 1)
14432 (const_int 2)
14433 (const_int 3)
14434 (match_operand 2 "const_4_to_7_operand")
14435 (match_operand 3 "const_4_to_7_operand")
14436 (match_operand 4 "const_4_to_7_operand")
14437 (match_operand 5 "const_4_to_7_operand")
14438 (const_int 8)
14439 (const_int 9)
14440 (const_int 10)
14441 (const_int 11)
14442 (match_operand 6 "const_12_to_15_operand")
14443 (match_operand 7 "const_12_to_15_operand")
14444 (match_operand 8 "const_12_to_15_operand")
14445 (match_operand 9 "const_12_to_15_operand")])))]
14446 "TARGET_AVX2
14447 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
14448 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
14449 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
14450 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
14451 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
14452 {
14453 int mask = 0;
14454 mask |= (INTVAL (operands[2]) - 4) << 0;
14455 mask |= (INTVAL (operands[3]) - 4) << 2;
14456 mask |= (INTVAL (operands[4]) - 4) << 4;
14457 mask |= (INTVAL (operands[5]) - 4) << 6;
14458 operands[2] = GEN_INT (mask);
14459
14460 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14461 }
14462 [(set_attr "type" "sselog")
14463 (set_attr "prefix" "maybe_evex")
14464 (set_attr "length_immediate" "1")
14465 (set_attr "mode" "OI")])
14466
14467 (define_expand "avx512vl_pshufhw_mask"
14468 [(match_operand:V8HI 0 "register_operand")
14469 (match_operand:V8HI 1 "nonimmediate_operand")
14470 (match_operand:SI 2 "const_0_to_255_operand")
14471 (match_operand:V8HI 3 "register_operand")
14472 (match_operand:QI 4 "register_operand")]
14473 "TARGET_AVX512VL && TARGET_AVX512BW"
14474 {
14475 int mask = INTVAL (operands[2]);
14476 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
14477 GEN_INT (((mask >> 0) & 3) + 4),
14478 GEN_INT (((mask >> 2) & 3) + 4),
14479 GEN_INT (((mask >> 4) & 3) + 4),
14480 GEN_INT (((mask >> 6) & 3) + 4),
14481 operands[3], operands[4]));
14482 DONE;
14483 })
14484
14485 (define_expand "sse2_pshufhw"
14486 [(match_operand:V8HI 0 "register_operand")
14487 (match_operand:V8HI 1 "vector_operand")
14488 (match_operand:SI 2 "const_int_operand")]
14489 "TARGET_SSE2"
14490 {
14491 int mask = INTVAL (operands[2]);
14492 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
14493 GEN_INT (((mask >> 0) & 3) + 4),
14494 GEN_INT (((mask >> 2) & 3) + 4),
14495 GEN_INT (((mask >> 4) & 3) + 4),
14496 GEN_INT (((mask >> 6) & 3) + 4)));
14497 DONE;
14498 })
14499
14500 (define_insn "sse2_pshufhw_1<mask_name>"
14501 [(set (match_operand:V8HI 0 "register_operand" "=v")
14502 (vec_select:V8HI
14503 (match_operand:V8HI 1 "vector_operand" "vBm")
14504 (parallel [(const_int 0)
14505 (const_int 1)
14506 (const_int 2)
14507 (const_int 3)
14508 (match_operand 2 "const_4_to_7_operand")
14509 (match_operand 3 "const_4_to_7_operand")
14510 (match_operand 4 "const_4_to_7_operand")
14511 (match_operand 5 "const_4_to_7_operand")])))]
14512 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14513 {
14514 int mask = 0;
14515 mask |= (INTVAL (operands[2]) - 4) << 0;
14516 mask |= (INTVAL (operands[3]) - 4) << 2;
14517 mask |= (INTVAL (operands[4]) - 4) << 4;
14518 mask |= (INTVAL (operands[5]) - 4) << 6;
14519 operands[2] = GEN_INT (mask);
14520
14521 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14522 }
14523 [(set_attr "type" "sselog")
14524 (set_attr "prefix_rep" "1")
14525 (set_attr "prefix_data16" "0")
14526 (set_attr "prefix" "maybe_vex")
14527 (set_attr "length_immediate" "1")
14528 (set_attr "mode" "TI")])
14529
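;; sse2_loadd and sse2_loadld place a 32-bit scalar in element 0; the other
;; elements are taken from operand 1, which is constant zero for the movd
;; alternatives.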
14530 (define_expand "sse2_loadd"
14531 [(set (match_operand:V4SI 0 "register_operand")
14532 (vec_merge:V4SI
14533 (vec_duplicate:V4SI
14534 (match_operand:SI 1 "nonimmediate_operand"))
14535 (match_dup 2)
14536 (const_int 1)))]
14537 "TARGET_SSE"
14538 "operands[2] = CONST0_RTX (V4SImode);")
14539
14540 (define_insn "sse2_loadld"
14541 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,x,v")
14542 (vec_merge:V4SI
14543 (vec_duplicate:V4SI
14544 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
14545 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
14546 (const_int 1)))]
14547 "TARGET_SSE"
14548 "@
14549 %vmovd\t{%2, %0|%0, %2}
14550 %vmovd\t{%2, %0|%0, %2}
14551 movss\t{%2, %0|%0, %2}
14552 movss\t{%2, %0|%0, %2}
14553 vmovss\t{%2, %1, %0|%0, %1, %2}"
14554 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
14555 (set_attr "type" "ssemov")
14556 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
14557 (set_attr "mode" "TI,TI,V4SF,SF,SF")
14558 (set (attr "preferred_for_speed")
14559 (cond [(eq_attr "alternative" "1")
14560 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
14561 ]
14562 (symbol_ref "true")))])
14563
14564 ;; QI and HI modes handled by pextr patterns.
14565 (define_mode_iterator PEXTR_MODE12
14566 [(V16QI "TARGET_SSE4_1") V8HI])
14567
14568 (define_insn "*vec_extract<mode>"
14569 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
14570 (vec_select:<ssescalarmode>
14571 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
14572 (parallel
14573 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
14574 "TARGET_SSE2"
14575 "@
14576 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14577 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
14578 vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14579 vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14580 [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
14581 (set_attr "type" "sselog1")
14582 (set_attr "prefix_data16" "1")
14583 (set (attr "prefix_extra")
14584 (if_then_else
14585 (and (eq_attr "alternative" "0,2")
14586 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
14587 (const_string "*")
14588 (const_string "1")))
14589 (set_attr "length_immediate" "1")
14590 (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
14591 (set_attr "mode" "TI")])
14592
14593 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
14594 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
14595 (zero_extend:SWI48
14596 (vec_select:<PEXTR_MODE12:ssescalarmode>
14597 (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
14598 (parallel
14599 [(match_operand:SI 2
14600 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
14601 "TARGET_SSE2"
14602 "@
14603 %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14604 vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
14605 [(set_attr "isa" "*,avx512bw")
14606 (set_attr "type" "sselog1")
14607 (set_attr "prefix_data16" "1")
14608 (set (attr "prefix_extra")
14609 (if_then_else
14610 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
14611 (const_string "*")
14612 (const_string "1")))
14613 (set_attr "length_immediate" "1")
14614 (set_attr "prefix" "maybe_vex")
14615 (set_attr "mode" "TI")])
14616
14617 (define_insn "*vec_extract<mode>_mem"
14618 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
14619 (vec_select:<ssescalarmode>
14620 (match_operand:VI12_128 1 "memory_operand" "o")
14621 (parallel
14622 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
14623 "TARGET_SSE"
14624 "#")
14625
14626 (define_insn "*vec_extract<ssevecmodelower>_0"
14627 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,v ,m")
14628 (vec_select:SWI48
14629 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "m ,v,vm,v")
14630 (parallel [(const_int 0)])))]
14631 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
14632 "#"
14633 [(set_attr "isa" "*,sse2,*,*")
14634 (set (attr "preferred_for_speed")
14635 (cond [(eq_attr "alternative" "1")
14636 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
14637 ]
14638 (symbol_ref "true")))])
14639
14640 (define_insn "*vec_extractv2di_0_sse"
14641 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,x ,m")
14642 (vec_select:DI
14643 (match_operand:V2DI 1 "nonimmediate_operand" " x,xm,x")
14644 (parallel [(const_int 0)])))]
14645 "TARGET_SSE && !TARGET_64BIT
14646 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
14647 "#"
14648 [(set_attr "isa" "sse4,*,*")
14649 (set (attr "preferred_for_speed")
14650 (cond [(eq_attr "alternative" "0")
14651 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
14652 ]
14653 (symbol_ref "true")))])
14654
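;; On 32-bit targets the DImode element-0 extract is split after reload into
;; a lowpart SImode move plus a vec_select of element 1 from the V4SI view,
;; with split_double_mode providing the two scalar halves of the destination.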
14655 (define_split
14656 [(set (match_operand:DI 0 "general_reg_operand")
14657 (vec_select:DI
14658 (match_operand:V2DI 1 "register_operand")
14659 (parallel [(const_int 0)])))]
14660 "TARGET_SSE4_1 && !TARGET_64BIT
14661 && reload_completed"
14662 [(set (match_dup 2) (match_dup 4))
14663 (set (match_dup 3)
14664 (vec_select:SI
14665 (match_dup 5)
14666 (parallel [(const_int 1)])))]
14667 {
14668 operands[4] = gen_lowpart (SImode, operands[1]);
14669 operands[5] = gen_lowpart (V4SImode, operands[1]);
14670 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
14671 })
14672
14673 (define_split
14674 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
14675 (vec_select:SWI48x
14676 (match_operand:<ssevecmode> 1 "register_operand")
14677 (parallel [(const_int 0)])))]
14678 "TARGET_SSE && reload_completed"
14679 [(set (match_dup 0) (match_dup 1))]
14680 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
14681
14682 (define_insn "*vec_extractv4si_0_zext_sse4"
14683 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
14684 (zero_extend:DI
14685 (vec_select:SI
14686 (match_operand:V4SI 1 "register_operand" "v,x,v")
14687 (parallel [(const_int 0)]))))]
14688 "TARGET_SSE4_1"
14689 "#"
14690 [(set_attr "isa" "x64,*,avx512f")
14691 (set (attr "preferred_for_speed")
14692 (cond [(eq_attr "alternative" "0")
14693 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
14694 ]
14695 (symbol_ref "true")))])
14696
14697 (define_insn "*vec_extractv4si_0_zext"
14698 [(set (match_operand:DI 0 "register_operand" "=r")
14699 (zero_extend:DI
14700 (vec_select:SI
14701 (match_operand:V4SI 1 "register_operand" "x")
14702 (parallel [(const_int 0)]))))]
14703 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
14704 "#")
14705
14706 (define_split
14707 [(set (match_operand:DI 0 "register_operand")
14708 (zero_extend:DI
14709 (vec_select:SI
14710 (match_operand:V4SI 1 "register_operand")
14711 (parallel [(const_int 0)]))))]
14712 "TARGET_SSE2 && reload_completed"
14713 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
14714 "operands[1] = gen_lowpart (SImode, operands[1]);")
14715
14716 (define_insn "*vec_extractv4si"
14717 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
14718 (vec_select:SI
14719 (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
14720 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
14721 "TARGET_SSE4_1"
14722 {
14723 switch (which_alternative)
14724 {
14725 case 0:
14726 case 1:
14727 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
14728
14729 case 2:
14730 case 3:
14731 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
14732 return "psrldq\t{%2, %0|%0, %2}";
14733
14734 case 4:
14735 case 5:
14736 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
14737 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
14738
14739 default:
14740 gcc_unreachable ();
14741 }
14742 }
14743 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
14744 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
14745 (set (attr "prefix_extra")
14746 (if_then_else (eq_attr "alternative" "0,1")
14747 (const_string "1")
14748 (const_string "*")))
14749 (set_attr "length_immediate" "1")
14750 (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
14751 (set_attr "mode" "TI")])
14752
14753 (define_insn "*vec_extractv4si_zext"
14754 [(set (match_operand:DI 0 "register_operand" "=r,r")
14755 (zero_extend:DI
14756 (vec_select:SI
14757 (match_operand:V4SI 1 "register_operand" "x,v")
14758 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
14759 "TARGET_64BIT && TARGET_SSE4_1"
14760 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
14761 [(set_attr "isa" "*,avx512dq")
14762 (set_attr "type" "sselog1")
14763 (set_attr "prefix_extra" "1")
14764 (set_attr "length_immediate" "1")
14765 (set_attr "prefix" "maybe_vex")
14766 (set_attr "mode" "TI")])
14767
14768 (define_insn "*vec_extractv4si_mem"
14769 [(set (match_operand:SI 0 "register_operand" "=x,r")
14770 (vec_select:SI
14771 (match_operand:V4SI 1 "memory_operand" "o,o")
14772 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
14773 "TARGET_SSE"
14774 "#")
14775
14776 (define_insn_and_split "*vec_extractv4si_zext_mem"
14777 [(set (match_operand:DI 0 "register_operand" "=x,r")
14778 (zero_extend:DI
14779 (vec_select:SI
14780 (match_operand:V4SI 1 "memory_operand" "o,o")
14781 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
14782 "TARGET_64BIT && TARGET_SSE"
14783 "#"
14784 "&& reload_completed"
14785 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
14786 {
14787 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
14788 })
14789
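;; Extracting the high DImode element is done with pextrq, movhps, a byte
;; shift (psrldq $8) or movhlps depending on the enabled ISA and the
;; destination; the memory-source alternatives emit "#" and are split into a
;; scalar load.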
14790 (define_insn "*vec_extractv2di_1"
14791 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
14792 (vec_select:DI
14793 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
14794 (parallel [(const_int 1)])))]
14795 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
14796 "@
14797 %vpextrq\t{$1, %1, %0|%0, %1, 1}
14798 vpextrq\t{$1, %1, %0|%0, %1, 1}
14799 %vmovhps\t{%1, %0|%0, %1}
14800 psrldq\t{$8, %0|%0, 8}
14801 vpsrldq\t{$8, %1, %0|%0, %1, 8}
14802 vpsrldq\t{$8, %1, %0|%0, %1, 8}
14803 movhlps\t{%1, %0|%0, %1}
14804 #
14805 #"
14806 [(set (attr "isa")
14807 (cond [(eq_attr "alternative" "0")
14808 (const_string "x64_sse4")
14809 (eq_attr "alternative" "1")
14810 (const_string "x64_avx512dq")
14811 (eq_attr "alternative" "3")
14812 (const_string "sse2_noavx")
14813 (eq_attr "alternative" "4")
14814 (const_string "avx")
14815 (eq_attr "alternative" "5")
14816 (const_string "avx512bw")
14817 (eq_attr "alternative" "6")
14818 (const_string "noavx")
14819 (eq_attr "alternative" "8")
14820 (const_string "x64")
14821 ]
14822 (const_string "*")))
14823 (set (attr "type")
14824 (cond [(eq_attr "alternative" "2,6,7")
14825 (const_string "ssemov")
14826 (eq_attr "alternative" "3,4,5")
14827 (const_string "sseishft1")
14828 (eq_attr "alternative" "8")
14829 (const_string "imov")
14830 ]
14831 (const_string "sselog1")))
14832 (set (attr "length_immediate")
14833 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
14834 (const_string "1")
14835 (const_string "*")))
14836 (set (attr "prefix_rex")
14837 (if_then_else (eq_attr "alternative" "0,1")
14838 (const_string "1")
14839 (const_string "*")))
14840 (set (attr "prefix_extra")
14841 (if_then_else (eq_attr "alternative" "0,1")
14842 (const_string "1")
14843 (const_string "*")))
14844 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
14845 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
14846
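;; Extracting any element of an integer vector that lives in memory reduces
;; after reload to a scalar load from the element's byte offset.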
14847 (define_split
14848 [(set (match_operand:<ssescalarmode> 0 "register_operand")
14849 (vec_select:<ssescalarmode>
14850 (match_operand:VI_128 1 "memory_operand")
14851 (parallel
14852 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
14853 "TARGET_SSE && reload_completed"
14854 [(set (match_dup 0) (match_dup 1))]
14855 {
14856 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
14857
14858 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
14859 })
14860
14861 (define_insn "*vec_extractv2ti"
14862 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
14863 (vec_select:TI
14864 (match_operand:V2TI 1 "register_operand" "x,v")
14865 (parallel
14866 [(match_operand:SI 2 "const_0_to_1_operand")])))]
14867 "TARGET_AVX"
14868 "@
14869 vextract%~128\t{%2, %1, %0|%0, %1, %2}
14870 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
14871 [(set_attr "type" "sselog")
14872 (set_attr "prefix_extra" "1")
14873 (set_attr "length_immediate" "1")
14874 (set_attr "prefix" "vex,evex")
14875 (set_attr "mode" "OI")])
14876
14877 (define_insn "*vec_extractv4ti"
14878 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
14879 (vec_select:TI
14880 (match_operand:V4TI 1 "register_operand" "v")
14881 (parallel
14882 [(match_operand:SI 2 "const_0_to_3_operand")])))]
14883 "TARGET_AVX512F"
14884 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
14885 [(set_attr "type" "sselog")
14886 (set_attr "prefix_extra" "1")
14887 (set_attr "length_immediate" "1")
14888 (set_attr "prefix" "evex")
14889 (set_attr "mode" "XI")])
14890
14891 (define_mode_iterator VEXTRACTI128_MODE
14892 [(V4TI "TARGET_AVX512F") V2TI])
14893
14894 (define_split
14895 [(set (match_operand:TI 0 "nonimmediate_operand")
14896 (vec_select:TI
14897 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
14898 (parallel [(const_int 0)])))]
14899 "TARGET_AVX
14900 && reload_completed
14901 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
14902 [(set (match_dup 0) (match_dup 1))]
14903 "operands[1] = gen_lowpart (TImode, operands[1]);")
14904
14905 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
14906 ;; vector modes into vec_extract*.
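;; For 64- and 32-byte vectors the low 128-bit part is extracted first
;; (vec_extract_lo_*), then element 0 of the resulting 16-byte vector is
;; selected.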
14907 (define_split
14908 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
14909 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
14910 "can_create_pseudo_p ()
14911 && REG_P (operands[1])
14912 && VECTOR_MODE_P (GET_MODE (operands[1]))
14913 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
14914 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
14915 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
14916 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
14917 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
14918 (parallel [(const_int 0)])))]
14919 {
14920 rtx tmp;
14921
14922 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
14923 {
14924 case 64:
14925 if (<MODE>mode == SImode)
14926 {
14927 tmp = gen_reg_rtx (V8SImode);
14928 emit_insn (gen_vec_extract_lo_v16si (tmp,
14929 gen_lowpart (V16SImode,
14930 operands[1])));
14931 }
14932 else
14933 {
14934 tmp = gen_reg_rtx (V4DImode);
14935 emit_insn (gen_vec_extract_lo_v8di (tmp,
14936 gen_lowpart (V8DImode,
14937 operands[1])));
14938 }
14939 operands[1] = tmp;
14940 /* FALLTHRU */
14941 case 32:
14942 tmp = gen_reg_rtx (<ssevecmode>mode);
14943 if (<MODE>mode == SImode)
14944 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
14945 operands[1])));
14946 else
14947 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
14948 operands[1])));
14949 operands[1] = tmp;
14950 break;
14951 case 16:
14952 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
14953 break;
14954 }
14955 })
14956
14957 (define_insn "*vec_concatv2si_sse4_1"
14958 [(set (match_operand:V2SI 0 "register_operand"
14959 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
14960 (vec_concat:V2SI
14961 (match_operand:SI 1 "nonimmediate_operand"
14962 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
14963 (match_operand:SI 2 "nonimm_or_0_operand"
14964 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
14965 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14966 "@
14967 pinsrd\t{$1, %2, %0|%0, %2, 1}
14968 pinsrd\t{$1, %2, %0|%0, %2, 1}
14969 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
14970 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
14971 punpckldq\t{%2, %0|%0, %2}
14972 punpckldq\t{%2, %0|%0, %2}
14973 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
14974 %vmovd\t{%1, %0|%0, %1}
14975 punpckldq\t{%2, %0|%0, %2}
14976 movd\t{%1, %0|%0, %1}"
14977 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
14978 (set (attr "mmx_isa")
14979 (if_then_else (eq_attr "alternative" "8,9")
14980 (const_string "native")
14981 (const_string "*")))
14982 (set (attr "type")
14983 (cond [(eq_attr "alternative" "7")
14984 (const_string "ssemov")
14985 (eq_attr "alternative" "8")
14986 (const_string "mmxcvt")
14987 (eq_attr "alternative" "9")
14988 (const_string "mmxmov")
14989 ]
14990 (const_string "sselog")))
14991 (set (attr "prefix_extra")
14992 (if_then_else (eq_attr "alternative" "0,1,2,3")
14993 (const_string "1")
14994 (const_string "*")))
14995 (set (attr "length_immediate")
14996 (if_then_else (eq_attr "alternative" "0,1,2,3")
14997 (const_string "1")
14998 (const_string "*")))
14999 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
15000 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
15001
15002 ;; ??? In theory we can match memory for the MMX alternative, but allowing
15003 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
15004 ;; alternatives pretty much forces the MMX alternative to be chosen.
15005 (define_insn "*vec_concatv2si"
15006 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,x,x,*y,*y")
15007 (vec_concat:V2SI
15008 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,0,m, 0,rm")
15009 (match_operand:SI 2 "reg_or_0_operand" " x,C ,x,C,*y,C")))]
15010 "TARGET_SSE && !TARGET_SSE4_1"
15011 "@
15012 punpckldq\t{%2, %0|%0, %2}
15013 movd\t{%1, %0|%0, %1}
15014 unpcklps\t{%2, %0|%0, %2}
15015 movss\t{%1, %0|%0, %1}
15016 punpckldq\t{%2, %0|%0, %2}
15017 movd\t{%1, %0|%0, %1}"
15018 [(set_attr "isa" "sse2,sse2,*,*,*,*")
15019 (set_attr "mmx_isa" "*,*,*,*,native,native")
15020 (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
15021 (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
15022
15023 (define_insn "*vec_concatv4si"
15024 [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v")
15025 (vec_concat:V4SI
15026 (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v")
15027 (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
15028 "TARGET_SSE"
15029 "@
15030 punpcklqdq\t{%2, %0|%0, %2}
15031 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
15032 movlhps\t{%2, %0|%0, %2}
15033 movhps\t{%2, %0|%0, %q2}
15034 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
15035 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
15036 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
15037 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
15038 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
15039
15040 (define_insn "*vec_concatv4si_0"
15041 [(set (match_operand:V4SI 0 "register_operand" "=v,x")
15042 (vec_concat:V4SI
15043 (match_operand:V2SI 1 "nonimmediate_operand" "vm,?!*y")
15044 (match_operand:V2SI 2 "const0_operand" " C,C")))]
15045 "TARGET_SSE2"
15046 "@
15047 %vmovq\t{%1, %0|%0, %1}
15048 movq2dq\t{%1, %0|%0, %1}"
15049 [(set_attr "mmx_isa" "*,native")
15050 (set_attr "type" "ssemov")
15051 (set_attr "prefix" "maybe_vex,orig")
15052 (set_attr "mode" "TI")])
15053
15054 (define_insn "vec_concatv2di"
15055 [(set (match_operand:V2DI 0 "register_operand"
15056 "=Yr,*x,x ,v ,x,v ,x,x,v")
15057 (vec_concat:V2DI
15058 (match_operand:DI 1 "register_operand"
15059 " 0, 0,x ,Yv,0,Yv,0,0,v")
15060 (match_operand:DI 2 "nonimmediate_operand"
15061 " rm,rm,rm,rm,x,Yv,x,m,m")))]
15062 "TARGET_SSE"
15063 "@
15064 pinsrq\t{$1, %2, %0|%0, %2, 1}
15065 pinsrq\t{$1, %2, %0|%0, %2, 1}
15066 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
15067 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
15068 punpcklqdq\t{%2, %0|%0, %2}
15069 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
15070 movlhps\t{%2, %0|%0, %2}
15071 movhps\t{%2, %0|%0, %2}
15072 vmovhps\t{%2, %1, %0|%0, %1, %2}"
15073 [(set (attr "isa")
15074 (cond [(eq_attr "alternative" "0,1")
15075 (const_string "x64_sse4_noavx")
15076 (eq_attr "alternative" "2")
15077 (const_string "x64_avx")
15078 (eq_attr "alternative" "3")
15079 (const_string "x64_avx512dq")
15080 (eq_attr "alternative" "4")
15081 (const_string "sse2_noavx")
15082 (eq_attr "alternative" "5,8")
15083 (const_string "avx")
15084 ]
15085 (const_string "noavx")))
15086 (set (attr "type")
15087 (if_then_else
15088 (eq_attr "alternative" "0,1,2,3,4,5")
15089 (const_string "sselog")
15090 (const_string "ssemov")))
15091 (set (attr "prefix_rex")
15092 (if_then_else (eq_attr "alternative" "0,1,2,3")
15093 (const_string "1")
15094 (const_string "*")))
15095 (set (attr "prefix_extra")
15096 (if_then_else (eq_attr "alternative" "0,1,2,3")
15097 (const_string "1")
15098 (const_string "*")))
15099 (set (attr "length_immediate")
15100 (if_then_else (eq_attr "alternative" "0,1,2,3")
15101 (const_string "1")
15102 (const_string "*")))
15103 (set (attr "prefix")
15104 (cond [(eq_attr "alternative" "2")
15105 (const_string "vex")
15106 (eq_attr "alternative" "3")
15107 (const_string "evex")
15108 (eq_attr "alternative" "5,8")
15109 (const_string "maybe_evex")
15110 ]
15111 (const_string "orig")))
15112 (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
15113
15114 (define_insn "*vec_concatv2di_0"
15115 [(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
15116 (vec_concat:V2DI
15117 (match_operand:DI 1 "nonimmediate_operand" " r,vm,?!*y")
15118 (match_operand:DI 2 "const0_operand" " C,C ,C")))]
15119 "TARGET_SSE2"
15120 "@
15121 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
15122 %vmovq\t{%1, %0|%0, %1}
15123 movq2dq\t{%1, %0|%0, %1}"
15124 [(set_attr "isa" "x64,*,*")
15125 (set_attr "mmx_isa" "*,*,native")
15126 (set_attr "type" "ssemov")
15127 (set_attr "prefix_rex" "1,*,*")
15128 (set_attr "prefix" "maybe_vex,maybe_vex,orig")
15129 (set_attr "mode" "TI")
15130 (set (attr "preferred_for_speed")
15131 (cond [(eq_attr "alternative" "0")
15132 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15133 ]
15134 (symbol_ref "true")))])
15135
15136 ;; vmovq also clears the higher bits (see the commented sketch after this pattern).
15137 (define_insn "vec_set<mode>_0"
15138 [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=v,v")
15139 (vec_merge:VI8_AVX_AVX512F
15140 (vec_duplicate:VI8_AVX_AVX512F
15141 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,vm"))
15142 (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
15143 (const_int 1)))]
15144 "TARGET_AVX"
15145 "vmovq\t{%2, %x0|%x0, %2}"
15146 [(set_attr "isa" "x64,*")
15147 (set_attr "type" "ssemov")
15148 (set_attr "prefix_rex" "1,*")
15149 (set_attr "prefix" "maybe_evex")
15150 (set_attr "mode" "TI")
15151 (set (attr "preferred_for_speed")
15152 (cond [(eq_attr "alternative" "0")
15153 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15154 ]
15155 (symbol_ref "true")))])
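
;; Illustrative note (not part of the machine description): the vec_merge of
;; a vec_duplicate with a zero vector above is essentially the form that e.g.
;; _mm_cvtsi64_si128 produces, and a single vmovq both inserts the scalar and
;; zeroes the remaining elements.  A minimal C sketch, assuming a 64-bit
;; target and GCC's <immintrin.h> (the helper name is ours):
;;
;;   #include <immintrin.h>
;;
;;   __m128i load_low_qword (long long x)
;;   {
;;     /* Element 0 gets X; the upper element is cleared - one vmovq.  */
;;     return _mm_cvtsi64_si128 (x);
;;   }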
15156
15157 (define_expand "vec_unpacks_lo_<mode>"
15158 [(match_operand:<sseunpackmode> 0 "register_operand")
15159 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15160 "TARGET_SSE2"
15161 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
15162
15163 (define_expand "vec_unpacks_hi_<mode>"
15164 [(match_operand:<sseunpackmode> 0 "register_operand")
15165 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15166 "TARGET_SSE2"
15167 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
15168
15169 (define_expand "vec_unpacku_lo_<mode>"
15170 [(match_operand:<sseunpackmode> 0 "register_operand")
15171 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15172 "TARGET_SSE2"
15173 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
15174
15175 (define_expand "vec_unpacks_sbool_lo_qi"
15176 [(match_operand:QI 0 "register_operand")
15177 (match_operand:QI 1 "register_operand")
15178 (match_operand:QI 2 "const_int_operand")]
15179 "TARGET_AVX512F"
15180 {
15181 if (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 4)
15182 FAIL;
15183 emit_move_insn (operands[0], operands[1]);
15184 DONE;
15185 })
15186
15187 (define_expand "vec_unpacks_lo_hi"
15188 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
15189 (match_operand:HI 1 "register_operand"))]
15190 "TARGET_AVX512F")
15191
15192 (define_expand "vec_unpacks_lo_si"
15193 [(set (match_operand:HI 0 "register_operand")
15194 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
15195 "TARGET_AVX512F")
15196
15197 (define_expand "vec_unpacks_lo_di"
15198 [(set (match_operand:SI 0 "register_operand")
15199 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
15200 "TARGET_AVX512BW")
15201
15202 (define_expand "vec_unpacku_hi_<mode>"
15203 [(match_operand:<sseunpackmode> 0 "register_operand")
15204 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15205 "TARGET_SSE2"
15206 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
15207
15208 (define_expand "vec_unpacks_sbool_hi_qi"
15209 [(match_operand:QI 0 "register_operand")
15210 (match_operand:QI 1 "register_operand")
15211 (match_operand:QI 2 "const_int_operand")]
15212 "TARGET_AVX512F"
15213 {
15214 HOST_WIDE_INT nunits = INTVAL (operands[2]);
15215 if (nunits != 8 && nunits != 4)
15216 FAIL;
15217 if (TARGET_AVX512DQ)
15218 emit_insn (gen_klshiftrtqi (operands[0], operands[1],
15219 GEN_INT (nunits / 2)));
15220 else
15221 {
15222 rtx tem = gen_reg_rtx (HImode);
15223 emit_insn (gen_klshiftrthi (tem, lowpart_subreg (HImode, operands[1],
15224 QImode),
15225 GEN_INT (nunits / 2)));
15226 emit_move_insn (operands[0], lowpart_subreg (QImode, tem, HImode));
15227 }
15228 DONE;
15229 })
15230
15231 (define_expand "vec_unpacks_hi_hi"
15232 [(parallel
15233 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
15234 (lshiftrt:HI (match_operand:HI 1 "register_operand")
15235 (const_int 8)))
15236 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
15237 "TARGET_AVX512F")
15238
15239 (define_expand "vec_unpacks_hi_<mode>"
15240 [(parallel
15241 [(set (subreg:SWI48x
15242 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
15243 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
15244 (match_dup 2)))
15245 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
15246 "TARGET_AVX512BW"
15247 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
15248
15249 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15250 ;;
15251 ;; Miscellaneous
15252 ;;
15253 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15254
15255 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
15256 [(set (match_operand:VI12_AVX2 0 "register_operand")
15257 (truncate:VI12_AVX2
15258 (lshiftrt:<ssedoublemode>
15259 (plus:<ssedoublemode>
15260 (plus:<ssedoublemode>
15261 (zero_extend:<ssedoublemode>
15262 (match_operand:VI12_AVX2 1 "vector_operand"))
15263 (zero_extend:<ssedoublemode>
15264 (match_operand:VI12_AVX2 2 "vector_operand")))
15265 (match_dup <mask_expand_op3>))
15266 (const_int 1))))]
15267 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15268 {
15269 operands[<mask_expand_op3>] = CONST1_RTX(<MODE>mode);
15270 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
15271 })
15272
15273 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
15274 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
15275 (truncate:VI12_AVX2
15276 (lshiftrt:<ssedoublemode>
15277 (plus:<ssedoublemode>
15278 (plus:<ssedoublemode>
15279 (zero_extend:<ssedoublemode>
15280 (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
15281 (zero_extend:<ssedoublemode>
15282 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
15283 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
15284 (const_int 1))))]
15285 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
15286 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15287 "@
15288 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
15289 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15290 [(set_attr "isa" "noavx,avx")
15291 (set_attr "type" "sseiadd")
15292 (set_attr "prefix_data16" "1,*")
15293 (set_attr "prefix" "orig,<mask_prefix>")
15294 (set_attr "mode" "<sseinsnmode>")])
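
;; Illustrative note (not part of the machine description): the expander and
;; insn above encode the unsigned rounded average (a + b + 1) >> 1, computed
;; in a double-width mode so the sum cannot overflow.  A minimal C sketch,
;; assuming GCC's <immintrin.h> (helper names are ours):
;;
;;   #include <immintrin.h>
;;
;;   /* Scalar reference for one byte element of pavgb.  */
;;   static unsigned char avg_round (unsigned char a, unsigned char b)
;;   {
;;     return (unsigned char) (((unsigned int) a + b + 1) >> 1);
;;   }
;;
;;   __m128i avg16 (__m128i a, __m128i b)
;;   {
;;     return _mm_avg_epu8 (a, b);	/* pavgb */
;;   }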
15295
15296 ;; The correct representation for this is absolutely enormous, and
15297 ;; surely not generally useful.
15298 (define_insn "<sse2_avx2>_psadbw"
15299 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
15300 (unspec:VI8_AVX2_AVX512BW
15301 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
15302 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
15303 UNSPEC_PSADBW))]
15304 "TARGET_SSE2"
15305 "@
15306 psadbw\t{%2, %0|%0, %2}
15307 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
15308 [(set_attr "isa" "noavx,avx")
15309 (set_attr "type" "sseiadd")
15310 (set_attr "atom_unit" "simul")
15311 (set_attr "prefix_data16" "1,*")
15312 (set_attr "prefix" "orig,maybe_evex")
15313 (set_attr "mode" "<sseinsnmode>")])
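
;; Illustrative note (not part of the machine description): psadbw is
;; modelled with an unspec because, as the comment above says, the exact RTL
;; (per 64-bit lane, eight absolute byte differences summed into one word)
;; would be enormous.  A minimal C sketch of the semantics, assuming GCC's
;; <immintrin.h> (helper names are ours):
;;
;;   #include <immintrin.h>
;;
;;   /* Scalar reference for one 64-bit lane: sum of absolute byte
;;      differences, zero-extended into the low word of the lane.  */
;;   static unsigned short sad8 (const unsigned char *a, const unsigned char *b)
;;   {
;;     unsigned short sum = 0;
;;     for (int i = 0; i < 8; i++)
;;       sum += a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
;;     return sum;
;;   }
;;
;;   __m128i sad16 (__m128i a, __m128i b)
;;   {
;;     return _mm_sad_epu8 (a, b);	/* psadbw */
;;   }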
15314
15315 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
15316 [(set (match_operand:SI 0 "register_operand" "=r")
15317 (unspec:SI
15318 [(match_operand:VF_128_256 1 "register_operand" "x")]
15319 UNSPEC_MOVMSK))]
15320 "TARGET_SSE"
15321 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
15322 [(set_attr "type" "ssemov")
15323 (set_attr "prefix" "maybe_vex")
15324 (set_attr "mode" "<MODE>")])
15325
15326 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
15327 [(set (match_operand:DI 0 "register_operand" "=r")
15328 (zero_extend:DI
15329 (unspec:SI
15330 [(match_operand:VF_128_256 1 "register_operand" "x")]
15331 UNSPEC_MOVMSK)))]
15332 "TARGET_64BIT && TARGET_SSE"
15333 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
15334 [(set_attr "type" "ssemov")
15335 (set_attr "prefix" "maybe_vex")
15336 (set_attr "mode" "<MODE>")])
15337
15338 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
15339 [(set (match_operand:SI 0 "register_operand" "=r")
15340 (unspec:SI
15341 [(lt:VF_128_256
15342 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15343 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
15344 UNSPEC_MOVMSK))]
15345 "TARGET_SSE"
15346 "#"
15347 "&& reload_completed"
15348 [(set (match_dup 0)
15349 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15350 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15351 [(set_attr "type" "ssemov")
15352 (set_attr "prefix" "maybe_vex")
15353 (set_attr "mode" "<MODE>")])
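
;; Illustrative note (not part of the machine description): movmsk only reads
;; the sign bit of each element, so taking the mask of (x < 0) is the same as
;; taking the mask of x itself; the splitter above merely drops the redundant
;; comparison.  A minimal C sketch, assuming GCC's <immintrin.h> (the helper
;; name is ours):
;;
;;   #include <immintrin.h>
;;
;;   int sign_mask (__m128i x)
;;   {
;;     /* Equivalent to taking the movemask of an elementwise (x < 0).  */
;;     return _mm_movemask_ps (_mm_castsi128_ps (x));
;;   }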
15354
15355 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt"
15356 [(set (match_operand:DI 0 "register_operand" "=r")
15357 (zero_extend:DI
15358 (unspec:SI
15359 [(lt:VF_128_256
15360 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15361 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
15362 UNSPEC_MOVMSK)))]
15363 "TARGET_64BIT && TARGET_SSE"
15364 "#"
15365 "&& reload_completed"
15366 [(set (match_dup 0)
15367 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15368 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15369 [(set_attr "type" "ssemov")
15370 (set_attr "prefix" "maybe_vex")
15371 (set_attr "mode" "<MODE>")])
15372
15373 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
15374 [(set (match_operand:SI 0 "register_operand" "=r")
15375 (unspec:SI
15376 [(subreg:VF_128_256
15377 (ashiftrt:<sseintvecmode>
15378 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15379 (match_operand:QI 2 "const_int_operand" "n")) 0)]
15380 UNSPEC_MOVMSK))]
15381 "TARGET_SSE"
15382 "#"
15383 "&& reload_completed"
15384 [(set (match_dup 0)
15385 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15386 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15387 [(set_attr "type" "ssemov")
15388 (set_attr "prefix" "maybe_vex")
15389 (set_attr "mode" "<MODE>")])
15390
15391 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift"
15392 [(set (match_operand:DI 0 "register_operand" "=r")
15393 (zero_extend:DI
15394 (unspec:SI
15395 [(subreg:VF_128_256
15396 (ashiftrt:<sseintvecmode>
15397 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15398 (match_operand:QI 2 "const_int_operand" "n")) 0)]
15399 UNSPEC_MOVMSK)))]
15400 "TARGET_64BIT && TARGET_SSE"
15401 "#"
15402 "&& reload_completed"
15403 [(set (match_dup 0)
15404 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15405 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15406 [(set_attr "type" "ssemov")
15407 (set_attr "prefix" "maybe_vex")
15408 (set_attr "mode" "<MODE>")])
15409
15410 (define_insn "<sse2_avx2>_pmovmskb"
15411 [(set (match_operand:SI 0 "register_operand" "=r")
15412 (unspec:SI
15413 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
15414 UNSPEC_MOVMSK))]
15415 "TARGET_SSE2"
15416 "%vpmovmskb\t{%1, %0|%0, %1}"
15417 [(set_attr "type" "ssemov")
15418 (set (attr "prefix_data16")
15419 (if_then_else
15420 (match_test "TARGET_AVX")
15421 (const_string "*")
15422 (const_string "1")))
15423 (set_attr "prefix" "maybe_vex")
15424 (set_attr "mode" "SI")])
15425
15426 (define_insn "*<sse2_avx2>_pmovmskb_zext"
15427 [(set (match_operand:DI 0 "register_operand" "=r")
15428 (zero_extend:DI
15429 (unspec:SI
15430 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
15431 UNSPEC_MOVMSK)))]
15432 "TARGET_64BIT && TARGET_SSE2"
15433 "%vpmovmskb\t{%1, %k0|%k0, %1}"
15434 [(set_attr "type" "ssemov")
15435 (set (attr "prefix_data16")
15436 (if_then_else
15437 (match_test "TARGET_AVX")
15438 (const_string "*")
15439 (const_string "1")))
15440 (set_attr "prefix" "maybe_vex")
15441 (set_attr "mode" "SI")])
15442
15443 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
15444 [(set (match_operand:SI 0 "register_operand" "=r")
15445 (unspec:SI
15446 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
15447 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
15448 UNSPEC_MOVMSK))]
15449 "TARGET_SSE2"
15450 "#"
15451 ""
15452 [(set (match_dup 0)
15453 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15454 ""
15455 [(set_attr "type" "ssemov")
15456 (set (attr "prefix_data16")
15457 (if_then_else
15458 (match_test "TARGET_AVX")
15459 (const_string "*")
15460 (const_string "1")))
15461 (set_attr "prefix" "maybe_vex")
15462 (set_attr "mode" "SI")])
15463
15464 (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
15465 [(set (match_operand:DI 0 "register_operand" "=r")
15466 (zero_extend:DI
15467 (unspec:SI
15468 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
15469 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
15470 UNSPEC_MOVMSK)))]
15471 "TARGET_64BIT && TARGET_SSE2"
15472 "#"
15473 ""
15474 [(set (match_dup 0)
15475 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15476 ""
15477 [(set_attr "type" "ssemov")
15478 (set (attr "prefix_data16")
15479 (if_then_else
15480 (match_test "TARGET_AVX")
15481 (const_string "*")
15482 (const_string "1")))
15483 (set_attr "prefix" "maybe_vex")
15484 (set_attr "mode" "SI")])
15485
15486 (define_expand "sse2_maskmovdqu"
15487 [(set (match_operand:V16QI 0 "memory_operand")
15488 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
15489 (match_operand:V16QI 2 "register_operand")
15490 (match_dup 0)]
15491 UNSPEC_MASKMOV))]
15492 "TARGET_SSE2")
15493
15494 (define_insn "*sse2_maskmovdqu"
15495 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
15496 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
15497 (match_operand:V16QI 2 "register_operand" "x")
15498 (mem:V16QI (match_dup 0))]
15499 UNSPEC_MASKMOV))]
15500 "TARGET_SSE2"
15501 {
15502 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
15503 that requires %v to be at the beginning of the opcode name. */
15504 if (Pmode != word_mode)
15505 fputs ("\taddr32", asm_out_file);
15506 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
15507 }
15508 [(set_attr "type" "ssemov")
15509 (set_attr "prefix_data16" "1")
15510 (set (attr "length_address")
15511 (symbol_ref ("Pmode != word_mode")))
15512 ;; The implicit %rdi operand confuses default length_vex computation.
15513 (set (attr "length_vex")
15514 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
15515 (set_attr "prefix" "maybe_vex")
15516 (set_attr "znver1_decode" "vector")
15517 (set_attr "mode" "TI")])
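
;; Illustrative note (not part of the machine description): maskmovdqu stores
;; each byte of the data operand whose corresponding mask byte has its sign
;; bit set, to the address implicitly held in %rdi/%edi - hence the
;; hard-coded "D" constraint above.  A minimal C sketch, assuming GCC's
;; <immintrin.h> (the helper name is ours):
;;
;;   #include <immintrin.h>
;;
;;   void masked_store (__m128i data, __m128i mask, char *p)
;;   {
;;     _mm_maskmoveu_si128 (data, mask, p);	/* maskmovdqu */
;;   }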
15518
15519 (define_insn "sse_ldmxcsr"
15520 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
15521 UNSPECV_LDMXCSR)]
15522 "TARGET_SSE"
15523 "%vldmxcsr\t%0"
15524 [(set_attr "type" "sse")
15525 (set_attr "atom_sse_attr" "mxcsr")
15526 (set_attr "prefix" "maybe_vex")
15527 (set_attr "memory" "load")])
15528
15529 (define_insn "sse_stmxcsr"
15530 [(set (match_operand:SI 0 "memory_operand" "=m")
15531 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
15532 "TARGET_SSE"
15533 "%vstmxcsr\t%0"
15534 [(set_attr "type" "sse")
15535 (set_attr "atom_sse_attr" "mxcsr")
15536 (set_attr "prefix" "maybe_vex")
15537 (set_attr "memory" "store")])
15538
15539 (define_insn "sse2_clflush"
15540 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
15541 UNSPECV_CLFLUSH)]
15542 "TARGET_SSE2"
15543 "clflush\t%a0"
15544 [(set_attr "type" "sse")
15545 (set_attr "atom_sse_attr" "fence")
15546 (set_attr "memory" "unknown")])
15547
15548 ;; As per the AMD and Intel ISA manuals, the first operand is the extensions
15549 ;; value and goes to %ecx.  The second operand is the hints value and goes
15550 ;; to %eax.
15551 (define_insn "sse3_mwait"
15552 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
15553 (match_operand:SI 1 "register_operand" "a")]
15554 UNSPECV_MWAIT)]
15555 "TARGET_SSE3"
15556 ;; The 64-bit version is "mwait %rax,%rcx", but only the lower 32 bits are
15557 ;; used.  Since 32-bit register operands are implicitly zero-extended to
15558 ;; 64 bits, we only need to set up 32-bit registers.
15559 "mwait"
15560 [(set_attr "length" "3")])
15561
15562 (define_insn "sse3_monitor_<mode>"
15563 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
15564 (match_operand:SI 1 "register_operand" "c")
15565 (match_operand:SI 2 "register_operand" "d")]
15566 UNSPECV_MONITOR)]
15567 "TARGET_SSE3"
15568 ;; The 64-bit version is "monitor %rax,%rcx,%rdx", but only the lower 32 bits
15569 ;; of RCX and RDX are used.  Since 32-bit register operands are implicitly
15570 ;; zero-extended to 64 bits, we only need to set up 32-bit registers.
15571 "%^monitor"
15572 [(set (attr "length")
15573 (symbol_ref ("(Pmode != word_mode) + 3")))])
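
;; Illustrative note (not part of the machine description): the fixed
;; register assignment described above is what the SSE3 intrinsics rely on -
;; monitor takes the address in %rax/%eax with extensions in %ecx and hints
;; in %edx, mwait takes extensions in %ecx and hints in %eax.  A minimal C
;; sketch, assuming GCC's <pmmintrin.h> and -msse3 (the helper name is ours):
;;
;;   #include <pmmintrin.h>
;;
;;   void wait_on (const void *addr)
;;   {
;;     _mm_monitor (addr, 0, 0);	/* monitor */
;;     _mm_mwait (0, 0);		/* mwait */
;;   }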
15574
15575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15576 ;;
15577 ;; SSSE3 instructions
15578 ;;
15579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15580
15581 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
15582
15583 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
15584 [(set (match_operand:V16HI 0 "register_operand" "=x")
15585 (vec_concat:V16HI
15586 (vec_concat:V8HI
15587 (vec_concat:V4HI
15588 (vec_concat:V2HI
15589 (ssse3_plusminus:HI
15590 (vec_select:HI
15591 (match_operand:V16HI 1 "register_operand" "x")
15592 (parallel [(const_int 0)]))
15593 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
15594 (ssse3_plusminus:HI
15595 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
15596 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
15597 (vec_concat:V2HI
15598 (ssse3_plusminus:HI
15599 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
15600 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
15601 (ssse3_plusminus:HI
15602 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
15603 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
15604 (vec_concat:V4HI
15605 (vec_concat:V2HI
15606 (ssse3_plusminus:HI
15607 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
15608 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
15609 (ssse3_plusminus:HI
15610 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
15611 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
15612 (vec_concat:V2HI
15613 (ssse3_plusminus:HI
15614 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
15615 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
15616 (ssse3_plusminus:HI
15617 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
15618 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
15619 (vec_concat:V8HI
15620 (vec_concat:V4HI
15621 (vec_concat:V2HI
15622 (ssse3_plusminus:HI
15623 (vec_select:HI
15624 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
15625 (parallel [(const_int 0)]))
15626 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
15627 (ssse3_plusminus:HI
15628 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
15629 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
15630 (vec_concat:V2HI
15631 (ssse3_plusminus:HI
15632 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
15633 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
15634 (ssse3_plusminus:HI
15635 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
15636 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
15637 (vec_concat:V4HI
15638 (vec_concat:V2HI
15639 (ssse3_plusminus:HI
15640 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
15641 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
15642 (ssse3_plusminus:HI
15643 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
15644 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
15645 (vec_concat:V2HI
15646 (ssse3_plusminus:HI
15647 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
15648 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
15649 (ssse3_plusminus:HI
15650 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
15651 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
15652 "TARGET_AVX2"
15653 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
15654 [(set_attr "type" "sseiadd")
15655 (set_attr "prefix_extra" "1")
15656 (set_attr "prefix" "vex")
15657 (set_attr "mode" "OI")])
15658
15659 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
15660 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
15661 (vec_concat:V8HI
15662 (vec_concat:V4HI
15663 (vec_concat:V2HI
15664 (ssse3_plusminus:HI
15665 (vec_select:HI
15666 (match_operand:V8HI 1 "register_operand" "0,x")
15667 (parallel [(const_int 0)]))
15668 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
15669 (ssse3_plusminus:HI
15670 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
15671 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
15672 (vec_concat:V2HI
15673 (ssse3_plusminus:HI
15674 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
15675 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
15676 (ssse3_plusminus:HI
15677 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
15678 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
15679 (vec_concat:V4HI
15680 (vec_concat:V2HI
15681 (ssse3_plusminus:HI
15682 (vec_select:HI
15683 (match_operand:V8HI 2 "vector_operand" "xBm,xm")
15684 (parallel [(const_int 0)]))
15685 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
15686 (ssse3_plusminus:HI
15687 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
15688 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
15689 (vec_concat:V2HI
15690 (ssse3_plusminus:HI
15691 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
15692 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
15693 (ssse3_plusminus:HI
15694 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
15695 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
15696 "TARGET_SSSE3"
15697 "@
15698 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
15699 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
15700 [(set_attr "isa" "noavx,avx")
15701 (set_attr "type" "sseiadd")
15702 (set_attr "atom_unit" "complex")
15703 (set_attr "prefix_data16" "1,*")
15704 (set_attr "prefix_extra" "1")
15705 (set_attr "prefix" "orig,vex")
15706 (set_attr "mode" "TI")])
15707
15708 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
15709 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
15710 (vec_concat:V4HI
15711 (vec_concat:V2HI
15712 (ssse3_plusminus:HI
15713 (vec_select:HI
15714 (match_operand:V4HI 1 "register_operand" "0,0,Yv")
15715 (parallel [(const_int 0)]))
15716 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
15717 (ssse3_plusminus:HI
15718 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
15719 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
15720 (vec_concat:V2HI
15721 (ssse3_plusminus:HI
15722 (vec_select:HI
15723 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
15724 (parallel [(const_int 0)]))
15725 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
15726 (ssse3_plusminus:HI
15727 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
15728 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
15729 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
15730 "@
15731 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
15732 #
15733 #"
15734 "TARGET_MMX_WITH_SSE && reload_completed"
15735 [(const_int 0)]
15736 {
15737 /* Generate SSE version of the operation. */
15738 rtx op0 = lowpart_subreg (V8HImode, operands[0],
15739 GET_MODE (operands[0]));
15740 rtx op1 = lowpart_subreg (V8HImode, operands[1],
15741 GET_MODE (operands[1]));
15742 rtx op2 = lowpart_subreg (V8HImode, operands[2],
15743 GET_MODE (operands[2]));
15744 emit_insn (gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2));
15745 ix86_move_vector_high_sse_to_mmx (op0);
15746 DONE;
15747 }
15748 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
15749 (set_attr "type" "sseiadd")
15750 (set_attr "atom_unit" "complex")
15751 (set_attr "prefix_extra" "1")
15752 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15753 (set_attr "mode" "DI,TI,TI")])
15754
15755 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
15756 [(set (match_operand:V8SI 0 "register_operand" "=x")
15757 (vec_concat:V8SI
15758 (vec_concat:V4SI
15759 (vec_concat:V2SI
15760 (plusminus:SI
15761 (vec_select:SI
15762 (match_operand:V8SI 1 "register_operand" "x")
15763 (parallel [(const_int 0)]))
15764 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
15765 (plusminus:SI
15766 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
15767 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
15768 (vec_concat:V2SI
15769 (plusminus:SI
15770 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
15771 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
15772 (plusminus:SI
15773 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
15774 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
15775 (vec_concat:V4SI
15776 (vec_concat:V2SI
15777 (plusminus:SI
15778 (vec_select:SI
15779 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
15780 (parallel [(const_int 0)]))
15781 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
15782 (plusminus:SI
15783 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
15784 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
15785 (vec_concat:V2SI
15786 (plusminus:SI
15787 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
15788 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
15789 (plusminus:SI
15790 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
15791 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
15792 "TARGET_AVX2"
15793 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
15794 [(set_attr "type" "sseiadd")
15795 (set_attr "prefix_extra" "1")
15796 (set_attr "prefix" "vex")
15797 (set_attr "mode" "OI")])
15798
15799 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
15800 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
15801 (vec_concat:V4SI
15802 (vec_concat:V2SI
15803 (plusminus:SI
15804 (vec_select:SI
15805 (match_operand:V4SI 1 "register_operand" "0,x")
15806 (parallel [(const_int 0)]))
15807 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
15808 (plusminus:SI
15809 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
15810 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
15811 (vec_concat:V2SI
15812 (plusminus:SI
15813 (vec_select:SI
15814 (match_operand:V4SI 2 "vector_operand" "xBm,xm")
15815 (parallel [(const_int 0)]))
15816 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
15817 (plusminus:SI
15818 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
15819 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
15820 "TARGET_SSSE3"
15821 "@
15822 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
15823 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
15824 [(set_attr "isa" "noavx,avx")
15825 (set_attr "type" "sseiadd")
15826 (set_attr "atom_unit" "complex")
15827 (set_attr "prefix_data16" "1,*")
15828 (set_attr "prefix_extra" "1")
15829 (set_attr "prefix" "orig,vex")
15830 (set_attr "mode" "TI")])
15831
15832 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
15833 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
15834 (vec_concat:V2SI
15835 (plusminus:SI
15836 (vec_select:SI
15837 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
15838 (parallel [(const_int 0)]))
15839 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
15840 (plusminus:SI
15841 (vec_select:SI
15842 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
15843 (parallel [(const_int 0)]))
15844 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
15845 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
15846 "@
15847 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
15848 #
15849 #"
15850 "TARGET_MMX_WITH_SSE && reload_completed"
15851 [(const_int 0)]
15852 {
15853 /* Generate SSE version of the operation. */
15854 rtx op0 = lowpart_subreg (V4SImode, operands[0],
15855 GET_MODE (operands[0]));
15856 rtx op1 = lowpart_subreg (V4SImode, operands[1],
15857 GET_MODE (operands[1]));
15858 rtx op2 = lowpart_subreg (V4SImode, operands[2],
15859 GET_MODE (operands[2]));
15860 emit_insn (gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2));
15861 ix86_move_vector_high_sse_to_mmx (op0);
15862 DONE;
15863 }
15864 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
15865 (set_attr "type" "sseiadd")
15866 (set_attr "atom_unit" "complex")
15867 (set_attr "prefix_extra" "1")
15868 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15869 (set_attr "mode" "DI,TI,TI")])
15870
15871 (define_insn "avx2_pmaddubsw256"
15872 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
15873 (ss_plus:V16HI
15874 (mult:V16HI
15875 (zero_extend:V16HI
15876 (vec_select:V16QI
15877 (match_operand:V32QI 1 "register_operand" "x,v")
15878 (parallel [(const_int 0) (const_int 2)
15879 (const_int 4) (const_int 6)
15880 (const_int 8) (const_int 10)
15881 (const_int 12) (const_int 14)
15882 (const_int 16) (const_int 18)
15883 (const_int 20) (const_int 22)
15884 (const_int 24) (const_int 26)
15885 (const_int 28) (const_int 30)])))
15886 (sign_extend:V16HI
15887 (vec_select:V16QI
15888 (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
15889 (parallel [(const_int 0) (const_int 2)
15890 (const_int 4) (const_int 6)
15891 (const_int 8) (const_int 10)
15892 (const_int 12) (const_int 14)
15893 (const_int 16) (const_int 18)
15894 (const_int 20) (const_int 22)
15895 (const_int 24) (const_int 26)
15896 (const_int 28) (const_int 30)]))))
15897 (mult:V16HI
15898 (zero_extend:V16HI
15899 (vec_select:V16QI (match_dup 1)
15900 (parallel [(const_int 1) (const_int 3)
15901 (const_int 5) (const_int 7)
15902 (const_int 9) (const_int 11)
15903 (const_int 13) (const_int 15)
15904 (const_int 17) (const_int 19)
15905 (const_int 21) (const_int 23)
15906 (const_int 25) (const_int 27)
15907 (const_int 29) (const_int 31)])))
15908 (sign_extend:V16HI
15909 (vec_select:V16QI (match_dup 2)
15910 (parallel [(const_int 1) (const_int 3)
15911 (const_int 5) (const_int 7)
15912 (const_int 9) (const_int 11)
15913 (const_int 13) (const_int 15)
15914 (const_int 17) (const_int 19)
15915 (const_int 21) (const_int 23)
15916 (const_int 25) (const_int 27)
15917 (const_int 29) (const_int 31)]))))))]
15918 "TARGET_AVX2"
15919 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
15920 [(set_attr "isa" "*,avx512bw")
15921 (set_attr "type" "sseiadd")
15922 (set_attr "prefix_extra" "1")
15923 (set_attr "prefix" "vex,evex")
15924 (set_attr "mode" "OI")])
15925
15926 ;; The correct representation for this is absolutely enormous, and
15927 ;; surely not generally useful.
15928 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
15929 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
15930 (unspec:VI2_AVX512VL
15931 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
15932 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
15933 UNSPEC_PMADDUBSW512))]
15934 "TARGET_AVX512BW"
15935 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
15936 [(set_attr "type" "sseiadd")
15937 (set_attr "prefix" "evex")
15938 (set_attr "mode" "XI")])
15939
15940 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
15941 [(set (match_operand:V32HI 0 "register_operand" "=v")
15942 (truncate:V32HI
15943 (lshiftrt:V32SI
15944 (plus:V32SI
15945 (lshiftrt:V32SI
15946 (mult:V32SI
15947 (sign_extend:V32SI
15948 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
15949 (sign_extend:V32SI
15950 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
15951 (const_int 14))
15952 (const_vector:V32HI [(const_int 1) (const_int 1)
15953 (const_int 1) (const_int 1)
15954 (const_int 1) (const_int 1)
15955 (const_int 1) (const_int 1)
15956 (const_int 1) (const_int 1)
15957 (const_int 1) (const_int 1)
15958 (const_int 1) (const_int 1)
15959 (const_int 1) (const_int 1)
15960 (const_int 1) (const_int 1)
15961 (const_int 1) (const_int 1)
15962 (const_int 1) (const_int 1)
15963 (const_int 1) (const_int 1)
15964 (const_int 1) (const_int 1)
15965 (const_int 1) (const_int 1)
15966 (const_int 1) (const_int 1)
15967 (const_int 1) (const_int 1)]))
15968 (const_int 1))))]
15969 "TARGET_AVX512BW"
15970 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15971 [(set_attr "type" "sseimul")
15972 (set_attr "prefix" "evex")
15973 (set_attr "mode" "XI")])
15974
15975 (define_insn "ssse3_pmaddubsw128"
15976 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
15977 (ss_plus:V8HI
15978 (mult:V8HI
15979 (zero_extend:V8HI
15980 (vec_select:V8QI
15981 (match_operand:V16QI 1 "register_operand" "0,x,v")
15982 (parallel [(const_int 0) (const_int 2)
15983 (const_int 4) (const_int 6)
15984 (const_int 8) (const_int 10)
15985 (const_int 12) (const_int 14)])))
15986 (sign_extend:V8HI
15987 (vec_select:V8QI
15988 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
15989 (parallel [(const_int 0) (const_int 2)
15990 (const_int 4) (const_int 6)
15991 (const_int 8) (const_int 10)
15992 (const_int 12) (const_int 14)]))))
15993 (mult:V8HI
15994 (zero_extend:V8HI
15995 (vec_select:V8QI (match_dup 1)
15996 (parallel [(const_int 1) (const_int 3)
15997 (const_int 5) (const_int 7)
15998 (const_int 9) (const_int 11)
15999 (const_int 13) (const_int 15)])))
16000 (sign_extend:V8HI
16001 (vec_select:V8QI (match_dup 2)
16002 (parallel [(const_int 1) (const_int 3)
16003 (const_int 5) (const_int 7)
16004 (const_int 9) (const_int 11)
16005 (const_int 13) (const_int 15)]))))))]
16006 "TARGET_SSSE3"
16007 "@
16008 pmaddubsw\t{%2, %0|%0, %2}
16009 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
16010 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16011 [(set_attr "isa" "noavx,avx,avx512bw")
16012 (set_attr "type" "sseiadd")
16013 (set_attr "atom_unit" "simul")
16014 (set_attr "prefix_data16" "1,*,*")
16015 (set_attr "prefix_extra" "1")
16016 (set_attr "prefix" "orig,vex,evex")
16017 (set_attr "mode" "TI")])
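
;; Illustrative note (not part of the machine description): the pattern above
;; multiplies unsigned bytes of operand 1 by signed bytes of operand 2 and
;; adds adjacent products with signed saturation, which is what the
;; zero_extend/sign_extend/ss_plus chain expresses.  A minimal C sketch,
;; assuming GCC's <immintrin.h> (helper names are ours):
;;
;;   #include <immintrin.h>
;;
;;   /* Scalar reference for one output word of pmaddubsw.  */
;;   static short maddubs (unsigned char a0, unsigned char a1,
;;                         signed char b0, signed char b1)
;;   {
;;     int t = (int) a0 * b0 + (int) a1 * b1;
;;     return (short) (t > 32767 ? 32767 : t < -32768 ? -32768 : t);
;;   }
;;
;;   __m128i maddubs16 (__m128i a, __m128i b)
;;   {
;;     return _mm_maddubs_epi16 (a, b);	/* pmaddubsw */
;;   }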
16018
16019 (define_insn "ssse3_pmaddubsw"
16020 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16021 (ss_plus:V4HI
16022 (mult:V4HI
16023 (zero_extend:V4HI
16024 (vec_select:V4QI
16025 (match_operand:V8QI 1 "register_operand" "0,0,Yv")
16026 (parallel [(const_int 0) (const_int 2)
16027 (const_int 4) (const_int 6)])))
16028 (sign_extend:V4HI
16029 (vec_select:V4QI
16030 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
16031 (parallel [(const_int 0) (const_int 2)
16032 (const_int 4) (const_int 6)]))))
16033 (mult:V4HI
16034 (zero_extend:V4HI
16035 (vec_select:V4QI (match_dup 1)
16036 (parallel [(const_int 1) (const_int 3)
16037 (const_int 5) (const_int 7)])))
16038 (sign_extend:V4HI
16039 (vec_select:V4QI (match_dup 2)
16040 (parallel [(const_int 1) (const_int 3)
16041 (const_int 5) (const_int 7)]))))))]
16042 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16043 "@
16044 pmaddubsw\t{%2, %0|%0, %2}
16045 pmaddubsw\t{%2, %0|%0, %2}
16046 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16047 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16048 (set_attr "type" "sseiadd")
16049 (set_attr "atom_unit" "simul")
16050 (set_attr "prefix_extra" "1")
16051 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16052 (set_attr "mode" "DI,TI,TI")])
16053
16054 (define_mode_iterator PMULHRSW
16055 [V8HI (V16HI "TARGET_AVX2")])
16056
16057 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
16058 [(set (match_operand:PMULHRSW 0 "register_operand")
16059 (vec_merge:PMULHRSW
16060 (truncate:PMULHRSW
16061 (lshiftrt:<ssedoublemode>
16062 (plus:<ssedoublemode>
16063 (lshiftrt:<ssedoublemode>
16064 (mult:<ssedoublemode>
16065 (sign_extend:<ssedoublemode>
16066 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
16067 (sign_extend:<ssedoublemode>
16068 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
16069 (const_int 14))
16070 (match_dup 5))
16071 (const_int 1)))
16072 (match_operand:PMULHRSW 3 "register_operand")
16073 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
16074 "TARGET_AVX512BW && TARGET_AVX512VL"
16075 {
16076 operands[5] = CONST1_RTX(<MODE>mode);
16077 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
16078 })
16079
16080 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
16081 [(set (match_operand:PMULHRSW 0 "register_operand")
16082 (truncate:PMULHRSW
16083 (lshiftrt:<ssedoublemode>
16084 (plus:<ssedoublemode>
16085 (lshiftrt:<ssedoublemode>
16086 (mult:<ssedoublemode>
16087 (sign_extend:<ssedoublemode>
16088 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
16089 (sign_extend:<ssedoublemode>
16090 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
16091 (const_int 14))
16092 (match_dup 3))
16093 (const_int 1))))]
16094 "TARGET_SSSE3"
16095 {
16096 operands[3] = CONST1_RTX(<MODE>mode);
16097 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
16098 })
16099
16100 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
16101 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
16102 (truncate:VI2_AVX2
16103 (lshiftrt:<ssedoublemode>
16104 (plus:<ssedoublemode>
16105 (lshiftrt:<ssedoublemode>
16106 (mult:<ssedoublemode>
16107 (sign_extend:<ssedoublemode>
16108 (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
16109 (sign_extend:<ssedoublemode>
16110 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
16111 (const_int 14))
16112 (match_operand:VI2_AVX2 3 "const1_operand"))
16113 (const_int 1))))]
16114 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
16115 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16116 "@
16117 pmulhrsw\t{%2, %0|%0, %2}
16118 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
16119 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
16120 [(set_attr "isa" "noavx,avx,avx512bw")
16121 (set_attr "type" "sseimul")
16122 (set_attr "prefix_data16" "1,*,*")
16123 (set_attr "prefix_extra" "1")
16124 (set_attr "prefix" "orig,maybe_evex,evex")
16125 (set_attr "mode" "<sseinsnmode>")])
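
;; Illustrative note (not part of the machine description): pmulhrsw forms
;; the full 32-bit signed product, shifts it right by 14, adds 1 to round,
;; and shifts right once more; the const1 operand supplied by the expanders
;; above is that rounding constant.  A minimal C sketch, assuming GCC's
;; <immintrin.h> (helper names are ours):
;;
;;   #include <immintrin.h>
;;
;;   /* Scalar reference for one element of pmulhrsw.  */
;;   static short mulhrs (short a, short b)
;;   {
;;     return (short) ((((int) a * b >> 14) + 1) >> 1);
;;   }
;;
;;   __m128i mulhrs8 (__m128i a, __m128i b)
;;   {
;;     return _mm_mulhrs_epi16 (a, b);	/* pmulhrsw */
;;   }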
16126
16127 (define_expand "ssse3_pmulhrswv4hi3"
16128 [(set (match_operand:V4HI 0 "register_operand")
16129 (truncate:V4HI
16130 (lshiftrt:V4SI
16131 (plus:V4SI
16132 (lshiftrt:V4SI
16133 (mult:V4SI
16134 (sign_extend:V4SI
16135 (match_operand:V4HI 1 "register_mmxmem_operand"))
16136 (sign_extend:V4SI
16137 (match_operand:V4HI 2 "register_mmxmem_operand")))
16138 (const_int 14))
16139 (match_dup 3))
16140 (const_int 1))))]
16141 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16142 {
16143 operands[3] = CONST1_RTX(V4HImode);
16144 ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
16145 })
16146
16147 (define_insn "*ssse3_pmulhrswv4hi3"
16148 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16149 (truncate:V4HI
16150 (lshiftrt:V4SI
16151 (plus:V4SI
16152 (lshiftrt:V4SI
16153 (mult:V4SI
16154 (sign_extend:V4SI
16155 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
16156 (sign_extend:V4SI
16157 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
16158 (const_int 14))
16159 (match_operand:V4HI 3 "const1_operand"))
16160 (const_int 1))))]
16161 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
16162 && TARGET_SSSE3
16163 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16164 "@
16165 pmulhrsw\t{%2, %0|%0, %2}
16166 pmulhrsw\t{%2, %0|%0, %2}
16167 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
16168 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16169 (set_attr "type" "sseimul")
16170 (set_attr "prefix_extra" "1")
16171 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16172 (set_attr "mode" "DI,TI,TI")])
16173
16174 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
16175 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
16176 (unspec:VI1_AVX512
16177 [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
16178 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
16179 UNSPEC_PSHUFB))]
16180 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
16181 "@
16182 pshufb\t{%2, %0|%0, %2}
16183 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
16184 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16185 [(set_attr "isa" "noavx,avx,avx512bw")
16186 (set_attr "type" "sselog1")
16187 (set_attr "prefix_data16" "1,*,*")
16188 (set_attr "prefix_extra" "1")
16189 (set_attr "prefix" "orig,maybe_evex,evex")
16190 (set_attr "btver2_decode" "vector")
16191 (set_attr "mode" "<sseinsnmode>")])
16192
16193 (define_insn_and_split "ssse3_pshufbv8qi3"
16194 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
16195 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
16196 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
16197 UNSPEC_PSHUFB))
16198 (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
16199 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16200 "@
16201 pshufb\t{%2, %0|%0, %2}
16202 #
16203 #"
16204 "TARGET_MMX_WITH_SSE && reload_completed"
16205 [(set (match_dup 3) (match_dup 5))
16206 (set (match_dup 3)
16207 (and:V4SI (match_dup 3) (match_dup 2)))
16208 (set (match_dup 0)
16209 (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
16210 {
16211   /* Emulate the MMX version of pshufb with the SSE version by masking out
16212      bit 3 of each shuffle control byte.  */
16213 operands[0] = lowpart_subreg (V16QImode, operands[0],
16214 GET_MODE (operands[0]));
16215 operands[1] = lowpart_subreg (V16QImode, operands[1],
16216 GET_MODE (operands[1]));
16217 operands[2] = lowpart_subreg (V4SImode, operands[2],
16218 GET_MODE (operands[2]));
16219 operands[4] = lowpart_subreg (V16QImode, operands[3],
16220 GET_MODE (operands[3]));
16221 rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7),
16222 GEN_INT (0xf7f7f7f7),
16223 GEN_INT (0xf7f7f7f7),
16224 GEN_INT (0xf7f7f7f7));
16225 rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
16226 operands[5] = force_const_mem (V4SImode, vec_const);
16227 }
16228 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16229 (set_attr "prefix_extra" "1")
16230 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16231 (set_attr "mode" "DI,TI,TI")])
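
;; Illustrative note (not part of the machine description): the 64-bit pshufb
;; indexes only 8 source bytes (index bits 0-2), while the 128-bit form uses
;; index bits 0-3.  Clearing bit 3 of every control byte - the 0xf7f7f7f7
;; constant built in the split above - keeps all indices inside the low 8
;; bytes, so the SSE instruction reproduces the MMX result in the low half.
;; A minimal C sketch, assuming GCC's <immintrin.h> (the helper name is ours):
;;
;;   #include <immintrin.h>
;;
;;   __m128i mmx_style_shuffle (__m128i data, __m128i sel)
;;   {
;;     __m128i mask = _mm_set1_epi8 ((char) 0xf7);	/* clear bit 3 */
;;     return _mm_shuffle_epi8 (data, _mm_and_si128 (sel, mask));
;;   }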
16232
16233 (define_insn "<ssse3_avx2>_psign<mode>3"
16234 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
16235 (unspec:VI124_AVX2
16236 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
16237 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
16238 UNSPEC_PSIGN))]
16239 "TARGET_SSSE3"
16240 "@
16241 psign<ssemodesuffix>\t{%2, %0|%0, %2}
16242 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16243 [(set_attr "isa" "noavx,avx")
16244 (set_attr "type" "sselog1")
16245 (set_attr "prefix_data16" "1,*")
16246 (set_attr "prefix_extra" "1")
16247 (set_attr "prefix" "orig,vex")
16248 (set_attr "mode" "<sseinsnmode>")])
16249
16250 (define_insn "ssse3_psign<mode>3"
16251 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
16252 (unspec:MMXMODEI
16253 [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
16254 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")]
16255 UNSPEC_PSIGN))]
16256 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16257 "@
16258 psign<mmxvecsize>\t{%2, %0|%0, %2}
16259 psign<mmxvecsize>\t{%2, %0|%0, %2}
16260 vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
16261 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16262 (set_attr "type" "sselog1")
16263 (set_attr "prefix_extra" "1")
16264 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16265 (set_attr "mode" "DI,TI,TI")])
16266
16267 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
16268 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
16269 (vec_merge:VI1_AVX512
16270 (unspec:VI1_AVX512
16271 [(match_operand:VI1_AVX512 1 "register_operand" "v")
16272 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
16273 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
16274 UNSPEC_PALIGNR)
16275 (match_operand:VI1_AVX512 4 "nonimm_or_0_operand" "0C")
16276 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
16277 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
16278 {
16279 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16280 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
16281 }
16282 [(set_attr "type" "sseishft")
16283 (set_attr "atom_unit" "sishuf")
16284 (set_attr "prefix_extra" "1")
16285 (set_attr "length_immediate" "1")
16286 (set_attr "prefix" "evex")
16287 (set_attr "mode" "<sseinsnmode>")])
16288
16289 (define_insn "<ssse3_avx2>_palignr<mode>"
16290 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
16291 (unspec:SSESCALARMODE
16292 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
16293 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
16294 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
16295 UNSPEC_PALIGNR))]
16296 "TARGET_SSSE3"
16297 {
16298 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16299
16300 switch (which_alternative)
16301 {
16302 case 0:
16303 return "palignr\t{%3, %2, %0|%0, %2, %3}";
16304 case 1:
16305 case 2:
16306 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
16307 default:
16308 gcc_unreachable ();
16309 }
16310 }
16311 [(set_attr "isa" "noavx,avx,avx512bw")
16312 (set_attr "type" "sseishft")
16313 (set_attr "atom_unit" "sishuf")
16314 (set_attr "prefix_data16" "1,*,*")
16315 (set_attr "prefix_extra" "1")
16316 (set_attr "length_immediate" "1")
16317 (set_attr "prefix" "orig,vex,evex")
16318 (set_attr "mode" "<sseinsnmode>")])
16319
16320 (define_insn_and_split "ssse3_palignrdi"
16321 [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
16322 (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
16323 (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
16324 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
16325 UNSPEC_PALIGNR))]
16326 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16327 {
16328 switch (which_alternative)
16329 {
16330 case 0:
16331 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16332 return "palignr\t{%3, %2, %0|%0, %2, %3}";
16333 case 1:
16334 case 2:
16335 return "#";
16336 default:
16337 gcc_unreachable ();
16338 }
16339 }
16340 "TARGET_MMX_WITH_SSE && reload_completed"
16341 [(set (match_dup 0)
16342 (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
16343 {
16344 /* Emulate MMX palignrdi with SSE psrldq. */
16345 rtx op0 = lowpart_subreg (V2DImode, operands[0],
16346 GET_MODE (operands[0]));
16347 rtx insn;
16348 if (TARGET_AVX)
16349 insn = gen_vec_concatv2di (op0, operands[2], operands[1]);
16350 else
16351 {
16352 /* NB: SSE can only concatenate OP0 and OP1 to OP0. */
16353 insn = gen_vec_concatv2di (op0, operands[1], operands[2]);
16354 emit_insn (insn);
16355 /* Swap bits 0:63 with bits 64:127. */
16356 rtx mask = gen_rtx_PARALLEL (VOIDmode,
16357 gen_rtvec (4, GEN_INT (2),
16358 GEN_INT (3),
16359 GEN_INT (0),
16360 GEN_INT (1)));
16361 rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
16362 rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
16363 insn = gen_rtx_SET (op1, op2);
16364 }
16365 emit_insn (insn);
16366 operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
16367 }
16368 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16369 (set_attr "type" "sseishft")
16370 (set_attr "atom_unit" "sishuf")
16371 (set_attr "prefix_extra" "1")
16372 (set_attr "length_immediate" "1")
16373 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16374 (set_attr "mode" "DI,TI,TI")])
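
;; Illustrative note (not part of the machine description): palignr
;; concatenates its two sources (first operand in the high half) and shifts
;; the result right by N bytes; the split above emulates the MMX variant by
;; concatenating into one 128-bit register and using a V1TI shift (psrldq)
;; for the byte shift.  A minimal C sketch, assuming GCC's <immintrin.h>
;; (the helper name is ours):
;;
;;   #include <immintrin.h>
;;
;;   /* Bytes 4..19 of the 32-byte concatenation hi:lo.  */
;;   __m128i align_by_4 (__m128i hi, __m128i lo)
;;   {
;;     return _mm_alignr_epi8 (hi, lo, 4);	/* palignr $4 */
;;   }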
16375
16376 ;; Mode iterator to handle the absence of V2DI and V4DI modes for the
16377 ;; abs instruction on pre-AVX512 targets.
16378 (define_mode_iterator VI1248_AVX512VL_AVX512BW
16379 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
16380 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
16381 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
16382 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
16383
16384 (define_insn "*abs<mode>2"
16385 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
16386 (abs:VI1248_AVX512VL_AVX512BW
16387 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
16388 "TARGET_SSSE3"
16389 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
16390 [(set_attr "type" "sselog1")
16391 (set_attr "prefix_data16" "1")
16392 (set_attr "prefix_extra" "1")
16393 (set_attr "prefix" "maybe_vex")
16394 (set_attr "mode" "<sseinsnmode>")])
16395
16396 (define_insn "abs<mode>2_mask"
16397 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
16398 (vec_merge:VI48_AVX512VL
16399 (abs:VI48_AVX512VL
16400 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
16401 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "0C")
16402 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
16403 "TARGET_AVX512F"
16404 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
16405 [(set_attr "type" "sselog1")
16406 (set_attr "prefix" "evex")
16407 (set_attr "mode" "<sseinsnmode>")])
16408
16409 (define_insn "abs<mode>2_mask"
16410 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16411 (vec_merge:VI12_AVX512VL
16412 (abs:VI12_AVX512VL
16413 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
16414 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
16415 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
16416 "TARGET_AVX512BW"
16417 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
16418 [(set_attr "type" "sselog1")
16419 (set_attr "prefix" "evex")
16420 (set_attr "mode" "<sseinsnmode>")])
16421
16422 (define_expand "abs<mode>2"
16423 [(set (match_operand:VI_AVX2 0 "register_operand")
16424 (abs:VI_AVX2
16425 (match_operand:VI_AVX2 1 "vector_operand")))]
16426 "TARGET_SSE2"
16427 {
16428 if (!TARGET_SSSE3
16429 || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
16430 && !TARGET_AVX512VL))
16431 {
16432 ix86_expand_sse2_abs (operands[0], operands[1]);
16433 DONE;
16434 }
16435 })
16436
16437 (define_insn "abs<mode>2"
16438 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
16439 (abs:MMXMODEI
16440 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
16441 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16442 "@
16443 pabs<mmxvecsize>\t{%1, %0|%0, %1}
16444 %vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
16445 [(set_attr "mmx_isa" "native,x64")
16446 (set_attr "type" "sselog1")
16447 (set_attr "prefix_rep" "0")
16448 (set_attr "prefix_extra" "1")
16449 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16450 (set_attr "mode" "DI,TI")])
16451
16452 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16453 ;;
16454 ;; AMD SSE4A instructions
16455 ;;
16456 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16457
16458 (define_insn "sse4a_movnt<mode>"
16459 [(set (match_operand:MODEF 0 "memory_operand" "=m")
16460 (unspec:MODEF
16461 [(match_operand:MODEF 1 "register_operand" "x")]
16462 UNSPEC_MOVNT))]
16463 "TARGET_SSE4A"
16464 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
16465 [(set_attr "type" "ssemov")
16466 (set_attr "mode" "<MODE>")])
16467
16468 (define_insn "sse4a_vmmovnt<mode>"
16469 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
16470 (unspec:<ssescalarmode>
16471 [(vec_select:<ssescalarmode>
16472 (match_operand:VF_128 1 "register_operand" "x")
16473 (parallel [(const_int 0)]))]
16474 UNSPEC_MOVNT))]
16475 "TARGET_SSE4A"
16476 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
16477 [(set_attr "type" "ssemov")
16478 (set_attr "mode" "<ssescalarmode>")])
16479
16480 (define_insn "sse4a_extrqi"
16481 [(set (match_operand:V2DI 0 "register_operand" "=x")
16482 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16483 (match_operand 2 "const_0_to_255_operand")
16484 (match_operand 3 "const_0_to_255_operand")]
16485 UNSPEC_EXTRQI))]
16486 "TARGET_SSE4A"
16487 "extrq\t{%3, %2, %0|%0, %2, %3}"
16488 [(set_attr "type" "sse")
16489 (set_attr "prefix_data16" "1")
16490 (set_attr "length_immediate" "2")
16491 (set_attr "mode" "TI")])
16492
16493 (define_insn "sse4a_extrq"
16494 [(set (match_operand:V2DI 0 "register_operand" "=x")
16495 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16496 (match_operand:V16QI 2 "register_operand" "x")]
16497 UNSPEC_EXTRQ))]
16498 "TARGET_SSE4A"
16499 "extrq\t{%2, %0|%0, %2}"
16500 [(set_attr "type" "sse")
16501 (set_attr "prefix_data16" "1")
16502 (set_attr "mode" "TI")])
16503
16504 (define_insn "sse4a_insertqi"
16505 [(set (match_operand:V2DI 0 "register_operand" "=x")
16506 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16507 (match_operand:V2DI 2 "register_operand" "x")
16508 (match_operand 3 "const_0_to_255_operand")
16509 (match_operand 4 "const_0_to_255_operand")]
16510 UNSPEC_INSERTQI))]
16511 "TARGET_SSE4A"
16512 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
16513 [(set_attr "type" "sseins")
16514 (set_attr "prefix_data16" "0")
16515 (set_attr "prefix_rep" "1")
16516 (set_attr "length_immediate" "2")
16517 (set_attr "mode" "TI")])
16518
16519 (define_insn "sse4a_insertq"
16520 [(set (match_operand:V2DI 0 "register_operand" "=x")
16521 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16522 (match_operand:V2DI 2 "register_operand" "x")]
16523 UNSPEC_INSERTQ))]
16524 "TARGET_SSE4A"
16525 "insertq\t{%2, %0|%0, %2}"
16526 [(set_attr "type" "sseins")
16527 (set_attr "prefix_data16" "0")
16528 (set_attr "prefix_rep" "1")
16529 (set_attr "mode" "TI")])
16530
16531 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16532 ;;
16533 ;; Intel SSE4.1 instructions
16534 ;;
16535 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16536
16537 ;; Mapping of immediate bits for blend instructions
16538 (define_mode_attr blendbits
16539 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
16540
16541 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
16542 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
16543 (vec_merge:VF_128_256
16544 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
16545 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
16546 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
16547 "TARGET_SSE4_1"
16548 "@
16549 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16550 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16551 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16552 [(set_attr "isa" "noavx,noavx,avx")
16553 (set_attr "type" "ssemov")
16554 (set_attr "length_immediate" "1")
16555 (set_attr "prefix_data16" "1,1,*")
16556 (set_attr "prefix_extra" "1")
16557 (set_attr "prefix" "orig,orig,vex")
16558 (set_attr "mode" "<MODE>")])
16559
16560 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
16561 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
16562 (unspec:VF_128_256
16563 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
16564 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
16565 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
16566 UNSPEC_BLENDV))]
16567 "TARGET_SSE4_1"
16568 "@
16569 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16570 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16571 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16572 [(set_attr "isa" "noavx,noavx,avx")
16573 (set_attr "type" "ssemov")
16574 (set_attr "length_immediate" "1")
16575 (set_attr "prefix_data16" "1,1,*")
16576 (set_attr "prefix_extra" "1")
16577 (set_attr "prefix" "orig,orig,vex")
16578 (set_attr "btver2_decode" "vector,vector,vector")
16579 (set_attr "mode" "<MODE>")])
16580
16581 ;; Also define scalar versions. These are used for conditional move.
16582 ;; Using subregs into vector modes causes register allocation lossage.
16583 ;; These patterns do not allow memory operands because the native
16584 ;; instructions read the full 128 bits.
16585
16586 (define_insn "sse4_1_blendv<ssemodesuffix>"
16587 [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
16588 (unspec:MODEF
16589 [(match_operand:MODEF 1 "register_operand" "0,0,x")
16590 (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
16591 (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
16592 UNSPEC_BLENDV))]
16593 "TARGET_SSE4_1"
16594 {
16595 if (get_attr_mode (insn) == MODE_V4SF)
16596 return (which_alternative == 2
16597 ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16598 : "blendvps\t{%3, %2, %0|%0, %2, %3}");
16599 else
16600 return (which_alternative == 2
16601 ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16602 : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
16603 }
16604 [(set_attr "isa" "noavx,noavx,avx")
16605 (set_attr "type" "ssemov")
16606 (set_attr "length_immediate" "1")
16607 (set_attr "prefix_data16" "1,1,*")
16608 (set_attr "prefix_extra" "1")
16609 (set_attr "prefix" "orig,orig,vex")
16610 (set_attr "btver2_decode" "vector,vector,vector")
16611 (set (attr "mode")
16612 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
16613 (const_string "V4SF")
16614 (match_test "TARGET_AVX")
16615 (const_string "<ssevecmode>")
16616 (match_test "optimize_function_for_size_p (cfun)")
16617 (const_string "V4SF")
16618 ]
16619 (const_string "<ssevecmode>")))])
16620
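;; Because the blend mask is consulted only through its sign bit, a mask
;; computed as x < 0 can be replaced by x itself; the splitter below drops
;; the comparison once reload has completed.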
16621 (define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
16622 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
16623 (unspec:VF_128_256
16624 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
16625 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
16626 (subreg:VF_128_256
16627 (lt:<sseintvecmode>
16628 (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
16629 (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C")) 0)]
16630 UNSPEC_BLENDV))]
16631 "TARGET_SSE4_1"
16632 "#"
16633 "&& reload_completed"
16634 [(set (match_dup 0)
16635 (unspec:VF_128_256
16636 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
16637 "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
16638 [(set_attr "isa" "noavx,noavx,avx")
16639 (set_attr "type" "ssemov")
16640 (set_attr "length_immediate" "1")
16641 (set_attr "prefix_data16" "1,1,*")
16642 (set_attr "prefix_extra" "1")
16643 (set_attr "prefix" "orig,orig,vex")
16644 (set_attr "btver2_decode" "vector,vector,vector")
16645 (set_attr "mode" "<MODE>")])
16646
16647 (define_mode_attr ssefltmodesuffix
16648 [(V2DI "pd") (V4DI "pd") (V4SI "ps") (V8SI "ps")])
16649
16650 (define_mode_attr ssefltvecmode
16651 [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")])
16652
16653 (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
16654 [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
16655 (unspec:<ssebytemode>
16656 [(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
16657 (match_operand:<ssebytemode> 2 "vector_operand" "YrBm,*xBm,xm")
16658 (subreg:<ssebytemode>
16659 (lt:VI48_AVX
16660 (match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
16661 (match_operand:VI48_AVX 4 "const0_operand" "C,C,C")) 0)]
16662 UNSPEC_BLENDV))]
16663 "TARGET_SSE4_1"
16664 "#"
16665 "&& reload_completed"
16666 [(set (match_dup 0)
16667 (unspec:<ssefltvecmode>
16668 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
16669 {
16670 operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
16671 operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
16672 operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
16673 operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
16674 }
16675 [(set_attr "isa" "noavx,noavx,avx")
16676 (set_attr "type" "ssemov")
16677 (set_attr "length_immediate" "1")
16678 (set_attr "prefix_data16" "1,1,*")
16679 (set_attr "prefix_extra" "1")
16680 (set_attr "prefix" "orig,orig,vex")
16681 (set_attr "btver2_decode" "vector,vector,vector")
16682 (set_attr "mode" "<ssefltvecmode>")])
16683
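;; For dpps the high nibble of the immediate masks the per-element products
;; and the low nibble selects which result elements receive the dot product;
;; dppd uses two bits in each group.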
16684 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
16685 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
16686 (unspec:VF_128_256
16687 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
16688 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
16689 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
16690 UNSPEC_DP))]
16691 "TARGET_SSE4_1"
16692 "@
16693 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16694 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16695 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16696 [(set_attr "isa" "noavx,noavx,avx")
16697 (set_attr "type" "ssemul")
16698 (set_attr "length_immediate" "1")
16699 (set_attr "prefix_data16" "1,1,*")
16700 (set_attr "prefix_extra" "1")
16701 (set_attr "prefix" "orig,orig,vex")
16702 (set_attr "btver2_decode" "vector,vector,vector")
16703 (set_attr "znver1_decode" "vector,vector,vector")
16704 (set_attr "mode" "<MODE>")])
16705
16706 ;; Mode attribute used by `vmovntdqa' pattern
16707 (define_mode_attr vi8_sse4_1_avx2_avx512
16708 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
16709
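;; MOVNTDQA is a load-only streaming (non-temporal hint) instruction, so
;; operand 1 is restricted to memory.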
16710 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
16711 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
16712 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
16713 UNSPEC_MOVNTDQA))]
16714 "TARGET_SSE4_1"
16715 "%vmovntdqa\t{%1, %0|%0, %1}"
16716 [(set_attr "isa" "noavx,noavx,avx")
16717 (set_attr "type" "ssemov")
16718 (set_attr "prefix_extra" "1,1,*")
16719 (set_attr "prefix" "orig,orig,maybe_evex")
16720 (set_attr "mode" "<sseinsnmode>")])
16721
16722 (define_insn "<sse4_1_avx2>_mpsadbw"
16723 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
16724 (unspec:VI1_AVX2
16725 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
16726 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
16727 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
16728 UNSPEC_MPSADBW))]
16729 "TARGET_SSE4_1"
16730 "@
16731 mpsadbw\t{%3, %2, %0|%0, %2, %3}
16732 mpsadbw\t{%3, %2, %0|%0, %2, %3}
16733 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16734 [(set_attr "isa" "noavx,noavx,avx")
16735 (set_attr "type" "sselog1")
16736 (set_attr "length_immediate" "1")
16737 (set_attr "prefix_extra" "1")
16738 (set_attr "prefix" "orig,orig,vex")
16739 (set_attr "btver2_decode" "vector,vector,vector")
16740 (set_attr "znver1_decode" "vector,vector,vector")
16741 (set_attr "mode" "<sseinsnmode>")])
16742
16743 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
16744 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
16745 (vec_concat:VI2_AVX2
16746 (us_truncate:<ssehalfvecmode>
16747 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
16748 (us_truncate:<ssehalfvecmode>
16749 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
16750 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
16751 "@
16752 packusdw\t{%2, %0|%0, %2}
16753 packusdw\t{%2, %0|%0, %2}
16754 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
16755 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16756 [(set_attr "isa" "noavx,noavx,avx,avx512bw")
16757 (set_attr "type" "sselog")
16758 (set_attr "prefix_extra" "1")
16759 (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
16760 (set_attr "mode" "<sseinsnmode>")])
16761
16762 (define_insn "<sse4_1_avx2>_pblendvb"
16763 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
16764 (unspec:VI1_AVX2
16765 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
16766 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
16767 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
16768 UNSPEC_BLENDV))]
16769 "TARGET_SSE4_1"
16770 "@
16771 pblendvb\t{%3, %2, %0|%0, %2, %3}
16772 pblendvb\t{%3, %2, %0|%0, %2, %3}
16773 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16774 [(set_attr "isa" "noavx,noavx,avx")
16775 (set_attr "type" "ssemov")
16776 (set_attr "prefix_extra" "1")
16777 (set_attr "length_immediate" "*,*,1")
16778 (set_attr "prefix" "orig,orig,vex")
16779 (set_attr "btver2_decode" "vector,vector,vector")
16780 (set_attr "mode" "<sseinsnmode>")])
16781
16782 (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
16783 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
16784 (unspec:VI1_AVX2
16785 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
16786 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
16787 (lt:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
16788 (match_operand:VI1_AVX2 4 "const0_operand" "C,C,C"))]
16789 UNSPEC_BLENDV))]
16790 "TARGET_SSE4_1"
16791 "#"
16792 ""
16793 [(set (match_dup 0)
16794 (unspec:VI1_AVX2
16795 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
16796 ""
16797 [(set_attr "isa" "noavx,noavx,avx")
16798 (set_attr "type" "ssemov")
16799 (set_attr "prefix_extra" "1")
16800 (set_attr "length_immediate" "*,*,1")
16801 (set_attr "prefix" "orig,orig,vex")
16802 (set_attr "btver2_decode" "vector,vector,vector")
16803 (set_attr "mode" "<sseinsnmode>")])
16804
16805 (define_insn "sse4_1_pblendw"
16806 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
16807 (vec_merge:V8HI
16808 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
16809 (match_operand:V8HI 1 "register_operand" "0,0,x")
16810 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
16811 "TARGET_SSE4_1"
16812 "@
16813 pblendw\t{%3, %2, %0|%0, %2, %3}
16814 pblendw\t{%3, %2, %0|%0, %2, %3}
16815 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16816 [(set_attr "isa" "noavx,noavx,avx")
16817 (set_attr "type" "ssemov")
16818 (set_attr "prefix_extra" "1")
16819 (set_attr "length_immediate" "1")
16820 (set_attr "prefix" "orig,orig,vex")
16821 (set_attr "mode" "TI")])
16822
16823 ;; The builtin uses an 8-bit immediate.  Expand it to the 16-bit element mask required by the vec_merge.
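;; The hardware applies the same eight immediate bits to each 128-bit lane,
;; so the value is duplicated: an immediate of 0xf0 becomes 0xf0f0 and
;; selects words 4..7 of each lane from operand 2.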
16824 (define_expand "avx2_pblendw"
16825 [(set (match_operand:V16HI 0 "register_operand")
16826 (vec_merge:V16HI
16827 (match_operand:V16HI 2 "nonimmediate_operand")
16828 (match_operand:V16HI 1 "register_operand")
16829 (match_operand:SI 3 "const_0_to_255_operand")))]
16830 "TARGET_AVX2"
16831 {
16832 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
16833 operands[3] = GEN_INT (val << 8 | val);
16834 })
16835
16836 (define_insn "*avx2_pblendw"
16837 [(set (match_operand:V16HI 0 "register_operand" "=x")
16838 (vec_merge:V16HI
16839 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
16840 (match_operand:V16HI 1 "register_operand" "x")
16841 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
16842 "TARGET_AVX2"
16843 {
16844 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
16845 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
16846 }
16847 [(set_attr "type" "ssemov")
16848 (set_attr "prefix_extra" "1")
16849 (set_attr "length_immediate" "1")
16850 (set_attr "prefix" "vex")
16851 (set_attr "mode" "OI")])
16852
16853 (define_insn "avx2_pblendd<mode>"
16854 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
16855 (vec_merge:VI4_AVX2
16856 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
16857 (match_operand:VI4_AVX2 1 "register_operand" "x")
16858 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
16859 "TARGET_AVX2"
16860 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16861 [(set_attr "type" "ssemov")
16862 (set_attr "prefix_extra" "1")
16863 (set_attr "length_immediate" "1")
16864 (set_attr "prefix" "vex")
16865 (set_attr "mode" "<sseinsnmode>")])
16866
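;; PHMINPOSUW places the minimum unsigned word of operand 1 in element 0 of
;; the result and its index in element 1; the remaining elements are zeroed.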
16867 (define_insn "sse4_1_phminposuw"
16868 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
16869 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
16870 UNSPEC_PHMINPOSUW))]
16871 "TARGET_SSE4_1"
16872 "%vphminposuw\t{%1, %0|%0, %1}"
16873 [(set_attr "isa" "noavx,noavx,avx")
16874 (set_attr "type" "sselog1")
16875 (set_attr "prefix_extra" "1")
16876 (set_attr "prefix" "orig,orig,vex")
16877 (set_attr "mode" "TI")])
16878
16879 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
16880 [(set (match_operand:V16HI 0 "register_operand" "=v")
16881 (any_extend:V16HI
16882 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
16883 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16884 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16885 [(set_attr "type" "ssemov")
16886 (set_attr "prefix_extra" "1")
16887 (set_attr "prefix" "maybe_evex")
16888 (set_attr "mode" "OI")])
16889
16890 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
16891 [(set (match_operand:V32HI 0 "register_operand" "=v")
16892 (any_extend:V32HI
16893 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
16894 "TARGET_AVX512BW"
16895 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16896 [(set_attr "type" "ssemov")
16897 (set_attr "prefix_extra" "1")
16898 (set_attr "prefix" "evex")
16899 (set_attr "mode" "XI")])
16900
16901 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
16902 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
16903 (any_extend:V8HI
16904 (vec_select:V8QI
16905 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
16906 (parallel [(const_int 0) (const_int 1)
16907 (const_int 2) (const_int 3)
16908 (const_int 4) (const_int 5)
16909 (const_int 6) (const_int 7)]))))]
16910 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16911 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16912 [(set_attr "isa" "noavx,noavx,avx")
16913 (set_attr "type" "ssemov")
16914 (set_attr "prefix_extra" "1")
16915 (set_attr "prefix" "orig,orig,maybe_evex")
16916 (set_attr "mode" "TI")])
16917
16918 (define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
16919 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
16920 (any_extend:V8HI
16921 (match_operand:V8QI 1 "memory_operand" "m,m,m")))]
16922 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16923 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16924 [(set_attr "isa" "noavx,noavx,avx")
16925 (set_attr "type" "ssemov")
16926 (set_attr "prefix_extra" "1")
16927 (set_attr "prefix" "orig,orig,maybe_evex")
16928 (set_attr "mode" "TI")])
16929
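;; The pattern below matches the shape combine produces when the 64-bit
;; source has been loaded as the low half of a zero-extended V2DI; it is
;; rewritten as a direct extension from the narrow memory operand.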
16930 (define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
16931 [(set (match_operand:V8HI 0 "register_operand")
16932 (any_extend:V8HI
16933 (vec_select:V8QI
16934 (subreg:V16QI
16935 (vec_concat:V2DI
16936 (match_operand:DI 1 "memory_operand")
16937 (const_int 0)) 0)
16938 (parallel [(const_int 0) (const_int 1)
16939 (const_int 2) (const_int 3)
16940 (const_int 4) (const_int 5)
16941 (const_int 6) (const_int 7)]))))]
16942 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
16943 && can_create_pseudo_p ()"
16944 "#"
16945 "&& 1"
16946 [(set (match_dup 0)
16947 (any_extend:V8HI (match_dup 1)))]
16948 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
16949
16950 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
16951 [(set (match_operand:V16SI 0 "register_operand" "=v")
16952 (any_extend:V16SI
16953 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
16954 "TARGET_AVX512F"
16955 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
16956 [(set_attr "type" "ssemov")
16957 (set_attr "prefix" "evex")
16958 (set_attr "mode" "XI")])
16959
16960 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
16961 [(set (match_operand:V8SI 0 "register_operand" "=v")
16962 (any_extend:V8SI
16963 (vec_select:V8QI
16964 (match_operand:V16QI 1 "register_operand" "v")
16965 (parallel [(const_int 0) (const_int 1)
16966 (const_int 2) (const_int 3)
16967 (const_int 4) (const_int 5)
16968 (const_int 6) (const_int 7)]))))]
16969 "TARGET_AVX2 && <mask_avx512vl_condition>"
16970 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16971 [(set_attr "type" "ssemov")
16972 (set_attr "prefix_extra" "1")
16973 (set_attr "prefix" "maybe_evex")
16974 (set_attr "mode" "OI")])
16975
16976 (define_insn "*avx2_<code>v8qiv8si2<mask_name>_1"
16977 [(set (match_operand:V8SI 0 "register_operand" "=v")
16978 (any_extend:V8SI
16979 (match_operand:V8QI 1 "memory_operand" "m")))]
16980 "TARGET_AVX2 && <mask_avx512vl_condition>"
16981 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16982 [(set_attr "type" "ssemov")
16983 (set_attr "prefix_extra" "1")
16984 (set_attr "prefix" "maybe_evex")
16985 (set_attr "mode" "OI")])
16986
16987 (define_insn_and_split "*avx2_<code>v8qiv8si2<mask_name>_2"
16988 [(set (match_operand:V8SI 0 "register_operand")
16989 (any_extend:V8SI
16990 (vec_select:V8QI
16991 (subreg:V16QI
16992 (vec_concat:V2DI
16993 (match_operand:DI 1 "memory_operand")
16994 (const_int 0)) 0)
16995 (parallel [(const_int 0) (const_int 1)
16996 (const_int 2) (const_int 3)
16997 (const_int 4) (const_int 5)
16998 (const_int 6) (const_int 7)]))))]
16999 "TARGET_AVX2 && <mask_avx512vl_condition>
17000 && can_create_pseudo_p ()"
17001 "#"
17002 "&& 1"
17003 [(set (match_dup 0)
17004 (any_extend:V8SI (match_dup 1)))]
17005 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
17006
17007 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
17008 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17009 (any_extend:V4SI
17010 (vec_select:V4QI
17011 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17012 (parallel [(const_int 0) (const_int 1)
17013 (const_int 2) (const_int 3)]))))]
17014 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17015 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17016 [(set_attr "isa" "noavx,noavx,avx")
17017 (set_attr "type" "ssemov")
17018 (set_attr "prefix_extra" "1")
17019 (set_attr "prefix" "orig,orig,maybe_evex")
17020 (set_attr "mode" "TI")])
17021
17022 (define_insn "*sse4_1_<code>v4qiv4si2<mask_name>_1"
17023 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17024 (any_extend:V4SI
17025 (match_operand:V4QI 1 "memory_operand" "m,m,m")))]
17026 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17027 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17028 [(set_attr "isa" "noavx,noavx,avx")
17029 (set_attr "type" "ssemov")
17030 (set_attr "prefix_extra" "1")
17031 (set_attr "prefix" "orig,orig,maybe_evex")
17032 (set_attr "mode" "TI")])
17033
17034 (define_insn_and_split "*sse4_1_<code>v4qiv4si2<mask_name>_2"
17035 [(set (match_operand:V4SI 0 "register_operand")
17036 (any_extend:V4SI
17037 (vec_select:V4QI
17038 (subreg:V16QI
17039 (vec_merge:V4SI
17040 (vec_duplicate:V4SI
17041 (match_operand:SI 1 "memory_operand"))
17042 (const_vector:V4SI
17043 [(const_int 0) (const_int 0)
17044 (const_int 0) (const_int 0)])
17045 (const_int 1)) 0)
17046 (parallel [(const_int 0) (const_int 1)
17047 (const_int 2) (const_int 3)]))))]
17048 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17049 && can_create_pseudo_p ()"
17050 "#"
17051 "&& 1"
17052 [(set (match_dup 0)
17053 (any_extend:V4SI (match_dup 1)))]
17054 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
17055
17056 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
17057 [(set (match_operand:V16SI 0 "register_operand" "=v")
17058 (any_extend:V16SI
17059 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
17060 "TARGET_AVX512F"
17061 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17062 [(set_attr "type" "ssemov")
17063 (set_attr "prefix" "evex")
17064 (set_attr "mode" "XI")])
17065
17066 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
17067 [(set (match_operand:V8SI 0 "register_operand" "=v")
17068 (any_extend:V8SI
17069 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
17070 "TARGET_AVX2 && <mask_avx512vl_condition>"
17071 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17072 [(set_attr "type" "ssemov")
17073 (set_attr "prefix_extra" "1")
17074 (set_attr "prefix" "maybe_evex")
17075 (set_attr "mode" "OI")])
17076
17077 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
17078 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17079 (any_extend:V4SI
17080 (vec_select:V4HI
17081 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
17082 (parallel [(const_int 0) (const_int 1)
17083 (const_int 2) (const_int 3)]))))]
17084 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17085 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17086 [(set_attr "isa" "noavx,noavx,avx")
17087 (set_attr "type" "ssemov")
17088 (set_attr "prefix_extra" "1")
17089 (set_attr "prefix" "orig,orig,maybe_evex")
17090 (set_attr "mode" "TI")])
17091
17092 (define_insn "*sse4_1_<code>v4hiv4si2<mask_name>_1"
17093 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17094 (any_extend:V4SI
17095 (match_operand:V4HI 1 "memory_operand" "m,m,m")))]
17096 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17097 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17098 [(set_attr "isa" "noavx,noavx,avx")
17099 (set_attr "type" "ssemov")
17100 (set_attr "prefix_extra" "1")
17101 (set_attr "prefix" "orig,orig,maybe_evex")
17102 (set_attr "mode" "TI")])
17103
17104 (define_insn_and_split "*sse4_1_<code>v4hiv4si2<mask_name>_2"
17105 [(set (match_operand:V4SI 0 "register_operand")
17106 (any_extend:V4SI
17107 (vec_select:V4HI
17108 (subreg:V8HI
17109 (vec_concat:V2DI
17110 (match_operand:DI 1 "memory_operand")
17111 (const_int 0)) 0)
17112 (parallel [(const_int 0) (const_int 1)
17113 (const_int 2) (const_int 3)]))))]
17114 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17115 && can_create_pseudo_p ()"
17116 "#"
17117 "&& 1"
17118 [(set (match_dup 0)
17119 (any_extend:V4SI (match_dup 1)))]
17120 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
17121
17122 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
17123 [(set (match_operand:V8DI 0 "register_operand" "=v")
17124 (any_extend:V8DI
17125 (vec_select:V8QI
17126 (match_operand:V16QI 1 "register_operand" "v")
17127 (parallel [(const_int 0) (const_int 1)
17128 (const_int 2) (const_int 3)
17129 (const_int 4) (const_int 5)
17130 (const_int 6) (const_int 7)]))))]
17131 "TARGET_AVX512F"
17132 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17133 [(set_attr "type" "ssemov")
17134 (set_attr "prefix" "evex")
17135 (set_attr "mode" "XI")])
17136
17137 (define_insn "*avx512f_<code>v8qiv8di2<mask_name>_1"
17138 [(set (match_operand:V8DI 0 "register_operand" "=v")
17139 (any_extend:V8DI
17140 (match_operand:V8QI 1 "memory_operand" "m")))]
17141 "TARGET_AVX512F"
17142 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17143 [(set_attr "type" "ssemov")
17144 (set_attr "prefix" "evex")
17145 (set_attr "mode" "XI")])
17146
17147 (define_insn_and_split "*avx512f_<code>v8qiv8di2<mask_name>_2"
17148 [(set (match_operand:V8DI 0 "register_operand")
17149 (any_extend:V8DI
17150 (vec_select:V8QI
17151 (subreg:V16QI
17152 (vec_concat:V2DI
17153 (match_operand:DI 1 "memory_operand")
17154 (const_int 0)) 0)
17155 (parallel [(const_int 0) (const_int 1)
17156 (const_int 2) (const_int 3)
17157 (const_int 4) (const_int 5)
17158 (const_int 6) (const_int 7)]))))]
17159 "TARGET_AVX512F && can_create_pseudo_p ()"
17160 "#"
17161 "&& 1"
17162 [(set (match_dup 0)
17163 (any_extend:V8DI (match_dup 1)))]
17164 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
17165
17166 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
17167 [(set (match_operand:V4DI 0 "register_operand" "=v")
17168 (any_extend:V4DI
17169 (vec_select:V4QI
17170 (match_operand:V16QI 1 "register_operand" "v")
17171 (parallel [(const_int 0) (const_int 1)
17172 (const_int 2) (const_int 3)]))))]
17173 "TARGET_AVX2 && <mask_avx512vl_condition>"
17174 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17175 [(set_attr "type" "ssemov")
17176 (set_attr "prefix_extra" "1")
17177 (set_attr "prefix" "maybe_evex")
17178 (set_attr "mode" "OI")])
17179
17180 (define_insn "*avx2_<code>v4qiv4di2<mask_name>_1"
17181 [(set (match_operand:V4DI 0 "register_operand" "=v")
17182 (any_extend:V4DI
17183 (match_operand:V4QI 1 "memory_operand" "m")))]
17184 "TARGET_AVX2 && <mask_avx512vl_condition>"
17185 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17186 [(set_attr "type" "ssemov")
17187 (set_attr "prefix_extra" "1")
17188 (set_attr "prefix" "maybe_evex")
17189 (set_attr "mode" "OI")])
17190
17191 (define_insn_and_split "*avx2_<code>v4qiv4di2<mask_name>_2"
17192 [(set (match_operand:V4DI 0 "register_operand")
17193 (any_extend:V4DI
17194 (vec_select:V4QI
17195 (subreg:V16QI
17196 (vec_merge:V4SI
17197 (vec_duplicate:V4SI
17198 (match_operand:SI 1 "memory_operand"))
17199 (const_vector:V4SI
17200 [(const_int 0) (const_int 0)
17201 (const_int 0) (const_int 0)])
17202 (const_int 1)) 0)
17203 (parallel [(const_int 0) (const_int 1)
17204 (const_int 2) (const_int 3)]))))]
17205 "TARGET_AVX2 && <mask_avx512vl_condition>
17206 && can_create_pseudo_p ()"
17207 "#"
17208 "&& 1"
17209 [(set (match_dup 0)
17210 (any_extend:V4DI (match_dup 1)))]
17211 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
17212
17213 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
17214 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17215 (any_extend:V2DI
17216 (vec_select:V2QI
17217 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17218 (parallel [(const_int 0) (const_int 1)]))))]
17219 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17220 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17221 [(set_attr "isa" "noavx,noavx,avx")
17222 (set_attr "type" "ssemov")
17223 (set_attr "prefix_extra" "1")
17224 (set_attr "prefix" "orig,orig,maybe_evex")
17225 (set_attr "mode" "TI")])
17226
17227 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
17228 [(set (match_operand:V8DI 0 "register_operand" "=v")
17229 (any_extend:V8DI
17230 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
17231 "TARGET_AVX512F"
17232 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
17233 [(set_attr "type" "ssemov")
17234 (set_attr "prefix" "evex")
17235 (set_attr "mode" "XI")])
17236
17237 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
17238 [(set (match_operand:V4DI 0 "register_operand" "=v")
17239 (any_extend:V4DI
17240 (vec_select:V4HI
17241 (match_operand:V8HI 1 "register_operand" "v")
17242 (parallel [(const_int 0) (const_int 1)
17243 (const_int 2) (const_int 3)]))))]
17244 "TARGET_AVX2 && <mask_avx512vl_condition>"
17245 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17246 [(set_attr "type" "ssemov")
17247 (set_attr "prefix_extra" "1")
17248 (set_attr "prefix" "maybe_evex")
17249 (set_attr "mode" "OI")])
17250
17251 (define_insn "*avx2_<code>v4hiv4di2<mask_name>_1"
17252 [(set (match_operand:V4DI 0 "register_operand" "=v")
17253 (any_extend:V4DI
17254 (match_operand:V4HI 1 "memory_operand" "m")))]
17255 "TARGET_AVX2 && <mask_avx512vl_condition>"
17256 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17257 [(set_attr "type" "ssemov")
17258 (set_attr "prefix_extra" "1")
17259 (set_attr "prefix" "maybe_evex")
17260 (set_attr "mode" "OI")])
17261
17262 (define_insn_and_split "*avx2_<code>v4hiv4di2<mask_name>_2"
17263 [(set (match_operand:V4DI 0 "register_operand")
17264 (any_extend:V4DI
17265 (vec_select:V4HI
17266 (subreg:V8HI
17267 (vec_concat:V2DI
17268 (match_operand:DI 1 "memory_operand")
17269 (const_int 0)) 0)
17270 (parallel [(const_int 0) (const_int 1)
17271 (const_int 2) (const_int 3)]))))]
17272 "TARGET_AVX2 && <mask_avx512vl_condition>
17273 && can_create_pseudo_p ()"
17274 "#"
17275 "&& 1"
17276 [(set (match_dup 0)
17277 (any_extend:V4DI (match_dup 1)))]
17278 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
17279
17280 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
17281 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17282 (any_extend:V2DI
17283 (vec_select:V2HI
17284 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
17285 (parallel [(const_int 0) (const_int 1)]))))]
17286 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17287 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17288 [(set_attr "isa" "noavx,noavx,avx")
17289 (set_attr "type" "ssemov")
17290 (set_attr "prefix_extra" "1")
17291 (set_attr "prefix" "orig,orig,maybe_evex")
17292 (set_attr "mode" "TI")])
17293
17294 (define_insn "*sse4_1_<code>v2hiv2di2<mask_name>_1"
17295 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17296 (any_extend:V2DI
17297 (match_operand:V2HI 1 "memory_operand" "m,m,m")))]
17298 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17299 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17300 [(set_attr "isa" "noavx,noavx,avx")
17301 (set_attr "type" "ssemov")
17302 (set_attr "prefix_extra" "1")
17303 (set_attr "prefix" "orig,orig,maybe_evex")
17304 (set_attr "mode" "TI")])
17305
17306 (define_insn_and_split "*sse4_1_<code>v2hiv2di2<mask_name>_2"
17307 [(set (match_operand:V2DI 0 "register_operand")
17308 (any_extend:V2DI
17309 (vec_select:V2HI
17310 (subreg:V8HI
17311 (vec_merge:V4SI
17312 (vec_duplicate:V4SI
17313 (match_operand:SI 1 "memory_operand"))
17314 (const_vector:V4SI
17315 [(const_int 0) (const_int 0)
17316 (const_int 0) (const_int 0)])
17317 (const_int 1)) 0)
17318 (parallel [(const_int 0) (const_int 1)]))))]
17319 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17320 && can_create_pseudo_p ()"
17321 "#"
17322 "&& 1"
17323 [(set (match_dup 0)
17324 (any_extend:V2DI (match_dup 1)))]
17325 "operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
17326
17327 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
17328 [(set (match_operand:V8DI 0 "register_operand" "=v")
17329 (any_extend:V8DI
17330 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
17331 "TARGET_AVX512F"
17332 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17333 [(set_attr "type" "ssemov")
17334 (set_attr "prefix" "evex")
17335 (set_attr "mode" "XI")])
17336
17337 (define_insn "avx2_<code>v4siv4di2<mask_name>"
17338 [(set (match_operand:V4DI 0 "register_operand" "=v")
17339 (any_extend:V4DI
17340 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
17341 "TARGET_AVX2 && <mask_avx512vl_condition>"
17342 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17343 [(set_attr "type" "ssemov")
17344 (set_attr "prefix" "maybe_evex")
17345 (set_attr "prefix_extra" "1")
17346 (set_attr "mode" "OI")])
17347
17348 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
17349 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17350 (any_extend:V2DI
17351 (vec_select:V2SI
17352 (match_operand:V4SI 1 "register_operand" "Yr,*x,v")
17353 (parallel [(const_int 0) (const_int 1)]))))]
17354 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17355 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17356 [(set_attr "isa" "noavx,noavx,avx")
17357 (set_attr "type" "ssemov")
17358 (set_attr "prefix_extra" "1")
17359 (set_attr "prefix" "orig,orig,maybe_evex")
17360 (set_attr "mode" "TI")])
17361
17362 (define_insn "*sse4_1_<code>v2siv2di2<mask_name>_1"
17363 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17364 (any_extend:V2DI
17365 (match_operand:V2SI 1 "memory_operand" "m,m,m")))]
17366 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17367 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17368 [(set_attr "isa" "noavx,noavx,avx")
17369 (set_attr "type" "ssemov")
17370 (set_attr "prefix_extra" "1")
17371 (set_attr "prefix" "orig,orig,maybe_evex")
17372 (set_attr "mode" "TI")])
17373
17374 (define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
17375 [(set (match_operand:V2DI 0 "register_operand")
17376 (any_extend:V2DI
17377 (vec_select:V2SI
17378 (subreg:V4SI
17379 (vec_concat:V2DI
17380 (match_operand:DI 1 "memory_operand")
17381 (const_int 0)) 0)
17382 (parallel [(const_int 0) (const_int 1)]))))]
17383 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17384 && can_create_pseudo_p ()"
17385 "#"
17386 "&& 1"
17387 [(set (match_dup 0)
17388 (any_extend:V2DI (match_dup 1)))]
17389 "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
17390
17391 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
17392 ;; setting FLAGS_REG, but they are not really compare instructions.
17393 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
17394 [(set (reg:CC FLAGS_REG)
17395 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
17396 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
17397 UNSPEC_VTESTP))]
17398 "TARGET_AVX"
17399 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
17400 [(set_attr "type" "ssecomi")
17401 (set_attr "prefix_extra" "1")
17402 (set_attr "prefix" "vex")
17403 (set_attr "mode" "<MODE>")])
17404
17405 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
17406 ;; but it is not really a compare instruction.
17407 (define_insn "<sse4_1>_ptest<mode>"
17408 [(set (reg:CC FLAGS_REG)
17409 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
17410 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
17411 UNSPEC_PTEST))]
17412 "TARGET_SSE4_1"
17413 "%vptest\t{%1, %0|%0, %1}"
17414 [(set_attr "isa" "noavx,noavx,avx")
17415 (set_attr "type" "ssecomi")
17416 (set_attr "prefix_extra" "1")
17417 (set_attr "prefix" "orig,orig,vex")
17418 (set (attr "btver2_decode")
17419 (if_then_else
17420 (match_test "<sseinsnmode>mode==OImode")
17421 (const_string "vector")
17422 (const_string "*")))
17423 (set_attr "mode" "<sseinsnmode>")])
17424
17425 (define_insn "ptesttf2"
17426 [(set (reg:CC FLAGS_REG)
17427 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
17428 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
17429 UNSPEC_PTEST))]
17430 "TARGET_SSE4_1"
17431 "%vptest\t{%1, %0|%0, %1}"
17432 [(set_attr "isa" "noavx,noavx,avx")
17433 (set_attr "type" "ssecomi")
17434 (set_attr "prefix_extra" "1")
17435 (set_attr "prefix" "orig,orig,vex")
17436 (set_attr "mode" "TI")])
17437
17438 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
17439 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17440 (unspec:VF_128_256
17441 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
17442 (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
17443 UNSPEC_ROUND))]
17444 "TARGET_SSE4_1"
17445 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17446 [(set_attr "isa" "noavx,noavx,avx")
17447 (set_attr "type" "ssecvt")
17448 (set_attr "prefix_data16" "1,1,*")
17449 (set_attr "prefix_extra" "1")
17450 (set_attr "length_immediate" "1")
17451 (set_attr "prefix" "orig,orig,vex")
17452 (set_attr "mode" "<MODE>")])
17453
17454 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
17455 [(match_operand:<sseintvecmode> 0 "register_operand")
17456 (match_operand:VF1_128_256 1 "vector_operand")
17457 (match_operand:SI 2 "const_0_to_15_operand")]
17458 "TARGET_SSE4_1"
17459 {
17460 rtx tmp = gen_reg_rtx (<MODE>mode);
17461
17462 emit_insn
17463 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
17464 operands[2]));
17465 emit_insn
17466 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
17467 DONE;
17468 })
17469
17470 (define_expand "avx512f_round<castmode>512"
17471 [(match_operand:VF_512 0 "register_operand")
17472 (match_operand:VF_512 1 "nonimmediate_operand")
17473 (match_operand:SI 2 "const_0_to_15_operand")]
17474 "TARGET_AVX512F"
17475 {
17476 emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
17477 DONE;
17478 })
17479
17480 (define_expand "avx512f_roundps512_sfix"
17481 [(match_operand:V16SI 0 "register_operand")
17482 (match_operand:V16SF 1 "nonimmediate_operand")
17483 (match_operand:SI 2 "const_0_to_15_operand")]
17484 "TARGET_AVX512F"
17485 {
17486 rtx tmp = gen_reg_rtx (V16SFmode);
17487 emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
17488 emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
17489 DONE;
17490 })
17491
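;; For V2DF with AVX available, concatenating the two inputs into a single
;; V4DF vector lets one 256-bit round plus a truncating convert replace two
;; 128-bit rounds followed by a pack.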
17492 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
17493 [(match_operand:<ssepackfltmode> 0 "register_operand")
17494 (match_operand:VF2 1 "vector_operand")
17495 (match_operand:VF2 2 "vector_operand")
17496 (match_operand:SI 3 "const_0_to_15_operand")]
17497 "TARGET_SSE4_1"
17498 {
17499 rtx tmp0, tmp1;
17500
17501 if (<MODE>mode == V2DFmode
17502 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
17503 {
17504 rtx tmp2 = gen_reg_rtx (V4DFmode);
17505
17506 tmp0 = gen_reg_rtx (V4DFmode);
17507 tmp1 = force_reg (V2DFmode, operands[1]);
17508
17509 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
17510 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
17511 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
17512 }
17513 else
17514 {
17515 tmp0 = gen_reg_rtx (<MODE>mode);
17516 tmp1 = gen_reg_rtx (<MODE>mode);
17517
17518 emit_insn
17519 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
17520 operands[3]));
17521 emit_insn
17522 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
17523 operands[3]));
17524 emit_insn
17525 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
17526 }
17527 DONE;
17528 })
17529
17530 (define_insn "sse4_1_round<ssescalarmodesuffix>"
17531 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
17532 (vec_merge:VF_128
17533 (unspec:VF_128
17534 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
17535 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
17536 UNSPEC_ROUND)
17537 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
17538 (const_int 1)))]
17539 "TARGET_SSE4_1"
17540 "@
17541 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
17542 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
17543 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
17544 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17545 [(set_attr "isa" "noavx,noavx,avx,avx512f")
17546 (set_attr "type" "ssecvt")
17547 (set_attr "length_immediate" "1")
17548 (set_attr "prefix_data16" "1,1,*,*")
17549 (set_attr "prefix_extra" "1")
17550 (set_attr "prefix" "orig,orig,vex,evex")
17551 (set_attr "mode" "<MODE>")])
17552
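;; Rounding to nearest with ties away from zero is expanded as
;; trunc (x + copysign (C, x)) where C is nextafter (0.5, 0.0); using the
;; predecessor of 0.5 keeps the addition from pushing values just below
;; 0.5 up to 1.0 before the truncation.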
17553 (define_expand "round<mode>2"
17554 [(set (match_dup 3)
17555 (plus:VF
17556 (match_operand:VF 1 "register_operand")
17557 (match_dup 2)))
17558 (set (match_operand:VF 0 "register_operand")
17559 (unspec:VF
17560 [(match_dup 3) (match_dup 4)]
17561 UNSPEC_ROUND))]
17562 "TARGET_SSE4_1 && !flag_trapping_math"
17563 {
17564 machine_mode scalar_mode;
17565 const struct real_format *fmt;
17566 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
17567 rtx half, vec_half;
17568
17569 scalar_mode = GET_MODE_INNER (<MODE>mode);
17570
17571   /* Load nextafter (0.5, 0.0), the largest representable value below 0.5.  */
17572 fmt = REAL_MODE_FORMAT (scalar_mode);
17573 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
17574 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
17575 half = const_double_from_real_value (pred_half, scalar_mode);
17576
17577 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
17578 vec_half = force_reg (<MODE>mode, vec_half);
17579
17580 operands[2] = gen_reg_rtx (<MODE>mode);
17581 emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
17582
17583 operands[3] = gen_reg_rtx (<MODE>mode);
17584 operands[4] = GEN_INT (ROUND_TRUNC);
17585 })
17586
17587 (define_expand "round<mode>2_sfix"
17588 [(match_operand:<sseintvecmode> 0 "register_operand")
17589 (match_operand:VF1 1 "register_operand")]
17590 "TARGET_SSE4_1 && !flag_trapping_math"
17591 {
17592 rtx tmp = gen_reg_rtx (<MODE>mode);
17593
17594 emit_insn (gen_round<mode>2 (tmp, operands[1]));
17595
17596 emit_insn
17597 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
17598 DONE;
17599 })
17600
17601 (define_expand "round<mode>2_vec_pack_sfix"
17602 [(match_operand:<ssepackfltmode> 0 "register_operand")
17603 (match_operand:VF2 1 "register_operand")
17604 (match_operand:VF2 2 "register_operand")]
17605 "TARGET_SSE4_1 && !flag_trapping_math"
17606 {
17607 rtx tmp0, tmp1;
17608
17609 if (<MODE>mode == V2DFmode
17610 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
17611 {
17612 rtx tmp2 = gen_reg_rtx (V4DFmode);
17613
17614 tmp0 = gen_reg_rtx (V4DFmode);
17615 tmp1 = force_reg (V2DFmode, operands[1]);
17616
17617 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
17618 emit_insn (gen_roundv4df2 (tmp2, tmp0));
17619 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
17620 }
17621 else
17622 {
17623 tmp0 = gen_reg_rtx (<MODE>mode);
17624 tmp1 = gen_reg_rtx (<MODE>mode);
17625
17626 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
17627 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
17628
17629 emit_insn
17630 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
17631 }
17632 DONE;
17633 })
17634
17635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17636 ;;
17637 ;; Intel SSE4.2 string/text processing instructions
17638 ;;
17639 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17640
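;; The combined pattern exposes all three results (ECX, XMM0 and the flags);
;; the splitter checks REG_UNUSED notes and emits only the single-result
;; pcmpestri/pcmpestrm (or the flags-only variant) that is actually needed.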
17641 (define_insn_and_split "sse4_2_pcmpestr"
17642 [(set (match_operand:SI 0 "register_operand" "=c,c")
17643 (unspec:SI
17644 [(match_operand:V16QI 2 "register_operand" "x,x")
17645 (match_operand:SI 3 "register_operand" "a,a")
17646 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
17647 (match_operand:SI 5 "register_operand" "d,d")
17648 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
17649 UNSPEC_PCMPESTR))
17650 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
17651 (unspec:V16QI
17652 [(match_dup 2)
17653 (match_dup 3)
17654 (match_dup 4)
17655 (match_dup 5)
17656 (match_dup 6)]
17657 UNSPEC_PCMPESTR))
17658 (set (reg:CC FLAGS_REG)
17659 (unspec:CC
17660 [(match_dup 2)
17661 (match_dup 3)
17662 (match_dup 4)
17663 (match_dup 5)
17664 (match_dup 6)]
17665 UNSPEC_PCMPESTR))]
17666 "TARGET_SSE4_2
17667 && can_create_pseudo_p ()"
17668 "#"
17669 "&& 1"
17670 [(const_int 0)]
17671 {
17672 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
17673 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
17674 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
17675
17676 if (ecx)
17677 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
17678 operands[3], operands[4],
17679 operands[5], operands[6]));
17680 if (xmm0)
17681 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
17682 operands[3], operands[4],
17683 operands[5], operands[6]));
17684 if (flags && !(ecx || xmm0))
17685 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
17686 operands[2], operands[3],
17687 operands[4], operands[5],
17688 operands[6]));
17689 if (!(flags || ecx || xmm0))
17690 emit_note (NOTE_INSN_DELETED);
17691
17692 DONE;
17693 }
17694 [(set_attr "type" "sselog")
17695 (set_attr "prefix_data16" "1")
17696 (set_attr "prefix_extra" "1")
17697 (set_attr "length_immediate" "1")
17698 (set_attr "memory" "none,load")
17699 (set_attr "mode" "TI")])
17700
17701 (define_insn "sse4_2_pcmpestri"
17702 [(set (match_operand:SI 0 "register_operand" "=c,c")
17703 (unspec:SI
17704 [(match_operand:V16QI 1 "register_operand" "x,x")
17705 (match_operand:SI 2 "register_operand" "a,a")
17706 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
17707 (match_operand:SI 4 "register_operand" "d,d")
17708 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
17709 UNSPEC_PCMPESTR))
17710 (set (reg:CC FLAGS_REG)
17711 (unspec:CC
17712 [(match_dup 1)
17713 (match_dup 2)
17714 (match_dup 3)
17715 (match_dup 4)
17716 (match_dup 5)]
17717 UNSPEC_PCMPESTR))]
17718 "TARGET_SSE4_2"
17719 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
17720 [(set_attr "type" "sselog")
17721 (set_attr "prefix_data16" "1")
17722 (set_attr "prefix_extra" "1")
17723 (set_attr "prefix" "maybe_vex")
17724 (set_attr "length_immediate" "1")
17725 (set_attr "btver2_decode" "vector")
17726 (set_attr "memory" "none,load")
17727 (set_attr "mode" "TI")])
17728
17729 (define_insn "sse4_2_pcmpestrm"
17730 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
17731 (unspec:V16QI
17732 [(match_operand:V16QI 1 "register_operand" "x,x")
17733 (match_operand:SI 2 "register_operand" "a,a")
17734 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
17735 (match_operand:SI 4 "register_operand" "d,d")
17736 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
17737 UNSPEC_PCMPESTR))
17738 (set (reg:CC FLAGS_REG)
17739 (unspec:CC
17740 [(match_dup 1)
17741 (match_dup 2)
17742 (match_dup 3)
17743 (match_dup 4)
17744 (match_dup 5)]
17745 UNSPEC_PCMPESTR))]
17746 "TARGET_SSE4_2"
17747 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
17748 [(set_attr "type" "sselog")
17749 (set_attr "prefix_data16" "1")
17750 (set_attr "prefix_extra" "1")
17751 (set_attr "length_immediate" "1")
17752 (set_attr "prefix" "maybe_vex")
17753 (set_attr "btver2_decode" "vector")
17754 (set_attr "memory" "none,load")
17755 (set_attr "mode" "TI")])
17756
17757 (define_insn "sse4_2_pcmpestr_cconly"
17758 [(set (reg:CC FLAGS_REG)
17759 (unspec:CC
17760 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
17761 (match_operand:SI 3 "register_operand" "a,a,a,a")
17762 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
17763 (match_operand:SI 5 "register_operand" "d,d,d,d")
17764 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
17765 UNSPEC_PCMPESTR))
17766 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
17767 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
17768 "TARGET_SSE4_2"
17769 "@
17770 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
17771 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
17772 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
17773 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
17774 [(set_attr "type" "sselog")
17775 (set_attr "prefix_data16" "1")
17776 (set_attr "prefix_extra" "1")
17777 (set_attr "length_immediate" "1")
17778 (set_attr "memory" "none,load,none,load")
17779 (set_attr "btver2_decode" "vector,vector,vector,vector")
17780 (set_attr "prefix" "maybe_vex")
17781 (set_attr "mode" "TI")])
17782
17783 (define_insn_and_split "sse4_2_pcmpistr"
17784 [(set (match_operand:SI 0 "register_operand" "=c,c")
17785 (unspec:SI
17786 [(match_operand:V16QI 2 "register_operand" "x,x")
17787 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
17788 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
17789 UNSPEC_PCMPISTR))
17790 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
17791 (unspec:V16QI
17792 [(match_dup 2)
17793 (match_dup 3)
17794 (match_dup 4)]
17795 UNSPEC_PCMPISTR))
17796 (set (reg:CC FLAGS_REG)
17797 (unspec:CC
17798 [(match_dup 2)
17799 (match_dup 3)
17800 (match_dup 4)]
17801 UNSPEC_PCMPISTR))]
17802 "TARGET_SSE4_2
17803 && can_create_pseudo_p ()"
17804 "#"
17805 "&& 1"
17806 [(const_int 0)]
17807 {
17808 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
17809 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
17810 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
17811
17812 if (ecx)
17813 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
17814 operands[3], operands[4]));
17815 if (xmm0)
17816 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
17817 operands[3], operands[4]));
17818 if (flags && !(ecx || xmm0))
17819 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
17820 operands[2], operands[3],
17821 operands[4]));
17822 if (!(flags || ecx || xmm0))
17823 emit_note (NOTE_INSN_DELETED);
17824
17825 DONE;
17826 }
17827 [(set_attr "type" "sselog")
17828 (set_attr "prefix_data16" "1")
17829 (set_attr "prefix_extra" "1")
17830 (set_attr "length_immediate" "1")
17831 (set_attr "memory" "none,load")
17832 (set_attr "mode" "TI")])
17833
17834 (define_insn "sse4_2_pcmpistri"
17835 [(set (match_operand:SI 0 "register_operand" "=c,c")
17836 (unspec:SI
17837 [(match_operand:V16QI 1 "register_operand" "x,x")
17838 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
17839 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
17840 UNSPEC_PCMPISTR))
17841 (set (reg:CC FLAGS_REG)
17842 (unspec:CC
17843 [(match_dup 1)
17844 (match_dup 2)
17845 (match_dup 3)]
17846 UNSPEC_PCMPISTR))]
17847 "TARGET_SSE4_2"
17848 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
17849 [(set_attr "type" "sselog")
17850 (set_attr "prefix_data16" "1")
17851 (set_attr "prefix_extra" "1")
17852 (set_attr "length_immediate" "1")
17853 (set_attr "prefix" "maybe_vex")
17854 (set_attr "memory" "none,load")
17855 (set_attr "btver2_decode" "vector")
17856 (set_attr "mode" "TI")])
17857
17858 (define_insn "sse4_2_pcmpistrm"
17859 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
17860 (unspec:V16QI
17861 [(match_operand:V16QI 1 "register_operand" "x,x")
17862 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
17863 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
17864 UNSPEC_PCMPISTR))
17865 (set (reg:CC FLAGS_REG)
17866 (unspec:CC
17867 [(match_dup 1)
17868 (match_dup 2)
17869 (match_dup 3)]
17870 UNSPEC_PCMPISTR))]
17871 "TARGET_SSE4_2"
17872 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
17873 [(set_attr "type" "sselog")
17874 (set_attr "prefix_data16" "1")
17875 (set_attr "prefix_extra" "1")
17876 (set_attr "length_immediate" "1")
17877 (set_attr "prefix" "maybe_vex")
17878 (set_attr "memory" "none,load")
17879 (set_attr "btver2_decode" "vector")
17880 (set_attr "mode" "TI")])
17881
17882 (define_insn "sse4_2_pcmpistr_cconly"
17883 [(set (reg:CC FLAGS_REG)
17884 (unspec:CC
17885 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
17886 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
17887 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
17888 UNSPEC_PCMPISTR))
17889 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
17890 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
17891 "TARGET_SSE4_2"
17892 "@
17893 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
17894 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
17895 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
17896 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
17897 [(set_attr "type" "sselog")
17898 (set_attr "prefix_data16" "1")
17899 (set_attr "prefix_extra" "1")
17900 (set_attr "length_immediate" "1")
17901 (set_attr "memory" "none,load,none,load")
17902 (set_attr "prefix" "maybe_vex")
17903 (set_attr "btver2_decode" "vector,vector,vector,vector")
17904 (set_attr "mode" "TI")])
17905
17906 ;; Packed float variants
17907 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
17908 [(V8DI "V8SF") (V16SI "V16SF")])
17909
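;; Operand 4 selects the prefetch hint: 3 emits vgatherpf0 (T0 hint) and 2
;; emits vgatherpf1 (T1 hint), as dispatched in the insn output below.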
17910 (define_expand "avx512pf_gatherpf<mode>sf"
17911 [(unspec
17912 [(match_operand:<avx512fmaskmode> 0 "register_operand")
17913 (mem:<GATHER_SCATTER_SF_MEM_MODE>
17914 (match_par_dup 5
17915 [(match_operand 2 "vsib_address_operand")
17916 (match_operand:VI48_512 1 "register_operand")
17917 (match_operand:SI 3 "const1248_operand")]))
17918 (match_operand:SI 4 "const_2_to_3_operand")]
17919 UNSPEC_GATHER_PREFETCH)]
17920 "TARGET_AVX512PF"
17921 {
17922 operands[5]
17923 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
17924 operands[3]), UNSPEC_VSIBADDR);
17925 })
17926
17927 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
17928 [(unspec
17929 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
17930 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
17931 [(unspec:P
17932 [(match_operand:P 2 "vsib_address_operand" "Tv")
17933 (match_operand:VI48_512 1 "register_operand" "v")
17934 (match_operand:SI 3 "const1248_operand" "n")]
17935 UNSPEC_VSIBADDR)])
17936 (match_operand:SI 4 "const_2_to_3_operand" "n")]
17937 UNSPEC_GATHER_PREFETCH)]
17938 "TARGET_AVX512PF"
17939 {
17940 switch (INTVAL (operands[4]))
17941 {
17942 case 3:
17943 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
17944 gas changed what it requires incompatibly. */
17945 return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
17946 case 2:
17947 return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
17948 default:
17949 gcc_unreachable ();
17950 }
17951 }
17952 [(set_attr "type" "sse")
17953 (set_attr "prefix" "evex")
17954 (set_attr "mode" "XI")])
17955
17956 ;; Packed double variants
17957 (define_expand "avx512pf_gatherpf<mode>df"
17958 [(unspec
17959 [(match_operand:<avx512fmaskmode> 0 "register_operand")
17960 (mem:V8DF
17961 (match_par_dup 5
17962 [(match_operand 2 "vsib_address_operand")
17963 (match_operand:VI4_256_8_512 1 "register_operand")
17964 (match_operand:SI 3 "const1248_operand")]))
17965 (match_operand:SI 4 "const_2_to_3_operand")]
17966 UNSPEC_GATHER_PREFETCH)]
17967 "TARGET_AVX512PF"
17968 {
17969 operands[5]
17970 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
17971 operands[3]), UNSPEC_VSIBADDR);
17972 })
17973
17974 (define_insn "*avx512pf_gatherpf<mode>df_mask"
17975 [(unspec
17976 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
17977 (match_operator:V8DF 5 "vsib_mem_operator"
17978 [(unspec:P
17979 [(match_operand:P 2 "vsib_address_operand" "Tv")
17980 (match_operand:VI4_256_8_512 1 "register_operand" "v")
17981 (match_operand:SI 3 "const1248_operand" "n")]
17982 UNSPEC_VSIBADDR)])
17983 (match_operand:SI 4 "const_2_to_3_operand" "n")]
17984 UNSPEC_GATHER_PREFETCH)]
17985 "TARGET_AVX512PF"
17986 {
17987 switch (INTVAL (operands[4]))
17988 {
17989 case 3:
17990 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
17991 gas changed what it requires incompatibly. */
17992 return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
17993 case 2:
17994 return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
17995 default:
17996 gcc_unreachable ();
17997 }
17998 }
17999 [(set_attr "type" "sse")
18000 (set_attr "prefix" "evex")
18001 (set_attr "mode" "XI")])
18002
18003 ;; Packed float variants
18004 (define_expand "avx512pf_scatterpf<mode>sf"
18005 [(unspec
18006 [(match_operand:<avx512fmaskmode> 0 "register_operand")
18007 (mem:<GATHER_SCATTER_SF_MEM_MODE>
18008 (match_par_dup 5
18009 [(match_operand 2 "vsib_address_operand")
18010 (match_operand:VI48_512 1 "register_operand")
18011 (match_operand:SI 3 "const1248_operand")]))
18012 (match_operand:SI 4 "const2367_operand")]
18013 UNSPEC_SCATTER_PREFETCH)]
18014 "TARGET_AVX512PF"
18015 {
18016 operands[5]
18017 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
18018 operands[3]), UNSPEC_VSIBADDR);
18019 })
18020
18021 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
18022 [(unspec
18023 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
18024 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
18025 [(unspec:P
18026 [(match_operand:P 2 "vsib_address_operand" "Tv")
18027 (match_operand:VI48_512 1 "register_operand" "v")
18028 (match_operand:SI 3 "const1248_operand" "n")]
18029 UNSPEC_VSIBADDR)])
18030 (match_operand:SI 4 "const2367_operand" "n")]
18031 UNSPEC_SCATTER_PREFETCH)]
18032 "TARGET_AVX512PF"
18033 {
18034 switch (INTVAL (operands[4]))
18035 {
18036 case 3:
18037 case 7:
18038 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
18039 gas changed what it requires incompatibly. */
18040 return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
18041 case 2:
18042 case 6:
18043 return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
18044 default:
18045 gcc_unreachable ();
18046 }
18047 }
18048 [(set_attr "type" "sse")
18049 (set_attr "prefix" "evex")
18050 (set_attr "mode" "XI")])
18051
18052 ;; Packed double variants
18053 (define_expand "avx512pf_scatterpf<mode>df"
18054 [(unspec
18055 [(match_operand:<avx512fmaskmode> 0 "register_operand")
18056 (mem:V8DF
18057 (match_par_dup 5
18058 [(match_operand 2 "vsib_address_operand")
18059 (match_operand:VI4_256_8_512 1 "register_operand")
18060 (match_operand:SI 3 "const1248_operand")]))
18061 (match_operand:SI 4 "const2367_operand")]
18062 UNSPEC_SCATTER_PREFETCH)]
18063 "TARGET_AVX512PF"
18064 {
18065 operands[5]
18066 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
18067 operands[3]), UNSPEC_VSIBADDR);
18068 })
18069
18070 (define_insn "*avx512pf_scatterpf<mode>df_mask"
18071 [(unspec
18072 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
18073 (match_operator:V8DF 5 "vsib_mem_operator"
18074 [(unspec:P
18075 [(match_operand:P 2 "vsib_address_operand" "Tv")
18076 (match_operand:VI4_256_8_512 1 "register_operand" "v")
18077 (match_operand:SI 3 "const1248_operand" "n")]
18078 UNSPEC_VSIBADDR)])
18079 (match_operand:SI 4 "const2367_operand" "n")]
18080 UNSPEC_SCATTER_PREFETCH)]
18081 "TARGET_AVX512PF"
18082 {
18083 switch (INTVAL (operands[4]))
18084 {
18085 case 3:
18086 case 7:
18087 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
18088 gas changed what it requires incompatibly. */
18089 return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18090 case 2:
18091 case 6:
18092 return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18093 default:
18094 gcc_unreachable ();
18095 }
18096 }
18097 [(set_attr "type" "sse")
18098 (set_attr "prefix" "evex")
18099 (set_attr "mode" "XI")])
18100
18101 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
18102 [(set (match_operand:VF_512 0 "register_operand" "=v")
18103 (unspec:VF_512
18104 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18105 UNSPEC_EXP2))]
18106 "TARGET_AVX512ER"
18107 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18108 [(set_attr "prefix" "evex")
18109 (set_attr "type" "sse")
18110 (set_attr "mode" "<MODE>")])
18111
18112 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
18113 [(set (match_operand:VF_512 0 "register_operand" "=v")
18114 (unspec:VF_512
18115 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18116 UNSPEC_RCP28))]
18117 "TARGET_AVX512ER"
18118 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18119 [(set_attr "prefix" "evex")
18120 (set_attr "type" "sse")
18121 (set_attr "mode" "<MODE>")])
18122
18123 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
18124 [(set (match_operand:VF_128 0 "register_operand" "=v")
18125 (vec_merge:VF_128
18126 (unspec:VF_128
18127 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18128 UNSPEC_RCP28)
18129 (match_operand:VF_128 2 "register_operand" "v")
18130 (const_int 1)))]
18131 "TARGET_AVX512ER"
18132 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
18133 [(set_attr "length_immediate" "1")
18134 (set_attr "prefix" "evex")
18135 (set_attr "type" "sse")
18136 (set_attr "mode" "<MODE>")])
18137
18138 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
18139 [(set (match_operand:VF_512 0 "register_operand" "=v")
18140 (unspec:VF_512
18141 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18142 UNSPEC_RSQRT28))]
18143 "TARGET_AVX512ER"
18144 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18145 [(set_attr "prefix" "evex")
18146 (set_attr "type" "sse")
18147 (set_attr "mode" "<MODE>")])
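;; Illustrative only (a sketch assuming the avx512erintrin.h intrinsics,
;; which are defined elsewhere): vrsqrt28ps/pd approximate 1/sqrt(x) with
;; a relative error of at most 2^-28, e.g.
;;
;;   #include <immintrin.h>
;;   __m512
;;   approx_rsqrt (__m512 x)
;;   {
;;     return _mm512_rsqrt28_ps (x);   /* a single vrsqrt28ps  */
;;   }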
18148
18149 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
18150 [(set (match_operand:VF_128 0 "register_operand" "=v")
18151 (vec_merge:VF_128
18152 (unspec:VF_128
18153 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18154 UNSPEC_RSQRT28)
18155 (match_operand:VF_128 2 "register_operand" "v")
18156 (const_int 1)))]
18157 "TARGET_AVX512ER"
18158 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
18159 [(set_attr "length_immediate" "1")
18160 (set_attr "type" "sse")
18161 (set_attr "prefix" "evex")
18162 (set_attr "mode" "<MODE>")])
18163
18164 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18165 ;;
18166 ;; XOP instructions
18167 ;;
18168 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18169
18170 (define_code_iterator xop_plus [plus ss_plus])
18171
18172 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
18173 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
18174
18175 ;; XOP parallel integer multiply/add instructions.
18176
18177 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
18178 [(set (match_operand:VI24_128 0 "register_operand" "=x")
18179 (xop_plus:VI24_128
18180 (mult:VI24_128
18181 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
18182 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
18183 (match_operand:VI24_128 3 "register_operand" "x")))]
18184 "TARGET_XOP"
18185 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18186 [(set_attr "type" "ssemuladd")
18187 (set_attr "mode" "TI")])
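;; A hedged mapping onto the xopintrin.h multiply/accumulate intrinsics
;; (assumed names, defined elsewhere): the plus variant is the wrapping
;; form, ss_plus the signed-saturating one, e.g.
;;
;;   #include <x86intrin.h>
;;   __m128i mac16  (__m128i a, __m128i b, __m128i c)
;;   { return _mm_macc_epi16 (a, b, c); }    /* vpmacsww   */
;;   __m128i macs16 (__m128i a, __m128i b, __m128i c)
;;   { return _mm_maccs_epi16 (a, b, c); }   /* vpmacssww  */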
18188
18189 (define_insn "xop_p<macs>dql"
18190 [(set (match_operand:V2DI 0 "register_operand" "=x")
18191 (xop_plus:V2DI
18192 (mult:V2DI
18193 (sign_extend:V2DI
18194 (vec_select:V2SI
18195 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
18196 (parallel [(const_int 0) (const_int 2)])))
18197 (sign_extend:V2DI
18198 (vec_select:V2SI
18199 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18200 (parallel [(const_int 0) (const_int 2)]))))
18201 (match_operand:V2DI 3 "register_operand" "x")))]
18202 "TARGET_XOP"
18203 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18204 [(set_attr "type" "ssemuladd")
18205 (set_attr "mode" "TI")])
18206
18207 (define_insn "xop_p<macs>dqh"
18208 [(set (match_operand:V2DI 0 "register_operand" "=x")
18209 (xop_plus:V2DI
18210 (mult:V2DI
18211 (sign_extend:V2DI
18212 (vec_select:V2SI
18213 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
18214 (parallel [(const_int 1) (const_int 3)])))
18215 (sign_extend:V2DI
18216 (vec_select:V2SI
18217 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18218 (parallel [(const_int 1) (const_int 3)]))))
18219 (match_operand:V2DI 3 "register_operand" "x")))]
18220 "TARGET_XOP"
18221 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18222 [(set_attr "type" "ssemuladd")
18223 (set_attr "mode" "TI")])
18224
18225 ;; XOP parallel integer multiply/add instructions for the intrinsics.
18226 (define_insn "xop_p<macs>wd"
18227 [(set (match_operand:V4SI 0 "register_operand" "=x")
18228 (xop_plus:V4SI
18229 (mult:V4SI
18230 (sign_extend:V4SI
18231 (vec_select:V4HI
18232 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
18233 (parallel [(const_int 1) (const_int 3)
18234 (const_int 5) (const_int 7)])))
18235 (sign_extend:V4SI
18236 (vec_select:V4HI
18237 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
18238 (parallel [(const_int 1) (const_int 3)
18239 (const_int 5) (const_int 7)]))))
18240 (match_operand:V4SI 3 "register_operand" "x")))]
18241 "TARGET_XOP"
18242 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18243 [(set_attr "type" "ssemuladd")
18244 (set_attr "mode" "TI")])
18245
18246 (define_insn "xop_p<madcs>wd"
18247 [(set (match_operand:V4SI 0 "register_operand" "=x")
18248 (xop_plus:V4SI
18249 (plus:V4SI
18250 (mult:V4SI
18251 (sign_extend:V4SI
18252 (vec_select:V4HI
18253 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
18254 (parallel [(const_int 0) (const_int 2)
18255 (const_int 4) (const_int 6)])))
18256 (sign_extend:V4SI
18257 (vec_select:V4HI
18258 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
18259 (parallel [(const_int 0) (const_int 2)
18260 (const_int 4) (const_int 6)]))))
18261 (mult:V4SI
18262 (sign_extend:V4SI
18263 (vec_select:V4HI
18264 (match_dup 1)
18265 (parallel [(const_int 1) (const_int 3)
18266 (const_int 5) (const_int 7)])))
18267 (sign_extend:V4SI
18268 (vec_select:V4HI
18269 (match_dup 2)
18270 (parallel [(const_int 1) (const_int 3)
18271 (const_int 5) (const_int 7)])))))
18272 (match_operand:V4SI 3 "register_operand" "x")))]
18273 "TARGET_XOP"
18274 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18275 [(set_attr "type" "ssemuladd")
18276 (set_attr "mode" "TI")])
18277
18278 ;; XOP parallel XMM conditional moves
18279 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
18280 [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
18281 (if_then_else:V_128_256
18282 (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
18283 (match_operand:V_128_256 1 "register_operand" "x,x")
18284 (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
18285 "TARGET_XOP"
18286 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18287 [(set_attr "type" "sse4arg")])
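;; The instruction itself is a bit-wise select: each result bit comes from
;; operand 1 where the corresponding bit of operand 3 is set and from
;; operand 2 where it is clear.  A hedged usage sketch (assuming the
;; xopintrin.h _mm_cmov_si128 intrinsic, defined elsewhere):
;;
;;   #include <x86intrin.h>
;;   __m128i
;;   select_bits (__m128i a, __m128i b, __m128i mask)
;;   {
;;     return _mm_cmov_si128 (a, b, mask);   /* vpcmov  */
;;   }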
18288
18289 ;; XOP horizontal add/subtract instructions
18290 (define_insn "xop_phadd<u>bw"
18291 [(set (match_operand:V8HI 0 "register_operand" "=x")
18292 (plus:V8HI
18293 (any_extend:V8HI
18294 (vec_select:V8QI
18295 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18296 (parallel [(const_int 0) (const_int 2)
18297 (const_int 4) (const_int 6)
18298 (const_int 8) (const_int 10)
18299 (const_int 12) (const_int 14)])))
18300 (any_extend:V8HI
18301 (vec_select:V8QI
18302 (match_dup 1)
18303 (parallel [(const_int 1) (const_int 3)
18304 (const_int 5) (const_int 7)
18305 (const_int 9) (const_int 11)
18306 (const_int 13) (const_int 15)])))))]
18307 "TARGET_XOP"
18308 "vphadd<u>bw\t{%1, %0|%0, %1}"
18309 [(set_attr "type" "sseiadd1")])
18310
18311 (define_insn "xop_phadd<u>bd"
18312 [(set (match_operand:V4SI 0 "register_operand" "=x")
18313 (plus:V4SI
18314 (plus:V4SI
18315 (any_extend:V4SI
18316 (vec_select:V4QI
18317 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18318 (parallel [(const_int 0) (const_int 4)
18319 (const_int 8) (const_int 12)])))
18320 (any_extend:V4SI
18321 (vec_select:V4QI
18322 (match_dup 1)
18323 (parallel [(const_int 1) (const_int 5)
18324 (const_int 9) (const_int 13)]))))
18325 (plus:V4SI
18326 (any_extend:V4SI
18327 (vec_select:V4QI
18328 (match_dup 1)
18329 (parallel [(const_int 2) (const_int 6)
18330 (const_int 10) (const_int 14)])))
18331 (any_extend:V4SI
18332 (vec_select:V4QI
18333 (match_dup 1)
18334 (parallel [(const_int 3) (const_int 7)
18335 (const_int 11) (const_int 15)]))))))]
18336 "TARGET_XOP"
18337 "vphadd<u>bd\t{%1, %0|%0, %1}"
18338 [(set_attr "type" "sseiadd1")])
18339
18340 (define_insn "xop_phadd<u>bq"
18341 [(set (match_operand:V2DI 0 "register_operand" "=x")
18342 (plus:V2DI
18343 (plus:V2DI
18344 (plus:V2DI
18345 (any_extend:V2DI
18346 (vec_select:V2QI
18347 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18348 (parallel [(const_int 0) (const_int 8)])))
18349 (any_extend:V2DI
18350 (vec_select:V2QI
18351 (match_dup 1)
18352 (parallel [(const_int 1) (const_int 9)]))))
18353 (plus:V2DI
18354 (any_extend:V2DI
18355 (vec_select:V2QI
18356 (match_dup 1)
18357 (parallel [(const_int 2) (const_int 10)])))
18358 (any_extend:V2DI
18359 (vec_select:V2QI
18360 (match_dup 1)
18361 (parallel [(const_int 3) (const_int 11)])))))
18362 (plus:V2DI
18363 (plus:V2DI
18364 (any_extend:V2DI
18365 (vec_select:V2QI
18366 (match_dup 1)
18367 (parallel [(const_int 4) (const_int 12)])))
18368 (any_extend:V2DI
18369 (vec_select:V2QI
18370 (match_dup 1)
18371 (parallel [(const_int 5) (const_int 13)]))))
18372 (plus:V2DI
18373 (any_extend:V2DI
18374 (vec_select:V2QI
18375 (match_dup 1)
18376 (parallel [(const_int 6) (const_int 14)])))
18377 (any_extend:V2DI
18378 (vec_select:V2QI
18379 (match_dup 1)
18380 (parallel [(const_int 7) (const_int 15)])))))))]
18381 "TARGET_XOP"
18382 "vphadd<u>bq\t{%1, %0|%0, %1}"
18383 [(set_attr "type" "sseiadd1")])
18384
18385 (define_insn "xop_phadd<u>wd"
18386 [(set (match_operand:V4SI 0 "register_operand" "=x")
18387 (plus:V4SI
18388 (any_extend:V4SI
18389 (vec_select:V4HI
18390 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
18391 (parallel [(const_int 0) (const_int 2)
18392 (const_int 4) (const_int 6)])))
18393 (any_extend:V4SI
18394 (vec_select:V4HI
18395 (match_dup 1)
18396 (parallel [(const_int 1) (const_int 3)
18397 (const_int 5) (const_int 7)])))))]
18398 "TARGET_XOP"
18399 "vphadd<u>wd\t{%1, %0|%0, %1}"
18400 [(set_attr "type" "sseiadd1")])
18401
18402 (define_insn "xop_phadd<u>wq"
18403 [(set (match_operand:V2DI 0 "register_operand" "=x")
18404 (plus:V2DI
18405 (plus:V2DI
18406 (any_extend:V2DI
18407 (vec_select:V2HI
18408 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
18409 (parallel [(const_int 0) (const_int 4)])))
18410 (any_extend:V2DI
18411 (vec_select:V2HI
18412 (match_dup 1)
18413 (parallel [(const_int 1) (const_int 5)]))))
18414 (plus:V2DI
18415 (any_extend:V2DI
18416 (vec_select:V2HI
18417 (match_dup 1)
18418 (parallel [(const_int 2) (const_int 6)])))
18419 (any_extend:V2DI
18420 (vec_select:V2HI
18421 (match_dup 1)
18422 (parallel [(const_int 3) (const_int 7)]))))))]
18423 "TARGET_XOP"
18424 "vphadd<u>wq\t{%1, %0|%0, %1}"
18425 [(set_attr "type" "sseiadd1")])
18426
18427 (define_insn "xop_phadd<u>dq"
18428 [(set (match_operand:V2DI 0 "register_operand" "=x")
18429 (plus:V2DI
18430 (any_extend:V2DI
18431 (vec_select:V2SI
18432 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
18433 (parallel [(const_int 0) (const_int 2)])))
18434 (any_extend:V2DI
18435 (vec_select:V2SI
18436 (match_dup 1)
18437 (parallel [(const_int 1) (const_int 3)])))))]
18438 "TARGET_XOP"
18439 "vphadd<u>dq\t{%1, %0|%0, %1}"
18440 [(set_attr "type" "sseiadd1")])
18441
18442 (define_insn "xop_phsubbw"
18443 [(set (match_operand:V8HI 0 "register_operand" "=x")
18444 (minus:V8HI
18445 (sign_extend:V8HI
18446 (vec_select:V8QI
18447 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18448 (parallel [(const_int 0) (const_int 2)
18449 (const_int 4) (const_int 6)
18450 (const_int 8) (const_int 10)
18451 (const_int 12) (const_int 14)])))
18452 (sign_extend:V8HI
18453 (vec_select:V8QI
18454 (match_dup 1)
18455 (parallel [(const_int 1) (const_int 3)
18456 (const_int 5) (const_int 7)
18457 (const_int 9) (const_int 11)
18458 (const_int 13) (const_int 15)])))))]
18459 "TARGET_XOP"
18460 "vphsubbw\t{%1, %0|%0, %1}"
18461 [(set_attr "type" "sseiadd1")])
18462
18463 (define_insn "xop_phsubwd"
18464 [(set (match_operand:V4SI 0 "register_operand" "=x")
18465 (minus:V4SI
18466 (sign_extend:V4SI
18467 (vec_select:V4HI
18468 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
18469 (parallel [(const_int 0) (const_int 2)
18470 (const_int 4) (const_int 6)])))
18471 (sign_extend:V4SI
18472 (vec_select:V4HI
18473 (match_dup 1)
18474 (parallel [(const_int 1) (const_int 3)
18475 (const_int 5) (const_int 7)])))))]
18476 "TARGET_XOP"
18477 "vphsubwd\t{%1, %0|%0, %1}"
18478 [(set_attr "type" "sseiadd1")])
18479
18480 (define_insn "xop_phsubdq"
18481 [(set (match_operand:V2DI 0 "register_operand" "=x")
18482 (minus:V2DI
18483 (sign_extend:V2DI
18484 (vec_select:V2SI
18485 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
18486 (parallel [(const_int 0) (const_int 2)])))
18487 (sign_extend:V2DI
18488 (vec_select:V2SI
18489 (match_dup 1)
18490 (parallel [(const_int 1) (const_int 3)])))))]
18491 "TARGET_XOP"
18492 "vphsubdq\t{%1, %0|%0, %1}"
18493 [(set_attr "type" "sseiadd1")])
18494
18495 ;; XOP permute instructions
18496 (define_insn "xop_pperm"
18497 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
18498 (unspec:V16QI
18499 [(match_operand:V16QI 1 "register_operand" "x,x")
18500 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
18501 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
18502 UNSPEC_XOP_PERMUTE))]
18503 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18504 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18505 [(set_attr "type" "sse4arg")
18506 (set_attr "mode" "TI")])
18507
18508 ;; XOP pack instructions that combine two vectors into one with narrower elements
18509 (define_insn "xop_pperm_pack_v2di_v4si"
18510 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
18511 (vec_concat:V4SI
18512 (truncate:V2SI
18513 (match_operand:V2DI 1 "register_operand" "x,x"))
18514 (truncate:V2SI
18515 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
18516 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
18517 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18518 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18519 [(set_attr "type" "sse4arg")
18520 (set_attr "mode" "TI")])
18521
18522 (define_insn "xop_pperm_pack_v4si_v8hi"
18523 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
18524 (vec_concat:V8HI
18525 (truncate:V4HI
18526 (match_operand:V4SI 1 "register_operand" "x,x"))
18527 (truncate:V4HI
18528 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
18529 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
18530 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18531 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18532 [(set_attr "type" "sse4arg")
18533 (set_attr "mode" "TI")])
18534
18535 (define_insn "xop_pperm_pack_v8hi_v16qi"
18536 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
18537 (vec_concat:V16QI
18538 (truncate:V8QI
18539 (match_operand:V8HI 1 "register_operand" "x,x"))
18540 (truncate:V8QI
18541 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
18542 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
18543 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18544 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18545 [(set_attr "type" "sse4arg")
18546 (set_attr "mode" "TI")])
18547
18548 ;; XOP packed rotate instructions
18549 (define_expand "rotl<mode>3"
18550 [(set (match_operand:VI_128 0 "register_operand")
18551 (rotate:VI_128
18552 (match_operand:VI_128 1 "nonimmediate_operand")
18553 (match_operand:SI 2 "general_operand")))]
18554 "TARGET_XOP"
18555 {
18556 /* If we were given a scalar, convert it to a parallel.  */
18557 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
18558 {
18559 rtvec vs = rtvec_alloc (<ssescalarnum>);
18560 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
18561 rtx reg = gen_reg_rtx (<MODE>mode);
18562 rtx op2 = operands[2];
18563 int i;
18564
18565 if (GET_MODE (op2) != <ssescalarmode>mode)
18566 {
18567 op2 = gen_reg_rtx (<ssescalarmode>mode);
18568 convert_move (op2, operands[2], false);
18569 }
18570
18571 for (i = 0; i < <ssescalarnum>; i++)
18572 RTVEC_ELT (vs, i) = op2;
18573
18574 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
18575 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
18576 DONE;
18577 }
18578 })
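;; Illustrative sketch (an assumption, not taken from this file): with a
;; GCC vector type, a rotate idiom the middle end recognizes reaches this
;; expander; a constant count matches xop_rotl<mode>3 below, while a
;; variable scalar count is first broadcast into a vector and lowered
;; through xop_vrotl<mode>3.
;;
;;   typedef unsigned int v4si __attribute__ ((vector_size (16)));
;;   /* Expected to become vprotd $5 under -mxop.  */
;;   v4si rot5 (v4si x) { return (x << 5) | (x >> 27); }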
18579
18580 (define_expand "rotr<mode>3"
18581 [(set (match_operand:VI_128 0 "register_operand")
18582 (rotatert:VI_128
18583 (match_operand:VI_128 1 "nonimmediate_operand")
18584 (match_operand:SI 2 "general_operand")))]
18585 "TARGET_XOP"
18586 {
18587 /* If we were given a scalar, convert it to a parallel.  */
18588 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
18589 {
18590 rtvec vs = rtvec_alloc (<ssescalarnum>);
18591 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
18592 rtx neg = gen_reg_rtx (<MODE>mode);
18593 rtx reg = gen_reg_rtx (<MODE>mode);
18594 rtx op2 = operands[2];
18595 int i;
18596
18597 if (GET_MODE (op2) != <ssescalarmode>mode)
18598 {
18599 op2 = gen_reg_rtx (<ssescalarmode>mode);
18600 convert_move (op2, operands[2], false);
18601 }
18602
18603 for (i = 0; i < <ssescalarnum>; i++)
18604 RTVEC_ELT (vs, i) = op2;
18605
18606 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
18607 emit_insn (gen_neg<mode>2 (neg, reg));
18608 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
18609 DONE;
18610 }
18611 })
18612
18613 (define_insn "xop_rotl<mode>3"
18614 [(set (match_operand:VI_128 0 "register_operand" "=x")
18615 (rotate:VI_128
18616 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
18617 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
18618 "TARGET_XOP"
18619 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18620 [(set_attr "type" "sseishft")
18621 (set_attr "length_immediate" "1")
18622 (set_attr "mode" "TI")])
18623
18624 (define_insn "xop_rotr<mode>3"
18625 [(set (match_operand:VI_128 0 "register_operand" "=x")
18626 (rotatert:VI_128
18627 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
18628 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
18629 "TARGET_XOP"
18630 {
18631 operands[3]
18632 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
18633 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
18634 }
18635 [(set_attr "type" "sseishft")
18636 (set_attr "length_immediate" "1")
18637 (set_attr "mode" "TI")])
18638
18639 (define_expand "vrotr<mode>3"
18640 [(match_operand:VI_128 0 "register_operand")
18641 (match_operand:VI_128 1 "register_operand")
18642 (match_operand:VI_128 2 "register_operand")]
18643 "TARGET_XOP"
18644 {
18645 rtx reg = gen_reg_rtx (<MODE>mode);
18646 emit_insn (gen_neg<mode>2 (reg, operands[2]));
18647 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
18648 DONE;
18649 })
18650
18651 (define_expand "vrotl<mode>3"
18652 [(match_operand:VI_128 0 "register_operand")
18653 (match_operand:VI_128 1 "register_operand")
18654 (match_operand:VI_128 2 "register_operand")]
18655 "TARGET_XOP"
18656 {
18657 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
18658 DONE;
18659 })
18660
18661 (define_insn "xop_vrotl<mode>3"
18662 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
18663 (if_then_else:VI_128
18664 (ge:VI_128
18665 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
18666 (const_int 0))
18667 (rotate:VI_128
18668 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
18669 (match_dup 2))
18670 (rotatert:VI_128
18671 (match_dup 1)
18672 (neg:VI_128 (match_dup 2)))))]
18673 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18674 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18675 [(set_attr "type" "sseishft")
18676 (set_attr "prefix_data16" "0")
18677 (set_attr "prefix_extra" "2")
18678 (set_attr "mode" "TI")])
18679
18680 ;; XOP packed shift instructions.
18681 (define_expand "vlshr<mode>3"
18682 [(set (match_operand:VI12_128 0 "register_operand")
18683 (lshiftrt:VI12_128
18684 (match_operand:VI12_128 1 "register_operand")
18685 (match_operand:VI12_128 2 "nonimmediate_operand")))]
18686 "TARGET_XOP"
18687 {
18688 rtx neg = gen_reg_rtx (<MODE>mode);
18689 emit_insn (gen_neg<mode>2 (neg, operands[2]));
18690 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
18691 DONE;
18692 })
18693
18694 (define_expand "vlshr<mode>3"
18695 [(set (match_operand:VI48_128 0 "register_operand")
18696 (lshiftrt:VI48_128
18697 (match_operand:VI48_128 1 "register_operand")
18698 (match_operand:VI48_128 2 "nonimmediate_operand")))]
18699 "TARGET_AVX2 || TARGET_XOP"
18700 {
18701 if (!TARGET_AVX2)
18702 {
18703 rtx neg = gen_reg_rtx (<MODE>mode);
18704 emit_insn (gen_neg<mode>2 (neg, operands[2]));
18705 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
18706 DONE;
18707 }
18708 })
18709
18710 (define_expand "vlshr<mode>3"
18711 [(set (match_operand:VI48_512 0 "register_operand")
18712 (lshiftrt:VI48_512
18713 (match_operand:VI48_512 1 "register_operand")
18714 (match_operand:VI48_512 2 "nonimmediate_operand")))]
18715 "TARGET_AVX512F")
18716
18717 (define_expand "vlshr<mode>3"
18718 [(set (match_operand:VI48_256 0 "register_operand")
18719 (lshiftrt:VI48_256
18720 (match_operand:VI48_256 1 "register_operand")
18721 (match_operand:VI48_256 2 "nonimmediate_operand")))]
18722 "TARGET_AVX2")
18723
18724 (define_expand "vashrv8hi3<mask_name>"
18725 [(set (match_operand:V8HI 0 "register_operand")
18726 (ashiftrt:V8HI
18727 (match_operand:V8HI 1 "register_operand")
18728 (match_operand:V8HI 2 "nonimmediate_operand")))]
18729 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
18730 {
18731 if (TARGET_XOP)
18732 {
18733 rtx neg = gen_reg_rtx (V8HImode);
18734 emit_insn (gen_negv8hi2 (neg, operands[2]));
18735 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
18736 DONE;
18737 }
18738 })
18739
18740 (define_expand "vashrv16qi3"
18741 [(set (match_operand:V16QI 0 "register_operand")
18742 (ashiftrt:V16QI
18743 (match_operand:V16QI 1 "register_operand")
18744 (match_operand:V16QI 2 "nonimmediate_operand")))]
18745 "TARGET_XOP"
18746 {
18747 rtx neg = gen_reg_rtx (V16QImode);
18748 emit_insn (gen_negv16qi2 (neg, operands[2]));
18749 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
18750 DONE;
18751 })
18752
18753 (define_expand "vashrv2di3<mask_name>"
18754 [(set (match_operand:V2DI 0 "register_operand")
18755 (ashiftrt:V2DI
18756 (match_operand:V2DI 1 "register_operand")
18757 (match_operand:V2DI 2 "nonimmediate_operand")))]
18758 "TARGET_XOP || TARGET_AVX512VL"
18759 {
18760 if (TARGET_XOP)
18761 {
18762 rtx neg = gen_reg_rtx (V2DImode);
18763 emit_insn (gen_negv2di2 (neg, operands[2]));
18764 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
18765 DONE;
18766 }
18767 })
18768
18769 (define_expand "vashrv4si3"
18770 [(set (match_operand:V4SI 0 "register_operand")
18771 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
18772 (match_operand:V4SI 2 "nonimmediate_operand")))]
18773 "TARGET_AVX2 || TARGET_XOP"
18774 {
18775 if (!TARGET_AVX2)
18776 {
18777 rtx neg = gen_reg_rtx (V4SImode);
18778 emit_insn (gen_negv4si2 (neg, operands[2]));
18779 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
18780 DONE;
18781 }
18782 })
18783
18784 (define_expand "vashrv16si3"
18785 [(set (match_operand:V16SI 0 "register_operand")
18786 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
18787 (match_operand:V16SI 2 "nonimmediate_operand")))]
18788 "TARGET_AVX512F")
18789
18790 (define_expand "vashrv8si3"
18791 [(set (match_operand:V8SI 0 "register_operand")
18792 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
18793 (match_operand:V8SI 2 "nonimmediate_operand")))]
18794 "TARGET_AVX2")
18795
18796 (define_expand "vashl<mode>3"
18797 [(set (match_operand:VI12_128 0 "register_operand")
18798 (ashift:VI12_128
18799 (match_operand:VI12_128 1 "register_operand")
18800 (match_operand:VI12_128 2 "nonimmediate_operand")))]
18801 "TARGET_XOP"
18802 {
18803 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
18804 DONE;
18805 })
18806
18807 (define_expand "vashl<mode>3"
18808 [(set (match_operand:VI48_128 0 "register_operand")
18809 (ashift:VI48_128
18810 (match_operand:VI48_128 1 "register_operand")
18811 (match_operand:VI48_128 2 "nonimmediate_operand")))]
18812 "TARGET_AVX2 || TARGET_XOP"
18813 {
18814 if (!TARGET_AVX2)
18815 {
18816 operands[2] = force_reg (<MODE>mode, operands[2]);
18817 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
18818 DONE;
18819 }
18820 })
18821
18822 (define_expand "vashl<mode>3"
18823 [(set (match_operand:VI48_512 0 "register_operand")
18824 (ashift:VI48_512
18825 (match_operand:VI48_512 1 "register_operand")
18826 (match_operand:VI48_512 2 "nonimmediate_operand")))]
18827 "TARGET_AVX512F")
18828
18829 (define_expand "vashl<mode>3"
18830 [(set (match_operand:VI48_256 0 "register_operand")
18831 (ashift:VI48_256
18832 (match_operand:VI48_256 1 "register_operand")
18833 (match_operand:VI48_256 2 "nonimmediate_operand")))]
18834 "TARGET_AVX2")
18835
18836 (define_insn "xop_sha<mode>3"
18837 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
18838 (if_then_else:VI_128
18839 (ge:VI_128
18840 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
18841 (const_int 0))
18842 (ashift:VI_128
18843 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
18844 (match_dup 2))
18845 (ashiftrt:VI_128
18846 (match_dup 1)
18847 (neg:VI_128 (match_dup 2)))))]
18848 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18849 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18850 [(set_attr "type" "sseishft")
18851 (set_attr "prefix_data16" "0")
18852 (set_attr "prefix_extra" "2")
18853 (set_attr "mode" "TI")])
18854
18855 (define_insn "xop_shl<mode>3"
18856 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
18857 (if_then_else:VI_128
18858 (ge:VI_128
18859 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
18860 (const_int 0))
18861 (ashift:VI_128
18862 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
18863 (match_dup 2))
18864 (lshiftrt:VI_128
18865 (match_dup 1)
18866 (neg:VI_128 (match_dup 2)))))]
18867 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18868 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18869 [(set_attr "type" "sseishft")
18870 (set_attr "prefix_data16" "0")
18871 (set_attr "prefix_extra" "2")
18872 (set_attr "mode" "TI")])
18873
18874 (define_expand "<shift_insn><mode>3"
18875 [(set (match_operand:VI1_AVX512 0 "register_operand")
18876 (any_shift:VI1_AVX512
18877 (match_operand:VI1_AVX512 1 "register_operand")
18878 (match_operand:SI 2 "nonmemory_operand")))]
18879 "TARGET_SSE2"
18880 {
18881 if (TARGET_XOP && <MODE>mode == V16QImode)
18882 {
18883 bool negate = false;
18884 rtx (*gen) (rtx, rtx, rtx);
18885 rtx tmp, par;
18886 int i;
18887
18888 if (<CODE> != ASHIFT)
18889 {
18890 if (CONST_INT_P (operands[2]))
18891 operands[2] = GEN_INT (-INTVAL (operands[2]));
18892 else
18893 negate = true;
18894 }
18895 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
18896 for (i = 0; i < 16; i++)
18897 XVECEXP (par, 0, i) = operands[2];
18898
18899 tmp = gen_reg_rtx (V16QImode);
18900 emit_insn (gen_vec_initv16qiqi (tmp, par));
18901
18902 if (negate)
18903 emit_insn (gen_negv16qi2 (tmp, tmp));
18904
18905 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
18906 emit_insn (gen (operands[0], operands[1], tmp));
18907 }
18908 else
18909 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
18910 DONE;
18911 })
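;; For the XOP path above: vpsha*/vpshl* shift left for positive counts
;; and right for negative ones, which is why a right shift negates the
;; broadcast count first.  A hedged sketch using GCC vector extensions:
;;
;;   typedef signed char v16qi __attribute__ ((vector_size (16)));
;;   /* With -mxop this is expected to become vpshab with a count
;;      vector of -3.  */
;;   v16qi sra3 (v16qi x) { return x >> 3; }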
18912
18913 (define_expand "ashrv2di3"
18914 [(set (match_operand:V2DI 0 "register_operand")
18915 (ashiftrt:V2DI
18916 (match_operand:V2DI 1 "register_operand")
18917 (match_operand:DI 2 "nonmemory_operand")))]
18918 "TARGET_XOP || TARGET_AVX512VL"
18919 {
18920 if (!TARGET_AVX512VL)
18921 {
18922 rtx reg = gen_reg_rtx (V2DImode);
18923 rtx par;
18924 bool negate = false;
18925 int i;
18926
18927 if (CONST_INT_P (operands[2]))
18928 operands[2] = GEN_INT (-INTVAL (operands[2]));
18929 else
18930 negate = true;
18931
18932 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
18933 for (i = 0; i < 2; i++)
18934 XVECEXP (par, 0, i) = operands[2];
18935
18936 emit_insn (gen_vec_initv2didi (reg, par));
18937
18938 if (negate)
18939 emit_insn (gen_negv2di2 (reg, reg));
18940
18941 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
18942 DONE;
18943 }
18944 })
18945
18946 ;; XOP FRCZ support
18947 (define_insn "xop_frcz<mode>2"
18948 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
18949 (unspec:FMAMODE
18950 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
18951 UNSPEC_FRCZ))]
18952 "TARGET_XOP"
18953 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
18954 [(set_attr "type" "ssecvt1")
18955 (set_attr "mode" "<MODE>")])
18956
18957 (define_expand "xop_vmfrcz<mode>2"
18958 [(set (match_operand:VF_128 0 "register_operand")
18959 (vec_merge:VF_128
18960 (unspec:VF_128
18961 [(match_operand:VF_128 1 "nonimmediate_operand")]
18962 UNSPEC_FRCZ)
18963 (match_dup 2)
18964 (const_int 1)))]
18965 "TARGET_XOP"
18966 "operands[2] = CONST0_RTX (<MODE>mode);")
18967
18968 (define_insn "*xop_vmfrcz<mode>2"
18969 [(set (match_operand:VF_128 0 "register_operand" "=x")
18970 (vec_merge:VF_128
18971 (unspec:VF_128
18972 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
18973 UNSPEC_FRCZ)
18974 (match_operand:VF_128 2 "const0_operand")
18975 (const_int 1)))]
18976 "TARGET_XOP"
18977 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
18978 [(set_attr "type" "ssecvt1")
18979 (set_attr "mode" "<MODE>")])
18980
18981 (define_insn "xop_maskcmp<mode>3"
18982 [(set (match_operand:VI_128 0 "register_operand" "=x")
18983 (match_operator:VI_128 1 "ix86_comparison_int_operator"
18984 [(match_operand:VI_128 2 "register_operand" "x")
18985 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
18986 "TARGET_XOP"
18987 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
18988 [(set_attr "type" "sse4arg")
18989 (set_attr "prefix_data16" "0")
18990 (set_attr "prefix_rep" "0")
18991 (set_attr "prefix_extra" "2")
18992 (set_attr "length_immediate" "1")
18993 (set_attr "mode" "TI")])
18994
18995 (define_insn "xop_maskcmp_uns<mode>3"
18996 [(set (match_operand:VI_128 0 "register_operand" "=x")
18997 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
18998 [(match_operand:VI_128 2 "register_operand" "x")
18999 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
19000 "TARGET_XOP"
19001 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
19002 [(set_attr "type" "ssecmp")
19003 (set_attr "prefix_data16" "0")
19004 (set_attr "prefix_rep" "0")
19005 (set_attr "prefix_extra" "2")
19006 (set_attr "length_immediate" "1")
19007 (set_attr "mode" "TI")])
19008
19009 ;; Version of pcom*u* that is called from the intrinsics.  It keeps pcomequ*
19010 ;; and pcomneu* from being converted to the signed forms, in case somebody
19011 ;; needs the exact instruction generated for the intrinsic.
19012 (define_insn "xop_maskcmp_uns2<mode>3"
19013 [(set (match_operand:VI_128 0 "register_operand" "=x")
19014 (unspec:VI_128
19015 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
19016 [(match_operand:VI_128 2 "register_operand" "x")
19017 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
19018 UNSPEC_XOP_UNSIGNED_CMP))]
19019 "TARGET_XOP"
19020 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
19021 [(set_attr "type" "ssecmp")
19022 (set_attr "prefix_data16" "0")
19023 (set_attr "prefix_extra" "2")
19024 (set_attr "length_immediate" "1")
19025 (set_attr "mode" "TI")])
19026
19027 ;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
19028 ;; included here for completeness.
19029 (define_insn "xop_pcom_tf<mode>3"
19030 [(set (match_operand:VI_128 0 "register_operand" "=x")
19031 (unspec:VI_128
19032 [(match_operand:VI_128 1 "register_operand" "x")
19033 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
19034 (match_operand:SI 3 "const_int_operand" "n")]
19035 UNSPEC_XOP_TRUEFALSE))]
19036 "TARGET_XOP"
19037 {
19038 return ((INTVAL (operands[3]) != 0)
19039 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19040 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
19041 }
19042 [(set_attr "type" "ssecmp")
19043 (set_attr "prefix_data16" "0")
19044 (set_attr "prefix_extra" "2")
19045 (set_attr "length_immediate" "1")
19046 (set_attr "mode" "TI")])
19047
19048 (define_insn "xop_vpermil2<mode>3"
19049 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
19050 (unspec:VF_128_256
19051 [(match_operand:VF_128_256 1 "register_operand" "x,x")
19052 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
19053 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
19054 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
19055 UNSPEC_VPERMIL2))]
19056 "TARGET_XOP"
19057 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
19058 [(set_attr "type" "sse4arg")
19059 (set_attr "length_immediate" "1")
19060 (set_attr "mode" "<MODE>")])
19061
19062 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19063
19064 (define_insn "aesenc"
19065 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19066 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19067 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19068 UNSPEC_AESENC))]
19069 "TARGET_AES"
19070 "@
19071 aesenc\t{%2, %0|%0, %2}
19072 vaesenc\t{%2, %1, %0|%0, %1, %2}"
19073 [(set_attr "isa" "noavx,avx")
19074 (set_attr "type" "sselog1")
19075 (set_attr "prefix_extra" "1")
19076 (set_attr "prefix" "orig,vex")
19077 (set_attr "btver2_decode" "double,double")
19078 (set_attr "mode" "TI")])
19079
19080 (define_insn "aesenclast"
19081 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19082 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19083 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19084 UNSPEC_AESENCLAST))]
19085 "TARGET_AES"
19086 "@
19087 aesenclast\t{%2, %0|%0, %2}
19088 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
19089 [(set_attr "isa" "noavx,avx")
19090 (set_attr "type" "sselog1")
19091 (set_attr "prefix_extra" "1")
19092 (set_attr "prefix" "orig,vex")
19093 (set_attr "btver2_decode" "double,double")
19094 (set_attr "mode" "TI")])
19095
19096 (define_insn "aesdec"
19097 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19098 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19099 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19100 UNSPEC_AESDEC))]
19101 "TARGET_AES"
19102 "@
19103 aesdec\t{%2, %0|%0, %2}
19104 vaesdec\t{%2, %1, %0|%0, %1, %2}"
19105 [(set_attr "isa" "noavx,avx")
19106 (set_attr "type" "sselog1")
19107 (set_attr "prefix_extra" "1")
19108 (set_attr "prefix" "orig,vex")
19109 (set_attr "btver2_decode" "double,double")
19110 (set_attr "mode" "TI")])
19111
19112 (define_insn "aesdeclast"
19113 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19114 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19115 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19116 UNSPEC_AESDECLAST))]
19117 "TARGET_AES"
19118 "@
19119 aesdeclast\t{%2, %0|%0, %2}
19120 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
19121 [(set_attr "isa" "noavx,avx")
19122 (set_attr "type" "sselog1")
19123 (set_attr "prefix_extra" "1")
19124 (set_attr "prefix" "orig,vex")
19125 (set_attr "btver2_decode" "double,double")
19126 (set_attr "mode" "TI")])
19127
19128 (define_insn "aesimc"
19129 [(set (match_operand:V2DI 0 "register_operand" "=x")
19130 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
19131 UNSPEC_AESIMC))]
19132 "TARGET_AES"
19133 "%vaesimc\t{%1, %0|%0, %1}"
19134 [(set_attr "type" "sselog1")
19135 (set_attr "prefix_extra" "1")
19136 (set_attr "prefix" "maybe_vex")
19137 (set_attr "mode" "TI")])
19138
19139 (define_insn "aeskeygenassist"
19140 [(set (match_operand:V2DI 0 "register_operand" "=x")
19141 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
19142 (match_operand:SI 2 "const_0_to_255_operand" "n")]
19143 UNSPEC_AESKEYGENASSIST))]
19144 "TARGET_AES"
19145 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
19146 [(set_attr "type" "sselog1")
19147 (set_attr "prefix_extra" "1")
19148 (set_attr "length_immediate" "1")
19149 (set_attr "prefix" "maybe_vex")
19150 (set_attr "mode" "TI")])
19151
19152 (define_insn "pclmulqdq"
19153 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19154 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19155 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
19156 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
19157 UNSPEC_PCLMUL))]
19158 "TARGET_PCLMUL"
19159 "@
19160 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
19161 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19162 [(set_attr "isa" "noavx,avx")
19163 (set_attr "type" "sselog1")
19164 (set_attr "prefix_extra" "1")
19165 (set_attr "length_immediate" "1")
19166 (set_attr "prefix" "orig,vex")
19167 (set_attr "mode" "TI")])
19168
19169 (define_expand "avx_vzeroall"
19170 [(match_par_dup 0 [(const_int 0)])]
19171 "TARGET_AVX"
19172 {
19173 int nregs = TARGET_64BIT ? 16 : 8;
19174 int regno;
19175
19176 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
19177
19178 XVECEXP (operands[0], 0, 0)
19179 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
19180 UNSPECV_VZEROALL);
19181
19182 for (regno = 0; regno < nregs; regno++)
19183 XVECEXP (operands[0], 0, regno + 1)
19184 = gen_rtx_SET (gen_rtx_REG (V8SImode, GET_SSE_REGNO (regno)),
19185 CONST0_RTX (V8SImode));
19186 })
19187
19188 (define_insn "*avx_vzeroall"
19189 [(match_parallel 0 "vzeroall_operation"
19190 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
19191 "TARGET_AVX"
19192 "vzeroall"
19193 [(set_attr "type" "sse")
19194 (set_attr "modrm" "0")
19195 (set_attr "memory" "none")
19196 (set_attr "prefix" "vex")
19197 (set_attr "btver2_decode" "vector")
19198 (set_attr "mode" "OI")])
19199
19200 ;; Clear the upper 128 bits of the AVX registers, equivalent to a NOP
19201 ;; if those upper 128 bits are unused.
19202 (define_insn "avx_vzeroupper"
19203 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
19204 "TARGET_AVX"
19205 "vzeroupper"
19206 [(set_attr "type" "sse")
19207 (set_attr "modrm" "0")
19208 (set_attr "memory" "none")
19209 (set_attr "prefix" "vex")
19210 (set_attr "btver2_decode" "vector")
19211 (set_attr "mode" "OI")])
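;; Besides the explicit _mm256_zeroupper () intrinsic, GCC also emits this
;; pattern automatically when -mvzeroupper is in effect, before calls and
;; returns that may reach legacy SSE code, to avoid the AVX/SSE transition
;; penalty.  A minimal usage sketch:
;;
;;   #include <immintrin.h>
;;   void flush_upper (void) { _mm256_zeroupper (); }   /* vzeroupper  */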
19212
19213 (define_mode_attr pbroadcast_evex_isa
19214 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
19215 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
19216 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
19217 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
19218
19219 (define_insn "avx2_pbroadcast<mode>"
19220 [(set (match_operand:VI 0 "register_operand" "=x,v")
19221 (vec_duplicate:VI
19222 (vec_select:<ssescalarmode>
19223 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
19224 (parallel [(const_int 0)]))))]
19225 "TARGET_AVX2"
19226 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
19227 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
19228 (set_attr "type" "ssemov")
19229 (set_attr "prefix_extra" "1")
19230 (set_attr "prefix" "vex,evex")
19231 (set_attr "mode" "<sseinsnmode>")])
19232
19233 (define_insn "avx2_pbroadcast<mode>_1"
19234 [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
19235 (vec_duplicate:VI_256
19236 (vec_select:<ssescalarmode>
19237 (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
19238 (parallel [(const_int 0)]))))]
19239 "TARGET_AVX2"
19240 "@
19241 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
19242 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
19243 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
19244 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
19245 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
19246 (set_attr "type" "ssemov")
19247 (set_attr "prefix_extra" "1")
19248 (set_attr "prefix" "vex")
19249 (set_attr "mode" "<sseinsnmode>")])
19250
19251 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
19252 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
19253 (unspec:VI48F_256_512
19254 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
19255 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19256 UNSPEC_VPERMVAR))]
19257 "TARGET_AVX2 && <mask_mode512bit_condition>"
19258 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19259 [(set_attr "type" "sselog")
19260 (set_attr "prefix" "<mask_prefix2>")
19261 (set_attr "mode" "<sseinsnmode>")])
19262
19263 (define_insn "<avx512>_permvar<mode><mask_name>"
19264 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
19265 (unspec:VI1_AVX512VL
19266 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
19267 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19268 UNSPEC_VPERMVAR))]
19269 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
19270 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19271 [(set_attr "type" "sselog")
19272 (set_attr "prefix" "<mask_prefix2>")
19273 (set_attr "mode" "<sseinsnmode>")])
19274
19275 (define_insn "<avx512>_permvar<mode><mask_name>"
19276 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
19277 (unspec:VI2_AVX512VL
19278 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
19279 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19280 UNSPEC_VPERMVAR))]
19281 "TARGET_AVX512BW && <mask_mode512bit_condition>"
19282 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19283 [(set_attr "type" "sselog")
19284 (set_attr "prefix" "<mask_prefix2>")
19285 (set_attr "mode" "<sseinsnmode>")])
19286
19287 (define_expand "avx2_perm<mode>"
19288 [(match_operand:VI8F_256 0 "register_operand")
19289 (match_operand:VI8F_256 1 "nonimmediate_operand")
19290 (match_operand:SI 2 "const_0_to_255_operand")]
19291 "TARGET_AVX2"
19292 {
19293 int mask = INTVAL (operands[2]);
19294 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
19295 GEN_INT ((mask >> 0) & 3),
19296 GEN_INT ((mask >> 2) & 3),
19297 GEN_INT ((mask >> 4) & 3),
19298 GEN_INT ((mask >> 6) & 3)));
19299 DONE;
19300 })
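;; Worked example (hedged; the intrinsic lives in avx2intrin.h, not here):
;; a selector of 0x4e splits into (0x4e >> 0) & 3 == 2, (0x4e >> 2) & 3 == 3,
;; (0x4e >> 4) & 3 == 0 and (0x4e >> 6) & 3 == 1, i.e. element order
;; {2,3,0,1}, which swaps the two 128-bit halves:
;;
;;   #include <immintrin.h>
;;   __m256i
;;   swap_halves (__m256i x)
;;   {
;;     return _mm256_permute4x64_epi64 (x, 0x4e);   /* vpermq $0x4e  */
;;   }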
19301
19302 (define_expand "avx512vl_perm<mode>_mask"
19303 [(match_operand:VI8F_256 0 "register_operand")
19304 (match_operand:VI8F_256 1 "nonimmediate_operand")
19305 (match_operand:SI 2 "const_0_to_255_operand")
19306 (match_operand:VI8F_256 3 "nonimm_or_0_operand")
19307 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19308 "TARGET_AVX512VL"
19309 {
19310 int mask = INTVAL (operands[2]);
19311 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
19312 GEN_INT ((mask >> 0) & 3),
19313 GEN_INT ((mask >> 2) & 3),
19314 GEN_INT ((mask >> 4) & 3),
19315 GEN_INT ((mask >> 6) & 3),
19316 operands[3], operands[4]));
19317 DONE;
19318 })
19319
19320 (define_insn "avx2_perm<mode>_1<mask_name>"
19321 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
19322 (vec_select:VI8F_256
19323 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
19324 (parallel [(match_operand 2 "const_0_to_3_operand")
19325 (match_operand 3 "const_0_to_3_operand")
19326 (match_operand 4 "const_0_to_3_operand")
19327 (match_operand 5 "const_0_to_3_operand")])))]
19328 "TARGET_AVX2 && <mask_mode512bit_condition>"
19329 {
19330 int mask = 0;
19331 mask |= INTVAL (operands[2]) << 0;
19332 mask |= INTVAL (operands[3]) << 2;
19333 mask |= INTVAL (operands[4]) << 4;
19334 mask |= INTVAL (operands[5]) << 6;
19335 operands[2] = GEN_INT (mask);
19336 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
19337 }
19338 [(set_attr "type" "sselog")
19339 (set_attr "prefix" "<mask_prefix2>")
19340 (set_attr "mode" "<sseinsnmode>")])
19341
19342 (define_expand "avx512f_perm<mode>"
19343 [(match_operand:V8FI 0 "register_operand")
19344 (match_operand:V8FI 1 "nonimmediate_operand")
19345 (match_operand:SI 2 "const_0_to_255_operand")]
19346 "TARGET_AVX512F"
19347 {
19348 int mask = INTVAL (operands[2]);
19349 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
19350 GEN_INT ((mask >> 0) & 3),
19351 GEN_INT ((mask >> 2) & 3),
19352 GEN_INT ((mask >> 4) & 3),
19353 GEN_INT ((mask >> 6) & 3),
19354 GEN_INT (((mask >> 0) & 3) + 4),
19355 GEN_INT (((mask >> 2) & 3) + 4),
19356 GEN_INT (((mask >> 4) & 3) + 4),
19357 GEN_INT (((mask >> 6) & 3) + 4)));
19358 DONE;
19359 })
19360
19361 (define_expand "avx512f_perm<mode>_mask"
19362 [(match_operand:V8FI 0 "register_operand")
19363 (match_operand:V8FI 1 "nonimmediate_operand")
19364 (match_operand:SI 2 "const_0_to_255_operand")
19365 (match_operand:V8FI 3 "nonimm_or_0_operand")
19366 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19367 "TARGET_AVX512F"
19368 {
19369 int mask = INTVAL (operands[2]);
19370 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
19371 GEN_INT ((mask >> 0) & 3),
19372 GEN_INT ((mask >> 2) & 3),
19373 GEN_INT ((mask >> 4) & 3),
19374 GEN_INT ((mask >> 6) & 3),
19375 GEN_INT (((mask >> 0) & 3) + 4),
19376 GEN_INT (((mask >> 2) & 3) + 4),
19377 GEN_INT (((mask >> 4) & 3) + 4),
19378 GEN_INT (((mask >> 6) & 3) + 4),
19379 operands[3], operands[4]));
19380 DONE;
19381 })
19382
19383 (define_insn "avx512f_perm<mode>_1<mask_name>"
19384 [(set (match_operand:V8FI 0 "register_operand" "=v")
19385 (vec_select:V8FI
19386 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
19387 (parallel [(match_operand 2 "const_0_to_3_operand")
19388 (match_operand 3 "const_0_to_3_operand")
19389 (match_operand 4 "const_0_to_3_operand")
19390 (match_operand 5 "const_0_to_3_operand")
19391 (match_operand 6 "const_4_to_7_operand")
19392 (match_operand 7 "const_4_to_7_operand")
19393 (match_operand 8 "const_4_to_7_operand")
19394 (match_operand 9 "const_4_to_7_operand")])))]
19395 "TARGET_AVX512F && <mask_mode512bit_condition>
19396 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
19397 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
19398 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
19399 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
19400 {
19401 int mask = 0;
19402 mask |= INTVAL (operands[2]) << 0;
19403 mask |= INTVAL (operands[3]) << 2;
19404 mask |= INTVAL (operands[4]) << 4;
19405 mask |= INTVAL (operands[5]) << 6;
19406 operands[2] = GEN_INT (mask);
19407 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
19408 }
19409 [(set_attr "type" "sselog")
19410 (set_attr "prefix" "<mask_prefix2>")
19411 (set_attr "mode" "<sseinsnmode>")])
19412
19413 (define_insn "avx2_permv2ti"
19414 [(set (match_operand:V4DI 0 "register_operand" "=x")
19415 (unspec:V4DI
19416 [(match_operand:V4DI 1 "register_operand" "x")
19417 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
19418 (match_operand:SI 3 "const_0_to_255_operand" "n")]
19419 UNSPEC_VPERMTI))]
19420 "TARGET_AVX2"
19421 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19422 [(set_attr "type" "sselog")
19423 (set_attr "prefix" "vex")
19424 (set_attr "mode" "OI")])
19425
19426 (define_insn "avx2_vec_dupv4df"
19427 [(set (match_operand:V4DF 0 "register_operand" "=v")
19428 (vec_duplicate:V4DF
19429 (vec_select:DF
19430 (match_operand:V2DF 1 "register_operand" "v")
19431 (parallel [(const_int 0)]))))]
19432 "TARGET_AVX2"
19433 "vbroadcastsd\t{%1, %0|%0, %1}"
19434 [(set_attr "type" "sselog1")
19435 (set_attr "prefix" "maybe_evex")
19436 (set_attr "mode" "V4DF")])
19437
19438 (define_insn "<avx512>_vec_dup<mode>_1"
19439 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
19440 (vec_duplicate:VI_AVX512BW
19441 (vec_select:<ssescalarmode>
19442 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
19443 (parallel [(const_int 0)]))))]
19444 "TARGET_AVX512F"
19445 "@
19446 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
19447 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
19448 [(set_attr "type" "ssemov")
19449 (set_attr "prefix" "evex")
19450 (set_attr "mode" "<sseinsnmode>")])
19451
19452 (define_insn "<avx512>_vec_dup<mode><mask_name>"
19453 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
19454 (vec_duplicate:V48_AVX512VL
19455 (vec_select:<ssescalarmode>
19456 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
19457 (parallel [(const_int 0)]))))]
19458 "TARGET_AVX512F"
19459 {
19460 /* There is no DF broadcast (in AVX-512*) to a 128-bit register.
19461 Mimic it with the integer variant.  */
19462 if (<MODE>mode == V2DFmode)
19463 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
19464
19465 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
19466 }
19467 [(set_attr "type" "ssemov")
19468 (set_attr "prefix" "evex")
19469 (set_attr "mode" "<sseinsnmode>")])
19470
19471 (define_insn "<avx512>_vec_dup<mode><mask_name>"
19472 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
19473 (vec_duplicate:VI12_AVX512VL
19474 (vec_select:<ssescalarmode>
19475 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
19476 (parallel [(const_int 0)]))))]
19477 "TARGET_AVX512BW"
19478 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
19479 [(set_attr "type" "ssemov")
19480 (set_attr "prefix" "evex")
19481 (set_attr "mode" "<sseinsnmode>")])
19482
19483 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
19484 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
19485 (vec_duplicate:V16FI
19486 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
19487 "TARGET_AVX512F"
19488 "@
19489 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
19490 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19491 [(set_attr "type" "ssemov")
19492 (set_attr "prefix" "evex")
19493 (set_attr "mode" "<sseinsnmode>")])
19494
19495 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
19496 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
19497 (vec_duplicate:V8FI
19498 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
19499 "TARGET_AVX512F"
19500 "@
19501 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
19502 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19503 [(set_attr "type" "ssemov")
19504 (set_attr "prefix" "evex")
19505 (set_attr "mode" "<sseinsnmode>")])
19506
19507 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
19508 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
19509 (vec_duplicate:VI12_AVX512VL
19510 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
19511 "TARGET_AVX512BW"
19512 "@
19513 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
19514 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
19515 [(set_attr "type" "ssemov")
19516 (set_attr "prefix" "evex")
19517 (set_attr "mode" "<sseinsnmode>")])
19518
19519 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
19520 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
19521 (vec_duplicate:V48_AVX512VL
19522 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
19523 "TARGET_AVX512F"
19524 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19525 [(set_attr "type" "ssemov")
19526 (set_attr "prefix" "evex")
19527 (set_attr "mode" "<sseinsnmode>")
19528 (set (attr "enabled")
19529 (if_then_else (eq_attr "alternative" "1")
19530 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
19531 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
19532 (const_int 1)))])
19533
19534 (define_insn "vec_dupv4sf"
19535 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
19536 (vec_duplicate:V4SF
19537 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
19538 "TARGET_SSE"
19539 "@
19540 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
19541 vbroadcastss\t{%1, %0|%0, %1}
19542 shufps\t{$0, %0, %0|%0, %0, 0}"
19543 [(set_attr "isa" "avx,avx,noavx")
19544 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
19545 (set_attr "length_immediate" "1,0,1")
19546 (set_attr "prefix_extra" "0,1,*")
19547 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
19548 (set_attr "mode" "V4SF")])
19549
19550 (define_insn "*vec_dupv4si"
19551 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
19552 (vec_duplicate:V4SI
19553 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
19554 "TARGET_SSE"
19555 "@
19556 %vpshufd\t{$0, %1, %0|%0, %1, 0}
19557 vbroadcastss\t{%1, %0|%0, %1}
19558 shufps\t{$0, %0, %0|%0, %0, 0}"
19559 [(set_attr "isa" "sse2,avx,noavx")
19560 (set_attr "type" "sselog1,ssemov,sselog1")
19561 (set_attr "length_immediate" "1,0,1")
19562 (set_attr "prefix_extra" "0,1,*")
19563 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
19564 (set_attr "mode" "TI,V4SF,V4SF")])
19565
19566 (define_insn "*vec_dupv2di"
19567 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
19568 (vec_duplicate:V2DI
19569 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,vm,0")))]
19570 "TARGET_SSE"
19571 "@
19572 punpcklqdq\t%0, %0
19573 vpunpcklqdq\t{%d1, %0|%0, %d1}
19574 %vmovddup\t{%1, %0|%0, %1}
19575 movlhps\t%0, %0"
19576 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
19577 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
19578 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
19579 (set_attr "mode" "TI,TI,DF,V4SF")])
19580
19581 (define_insn "avx2_vbroadcasti128_<mode>"
19582 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
19583 (vec_concat:VI_256
19584 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
19585 (match_dup 1)))]
19586 "TARGET_AVX2"
19587 "@
19588 vbroadcasti128\t{%1, %0|%0, %1}
19589 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
19590 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
19591 [(set_attr "isa" "*,avx512dq,avx512vl")
19592 (set_attr "type" "ssemov")
19593 (set_attr "prefix_extra" "1")
19594 (set_attr "prefix" "vex,evex,evex")
19595 (set_attr "mode" "OI")])
19596
19597 ;; Modes handled by AVX vec_dup patterns.
19598 (define_mode_iterator AVX_VEC_DUP_MODE
19599 [V8SI V8SF V4DI V4DF])
19600 (define_mode_attr vecdupssescalarmodesuffix
19601 [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
19602 ;; Modes handled by AVX2 vec_dup patterns.
19603 (define_mode_iterator AVX2_VEC_DUP_MODE
19604 [V32QI V16QI V16HI V8HI V8SI V4SI])
19605
19606 (define_insn "*vec_dup<mode>"
19607 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,v")
19608 (vec_duplicate:AVX2_VEC_DUP_MODE
19609 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
19610 "TARGET_AVX2"
19611 "@
19612 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
19613 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
19614 #"
19615 [(set_attr "isa" "*,*,noavx512vl")
19616 (set_attr "type" "ssemov")
19617 (set_attr "prefix_extra" "1")
19618 (set_attr "prefix" "maybe_evex")
19619 (set_attr "mode" "<sseinsnmode>")
19620 (set (attr "preferred_for_speed")
19621 (cond [(eq_attr "alternative" "2")
19622 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
19623 ]
19624 (symbol_ref "true")))])
19625
19626 (define_insn "vec_dup<mode>"
19627 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
19628 (vec_duplicate:AVX_VEC_DUP_MODE
19629 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
19630 "TARGET_AVX"
19631 "@
19632 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
19633 vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
19634 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
19635 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
19636 #"
19637 [(set_attr "type" "ssemov")
19638 (set_attr "prefix_extra" "1")
19639 (set_attr "prefix" "maybe_evex")
19640 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
19641 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
19642
19643 (define_split
19644 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
19645 (vec_duplicate:AVX2_VEC_DUP_MODE
19646 (match_operand:<ssescalarmode> 1 "register_operand")))]
19647 "TARGET_AVX2
19648 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
19649 available, because then we can broadcast from GPRs directly.
19650 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
19651 for V*SI mode it requires just -mavx512vl. */
19652 && !(TARGET_AVX512VL
19653 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
19654 && reload_completed && GENERAL_REG_P (operands[1])"
19655 [(const_int 0)]
19656 {
19657 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
19658 CONST0_RTX (V4SImode),
19659 gen_lowpart (SImode, operands[1])));
19660 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
19661 gen_lowpart (<ssexmmmode>mode,
19662 operands[0])));
19663 DONE;
19664 })
19665
19666 (define_split
19667 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
19668 (vec_duplicate:AVX_VEC_DUP_MODE
19669 (match_operand:<ssescalarmode> 1 "register_operand")))]
19670 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
19671 [(set (match_dup 2)
19672 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
19673 (set (match_dup 0)
19674 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
19675 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
19676
19677 (define_insn "avx_vbroadcastf128_<mode>"
19678 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
19679 (vec_concat:V_256
19680 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
19681 (match_dup 1)))]
19682 "TARGET_AVX"
19683 "@
19684 vbroadcast<i128>\t{%1, %0|%0, %1}
19685 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
19686 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
19687 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
19688 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
19689 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
19690 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
19691 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
19692 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
19693 (set_attr "prefix_extra" "1")
19694 (set_attr "length_immediate" "0,1,1,0,1,0,1")
19695 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
19696 (set_attr "mode" "<sseinsnmode>")])
19697
19698 ;; For broadcast[i|f]32x2.  Note that there is no v4sf version, only v4si.
19699 (define_mode_iterator VI4F_BRCST32x2
19700 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
19701 V16SF (V8SF "TARGET_AVX512VL")])
19702
19703 (define_mode_attr 64x2mode
19704 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
19705
19706 (define_mode_attr 32x2mode
19707 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
19708 (V8SF "V2SF") (V4SI "V2SI")])
19709
19710 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
19711 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
19712 (vec_duplicate:VI4F_BRCST32x2
19713 (vec_select:<32x2mode>
19714 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
19715 (parallel [(const_int 0) (const_int 1)]))))]
19716 "TARGET_AVX512DQ"
19717 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
19718 [(set_attr "type" "ssemov")
19719 (set_attr "prefix_extra" "1")
19720 (set_attr "prefix" "evex")
19721 (set_attr "mode" "<sseinsnmode>")])
19722
19723 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
19724 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
19725 (vec_duplicate:VI4F_256
19726 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
19727 "TARGET_AVX512VL"
19728 "@
19729 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
19730 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19731 [(set_attr "type" "ssemov")
19732 (set_attr "prefix_extra" "1")
19733 (set_attr "prefix" "evex")
19734 (set_attr "mode" "<sseinsnmode>")])
19735
19736 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
19737 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
19738 (vec_duplicate:V16FI
19739 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
19740 "TARGET_AVX512DQ"
19741 "@
19742 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
19743 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19744 [(set_attr "type" "ssemov")
19745 (set_attr "prefix_extra" "1")
19746 (set_attr "prefix" "evex")
19747 (set_attr "mode" "<sseinsnmode>")])
19748
19749 ;; For broadcast[i|f]64x2
19750 (define_mode_iterator VI8F_BRCST64x2
19751 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
19752
19753 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
19754 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
19755 (vec_duplicate:VI8F_BRCST64x2
19756 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
19757 "TARGET_AVX512DQ"
19758 "@
19759 vshuf<shuffletype>64x2\t{$0x0, %<xtg_mode>1, %<xtg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<xtg_mode>1, %<xtg_mode>1, 0x0}
19760 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19761 [(set_attr "type" "ssemov")
19762 (set_attr "prefix_extra" "1")
19763 (set_attr "prefix" "evex")
19764 (set_attr "mode" "<sseinsnmode>")])
19765
19766 (define_insn "avx512cd_maskb_vec_dup<mode>"
19767 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
19768 (vec_duplicate:VI8_AVX512VL
19769 (zero_extend:DI
19770 (match_operand:QI 1 "register_operand" "k"))))]
19771 "TARGET_AVX512CD"
19772 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
19773 [(set_attr "type" "mskmov")
19774 (set_attr "prefix" "evex")
19775 (set_attr "mode" "XI")])
19776
19777 (define_insn "avx512cd_maskw_vec_dup<mode>"
19778 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
19779 (vec_duplicate:VI4_AVX512VL
19780 (zero_extend:SI
19781 (match_operand:HI 1 "register_operand" "k"))))]
19782 "TARGET_AVX512CD"
19783 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
19784 [(set_attr "type" "mskmov")
19785 (set_attr "prefix" "evex")
19786 (set_attr "mode" "XI")])
19787
19788 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
19789 ;; If it so happens that the input is in memory, use vbroadcast.
19790 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
19791 (define_insn "*avx_vperm_broadcast_v4sf"
19792 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
19793 (vec_select:V4SF
19794 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
19795 (match_parallel 2 "avx_vbroadcast_operand"
19796 [(match_operand 3 "const_int_operand" "C,n,n")])))]
19797 "TARGET_AVX"
19798 {
19799 int elt = INTVAL (operands[3]);
19800 switch (which_alternative)
19801 {
19802 case 0:
19803 case 1:
19804 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
19805 return "vbroadcastss\t{%1, %0|%0, %k1}";
19806 case 2:
19807 operands[2] = GEN_INT (elt * 0x55);
19808 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
19809 default:
19810 gcc_unreachable ();
19811 }
19812 }
19813 [(set_attr "type" "ssemov,ssemov,sselog1")
19814 (set_attr "prefix_extra" "1")
19815 (set_attr "length_immediate" "0,0,1")
19816 (set_attr "prefix" "maybe_evex")
19817 (set_attr "mode" "SF,SF,V4SF")])
19818
19819 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
19820 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
19821 (vec_select:VF_256
19822 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
19823 (match_parallel 2 "avx_vbroadcast_operand"
19824 [(match_operand 3 "const_int_operand" "C,n,n")])))]
19825 "TARGET_AVX"
19826 "#"
19827 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
19828 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
19829 {
19830 rtx op0 = operands[0], op1 = operands[1];
19831 int elt = INTVAL (operands[3]);
19832
19833 if (REG_P (op1))
19834 {
19835 int mask;
19836
19837 if (TARGET_AVX2 && elt == 0)
19838 {
19839 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
19840 op1)));
19841 DONE;
19842 }
19843
19844 /* Shuffle element we care about into all elements of the 128-bit lane.
19845 The other lane gets shuffled too, but we don't care. */
19846 if (<MODE>mode == V4DFmode)
19847 mask = (elt & 1 ? 15 : 0);
19848 else
19849 mask = (elt & 3) * 0x55;
19850 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
19851
19852 /* Shuffle the lane we care about into both lanes of the dest. */
19853 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
19854 if (EXT_REX_SSE_REG_P (op0))
19855 {
19856 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
19857 or VSHUFF128. */
19858 gcc_assert (<MODE>mode == V8SFmode);
19859 if ((mask & 1) == 0)
19860 emit_insn (gen_avx2_vec_dupv8sf (op0,
19861 gen_lowpart (V4SFmode, op0)));
19862 else
19863 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
19864 GEN_INT (4), GEN_INT (5),
19865 GEN_INT (6), GEN_INT (7),
19866 GEN_INT (12), GEN_INT (13),
19867 GEN_INT (14), GEN_INT (15)));
19868 DONE;
19869 }
19870
19871 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
19872 DONE;
19873 }
19874
19875 operands[1] = adjust_address (op1, <ssescalarmode>mode,
19876 elt * GET_MODE_SIZE (<ssescalarmode>mode));
19877 })
19878
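;; Immediate form of vpermilpd.  Each DF element is chosen by one bit of
;; the immediate, selecting between the two elements of its own 128-bit
;; lane.  For example (V4DF, purely illustrative), an immediate of 6
;; (0b0110) expands to the selection vector {0, 1, 3, 2}.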
19879 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
19880 [(set (match_operand:VF2 0 "register_operand")
19881 (vec_select:VF2
19882 (match_operand:VF2 1 "nonimmediate_operand")
19883 (match_operand:SI 2 "const_0_to_255_operand")))]
19884 "TARGET_AVX && <mask_mode512bit_condition>"
19885 {
19886 int mask = INTVAL (operands[2]);
19887 rtx perm[<ssescalarnum>];
19888
19889 int i;
19890 for (i = 0; i < <ssescalarnum>; i = i + 2)
19891 {
19892 perm[i] = GEN_INT (((mask >> i) & 1) + i);
19893 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
19894 }
19895
19896 operands[2]
19897 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
19898 })
19899
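;; Likewise for vpermilps.  Each SF element is chosen by a 2-bit field of
;; the immediate and again stays within its own 128-bit lane.  For
;; example (V8SF, purely illustrative), an immediate of 0x1b (0b00011011)
;; reverses every lane, expanding to {3, 2, 1, 0, 7, 6, 5, 4}.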
19900 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
19901 [(set (match_operand:VF1 0 "register_operand")
19902 (vec_select:VF1
19903 (match_operand:VF1 1 "nonimmediate_operand")
19904 (match_operand:SI 2 "const_0_to_255_operand")))]
19905 "TARGET_AVX && <mask_mode512bit_condition>"
19906 {
19907 int mask = INTVAL (operands[2]);
19908 rtx perm[<ssescalarnum>];
19909
19910 int i;
19911 for (i = 0; i < <ssescalarnum>; i = i + 4)
19912 {
19913 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
19914 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
19915 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
19916 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
19917 }
19918
19919 operands[2]
19920 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
19921 })
19922
19923 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
19924 [(set (match_operand:VF 0 "register_operand" "=v")
19925 (vec_select:VF
19926 (match_operand:VF 1 "nonimmediate_operand" "vm")
19927 (match_parallel 2 ""
19928 [(match_operand 3 "const_int_operand")])))]
19929 "TARGET_AVX && <mask_mode512bit_condition>
19930 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
19931 {
19932 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
19933 operands[2] = GEN_INT (mask);
19934 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
19935 }
19936 [(set_attr "type" "sselog")
19937 (set_attr "prefix_extra" "1")
19938 (set_attr "length_immediate" "1")
19939 (set_attr "prefix" "<mask_prefix>")
19940 (set_attr "mode" "<sseinsnmode>")])
19941
19942 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
19943 [(set (match_operand:VF 0 "register_operand" "=v")
19944 (unspec:VF
19945 [(match_operand:VF 1 "register_operand" "v")
19946 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
19947 UNSPEC_VPERMIL))]
19948 "TARGET_AVX && <mask_mode512bit_condition>"
19949 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19950 [(set_attr "type" "sselog")
19951 (set_attr "prefix_extra" "1")
19952 (set_attr "btver2_decode" "vector")
19953 (set_attr "prefix" "<mask_prefix>")
19954 (set_attr "mode" "<sseinsnmode>")])
19955
19956 (define_mode_iterator VPERMI2
19957 [V16SI V16SF V8DI V8DF
19958 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
19959 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
19960 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
19961 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
19962 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
19963 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
19964 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
19965 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
19966
19967 (define_mode_iterator VPERMI2I
19968 [V16SI V8DI
19969 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
19970 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
19971 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
19972 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
19973 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
19974 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
19975
19976 (define_expand "<avx512>_vpermi2var<mode>3_mask"
19977 [(set (match_operand:VPERMI2 0 "register_operand")
19978 (vec_merge:VPERMI2
19979 (unspec:VPERMI2
19980 [(match_operand:<sseintvecmode> 2 "register_operand")
19981 (match_operand:VPERMI2 1 "register_operand")
19982 (match_operand:VPERMI2 3 "nonimmediate_operand")]
19983 UNSPEC_VPERMT2)
19984 (match_dup 5)
19985 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
19986 "TARGET_AVX512F"
19987 {
19988 operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
19989 operands[5] = gen_lowpart (<MODE>mode, operands[2]);
19990 })
19991
19992 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
19993 [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
19994 (vec_merge:VPERMI2I
19995 (unspec:VPERMI2I
19996 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
19997 (match_operand:VPERMI2I 1 "register_operand" "v")
19998 (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
19999 UNSPEC_VPERMT2)
20000 (match_dup 2)
20001 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20002 "TARGET_AVX512F"
20003 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20004 [(set_attr "type" "sselog")
20005 (set_attr "prefix" "evex")
20006 (set_attr "mode" "<sseinsnmode>")])
20007
20008 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
20009 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
20010 (vec_merge:VF_AVX512VL
20011 (unspec:VF_AVX512VL
20012 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
20013 (match_operand:VF_AVX512VL 1 "register_operand" "v")
20014 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
20015 UNSPEC_VPERMT2)
20016 (subreg:VF_AVX512VL (match_dup 2) 0)
20017 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20018 "TARGET_AVX512F"
20019 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20020 [(set_attr "type" "sselog")
20021 (set_attr "prefix" "evex")
20022 (set_attr "mode" "<sseinsnmode>")])
20023
20024 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
20025 [(match_operand:VPERMI2 0 "register_operand")
20026 (match_operand:<sseintvecmode> 1 "register_operand")
20027 (match_operand:VPERMI2 2 "register_operand")
20028 (match_operand:VPERMI2 3 "nonimmediate_operand")
20029 (match_operand:<avx512fmaskmode> 4 "register_operand")]
20030 "TARGET_AVX512F"
20031 {
20032 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
20033 operands[0], operands[1], operands[2], operands[3],
20034 CONST0_RTX (<MODE>mode), operands[4]));
20035 DONE;
20036 })
20037
20038 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
20039 [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
20040 (unspec:VPERMI2
20041 [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
20042 (match_operand:VPERMI2 2 "register_operand" "0,v")
20043 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
20044 UNSPEC_VPERMT2))]
20045 "TARGET_AVX512F"
20046 "@
20047 vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
20048 vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
20049 [(set_attr "type" "sselog")
20050 (set_attr "prefix" "evex")
20051 (set_attr "mode" "<sseinsnmode>")])
20052
20053 (define_insn "<avx512>_vpermt2var<mode>3_mask"
20054 [(set (match_operand:VPERMI2 0 "register_operand" "=v")
20055 (vec_merge:VPERMI2
20056 (unspec:VPERMI2
20057 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
20058 (match_operand:VPERMI2 2 "register_operand" "0")
20059 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
20060 UNSPEC_VPERMT2)
20061 (match_dup 2)
20062 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20063 "TARGET_AVX512F"
20064 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20065 [(set_attr "type" "sselog")
20066 (set_attr "prefix" "evex")
20067 (set_attr "mode" "<sseinsnmode>")])
20068
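;; vperm2f128/vperm2i128.  Bits 0-1 of the immediate select which 128-bit
;; lane of the concatenated {op1, op2} value supplies the low lane of the
;; destination, bits 4-5 select the high lane, and bits 3 and 7 request
;; zeroing of a lane.  Without zeroing the operation is just a vec_select
;; of a vec_concat; e.g. (V8SF, purely illustrative) an immediate of 0x21
;; selects elements {4, 5, 6, 7, 8, 9, 10, 11}.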
20069 (define_expand "avx_vperm2f128<mode>3"
20070 [(set (match_operand:AVX256MODE2P 0 "register_operand")
20071 (unspec:AVX256MODE2P
20072 [(match_operand:AVX256MODE2P 1 "register_operand")
20073 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
20074 (match_operand:SI 3 "const_0_to_255_operand")]
20075 UNSPEC_VPERMIL2F128))]
20076 "TARGET_AVX"
20077 {
20078 int mask = INTVAL (operands[3]);
20079 if ((mask & 0x88) == 0)
20080 {
20081 rtx perm[<ssescalarnum>], t1, t2;
20082 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
20083
20084 base = (mask & 3) * nelt2;
20085 for (i = 0; i < nelt2; ++i)
20086 perm[i] = GEN_INT (base + i);
20087
20088 base = ((mask >> 4) & 3) * nelt2;
20089 for (i = 0; i < nelt2; ++i)
20090 perm[i + nelt2] = GEN_INT (base + i);
20091
20092 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
20093 operands[1], operands[2]);
20094 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
20095 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
20096 t2 = gen_rtx_SET (operands[0], t2);
20097 emit_insn (t2);
20098 DONE;
20099 }
20100 })
20101
20102 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
20103 ;; means that in order to represent this properly in rtl we'd have to
20104 ;; nest *another* vec_concat with a zero operand and do the select from
20105 ;; a 4x wide vector. That doesn't seem very nice.
20106 (define_insn "*avx_vperm2f128<mode>_full"
20107 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
20108 (unspec:AVX256MODE2P
20109 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
20110 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
20111 (match_operand:SI 3 "const_0_to_255_operand" "n")]
20112 UNSPEC_VPERMIL2F128))]
20113 "TARGET_AVX"
20114 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20115 [(set_attr "type" "sselog")
20116 (set_attr "prefix_extra" "1")
20117 (set_attr "length_immediate" "1")
20118 (set_attr "prefix" "vex")
20119 (set_attr "mode" "<sseinsnmode>")])
20120
20121 (define_insn "*avx_vperm2f128<mode>_nozero"
20122 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
20123 (vec_select:AVX256MODE2P
20124 (vec_concat:<ssedoublevecmode>
20125 (match_operand:AVX256MODE2P 1 "register_operand" "x")
20126 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
20127 (match_parallel 3 ""
20128 [(match_operand 4 "const_int_operand")])))]
20129 "TARGET_AVX
20130 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
20131 {
20132 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
20133 if (mask == 0x12)
20134 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
20135 if (mask == 0x20)
20136 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
20137 operands[3] = GEN_INT (mask);
20138 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
20139 }
20140 [(set_attr "type" "sselog")
20141 (set_attr "prefix_extra" "1")
20142 (set_attr "length_immediate" "1")
20143 (set_attr "prefix" "vex")
20144 (set_attr "mode" "<sseinsnmode>")])
20145
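;; Match palignr written as a vec_select with a rotated identity
;; permutation.  The first selector element is the rotate count in
;; elements and is scaled by the element size to form the byte-shift
;; immediate; e.g. (V8HI, purely illustrative) a selection starting at
;; element 3 emits palignr with an immediate of 6.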
20146 (define_insn "*ssse3_palignr<mode>_perm"
20147 [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
20148 (vec_select:V_128
20149 (match_operand:V_128 1 "register_operand" "0,x,v")
20150 (match_parallel 2 "palignr_operand"
20151 [(match_operand 3 "const_int_operand" "n,n,n")])))]
20152 "TARGET_SSSE3"
20153 {
20154 operands[2] = (GEN_INT (INTVAL (operands[3])
20155 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
20156
20157 switch (which_alternative)
20158 {
20159 case 0:
20160 return "palignr\t{%2, %1, %0|%0, %1, %2}";
20161 case 1:
20162 case 2:
20163 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
20164 default:
20165 gcc_unreachable ();
20166 }
20167 }
20168 [(set_attr "isa" "noavx,avx,avx512bw")
20169 (set_attr "type" "sseishft")
20170 (set_attr "atom_unit" "sishuf")
20171 (set_attr "prefix_data16" "1,*,*")
20172 (set_attr "prefix_extra" "1")
20173 (set_attr "length_immediate" "1")
20174 (set_attr "prefix" "orig,vex,evex")])
20175
20176 (define_expand "avx512vl_vinsert<mode>"
20177 [(match_operand:VI48F_256 0 "register_operand")
20178 (match_operand:VI48F_256 1 "register_operand")
20179 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
20180 (match_operand:SI 3 "const_0_to_1_operand")
20181 (match_operand:VI48F_256 4 "register_operand")
20182 (match_operand:<avx512fmaskmode> 5 "register_operand")]
20183 "TARGET_AVX512VL"
20184 {
20185 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
20186
20187 switch (INTVAL (operands[3]))
20188 {
20189 case 0:
20190 insn = gen_vec_set_lo_<mode>_mask;
20191 break;
20192 case 1:
20193 insn = gen_vec_set_hi_<mode>_mask;
20194 break;
20195 default:
20196 gcc_unreachable ();
20197 }
20198
20199 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
20200 operands[5]));
20201 DONE;
20202 })
20203
20204 (define_expand "avx_vinsertf128<mode>"
20205 [(match_operand:V_256 0 "register_operand")
20206 (match_operand:V_256 1 "register_operand")
20207 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
20208 (match_operand:SI 3 "const_0_to_1_operand")]
20209 "TARGET_AVX"
20210 {
20211 rtx (*insn)(rtx, rtx, rtx);
20212
20213 switch (INTVAL (operands[3]))
20214 {
20215 case 0:
20216 insn = gen_vec_set_lo_<mode>;
20217 break;
20218 case 1:
20219 insn = gen_vec_set_hi_<mode>;
20220 break;
20221 default:
20222 gcc_unreachable ();
20223 }
20224
20225 emit_insn (insn (operands[0], operands[1], operands[2]));
20226 DONE;
20227 })
20228
20229 (define_insn "vec_set_lo_<mode><mask_name>"
20230 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20231 (vec_concat:VI8F_256
20232 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
20233 (vec_select:<ssehalfvecmode>
20234 (match_operand:VI8F_256 1 "register_operand" "v")
20235 (parallel [(const_int 2) (const_int 3)]))))]
20236 "TARGET_AVX && <mask_avx512dq_condition>"
20237 {
20238 if (TARGET_AVX512DQ)
20239 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
20240 else if (TARGET_AVX512VL)
20241 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
20242 else
20243 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
20244 }
20245 [(set_attr "type" "sselog")
20246 (set_attr "prefix_extra" "1")
20247 (set_attr "length_immediate" "1")
20248 (set_attr "prefix" "vex")
20249 (set_attr "mode" "<sseinsnmode>")])
20250
20251 (define_insn "vec_set_hi_<mode><mask_name>"
20252 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20253 (vec_concat:VI8F_256
20254 (vec_select:<ssehalfvecmode>
20255 (match_operand:VI8F_256 1 "register_operand" "v")
20256 (parallel [(const_int 0) (const_int 1)]))
20257 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
20258 "TARGET_AVX && <mask_avx512dq_condition>"
20259 {
20260 if (TARGET_AVX512DQ)
20261 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
20262 else if (TARGET_AVX512VL)
20263 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
20264 else
20265 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
20266 }
20267 [(set_attr "type" "sselog")
20268 (set_attr "prefix_extra" "1")
20269 (set_attr "length_immediate" "1")
20270 (set_attr "prefix" "vex")
20271 (set_attr "mode" "<sseinsnmode>")])
20272
20273 (define_insn "vec_set_lo_<mode><mask_name>"
20274 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
20275 (vec_concat:VI4F_256
20276 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
20277 (vec_select:<ssehalfvecmode>
20278 (match_operand:VI4F_256 1 "register_operand" "v")
20279 (parallel [(const_int 4) (const_int 5)
20280 (const_int 6) (const_int 7)]))))]
20281 "TARGET_AVX"
20282 {
20283 if (TARGET_AVX512VL)
20284 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
20285 else
20286 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
20287 }
20288 [(set_attr "type" "sselog")
20289 (set_attr "prefix_extra" "1")
20290 (set_attr "length_immediate" "1")
20291 (set_attr "prefix" "vex")
20292 (set_attr "mode" "<sseinsnmode>")])
20293
20294 (define_insn "vec_set_hi_<mode><mask_name>"
20295 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
20296 (vec_concat:VI4F_256
20297 (vec_select:<ssehalfvecmode>
20298 (match_operand:VI4F_256 1 "register_operand" "v")
20299 (parallel [(const_int 0) (const_int 1)
20300 (const_int 2) (const_int 3)]))
20301 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
20302 "TARGET_AVX"
20303 {
20304 if (TARGET_AVX512VL)
20305 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
20306 else
20307 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
20308 }
20309 [(set_attr "type" "sselog")
20310 (set_attr "prefix_extra" "1")
20311 (set_attr "length_immediate" "1")
20312 (set_attr "prefix" "vex")
20313 (set_attr "mode" "<sseinsnmode>")])
20314
20315 (define_insn "vec_set_lo_v16hi"
20316 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
20317 (vec_concat:V16HI
20318 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
20319 (vec_select:V8HI
20320 (match_operand:V16HI 1 "register_operand" "x,v")
20321 (parallel [(const_int 8) (const_int 9)
20322 (const_int 10) (const_int 11)
20323 (const_int 12) (const_int 13)
20324 (const_int 14) (const_int 15)]))))]
20325 "TARGET_AVX"
20326 "@
20327 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
20328 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
20329 [(set_attr "type" "sselog")
20330 (set_attr "prefix_extra" "1")
20331 (set_attr "length_immediate" "1")
20332 (set_attr "prefix" "vex,evex")
20333 (set_attr "mode" "OI")])
20334
20335 (define_insn "vec_set_hi_v16hi"
20336 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
20337 (vec_concat:V16HI
20338 (vec_select:V8HI
20339 (match_operand:V16HI 1 "register_operand" "x,v")
20340 (parallel [(const_int 0) (const_int 1)
20341 (const_int 2) (const_int 3)
20342 (const_int 4) (const_int 5)
20343 (const_int 6) (const_int 7)]))
20344 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
20345 "TARGET_AVX"
20346 "@
20347 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
20348 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
20349 [(set_attr "type" "sselog")
20350 (set_attr "prefix_extra" "1")
20351 (set_attr "length_immediate" "1")
20352 (set_attr "prefix" "vex,evex")
20353 (set_attr "mode" "OI")])
20354
20355 (define_insn "vec_set_lo_v32qi"
20356 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
20357 (vec_concat:V32QI
20358 (match_operand:V16QI 2 "nonimmediate_operand" "xm,v")
20359 (vec_select:V16QI
20360 (match_operand:V32QI 1 "register_operand" "x,v")
20361 (parallel [(const_int 16) (const_int 17)
20362 (const_int 18) (const_int 19)
20363 (const_int 20) (const_int 21)
20364 (const_int 22) (const_int 23)
20365 (const_int 24) (const_int 25)
20366 (const_int 26) (const_int 27)
20367 (const_int 28) (const_int 29)
20368 (const_int 30) (const_int 31)]))))]
20369 "TARGET_AVX"
20370 "@
20371 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
20372 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
20373 [(set_attr "type" "sselog")
20374 (set_attr "prefix_extra" "1")
20375 (set_attr "length_immediate" "1")
20376 (set_attr "prefix" "vex,evex")
20377 (set_attr "mode" "OI")])
20378
20379 (define_insn "vec_set_hi_v32qi"
20380 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
20381 (vec_concat:V32QI
20382 (vec_select:V16QI
20383 (match_operand:V32QI 1 "register_operand" "x,v")
20384 (parallel [(const_int 0) (const_int 1)
20385 (const_int 2) (const_int 3)
20386 (const_int 4) (const_int 5)
20387 (const_int 6) (const_int 7)
20388 (const_int 8) (const_int 9)
20389 (const_int 10) (const_int 11)
20390 (const_int 12) (const_int 13)
20391 (const_int 14) (const_int 15)]))
20392 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
20393 "TARGET_AVX"
20394 "@
20395 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
20396 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
20397 [(set_attr "type" "sselog")
20398 (set_attr "prefix_extra" "1")
20399 (set_attr "length_immediate" "1")
20400 (set_attr "prefix" "vex,evex")
20401 (set_attr "mode" "OI")])
20402
20403 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
20404 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
20405 (unspec:V48_AVX2
20406 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
20407 (match_operand:V48_AVX2 1 "memory_operand" "m")]
20408 UNSPEC_MASKMOV))]
20409 "TARGET_AVX"
20410 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
20411 [(set_attr "type" "sselog1")
20412 (set_attr "prefix_extra" "1")
20413 (set_attr "prefix" "vex")
20414 (set_attr "btver2_decode" "vector")
20415 (set_attr "mode" "<sseinsnmode>")])
20416
20417 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
20418 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
20419 (unspec:V48_AVX2
20420 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
20421 (match_operand:V48_AVX2 2 "register_operand" "x")
20422 (match_dup 0)]
20423 UNSPEC_MASKMOV))]
20424 "TARGET_AVX"
20425 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20426 [(set_attr "type" "sselog1")
20427 (set_attr "prefix_extra" "1")
20428 (set_attr "prefix" "vex")
20429 (set_attr "btver2_decode" "vector")
20430 (set_attr "mode" "<sseinsnmode>")])
20431
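;; maskload/maskstore expanders.  The AVX/AVX2 forms use the
;; UNSPEC_MASKMOV patterns above, where the sign bit of each mask vector
;; element selects the load or store; the AVX512 forms merge through a
;; mask register instead, leaving unselected destination elements (or
;; memory) unchanged.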
20432 (define_expand "maskload<mode><sseintvecmodelower>"
20433 [(set (match_operand:V48_AVX2 0 "register_operand")
20434 (unspec:V48_AVX2
20435 [(match_operand:<sseintvecmode> 2 "register_operand")
20436 (match_operand:V48_AVX2 1 "memory_operand")]
20437 UNSPEC_MASKMOV))]
20438 "TARGET_AVX")
20439
20440 (define_expand "maskload<mode><avx512fmaskmodelower>"
20441 [(set (match_operand:V48_AVX512VL 0 "register_operand")
20442 (vec_merge:V48_AVX512VL
20443 (match_operand:V48_AVX512VL 1 "memory_operand")
20444 (match_dup 0)
20445 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
20446 "TARGET_AVX512F")
20447
20448 (define_expand "maskload<mode><avx512fmaskmodelower>"
20449 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
20450 (vec_merge:VI12_AVX512VL
20451 (match_operand:VI12_AVX512VL 1 "memory_operand")
20452 (match_dup 0)
20453 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
20454 "TARGET_AVX512BW")
20455
20456 (define_expand "maskstore<mode><sseintvecmodelower>"
20457 [(set (match_operand:V48_AVX2 0 "memory_operand")
20458 (unspec:V48_AVX2
20459 [(match_operand:<sseintvecmode> 2 "register_operand")
20460 (match_operand:V48_AVX2 1 "register_operand")
20461 (match_dup 0)]
20462 UNSPEC_MASKMOV))]
20463 "TARGET_AVX")
20464
20465 (define_expand "maskstore<mode><avx512fmaskmodelower>"
20466 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
20467 (vec_merge:V48_AVX512VL
20468 (match_operand:V48_AVX512VL 1 "register_operand")
20469 (match_dup 0)
20470 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
20471 "TARGET_AVX512F")
20472
20473 (define_expand "maskstore<mode><avx512fmaskmodelower>"
20474 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
20475 (vec_merge:VI12_AVX512VL
20476 (match_operand:VI12_AVX512VL 1 "register_operand")
20477 (match_dup 0)
20478 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
20479 "TARGET_AVX512BW")
20480
20481 (define_expand "cbranch<mode>4"
20482 [(set (reg:CC FLAGS_REG)
20483 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
20484 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
20485 (set (pc) (if_then_else
20486 (match_operator 0 "bt_comparison_operator"
20487 [(reg:CC FLAGS_REG) (const_int 0)])
20488 (label_ref (match_operand 3))
20489 (pc)))]
20490 "TARGET_SSE4_1"
20491 {
20492 ix86_expand_branch (GET_CODE (operands[0]),
20493 operands[1], operands[2], operands[3]);
20494 DONE;
20495 })
20496
20497
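;; Cast patterns (used by intrinsics such as _mm256_castps128_ps256).
;; After reload the unspec is split into a plain lowpart move, which
;; usually disappears when source and destination land in the same
;; register.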
20498 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
20499 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
20500 (unspec:AVX256MODE2P
20501 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
20502 UNSPEC_CAST))]
20503 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
20504 "#"
20505 "&& reload_completed"
20506 [(set (match_dup 0) (match_dup 1))]
20507 {
20508 if (REG_P (operands[0]))
20509 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
20510 else
20511 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
20512 <ssehalfvecmode>mode);
20513 })
20514
20515 ;; Modes handled by vec_init expanders.
20516 (define_mode_iterator VEC_INIT_MODE
20517 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
20518 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
20519 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
20520 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
20521 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
20522 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
20523 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
20524
20525 ;; Likewise, but for initialization from half-sized vectors.
20526 ;; Thus, these are all VEC_INIT_MODE modes except the two-element V2?? modes.
20527 (define_mode_iterator VEC_INIT_HALF_MODE
20528 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
20529 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
20530 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
20531 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
20532 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
20533 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
20534 (V4TI "TARGET_AVX512F")])
20535
20536 (define_expand "vec_init<mode><ssescalarmodelower>"
20537 [(match_operand:VEC_INIT_MODE 0 "register_operand")
20538 (match_operand 1)]
20539 "TARGET_SSE"
20540 {
20541 ix86_expand_vector_init (false, operands[0], operands[1]);
20542 DONE;
20543 })
20544
20545 (define_expand "vec_init<mode><ssehalfvecmodelower>"
20546 [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
20547 (match_operand 1)]
20548 "TARGET_SSE"
20549 {
20550 ix86_expand_vector_init (false, operands[0], operands[1]);
20551 DONE;
20552 })
20553
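;; Per-element variable shift counts (vpsravd, vpsllvd/q, vpsrlvd/q and
;; their AVX512 extensions).  The VI2_AVX512VL patterns below handle the
;; 16-bit element forms added by AVX512BW.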
20554 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
20555 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
20556 (ashiftrt:VI48_AVX512F_AVX512VL
20557 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
20558 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
20559 "TARGET_AVX2 && <mask_mode512bit_condition>"
20560 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20561 [(set_attr "type" "sseishft")
20562 (set_attr "prefix" "maybe_evex")
20563 (set_attr "mode" "<sseinsnmode>")])
20564
20565 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
20566 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
20567 (ashiftrt:VI2_AVX512VL
20568 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
20569 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
20570 "TARGET_AVX512BW"
20571 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20572 [(set_attr "type" "sseishft")
20573 (set_attr "prefix" "maybe_evex")
20574 (set_attr "mode" "<sseinsnmode>")])
20575
20576 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
20577 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
20578 (any_lshift:VI48_AVX512F
20579 (match_operand:VI48_AVX512F 1 "register_operand" "v")
20580 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
20581 "TARGET_AVX2 && <mask_mode512bit_condition>"
20582 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20583 [(set_attr "type" "sseishft")
20584 (set_attr "prefix" "maybe_evex")
20585 (set_attr "mode" "<sseinsnmode>")])
20586
20587 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
20588 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
20589 (any_lshift:VI2_AVX512VL
20590 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
20591 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
20592 "TARGET_AVX512BW"
20593 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20594 [(set_attr "type" "sseishft")
20595 (set_attr "prefix" "maybe_evex")
20596 (set_attr "mode" "<sseinsnmode>")])
20597
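;; Concatenate two half-width vectors.  Alternatives 0 and 1 insert the
;; upper half with a vinsert* instruction; alternatives 2 and 3 handle a
;; zero upper half by moving only the lower half, relying on VEX/EVEX
;; encoded moves zeroing the rest of the destination register.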
20598 (define_insn "avx_vec_concat<mode>"
20599 [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
20600 (vec_concat:V_256_512
20601 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,v,x,v")
20602 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
20603 "TARGET_AVX"
20604 {
20605 switch (which_alternative)
20606 {
20607 case 0:
20608 return "vinsert<i128>\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20609 case 1:
20610 if (<MODE_SIZE> == 64)
20611 {
20612 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
20613 return "vinsert<shuffletype>32x8\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20614 else
20615 return "vinsert<shuffletype>64x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20616 }
20617 else
20618 {
20619 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
20620 return "vinsert<shuffletype>64x2\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20621 else
20622 return "vinsert<shuffletype>32x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20623 }
20624 case 2:
20625 case 3:
20626 switch (get_attr_mode (insn))
20627 {
20628 case MODE_V16SF:
20629 return "vmovaps\t{%1, %t0|%t0, %1}";
20630 case MODE_V8DF:
20631 return "vmovapd\t{%1, %t0|%t0, %1}";
20632 case MODE_V8SF:
20633 return "vmovaps\t{%1, %x0|%x0, %1}";
20634 case MODE_V4DF:
20635 return "vmovapd\t{%1, %x0|%x0, %1}";
20636 case MODE_XI:
20637 if (which_alternative == 2)
20638 return "vmovdqa\t{%1, %t0|%t0, %1}";
20639 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
20640 return "vmovdqa64\t{%1, %t0|%t0, %1}";
20641 else
20642 return "vmovdqa32\t{%1, %t0|%t0, %1}";
20643 case MODE_OI:
20644 if (which_alternative == 2)
20645 return "vmovdqa\t{%1, %x0|%x0, %1}";
20646 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
20647 return "vmovdqa64\t{%1, %x0|%x0, %1}";
20648 else
20649 return "vmovdqa32\t{%1, %x0|%x0, %1}";
20650 default:
20651 gcc_unreachable ();
20652 }
20653 default:
20654 gcc_unreachable ();
20655 }
20656 }
20657 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
20658 (set_attr "prefix_extra" "1,1,*,*")
20659 (set_attr "length_immediate" "1,1,*,*")
20660 (set_attr "prefix" "maybe_evex")
20661 (set_attr "mode" "<sseinsnmode>")])
20662
20663 (define_insn "vcvtph2ps<mask_name>"
20664 [(set (match_operand:V4SF 0 "register_operand" "=v")
20665 (vec_select:V4SF
20666 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
20667 UNSPEC_VCVTPH2PS)
20668 (parallel [(const_int 0) (const_int 1)
20669 (const_int 2) (const_int 3)])))]
20670 "TARGET_F16C || TARGET_AVX512VL"
20671 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20672 [(set_attr "type" "ssecvt")
20673 (set_attr "prefix" "maybe_evex")
20674 (set_attr "mode" "V4SF")])
20675
20676 (define_insn "*vcvtph2ps_load<mask_name>"
20677 [(set (match_operand:V4SF 0 "register_operand" "=v")
20678 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
20679 UNSPEC_VCVTPH2PS))]
20680 "TARGET_F16C || TARGET_AVX512VL"
20681 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20682 [(set_attr "type" "ssecvt")
20683 (set_attr "prefix" "vex")
20684 (set_attr "mode" "V8SF")])
20685
20686 (define_insn "vcvtph2ps256<mask_name>"
20687 [(set (match_operand:V8SF 0 "register_operand" "=v")
20688 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
20689 UNSPEC_VCVTPH2PS))]
20690 "TARGET_F16C || TARGET_AVX512VL"
20691 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20692 [(set_attr "type" "ssecvt")
20693 (set_attr "prefix" "vex")
20694 (set_attr "btver2_decode" "double")
20695 (set_attr "mode" "V8SF")])
20696
20697 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
20698 [(set (match_operand:V16SF 0 "register_operand" "=v")
20699 (unspec:V16SF
20700 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
20701 UNSPEC_VCVTPH2PS))]
20702 "TARGET_AVX512F"
20703 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
20704 [(set_attr "type" "ssecvt")
20705 (set_attr "prefix" "evex")
20706 (set_attr "mode" "V16SF")])
20707
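;; Conversions from single to half precision.  Operand 2 is the
;; rounding-control immediate of vcvtps2ph; in the 128-bit forms only the
;; low four halfwords of the result are defined, which the RTL models by
;; concatenating them with a zero V4HI upper half.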
20708 (define_expand "vcvtps2ph_mask"
20709 [(set (match_operand:V8HI 0 "register_operand")
20710 (vec_merge:V8HI
20711 (vec_concat:V8HI
20712 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
20713 (match_operand:SI 2 "const_0_to_255_operand")]
20714 UNSPEC_VCVTPS2PH)
20715 (match_dup 5))
20716 (match_operand:V8HI 3 "nonimm_or_0_operand")
20717 (match_operand:QI 4 "register_operand")))]
20718 "TARGET_AVX512VL"
20719 "operands[5] = CONST0_RTX (V4HImode);")
20720
20721 (define_expand "vcvtps2ph"
20722 [(set (match_operand:V8HI 0 "register_operand")
20723 (vec_concat:V8HI
20724 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
20725 (match_operand:SI 2 "const_0_to_255_operand")]
20726 UNSPEC_VCVTPS2PH)
20727 (match_dup 3)))]
20728 "TARGET_F16C"
20729 "operands[3] = CONST0_RTX (V4HImode);")
20730
20731 (define_insn "*vcvtps2ph<mask_name>"
20732 [(set (match_operand:V8HI 0 "register_operand" "=v")
20733 (vec_concat:V8HI
20734 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
20735 (match_operand:SI 2 "const_0_to_255_operand" "N")]
20736 UNSPEC_VCVTPS2PH)
20737 (match_operand:V4HI 3 "const0_operand")))]
20738 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
20739 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
20740 [(set_attr "type" "ssecvt")
20741 (set_attr "prefix" "maybe_evex")
20742 (set_attr "mode" "V4SF")])
20743
20744 (define_insn "*vcvtps2ph_store<mask_name>"
20745 [(set (match_operand:V4HI 0 "memory_operand" "=m")
20746 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
20747 (match_operand:SI 2 "const_0_to_255_operand" "N")]
20748 UNSPEC_VCVTPS2PH))]
20749 "TARGET_F16C || TARGET_AVX512VL"
20750 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20751 [(set_attr "type" "ssecvt")
20752 (set_attr "prefix" "maybe_evex")
20753 (set_attr "mode" "V4SF")])
20754
20755 (define_insn "vcvtps2ph256<mask_name>"
20756 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
20757 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
20758 (match_operand:SI 2 "const_0_to_255_operand" "N")]
20759 UNSPEC_VCVTPS2PH))]
20760 "TARGET_F16C || TARGET_AVX512VL"
20761 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20762 [(set_attr "type" "ssecvt")
20763 (set_attr "prefix" "maybe_evex")
20764 (set_attr "btver2_decode" "vector")
20765 (set_attr "mode" "V8SF")])
20766
20767 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
20768 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
20769 (unspec:V16HI
20770 [(match_operand:V16SF 1 "register_operand" "v")
20771 (match_operand:SI 2 "const_0_to_255_operand" "N")]
20772 UNSPEC_VCVTPS2PH))]
20773 "TARGET_AVX512F"
20774 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20775 [(set_attr "type" "ssecvt")
20776 (set_attr "prefix" "evex")
20777 (set_attr "mode" "V16SF")])
20778
20779 ;; For gather* insn patterns
20780 (define_mode_iterator VEC_GATHER_MODE
20781 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
20782 (define_mode_attr VEC_GATHER_IDXSI
20783 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
20784 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
20785 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
20786 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
20787
20788 (define_mode_attr VEC_GATHER_IDXDI
20789 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
20790 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
20791 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
20792 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
20793
20794 (define_mode_attr VEC_GATHER_SRCDI
20795 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
20796 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
20797 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
20798 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
20799
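;; AVX2 gather expanders.  The VSIB address (base register, index vector
;; and 1/2/4/8 scale) is wrapped in an UNSPEC_VSIBADDR so that the memory
;; operand can be matched by vsib_mem_operator.  For qword-index gathers
;; of dword data only half of the destination elements are written,
;; which is why VEC_GATHER_SRCDI maps e.g. V8SF to V4SF.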
20800 (define_expand "avx2_gathersi<mode>"
20801 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
20802 (unspec:VEC_GATHER_MODE
20803 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
20804 (mem:<ssescalarmode>
20805 (match_par_dup 6
20806 [(match_operand 2 "vsib_address_operand")
20807 (match_operand:<VEC_GATHER_IDXSI>
20808 3 "register_operand")
20809 (match_operand:SI 5 "const1248_operand ")]))
20810 (mem:BLK (scratch))
20811 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
20812 UNSPEC_GATHER))
20813 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
20814 "TARGET_AVX2"
20815 {
20816 operands[6]
20817 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
20818 operands[5]), UNSPEC_VSIBADDR);
20819 })
20820
20821 (define_insn "*avx2_gathersi<mode>"
20822 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
20823 (unspec:VEC_GATHER_MODE
20824 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
20825 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
20826 [(unspec:P
20827 [(match_operand:P 3 "vsib_address_operand" "Tv")
20828 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
20829 (match_operand:SI 6 "const1248_operand" "n")]
20830 UNSPEC_VSIBADDR)])
20831 (mem:BLK (scratch))
20832 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
20833 UNSPEC_GATHER))
20834 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
20835 "TARGET_AVX2"
20836 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
20837 [(set_attr "type" "ssemov")
20838 (set_attr "prefix" "vex")
20839 (set_attr "mode" "<sseinsnmode>")])
20840
20841 (define_insn "*avx2_gathersi<mode>_2"
20842 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
20843 (unspec:VEC_GATHER_MODE
20844 [(pc)
20845 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
20846 [(unspec:P
20847 [(match_operand:P 2 "vsib_address_operand" "Tv")
20848 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
20849 (match_operand:SI 5 "const1248_operand" "n")]
20850 UNSPEC_VSIBADDR)])
20851 (mem:BLK (scratch))
20852 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
20853 UNSPEC_GATHER))
20854 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
20855 "TARGET_AVX2"
20856 "%M2v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
20857 [(set_attr "type" "ssemov")
20858 (set_attr "prefix" "vex")
20859 (set_attr "mode" "<sseinsnmode>")])
20860
20861 (define_expand "avx2_gatherdi<mode>"
20862 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
20863 (unspec:VEC_GATHER_MODE
20864 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
20865 (mem:<ssescalarmode>
20866 (match_par_dup 6
20867 [(match_operand 2 "vsib_address_operand")
20868 (match_operand:<VEC_GATHER_IDXDI>
20869 3 "register_operand")
20870 (match_operand:SI 5 "const1248_operand ")]))
20871 (mem:BLK (scratch))
20872 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
20873 UNSPEC_GATHER))
20874 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
20875 "TARGET_AVX2"
20876 {
20877 operands[6]
20878 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
20879 operands[5]), UNSPEC_VSIBADDR);
20880 })
20881
20882 (define_insn "*avx2_gatherdi<mode>"
20883 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
20884 (unspec:VEC_GATHER_MODE
20885 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
20886 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
20887 [(unspec:P
20888 [(match_operand:P 3 "vsib_address_operand" "Tv")
20889 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
20890 (match_operand:SI 6 "const1248_operand" "n")]
20891 UNSPEC_VSIBADDR)])
20892 (mem:BLK (scratch))
20893 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
20894 UNSPEC_GATHER))
20895 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
20896 "TARGET_AVX2"
20897 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
20898 [(set_attr "type" "ssemov")
20899 (set_attr "prefix" "vex")
20900 (set_attr "mode" "<sseinsnmode>")])
20901
20902 (define_insn "*avx2_gatherdi<mode>_2"
20903 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
20904 (unspec:VEC_GATHER_MODE
20905 [(pc)
20906 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
20907 [(unspec:P
20908 [(match_operand:P 2 "vsib_address_operand" "Tv")
20909 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
20910 (match_operand:SI 5 "const1248_operand" "n")]
20911 UNSPEC_VSIBADDR)])
20912 (mem:BLK (scratch))
20913 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
20914 UNSPEC_GATHER))
20915 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
20916 "TARGET_AVX2"
20917 {
20918 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
20919 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
20920 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
20921 }
20922 [(set_attr "type" "ssemov")
20923 (set_attr "prefix" "vex")
20924 (set_attr "mode" "<sseinsnmode>")])
20925
20926 (define_insn "*avx2_gatherdi<mode>_3"
20927 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
20928 (vec_select:<VEC_GATHER_SRCDI>
20929 (unspec:VI4F_256
20930 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
20931 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
20932 [(unspec:P
20933 [(match_operand:P 3 "vsib_address_operand" "Tv")
20934 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
20935 (match_operand:SI 6 "const1248_operand" "n")]
20936 UNSPEC_VSIBADDR)])
20937 (mem:BLK (scratch))
20938 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
20939 UNSPEC_GATHER)
20940 (parallel [(const_int 0) (const_int 1)
20941 (const_int 2) (const_int 3)])))
20942 (clobber (match_scratch:VI4F_256 1 "=&x"))]
20943 "TARGET_AVX2"
20944 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
20945 [(set_attr "type" "ssemov")
20946 (set_attr "prefix" "vex")
20947 (set_attr "mode" "<sseinsnmode>")])
20948
20949 (define_insn "*avx2_gatherdi<mode>_4"
20950 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
20951 (vec_select:<VEC_GATHER_SRCDI>
20952 (unspec:VI4F_256
20953 [(pc)
20954 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
20955 [(unspec:P
20956 [(match_operand:P 2 "vsib_address_operand" "Tv")
20957 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
20958 (match_operand:SI 5 "const1248_operand" "n")]
20959 UNSPEC_VSIBADDR)])
20960 (mem:BLK (scratch))
20961 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
20962 UNSPEC_GATHER)
20963 (parallel [(const_int 0) (const_int 1)
20964 (const_int 2) (const_int 3)])))
20965 (clobber (match_scratch:VI4F_256 1 "=&x"))]
20966 "TARGET_AVX2"
20967 "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
20968 [(set_attr "type" "ssemov")
20969 (set_attr "prefix" "vex")
20970 (set_attr "mode" "<sseinsnmode>")])
20971
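;; AVX512 gather expanders.  These take an explicit mask register operand
;; rather than a mask vector; the hardware clears the mask register as
;; the gather completes, which the insn patterns model by tying it to a
;; clobbered match_scratch.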
20972 (define_expand "<avx512>_gathersi<mode>"
20973 [(parallel [(set (match_operand:VI48F 0 "register_operand")
20974 (unspec:VI48F
20975 [(match_operand:VI48F 1 "register_operand")
20976 (match_operand:<avx512fmaskmode> 4 "register_operand")
20977 (mem:<ssescalarmode>
20978 (match_par_dup 6
20979 [(match_operand 2 "vsib_address_operand")
20980 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
20981 (match_operand:SI 5 "const1248_operand")]))]
20982 UNSPEC_GATHER))
20983 (clobber (match_scratch:<avx512fmaskmode> 7))])]
20984 "TARGET_AVX512F"
20985 {
20986 operands[6]
20987 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
20988 operands[5]), UNSPEC_VSIBADDR);
20989 })
20990
20991 (define_insn "*avx512f_gathersi<mode>"
20992 [(set (match_operand:VI48F 0 "register_operand" "=&v")
20993 (unspec:VI48F
20994 [(match_operand:VI48F 1 "register_operand" "0")
20995 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
20996 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
20997 [(unspec:P
20998 [(match_operand:P 4 "vsib_address_operand" "Tv")
20999 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
21000 (match_operand:SI 5 "const1248_operand" "n")]
21001 UNSPEC_VSIBADDR)])]
21002 UNSPEC_GATHER))
21003 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
21004 "TARGET_AVX512F"
21005 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
21006 ;; gas changed what it requires incompatibly.
21007 "%M4v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
21008 [(set_attr "type" "ssemov")
21009 (set_attr "prefix" "evex")
21010 (set_attr "mode" "<sseinsnmode>")])
21011
21012 (define_insn "*avx512f_gathersi<mode>_2"
21013 [(set (match_operand:VI48F 0 "register_operand" "=&v")
21014 (unspec:VI48F
21015 [(pc)
21016 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
21017 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
21018 [(unspec:P
21019 [(match_operand:P 3 "vsib_address_operand" "Tv")
21020 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
21021 (match_operand:SI 4 "const1248_operand" "n")]
21022 UNSPEC_VSIBADDR)])]
21023 UNSPEC_GATHER))
21024 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
21025 "TARGET_AVX512F"
21026 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21027 ;; gas changed what it requires incompatibly.
21028 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
21029 [(set_attr "type" "ssemov")
21030 (set_attr "prefix" "evex")
21031 (set_attr "mode" "<sseinsnmode>")])
21032
21033
21034 (define_expand "<avx512>_gatherdi<mode>"
21035 [(parallel [(set (match_operand:VI48F 0 "register_operand")
21036 (unspec:VI48F
21037 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
21038 (match_operand:QI 4 "register_operand")
21039 (mem:<ssescalarmode>
21040 (match_par_dup 6
21041 [(match_operand 2 "vsib_address_operand")
21042 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
21043 (match_operand:SI 5 "const1248_operand")]))]
21044 UNSPEC_GATHER))
21045 (clobber (match_scratch:QI 7))])]
21046 "TARGET_AVX512F"
21047 {
21048 operands[6]
21049 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
21050 operands[5]), UNSPEC_VSIBADDR);
21051 })
21052
21053 (define_insn "*avx512f_gatherdi<mode>"
21054 [(set (match_operand:VI48F 0 "register_operand" "=&v")
21055 (unspec:VI48F
21056 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
21057 (match_operand:QI 7 "register_operand" "2")
21058 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
21059 [(unspec:P
21060 [(match_operand:P 4 "vsib_address_operand" "Tv")
21061 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
21062 (match_operand:SI 5 "const1248_operand" "n")]
21063 UNSPEC_VSIBADDR)])]
21064 UNSPEC_GATHER))
21065 (clobber (match_scratch:QI 2 "=&Yk"))]
21066 "TARGET_AVX512F"
21067 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
21068 ;; gas changed what it requires incompatibly.
21069 "%M4v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
21070 [(set_attr "type" "ssemov")
21071 (set_attr "prefix" "evex")
21072 (set_attr "mode" "<sseinsnmode>")])
21073
21074 (define_insn "*avx512f_gatherdi<mode>_2"
21075 [(set (match_operand:VI48F 0 "register_operand" "=&v")
21076 (unspec:VI48F
21077 [(pc)
21078 (match_operand:QI 6 "register_operand" "1")
21079 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
21080 [(unspec:P
21081 [(match_operand:P 3 "vsib_address_operand" "Tv")
21082 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
21083 (match_operand:SI 4 "const1248_operand" "n")]
21084 UNSPEC_VSIBADDR)])]
21085 UNSPEC_GATHER))
21086 (clobber (match_scratch:QI 1 "=&Yk"))]
21087 "TARGET_AVX512F"
21088 {
21089 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21090 gas changed what it requires incompatibly. */
21091 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
21092 {
21093 if (<MODE_SIZE> != 64)
21094 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
21095 else
21096 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
21097 }
21098 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
21099 }
21100 [(set_attr "type" "ssemov")
21101 (set_attr "prefix" "evex")
21102 (set_attr "mode" "<sseinsnmode>")])
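
;; For illustration only: the masked gather patterns above are normally
;; reached through the AVX512F gather intrinsics in <immintrin.h>; the
;; function and argument names below are arbitrary, e.g. a 512-bit
;; dword-index float gather:
;;   #include <immintrin.h>
;;   __m512 f (__m512 src, __mmask16 k, __m512i idx, const float *base)
;;   {
;;     /* Gathered elements merge into SRC under mask K; the scale argument
;;        must be 1, 2, 4 or 8, matching const1248_operand above.  */
;;     return _mm512_mask_i32gather_ps (src, k, idx, base, 4);
;;   }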
21103
21104 (define_expand "<avx512>_scattersi<mode>"
21105 [(parallel [(set (mem:VI48F
21106 (match_par_dup 5
21107 [(match_operand 0 "vsib_address_operand")
21108 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
21109 (match_operand:SI 4 "const1248_operand")]))
21110 (unspec:VI48F
21111 [(match_operand:<avx512fmaskmode> 1 "register_operand")
21112 (match_operand:VI48F 3 "register_operand")]
21113 UNSPEC_SCATTER))
21114 (clobber (match_scratch:<avx512fmaskmode> 6))])]
21115 "TARGET_AVX512F"
21116 {
21117 operands[5]
21118 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
21119 operands[4]), UNSPEC_VSIBADDR);
21120 })
21121
21122 (define_insn "*avx512f_scattersi<mode>"
21123 [(set (match_operator:VI48F 5 "vsib_mem_operator"
21124 [(unspec:P
21125 [(match_operand:P 0 "vsib_address_operand" "Tv")
21126 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
21127 (match_operand:SI 4 "const1248_operand" "n")]
21128 UNSPEC_VSIBADDR)])
21129 (unspec:VI48F
21130 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
21131 (match_operand:VI48F 3 "register_operand" "v")]
21132 UNSPEC_SCATTER))
21133 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
21134 "TARGET_AVX512F"
21135 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21136 ;; gas changed what it requires incompatibly.
21137 "%M0v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
21138 [(set_attr "type" "ssemov")
21139 (set_attr "prefix" "evex")
21140 (set_attr "mode" "<sseinsnmode>")])
21141
21142 (define_expand "<avx512>_scatterdi<mode>"
21143 [(parallel [(set (mem:VI48F
21144 (match_par_dup 5
21145 [(match_operand 0 "vsib_address_operand")
21146 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
21147 (match_operand:SI 4 "const1248_operand")]))
21148 (unspec:VI48F
21149 [(match_operand:QI 1 "register_operand")
21150 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
21151 UNSPEC_SCATTER))
21152 (clobber (match_scratch:QI 6))])]
21153 "TARGET_AVX512F"
21154 {
21155 operands[5]
21156 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
21157 operands[4]), UNSPEC_VSIBADDR);
21158 })
21159
21160 (define_insn "*avx512f_scatterdi<mode>"
21161 [(set (match_operator:VI48F 5 "vsib_mem_operator"
21162 [(unspec:P
21163 [(match_operand:P 0 "vsib_address_operand" "Tv")
21164 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
21165 (match_operand:SI 4 "const1248_operand" "n")]
21166 UNSPEC_VSIBADDR)])
21167 (unspec:VI48F
21168 [(match_operand:QI 6 "register_operand" "1")
21169 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
21170 UNSPEC_SCATTER))
21171 (clobber (match_scratch:QI 1 "=&Yk"))]
21172 "TARGET_AVX512F"
21173 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21174 ;; gas changed what it requires incompatibly.
21175 "%M0v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
21176 [(set_attr "type" "ssemov")
21177 (set_attr "prefix" "evex")
21178 (set_attr "mode" "<sseinsnmode>")])
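
;; For illustration only: the scatter patterns above correspond to intrinsics
;; such as the following 512-bit dword-index float scatter:
;;   #include <immintrin.h>
;;   void f (float *base, __mmask16 k, __m512i idx, __m512 v)
;;   {
;;     /* Stores element i of V to base + idx[i] * 4 when bit i of K is set.  */
;;     _mm512_mask_i32scatter_ps (base, k, idx, v, 4);
;;   }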
21179
21180 (define_insn "<avx512>_compress<mode>_mask"
21181 [(set (match_operand:VI48F 0 "register_operand" "=v")
21182 (unspec:VI48F
21183 [(match_operand:VI48F 1 "register_operand" "v")
21184 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C")
21185 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
21186 UNSPEC_COMPRESS))]
21187 "TARGET_AVX512F"
21188 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21189 [(set_attr "type" "ssemov")
21190 (set_attr "prefix" "evex")
21191 (set_attr "mode" "<sseinsnmode>")])
21192
21193 (define_insn "compress<mode>_mask"
21194 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
21195 (unspec:VI12_AVX512VLBW
21196 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
21197 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C")
21198 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
21199 UNSPEC_COMPRESS))]
21200 "TARGET_AVX512VBMI2"
21201 "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21202 [(set_attr "type" "ssemov")
21203 (set_attr "prefix" "evex")
21204 (set_attr "mode" "<sseinsnmode>")])
21205
21206 (define_insn "<avx512>_compressstore<mode>_mask"
21207 [(set (match_operand:VI48F 0 "memory_operand" "=m")
21208 (unspec:VI48F
21209 [(match_operand:VI48F 1 "register_operand" "x")
21210 (match_dup 0)
21211 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
21212 UNSPEC_COMPRESS_STORE))]
21213 "TARGET_AVX512F"
21214 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
21215 [(set_attr "type" "ssemov")
21216 (set_attr "prefix" "evex")
21217 (set_attr "memory" "store")
21218 (set_attr "mode" "<sseinsnmode>")])
21219
21220 (define_insn "compressstore<mode>_mask"
21221 [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
21222 (unspec:VI12_AVX512VLBW
21223 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
21224 (match_dup 0)
21225 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
21226 UNSPEC_COMPRESS_STORE))]
21227 "TARGET_AVX512VBMI2"
21228 "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
21229 [(set_attr "type" "ssemov")
21230 (set_attr "prefix" "evex")
21231 (set_attr "memory" "store")
21232 (set_attr "mode" "<sseinsnmode>")])
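
;; For illustration only: the compress and compress-store patterns above are
;; typically used through intrinsics like these:
;;   #include <immintrin.h>
;;   __m512i f (__m512i src, __mmask16 k, __m512i a, int *p)
;;   {
;;     /* Packs the elements of A selected by K into contiguous low lanes;
;;        the store form writes only the selected elements to memory.  */
;;     _mm512_mask_compressstoreu_epi32 (p, k, a);
;;     return _mm512_mask_compress_epi32 (src, k, a);
;;   }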
21233
21234 (define_expand "<avx512>_expand<mode>_maskz"
21235 [(set (match_operand:VI48F 0 "register_operand")
21236 (unspec:VI48F
21237 [(match_operand:VI48F 1 "nonimmediate_operand")
21238 (match_operand:VI48F 2 "nonimm_or_0_operand")
21239 (match_operand:<avx512fmaskmode> 3 "register_operand")]
21240 UNSPEC_EXPAND))]
21241 "TARGET_AVX512F"
21242 "operands[2] = CONST0_RTX (<MODE>mode);")
21243
21244 (define_insn "<avx512>_expand<mode>_mask"
21245 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
21246 (unspec:VI48F
21247 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
21248 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C,0C")
21249 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
21250 UNSPEC_EXPAND))]
21251 "TARGET_AVX512F"
21252 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21253 [(set_attr "type" "ssemov")
21254 (set_attr "prefix" "evex")
21255 (set_attr "memory" "none,load")
21256 (set_attr "mode" "<sseinsnmode>")])
21257
21258 (define_insn "expand<mode>_mask"
21259 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
21260 (unspec:VI12_AVX512VLBW
21261 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
21262 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C,0C")
21263 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
21264 UNSPEC_EXPAND))]
21265 "TARGET_AVX512VBMI2"
21266 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21267 [(set_attr "type" "ssemov")
21268 (set_attr "prefix" "evex")
21269 (set_attr "memory" "none,load")
21270 (set_attr "mode" "<sseinsnmode>")])
21271
21272 (define_expand "expand<mode>_maskz"
21273 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
21274 (unspec:VI12_AVX512VLBW
21275 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
21276 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand")
21277 (match_operand:<avx512fmaskmode> 3 "register_operand")]
21278 UNSPEC_EXPAND))]
21279 "TARGET_AVX512VBMI2"
21280 "operands[2] = CONST0_RTX (<MODE>mode);")
21281
21282 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
21283 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
21284 (unspec:VF_AVX512VL
21285 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
21286 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
21287 (match_operand:SI 3 "const_0_to_15_operand")]
21288 UNSPEC_RANGE))]
21289 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
21290 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
21291 [(set_attr "type" "sse")
21292 (set_attr "prefix" "evex")
21293 (set_attr "mode" "<MODE>")])
21294
21295 (define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
21296 [(set (match_operand:VF_128 0 "register_operand" "=v")
21297 (vec_merge:VF_128
21298 (unspec:VF_128
21299 [(match_operand:VF_128 1 "register_operand" "v")
21300 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
21301 (match_operand:SI 3 "const_0_to_15_operand")]
21302 UNSPEC_RANGE)
21303 (match_dup 1)
21304 (const_int 1)))]
21305 "TARGET_AVX512DQ"
21306 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
21307 [(set_attr "type" "sse")
21308 (set_attr "prefix" "evex")
21309 (set_attr "mode" "<MODE>")])
21310
21311 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
21312 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
21313 (unspec:<avx512fmaskmode>
21314 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
21315 (match_operand:QI 2 "const_0_to_255_operand" "n")]
21316 UNSPEC_FPCLASS))]
21317 "TARGET_AVX512DQ"
21318 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
21319 [(set_attr "type" "sse")
21320 (set_attr "length_immediate" "1")
21321 (set_attr "prefix" "evex")
21322 (set_attr "mode" "<MODE>")])
21323
21324 (define_insn "avx512dq_vmfpclass<mode>"
21325 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
21326 (and:<avx512fmaskmode>
21327 (unspec:<avx512fmaskmode>
21328 [(match_operand:VF_128 1 "register_operand" "v")
21329 (match_operand:QI 2 "const_0_to_255_operand" "n")]
21330 UNSPEC_FPCLASS)
21331 (const_int 1)))]
21332 "TARGET_AVX512DQ"
21333 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
21334 [(set_attr "type" "sse")
21335 (set_attr "length_immediate" "1")
21336 (set_attr "prefix" "evex")
21337 (set_attr "mode" "<MODE>")])
21338
21339 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
21340 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
21341 (unspec:VF_AVX512VL
21342 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
21343 (match_operand:SI 2 "const_0_to_15_operand")]
21344 UNSPEC_GETMANT))]
21345 "TARGET_AVX512F"
21346 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
21347 [(set_attr "prefix" "evex")
21348 (set_attr "mode" "<MODE>")])
21349
21350 (define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
21351 [(set (match_operand:VF_128 0 "register_operand" "=v")
21352 (vec_merge:VF_128
21353 (unspec:VF_128
21354 [(match_operand:VF_128 1 "register_operand" "v")
21355 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
21356 (match_operand:SI 3 "const_0_to_15_operand")]
21357 UNSPEC_GETMANT)
21358 (match_dup 1)
21359 (const_int 1)))]
21360 "TARGET_AVX512F"
21361 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
21362 [(set_attr "prefix" "evex")
21363 (set_attr "mode" "<ssescalarmode>")])
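
;; For illustration only: vgetmantps/vgetmantss are used through intrinsics
;; whose two enum arguments together form the const_0_to_15 immediate, e.g.:
;;   #include <immintrin.h>
;;   __m512 f (__m512 x)
;;   {
;;     /* Extracts mantissas normalized to [1, 2), keeping the sign of the
;;        source element.  */
;;     return _mm512_getmant_ps (x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
;;   }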
21364
21365 ;; The correct representation for this is absolutely enormous, and
21366 ;; surely not generally useful.
21367 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
21368 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
21369 (unspec:VI2_AVX512VL
21370 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
21371 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
21372 (match_operand:SI 3 "const_0_to_255_operand")]
21373 UNSPEC_DBPSADBW))]
21374 "TARGET_AVX512BW"
21375 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
21376 [(set_attr "type" "sselog1")
21377 (set_attr "length_immediate" "1")
21378 (set_attr "prefix" "evex")
21379 (set_attr "mode" "<sseinsnmode>")])
21380
21381 (define_insn "clz<mode>2<mask_name>"
21382 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
21383 (clz:VI48_AVX512VL
21384 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
21385 "TARGET_AVX512CD"
21386 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21387 [(set_attr "type" "sse")
21388 (set_attr "prefix" "evex")
21389 (set_attr "mode" "<sseinsnmode>")])
21390
21391 (define_insn "<mask_codefor>conflict<mode><mask_name>"
21392 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
21393 (unspec:VI48_AVX512VL
21394 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
21395 UNSPEC_CONFLICT))]
21396 "TARGET_AVX512CD"
21397 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21398 [(set_attr "type" "sse")
21399 (set_attr "prefix" "evex")
21400 (set_attr "mode" "<sseinsnmode>")])
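
;; For illustration only: the AVX512CD patterns above are reached via e.g.:
;;   #include <immintrin.h>
;;   __m512i f (__m512i idx)
;;   {
;;     /* vpconflictd sets bit j of lane i when lane j (j < i) holds the same
;;        value; vplzcntd then turns that into a per-lane count, the usual
;;        idiom for resolving duplicate indices in a gather/scatter loop.  */
;;     return _mm512_lzcnt_epi32 (_mm512_conflict_epi32 (idx));
;;   }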
21401
21402 (define_insn "sha1msg1"
21403 [(set (match_operand:V4SI 0 "register_operand" "=x")
21404 (unspec:V4SI
21405 [(match_operand:V4SI 1 "register_operand" "0")
21406 (match_operand:V4SI 2 "vector_operand" "xBm")]
21407 UNSPEC_SHA1MSG1))]
21408 "TARGET_SHA"
21409 "sha1msg1\t{%2, %0|%0, %2}"
21410 [(set_attr "type" "sselog1")
21411 (set_attr "mode" "TI")])
21412
21413 (define_insn "sha1msg2"
21414 [(set (match_operand:V4SI 0 "register_operand" "=x")
21415 (unspec:V4SI
21416 [(match_operand:V4SI 1 "register_operand" "0")
21417 (match_operand:V4SI 2 "vector_operand" "xBm")]
21418 UNSPEC_SHA1MSG2))]
21419 "TARGET_SHA"
21420 "sha1msg2\t{%2, %0|%0, %2}"
21421 [(set_attr "type" "sselog1")
21422 (set_attr "mode" "TI")])
21423
21424 (define_insn "sha1nexte"
21425 [(set (match_operand:V4SI 0 "register_operand" "=x")
21426 (unspec:V4SI
21427 [(match_operand:V4SI 1 "register_operand" "0")
21428 (match_operand:V4SI 2 "vector_operand" "xBm")]
21429 UNSPEC_SHA1NEXTE))]
21430 "TARGET_SHA"
21431 "sha1nexte\t{%2, %0|%0, %2}"
21432 [(set_attr "type" "sselog1")
21433 (set_attr "mode" "TI")])
21434
21435 (define_insn "sha1rnds4"
21436 [(set (match_operand:V4SI 0 "register_operand" "=x")
21437 (unspec:V4SI
21438 [(match_operand:V4SI 1 "register_operand" "0")
21439 (match_operand:V4SI 2 "vector_operand" "xBm")
21440 (match_operand:SI 3 "const_0_to_3_operand" "n")]
21441 UNSPEC_SHA1RNDS4))]
21442 "TARGET_SHA"
21443 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
21444 [(set_attr "type" "sselog1")
21445 (set_attr "length_immediate" "1")
21446 (set_attr "mode" "TI")])
21447
21448 (define_insn "sha256msg1"
21449 [(set (match_operand:V4SI 0 "register_operand" "=x")
21450 (unspec:V4SI
21451 [(match_operand:V4SI 1 "register_operand" "0")
21452 (match_operand:V4SI 2 "vector_operand" "xBm")]
21453 UNSPEC_SHA256MSG1))]
21454 "TARGET_SHA"
21455 "sha256msg1\t{%2, %0|%0, %2}"
21456 [(set_attr "type" "sselog1")
21457 (set_attr "mode" "TI")])
21458
21459 (define_insn "sha256msg2"
21460 [(set (match_operand:V4SI 0 "register_operand" "=x")
21461 (unspec:V4SI
21462 [(match_operand:V4SI 1 "register_operand" "0")
21463 (match_operand:V4SI 2 "vector_operand" "xBm")]
21464 UNSPEC_SHA256MSG2))]
21465 "TARGET_SHA"
21466 "sha256msg2\t{%2, %0|%0, %2}"
21467 [(set_attr "type" "sselog1")
21468 (set_attr "mode" "TI")])
21469
21470 (define_insn "sha256rnds2"
21471 [(set (match_operand:V4SI 0 "register_operand" "=x")
21472 (unspec:V4SI
21473 [(match_operand:V4SI 1 "register_operand" "0")
21474 (match_operand:V4SI 2 "vector_operand" "xBm")
21475 (match_operand:V4SI 3 "register_operand" "Yz")]
21476 UNSPEC_SHA256RNDS2))]
21477 "TARGET_SHA"
21478 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
21479 [(set_attr "type" "sselog1")
21480 (set_attr "length_immediate" "1")
21481 (set_attr "mode" "TI")])
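
;; For illustration only: the SHA patterns above back the SHA intrinsics,
;; e.g. two rounds of SHA-256:
;;   #include <immintrin.h>
;;   __m128i f (__m128i state0, __m128i state1, __m128i wk)
;;   {
;;     /* WK (message words plus round constants) must end up in %xmm0,
;;        matching the "Yz" constraint on operand 3 above.  */
;;     return _mm_sha256rnds2_epu32 (state0, state1, wk);
;;   }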
21482
21483 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
21484 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
21485 (unspec:AVX512MODE2P
21486 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
21487 UNSPEC_CAST))]
21488 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
21489 "#"
21490 "&& reload_completed"
21491 [(set (match_dup 0) (match_dup 1))]
21492 {
21493 if (REG_P (operands[0]))
21494 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
21495 else
21496 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
21497 <ssequartermode>mode);
21498 })
21499
21500 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
21501 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
21502 (unspec:AVX512MODE2P
21503 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
21504 UNSPEC_CAST))]
21505 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
21506 "#"
21507 "&& reload_completed"
21508 [(set (match_dup 0) (match_dup 1))]
21509 {
21510 if (REG_P (operands[0]))
21511 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
21512 else
21513 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
21514 <ssehalfvecmode>mode);
21515 })
21516
21517 (define_int_iterator VPMADD52
21518 [UNSPEC_VPMADD52LUQ
21519 UNSPEC_VPMADD52HUQ])
21520
21521 (define_int_attr vpmadd52type
21522 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
21523
21524 (define_expand "vpamdd52huq<mode>_maskz"
21525 [(match_operand:VI8_AVX512VL 0 "register_operand")
21526 (match_operand:VI8_AVX512VL 1 "register_operand")
21527 (match_operand:VI8_AVX512VL 2 "register_operand")
21528 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
21529 (match_operand:<avx512fmaskmode> 4 "register_operand")]
21530 "TARGET_AVX512IFMA"
21531 {
21532 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
21533 operands[0], operands[1], operands[2], operands[3],
21534 CONST0_RTX (<MODE>mode), operands[4]));
21535 DONE;
21536 })
21537
21538 (define_expand "vpamdd52luq<mode>_maskz"
21539 [(match_operand:VI8_AVX512VL 0 "register_operand")
21540 (match_operand:VI8_AVX512VL 1 "register_operand")
21541 (match_operand:VI8_AVX512VL 2 "register_operand")
21542 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
21543 (match_operand:<avx512fmaskmode> 4 "register_operand")]
21544 "TARGET_AVX512IFMA"
21545 {
21546 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
21547 operands[0], operands[1], operands[2], operands[3],
21548 CONST0_RTX (<MODE>mode), operands[4]));
21549 DONE;
21550 })
21551
21552 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
21553 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
21554 (unspec:VI8_AVX512VL
21555 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
21556 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
21557 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
21558 VPMADD52))]
21559 "TARGET_AVX512IFMA"
21560 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
21561 [(set_attr "type" "ssemuladd")
21562 (set_attr "prefix" "evex")
21563 (set_attr "mode" "<sseinsnmode>")])
21564
21565 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
21566 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
21567 (vec_merge:VI8_AVX512VL
21568 (unspec:VI8_AVX512VL
21569 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
21570 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
21571 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
21572 VPMADD52)
21573 (match_dup 1)
21574 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21575 "TARGET_AVX512IFMA"
21576 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
21577 [(set_attr "type" "ssemuladd")
21578 (set_attr "prefix" "evex")
21579 (set_attr "mode" "<sseinsnmode>")])
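
;; For illustration only: the 52-bit multiply-add patterns above are used via
;; the AVX512IFMA intrinsics, e.g.:
;;   #include <immintrin.h>
;;   __m512i f (__m512i acc, __m512i a, __m512i b)
;;   {
;;     /* Multiplies the low 52 bits of each qword of A and B and adds the
;;        low 52 bits of the 104-bit product to the accumulator ACC.  */
;;     return _mm512_madd52lo_epu64 (acc, a, b);
;;   }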
21580
21581 (define_insn "vpmultishiftqb<mode><mask_name>"
21582 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
21583 (unspec:VI1_AVX512VL
21584 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
21585 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
21586 UNSPEC_VPMULTISHIFT))]
21587 "TARGET_AVX512VBMI"
21588 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21589 [(set_attr "type" "sselog")
21590 (set_attr "prefix" "evex")
21591 (set_attr "mode" "<sseinsnmode>")])
21592
21593 (define_mode_iterator IMOD4
21594 [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
21595
21596 (define_mode_attr imod4_narrow
21597 [(V64SF "V16SF") (V64SI "V16SI")])
21598
21599 (define_expand "mov<mode>"
21600 [(set (match_operand:IMOD4 0 "nonimmediate_operand")
21601 (match_operand:IMOD4 1 "nonimm_or_0_operand"))]
21602 "TARGET_AVX512F"
21603 {
21604 ix86_expand_vector_move (<MODE>mode, operands);
21605 DONE;
21606 })
21607
21608 (define_insn_and_split "*mov<mode>_internal"
21609 [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
21610 (match_operand:IMOD4 1 "nonimm_or_0_operand" " C,vm,v"))]
21611 "TARGET_AVX512F
21612 && (register_operand (operands[0], <MODE>mode)
21613 || register_operand (operands[1], <MODE>mode))"
21614 "#"
21615 "&& reload_completed"
21616 [(const_int 0)]
21617 {
21618 rtx op0, op1;
21619 int i;
21620
21621 for (i = 0; i < 4; i++)
21622 {
21623 op0 = simplify_subreg
21624 (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
21625 op1 = simplify_subreg
21626 (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
21627 emit_move_insn (op0, op1);
21628 }
21629 DONE;
21630 })
21631
21632 (define_insn "avx5124fmaddps_4fmaddps"
21633 [(set (match_operand:V16SF 0 "register_operand" "=v")
21634 (unspec:V16SF
21635 [(match_operand:V16SF 1 "register_operand" "0")
21636 (match_operand:V64SF 2 "register_operand" "v")
21637 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
21638 "TARGET_AVX5124FMAPS"
21639 "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
21640 [(set_attr ("type") ("ssemuladd"))
21641 (set_attr ("prefix") ("evex"))
21642 (set_attr ("mode") ("V16SF"))])
21643
21644 (define_insn "avx5124fmaddps_4fmaddps_mask"
21645 [(set (match_operand:V16SF 0 "register_operand" "=v")
21646 (vec_merge:V16SF
21647 (unspec:V16SF
21648 [(match_operand:V64SF 1 "register_operand" "v")
21649 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
21650 (match_operand:V16SF 3 "register_operand" "0")
21651 (match_operand:HI 4 "register_operand" "Yk")))]
21652 "TARGET_AVX5124FMAPS"
21653 "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
21654 [(set_attr ("type") ("ssemuladd"))
21655 (set_attr ("prefix") ("evex"))
21656 (set_attr ("mode") ("V16SF"))])
21657
21658 (define_insn "avx5124fmaddps_4fmaddps_maskz"
21659 [(set (match_operand:V16SF 0 "register_operand" "=v")
21660 (vec_merge:V16SF
21661 (unspec:V16SF
21662 [(match_operand:V16SF 1 "register_operand" "0")
21663 (match_operand:V64SF 2 "register_operand" "v")
21664 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
21665 (match_operand:V16SF 4 "const0_operand" "C")
21666 (match_operand:HI 5 "register_operand" "Yk")))]
21667 "TARGET_AVX5124FMAPS"
21668 "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
21669 [(set_attr ("type") ("ssemuladd"))
21670 (set_attr ("prefix") ("evex"))
21671 (set_attr ("mode") ("V16SF"))])
21672
21673 (define_insn "avx5124fmaddps_4fmaddss"
21674 [(set (match_operand:V4SF 0 "register_operand" "=v")
21675 (unspec:V4SF
21676 [(match_operand:V4SF 1 "register_operand" "0")
21677 (match_operand:V64SF 2 "register_operand" "v")
21678 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
21679 "TARGET_AVX5124FMAPS"
21680 "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
21681 [(set_attr ("type") ("ssemuladd"))
21682 (set_attr ("prefix") ("evex"))
21683 (set_attr ("mode") ("SF"))])
21684
21685 (define_insn "avx5124fmaddps_4fmaddss_mask"
21686 [(set (match_operand:V4SF 0 "register_operand" "=v")
21687 (vec_merge:V4SF
21688 (unspec:V4SF
21689 [(match_operand:V64SF 1 "register_operand" "v")
21690 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
21691 (match_operand:V4SF 3 "register_operand" "0")
21692 (match_operand:QI 4 "register_operand" "Yk")))]
21693 "TARGET_AVX5124FMAPS"
21694 "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
21695 [(set_attr ("type") ("ssemuladd"))
21696 (set_attr ("prefix") ("evex"))
21697 (set_attr ("mode") ("SF"))])
21698
21699 (define_insn "avx5124fmaddps_4fmaddss_maskz"
21700 [(set (match_operand:V4SF 0 "register_operand" "=v")
21701 (vec_merge:V4SF
21702 (unspec:V4SF
21703 [(match_operand:V4SF 1 "register_operand" "0")
21704 (match_operand:V64SF 2 "register_operand" "v")
21705 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
21706 (match_operand:V4SF 4 "const0_operand" "C")
21707 (match_operand:QI 5 "register_operand" "Yk")))]
21708 "TARGET_AVX5124FMAPS"
21709 "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
21710 [(set_attr ("type") ("ssemuladd"))
21711 (set_attr ("prefix") ("evex"))
21712 (set_attr ("mode") ("SF"))])
21713
21714 (define_insn "avx5124fmaddps_4fnmaddps"
21715 [(set (match_operand:V16SF 0 "register_operand" "=v")
21716 (unspec:V16SF
21717 [(match_operand:V16SF 1 "register_operand" "0")
21718 (match_operand:V64SF 2 "register_operand" "v")
21719 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
21720 "TARGET_AVX5124FMAPS"
21721 "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
21722 [(set_attr ("type") ("ssemuladd"))
21723 (set_attr ("prefix") ("evex"))
21724 (set_attr ("mode") ("V16SF"))])
21725
21726 (define_insn "avx5124fmaddps_4fnmaddps_mask"
21727 [(set (match_operand:V16SF 0 "register_operand" "=v")
21728 (vec_merge:V16SF
21729 (unspec:V16SF
21730 [(match_operand:V64SF 1 "register_operand" "v")
21731 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21732 (match_operand:V16SF 3 "register_operand" "0")
21733 (match_operand:HI 4 "register_operand" "Yk")))]
21734 "TARGET_AVX5124FMAPS"
21735 "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
21736 [(set_attr ("type") ("ssemuladd"))
21737 (set_attr ("prefix") ("evex"))
21738 (set_attr ("mode") ("V16SF"))])
21739
21740 (define_insn "avx5124fmaddps_4fnmaddps_maskz"
21741 [(set (match_operand:V16SF 0 "register_operand" "=v")
21742 (vec_merge:V16SF
21743 (unspec:V16SF
21744 [(match_operand:V16SF 1 "register_operand" "0")
21745 (match_operand:V64SF 2 "register_operand" "v")
21746 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21747 (match_operand:V16SF 4 "const0_operand" "C")
21748 (match_operand:HI 5 "register_operand" "Yk")))]
21749 "TARGET_AVX5124FMAPS"
21750 "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
21751 [(set_attr ("type") ("ssemuladd"))
21752 (set_attr ("prefix") ("evex"))
21753 (set_attr ("mode") ("V16SF"))])
21754
21755 (define_insn "avx5124fmaddps_4fnmaddss"
21756 [(set (match_operand:V4SF 0 "register_operand" "=v")
21757 (unspec:V4SF
21758 [(match_operand:V4SF 1 "register_operand" "0")
21759 (match_operand:V64SF 2 "register_operand" "v")
21760 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
21761 "TARGET_AVX5124FMAPS"
21762 "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
21763 [(set_attr ("type") ("ssemuladd"))
21764 (set_attr ("prefix") ("evex"))
21765 (set_attr ("mode") ("SF"))])
21766
21767 (define_insn "avx5124fmaddps_4fnmaddss_mask"
21768 [(set (match_operand:V4SF 0 "register_operand" "=v")
21769 (vec_merge:V4SF
21770 (unspec:V4SF
21771 [(match_operand:V64SF 1 "register_operand" "v")
21772 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21773 (match_operand:V4SF 3 "register_operand" "0")
21774 (match_operand:QI 4 "register_operand" "Yk")))]
21775 "TARGET_AVX5124FMAPS"
21776 "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
21777 [(set_attr ("type") ("ssemuladd"))
21778 (set_attr ("prefix") ("evex"))
21779 (set_attr ("mode") ("SF"))])
21780
21781 (define_insn "avx5124fmaddps_4fnmaddss_maskz"
21782 [(set (match_operand:V4SF 0 "register_operand" "=v")
21783 (vec_merge:V4SF
21784 (unspec:V4SF
21785 [(match_operand:V4SF 1 "register_operand" "0")
21786 (match_operand:V64SF 2 "register_operand" "v")
21787 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21788 (match_operand:V4SF 4 "const0_operand" "C")
21789 (match_operand:QI 5 "register_operand" "Yk")))]
21790 "TARGET_AVX5124FMAPS"
21791 "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
21792 [(set_attr ("type") ("ssemuladd"))
21793 (set_attr ("prefix") ("evex"))
21794 (set_attr ("mode") ("SF"))])
21795
21796 (define_insn "avx5124vnniw_vp4dpwssd"
21797 [(set (match_operand:V16SI 0 "register_operand" "=v")
21798 (unspec:V16SI
21799 [(match_operand:V16SI 1 "register_operand" "0")
21800 (match_operand:V64SI 2 "register_operand" "v")
21801 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
21802 "TARGET_AVX5124VNNIW"
21803 "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
21804 [(set_attr ("type") ("ssemuladd"))
21805 (set_attr ("prefix") ("evex"))
21806 (set_attr ("mode") ("TI"))])
21807
21808 (define_insn "avx5124vnniw_vp4dpwssd_mask"
21809 [(set (match_operand:V16SI 0 "register_operand" "=v")
21810 (vec_merge:V16SI
21811 (unspec:V16SI
21812 [(match_operand:V64SI 1 "register_operand" "v")
21813 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
21814 (match_operand:V16SI 3 "register_operand" "0")
21815 (match_operand:HI 4 "register_operand" "Yk")))]
21816 "TARGET_AVX5124VNNIW"
21817 "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
21818 [(set_attr ("type") ("ssemuladd"))
21819 (set_attr ("prefix") ("evex"))
21820 (set_attr ("mode") ("TI"))])
21821
21822 (define_insn "avx5124vnniw_vp4dpwssd_maskz"
21823 [(set (match_operand:V16SI 0 "register_operand" "=v")
21824 (vec_merge:V16SI
21825 (unspec:V16SI
21826 [(match_operand:V16SI 1 "register_operand" "0")
21827 (match_operand:V64SI 2 "register_operand" "v")
21828 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
21829 (match_operand:V16SI 4 "const0_operand" "C")
21830 (match_operand:HI 5 "register_operand" "Yk")))]
21831 "TARGET_AVX5124VNNIW"
21832 "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
21833 [(set_attr ("type") ("ssemuladd"))
21834 (set_attr ("prefix") ("evex"))
21835 (set_attr ("mode") ("TI"))])
21836
21837 (define_insn "avx5124vnniw_vp4dpwssds"
21838 [(set (match_operand:V16SI 0 "register_operand" "=v")
21839 (unspec:V16SI
21840 [(match_operand:V16SI 1 "register_operand" "0")
21841 (match_operand:V64SI 2 "register_operand" "v")
21842 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
21843 "TARGET_AVX5124VNNIW"
21844 "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
21845 [(set_attr ("type") ("ssemuladd"))
21846 (set_attr ("prefix") ("evex"))
21847 (set_attr ("mode") ("TI"))])
21848
21849 (define_insn "avx5124vnniw_vp4dpwssds_mask"
21850 [(set (match_operand:V16SI 0 "register_operand" "=v")
21851 (vec_merge:V16SI
21852 (unspec:V16SI
21853 [(match_operand:V64SI 1 "register_operand" "v")
21854 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
21855 (match_operand:V16SI 3 "register_operand" "0")
21856 (match_operand:HI 4 "register_operand" "Yk")))]
21857 "TARGET_AVX5124VNNIW"
21858 "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
21859 [(set_attr ("type") ("ssemuladd"))
21860 (set_attr ("prefix") ("evex"))
21861 (set_attr ("mode") ("TI"))])
21862
21863 (define_insn "avx5124vnniw_vp4dpwssds_maskz"
21864 [(set (match_operand:V16SI 0 "register_operand" "=v")
21865 (vec_merge:V16SI
21866 (unspec:V16SI
21867 [(match_operand:V16SI 1 "register_operand" "0")
21868 (match_operand:V64SI 2 "register_operand" "v")
21869 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
21870 (match_operand:V16SI 4 "const0_operand" "C")
21871 (match_operand:HI 5 "register_operand" "Yk")))]
21872 "TARGET_AVX5124VNNIW"
21873 "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
21874 [(set_attr ("type") ("ssemuladd"))
21875 (set_attr ("prefix") ("evex"))
21876 (set_attr ("mode") ("TI"))])
21877
21878 (define_insn "vpopcount<mode><mask_name>"
21879 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
21880 (popcount:VI48_AVX512VL
21881 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
21882 "TARGET_AVX512VPOPCNTDQ"
21883 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
21884
21885 ;; Save multiple registers out-of-line.
21886 (define_insn "save_multiple<mode>"
21887 [(match_parallel 0 "save_multiple"
21888 [(use (match_operand:P 1 "symbol_operand"))])]
21889 "TARGET_SSE && TARGET_64BIT"
21890 "call\t%P1")
21891
21892 ;; Restore multiple registers out-of-line.
21893 (define_insn "restore_multiple<mode>"
21894 [(match_parallel 0 "restore_multiple"
21895 [(use (match_operand:P 1 "symbol_operand"))])]
21896 "TARGET_SSE && TARGET_64BIT"
21897 "call\t%P1")
21898
21899 ;; Restore multiple registers out-of-line and return.
21900 (define_insn "restore_multiple_and_return<mode>"
21901 [(match_parallel 0 "restore_multiple"
21902 [(return)
21903 (use (match_operand:P 1 "symbol_operand"))
21904 (set (reg:DI SP_REG) (reg:DI R10_REG))
21905 ])]
21906 "TARGET_SSE && TARGET_64BIT"
21907 "jmp\t%P1")
21908
21909 ;; Restore multiple registers out-of-line when the hard frame pointer is
21910 ;; used, performing the leave operation prior to returning from the function.
21911 (define_insn "restore_multiple_leave_return<mode>"
21912 [(match_parallel 0 "restore_multiple"
21913 [(return)
21914 (use (match_operand:P 1 "symbol_operand"))
21915 (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
21916 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
21917 (clobber (mem:BLK (scratch)))
21918 ])]
21919 "TARGET_SSE && TARGET_64BIT"
21920 "jmp\t%P1")
21921
21922 (define_insn "vpopcount<mode><mask_name>"
21923 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
21924 (popcount:VI12_AVX512VL
21925 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
21926 "TARGET_AVX512BITALG"
21927 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
21928
21929 (define_insn "vgf2p8affineinvqb_<mode><mask_name>"
21930 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
21931 (unspec:VI1_AVX512F
21932 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
21933 (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
21934 (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
21935 UNSPEC_GF2P8AFFINEINV))]
21936 "TARGET_GFNI"
21937 "@
21938 gf2p8affineinvqb\t{%3, %2, %0|%0, %2, %3}
21939 vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}
21940 vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
21941 [(set_attr "isa" "noavx,avx,avx512f")
21942 (set_attr "prefix_data16" "1,*,*")
21943 (set_attr "prefix_extra" "1")
21944 (set_attr "prefix" "orig,maybe_evex,evex")
21945 (set_attr "mode" "<sseinsnmode>")])
21946
21947 (define_insn "vgf2p8affineqb_<mode><mask_name>"
21948 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
21949 (unspec:VI1_AVX512F
21950 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
21951 (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
21952 (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
21953 UNSPEC_GF2P8AFFINE))]
21954 "TARGET_GFNI"
21955 "@
21956 gf2p8affineqb\t{%3, %2, %0|%0, %2, %3}
21957 vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}
21958 vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
21959 [(set_attr "isa" "noavx,avx,avx512f")
21960 (set_attr "prefix_data16" "1,*,*")
21961 (set_attr "prefix_extra" "1")
21962 (set_attr "prefix" "orig,maybe_evex,evex")
21963 (set_attr "mode" "<sseinsnmode>")])
21964
21965 (define_insn "vgf2p8mulb_<mode><mask_name>"
21966 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
21967 (unspec:VI1_AVX512F
21968 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
21969 (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")]
21970 UNSPEC_GF2P8MUL))]
21971 "TARGET_GFNI"
21972 "@
21973 gf2p8mulb\t{%2, %0|%0, %2}
21974 vgf2p8mulb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
21975 vgf2p8mulb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21976 [(set_attr "isa" "noavx,avx,avx512f")
21977 (set_attr "prefix_data16" "1,*,*")
21978 (set_attr "prefix_extra" "1")
21979 (set_attr "prefix" "orig,maybe_evex,evex")
21980 (set_attr "mode" "<sseinsnmode>")])
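
;; For illustration only: the GFNI patterns above correspond to intrinsics
;; such as:
;;   #include <immintrin.h>
;;   __m128i f (__m128i a, __m128i b)
;;   {
;;     /* Byte-wise multiplication in GF(2^8) with reduction polynomial
;;        x^8 + x^4 + x^3 + x + 1.  */
;;     return _mm_gf2p8mul_epi8 (a, b);
;;   }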
21981
21982 (define_insn "vpshrd_<mode><mask_name>"
21983 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
21984 (unspec:VI248_AVX512VL
21985 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
21986 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
21987 (match_operand:SI 3 "const_0_to_255_operand" "n")]
21988 UNSPEC_VPSHRD))]
21989 "TARGET_AVX512VBMI2"
21990 "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
21991 [(set_attr ("prefix") ("evex"))])
21992
21993 (define_insn "vpshld_<mode><mask_name>"
21994 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
21995 (unspec:VI248_AVX512VL
21996 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
21997 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
21998 (match_operand:SI 3 "const_0_to_255_operand" "n")]
21999 UNSPEC_VPSHLD))]
22000 "TARGET_AVX512VBMI2"
22001 "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
22002 [(set_attr ("prefix") ("evex"))])
22003
22004 (define_insn "vpshrdv_<mode>"
22005 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22006 (unspec:VI248_AVX512VL
22007 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22008 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22009 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22010 UNSPEC_VPSHRDV))]
22011 "TARGET_AVX512VBMI2"
22012 "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
22013 [(set_attr ("prefix") ("evex"))
22014 (set_attr "mode" "<sseinsnmode>")])
22015
22016 (define_insn "vpshrdv_<mode>_mask"
22017 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22018 (vec_merge:VI248_AVX512VL
22019 (unspec:VI248_AVX512VL
22020 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22021 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22022 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22023 UNSPEC_VPSHRDV)
22024 (match_dup 1)
22025 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22026 "TARGET_AVX512VBMI2"
22027 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
22028 [(set_attr ("prefix") ("evex"))
22029 (set_attr "mode" "<sseinsnmode>")])
22030
22031 (define_expand "vpshrdv_<mode>_maskz"
22032 [(match_operand:VI248_AVX512VL 0 "register_operand")
22033 (match_operand:VI248_AVX512VL 1 "register_operand")
22034 (match_operand:VI248_AVX512VL 2 "register_operand")
22035 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
22036 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22037 "TARGET_AVX512VBMI2"
22038 {
22039 emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
22040 operands[2], operands[3],
22041 CONST0_RTX (<MODE>mode),
22042 operands[4]));
22043 DONE;
22044 })
22045
22046 (define_insn "vpshrdv_<mode>_maskz_1"
22047 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22048 (vec_merge:VI248_AVX512VL
22049 (unspec:VI248_AVX512VL
22050 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22051 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22052 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22053 UNSPEC_VPSHRDV)
22054 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
22055 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
22056 "TARGET_AVX512VBMI2"
22057 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
22058 [(set_attr ("prefix") ("evex"))
22059 (set_attr "mode" "<sseinsnmode>")])
22060
22061 (define_insn "vpshldv_<mode>"
22062 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22063 (unspec:VI248_AVX512VL
22064 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22065 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22066 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22067 UNSPEC_VPSHLDV))]
22068 "TARGET_AVX512VBMI2"
22069 "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
22070 [(set_attr ("prefix") ("evex"))
22071 (set_attr "mode" "<sseinsnmode>")])
22072
22073 (define_insn "vpshldv_<mode>_mask"
22074 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22075 (vec_merge:VI248_AVX512VL
22076 (unspec:VI248_AVX512VL
22077 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22078 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22079 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22080 UNSPEC_VPSHLDV)
22081 (match_dup 1)
22082 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22083 "TARGET_AVX512VBMI2"
22084 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
22085 [(set_attr ("prefix") ("evex"))
22086 (set_attr "mode" "<sseinsnmode>")])
22087
22088 (define_expand "vpshldv_<mode>_maskz"
22089 [(match_operand:VI248_AVX512VL 0 "register_operand")
22090 (match_operand:VI248_AVX512VL 1 "register_operand")
22091 (match_operand:VI248_AVX512VL 2 "register_operand")
22092 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
22093 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22094 "TARGET_AVX512VBMI2"
22095 {
22096 emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
22097 operands[2], operands[3],
22098 CONST0_RTX (<MODE>mode),
22099 operands[4]));
22100 DONE;
22101 })
22102
22103 (define_insn "vpshldv_<mode>_maskz_1"
22104 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22105 (vec_merge:VI248_AVX512VL
22106 (unspec:VI248_AVX512VL
22107 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22108 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22109 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22110 UNSPEC_VPSHLDV)
22111 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
22112 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
22113 "TARGET_AVX512VBMI2"
22114 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
22115 [(set_attr ("prefix") ("evex"))
22116 (set_attr "mode" "<sseinsnmode>")])
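
;; For illustration only: the concat-shift patterns above back the
;; AVX512VBMI2 funnel-shift intrinsics, e.g.:
;;   #include <immintrin.h>
;;   __m512i f (__m512i a, __m512i b, __m512i cnt)
;;   {
;;     /* Concatenates each dword of A (high) with the matching dword of B
;;        (low), shifts the 64-bit value left by the per-lane count and
;;        keeps the upper dword.  */
;;     return _mm512_shldv_epi32 (a, b, cnt);
;;   }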
22117
22118 (define_insn "vpdpbusd_<mode>"
22119 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22120 (unspec:VI4_AVX512VL
22121 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22122 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22123 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22124 UNSPEC_VPMADDUBSWACCD))]
22125 "TARGET_AVX512VNNI"
22126 "vpdpbusd\t{%3, %2, %0|%0, %2, %3 }"
22127 [(set_attr ("prefix") ("evex"))])
22128
22129 (define_insn "vpdpbusd_<mode>_mask"
22130 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22131 (vec_merge:VI4_AVX512VL
22132 (unspec:VI4_AVX512VL
22133 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22134 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22135 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22136 UNSPEC_VPMADDUBSWACCD)
22137 (match_dup 1)
22138 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22139 "TARGET_AVX512VNNI"
22140 "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
22141 [(set_attr ("prefix") ("evex"))])
22142
22143 (define_expand "vpdpbusd_<mode>_maskz"
22144 [(match_operand:VI4_AVX512VL 0 "register_operand")
22145 (match_operand:VI4_AVX512VL 1 "register_operand")
22146 (match_operand:VI4_AVX512VL 2 "register_operand")
22147 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
22148 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22149 "TARGET_AVX512VNNI"
22150 {
22151 emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
22152 operands[2], operands[3],
22153 CONST0_RTX (<MODE>mode),
22154 operands[4]));
22155 DONE;
22156 })
22157
22158 (define_insn "vpdpbusd_<mode>_maskz_1"
22159 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22160 (vec_merge:VI4_AVX512VL
22161 (unspec:VI4_AVX512VL
22162 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22163 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22164 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")
22165 ] UNSPEC_VPMADDUBSWACCD)
22166 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
22167 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
22168 "TARGET_AVX512VNNI"
22169 "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
22170 [(set_attr ("prefix") ("evex"))])
22171
22172
22173 (define_insn "vpdpbusds_<mode>"
22174 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22175 (unspec:VI4_AVX512VL
22176 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22177 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22178 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22179 UNSPEC_VPMADDUBSWACCSSD))]
22180 "TARGET_AVX512VNNI"
22181 "vpdpbusds\t{%3, %2, %0|%0, %2, %3 }"
22182 [(set_attr ("prefix") ("evex"))])
22183
22184 (define_insn "vpdpbusds_<mode>_mask"
22185 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22186 (vec_merge:VI4_AVX512VL
22187 (unspec:VI4_AVX512VL
22188 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22189 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22190 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22191 UNSPEC_VPMADDUBSWACCSSD)
22192 (match_dup 1)
22193 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22194 "TARGET_AVX512VNNI"
22195 "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
22196 [(set_attr ("prefix") ("evex"))])
22197
22198 (define_expand "vpdpbusds_<mode>_maskz"
22199 [(match_operand:VI4_AVX512VL 0 "register_operand")
22200 (match_operand:VI4_AVX512VL 1 "register_operand")
22201 (match_operand:VI4_AVX512VL 2 "register_operand")
22202 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
22203 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22204 "TARGET_AVX512VNNI"
22205 {
22206 emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
22207 operands[2], operands[3],
22208 CONST0_RTX (<MODE>mode),
22209 operands[4]));
22210 DONE;
22211 })
22212
22213 (define_insn "vpdpbusds_<mode>_maskz_1"
22214 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22215 (vec_merge:VI4_AVX512VL
22216 (unspec:VI4_AVX512VL
22217 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22218 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22219 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22220 UNSPEC_VPMADDUBSWACCSSD)
22221 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
22222 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
22223 "TARGET_AVX512VNNI"
22224 "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
22225 [(set_attr ("prefix") ("evex"))])
22226
22227
22228 (define_insn "vpdpwssd_<mode>"
22229 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22230 (unspec:VI4_AVX512VL
22231 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22232 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22233 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22234 UNSPEC_VPMADDWDACCD))]
22235 "TARGET_AVX512VNNI"
22236 "vpdpwssd\t{%3, %2, %0|%0, %2, %3 }"
22237 [(set_attr ("prefix") ("evex"))])
22238
22239 (define_insn "vpdpwssd_<mode>_mask"
22240 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22241 (vec_merge:VI4_AVX512VL
22242 (unspec:VI4_AVX512VL
22243 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22244 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22245 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22246 UNSPEC_VPMADDWDACCD)
22247 (match_dup 1)
22248 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22249 "TARGET_AVX512VNNI"
22250 "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
22251 [(set_attr ("prefix") ("evex"))])
22252
22253 (define_expand "vpdpwssd_<mode>_maskz"
22254 [(match_operand:VI4_AVX512VL 0 "register_operand")
22255 (match_operand:VI4_AVX512VL 1 "register_operand")
22256 (match_operand:VI4_AVX512VL 2 "register_operand")
22257 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
22258 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22259 "TARGET_AVX512VNNI"
22260 {
22261 emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
22262 operands[2], operands[3],
22263 CONST0_RTX (<MODE>mode),
22264 operands[4]));
22265 DONE;
22266 })
22267
22268 (define_insn "vpdpwssd_<mode>_maskz_1"
22269 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22270 (vec_merge:VI4_AVX512VL
22271 (unspec:VI4_AVX512VL
22272 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22273 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22274 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22275 UNSPEC_VPMADDWDACCD)
22276 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
22277 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
22278 "TARGET_AVX512VNNI"
22279 "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
22280 [(set_attr ("prefix") ("evex"))])
22281
22282
22283 (define_insn "vpdpwssds_<mode>"
22284 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22285 (unspec:VI4_AVX512VL
22286 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22287 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22288 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22289 UNSPEC_VPMADDWDACCSSD))]
22290 "TARGET_AVX512VNNI"
22291 "vpdpwssds\t{%3, %2, %0|%0, %2, %3 }"
22292 [(set_attr ("prefix") ("evex"))])
22293
22294 (define_insn "vpdpwssds_<mode>_mask"
22295 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22296 (vec_merge:VI4_AVX512VL
22297 (unspec:VI4_AVX512VL
22298 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22299 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22300 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22301 UNSPEC_VPMADDWDACCSSD)
22302 (match_dup 1)
22303 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22304 "TARGET_AVX512VNNI"
22305 "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
22306 [(set_attr ("prefix") ("evex"))])
22307
22308 (define_expand "vpdpwssds_<mode>_maskz"
22309 [(match_operand:VI4_AVX512VL 0 "register_operand")
22310 (match_operand:VI4_AVX512VL 1 "register_operand")
22311 (match_operand:VI4_AVX512VL 2 "register_operand")
22312 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
22313 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22314 "TARGET_AVX512VNNI"
22315 {
22316 emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
22317 operands[2], operands[3],
22318 CONST0_RTX (<MODE>mode),
22319 operands[4]));
22320 DONE;
22321 })
22322
22323 (define_insn "vpdpwssds_<mode>_maskz_1"
22324 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22325 (vec_merge:VI4_AVX512VL
22326 (unspec:VI4_AVX512VL
22327 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22328 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22329 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22330 UNSPEC_VPMADDWDACCSSD)
22331 (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
22332 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
22333 "TARGET_AVX512VNNI"
22334 "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
22335 [(set_attr ("prefix") ("evex"))])
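
;; For illustration only: the VNNI dot-product accumulate patterns above are
;; reached via intrinsics such as:
;;   #include <immintrin.h>
;;   __m512i f (__m512i acc, __m512i a, __m512i b)
;;   {
;;     /* Multiplies unsigned bytes of A with signed bytes of B, sums each
;;        group of four products and accumulates into the dword lanes of ACC.  */
;;     return _mm512_dpbusd_epi32 (acc, a, b);
;;   }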
22336
22337 (define_insn "vaesdec_<mode>"
22338 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
22339 (unspec:VI1_AVX512VL_F
22340 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
22341 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
22342 UNSPEC_VAESDEC))]
22343 "TARGET_VAES"
22344 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
22345 )
22346
22347 (define_insn "vaesdeclast_<mode>"
22348 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
22349 (unspec:VI1_AVX512VL_F
22350 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
22351 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
22352 UNSPEC_VAESDECLAST))]
22353 "TARGET_VAES"
22354 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
22355 )
22356
22357 (define_insn "vaesenc_<mode>"
22358 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
22359 (unspec:VI1_AVX512VL_F
22360 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
22361 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
22362 UNSPEC_VAESENC))]
22363 "TARGET_VAES"
22364 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
22365 )
22366
22367 (define_insn "vaesenclast_<mode>"
22368 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
22369 (unspec:VI1_AVX512VL_F
22370 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
22371 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
22372 UNSPEC_VAESENCLAST))]
22373 "TARGET_VAES"
22374 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
22375 )
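
;; For illustration only: the VAES patterns above perform one AES round per
;; 128-bit lane, e.g.:
;;   #include <immintrin.h>
;;   __m512i f (__m512i state, __m512i round_key)
;;   {
;;     /* Four parallel AESENC operations, one per 128-bit lane.  */
;;     return _mm512_aesenc_epi128 (state, round_key);
;;   }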
22376
22377 (define_insn "vpclmulqdq_<mode>"
22378 [(set (match_operand:VI8_FVL 0 "register_operand" "=v")
22379 (unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v")
22380 (match_operand:VI8_FVL 2 "vector_operand" "vm")
22381 (match_operand:SI 3 "const_0_to_255_operand" "n")]
22382 UNSPEC_VPCLMULQDQ))]
22383 "TARGET_VPCLMULQDQ"
22384 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
22385 [(set_attr "mode" "DI")])
22386
22387 (define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
22388 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
22389 (unspec:<avx512fmaskmode>
22390 [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v")
22391 (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")]
22392 UNSPEC_VPSHUFBIT))]
22393 "TARGET_AVX512BITALG"
22394 "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
22395 [(set_attr "prefix" "evex")
22396 (set_attr "mode" "<sseinsnmode>")])
22397
22398 (define_mode_iterator BF16 [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
22399 ;; Converting from BF to SF
22400 (define_mode_attr bf16_cvt_2sf
22401 [(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")])
22402 ;; Converting from SF to BF
22403 (define_mode_attr sf_cvt_bf16
22404 [(V4SF "V8HI") (V8SF "V8HI") (V16SF "V16HI")])
22405 ;; Mapping from an SF vector mode to the same-sized BF16 vector mode
22406 (define_mode_attr sf_bf16
22407 [(V4SF "V8HI") (V8SF "V16HI") (V16SF "V32HI")])
22408
22409 (define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
22410 [(match_operand:BF16 0 "register_operand")
22411 (match_operand:<bf16_cvt_2sf> 1 "register_operand")
22412 (match_operand:<bf16_cvt_2sf> 2 "register_operand")
22413 (match_operand:<avx512fmaskmode> 3 "register_operand")]
22414 "TARGET_AVX512BF16"
22415 {
22416 emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask(operands[0], operands[1],
22417 operands[2], CONST0_RTX(<MODE>mode), operands[3]));
22418 DONE;
22419 })
22420
22421 (define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
22422 [(set (match_operand:BF16 0 "register_operand" "=v")
22423 (unspec:BF16
22424 [(match_operand:<bf16_cvt_2sf> 1 "register_operand" "v")
22425 (match_operand:<bf16_cvt_2sf> 2 "register_operand" "v")]
22426 UNSPEC_VCVTNE2PS2BF16))]
22427 "TARGET_AVX512BF16"
22428 "vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")
22429
22430 (define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
22431 [(match_operand:<sf_cvt_bf16> 0 "register_operand")
22432 (match_operand:VF1_AVX512VL 1 "register_operand")
22433 (match_operand:<avx512fmaskmode> 2 "register_operand")]
22434 "TARGET_AVX512BF16"
22435 {
22436 emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask(operands[0], operands[1],
22437 CONST0_RTX(<sf_cvt_bf16>mode), operands[2]));
22438 DONE;
22439 })
22440
22441 (define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
22442 [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
22443 (unspec:<sf_cvt_bf16>
22444 [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
22445 UNSPEC_VCVTNEPS2BF16))]
22446 "TARGET_AVX512BF16"
22447 "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
22448
22449 (define_expand "avx512f_dpbf16ps_<mode>_maskz"
22450 [(match_operand:VF1_AVX512VL 0 "register_operand")
22451 (match_operand:VF1_AVX512VL 1 "register_operand")
22452 (match_operand:<sf_bf16> 2 "register_operand")
22453 (match_operand:<sf_bf16> 3 "register_operand")
22454 (match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
22455 "TARGET_AVX512BF16"
22456 {
22457 emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1(operands[0], operands[1],
22458 operands[2], operands[3], CONST0_RTX(<MODE>mode), operands[4]));
22459 DONE;
22460 })
22461
22462 (define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
22463 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
22464 (unspec:VF1_AVX512VL
22465 [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
22466 (match_operand:<sf_bf16> 2 "register_operand" "v")
22467 (match_operand:<sf_bf16> 3 "register_operand" "v")]
22468 UNSPEC_VDPBF16PS))]
22469 "TARGET_AVX512BF16"
22470 "vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")
22471
22472 (define_insn "avx512f_dpbf16ps_<mode>_mask"
22473 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
22474 (vec_merge:VF1_AVX512VL
22475 (unspec:VF1_AVX512VL
22476 [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
22477 (match_operand:<sf_bf16> 2 "register_operand" "v")
22478 (match_operand:<sf_bf16> 3 "register_operand" "v")]
22479 UNSPEC_VDPBF16PS)
22480 (match_dup 1)
22481 (match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
22482 "TARGET_AVX512BF16"
22483 "vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")