1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2017 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
20 (define_c_enum "unspec" [
21 ;; SSE
22 UNSPEC_MOVNT
23
24 ;; SSE3
25 UNSPEC_LDDQU
26
27 ;; SSSE3
28 UNSPEC_PSHUFB
29 UNSPEC_PSIGN
30 UNSPEC_PALIGNR
31
32 ;; For SSE4A support
33 UNSPEC_EXTRQI
34 UNSPEC_EXTRQ
35 UNSPEC_INSERTQI
36 UNSPEC_INSERTQ
37
38 ;; For SSE4.1 support
39 UNSPEC_BLENDV
40 UNSPEC_INSERTPS
41 UNSPEC_DP
42 UNSPEC_MOVNTDQA
43 UNSPEC_MPSADBW
44 UNSPEC_PHMINPOSUW
45 UNSPEC_PTEST
46
47 ;; For SSE4.2 support
48 UNSPEC_PCMPESTR
49 UNSPEC_PCMPISTR
50
51 ;; For FMA4 support
52 UNSPEC_FMADDSUB
53 UNSPEC_XOP_UNSIGNED_CMP
54 UNSPEC_XOP_TRUEFALSE
55 UNSPEC_XOP_PERMUTE
56 UNSPEC_FRCZ
57
58 ;; For AES support
59 UNSPEC_AESENC
60 UNSPEC_AESENCLAST
61 UNSPEC_AESDEC
62 UNSPEC_AESDECLAST
63 UNSPEC_AESIMC
64 UNSPEC_AESKEYGENASSIST
65
66 ;; For PCLMUL support
67 UNSPEC_PCLMUL
68
69 ;; For AVX support
70 UNSPEC_PCMP
71 UNSPEC_VPERMIL
72 UNSPEC_VPERMIL2
73 UNSPEC_VPERMIL2F128
74 UNSPEC_CAST
75 UNSPEC_VTESTP
76 UNSPEC_VCVTPH2PS
77 UNSPEC_VCVTPS2PH
78
79 ;; For AVX2 support
80 UNSPEC_VPERMVAR
81 UNSPEC_VPERMTI
82 UNSPEC_GATHER
83 UNSPEC_VSIBADDR
84
85 ;; For AVX512F support
86 UNSPEC_VPERMT2
87 UNSPEC_UNSIGNED_FIX_NOTRUNC
88 UNSPEC_UNSIGNED_PCMP
89 UNSPEC_TESTM
90 UNSPEC_TESTNM
91 UNSPEC_SCATTER
92 UNSPEC_RCP14
93 UNSPEC_RSQRT14
94 UNSPEC_FIXUPIMM
95 UNSPEC_SCALEF
96 UNSPEC_VTERNLOG
97 UNSPEC_GETEXP
98 UNSPEC_GETMANT
99 UNSPEC_ALIGN
100 UNSPEC_CONFLICT
101 UNSPEC_COMPRESS
102 UNSPEC_COMPRESS_STORE
103 UNSPEC_EXPAND
104 UNSPEC_MASKED_EQ
105 UNSPEC_MASKED_GT
106
107 ;; Mask operations
108 UNSPEC_MASKOP
109 UNSPEC_KORTEST
110 UNSPEC_KTEST
111
112 ;; For embedded rounding support
113 UNSPEC_EMBEDDED_ROUNDING
114
115 ;; For AVX512PF support
116 UNSPEC_GATHER_PREFETCH
117 UNSPEC_SCATTER_PREFETCH
118
119 ;; For AVX512ER support
120 UNSPEC_EXP2
121 UNSPEC_RCP28
122 UNSPEC_RSQRT28
123
124 ;; For SHA support
125 UNSPEC_SHA1MSG1
126 UNSPEC_SHA1MSG2
127 UNSPEC_SHA1NEXTE
128 UNSPEC_SHA1RNDS4
129 UNSPEC_SHA256MSG1
130 UNSPEC_SHA256MSG2
131 UNSPEC_SHA256RNDS2
132
133 ;; For AVX512BW support
134 UNSPEC_DBPSADBW
135 UNSPEC_PMADDUBSW512
136 UNSPEC_PMADDWD512
137 UNSPEC_PSHUFHW
138 UNSPEC_PSHUFLW
139 UNSPEC_CVTINT2MASK
140
141 ;; For AVX512DQ support
142 UNSPEC_REDUCE
143 UNSPEC_FPCLASS
144 UNSPEC_RANGE
145
146 ;; For AVX512IFMA support
147 UNSPEC_VPMADD52LUQ
148 UNSPEC_VPMADD52HUQ
149
150 ;; For AVX512VBMI support
151 UNSPEC_VPMULTISHIFT
152
153 ;; For AVX5124FMAPS/AVX5124VNNIW support
154 UNSPEC_VP4FMADD
155 UNSPEC_VP4FNMADD
156 UNSPEC_VP4DPWSSD
157 UNSPEC_VP4DPWSSDS
158
159 ;; For GFNI support
160 UNSPEC_GF2P8AFFINEINV
161 UNSPEC_GF2P8AFFINE
162 UNSPEC_GF2P8MUL
163
164 ;; For AVX512VBMI2 support
165 UNSPEC_VPSHLD
166 UNSPEC_VPSHRD
167 ])
168
169 (define_c_enum "unspecv" [
170 UNSPECV_LDMXCSR
171 UNSPECV_STMXCSR
172 UNSPECV_CLFLUSH
173 UNSPECV_MONITOR
174 UNSPECV_MWAIT
175 UNSPECV_VZEROALL
176 UNSPECV_VZEROUPPER
177 ])
178
179 ;; All vector modes including V?TImode, used in move patterns.
180 (define_mode_iterator VMOVE
181 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
182 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
183 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
184 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
185 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
186 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
187 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
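;; For example, a pattern written with VMOVE, such as the "mov<mode>" expander
;; below, is instantiated once for every mode listed here (movv16qi, movv8hi,
;; ..., movv8df), and each per-mode condition string ("TARGET_AVX512F",
;; "TARGET_AVX") is ANDed into that instance's insn condition, so movv64qi
;; requires TARGET_SSE && TARGET_AVX512F while movv16qi needs only TARGET_SSE.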
188
189 ;; All AVX-512{F,VL} vector modes. Assumes TARGET_AVX512F as baseline.
190 (define_mode_iterator V48_AVX512VL
191 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
192 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
193 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
194 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
195
196 ;; 1,2 byte AVX-512{BW,VL} vector modes. Assumes TARGET_AVX512BW as baseline.
197 (define_mode_iterator VI12_AVX512VL
198 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
199 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
200
201 ;; Same iterator, but without assuming TARGET_AVX512BW as baseline
202 (define_mode_iterator VI12_AVX512VLBW
203 [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
204 (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
205 (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
206
207 (define_mode_iterator VI1_AVX512VL
208 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
209
210 ;; All vector modes
211 (define_mode_iterator V
212 [(V32QI "TARGET_AVX") V16QI
213 (V16HI "TARGET_AVX") V8HI
214 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
215 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
216 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
217 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
218
219 ;; All 128bit vector modes
220 (define_mode_iterator V_128
221 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
222
223 ;; All 256bit vector modes
224 (define_mode_iterator V_256
225 [V32QI V16HI V8SI V4DI V8SF V4DF])
226
227 ;; All 512bit vector modes
228 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
229
230 ;; All 256bit and 512bit vector modes
231 (define_mode_iterator V_256_512
232 [V32QI V16HI V8SI V4DI V8SF V4DF
233 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
234 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
235
236 ;; All vector float modes
237 (define_mode_iterator VF
238 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
239 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
240
241 ;; 128- and 256-bit float vector modes
242 (define_mode_iterator VF_128_256
243 [(V8SF "TARGET_AVX") V4SF
244 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
245
246 ;; All SFmode vector float modes
247 (define_mode_iterator VF1
248 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
249
250 ;; 128- and 256-bit SF vector modes
251 (define_mode_iterator VF1_128_256
252 [(V8SF "TARGET_AVX") V4SF])
253
254 (define_mode_iterator VF1_128_256VL
255 [V8SF (V4SF "TARGET_AVX512VL")])
256
257 ;; All DFmode vector float modes
258 (define_mode_iterator VF2
259 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
260
261 ;; 128- and 256-bit DF vector modes
262 (define_mode_iterator VF2_128_256
263 [(V4DF "TARGET_AVX") V2DF])
264
265 (define_mode_iterator VF2_512_256
266 [(V8DF "TARGET_AVX512F") V4DF])
267
268 (define_mode_iterator VF2_512_256VL
269 [V8DF (V4DF "TARGET_AVX512VL")])
270
271 ;; All 128bit vector float modes
272 (define_mode_iterator VF_128
273 [V4SF (V2DF "TARGET_SSE2")])
274
275 ;; All 256bit vector float modes
276 (define_mode_iterator VF_256
277 [V8SF V4DF])
278
279 ;; All 512bit vector float modes
280 (define_mode_iterator VF_512
281 [V16SF V8DF])
282
283 (define_mode_iterator VI48_AVX512VL
284 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
285 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
286
287 (define_mode_iterator VF_AVX512VL
288 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
289 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
290
291 (define_mode_iterator VF2_AVX512VL
292 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
293
294 (define_mode_iterator VF1_AVX512VL
295 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
296
297 ;; All vector integer modes
298 (define_mode_iterator VI
299 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
300 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
301 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
302 (V8SI "TARGET_AVX") V4SI
303 (V4DI "TARGET_AVX") V2DI])
304
305 (define_mode_iterator VI_AVX2
306 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
307 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
308 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
309 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
310
311 ;; All QImode vector integer modes
312 (define_mode_iterator VI1
313 [(V32QI "TARGET_AVX") V16QI])
314
315 ;; All 128bit vector modes, plus their 256bit counterparts for TARGET_AVX
316 (define_mode_iterator V_AVX
317 [V16QI V8HI V4SI V2DI V4SF V2DF
318 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
319 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
320 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")])
321
322 (define_mode_iterator VI48_AVX
323 [V4SI V2DI
324 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
325
326 (define_mode_iterator VI8
327 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
328
329 (define_mode_iterator VI8_AVX512VL
330 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
331
332 (define_mode_iterator VI8_256_512
333 [V8DI (V4DI "TARGET_AVX512VL")])
334
335 (define_mode_iterator VI1_AVX2
336 [(V32QI "TARGET_AVX2") V16QI])
337
338 (define_mode_iterator VI1_AVX512
339 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
340
341 (define_mode_iterator VI1_AVX512F
342 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
343
344 (define_mode_iterator VI2_AVX2
345 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
346
347 (define_mode_iterator VI2_AVX512F
348 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
349
350 (define_mode_iterator VI4_AVX
351 [(V8SI "TARGET_AVX") V4SI])
352
353 (define_mode_iterator VI4_AVX2
354 [(V8SI "TARGET_AVX2") V4SI])
355
356 (define_mode_iterator VI4_AVX512F
357 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
358
359 (define_mode_iterator VI4_AVX512VL
360 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
361
362 (define_mode_iterator VI48_AVX512F_AVX512VL
363 [V4SI V8SI (V16SI "TARGET_AVX512F")
364 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
365
366 (define_mode_iterator VI2_AVX512VL
367 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
368
369 (define_mode_iterator VI8_AVX2_AVX512BW
370 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
371
372 (define_mode_iterator VI8_AVX2
373 [(V4DI "TARGET_AVX2") V2DI])
374
375 (define_mode_iterator VI8_AVX2_AVX512F
376 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
377
378 (define_mode_iterator VI4_128_8_256
379 [V4SI V4DI])
380
381 ;; All V8D* modes
382 (define_mode_iterator V8FI
383 [V8DF V8DI])
384
385 ;; All V16S* modes
386 (define_mode_iterator V16FI
387 [V16SF V16SI])
388
389 ;; ??? We should probably use TImode instead.
390 (define_mode_iterator VIMAX_AVX2_AVX512BW
391 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
392
393 ;; Assumes TARGET_AVX512BW as baseline
394 (define_mode_iterator VIMAX_AVX512VL
395 [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
396
397 (define_mode_iterator VIMAX_AVX2
398 [(V2TI "TARGET_AVX2") V1TI])
399
400 ;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
401 (define_mode_iterator SSESCALARMODE
402 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
403
404 (define_mode_iterator VI12_AVX2
405 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
406 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
407
408 (define_mode_iterator VI24_AVX2
409 [(V16HI "TARGET_AVX2") V8HI
410 (V8SI "TARGET_AVX2") V4SI])
411
412 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
413 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
414 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
415 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
416
417 (define_mode_iterator VI124_AVX2
418 [(V32QI "TARGET_AVX2") V16QI
419 (V16HI "TARGET_AVX2") V8HI
420 (V8SI "TARGET_AVX2") V4SI])
421
422 (define_mode_iterator VI2_AVX2_AVX512BW
423 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
424
425 (define_mode_iterator VI248_VLBW
426 [(V32HI "TARGET_AVX512BW") V16SI V8DI
427 (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
428 (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
429 (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
430
431 (define_mode_iterator VI48_AVX2
432 [(V8SI "TARGET_AVX2") V4SI
433 (V4DI "TARGET_AVX2") V2DI])
434
435 (define_mode_iterator VI248_AVX2
436 [(V16HI "TARGET_AVX2") V8HI
437 (V8SI "TARGET_AVX2") V4SI
438 (V4DI "TARGET_AVX2") V2DI])
439
440 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
441 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
442 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
443 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
444
445 (define_mode_iterator VI248_AVX512BW
446 [(V32HI "TARGET_AVX512BW") V16SI V8DI])
447
448 (define_mode_iterator VI248_AVX512BW_AVX512VL
449 [(V32HI "TARGET_AVX512BW")
450 (V4DI "TARGET_AVX512VL") V16SI V8DI])
451
452 ;; Assumes TARGET_AVX512VL as baseline
453 (define_mode_iterator VI248_AVX512BW_1
454 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
455 V8SI V4SI
456 V2DI])
457
458 (define_mode_iterator VI248_AVX512BW_2
459 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
460 V8SI V4SI
461 V4DI V2DI])
462
463 (define_mode_iterator VI48_AVX512F
464 [(V16SI "TARGET_AVX512F") V8SI V4SI
465 (V8DI "TARGET_AVX512F") V4DI V2DI])
466
467 (define_mode_iterator VI48_AVX_AVX512F
468 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
469 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
470
471 (define_mode_iterator VI12_AVX_AVX512F
472 [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
473 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
474
475 (define_mode_iterator V48_AVX2
476 [V4SF V2DF
477 V8SF V4DF
478 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
479 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
480
481 (define_mode_attr avx512
482 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
483 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
484 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
485 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
486 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
487 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
488
489 (define_mode_attr sse2_avx_avx512f
490 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
491 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
492 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
493 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
494 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
495 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
496
497 (define_mode_attr sse2_avx2
498 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
499 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
500 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
501 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
502 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
503
504 (define_mode_attr ssse3_avx2
505 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
506 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
507 (V4SI "ssse3") (V8SI "avx2")
508 (V2DI "ssse3") (V4DI "avx2")
509 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
510
511 (define_mode_attr sse4_1_avx2
512 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
513 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
514 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
515 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
516
517 (define_mode_attr avx_avx2
518 [(V4SF "avx") (V2DF "avx")
519 (V8SF "avx") (V4DF "avx")
520 (V4SI "avx2") (V2DI "avx2")
521 (V8SI "avx2") (V4DI "avx2")])
522
523 (define_mode_attr vec_avx2
524 [(V16QI "vec") (V32QI "avx2")
525 (V8HI "vec") (V16HI "avx2")
526 (V4SI "vec") (V8SI "avx2")
527 (V2DI "vec") (V4DI "avx2")])
528
529 (define_mode_attr avx2_avx512
530 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
531 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
532 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
533 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
534 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
535
536 (define_mode_attr shuffletype
537 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
538 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
539 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
540 (V32HI "i") (V16HI "i") (V8HI "i")
541 (V64QI "i") (V32QI "i") (V16QI "i")
542 (V4TI "i") (V2TI "i") (V1TI "i")])
543
544 (define_mode_attr ssequartermode
545 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
546
547 (define_mode_attr ssedoublemodelower
548 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
549 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
550 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
551
552 (define_mode_attr ssedoublemode
553 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
554 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
555 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
556 (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
557 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
558 (V4DI "V8DI") (V8DI "V16DI")])
559
560 (define_mode_attr ssebytemode
561 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
562
563 ;; All 128bit vector integer modes
564 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
565
566 ;; All 256bit vector integer modes
567 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
568
569 ;; Various 128bit vector integer mode combinations
570 (define_mode_iterator VI12_128 [V16QI V8HI])
571 (define_mode_iterator VI14_128 [V16QI V4SI])
572 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
573 (define_mode_iterator VI24_128 [V8HI V4SI])
574 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
575 (define_mode_iterator VI48_128 [V4SI V2DI])
576
577 ;; Various 256bit and 512bit vector integer mode combinations
578 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
579 (define_mode_iterator VI124_256_AVX512F_AVX512BW
580 [V32QI V16HI V8SI
581 (V64QI "TARGET_AVX512BW")
582 (V32HI "TARGET_AVX512BW")
583 (V16SI "TARGET_AVX512F")])
584 (define_mode_iterator VI48_256 [V8SI V4DI])
585 (define_mode_iterator VI48_512 [V16SI V8DI])
586 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
587 (define_mode_iterator VI_AVX512BW
588 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
589
590 ;; Int-float size matches
591 (define_mode_iterator VI4F_128 [V4SI V4SF])
592 (define_mode_iterator VI8F_128 [V2DI V2DF])
593 (define_mode_iterator VI4F_256 [V8SI V8SF])
594 (define_mode_iterator VI8F_256 [V4DI V4DF])
595 (define_mode_iterator VI48F_256_512
596 [V8SI V8SF
597 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
598 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
599 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
600 (define_mode_iterator VF48_I1248
601 [V16SI V16SF V8DI V8DF V32HI V64QI])
602 (define_mode_iterator VI48F
603 [V16SI V16SF V8DI V8DF
604 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
605 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
606 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
607 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
608 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
609
610 ;; Mapping from float mode to required SSE level
611 (define_mode_attr sse
612 [(SF "sse") (DF "sse2")
613 (V4SF "sse") (V2DF "sse2")
614 (V16SF "avx512f") (V8SF "avx")
615 (V8DF "avx512f") (V4DF "avx")])
616
617 (define_mode_attr sse2
618 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
619 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
620
621 (define_mode_attr sse3
622 [(V16QI "sse3") (V32QI "avx")])
623
624 (define_mode_attr sse4_1
625 [(V4SF "sse4_1") (V2DF "sse4_1")
626 (V8SF "avx") (V4DF "avx")
627 (V8DF "avx512f")
628 (V4DI "avx") (V2DI "sse4_1")
629 (V8SI "avx") (V4SI "sse4_1")
630 (V16QI "sse4_1") (V32QI "avx")
631 (V8HI "sse4_1") (V16HI "avx")])
632
633 (define_mode_attr avxsizesuffix
634 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
635 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
636 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
637 (V16SF "512") (V8DF "512")
638 (V8SF "256") (V4DF "256")
639 (V4SF "") (V2DF "")])
640
641 ;; SSE instruction mode
642 (define_mode_attr sseinsnmode
643 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
644 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
645 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
646 (V16SF "V16SF") (V8DF "V8DF")
647 (V8SF "V8SF") (V4DF "V4DF")
648 (V4SF "V4SF") (V2DF "V2DF")
649 (TI "TI")])
650
651 ;; Mapping of vector modes to corresponding mask size
652 (define_mode_attr avx512fmaskmode
653 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
654 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
655 (V16SI "HI") (V8SI "QI") (V4SI "QI")
656 (V8DI "QI") (V4DI "QI") (V2DI "QI")
657 (V16SF "HI") (V8SF "QI") (V4SF "QI")
658 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
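;; One mask bit corresponds to one vector element, so e.g. V16SF and V16SI
;; take an HImode (16-bit) mask while V8DF and V8DI take a QImode (8-bit) mask.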
659
660 ;; Mapping of vector modes to corresponding mask size (lowercase)
661 (define_mode_attr avx512fmaskmodelower
662 [(V64QI "di") (V32QI "si") (V16QI "hi")
663 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
664 (V16SI "hi") (V8SI "qi") (V4SI "qi")
665 (V8DI "qi") (V4DI "qi") (V2DI "qi")
666 (V16SF "hi") (V8SF "qi") (V4SF "qi")
667 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
668
669 ;; Mapping of vector float modes to an integer mode of the same size
670 (define_mode_attr sseintvecmode
671 [(V16SF "V16SI") (V8DF "V8DI")
672 (V8SF "V8SI") (V4DF "V4DI")
673 (V4SF "V4SI") (V2DF "V2DI")
674 (V16SI "V16SI") (V8DI "V8DI")
675 (V8SI "V8SI") (V4DI "V4DI")
676 (V4SI "V4SI") (V2DI "V2DI")
677 (V16HI "V16HI") (V8HI "V8HI")
678 (V32HI "V32HI") (V64QI "V64QI")
679 (V32QI "V32QI") (V16QI "V16QI")])
680
681 (define_mode_attr sseintvecmode2
682 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
683 (V8SF "OI") (V4SF "TI")])
684
685 (define_mode_attr sseintvecmodelower
686 [(V16SF "v16si") (V8DF "v8di")
687 (V8SF "v8si") (V4DF "v4di")
688 (V4SF "v4si") (V2DF "v2di")
689 (V8SI "v8si") (V4DI "v4di")
690 (V4SI "v4si") (V2DI "v2di")
691 (V16HI "v16hi") (V8HI "v8hi")
692 (V32QI "v32qi") (V16QI "v16qi")])
693
694 ;; Mapping of vector modes to a vector mode of double size
695 (define_mode_attr ssedoublevecmode
696 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
697 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
698 (V8SF "V16SF") (V4DF "V8DF")
699 (V4SF "V8SF") (V2DF "V4DF")])
700
701 ;; Mapping of vector modes to a vector mode of half size
702 (define_mode_attr ssehalfvecmode
703 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
704 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
705 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
706 (V16SF "V8SF") (V8DF "V4DF")
707 (V8SF "V4SF") (V4DF "V2DF")
708 (V4SF "V2SF")])
709
710 (define_mode_attr ssehalfvecmodelower
711 [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
712 (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
713 (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
714 (V16SF "v8sf") (V8DF "v4df")
715 (V8SF "v4sf") (V4DF "v2df")
716 (V4SF "v2sf")])
717
718 ;; Mapping of vector modes to packed single mode of the same size
719 (define_mode_attr ssePSmode
720 [(V16SI "V16SF") (V8DF "V16SF")
721 (V16SF "V16SF") (V8DI "V16SF")
722 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
723 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
724 (V8SI "V8SF") (V4SI "V4SF")
725 (V4DI "V8SF") (V2DI "V4SF")
726 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
727 (V8SF "V8SF") (V4SF "V4SF")
728 (V4DF "V8SF") (V2DF "V4SF")])
729
730 (define_mode_attr ssePSmode2
731 [(V8DI "V8SF") (V4DI "V4SF")])
732
733 ;; Mapping of vector modes back to the scalar modes
734 (define_mode_attr ssescalarmode
735 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
736 (V32HI "HI") (V16HI "HI") (V8HI "HI")
737 (V16SI "SI") (V8SI "SI") (V4SI "SI")
738 (V8DI "DI") (V4DI "DI") (V2DI "DI")
739 (V16SF "SF") (V8SF "SF") (V4SF "SF")
740 (V8DF "DF") (V4DF "DF") (V2DF "DF")
741 (V4TI "TI") (V2TI "TI")])
742
743 ;; Mapping of vector modes back to the scalar modes (lowercase)
744 (define_mode_attr ssescalarmodelower
745 [(V64QI "qi") (V32QI "qi") (V16QI "qi")
746 (V32HI "hi") (V16HI "hi") (V8HI "hi")
747 (V16SI "si") (V8SI "si") (V4SI "si")
748 (V8DI "di") (V4DI "di") (V2DI "di")
749 (V16SF "sf") (V8SF "sf") (V4SF "sf")
750 (V8DF "df") (V4DF "df") (V2DF "df")
751 (V4TI "ti") (V2TI "ti")])
752
753 ;; Mapping of vector modes to the 128bit modes
754 (define_mode_attr ssexmmmode
755 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
756 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
757 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
758 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
759 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
760 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
761
762 ;; Pointer size override for scalar modes (Intel asm dialect)
763 (define_mode_attr iptr
764 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
765 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
766 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
767 (V8SF "k") (V4DF "q")
768 (V4SF "k") (V2DF "q")
769 (SF "k") (DF "q")])
770
771 ;; Number of scalar elements in each vector type
772 (define_mode_attr ssescalarnum
773 [(V64QI "64") (V16SI "16") (V8DI "8")
774 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
775 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
776 (V16SF "16") (V8DF "8")
777 (V8SF "8") (V4DF "4")
778 (V4SF "4") (V2DF "2")])
779
780 ;; Number of scalar elements minus one (index mask) in each vector type
781 (define_mode_attr ssescalarnummask
782 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
783 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
784 (V8SF "7") (V4DF "3")
785 (V4SF "3") (V2DF "1")])
786
787 (define_mode_attr ssescalarsize
788 [(V4TI "64") (V2TI "64") (V1TI "64")
789 (V8DI "64") (V4DI "64") (V2DI "64")
790 (V64QI "8") (V32QI "8") (V16QI "8")
791 (V32HI "16") (V16HI "16") (V8HI "16")
792 (V16SI "32") (V8SI "32") (V4SI "32")
793 (V16SF "32") (V8SF "32") (V4SF "32")
794 (V8DF "64") (V4DF "64") (V2DF "64")])
795
796 ;; SSE prefix for integer vector modes
797 (define_mode_attr sseintprefix
798 [(V2DI "p") (V2DF "")
799 (V4DI "p") (V4DF "")
800 (V8DI "p") (V8DF "")
801 (V4SI "p") (V4SF "")
802 (V8SI "p") (V8SF "")
803 (V16SI "p") (V16SF "")
804 (V16QI "p") (V8HI "p")
805 (V32QI "p") (V16HI "p")
806 (V64QI "p") (V32HI "p")])
807
808 ;; SSE scalar suffix for vector modes
809 (define_mode_attr ssescalarmodesuffix
810 [(SF "ss") (DF "sd")
811 (V8SF "ss") (V4DF "sd")
812 (V4SF "ss") (V2DF "sd")
813 (V8SI "ss") (V4DI "sd")
814 (V4SI "d")])
815
816 ;; Pack/unpack vector modes
817 (define_mode_attr sseunpackmode
818 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
819 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
820 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
821
822 (define_mode_attr ssepackmode
823 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
824 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
825 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
826
827 ;; Mapping of the max integer size for xop rotate immediate constraint
828 (define_mode_attr sserotatemax
829 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
830
831 ;; Mapping of mode to cast intrinsic name
832 (define_mode_attr castmode
833 [(V8SI "si") (V8SF "ps") (V4DF "pd")
834 (V16SI "si") (V16SF "ps") (V8DF "pd")])
835
836 ;; Instruction suffix for sign and zero extensions.
837 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
838
839 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
840 ;; i64x4 or f64x4 for 512bit modes.
841 (define_mode_attr i128
842 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
843 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
844 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
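;; The %~ operand punctuation in these values prints "i" when TARGET_AVX2 is
;; enabled and "f" otherwise, so the same template yields e.g. vinserti128 or
;; vinsertf128 as appropriate.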
845
846 ;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
847 ;; i32x4, f32x4, i64x2 or f64x2 suffixes.
848 (define_mode_attr i128vldq
849 [(V8SF "f32x4") (V4DF "f64x2")
850 (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
851
852 ;; Mix-n-match
853 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
854 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
855
856 ;; Mapping for dbpsadbw modes
857 (define_mode_attr dbpsadbwmode
858 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
859
860 ;; Mapping suffixes for broadcast
861 (define_mode_attr bcstscalarsuff
862 [(V64QI "b") (V32QI "b") (V16QI "b")
863 (V32HI "w") (V16HI "w") (V8HI "w")
864 (V16SI "d") (V8SI "d") (V4SI "d")
865 (V8DI "q") (V4DI "q") (V2DI "q")
866 (V16SF "ss") (V8SF "ss") (V4SF "ss")
867 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
868
869 ;; Tie mode of assembler operand to mode iterator
870 (define_mode_attr concat_tg_mode
871 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
872 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
873
874 ;; Tie mode of assembler operand to mode iterator
875 (define_mode_attr xtg_mode
876 [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
877 (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
878 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
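;; These single letters are used as operand modifiers in output templates
;; (like %g1 in mov<mode>_internal below): %x, %t and %g print the 128-bit
;; xmm, 256-bit ymm and 512-bit zmm name of a vector register respectively.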
879
880 ;; Half mask mode for unpacks
881 (define_mode_attr HALFMASKMODE
882 [(DI "SI") (SI "HI")])
883
884 ;; Double mask mode for packs
885 (define_mode_attr DOUBLEMASKMODE
886 [(HI "SI") (SI "DI")])
887
888
889 ;; Include define_subst patterns for instructions with mask
890 (include "subst.md")
891
892 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
893
894 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
895 ;;
896 ;; Move patterns
897 ;;
898 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
899
900 ;; All of these patterns are enabled for SSE1 as well as SSE2.
901 ;; This is essential for maintaining stable calling conventions.
902
903 (define_expand "mov<mode>"
904 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
905 (match_operand:VMOVE 1 "nonimmediate_operand"))]
906 "TARGET_SSE"
907 {
908 ix86_expand_vector_move (<MODE>mode, operands);
909 DONE;
910 })
911
912 (define_insn "mov<mode>_internal"
913 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
914 "=v,v ,v ,m")
915 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
916 " C,BC,vm,v"))]
917 "TARGET_SSE
918 && (register_operand (operands[0], <MODE>mode)
919 || register_operand (operands[1], <MODE>mode))"
920 {
921 switch (get_attr_type (insn))
922 {
923 case TYPE_SSELOG1:
924 return standard_sse_constant_opcode (insn, operands[1]);
925
926 case TYPE_SSEMOV:
927 /* AVX512F provides no EVEX-encoded vmov* for sizes smaller than 64 bytes,
928 so we need workarounds to access SSE registers 16-31, which are
929 EVEX-only. With AVX512VL no workarounds are needed. */
930 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
931 && (EXT_REX_SSE_REG_P (operands[0])
932 || EXT_REX_SSE_REG_P (operands[1])))
933 {
934 if (memory_operand (operands[0], <MODE>mode))
935 {
936 if (<MODE_SIZE> == 32)
937 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
938 else if (<MODE_SIZE> == 16)
939 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
940 else
941 gcc_unreachable ();
942 }
943 else if (memory_operand (operands[1], <MODE>mode))
944 {
945 if (<MODE_SIZE> == 32)
946 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
947 else if (<MODE_SIZE> == 16)
948 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
949 else
950 gcc_unreachable ();
951 }
952 else
953 /* Reg -> reg move is always aligned. Just use wider move. */
954 switch (get_attr_mode (insn))
955 {
956 case MODE_V8SF:
957 case MODE_V4SF:
958 return "vmovaps\t{%g1, %g0|%g0, %g1}";
959 case MODE_V4DF:
960 case MODE_V2DF:
961 return "vmovapd\t{%g1, %g0|%g0, %g1}";
962 case MODE_OI:
963 case MODE_TI:
964 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
965 default:
966 gcc_unreachable ();
967 }
968 }
969
970 switch (get_attr_mode (insn))
971 {
972 case MODE_V16SF:
973 case MODE_V8SF:
974 case MODE_V4SF:
975 if (misaligned_operand (operands[0], <MODE>mode)
976 || misaligned_operand (operands[1], <MODE>mode))
977 return "%vmovups\t{%1, %0|%0, %1}";
978 else
979 return "%vmovaps\t{%1, %0|%0, %1}";
980
981 case MODE_V8DF:
982 case MODE_V4DF:
983 case MODE_V2DF:
984 if (misaligned_operand (operands[0], <MODE>mode)
985 || misaligned_operand (operands[1], <MODE>mode))
986 return "%vmovupd\t{%1, %0|%0, %1}";
987 else
988 return "%vmovapd\t{%1, %0|%0, %1}";
989
990 case MODE_OI:
991 case MODE_TI:
992 if (misaligned_operand (operands[0], <MODE>mode)
993 || misaligned_operand (operands[1], <MODE>mode))
994 return TARGET_AVX512VL ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
995 : "%vmovdqu\t{%1, %0|%0, %1}";
996 else
997 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
998 : "%vmovdqa\t{%1, %0|%0, %1}";
999 case MODE_XI:
1000 if (misaligned_operand (operands[0], <MODE>mode)
1001 || misaligned_operand (operands[1], <MODE>mode))
1002 return (<MODE>mode == V16SImode
1003 || <MODE>mode == V8DImode
1004 || TARGET_AVX512BW)
1005 ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1006 : "vmovdqu64\t{%1, %0|%0, %1}";
1007 else
1008 return "vmovdqa64\t{%1, %0|%0, %1}";
1009
1010 default:
1011 gcc_unreachable ();
1012 }
1013
1014 default:
1015 gcc_unreachable ();
1016 }
1017 }
1018 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1019 (set_attr "prefix" "maybe_vex")
1020 (set (attr "mode")
1021 (cond [(and (eq_attr "alternative" "1")
1022 (match_test "TARGET_AVX512VL"))
1023 (const_string "<sseinsnmode>")
1024 (and (match_test "<MODE_SIZE> == 16")
1025 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1026 (and (eq_attr "alternative" "3")
1027 (match_test "TARGET_SSE_TYPELESS_STORES"))))
1028 (const_string "<ssePSmode>")
1029 (match_test "TARGET_AVX")
1030 (const_string "<sseinsnmode>")
1031 (ior (not (match_test "TARGET_SSE2"))
1032 (match_test "optimize_function_for_size_p (cfun)"))
1033 (const_string "V4SF")
1034 (and (eq_attr "alternative" "0")
1035 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1036 (const_string "TI")
1037 ]
1038 (const_string "<sseinsnmode>")))
1039 (set (attr "enabled")
1040 (cond [(and (match_test "<MODE_SIZE> == 16")
1041 (eq_attr "alternative" "1"))
1042 (symbol_ref "TARGET_SSE2")
1043 (and (match_test "<MODE_SIZE> == 32")
1044 (eq_attr "alternative" "1"))
1045 (symbol_ref "TARGET_AVX2")
1046 ]
1047 (symbol_ref "true")))])
1048
1049 (define_insn "<avx512>_load<mode>_mask"
1050 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1051 (vec_merge:V48_AVX512VL
1052 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
1053 (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
1054 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1055 "TARGET_AVX512F"
1056 {
1057 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1058 {
1059 if (misaligned_operand (operands[1], <MODE>mode))
1060 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1061 else
1062 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1063 }
1064 else
1065 {
1066 if (misaligned_operand (operands[1], <MODE>mode))
1067 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1068 else
1069 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1070 }
1071 }
1072 [(set_attr "type" "ssemov")
1073 (set_attr "prefix" "evex")
1074 (set_attr "memory" "none,load")
1075 (set_attr "mode" "<sseinsnmode>")])
1076
1077 (define_insn "<avx512>_load<mode>_mask"
1078 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1079 (vec_merge:VI12_AVX512VL
1080 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
1081 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
1082 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1083 "TARGET_AVX512BW"
1084 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1085 [(set_attr "type" "ssemov")
1086 (set_attr "prefix" "evex")
1087 (set_attr "memory" "none,load")
1088 (set_attr "mode" "<sseinsnmode>")])
1089
1090 (define_insn "<avx512>_blendm<mode>"
1091 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1092 (vec_merge:V48_AVX512VL
1093 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1094 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1095 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1096 "TARGET_AVX512F"
1097 "vblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1098 [(set_attr "type" "ssemov")
1099 (set_attr "prefix" "evex")
1100 (set_attr "mode" "<sseinsnmode>")])
1101
1102 (define_insn "<avx512>_blendm<mode>"
1103 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1104 (vec_merge:VI12_AVX512VL
1105 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1106 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1107 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1108 "TARGET_AVX512BW"
1109 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1110 [(set_attr "type" "ssemov")
1111 (set_attr "prefix" "evex")
1112 (set_attr "mode" "<sseinsnmode>")])
1113
1114 (define_insn "<avx512>_store<mode>_mask"
1115 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1116 (vec_merge:V48_AVX512VL
1117 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1118 (match_dup 0)
1119 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1120 "TARGET_AVX512F"
1121 {
1122 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1123 {
1124 if (misaligned_operand (operands[0], <MODE>mode))
1125 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1126 else
1127 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1128 }
1129 else
1130 {
1131 if (misaligned_operand (operands[0], <MODE>mode))
1132 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1133 else
1134 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1135 }
1136 }
1137 [(set_attr "type" "ssemov")
1138 (set_attr "prefix" "evex")
1139 (set_attr "memory" "store")
1140 (set_attr "mode" "<sseinsnmode>")])
1141
1142 (define_insn "<avx512>_store<mode>_mask"
1143 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1144 (vec_merge:VI12_AVX512VL
1145 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1146 (match_dup 0)
1147 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1148 "TARGET_AVX512BW"
1149 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1150 [(set_attr "type" "ssemov")
1151 (set_attr "prefix" "evex")
1152 (set_attr "memory" "store")
1153 (set_attr "mode" "<sseinsnmode>")])
1154
1155 (define_insn "sse2_movq128"
1156 [(set (match_operand:V2DI 0 "register_operand" "=v")
1157 (vec_concat:V2DI
1158 (vec_select:DI
1159 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1160 (parallel [(const_int 0)]))
1161 (const_int 0)))]
1162 "TARGET_SSE2"
1163 "%vmovq\t{%1, %0|%0, %q1}"
1164 [(set_attr "type" "ssemov")
1165 (set_attr "prefix" "maybe_vex")
1166 (set_attr "mode" "TI")])
1167
1168 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1169 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1170 ;; from memory, we'd prefer to load the memory directly into the %xmm
1171 ;; register. To facilitate this happy circumstance, this pattern won't
1172 ;; split until after register allocation. If the 64-bit value didn't
1173 ;; come from memory, this is the best we can do. This is much better
1174 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1175 ;; from there.
1176
1177 (define_insn_and_split "movdi_to_sse"
1178 [(parallel
1179 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1180 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1181 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1182 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1183 "#"
1184 "&& reload_completed"
1185 [(const_int 0)]
1186 {
1187 if (register_operand (operands[1], DImode))
1188 {
1189 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1190 Assemble the 64-bit DImode value in an xmm register. */
1191 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1192 gen_lowpart (SImode, operands[1])));
1193 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1194 gen_highpart (SImode, operands[1])));
1195 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1196 operands[2]));
1197 }
1198 else if (memory_operand (operands[1], DImode))
1199 {
1200 rtx tmp = gen_reg_rtx (V2DImode);
1201 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
1202 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
1203 }
1204 else
1205 gcc_unreachable ();
1206 DONE;
1207 })
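;; For the register-pair case the split above emits, roughly (register names
;; only illustrative):
;;	movd	%eax, %xmm0
;;	movd	%edx, %xmm1
;;	punpckldq %xmm1, %xmm0
;; i.e. both 32-bit halves are moved into vector registers and interleaved,
;; avoiding the round trip through a stack temporary described above.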
1208
1209 (define_split
1210 [(set (match_operand:V4SF 0 "register_operand")
1211 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1212 "TARGET_SSE && reload_completed"
1213 [(set (match_dup 0)
1214 (vec_merge:V4SF
1215 (vec_duplicate:V4SF (match_dup 1))
1216 (match_dup 2)
1217 (const_int 1)))]
1218 {
1219 operands[1] = gen_lowpart (SFmode, operands[1]);
1220 operands[2] = CONST0_RTX (V4SFmode);
1221 })
1222
1223 (define_split
1224 [(set (match_operand:V2DF 0 "register_operand")
1225 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1226 "TARGET_SSE2 && reload_completed"
1227 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1228 {
1229 operands[1] = gen_lowpart (DFmode, operands[1]);
1230 operands[2] = CONST0_RTX (DFmode);
1231 })
1232
1233 (define_expand "movmisalign<mode>"
1234 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1235 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1236 "TARGET_SSE"
1237 {
1238 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1239 DONE;
1240 })
1241
1242 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
1243 (define_peephole2
1244 [(set (match_operand:V2DF 0 "sse_reg_operand")
1245 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1246 (match_operand:DF 4 "const0_operand")))
1247 (set (match_operand:V2DF 2 "sse_reg_operand")
1248 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1249 (parallel [(const_int 0)]))
1250 (match_operand:DF 3 "memory_operand")))]
1251 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1252 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1253 [(set (match_dup 2) (match_dup 5))]
1254 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1255
1256 (define_peephole2
1257 [(set (match_operand:DF 0 "sse_reg_operand")
1258 (match_operand:DF 1 "memory_operand"))
1259 (set (match_operand:V2DF 2 "sse_reg_operand")
1260 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1261 (match_operand:DF 3 "memory_operand")))]
1262 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1263 && REGNO (operands[4]) == REGNO (operands[2])
1264 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1265 [(set (match_dup 2) (match_dup 5))]
1266 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1267
1268 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
1269 (define_peephole2
1270 [(set (match_operand:DF 0 "memory_operand")
1271 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1272 (parallel [(const_int 0)])))
1273 (set (match_operand:DF 2 "memory_operand")
1274 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1275 (parallel [(const_int 1)])))]
1276 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1277 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1278 [(set (match_dup 4) (match_dup 1))]
1279 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1280
1281 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1282 [(set (match_operand:VI1 0 "register_operand" "=x")
1283 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1284 UNSPEC_LDDQU))]
1285 "TARGET_SSE3"
1286 "%vlddqu\t{%1, %0|%0, %1}"
1287 [(set_attr "type" "ssemov")
1288 (set_attr "movu" "1")
1289 (set (attr "prefix_data16")
1290 (if_then_else
1291 (match_test "TARGET_AVX")
1292 (const_string "*")
1293 (const_string "0")))
1294 (set (attr "prefix_rep")
1295 (if_then_else
1296 (match_test "TARGET_AVX")
1297 (const_string "*")
1298 (const_string "1")))
1299 (set_attr "prefix" "maybe_vex")
1300 (set_attr "mode" "<sseinsnmode>")])
1301
1302 (define_insn "sse2_movnti<mode>"
1303 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1304 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1305 UNSPEC_MOVNT))]
1306 "TARGET_SSE2"
1307 "movnti\t{%1, %0|%0, %1}"
1308 [(set_attr "type" "ssemov")
1309 (set_attr "prefix_data16" "0")
1310 (set_attr "mode" "<MODE>")])
1311
1312 (define_insn "<sse>_movnt<mode>"
1313 [(set (match_operand:VF 0 "memory_operand" "=m")
1314 (unspec:VF
1315 [(match_operand:VF 1 "register_operand" "v")]
1316 UNSPEC_MOVNT))]
1317 "TARGET_SSE"
1318 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1319 [(set_attr "type" "ssemov")
1320 (set_attr "prefix" "maybe_vex")
1321 (set_attr "mode" "<MODE>")])
1322
1323 (define_insn "<sse2>_movnt<mode>"
1324 [(set (match_operand:VI8 0 "memory_operand" "=m")
1325 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1326 UNSPEC_MOVNT))]
1327 "TARGET_SSE2"
1328 "%vmovntdq\t{%1, %0|%0, %1}"
1329 [(set_attr "type" "ssecvt")
1330 (set (attr "prefix_data16")
1331 (if_then_else
1332 (match_test "TARGET_AVX")
1333 (const_string "*")
1334 (const_string "1")))
1335 (set_attr "prefix" "maybe_vex")
1336 (set_attr "mode" "<sseinsnmode>")])
1337
1338 ; Expand patterns for non-temporal stores. At the moment, only those
1339 ; that directly map to insns are defined; it would be possible to
1340 ; define patterns for other modes that would expand to several insns.
1341
1342 ;; Modes handled by storent patterns.
1343 (define_mode_iterator STORENT_MODE
1344 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1345 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1346 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1347 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1348 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1349
1350 (define_expand "storent<mode>"
1351 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1352 (unspec:STORENT_MODE
1353 [(match_operand:STORENT_MODE 1 "register_operand")]
1354 UNSPEC_MOVNT))]
1355 "TARGET_SSE")
1356
1357 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1358 ;;
1359 ;; Mask operations
1360 ;;
1361 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1362
1363 ;; All integer modes with AVX512BW/DQ.
1364 (define_mode_iterator SWI1248_AVX512BWDQ
1365 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1366
1367 ;; All integer modes with AVX512BW, where HImode operation
1368 ;; can be used instead of QImode.
1369 (define_mode_iterator SWI1248_AVX512BW
1370 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1371
1372 ;; All integer modes with AVX512BW/DQ, where even HImode requires DQ.
1373 (define_mode_iterator SWI1248_AVX512BWDQ2
1374 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1375 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1376
1377 (define_expand "kmov<mskmodesuffix>"
1378 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1379 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1380 "TARGET_AVX512F
1381 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
1382
1383 (define_insn "k<code><mode>"
1384 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1385 (any_logic:SWI1248_AVX512BW
1386 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1387 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1388 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1389 "TARGET_AVX512F"
1390 {
1391 if (get_attr_mode (insn) == MODE_HI)
1392 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1393 else
1394 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1395 }
1396 [(set_attr "type" "msklog")
1397 (set_attr "prefix" "vex")
1398 (set (attr "mode")
1399 (cond [(and (match_test "<MODE>mode == QImode")
1400 (not (match_test "TARGET_AVX512DQ")))
1401 (const_string "HI")
1402 ]
1403 (const_string "<MODE>")))])
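;; Note on the mode attribute above: the byte forms of the mask logic
;; instructions (kandb, korb, kxorb, ...) only exist with AVX512DQ, so
;; without it the QImode case is emitted as the word form (kandw etc.);
;; the extra upper mask bits that computes are simply ignored.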
1404
1405 (define_insn "kandn<mode>"
1406 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1407 (and:SWI1248_AVX512BW
1408 (not:SWI1248_AVX512BW
1409 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1410 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1411 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1412 "TARGET_AVX512F"
1413 {
1414 if (get_attr_mode (insn) == MODE_HI)
1415 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1416 else
1417 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1418 }
1419 [(set_attr "type" "msklog")
1420 (set_attr "prefix" "vex")
1421 (set (attr "mode")
1422 (cond [(and (match_test "<MODE>mode == QImode")
1423 (not (match_test "TARGET_AVX512DQ")))
1424 (const_string "HI")
1425 ]
1426 (const_string "<MODE>")))])
1427
1428 (define_insn "kxnor<mode>"
1429 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1430 (not:SWI1248_AVX512BW
1431 (xor:SWI1248_AVX512BW
1432 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1433 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1434 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1435 "TARGET_AVX512F"
1436 {
1437 if (get_attr_mode (insn) == MODE_HI)
1438 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1439 else
1440 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1441 }
1442 [(set_attr "type" "msklog")
1443 (set_attr "prefix" "vex")
1444 (set (attr "mode")
1445 (cond [(and (match_test "<MODE>mode == QImode")
1446 (not (match_test "TARGET_AVX512DQ")))
1447 (const_string "HI")
1448 ]
1449 (const_string "<MODE>")))])
1450
1451 (define_insn "knot<mode>"
1452 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1453 (not:SWI1248_AVX512BW
1454 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1455 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1456 "TARGET_AVX512F"
1457 {
1458 if (get_attr_mode (insn) == MODE_HI)
1459 return "knotw\t{%1, %0|%0, %1}";
1460 else
1461 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1462 }
1463 [(set_attr "type" "msklog")
1464 (set_attr "prefix" "vex")
1465 (set (attr "mode")
1466 (cond [(and (match_test "<MODE>mode == QImode")
1467 (not (match_test "TARGET_AVX512DQ")))
1468 (const_string "HI")
1469 ]
1470 (const_string "<MODE>")))])
1471
1472 (define_insn "kadd<mode>"
1473 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1474 (plus:SWI1248_AVX512BWDQ2
1475 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1476 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1477 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1478 "TARGET_AVX512F"
1479 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1480 [(set_attr "type" "msklog")
1481 (set_attr "prefix" "vex")
1482 (set_attr "mode" "<MODE>")])
1483
1484 ;; Mask variant shift mnemonics
1485 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
1486
1487 (define_insn "k<code><mode>"
1488 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1489 (any_lshift:SWI1248_AVX512BWDQ
1490 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1491 (match_operand:QI 2 "immediate_operand" "n")))
1492 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1493 "TARGET_AVX512F"
1494 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1495 [(set_attr "type" "msklog")
1496 (set_attr "prefix" "vex")
1497 (set_attr "mode" "<MODE>")])
1498
1499 (define_insn "ktest<mode>"
1500 [(set (reg:CC FLAGS_REG)
1501 (unspec:CC
1502 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1503 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1504 UNSPEC_KTEST))]
1505 "TARGET_AVX512F"
1506 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1507 [(set_attr "mode" "<MODE>")
1508 (set_attr "type" "msklog")
1509 (set_attr "prefix" "vex")])
1510
1511 (define_insn "kortest<mode>"
1512 [(set (reg:CC FLAGS_REG)
1513 (unspec:CC
1514 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1515 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1516 UNSPEC_KORTEST))]
1517 "TARGET_AVX512F"
1518 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1519 [(set_attr "mode" "<MODE>")
1520 (set_attr "type" "msklog")
1521 (set_attr "prefix" "vex")])
1522
1523 (define_insn "kunpckhi"
1524 [(set (match_operand:HI 0 "register_operand" "=k")
1525 (ior:HI
1526 (ashift:HI
1527 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1528 (const_int 8))
1529 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1530 "TARGET_AVX512F"
1531 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1532 [(set_attr "mode" "HI")
1533 (set_attr "type" "msklog")
1534 (set_attr "prefix" "vex")])
1535
1536 (define_insn "kunpcksi"
1537 [(set (match_operand:SI 0 "register_operand" "=k")
1538 (ior:SI
1539 (ashift:SI
1540 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1541 (const_int 16))
1542 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1543 "TARGET_AVX512BW"
1544 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1545 [(set_attr "mode" "SI")])
1546
1547 (define_insn "kunpckdi"
1548 [(set (match_operand:DI 0 "register_operand" "=k")
1549 (ior:DI
1550 (ashift:DI
1551 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1552 (const_int 32))
1553 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1554 "TARGET_AVX512BW"
1555 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1556 [(set_attr "mode" "DI")])
1557
1558
1559 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1560 ;;
1561 ;; Parallel floating point arithmetic
1562 ;;
1563 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1564
1565 (define_expand "<code><mode>2"
1566 [(set (match_operand:VF 0 "register_operand")
1567 (absneg:VF
1568 (match_operand:VF 1 "register_operand")))]
1569 "TARGET_SSE"
1570 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1571
1572 (define_insn_and_split "*absneg<mode>2"
1573 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1574 (match_operator:VF 3 "absneg_operator"
1575 [(match_operand:VF 1 "vector_operand" "0, xBm,v, m")]))
1576 (use (match_operand:VF 2 "vector_operand" "xBm,0, vm,v"))]
1577 "TARGET_SSE"
1578 "#"
1579 "&& reload_completed"
1580 [(const_int 0)]
1581 {
1582 enum rtx_code absneg_op;
1583 rtx op1, op2;
1584 rtx t;
1585
1586 if (TARGET_AVX)
1587 {
1588 if (MEM_P (operands[1]))
1589 op1 = operands[2], op2 = operands[1];
1590 else
1591 op1 = operands[1], op2 = operands[2];
1592 }
1593 else
1594 {
1595 op1 = operands[0];
1596 if (rtx_equal_p (operands[0], operands[1]))
1597 op2 = operands[2];
1598 else
1599 op2 = operands[1];
1600 }
1601
1602 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1603 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1604 t = gen_rtx_SET (operands[0], t);
1605 emit_insn (t);
1606 DONE;
1607 }
1608 [(set_attr "isa" "noavx,noavx,avx,avx")])
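;; Operand 2 is the constant mask set up by ix86_expand_fp_absneg_operator
;; in the expander above: for NEG it holds just the sign bits and the split
;; emits an XOR (flipping the signs); for ABS it holds the complement and
;; the split emits an AND (clearing the signs).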
1609
1610 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1611 [(set (match_operand:VF 0 "register_operand")
1612 (plusminus:VF
1613 (match_operand:VF 1 "<round_nimm_predicate>")
1614 (match_operand:VF 2 "<round_nimm_predicate>")))]
1615 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1616 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1617
1618 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1619 [(set (match_operand:VF 0 "register_operand" "=x,v")
1620 (plusminus:VF
1621 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1622 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1623 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1624 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1625 "@
1626 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1627 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1628 [(set_attr "isa" "noavx,avx")
1629 (set_attr "type" "sseadd")
1630 (set_attr "prefix" "<mask_prefix3>")
1631 (set_attr "mode" "<MODE>")])
1632
1633 (define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
1634 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1635 (vec_merge:VF_128
1636 (plusminus:VF_128
1637 (match_operand:VF_128 1 "register_operand" "0,v")
1638 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1639 (match_dup 1)
1640 (const_int 1)))]
1641 "TARGET_SSE"
1642 "@
1643 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1644 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1645 [(set_attr "isa" "noavx,avx")
1646 (set_attr "type" "sseadd")
1647 (set_attr "prefix" "<round_scalar_prefix>")
1648 (set_attr "mode" "<ssescalarmode>")])
1649
1650 (define_expand "mul<mode>3<mask_name><round_name>"
1651 [(set (match_operand:VF 0 "register_operand")
1652 (mult:VF
1653 (match_operand:VF 1 "<round_nimm_predicate>")
1654 (match_operand:VF 2 "<round_nimm_predicate>")))]
1655 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1656 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1657
1658 (define_insn "*mul<mode>3<mask_name><round_name>"
1659 [(set (match_operand:VF 0 "register_operand" "=x,v")
1660 (mult:VF
1661 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1662 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1663 "TARGET_SSE
1664 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
1665 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1666 "@
1667 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1668 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1669 [(set_attr "isa" "noavx,avx")
1670 (set_attr "type" "ssemul")
1671 (set_attr "prefix" "<mask_prefix3>")
1672 (set_attr "btver2_decode" "direct,double")
1673 (set_attr "mode" "<MODE>")])
1674
1675 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1676 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1677 (vec_merge:VF_128
1678 (multdiv:VF_128
1679 (match_operand:VF_128 1 "register_operand" "0,v")
1680 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1681 (match_dup 1)
1682 (const_int 1)))]
1683 "TARGET_SSE"
1684 "@
1685 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1686 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1687 [(set_attr "isa" "noavx,avx")
1688 (set_attr "type" "sse<multdiv_mnemonic>")
1689 (set_attr "prefix" "<round_scalar_prefix>")
1690 (set_attr "btver2_decode" "direct,double")
1691 (set_attr "mode" "<ssescalarmode>")])
1692
1693 (define_expand "div<mode>3"
1694 [(set (match_operand:VF2 0 "register_operand")
1695 (div:VF2 (match_operand:VF2 1 "register_operand")
1696 (match_operand:VF2 2 "vector_operand")))]
1697 "TARGET_SSE2"
1698 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1699
1700 (define_expand "div<mode>3"
1701 [(set (match_operand:VF1 0 "register_operand")
1702 (div:VF1 (match_operand:VF1 1 "register_operand")
1703 (match_operand:VF1 2 "vector_operand")))]
1704 "TARGET_SSE"
1705 {
1706 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1707
1708 if (TARGET_SSE_MATH
1709 && TARGET_RECIP_VEC_DIV
1710 && !optimize_insn_for_size_p ()
1711 && flag_finite_math_only && !flag_trapping_math
1712 && flag_unsafe_math_optimizations)
1713 {
1714 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1715 DONE;
1716 }
1717 })
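;; When the reciprocal path above is taken, ix86_emit_swdivsf emits no real
;; division; the usual shape (a sketch, the exact insn sequence lives in
;; i386.c) is
;;   x0 = rcpps (b)
;;   x1 = x0 + x0 * (1 - b * x0)     ; one Newton-Raphson step
;;   a / b ~= a * x1
;; which is why the transformation is gated on the unsafe-math flags.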
1718
1719 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1720 [(set (match_operand:VF 0 "register_operand" "=x,v")
1721 (div:VF
1722 (match_operand:VF 1 "register_operand" "0,v")
1723 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1724 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1725 "@
1726 div<ssemodesuffix>\t{%2, %0|%0, %2}
1727 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1728 [(set_attr "isa" "noavx,avx")
1729 (set_attr "type" "ssediv")
1730 (set_attr "prefix" "<mask_prefix3>")
1731 (set_attr "mode" "<MODE>")])
1732
1733 (define_insn "<sse>_rcp<mode>2"
1734 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1735 (unspec:VF1_128_256
1736 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
1737 "TARGET_SSE"
1738 "%vrcpps\t{%1, %0|%0, %1}"
1739 [(set_attr "type" "sse")
1740 (set_attr "atom_sse_attr" "rcp")
1741 (set_attr "btver2_sse_attr" "rcp")
1742 (set_attr "prefix" "maybe_vex")
1743 (set_attr "mode" "<MODE>")])
1744
1745 (define_insn "sse_vmrcpv4sf2"
1746 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1747 (vec_merge:V4SF
1748 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1749 UNSPEC_RCP)
1750 (match_operand:V4SF 2 "register_operand" "0,x")
1751 (const_int 1)))]
1752 "TARGET_SSE"
1753 "@
1754 rcpss\t{%1, %0|%0, %k1}
1755 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1756 [(set_attr "isa" "noavx,avx")
1757 (set_attr "type" "sse")
1758 (set_attr "atom_sse_attr" "rcp")
1759 (set_attr "btver2_sse_attr" "rcp")
1760 (set_attr "prefix" "orig,vex")
1761 (set_attr "mode" "SF")])
1762
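;; The rcpps/rcpss estimates above are only guaranteed to roughly 1.5*2^-12
;; relative error; the AVX-512 vrcp14* patterns below tighten that to 2^-14,
;; which is what the "14" in the mnemonic refers to.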
1763 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1764 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1765 (unspec:VF_AVX512VL
1766 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1767 UNSPEC_RCP14))]
1768 "TARGET_AVX512F"
1769 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1770 [(set_attr "type" "sse")
1771 (set_attr "prefix" "evex")
1772 (set_attr "mode" "<MODE>")])
1773
1774 (define_insn "srcp14<mode>"
1775 [(set (match_operand:VF_128 0 "register_operand" "=v")
1776 (vec_merge:VF_128
1777 (unspec:VF_128
1778 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1779 UNSPEC_RCP14)
1780 (match_operand:VF_128 2 "register_operand" "v")
1781 (const_int 1)))]
1782 "TARGET_AVX512F"
1783 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1784 [(set_attr "type" "sse")
1785 (set_attr "prefix" "evex")
1786 (set_attr "mode" "<MODE>")])
1787
1788 (define_insn "srcp14<mode>_mask"
1789 [(set (match_operand:VF_128 0 "register_operand" "=v")
1790 (vec_merge:VF_128
1791 (vec_merge:VF_128
1792 (unspec:VF_128
1793 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1794 UNSPEC_RCP14)
1795 (match_operand:VF_128 3 "vector_move_operand" "0C")
1796 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1797 (match_operand:VF_128 2 "register_operand" "v")
1798 (const_int 1)))]
1799 "TARGET_AVX512F"
1800 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1801 [(set_attr "type" "sse")
1802 (set_attr "prefix" "evex")
1803 (set_attr "mode" "<MODE>")])
1804
1805 (define_expand "sqrt<mode>2"
1806 [(set (match_operand:VF2 0 "register_operand")
1807 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
1808 "TARGET_SSE2")
1809
1810 (define_expand "sqrt<mode>2"
1811 [(set (match_operand:VF1 0 "register_operand")
1812 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
1813 "TARGET_SSE"
1814 {
1815 if (TARGET_SSE_MATH
1816 && TARGET_RECIP_VEC_SQRT
1817 && !optimize_insn_for_size_p ()
1818 && flag_finite_math_only && !flag_trapping_math
1819 && flag_unsafe_math_optimizations)
1820 {
1821 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
1822 DONE;
1823 }
1824 })
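;; As with division, the fast-math path above avoids a real sqrt:
;; ix86_emit_swsqrtsf starts from the rsqrtps estimate and applies one
;; Newton-Raphson step, roughly
;;   x1 = x0 * (1.5 - 0.5 * a * x0 * x0)
;; with sqrt (a) recovered as a * rsqrt (a) (a sketch; the exact sequence
;; lives in i386.c).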
1825
1826 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1827 [(set (match_operand:VF 0 "register_operand" "=x,v")
1828 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1829 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1830 "@
1831 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
1832 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1833 [(set_attr "isa" "noavx,avx")
1834 (set_attr "type" "sse")
1835 (set_attr "atom_sse_attr" "sqrt")
1836 (set_attr "btver2_sse_attr" "sqrt")
1837 (set_attr "prefix" "maybe_vex")
1838 (set_attr "mode" "<MODE>")])
1839
1840 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1841 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1842 (vec_merge:VF_128
1843 (sqrt:VF_128
1844 (match_operand:VF_128 1 "vector_operand" "xBm,<round_constraint>"))
1845 (match_operand:VF_128 2 "register_operand" "0,v")
1846 (const_int 1)))]
1847 "TARGET_SSE"
1848 "@
1849 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1850 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1851 [(set_attr "isa" "noavx,avx")
1852 (set_attr "type" "sse")
1853 (set_attr "atom_sse_attr" "sqrt")
1854 (set_attr "prefix" "<round_prefix>")
1855 (set_attr "btver2_sse_attr" "sqrt")
1856 (set_attr "mode" "<ssescalarmode>")])
1857
1858 (define_expand "rsqrt<mode>2"
1859 [(set (match_operand:VF1_128_256 0 "register_operand")
1860 (unspec:VF1_128_256
1861 [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
1862 "TARGET_SSE_MATH"
1863 {
1864 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1865 DONE;
1866 })
1867
1868 (define_expand "rsqrtv16sf2"
1869 [(set (match_operand:V16SF 0 "register_operand")
1870 (unspec:V16SF
1871 [(match_operand:V16SF 1 "vector_operand")]
1872 UNSPEC_RSQRT28))]
1873 "TARGET_SSE_MATH && TARGET_AVX512ER"
1874 {
1875 ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
1876 DONE;
1877 })
1878
1879 (define_insn "<sse>_rsqrt<mode>2"
1880 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1881 (unspec:VF1_128_256
1882 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
1883 "TARGET_SSE"
1884 "%vrsqrtps\t{%1, %0|%0, %1}"
1885 [(set_attr "type" "sse")
1886 (set_attr "prefix" "maybe_vex")
1887 (set_attr "mode" "<MODE>")])
1888
1889 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1890 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1891 (unspec:VF_AVX512VL
1892 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1893 UNSPEC_RSQRT14))]
1894 "TARGET_AVX512F"
1895 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1896 [(set_attr "type" "sse")
1897 (set_attr "prefix" "evex")
1898 (set_attr "mode" "<MODE>")])
1899
1900 (define_insn "rsqrt14<mode>"
1901 [(set (match_operand:VF_128 0 "register_operand" "=v")
1902 (vec_merge:VF_128
1903 (unspec:VF_128
1904 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1905 UNSPEC_RSQRT14)
1906 (match_operand:VF_128 2 "register_operand" "v")
1907 (const_int 1)))]
1908 "TARGET_AVX512F"
1909 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1910 [(set_attr "type" "sse")
1911 (set_attr "prefix" "evex")
1912 (set_attr "mode" "<MODE>")])
1913
1914 (define_insn "rsqrt14_<mode>_mask"
1915 [(set (match_operand:VF_128 0 "register_operand" "=v")
1916 (vec_merge:VF_128
1917 (vec_merge:VF_128
1918 (unspec:VF_128
1919 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1920 UNSPEC_RSQRT14)
1921 (match_operand:VF_128 3 "vector_move_operand" "0C")
1922 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1923 (match_operand:VF_128 2 "register_operand" "v")
1924 (const_int 1)))]
1925 "TARGET_AVX512F"
1926 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1927 [(set_attr "type" "sse")
1928 (set_attr "prefix" "evex")
1929 (set_attr "mode" "<MODE>")])
1930
1931 (define_insn "sse_vmrsqrtv4sf2"
1932 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1933 (vec_merge:V4SF
1934 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1935 UNSPEC_RSQRT)
1936 (match_operand:V4SF 2 "register_operand" "0,x")
1937 (const_int 1)))]
1938 "TARGET_SSE"
1939 "@
1940 rsqrtss\t{%1, %0|%0, %k1}
1941 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1942 [(set_attr "isa" "noavx,avx")
1943 (set_attr "type" "sse")
1944 (set_attr "prefix" "orig,vex")
1945 (set_attr "mode" "SF")])
1946
1947 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1948 [(set (match_operand:VF 0 "register_operand")
1949 (smaxmin:VF
1950 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1951 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1952 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1953 {
1954 if (!flag_finite_math_only || flag_signed_zeros)
1955 {
1956 operands[1] = force_reg (<MODE>mode, operands[1]);
1957 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
1958 (operands[0], operands[1], operands[2]
1959 <mask_operand_arg34>
1960 <round_saeonly_mask_arg3>));
1961 DONE;
1962 }
1963 else
1964 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1965 })
1966
1967 ;; These versions of the min/max patterns are intentionally ignorant of
1968 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
1969 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
1970 ;; are undefined in this condition, we're certain this is correct.
1971
1972 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1973 [(set (match_operand:VF 0 "register_operand" "=x,v")
1974 (smaxmin:VF
1975 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1976 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
1977 "TARGET_SSE
1978 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
1979 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1980 "@
1981 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1982 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1983 [(set_attr "isa" "noavx,avx")
1984 (set_attr "type" "sseadd")
1985 (set_attr "btver2_sse_attr" "maxmin")
1986 (set_attr "prefix" "<mask_prefix3>")
1987 (set_attr "mode" "<MODE>")])
1988
1989 ;; These versions of the min/max patterns implement exactly the operations
1990 ;; min = (op1 < op2 ? op1 : op2)
1991 ;; max = (!(op1 < op2) ? op1 : op2)
1992 ;; Their operands are not commutative, and thus they may be used in the
1993 ;; presence of -0.0 and NaN.
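;; Concretely, because any comparison involving a NaN is false, the formulas
;; above give min (x, NaN) = NaN but min (NaN, x) = x, and
;; min (-0.0, +0.0) = +0.0 but min (+0.0, -0.0) = -0.0, which is the
;; behavior of the hardware minps/maxps family.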
1994
1995 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
1996 [(set (match_operand:VF 0 "register_operand" "=x,v")
1997 (unspec:VF
1998 [(match_operand:VF 1 "register_operand" "0,v")
1999 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2000 IEEE_MAXMIN))]
2001 "TARGET_SSE
2002 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2003 "@
2004 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2005 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2006 [(set_attr "isa" "noavx,avx")
2007 (set_attr "type" "sseadd")
2008 (set_attr "btver2_sse_attr" "maxmin")
2009 (set_attr "prefix" "<mask_prefix3>")
2010 (set_attr "mode" "<MODE>")])
2011
2012 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2013 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2014 (vec_merge:VF_128
2015 (smaxmin:VF_128
2016 (match_operand:VF_128 1 "register_operand" "0,v")
2017 (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_scalar_constraint>"))
2018 (match_dup 1)
2019 (const_int 1)))]
2020 "TARGET_SSE"
2021 "@
2022 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2023 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2024 [(set_attr "isa" "noavx,avx")
2025 (set_attr "type" "sse")
2026 (set_attr "btver2_sse_attr" "maxmin")
2027 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2028 (set_attr "mode" "<ssescalarmode>")])
2029
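;; In the addsub patterns below the vec_merge selector picks the MINUS arm
;; for every element whose bit is set, so const_int 1, 5 and 85
;; (0b1, 0b101, 0b1010101) all mean "subtract in the even elements, add in
;; the odd ones", matching addsubpd/addsubps.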
2030 (define_insn "avx_addsubv4df3"
2031 [(set (match_operand:V4DF 0 "register_operand" "=x")
2032 (vec_merge:V4DF
2033 (minus:V4DF
2034 (match_operand:V4DF 1 "register_operand" "x")
2035 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2036 (plus:V4DF (match_dup 1) (match_dup 2))
2037 (const_int 5)))]
2038 "TARGET_AVX"
2039 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2040 [(set_attr "type" "sseadd")
2041 (set_attr "prefix" "vex")
2042 (set_attr "mode" "V4DF")])
2043
2044 (define_insn "sse3_addsubv2df3"
2045 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2046 (vec_merge:V2DF
2047 (minus:V2DF
2048 (match_operand:V2DF 1 "register_operand" "0,x")
2049 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2050 (plus:V2DF (match_dup 1) (match_dup 2))
2051 (const_int 1)))]
2052 "TARGET_SSE3"
2053 "@
2054 addsubpd\t{%2, %0|%0, %2}
2055 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2056 [(set_attr "isa" "noavx,avx")
2057 (set_attr "type" "sseadd")
2058 (set_attr "atom_unit" "complex")
2059 (set_attr "prefix" "orig,vex")
2060 (set_attr "mode" "V2DF")])
2061
2062 (define_insn "avx_addsubv8sf3"
2063 [(set (match_operand:V8SF 0 "register_operand" "=x")
2064 (vec_merge:V8SF
2065 (minus:V8SF
2066 (match_operand:V8SF 1 "register_operand" "x")
2067 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2068 (plus:V8SF (match_dup 1) (match_dup 2))
2069 (const_int 85)))]
2070 "TARGET_AVX"
2071 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2072 [(set_attr "type" "sseadd")
2073 (set_attr "prefix" "vex")
2074 (set_attr "mode" "V8SF")])
2075
2076 (define_insn "sse3_addsubv4sf3"
2077 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2078 (vec_merge:V4SF
2079 (minus:V4SF
2080 (match_operand:V4SF 1 "register_operand" "0,x")
2081 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2082 (plus:V4SF (match_dup 1) (match_dup 2))
2083 (const_int 5)))]
2084 "TARGET_SSE3"
2085 "@
2086 addsubps\t{%2, %0|%0, %2}
2087 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2088 [(set_attr "isa" "noavx,avx")
2089 (set_attr "type" "sseadd")
2090 (set_attr "prefix" "orig,vex")
2091 (set_attr "prefix_rep" "1,*")
2092 (set_attr "mode" "V4SF")])
2093
2094 (define_split
2095 [(set (match_operand:VF_128_256 0 "register_operand")
2096 (match_operator:VF_128_256 6 "addsub_vm_operator"
2097 [(minus:VF_128_256
2098 (match_operand:VF_128_256 1 "register_operand")
2099 (match_operand:VF_128_256 2 "vector_operand"))
2100 (plus:VF_128_256
2101 (match_operand:VF_128_256 3 "vector_operand")
2102 (match_operand:VF_128_256 4 "vector_operand"))
2103 (match_operand 5 "const_int_operand")]))]
2104 "TARGET_SSE3
2105 && can_create_pseudo_p ()
2106 && ((rtx_equal_p (operands[1], operands[3])
2107 && rtx_equal_p (operands[2], operands[4]))
2108 || (rtx_equal_p (operands[1], operands[4])
2109 && rtx_equal_p (operands[2], operands[3])))"
2110 [(set (match_dup 0)
2111 (vec_merge:VF_128_256
2112 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2113 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2114 (match_dup 5)))])
2115
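;; The next split sees PLUS and MINUS in the opposite order, so the selector
;; has to be inverted; e.g. for V4SF a selector of 10 (0b1010) on the
;; swapped form becomes ~10 & 0xf = 5 for the canonical vec_merge above.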
2116 (define_split
2117 [(set (match_operand:VF_128_256 0 "register_operand")
2118 (match_operator:VF_128_256 6 "addsub_vm_operator"
2119 [(plus:VF_128_256
2120 (match_operand:VF_128_256 1 "vector_operand")
2121 (match_operand:VF_128_256 2 "vector_operand"))
2122 (minus:VF_128_256
2123 (match_operand:VF_128_256 3 "register_operand")
2124 (match_operand:VF_128_256 4 "vector_operand"))
2125 (match_operand 5 "const_int_operand")]))]
2126 "TARGET_SSE3
2127 && can_create_pseudo_p ()
2128 && ((rtx_equal_p (operands[1], operands[3])
2129 && rtx_equal_p (operands[2], operands[4]))
2130 || (rtx_equal_p (operands[1], operands[4])
2131 && rtx_equal_p (operands[2], operands[3])))"
2132 [(set (match_dup 0)
2133 (vec_merge:VF_128_256
2134 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2135 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2136 (match_dup 5)))]
2137 {
2138 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
2139 operands[5]
2140 = GEN_INT (~INTVAL (operands[5])
2141 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
2142 })
2143
2144 (define_split
2145 [(set (match_operand:VF_128_256 0 "register_operand")
2146 (match_operator:VF_128_256 7 "addsub_vs_operator"
2147 [(vec_concat:<ssedoublemode>
2148 (minus:VF_128_256
2149 (match_operand:VF_128_256 1 "register_operand")
2150 (match_operand:VF_128_256 2 "vector_operand"))
2151 (plus:VF_128_256
2152 (match_operand:VF_128_256 3 "vector_operand")
2153 (match_operand:VF_128_256 4 "vector_operand")))
2154 (match_parallel 5 "addsub_vs_parallel"
2155 [(match_operand 6 "const_int_operand")])]))]
2156 "TARGET_SSE3
2157 && can_create_pseudo_p ()
2158 && ((rtx_equal_p (operands[1], operands[3])
2159 && rtx_equal_p (operands[2], operands[4]))
2160 || (rtx_equal_p (operands[1], operands[4])
2161 && rtx_equal_p (operands[2], operands[3])))"
2162 [(set (match_dup 0)
2163 (vec_merge:VF_128_256
2164 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2165 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2166 (match_dup 5)))]
2167 {
2168 int i, nelt = XVECLEN (operands[5], 0);
2169 HOST_WIDE_INT ival = 0;
2170
2171 for (i = 0; i < nelt; i++)
2172 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2173 ival |= HOST_WIDE_INT_1 << i;
2174
2175 operands[5] = GEN_INT (ival);
2176 })
2177
2178 (define_split
2179 [(set (match_operand:VF_128_256 0 "register_operand")
2180 (match_operator:VF_128_256 7 "addsub_vs_operator"
2181 [(vec_concat:<ssedoublemode>
2182 (plus:VF_128_256
2183 (match_operand:VF_128_256 1 "vector_operand")
2184 (match_operand:VF_128_256 2 "vector_operand"))
2185 (minus:VF_128_256
2186 (match_operand:VF_128_256 3 "register_operand")
2187 (match_operand:VF_128_256 4 "vector_operand")))
2188 (match_parallel 5 "addsub_vs_parallel"
2189 [(match_operand 6 "const_int_operand")])]))]
2190 "TARGET_SSE3
2191 && can_create_pseudo_p ()
2192 && ((rtx_equal_p (operands[1], operands[3])
2193 && rtx_equal_p (operands[2], operands[4]))
2194 || (rtx_equal_p (operands[1], operands[4])
2195 && rtx_equal_p (operands[2], operands[3])))"
2196 [(set (match_dup 0)
2197 (vec_merge:VF_128_256
2198 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2199 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2200 (match_dup 5)))]
2201 {
2202 int i, nelt = XVECLEN (operands[5], 0);
2203 HOST_WIDE_INT ival = 0;
2204
2205 for (i = 0; i < nelt; i++)
2206 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2207 ival |= HOST_WIDE_INT_1 << i;
2208
2209 operands[5] = GEN_INT (ival);
2210 })
2211
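;; The 256-bit horizontal add/sub below operates within 128-bit lanes: the
;; result is { op1[0] op op1[1], op2[0] op op2[1],
;;             op1[2] op op1[3], op2[2] op op2[3] }
;; where "op" is the plus or minus chosen by the iterator, exactly as the
;; vec_select indices spell out.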
2212 (define_insn "avx_h<plusminus_insn>v4df3"
2213 [(set (match_operand:V4DF 0 "register_operand" "=x")
2214 (vec_concat:V4DF
2215 (vec_concat:V2DF
2216 (plusminus:DF
2217 (vec_select:DF
2218 (match_operand:V4DF 1 "register_operand" "x")
2219 (parallel [(const_int 0)]))
2220 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2221 (plusminus:DF
2222 (vec_select:DF
2223 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2224 (parallel [(const_int 0)]))
2225 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2226 (vec_concat:V2DF
2227 (plusminus:DF
2228 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2229 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2230 (plusminus:DF
2231 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2232 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2233 "TARGET_AVX"
2234 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2235 [(set_attr "type" "sseadd")
2236 (set_attr "prefix" "vex")
2237 (set_attr "mode" "V4DF")])
2238
2239 (define_expand "sse3_haddv2df3"
2240 [(set (match_operand:V2DF 0 "register_operand")
2241 (vec_concat:V2DF
2242 (plus:DF
2243 (vec_select:DF
2244 (match_operand:V2DF 1 "register_operand")
2245 (parallel [(const_int 0)]))
2246 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2247 (plus:DF
2248 (vec_select:DF
2249 (match_operand:V2DF 2 "vector_operand")
2250 (parallel [(const_int 0)]))
2251 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2252 "TARGET_SSE3")
2253
2254 (define_insn "*sse3_haddv2df3"
2255 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2256 (vec_concat:V2DF
2257 (plus:DF
2258 (vec_select:DF
2259 (match_operand:V2DF 1 "register_operand" "0,x")
2260 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2261 (vec_select:DF
2262 (match_dup 1)
2263 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2264 (plus:DF
2265 (vec_select:DF
2266 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2267 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2268 (vec_select:DF
2269 (match_dup 2)
2270 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2271 "TARGET_SSE3
2272 && INTVAL (operands[3]) != INTVAL (operands[4])
2273 && INTVAL (operands[5]) != INTVAL (operands[6])"
2274 "@
2275 haddpd\t{%2, %0|%0, %2}
2276 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2277 [(set_attr "isa" "noavx,avx")
2278 (set_attr "type" "sseadd")
2279 (set_attr "prefix" "orig,vex")
2280 (set_attr "mode" "V2DF")])
2281
2282 (define_insn "sse3_hsubv2df3"
2283 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2284 (vec_concat:V2DF
2285 (minus:DF
2286 (vec_select:DF
2287 (match_operand:V2DF 1 "register_operand" "0,x")
2288 (parallel [(const_int 0)]))
2289 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2290 (minus:DF
2291 (vec_select:DF
2292 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2293 (parallel [(const_int 0)]))
2294 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2295 "TARGET_SSE3"
2296 "@
2297 hsubpd\t{%2, %0|%0, %2}
2298 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2299 [(set_attr "isa" "noavx,avx")
2300 (set_attr "type" "sseadd")
2301 (set_attr "prefix" "orig,vex")
2302 (set_attr "mode" "V2DF")])
2303
2304 (define_insn "*sse3_haddv2df3_low"
2305 [(set (match_operand:DF 0 "register_operand" "=x,x")
2306 (plus:DF
2307 (vec_select:DF
2308 (match_operand:V2DF 1 "register_operand" "0,x")
2309 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2310 (vec_select:DF
2311 (match_dup 1)
2312 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2313 "TARGET_SSE3
2314 && INTVAL (operands[2]) != INTVAL (operands[3])"
2315 "@
2316 haddpd\t{%0, %0|%0, %0}
2317 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2318 [(set_attr "isa" "noavx,avx")
2319 (set_attr "type" "sseadd1")
2320 (set_attr "prefix" "orig,vex")
2321 (set_attr "mode" "V2DF")])
2322
2323 (define_insn "*sse3_hsubv2df3_low"
2324 [(set (match_operand:DF 0 "register_operand" "=x,x")
2325 (minus:DF
2326 (vec_select:DF
2327 (match_operand:V2DF 1 "register_operand" "0,x")
2328 (parallel [(const_int 0)]))
2329 (vec_select:DF
2330 (match_dup 1)
2331 (parallel [(const_int 1)]))))]
2332 "TARGET_SSE3"
2333 "@
2334 hsubpd\t{%0, %0|%0, %0}
2335 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2336 [(set_attr "isa" "noavx,avx")
2337 (set_attr "type" "sseadd1")
2338 (set_attr "prefix" "orig,vex")
2339 (set_attr "mode" "V2DF")])
2340
2341 (define_insn "avx_h<plusminus_insn>v8sf3"
2342 [(set (match_operand:V8SF 0 "register_operand" "=x")
2343 (vec_concat:V8SF
2344 (vec_concat:V4SF
2345 (vec_concat:V2SF
2346 (plusminus:SF
2347 (vec_select:SF
2348 (match_operand:V8SF 1 "register_operand" "x")
2349 (parallel [(const_int 0)]))
2350 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2351 (plusminus:SF
2352 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2353 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2354 (vec_concat:V2SF
2355 (plusminus:SF
2356 (vec_select:SF
2357 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2358 (parallel [(const_int 0)]))
2359 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2360 (plusminus:SF
2361 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2362 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2363 (vec_concat:V4SF
2364 (vec_concat:V2SF
2365 (plusminus:SF
2366 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2367 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2368 (plusminus:SF
2369 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2370 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2371 (vec_concat:V2SF
2372 (plusminus:SF
2373 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2374 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2375 (plusminus:SF
2376 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2377 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2378 "TARGET_AVX"
2379 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2380 [(set_attr "type" "sseadd")
2381 (set_attr "prefix" "vex")
2382 (set_attr "mode" "V8SF")])
2383
2384 (define_insn "sse3_h<plusminus_insn>v4sf3"
2385 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2386 (vec_concat:V4SF
2387 (vec_concat:V2SF
2388 (plusminus:SF
2389 (vec_select:SF
2390 (match_operand:V4SF 1 "register_operand" "0,x")
2391 (parallel [(const_int 0)]))
2392 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2393 (plusminus:SF
2394 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2395 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2396 (vec_concat:V2SF
2397 (plusminus:SF
2398 (vec_select:SF
2399 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2400 (parallel [(const_int 0)]))
2401 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2402 (plusminus:SF
2403 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2404 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2405 "TARGET_SSE3"
2406 "@
2407 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2408 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2409 [(set_attr "isa" "noavx,avx")
2410 (set_attr "type" "sseadd")
2411 (set_attr "atom_unit" "complex")
2412 (set_attr "prefix" "orig,vex")
2413 (set_attr "prefix_rep" "1,*")
2414 (set_attr "mode" "V4SF")])
2415
2416 (define_expand "reduc_plus_scal_v8df"
2417 [(match_operand:DF 0 "register_operand")
2418 (match_operand:V8DF 1 "register_operand")]
2419 "TARGET_AVX512F"
2420 {
2421 rtx tmp = gen_reg_rtx (V8DFmode);
2422 ix86_expand_reduc (gen_addv8df3, tmp, operands[1]);
2423 emit_insn (gen_vec_extractv8dfdf (operands[0], tmp, const0_rtx));
2424 DONE;
2425 })
2426
2427 (define_expand "reduc_plus_scal_v4df"
2428 [(match_operand:DF 0 "register_operand")
2429 (match_operand:V4DF 1 "register_operand")]
2430 "TARGET_AVX"
2431 {
2432 rtx tmp = gen_reg_rtx (V4DFmode);
2433 rtx tmp2 = gen_reg_rtx (V4DFmode);
2434 rtx vec_res = gen_reg_rtx (V4DFmode);
2435 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2436 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2437 emit_insn (gen_addv4df3 (vec_res, tmp, tmp2));
2438 emit_insn (gen_vec_extractv4dfdf (operands[0], vec_res, const0_rtx));
2439 DONE;
2440 })
2441
2442 (define_expand "reduc_plus_scal_v2df"
2443 [(match_operand:DF 0 "register_operand")
2444 (match_operand:V2DF 1 "register_operand")]
2445 "TARGET_SSE3"
2446 {
2447 rtx tmp = gen_reg_rtx (V2DFmode);
2448 emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1]));
2449 emit_insn (gen_vec_extractv2dfdf (operands[0], tmp, const0_rtx));
2450 DONE;
2451 })
2452
2453 (define_expand "reduc_plus_scal_v16sf"
2454 [(match_operand:SF 0 "register_operand")
2455 (match_operand:V16SF 1 "register_operand")]
2456 "TARGET_AVX512F"
2457 {
2458 rtx tmp = gen_reg_rtx (V16SFmode);
2459 ix86_expand_reduc (gen_addv16sf3, tmp, operands[1]);
2460 emit_insn (gen_vec_extractv16sfsf (operands[0], tmp, const0_rtx));
2461 DONE;
2462 })
2463
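;; The V8SF reduction below leans on the lane-local hadd above: two vhaddps
;; passes leave the low-lane total in element 0 and the high-lane total in
;; element 4, vperm2f128 with immediate 1 swaps the two lanes, and the final
;; add combines both partial sums before element 0 is extracted.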
2464 (define_expand "reduc_plus_scal_v8sf"
2465 [(match_operand:SF 0 "register_operand")
2466 (match_operand:V8SF 1 "register_operand")]
2467 "TARGET_AVX"
2468 {
2469 rtx tmp = gen_reg_rtx (V8SFmode);
2470 rtx tmp2 = gen_reg_rtx (V8SFmode);
2471 rtx vec_res = gen_reg_rtx (V8SFmode);
2472 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2473 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2474 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2475 emit_insn (gen_addv8sf3 (vec_res, tmp, tmp2));
2476 emit_insn (gen_vec_extractv8sfsf (operands[0], vec_res, const0_rtx));
2477 DONE;
2478 })
2479
2480 (define_expand "reduc_plus_scal_v4sf"
2481 [(match_operand:SF 0 "register_operand")
2482 (match_operand:V4SF 1 "register_operand")]
2483 "TARGET_SSE"
2484 {
2485 rtx vec_res = gen_reg_rtx (V4SFmode);
2486 if (TARGET_SSE3)
2487 {
2488 rtx tmp = gen_reg_rtx (V4SFmode);
2489 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2490 emit_insn (gen_sse3_haddv4sf3 (vec_res, tmp, tmp));
2491 }
2492 else
2493 ix86_expand_reduc (gen_addv4sf3, vec_res, operands[1]);
2494 emit_insn (gen_vec_extractv4sfsf (operands[0], vec_res, const0_rtx));
2495 DONE;
2496 })
2497
2498 ;; Modes handled by reduc_sm{in,ax}* patterns.
2499 (define_mode_iterator REDUC_SMINMAX_MODE
2500 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2501 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2502 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2503 (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2504 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2505 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2506 (V8DF "TARGET_AVX512F")])
2507
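;; These scalar reductions defer to ix86_expand_reduc, which repeatedly
;; halves the vector with a shuffle and re-applies the min/max insn, so an
;; N-element vector takes log2(N) shuffle+op rounds before element 0 is
;; extracted (a description of the helper, not of new RTL here).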
2508 (define_expand "reduc_<code>_scal_<mode>"
2509 [(smaxmin:REDUC_SMINMAX_MODE
2510 (match_operand:<ssescalarmode> 0 "register_operand")
2511 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2512 ""
2513 {
2514 rtx tmp = gen_reg_rtx (<MODE>mode);
2515 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2516 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2517 const0_rtx));
2518 DONE;
2519 })
2520
2521 (define_expand "reduc_<code>_scal_<mode>"
2522 [(umaxmin:VI_AVX512BW
2523 (match_operand:<ssescalarmode> 0 "register_operand")
2524 (match_operand:VI_AVX512BW 1 "register_operand"))]
2525 "TARGET_AVX512F"
2526 {
2527 rtx tmp = gen_reg_rtx (<MODE>mode);
2528 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2529 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2530 const0_rtx));
2531 DONE;
2532 })
2533
2534 (define_expand "reduc_<code>_scal_<mode>"
2535 [(umaxmin:VI_256
2536 (match_operand:<ssescalarmode> 0 "register_operand")
2537 (match_operand:VI_256 1 "register_operand"))]
2538 "TARGET_AVX2"
2539 {
2540 rtx tmp = gen_reg_rtx (<MODE>mode);
2541 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2542 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2543 const0_rtx));
2544 DONE;
2545 })
2546
2547 (define_expand "reduc_umin_scal_v8hi"
2548 [(umin:V8HI
2549 (match_operand:HI 0 "register_operand")
2550 (match_operand:V8HI 1 "register_operand"))]
2551 "TARGET_SSE4_1"
2552 {
2553 rtx tmp = gen_reg_rtx (V8HImode);
2554 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2555 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
2556 DONE;
2557 })
2558
2559 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2560 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2561 (unspec:VF_AVX512VL
2562 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2563 (match_operand:SI 2 "const_0_to_255_operand")]
2564 UNSPEC_REDUCE))]
2565 "TARGET_AVX512DQ"
2566 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2567 [(set_attr "type" "sse")
2568 (set_attr "prefix" "evex")
2569 (set_attr "mode" "<MODE>")])
2570
2571 (define_insn "reduces<mode><mask_scalar_name>"
2572 [(set (match_operand:VF_128 0 "register_operand" "=v")
2573 (vec_merge:VF_128
2574 (unspec:VF_128
2575 [(match_operand:VF_128 1 "register_operand" "v")
2576 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2577 (match_operand:SI 3 "const_0_to_255_operand")]
2578 UNSPEC_REDUCE)
2579 (match_dup 1)
2580 (const_int 1)))]
2581 "TARGET_AVX512DQ"
2582 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2, %3}"
2583 [(set_attr "type" "sse")
2584 (set_attr "prefix" "evex")
2585 (set_attr "mode" "<MODE>")])
2586
2587 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2588 ;;
2589 ;; Parallel floating point comparisons
2590 ;;
2591 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2592
2593 (define_insn "avx_cmp<mode>3"
2594 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2595 (unspec:VF_128_256
2596 [(match_operand:VF_128_256 1 "register_operand" "x")
2597 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2598 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2599 UNSPEC_PCMP))]
2600 "TARGET_AVX"
2601 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2602 [(set_attr "type" "ssecmp")
2603 (set_attr "length_immediate" "1")
2604 (set_attr "prefix" "vex")
2605 (set_attr "mode" "<MODE>")])
2606
2607 (define_insn "avx_vmcmp<mode>3"
2608 [(set (match_operand:VF_128 0 "register_operand" "=x")
2609 (vec_merge:VF_128
2610 (unspec:VF_128
2611 [(match_operand:VF_128 1 "register_operand" "x")
2612 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2613 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2614 UNSPEC_PCMP)
2615 (match_dup 1)
2616 (const_int 1)))]
2617 "TARGET_AVX"
2618 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2619 [(set_attr "type" "ssecmp")
2620 (set_attr "length_immediate" "1")
2621 (set_attr "prefix" "vex")
2622 (set_attr "mode" "<ssescalarmode>")])
2623
2624 (define_insn "*<sse>_maskcmp<mode>3_comm"
2625 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2626 (match_operator:VF_128_256 3 "sse_comparison_operator"
2627 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2628 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2629 "TARGET_SSE
2630 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2631 "@
2632 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2633 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2634 [(set_attr "isa" "noavx,avx")
2635 (set_attr "type" "ssecmp")
2636 (set_attr "length_immediate" "1")
2637 (set_attr "prefix" "orig,vex")
2638 (set_attr "mode" "<MODE>")])
2639
2640 (define_insn "<sse>_maskcmp<mode>3"
2641 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2642 (match_operator:VF_128_256 3 "sse_comparison_operator"
2643 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2644 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2645 "TARGET_SSE"
2646 "@
2647 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2648 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2649 [(set_attr "isa" "noavx,avx")
2650 (set_attr "type" "ssecmp")
2651 (set_attr "length_immediate" "1")
2652 (set_attr "prefix" "orig,vex")
2653 (set_attr "mode" "<MODE>")])
2654
2655 (define_insn "<sse>_vmmaskcmp<mode>3"
2656 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2657 (vec_merge:VF_128
2658 (match_operator:VF_128 3 "sse_comparison_operator"
2659 [(match_operand:VF_128 1 "register_operand" "0,x")
2660 (match_operand:VF_128 2 "vector_operand" "xBm,xm")])
2661 (match_dup 1)
2662 (const_int 1)))]
2663 "TARGET_SSE"
2664 "@
2665 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2666 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2667 [(set_attr "isa" "noavx,avx")
2668 (set_attr "type" "ssecmp")
2669 (set_attr "length_immediate" "1,*")
2670 (set_attr "prefix" "orig,vex")
2671 (set_attr "mode" "<ssescalarmode>")])
2672
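;; The FP vcmp* forms take a 5-bit comparison predicate (0-31) while the
;; integer vpcmp*/vpcmpu* forms only take 3 bits (0-7); the mode attribute
;; below picks the matching immediate predicate per element mode.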
2673 (define_mode_attr cmp_imm_predicate
2674 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2675 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2676 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2677 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2678 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2679 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2680 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2681 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2682 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
2683
2684 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2685 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2686 (unspec:<avx512fmaskmode>
2687 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2688 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2689 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2690 UNSPEC_PCMP))]
2691 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2692 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2693 [(set_attr "type" "ssecmp")
2694 (set_attr "length_immediate" "1")
2695 (set_attr "prefix" "evex")
2696 (set_attr "mode" "<sseinsnmode>")])
2697
2698 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2699 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2700 (unspec:<avx512fmaskmode>
2701 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2702 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2703 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2704 UNSPEC_PCMP))]
2705 "TARGET_AVX512BW"
2706 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2707 [(set_attr "type" "ssecmp")
2708 (set_attr "length_immediate" "1")
2709 (set_attr "prefix" "evex")
2710 (set_attr "mode" "<sseinsnmode>")])
2711
2712 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2713 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2714 (unspec:<avx512fmaskmode>
2715 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2716 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2717 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2718 UNSPEC_UNSIGNED_PCMP))]
2719 "TARGET_AVX512BW"
2720 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2721 [(set_attr "type" "ssecmp")
2722 (set_attr "length_immediate" "1")
2723 (set_attr "prefix" "evex")
2724 (set_attr "mode" "<sseinsnmode>")])
2725
2726 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2727 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2728 (unspec:<avx512fmaskmode>
2729 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2730 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2731 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2732 UNSPEC_UNSIGNED_PCMP))]
2733 "TARGET_AVX512F"
2734 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2735 [(set_attr "type" "ssecmp")
2736 (set_attr "length_immediate" "1")
2737 (set_attr "prefix" "evex")
2738 (set_attr "mode" "<sseinsnmode>")])
2739
2740 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2741 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2742 (and:<avx512fmaskmode>
2743 (unspec:<avx512fmaskmode>
2744 [(match_operand:VF_128 1 "register_operand" "v")
2745 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2746 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2747 UNSPEC_PCMP)
2748 (const_int 1)))]
2749 "TARGET_AVX512F"
2750 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2751 [(set_attr "type" "ssecmp")
2752 (set_attr "length_immediate" "1")
2753 (set_attr "prefix" "evex")
2754 (set_attr "mode" "<ssescalarmode>")])
2755
2756 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2757 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2758 (and:<avx512fmaskmode>
2759 (unspec:<avx512fmaskmode>
2760 [(match_operand:VF_128 1 "register_operand" "v")
2761 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2762 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2763 UNSPEC_PCMP)
2764 (and:<avx512fmaskmode>
2765 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2766 (const_int 1))))]
2767 "TARGET_AVX512F"
2768 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2769 [(set_attr "type" "ssecmp")
2770 (set_attr "length_immediate" "1")
2771 (set_attr "prefix" "evex")
2772 (set_attr "mode" "<ssescalarmode>")])
2773
2774 (define_insn "avx512f_maskcmp<mode>3"
2775 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2776 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2777 [(match_operand:VF 1 "register_operand" "v")
2778 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2779 "TARGET_AVX512F"
2780 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2781 [(set_attr "type" "ssecmp")
2782 (set_attr "length_immediate" "1")
2783 (set_attr "prefix" "evex")
2784 (set_attr "mode" "<sseinsnmode>")])
2785
2786 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
2787 [(set (reg:CCFP FLAGS_REG)
2788 (compare:CCFP
2789 (vec_select:MODEF
2790 (match_operand:<ssevecmode> 0 "register_operand" "v")
2791 (parallel [(const_int 0)]))
2792 (vec_select:MODEF
2793 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2794 (parallel [(const_int 0)]))))]
2795 "SSE_FLOAT_MODE_P (<MODE>mode)"
2796 "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2797 [(set_attr "type" "ssecomi")
2798 (set_attr "prefix" "maybe_vex")
2799 (set_attr "prefix_rep" "0")
2800 (set (attr "prefix_data16")
2801 (if_then_else (eq_attr "mode" "DF")
2802 (const_string "1")
2803 (const_string "0")))
2804 (set_attr "mode" "<MODE>")])
2805
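;; vec_cmp expanders.  With AVX-512 the comparison result is a k-mask, so
;; these go through ix86_expand_mask_vec_cmp; the AVX2/SSE variants further
;; down instead build the usual all-ones/all-zeros vector per element via
;; ix86_expand_int_vec_cmp or ix86_expand_fp_vec_cmp.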
2806 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2807 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2808 (match_operator:<avx512fmaskmode> 1 ""
2809 [(match_operand:V48_AVX512VL 2 "register_operand")
2810 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
2811 "TARGET_AVX512F"
2812 {
2813 bool ok = ix86_expand_mask_vec_cmp (operands);
2814 gcc_assert (ok);
2815 DONE;
2816 })
2817
2818 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2819 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2820 (match_operator:<avx512fmaskmode> 1 ""
2821 [(match_operand:VI12_AVX512VL 2 "register_operand")
2822 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2823 "TARGET_AVX512BW"
2824 {
2825 bool ok = ix86_expand_mask_vec_cmp (operands);
2826 gcc_assert (ok);
2827 DONE;
2828 })
2829
2830 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2831 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2832 (match_operator:<sseintvecmode> 1 ""
2833 [(match_operand:VI_256 2 "register_operand")
2834 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2835 "TARGET_AVX2"
2836 {
2837 bool ok = ix86_expand_int_vec_cmp (operands);
2838 gcc_assert (ok);
2839 DONE;
2840 })
2841
2842 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2843 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2844 (match_operator:<sseintvecmode> 1 ""
2845 [(match_operand:VI124_128 2 "register_operand")
2846 (match_operand:VI124_128 3 "vector_operand")]))]
2847 "TARGET_SSE2"
2848 {
2849 bool ok = ix86_expand_int_vec_cmp (operands);
2850 gcc_assert (ok);
2851 DONE;
2852 })
2853
2854 (define_expand "vec_cmpv2div2di"
2855 [(set (match_operand:V2DI 0 "register_operand")
2856 (match_operator:V2DI 1 ""
2857 [(match_operand:V2DI 2 "register_operand")
2858 (match_operand:V2DI 3 "vector_operand")]))]
2859 "TARGET_SSE4_2"
2860 {
2861 bool ok = ix86_expand_int_vec_cmp (operands);
2862 gcc_assert (ok);
2863 DONE;
2864 })
2865
2866 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2867 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2868 (match_operator:<sseintvecmode> 1 ""
2869 [(match_operand:VF_256 2 "register_operand")
2870 (match_operand:VF_256 3 "nonimmediate_operand")]))]
2871 "TARGET_AVX"
2872 {
2873 bool ok = ix86_expand_fp_vec_cmp (operands);
2874 gcc_assert (ok);
2875 DONE;
2876 })
2877
2878 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2879 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2880 (match_operator:<sseintvecmode> 1 ""
2881 [(match_operand:VF_128 2 "register_operand")
2882 (match_operand:VF_128 3 "vector_operand")]))]
2883 "TARGET_SSE"
2884 {
2885 bool ok = ix86_expand_fp_vec_cmp (operands);
2886 gcc_assert (ok);
2887 DONE;
2888 })
2889
2890 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2891 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2892 (match_operator:<avx512fmaskmode> 1 ""
2893 [(match_operand:VI48_AVX512VL 2 "register_operand")
2894 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
2895 "TARGET_AVX512F"
2896 {
2897 bool ok = ix86_expand_mask_vec_cmp (operands);
2898 gcc_assert (ok);
2899 DONE;
2900 })
2901
2902 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2903 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2904 (match_operator:<avx512fmaskmode> 1 ""
2905 [(match_operand:VI12_AVX512VL 2 "register_operand")
2906 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2907 "TARGET_AVX512BW"
2908 {
2909 bool ok = ix86_expand_mask_vec_cmp (operands);
2910 gcc_assert (ok);
2911 DONE;
2912 })
2913
2914 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
2915 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2916 (match_operator:<sseintvecmode> 1 ""
2917 [(match_operand:VI_256 2 "register_operand")
2918 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2919 "TARGET_AVX2"
2920 {
2921 bool ok = ix86_expand_int_vec_cmp (operands);
2922 gcc_assert (ok);
2923 DONE;
2924 })
2925
2926 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
2927 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2928 (match_operator:<sseintvecmode> 1 ""
2929 [(match_operand:VI124_128 2 "register_operand")
2930 (match_operand:VI124_128 3 "vector_operand")]))]
2931 "TARGET_SSE2"
2932 {
2933 bool ok = ix86_expand_int_vec_cmp (operands);
2934 gcc_assert (ok);
2935 DONE;
2936 })
2937
2938 (define_expand "vec_cmpuv2div2di"
2939 [(set (match_operand:V2DI 0 "register_operand")
2940 (match_operator:V2DI 1 ""
2941 [(match_operand:V2DI 2 "register_operand")
2942 (match_operand:V2DI 3 "vector_operand")]))]
2943 "TARGET_SSE4_2"
2944 {
2945 bool ok = ix86_expand_int_vec_cmp (operands);
2946 gcc_assert (ok);
2947 DONE;
2948 })
2949
2950 (define_expand "vec_cmpeqv2div2di"
2951 [(set (match_operand:V2DI 0 "register_operand")
2952 (match_operator:V2DI 1 ""
2953 [(match_operand:V2DI 2 "register_operand")
2954 (match_operand:V2DI 3 "vector_operand")]))]
2955 "TARGET_SSE4_1"
2956 {
2957 bool ok = ix86_expand_int_vec_cmp (operands);
2958 gcc_assert (ok);
2959 DONE;
2960 })
2961
2962 (define_expand "vcond<V_512:mode><VF_512:mode>"
2963 [(set (match_operand:V_512 0 "register_operand")
2964 (if_then_else:V_512
2965 (match_operator 3 ""
2966 [(match_operand:VF_512 4 "nonimmediate_operand")
2967 (match_operand:VF_512 5 "nonimmediate_operand")])
2968 (match_operand:V_512 1 "general_operand")
2969 (match_operand:V_512 2 "general_operand")))]
2970 "TARGET_AVX512F
2971 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2972 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2973 {
2974 bool ok = ix86_expand_fp_vcond (operands);
2975 gcc_assert (ok);
2976 DONE;
2977 })
2978
2979 (define_expand "vcond<V_256:mode><VF_256:mode>"
2980 [(set (match_operand:V_256 0 "register_operand")
2981 (if_then_else:V_256
2982 (match_operator 3 ""
2983 [(match_operand:VF_256 4 "nonimmediate_operand")
2984 (match_operand:VF_256 5 "nonimmediate_operand")])
2985 (match_operand:V_256 1 "general_operand")
2986 (match_operand:V_256 2 "general_operand")))]
2987 "TARGET_AVX
2988 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2989 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2990 {
2991 bool ok = ix86_expand_fp_vcond (operands);
2992 gcc_assert (ok);
2993 DONE;
2994 })
2995
2996 (define_expand "vcond<V_128:mode><VF_128:mode>"
2997 [(set (match_operand:V_128 0 "register_operand")
2998 (if_then_else:V_128
2999 (match_operator 3 ""
3000 [(match_operand:VF_128 4 "vector_operand")
3001 (match_operand:VF_128 5 "vector_operand")])
3002 (match_operand:V_128 1 "general_operand")
3003 (match_operand:V_128 2 "general_operand")))]
3004 "TARGET_SSE
3005 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3006 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3007 {
3008 bool ok = ix86_expand_fp_vcond (operands);
3009 gcc_assert (ok);
3010 DONE;
3011 })
3012
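;; vcond_mask expanders.  The AVX-512 forms can express the select directly
;; as a vec_merge on a mask register; the AVX2/SSE forms hand the
;; vector-boolean mask to ix86_expand_sse_movcc, which emits a variable
;; blend where available and otherwise falls back to the classic
;; and/andnot/or sequence.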
3013 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3014 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3015 (vec_merge:V48_AVX512VL
3016 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3017 (match_operand:V48_AVX512VL 2 "vector_move_operand")
3018 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3019 "TARGET_AVX512F")
3020
3021 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3022 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3023 (vec_merge:VI12_AVX512VL
3024 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3025 (match_operand:VI12_AVX512VL 2 "vector_move_operand")
3026 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3027 "TARGET_AVX512BW")
3028
3029 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3030 [(set (match_operand:VI_256 0 "register_operand")
3031 (vec_merge:VI_256
3032 (match_operand:VI_256 1 "nonimmediate_operand")
3033 (match_operand:VI_256 2 "vector_move_operand")
3034 (match_operand:<sseintvecmode> 3 "register_operand")))]
3035 "TARGET_AVX2"
3036 {
3037 ix86_expand_sse_movcc (operands[0], operands[3],
3038 operands[1], operands[2]);
3039 DONE;
3040 })
3041
3042 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3043 [(set (match_operand:VI124_128 0 "register_operand")
3044 (vec_merge:VI124_128
3045 (match_operand:VI124_128 1 "vector_operand")
3046 (match_operand:VI124_128 2 "vector_move_operand")
3047 (match_operand:<sseintvecmode> 3 "register_operand")))]
3048 "TARGET_SSE2"
3049 {
3050 ix86_expand_sse_movcc (operands[0], operands[3],
3051 operands[1], operands[2]);
3052 DONE;
3053 })
3054
3055 (define_expand "vcond_mask_v2div2di"
3056 [(set (match_operand:V2DI 0 "register_operand")
3057 (vec_merge:V2DI
3058 (match_operand:V2DI 1 "vector_operand")
3059 (match_operand:V2DI 2 "vector_move_operand")
3060 (match_operand:V2DI 3 "register_operand")))]
3061 "TARGET_SSE4_2"
3062 {
3063 ix86_expand_sse_movcc (operands[0], operands[3],
3064 operands[1], operands[2]);
3065 DONE;
3066 })
3067
3068 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3069 [(set (match_operand:VF_256 0 "register_operand")
3070 (vec_merge:VF_256
3071 (match_operand:VF_256 1 "nonimmediate_operand")
3072 (match_operand:VF_256 2 "vector_move_operand")
3073 (match_operand:<sseintvecmode> 3 "register_operand")))]
3074 "TARGET_AVX"
3075 {
3076 ix86_expand_sse_movcc (operands[0], operands[3],
3077 operands[1], operands[2]);
3078 DONE;
3079 })
3080
3081 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3082 [(set (match_operand:VF_128 0 "register_operand")
3083 (vec_merge:VF_128
3084 (match_operand:VF_128 1 "vector_operand")
3085 (match_operand:VF_128 2 "vector_move_operand")
3086 (match_operand:<sseintvecmode> 3 "register_operand")))]
3087 "TARGET_SSE"
3088 {
3089 ix86_expand_sse_movcc (operands[0], operands[3],
3090 operands[1], operands[2]);
3091 DONE;
3092 })
3093
3094 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3095 ;;
3096 ;; Parallel floating point logical operations
3097 ;;
3098 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3099
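;; Floating point andnot computes (~op1) & op2, i.e. andnps/andnpd; note the
;; operand order: the complemented input is register operand 1, not the
;; memory-capable operand 2.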
3100 (define_insn "<sse>_andnot<mode>3<mask_name>"
3101 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3102 (and:VF_128_256
3103 (not:VF_128_256
3104 (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3105 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3106 "TARGET_SSE && <mask_avx512vl_condition>"
3107 {
3108 static char buf[128];
3109 const char *ops;
3110 const char *suffix;
3111
3112 switch (which_alternative)
3113 {
3114 case 0:
3115 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3116 break;
3117 case 1:
3118 case 2:
3119 case 3:
3120 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3121 break;
3122 default:
3123 gcc_unreachable ();
3124 }
3125
3126 switch (get_attr_mode (insn))
3127 {
3128 case MODE_V8SF:
3129 case MODE_V4SF:
3130 suffix = "ps";
3131 break;
3132 case MODE_OI:
3133 case MODE_TI:
3134 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3135 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3136 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3137 break;
3138 default:
3139 suffix = "<ssemodesuffix>";
3140 }
3141
3142 snprintf (buf, sizeof (buf), ops, suffix);
3143 return buf;
3144 }
3145 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3146 (set_attr "type" "sselog")
3147 (set_attr "prefix" "orig,maybe_vex,evex,evex")
3148 (set (attr "mode")
3149 (cond [(and (match_test "<mask_applied>")
3150 (and (eq_attr "alternative" "1")
3151 (match_test "!TARGET_AVX512DQ")))
3152 (const_string "<sseintvecmode2>")
3153 (eq_attr "alternative" "3")
3154 (const_string "<sseintvecmode2>")
3155 (and (match_test "<MODE_SIZE> == 16")
3156 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3157 (const_string "<ssePSmode>")
3158 (match_test "TARGET_AVX")
3159 (const_string "<MODE>")
3160 (match_test "optimize_function_for_size_p (cfun)")
3161 (const_string "V4SF")
3162 ]
3163 (const_string "<MODE>")))])
3164
3165
3166 (define_insn "<sse>_andnot<mode>3<mask_name>"
3167 [(set (match_operand:VF_512 0 "register_operand" "=v")
3168 (and:VF_512
3169 (not:VF_512
3170 (match_operand:VF_512 1 "register_operand" "v"))
3171 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3172 "TARGET_AVX512F"
3173 {
3174 static char buf[128];
3175 const char *ops;
3176 const char *suffix;
3177
3178 suffix = "<ssemodesuffix>";
3179 ops = "";
3180
3181 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3182 if (!TARGET_AVX512DQ)
3183 {
3184 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3185 ops = "p";
3186 }
3187
3188 snprintf (buf, sizeof (buf),
3189 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3190 ops, suffix);
3191 return buf;
3192 }
3193 [(set_attr "type" "sselog")
3194 (set_attr "prefix" "evex")
3195 (set (attr "mode")
3196 (if_then_else (match_test "TARGET_AVX512DQ")
3197 (const_string "<sseinsnmode>")
3198 (const_string "XI")))])
3199
3200 (define_expand "<code><mode>3<mask_name>"
3201 [(set (match_operand:VF_128_256 0 "register_operand")
3202 (any_logic:VF_128_256
3203 (match_operand:VF_128_256 1 "vector_operand")
3204 (match_operand:VF_128_256 2 "vector_operand")))]
3205 "TARGET_SSE && <mask_avx512vl_condition>"
3206 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3207
3208 (define_expand "<code><mode>3<mask_name>"
3209 [(set (match_operand:VF_512 0 "register_operand")
3210 (any_logic:VF_512
3211 (match_operand:VF_512 1 "nonimmediate_operand")
3212 (match_operand:VF_512 2 "nonimmediate_operand")))]
3213 "TARGET_AVX512F"
3214 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3215
3216 (define_insn "*<code><mode>3<mask_name>"
3217 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3218 (any_logic:VF_128_256
3219 (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3220 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3221 "TARGET_SSE && <mask_avx512vl_condition>
3222 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3223 {
3224 static char buf[128];
3225 const char *ops;
3226 const char *suffix;
3227
3228 switch (which_alternative)
3229 {
3230 case 0:
3231 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3232 break;
3233 case 1:
3234 case 2:
3235 case 3:
3236 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3237 break;
3238 default:
3239 gcc_unreachable ();
3240 }
3241
3242 switch (get_attr_mode (insn))
3243 {
3244 case MODE_V8SF:
3245 case MODE_V4SF:
3246 suffix = "ps";
3247 break;
3248 case MODE_OI:
3249 case MODE_TI:
3250 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
3251 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3252 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3253 break;
3254 default:
3255 suffix = "<ssemodesuffix>";
3256 }
3257
3258 snprintf (buf, sizeof (buf), ops, suffix);
3259 return buf;
3260 }
3261 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3262 (set_attr "type" "sselog")
3263 (set_attr "prefix" "orig,maybe_evex,evex,evex")
3264 (set (attr "mode")
3265 (cond [(and (match_test "<mask_applied>")
3266 (and (eq_attr "alternative" "1")
3267 (match_test "!TARGET_AVX512DQ")))
3268 (const_string "<sseintvecmode2>")
3269 (eq_attr "alternative" "3")
3270 (const_string "<sseintvecmode2>")
3271 (and (match_test "<MODE_SIZE> == 16")
3272 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3273 (const_string "<ssePSmode>")
3274 (match_test "TARGET_AVX")
3275 (const_string "<MODE>")
3276 (match_test "optimize_function_for_size_p (cfun)")
3277 (const_string "V4SF")
3278 ]
3279 (const_string "<MODE>")))])
3280
3281 (define_insn "*<code><mode>3<mask_name>"
3282 [(set (match_operand:VF_512 0 "register_operand" "=v")
3283 (any_logic:VF_512
3284 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3285 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3286 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3287 {
3288 static char buf[128];
3289 const char *ops;
3290 const char *suffix;
3291
3292 suffix = "<ssemodesuffix>";
3293 ops = "";
3294
3295 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3296 if (!TARGET_AVX512DQ)
3297 {
3298 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3299 ops = "p";
3300 }
3301
3302 snprintf (buf, sizeof (buf),
3303 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3304 ops, suffix);
3305 return buf;
3306 }
3307 [(set_attr "type" "sselog")
3308 (set_attr "prefix" "evex")
3309 (set (attr "mode")
3310 (if_then_else (match_test "TARGET_AVX512DQ")
3311 (const_string "<sseinsnmode>")
3312 (const_string "XI")))])
3313
3314 (define_expand "copysign<mode>3"
3315 [(set (match_dup 4)
3316 (and:VF
3317 (not:VF (match_dup 3))
3318 (match_operand:VF 1 "vector_operand")))
3319 (set (match_dup 5)
3320 (and:VF (match_dup 3)
3321 (match_operand:VF 2 "vector_operand")))
3322 (set (match_operand:VF 0 "register_operand")
3323 (ior:VF (match_dup 4) (match_dup 5)))]
3324 "TARGET_SSE"
3325 {
3326 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3327
3328 operands[4] = gen_reg_rtx (<MODE>mode);
3329 operands[5] = gen_reg_rtx (<MODE>mode);
3330 })
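
;; Roughly, the expansion above computes, per element (with MASK standing
;; for the sign-bit constant built by ix86_build_signbit_mask):
;;
;;   tmp4 = op1 & ~MASK;	/* magnitude of operand 1 */
;;   tmp5 = op2 &  MASK;	/* sign bit of operand 2 */
;;   op0  = tmp4 | tmp5;	/* copysign (op1, op2) */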
3331
3332 ;; Also define scalar versions. These are used for abs, neg, and
3333 ;; conditional move. Using subregs into vector modes causes register
3334 ;; allocation lossage. These patterns do not allow memory operands
3335 ;; because the native instructions read the full 128 bits.
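;; As a rough illustration, the same full-width logical idioms implement
;; the scalar operations mentioned above, with SIGNMASK denoting the
;; generated sign-bit constant:
;;
;;   abs (x):  x & ~SIGNMASK	(the andnot pattern below)
;;   neg (x):  x ^  SIGNMASK	(xor via the any_logic pattern)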
3336
3337 (define_insn "*andnot<mode>3"
3338 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3339 (and:MODEF
3340 (not:MODEF
3341 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
3342 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3343 "SSE_FLOAT_MODE_P (<MODE>mode)"
3344 {
3345 static char buf[128];
3346 const char *ops;
3347 const char *suffix
3348 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3349
3350 switch (which_alternative)
3351 {
3352 case 0:
3353 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3354 break;
3355 case 1:
3356 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3357 break;
3358 case 2:
3359 if (TARGET_AVX512DQ)
3360 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3361 else
3362 {
3363 suffix = <MODE>mode == DFmode ? "q" : "d";
3364 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3365 }
3366 break;
3367 case 3:
3368 if (TARGET_AVX512DQ)
3369 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3370 else
3371 {
3372 suffix = <MODE>mode == DFmode ? "q" : "d";
3373 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3374 }
3375 break;
3376 default:
3377 gcc_unreachable ();
3378 }
3379
3380 snprintf (buf, sizeof (buf), ops, suffix);
3381 return buf;
3382 }
3383 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3384 (set_attr "type" "sselog")
3385 (set_attr "prefix" "orig,vex,evex,evex")
3386 (set (attr "mode")
3387 (cond [(eq_attr "alternative" "2")
3388 (if_then_else (match_test "TARGET_AVX512DQ")
3389 (const_string "<ssevecmode>")
3390 (const_string "TI"))
3391 (eq_attr "alternative" "3")
3392 (if_then_else (match_test "TARGET_AVX512DQ")
3393 (const_string "<avx512fvecmode>")
3394 (const_string "XI"))
3395 (and (match_test "<MODE_SIZE> == 16")
3396 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3397 (const_string "V4SF")
3398 (match_test "TARGET_AVX")
3399 (const_string "<ssevecmode>")
3400 (match_test "optimize_function_for_size_p (cfun)")
3401 (const_string "V4SF")
3402 ]
3403 (const_string "<ssevecmode>")))])
3404
3405 (define_insn "*andnottf3"
3406 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3407 (and:TF
3408 (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
3409 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3410 "TARGET_SSE"
3411 {
3412 static char buf[128];
3413 const char *ops;
3414 const char *tmp
3415 = (which_alternative >= 2 ? "pandnq"
3416 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
3417
3418 switch (which_alternative)
3419 {
3420 case 0:
3421 ops = "%s\t{%%2, %%0|%%0, %%2}";
3422 break;
3423 case 1:
3424 case 2:
3425 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3426 break;
3427 case 3:
3428 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3429 break;
3430 default:
3431 gcc_unreachable ();
3432 }
3433
3434 snprintf (buf, sizeof (buf), ops, tmp);
3435 return buf;
3436 }
3437 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3438 (set_attr "type" "sselog")
3439 (set (attr "prefix_data16")
3440 (if_then_else
3441 (and (eq_attr "alternative" "0")
3442 (eq_attr "mode" "TI"))
3443 (const_string "1")
3444 (const_string "*")))
3445 (set_attr "prefix" "orig,vex,evex,evex")
3446 (set (attr "mode")
3447 (cond [(eq_attr "alternative" "2")
3448 (const_string "TI")
3449 (eq_attr "alternative" "3")
3450 (const_string "XI")
3451 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3452 (const_string "V4SF")
3453 (match_test "TARGET_AVX")
3454 (const_string "TI")
3455 (ior (not (match_test "TARGET_SSE2"))
3456 (match_test "optimize_function_for_size_p (cfun)"))
3457 (const_string "V4SF")
3458 ]
3459 (const_string "TI")))])
3460
3461 (define_insn "*<code><mode>3"
3462 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3463 (any_logic:MODEF
3464 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
3465 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3466 "SSE_FLOAT_MODE_P (<MODE>mode)"
3467 {
3468 static char buf[128];
3469 const char *ops;
3470 const char *suffix
3471 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3472
3473 switch (which_alternative)
3474 {
3475 case 0:
3476 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3477 break;
3478 case 2:
3479 if (!TARGET_AVX512DQ)
3480 {
3481 suffix = <MODE>mode == DFmode ? "q" : "d";
3482 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3483 break;
3484 }
3485 /* FALLTHRU */
3486 case 1:
3487 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3488 break;
3489 case 3:
3490 if (TARGET_AVX512DQ)
3491 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3492 else
3493 {
3494 suffix = <MODE>mode == DFmode ? "q" : "d";
3495 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3496 }
3497 break;
3498 default:
3499 gcc_unreachable ();
3500 }
3501
3502 snprintf (buf, sizeof (buf), ops, suffix);
3503 return buf;
3504 }
3505 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3506 (set_attr "type" "sselog")
3507 (set_attr "prefix" "orig,vex,evex,evex")
3508 (set (attr "mode")
3509 (cond [(eq_attr "alternative" "2")
3510 (if_then_else (match_test "TARGET_AVX512DQ")
3511 (const_string "<ssevecmode>")
3512 (const_string "TI"))
3513 (eq_attr "alternative" "3")
3514 (if_then_else (match_test "TARGET_AVX512DQ")
3515 (const_string "<avx512fvecmode>")
3516 (const_string "XI"))
3517 (and (match_test "<MODE_SIZE> == 16")
3518 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3519 (const_string "V4SF")
3520 (match_test "TARGET_AVX")
3521 (const_string "<ssevecmode>")
3522 (match_test "optimize_function_for_size_p (cfun)")
3523 (const_string "V4SF")
3524 ]
3525 (const_string "<ssevecmode>")))])
3526
3527 (define_expand "<code>tf3"
3528 [(set (match_operand:TF 0 "register_operand")
3529 (any_logic:TF
3530 (match_operand:TF 1 "vector_operand")
3531 (match_operand:TF 2 "vector_operand")))]
3532 "TARGET_SSE"
3533 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3534
3535 (define_insn "*<code>tf3"
3536 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3537 (any_logic:TF
3538 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
3539 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3540 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3541 {
3542 static char buf[128];
3543 const char *ops;
3544 const char *tmp
3545 = (which_alternative >= 2 ? "p<logic>q"
3546 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
3547
3548 switch (which_alternative)
3549 {
3550 case 0:
3551 ops = "%s\t{%%2, %%0|%%0, %%2}";
3552 break;
3553 case 1:
3554 case 2:
3555 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3556 break;
3557 case 3:
3558 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3559 break;
3560 default:
3561 gcc_unreachable ();
3562 }
3563
3564 snprintf (buf, sizeof (buf), ops, tmp);
3565 return buf;
3566 }
3567 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3568 (set_attr "type" "sselog")
3569 (set (attr "prefix_data16")
3570 (if_then_else
3571 (and (eq_attr "alternative" "0")
3572 (eq_attr "mode" "TI"))
3573 (const_string "1")
3574 (const_string "*")))
3575 (set_attr "prefix" "orig,vex,evex,evex")
3576 (set (attr "mode")
3577 (cond [(eq_attr "alternative" "2")
3578 (const_string "TI")
3579 (eq_attr "alternative" "3")
3580 (const_string "XI")
3581 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3582 (const_string "V4SF")
3583 (match_test "TARGET_AVX")
3584 (const_string "TI")
3585 (ior (not (match_test "TARGET_SSE2"))
3586 (match_test "optimize_function_for_size_p (cfun)"))
3587 (const_string "V4SF")
3588 ]
3589 (const_string "TI")))])
3590
3591 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3592 ;;
3593 ;; FMA floating point multiply/accumulate instructions. These include
3594 ;; scalar versions of the instructions as well as vector versions.
3595 ;;
3596 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3597
3598 ;; The standard names for scalar FMA are only available with SSE math enabled.
3599 ;; The CPUID bit AVX512F enables the EVEX-encoded scalar and 512-bit fma.  It
3600 ;; doesn't care about the FMA bit, so we enable fma for TARGET_AVX512F even when
3601 ;; TARGET_FMA and TARGET_FMA4 are both false.
3602 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3603 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3604 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3605 ;; that supports AVX512F also supports FMA so we can ignore this for now.
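;; For reference, the four standard-name expanders below model the usual
;; fused multiply-add family, computed element-wise with a single rounding
;; step:
;;
;;   fma<mode>4:	 op1 * op2 + op3
;;   fms<mode>4:	 op1 * op2 - op3
;;   fnma<mode>4:	-op1 * op2 + op3
;;   fnms<mode>4:	-op1 * op2 - op3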
3606 (define_mode_iterator FMAMODEM
3607 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3608 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3609 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3610 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3611 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3612 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3613 (V16SF "TARGET_AVX512F")
3614 (V8DF "TARGET_AVX512F")])
3615
3616 (define_expand "fma<mode>4"
3617 [(set (match_operand:FMAMODEM 0 "register_operand")
3618 (fma:FMAMODEM
3619 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3620 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3621 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3622
3623 (define_expand "fms<mode>4"
3624 [(set (match_operand:FMAMODEM 0 "register_operand")
3625 (fma:FMAMODEM
3626 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3627 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3628 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3629
3630 (define_expand "fnma<mode>4"
3631 [(set (match_operand:FMAMODEM 0 "register_operand")
3632 (fma:FMAMODEM
3633 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3634 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3635 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3636
3637 (define_expand "fnms<mode>4"
3638 [(set (match_operand:FMAMODEM 0 "register_operand")
3639 (fma:FMAMODEM
3640 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3641 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3642 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3643
3644 ;; The builtins for intrinsics are not constrained by SSE math being enabled.
3645 (define_mode_iterator FMAMODE_AVX512
3646 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3647 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3648 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3649 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3650 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3651 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3652 (V16SF "TARGET_AVX512F")
3653 (V8DF "TARGET_AVX512F")])
3654
3655 (define_mode_iterator FMAMODE
3656 [SF DF V4SF V2DF V8SF V4DF])
3657
3658 (define_expand "fma4i_fmadd_<mode>"
3659 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3660 (fma:FMAMODE_AVX512
3661 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3662 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3663 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3664
3665 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3666 [(match_operand:VF_AVX512VL 0 "register_operand")
3667 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3668 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3669 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3670 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3671 "TARGET_AVX512F && <round_mode512bit_condition>"
3672 {
3673 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3674 operands[0], operands[1], operands[2], operands[3],
3675 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3676 DONE;
3677 })
3678
3679 (define_insn "*fma_fmadd_<mode>"
3680 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3681 (fma:FMAMODE
3682 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3683 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3684 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3685 "TARGET_FMA || TARGET_FMA4"
3686 "@
3687 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3688 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3689 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3690 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3691 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3692 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3693 (set_attr "type" "ssemuladd")
3694 (set_attr "mode" "<MODE>")])
3695
3696 ;; Assume AVX-512F as the baseline; the 128/256-bit modes additionally require AVX512VL.
3697 (define_mode_iterator VF_SF_AVX512VL
3698 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3699 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3700
3701 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3702 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3703 (fma:VF_SF_AVX512VL
3704 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3705 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3706 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3707 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3708 "@
3709 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3710 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3711 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3712 [(set_attr "type" "ssemuladd")
3713 (set_attr "mode" "<MODE>")])
3714
3715 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3716 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3717 (vec_merge:VF_AVX512VL
3718 (fma:VF_AVX512VL
3719 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3720 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3721 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3722 (match_dup 1)
3723 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3724 "TARGET_AVX512F && <round_mode512bit_condition>"
3725 "@
3726 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3727 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3728 [(set_attr "type" "ssemuladd")
3729 (set_attr "mode" "<MODE>")])
3730
3731 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3732 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3733 (vec_merge:VF_AVX512VL
3734 (fma:VF_AVX512VL
3735 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3736 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3737 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3738 (match_dup 3)
3739 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3740 "TARGET_AVX512F"
3741 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3742 [(set_attr "type" "ssemuladd")
3743 (set_attr "mode" "<MODE>")])
3744
3745 (define_insn "*fma_fmsub_<mode>"
3746 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3747 (fma:FMAMODE
3748 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3749 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3750 (neg:FMAMODE
3751 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3752 "TARGET_FMA || TARGET_FMA4"
3753 "@
3754 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3755 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3756 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3757 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3758 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3759 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3760 (set_attr "type" "ssemuladd")
3761 (set_attr "mode" "<MODE>")])
3762
3763 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3764 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3765 (fma:VF_SF_AVX512VL
3766 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3767 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3768 (neg:VF_SF_AVX512VL
3769 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3770 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3771 "@
3772 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3773 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3774 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3775 [(set_attr "type" "ssemuladd")
3776 (set_attr "mode" "<MODE>")])
3777
3778 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3779 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3780 (vec_merge:VF_AVX512VL
3781 (fma:VF_AVX512VL
3782 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3783 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3784 (neg:VF_AVX512VL
3785 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3786 (match_dup 1)
3787 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3788 "TARGET_AVX512F"
3789 "@
3790 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3791 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3792 [(set_attr "type" "ssemuladd")
3793 (set_attr "mode" "<MODE>")])
3794
3795 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3796 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3797 (vec_merge:VF_AVX512VL
3798 (fma:VF_AVX512VL
3799 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3800 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3801 (neg:VF_AVX512VL
3802 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3803 (match_dup 3)
3804 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3805 "TARGET_AVX512F && <round_mode512bit_condition>"
3806 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3807 [(set_attr "type" "ssemuladd")
3808 (set_attr "mode" "<MODE>")])
3809
3810 (define_insn "*fma_fnmadd_<mode>"
3811 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3812 (fma:FMAMODE
3813 (neg:FMAMODE
3814 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3815 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3816 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3817 "TARGET_FMA || TARGET_FMA4"
3818 "@
3819 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3820 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3821 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3822 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3823 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3824 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3825 (set_attr "type" "ssemuladd")
3826 (set_attr "mode" "<MODE>")])
3827
3828 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3829 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3830 (fma:VF_SF_AVX512VL
3831 (neg:VF_SF_AVX512VL
3832 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3833 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3834 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3835 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3836 "@
3837 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3838 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3839 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3840 [(set_attr "type" "ssemuladd")
3841 (set_attr "mode" "<MODE>")])
3842
3843 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3844 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3845 (vec_merge:VF_AVX512VL
3846 (fma:VF_AVX512VL
3847 (neg:VF_AVX512VL
3848 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3849 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3850 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3851 (match_dup 1)
3852 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3853 "TARGET_AVX512F && <round_mode512bit_condition>"
3854 "@
3855 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3856 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3857 [(set_attr "type" "ssemuladd")
3858 (set_attr "mode" "<MODE>")])
3859
3860 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3861 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3862 (vec_merge:VF_AVX512VL
3863 (fma:VF_AVX512VL
3864 (neg:VF_AVX512VL
3865 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3866 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3867 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3868 (match_dup 3)
3869 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3870 "TARGET_AVX512F && <round_mode512bit_condition>"
3871 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3872 [(set_attr "type" "ssemuladd")
3873 (set_attr "mode" "<MODE>")])
3874
3875 (define_insn "*fma_fnmsub_<mode>"
3876 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3877 (fma:FMAMODE
3878 (neg:FMAMODE
3879 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3880 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3881 (neg:FMAMODE
3882 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3883 "TARGET_FMA || TARGET_FMA4"
3884 "@
3885 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3886 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3887 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3888 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3889 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3890 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3891 (set_attr "type" "ssemuladd")
3892 (set_attr "mode" "<MODE>")])
3893
3894 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3895 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3896 (fma:VF_SF_AVX512VL
3897 (neg:VF_SF_AVX512VL
3898 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3899 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3900 (neg:VF_SF_AVX512VL
3901 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3902 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3903 "@
3904 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3905 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3906 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3907 [(set_attr "type" "ssemuladd")
3908 (set_attr "mode" "<MODE>")])
3909
3910 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3911 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3912 (vec_merge:VF_AVX512VL
3913 (fma:VF_AVX512VL
3914 (neg:VF_AVX512VL
3915 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3916 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3917 (neg:VF_AVX512VL
3918 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3919 (match_dup 1)
3920 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3921 "TARGET_AVX512F && <round_mode512bit_condition>"
3922 "@
3923 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3924 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3925 [(set_attr "type" "ssemuladd")
3926 (set_attr "mode" "<MODE>")])
3927
3928 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3929 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3930 (vec_merge:VF_AVX512VL
3931 (fma:VF_AVX512VL
3932 (neg:VF_AVX512VL
3933 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3934 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3935 (neg:VF_AVX512VL
3936 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3937 (match_dup 3)
3938 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3939 "TARGET_AVX512F"
3940 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3941 [(set_attr "type" "ssemuladd")
3942 (set_attr "mode" "<MODE>")])
3943
3944 ;; FMA parallel floating point multiply addsub and subadd operations.
3945
3946 ;; It would be possible to represent these without the UNSPEC as
3947 ;;
3948 ;; (vec_merge
3949 ;; (fma op1 op2 op3)
3950 ;; (fma op1 op2 (neg op3))
3951 ;; (merge-const))
3952 ;;
3953 ;; But this doesn't seem useful in practice.
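;; For clarity, as used below UNSPEC_FMADDSUB computes, per element i,
;;
;;   fmaddsub:	op1[i] * op2[i] - op3[i]   for even i
;;		op1[i] * op2[i] + op3[i]   for odd i
;;
;; and fmsubadd (the same unspec with op3 negated) swaps the two cases.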
3954
3955 (define_expand "fmaddsub_<mode>"
3956 [(set (match_operand:VF 0 "register_operand")
3957 (unspec:VF
3958 [(match_operand:VF 1 "nonimmediate_operand")
3959 (match_operand:VF 2 "nonimmediate_operand")
3960 (match_operand:VF 3 "nonimmediate_operand")]
3961 UNSPEC_FMADDSUB))]
3962 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3963
3964 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3965 [(match_operand:VF_AVX512VL 0 "register_operand")
3966 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3967 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3968 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3969 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3970 "TARGET_AVX512F"
3971 {
3972 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3973 operands[0], operands[1], operands[2], operands[3],
3974 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3975 DONE;
3976 })
3977
3978 (define_insn "*fma_fmaddsub_<mode>"
3979 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3980 (unspec:VF_128_256
3981 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3982 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3983 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
3984 UNSPEC_FMADDSUB))]
3985 "TARGET_FMA || TARGET_FMA4"
3986 "@
3987 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3988 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3989 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3990 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3991 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3992 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3993 (set_attr "type" "ssemuladd")
3994 (set_attr "mode" "<MODE>")])
3995
3996 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3997 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3998 (unspec:VF_SF_AVX512VL
3999 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4000 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4001 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
4002 UNSPEC_FMADDSUB))]
4003 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4004 "@
4005 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4006 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4007 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4008 [(set_attr "type" "ssemuladd")
4009 (set_attr "mode" "<MODE>")])
4010
4011 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4012 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4013 (vec_merge:VF_AVX512VL
4014 (unspec:VF_AVX512VL
4015 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4016 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4017 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
4018 UNSPEC_FMADDSUB)
4019 (match_dup 1)
4020 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4021 "TARGET_AVX512F"
4022 "@
4023 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4024 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4025 [(set_attr "type" "ssemuladd")
4026 (set_attr "mode" "<MODE>")])
4027
4028 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4029 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4030 (vec_merge:VF_AVX512VL
4031 (unspec:VF_AVX512VL
4032 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4033 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4034 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4035 UNSPEC_FMADDSUB)
4036 (match_dup 3)
4037 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4038 "TARGET_AVX512F"
4039 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4040 [(set_attr "type" "ssemuladd")
4041 (set_attr "mode" "<MODE>")])
4042
4043 (define_insn "*fma_fmsubadd_<mode>"
4044 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4045 (unspec:VF_128_256
4046 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4047 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4048 (neg:VF_128_256
4049 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4050 UNSPEC_FMADDSUB))]
4051 "TARGET_FMA || TARGET_FMA4"
4052 "@
4053 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4054 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4055 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4056 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4057 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4058 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4059 (set_attr "type" "ssemuladd")
4060 (set_attr "mode" "<MODE>")])
4061
4062 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4063 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4064 (unspec:VF_SF_AVX512VL
4065 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4066 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4067 (neg:VF_SF_AVX512VL
4068 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4069 UNSPEC_FMADDSUB))]
4070 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4071 "@
4072 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4073 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4074 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4075 [(set_attr "type" "ssemuladd")
4076 (set_attr "mode" "<MODE>")])
4077
4078 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4079 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4080 (vec_merge:VF_AVX512VL
4081 (unspec:VF_AVX512VL
4082 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4083 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4084 (neg:VF_AVX512VL
4085 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
4086 UNSPEC_FMADDSUB)
4087 (match_dup 1)
4088 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4089 "TARGET_AVX512F"
4090 "@
4091 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4092 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4093 [(set_attr "type" "ssemuladd")
4094 (set_attr "mode" "<MODE>")])
4095
4096 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4097 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4098 (vec_merge:VF_AVX512VL
4099 (unspec:VF_AVX512VL
4100 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4101 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4102 (neg:VF_AVX512VL
4103 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4104 UNSPEC_FMADDSUB)
4105 (match_dup 3)
4106 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4107 "TARGET_AVX512F"
4108 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4109 [(set_attr "type" "ssemuladd")
4110 (set_attr "mode" "<MODE>")])
4111
4112 ;; FMA3 floating point scalar intrinsics.  These merge the result with the
4113 ;; high-order elements of the destination register.
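;; For example, the *fmai_fmadd pattern below yields
;;
;;   result[0]	 = op1[0] * op2[0] + op3[0]
;;   result[1..] = op1[1..]
;;
;; which matches the behaviour of the _mm_fmadd_ss/_mm_fmadd_sd intrinsics.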
4114
4115 (define_expand "fmai_vmfmadd_<mode><round_name>"
4116 [(set (match_operand:VF_128 0 "register_operand")
4117 (vec_merge:VF_128
4118 (fma:VF_128
4119 (match_operand:VF_128 1 "<round_nimm_predicate>")
4120 (match_operand:VF_128 2 "<round_nimm_predicate>")
4121 (match_operand:VF_128 3 "<round_nimm_predicate>"))
4122 (match_dup 1)
4123 (const_int 1)))]
4124 "TARGET_FMA")
4125
4126 (define_insn "*fmai_fmadd_<mode>"
4127 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4128 (vec_merge:VF_128
4129 (fma:VF_128
4130 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4131 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
4132 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
4133 (match_dup 1)
4134 (const_int 1)))]
4135 "TARGET_FMA || TARGET_AVX512F"
4136 "@
4137 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4138 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4139 [(set_attr "type" "ssemuladd")
4140 (set_attr "mode" "<MODE>")])
4141
4142 (define_insn "*fmai_fmsub_<mode>"
4143 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4144 (vec_merge:VF_128
4145 (fma:VF_128
4146 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
4147 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4148 (neg:VF_128
4149 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4150 (match_dup 1)
4151 (const_int 1)))]
4152 "TARGET_FMA || TARGET_AVX512F"
4153 "@
4154 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4155 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4156 [(set_attr "type" "ssemuladd")
4157 (set_attr "mode" "<MODE>")])
4158
4159 (define_insn "*fmai_fnmadd_<mode><round_name>"
4160 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4161 (vec_merge:VF_128
4162 (fma:VF_128
4163 (neg:VF_128
4164 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4165 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
4166 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4167 (match_dup 1)
4168 (const_int 1)))]
4169 "TARGET_FMA || TARGET_AVX512F"
4170 "@
4171 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4172 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4173 [(set_attr "type" "ssemuladd")
4174 (set_attr "mode" "<MODE>")])
4175
4176 (define_insn "*fmai_fnmsub_<mode><round_name>"
4177 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4178 (vec_merge:VF_128
4179 (fma:VF_128
4180 (neg:VF_128
4181 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
4182 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4183 (neg:VF_128
4184 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4185 (match_dup 1)
4186 (const_int 1)))]
4187 "TARGET_FMA || TARGET_AVX512F"
4188 "@
4189 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4190 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4191 [(set_attr "type" "ssemuladd")
4192 (set_attr "mode" "<MODE>")])
4193
4194 ;; FMA4 floating point scalar intrinsics. These write the
4195 ;; entire destination register, with the high-order elements zeroed.
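;; Sketch of the semantics of the patterns below: only the low element is
;; computed, and the rest of the destination comes from the zero vector
;; matched by operand 4, e.g. for vfmaddss
;;
;;   result[0]	 = op1[0] * op2[0] + op3[0]
;;   result[1..] = 0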
4196
4197 (define_expand "fma4i_vmfmadd_<mode>"
4198 [(set (match_operand:VF_128 0 "register_operand")
4199 (vec_merge:VF_128
4200 (fma:VF_128
4201 (match_operand:VF_128 1 "nonimmediate_operand")
4202 (match_operand:VF_128 2 "nonimmediate_operand")
4203 (match_operand:VF_128 3 "nonimmediate_operand"))
4204 (match_dup 4)
4205 (const_int 1)))]
4206 "TARGET_FMA4"
4207 "operands[4] = CONST0_RTX (<MODE>mode);")
4208
4209 (define_insn "*fma4i_vmfmadd_<mode>"
4210 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4211 (vec_merge:VF_128
4212 (fma:VF_128
4213 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4214 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4215 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4216 (match_operand:VF_128 4 "const0_operand")
4217 (const_int 1)))]
4218 "TARGET_FMA4"
4219 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4220 [(set_attr "type" "ssemuladd")
4221 (set_attr "mode" "<MODE>")])
4222
4223 (define_insn "*fma4i_vmfmsub_<mode>"
4224 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4225 (vec_merge:VF_128
4226 (fma:VF_128
4227 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4228 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4229 (neg:VF_128
4230 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4231 (match_operand:VF_128 4 "const0_operand")
4232 (const_int 1)))]
4233 "TARGET_FMA4"
4234 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4235 [(set_attr "type" "ssemuladd")
4236 (set_attr "mode" "<MODE>")])
4237
4238 (define_insn "*fma4i_vmfnmadd_<mode>"
4239 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4240 (vec_merge:VF_128
4241 (fma:VF_128
4242 (neg:VF_128
4243 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4244 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4245 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4246 (match_operand:VF_128 4 "const0_operand")
4247 (const_int 1)))]
4248 "TARGET_FMA4"
4249 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4250 [(set_attr "type" "ssemuladd")
4251 (set_attr "mode" "<MODE>")])
4252
4253 (define_insn "*fma4i_vmfnmsub_<mode>"
4254 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4255 (vec_merge:VF_128
4256 (fma:VF_128
4257 (neg:VF_128
4258 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4259 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4260 (neg:VF_128
4261 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4262 (match_operand:VF_128 4 "const0_operand")
4263 (const_int 1)))]
4264 "TARGET_FMA4"
4265 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4266 [(set_attr "type" "ssemuladd")
4267 (set_attr "mode" "<MODE>")])
4268
4269 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4270 ;;
4271 ;; Parallel single-precision floating point conversion operations
4272 ;;
4273 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4274
4275 (define_insn "sse_cvtpi2ps"
4276 [(set (match_operand:V4SF 0 "register_operand" "=x")
4277 (vec_merge:V4SF
4278 (vec_duplicate:V4SF
4279 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
4280 (match_operand:V4SF 1 "register_operand" "0")
4281 (const_int 3)))]
4282 "TARGET_SSE"
4283 "cvtpi2ps\t{%2, %0|%0, %2}"
4284 [(set_attr "type" "ssecvt")
4285 (set_attr "mode" "V4SF")])
4286
4287 (define_insn "sse_cvtps2pi"
4288 [(set (match_operand:V2SI 0 "register_operand" "=y")
4289 (vec_select:V2SI
4290 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
4291 UNSPEC_FIX_NOTRUNC)
4292 (parallel [(const_int 0) (const_int 1)])))]
4293 "TARGET_SSE"
4294 "cvtps2pi\t{%1, %0|%0, %q1}"
4295 [(set_attr "type" "ssecvt")
4296 (set_attr "unit" "mmx")
4297 (set_attr "mode" "DI")])
4298
4299 (define_insn "sse_cvttps2pi"
4300 [(set (match_operand:V2SI 0 "register_operand" "=y")
4301 (vec_select:V2SI
4302 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
4303 (parallel [(const_int 0) (const_int 1)])))]
4304 "TARGET_SSE"
4305 "cvttps2pi\t{%1, %0|%0, %q1}"
4306 [(set_attr "type" "ssecvt")
4307 (set_attr "unit" "mmx")
4308 (set_attr "prefix_rep" "0")
4309 (set_attr "mode" "SF")])
4310
4311 (define_insn "sse_cvtsi2ss<round_name>"
4312 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4313 (vec_merge:V4SF
4314 (vec_duplicate:V4SF
4315 (float:SF (match_operand:SI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4316 (match_operand:V4SF 1 "register_operand" "0,0,v")
4317 (const_int 1)))]
4318 "TARGET_SSE"
4319 "@
4320 cvtsi2ss\t{%2, %0|%0, %2}
4321 cvtsi2ss\t{%2, %0|%0, %2}
4322 vcvtsi2ss\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4323 [(set_attr "isa" "noavx,noavx,avx")
4324 (set_attr "type" "sseicvt")
4325 (set_attr "athlon_decode" "vector,double,*")
4326 (set_attr "amdfam10_decode" "vector,double,*")
4327 (set_attr "bdver1_decode" "double,direct,*")
4328 (set_attr "btver2_decode" "double,double,double")
4329 (set_attr "znver1_decode" "double,double,double")
4330 (set_attr "prefix" "orig,orig,maybe_evex")
4331 (set_attr "mode" "SF")])
4332
4333 (define_insn "sse_cvtsi2ssq<round_name>"
4334 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4335 (vec_merge:V4SF
4336 (vec_duplicate:V4SF
4337 (float:SF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4338 (match_operand:V4SF 1 "register_operand" "0,0,v")
4339 (const_int 1)))]
4340 "TARGET_SSE && TARGET_64BIT"
4341 "@
4342 cvtsi2ssq\t{%2, %0|%0, %2}
4343 cvtsi2ssq\t{%2, %0|%0, %2}
4344 vcvtsi2ssq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4345 [(set_attr "isa" "noavx,noavx,avx")
4346 (set_attr "type" "sseicvt")
4347 (set_attr "athlon_decode" "vector,double,*")
4348 (set_attr "amdfam10_decode" "vector,double,*")
4349 (set_attr "bdver1_decode" "double,direct,*")
4350 (set_attr "btver2_decode" "double,double,double")
4351 (set_attr "length_vex" "*,*,4")
4352 (set_attr "prefix_rex" "1,1,*")
4353 (set_attr "prefix" "orig,orig,maybe_evex")
4354 (set_attr "mode" "SF")])
4355
4356 (define_insn "sse_cvtss2si<round_name>"
4357 [(set (match_operand:SI 0 "register_operand" "=r,r")
4358 (unspec:SI
4359 [(vec_select:SF
4360 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4361 (parallel [(const_int 0)]))]
4362 UNSPEC_FIX_NOTRUNC))]
4363 "TARGET_SSE"
4364 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4365 [(set_attr "type" "sseicvt")
4366 (set_attr "athlon_decode" "double,vector")
4367 (set_attr "bdver1_decode" "double,double")
4368 (set_attr "prefix_rep" "1")
4369 (set_attr "prefix" "maybe_vex")
4370 (set_attr "mode" "SI")])
4371
4372 (define_insn "sse_cvtss2si_2"
4373 [(set (match_operand:SI 0 "register_operand" "=r,r")
4374 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4375 UNSPEC_FIX_NOTRUNC))]
4376 "TARGET_SSE"
4377 "%vcvtss2si\t{%1, %0|%0, %k1}"
4378 [(set_attr "type" "sseicvt")
4379 (set_attr "athlon_decode" "double,vector")
4380 (set_attr "amdfam10_decode" "double,double")
4381 (set_attr "bdver1_decode" "double,double")
4382 (set_attr "prefix_rep" "1")
4383 (set_attr "prefix" "maybe_vex")
4384 (set_attr "mode" "SI")])
4385
4386 (define_insn "sse_cvtss2siq<round_name>"
4387 [(set (match_operand:DI 0 "register_operand" "=r,r")
4388 (unspec:DI
4389 [(vec_select:SF
4390 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4391 (parallel [(const_int 0)]))]
4392 UNSPEC_FIX_NOTRUNC))]
4393 "TARGET_SSE && TARGET_64BIT"
4394 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4395 [(set_attr "type" "sseicvt")
4396 (set_attr "athlon_decode" "double,vector")
4397 (set_attr "bdver1_decode" "double,double")
4398 (set_attr "prefix_rep" "1")
4399 (set_attr "prefix" "maybe_vex")
4400 (set_attr "mode" "DI")])
4401
4402 (define_insn "sse_cvtss2siq_2"
4403 [(set (match_operand:DI 0 "register_operand" "=r,r")
4404 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4405 UNSPEC_FIX_NOTRUNC))]
4406 "TARGET_SSE && TARGET_64BIT"
4407 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
4408 [(set_attr "type" "sseicvt")
4409 (set_attr "athlon_decode" "double,vector")
4410 (set_attr "amdfam10_decode" "double,double")
4411 (set_attr "bdver1_decode" "double,double")
4412 (set_attr "prefix_rep" "1")
4413 (set_attr "prefix" "maybe_vex")
4414 (set_attr "mode" "DI")])
4415
4416 (define_insn "sse_cvttss2si<round_saeonly_name>"
4417 [(set (match_operand:SI 0 "register_operand" "=r,r")
4418 (fix:SI
4419 (vec_select:SF
4420 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4421 (parallel [(const_int 0)]))))]
4422 "TARGET_SSE"
4423 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4424 [(set_attr "type" "sseicvt")
4425 (set_attr "athlon_decode" "double,vector")
4426 (set_attr "amdfam10_decode" "double,double")
4427 (set_attr "bdver1_decode" "double,double")
4428 (set_attr "prefix_rep" "1")
4429 (set_attr "prefix" "maybe_vex")
4430 (set_attr "mode" "SI")])
4431
4432 (define_insn "sse_cvttss2siq<round_saeonly_name>"
4433 [(set (match_operand:DI 0 "register_operand" "=r,r")
4434 (fix:DI
4435 (vec_select:SF
4436 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
4437 (parallel [(const_int 0)]))))]
4438 "TARGET_SSE && TARGET_64BIT"
4439 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4440 [(set_attr "type" "sseicvt")
4441 (set_attr "athlon_decode" "double,vector")
4442 (set_attr "amdfam10_decode" "double,double")
4443 (set_attr "bdver1_decode" "double,double")
4444 (set_attr "prefix_rep" "1")
4445 (set_attr "prefix" "maybe_vex")
4446 (set_attr "mode" "DI")])
4447
4448 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
4449 [(set (match_operand:VF_128 0 "register_operand" "=v")
4450 (vec_merge:VF_128
4451 (vec_duplicate:VF_128
4452 (unsigned_float:<ssescalarmode>
4453 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4454 (match_operand:VF_128 1 "register_operand" "v")
4455 (const_int 1)))]
4456 "TARGET_AVX512F && <round_modev4sf_condition>"
4457 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4458 [(set_attr "type" "sseicvt")
4459 (set_attr "prefix" "evex")
4460 (set_attr "mode" "<ssescalarmode>")])
4461
4462 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
4463 [(set (match_operand:VF_128 0 "register_operand" "=v")
4464 (vec_merge:VF_128
4465 (vec_duplicate:VF_128
4466 (unsigned_float:<ssescalarmode>
4467 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4468 (match_operand:VF_128 1 "register_operand" "v")
4469 (const_int 1)))]
4470 "TARGET_AVX512F && TARGET_64BIT"
4471 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4472 [(set_attr "type" "sseicvt")
4473 (set_attr "prefix" "evex")
4474 (set_attr "mode" "<ssescalarmode>")])
4475
4476 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4477 [(set (match_operand:VF1 0 "register_operand" "=x,v")
4478 (float:VF1
4479 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
4480 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4481 "@
4482 cvtdq2ps\t{%1, %0|%0, %1}
4483 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4484 [(set_attr "isa" "noavx,avx")
4485 (set_attr "type" "ssecvt")
4486 (set_attr "prefix" "maybe_vex")
4487 (set_attr "mode" "<sseinsnmode>")])
4488
4489 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4490 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4491 (unsigned_float:VF1_AVX512VL
4492 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4493 "TARGET_AVX512F"
4494 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4495 [(set_attr "type" "ssecvt")
4496 (set_attr "prefix" "evex")
4497 (set_attr "mode" "<MODE>")])
4498
4499 (define_expand "floatuns<sseintvecmodelower><mode>2"
4500 [(match_operand:VF1 0 "register_operand")
4501 (match_operand:<sseintvecmode> 1 "register_operand")]
4502 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4503 {
4504 if (<MODE>mode == V16SFmode)
4505 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4506 else if (TARGET_AVX512VL)
4508 {
4509 if (<MODE>mode == V4SFmode)
4510 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4511 else
4512 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4513 }
4514 else
4515 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4516
4517 DONE;
4518 })
4519
4520
4521 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
4522 (define_mode_attr sf2simodelower
4523 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
4524
4525 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4526 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4527 (unspec:VI4_AVX
4528 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
4529 UNSPEC_FIX_NOTRUNC))]
4530 "TARGET_SSE2 && <mask_mode512bit_condition>"
4531 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4532 [(set_attr "type" "ssecvt")
4533 (set (attr "prefix_data16")
4534 (if_then_else
4535 (match_test "TARGET_AVX")
4536 (const_string "*")
4537 (const_string "1")))
4538 (set_attr "prefix" "maybe_vex")
4539 (set_attr "mode" "<sseinsnmode>")])
4540
4541 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4542 [(set (match_operand:V16SI 0 "register_operand" "=v")
4543 (unspec:V16SI
4544 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4545 UNSPEC_FIX_NOTRUNC))]
4546 "TARGET_AVX512F"
4547 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4548 [(set_attr "type" "ssecvt")
4549 (set_attr "prefix" "evex")
4550 (set_attr "mode" "XI")])
4551
4552 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4553 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4554 (unspec:VI4_AVX512VL
4555 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4556 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4557 "TARGET_AVX512F"
4558 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4559 [(set_attr "type" "ssecvt")
4560 (set_attr "prefix" "evex")
4561 (set_attr "mode" "<sseinsnmode>")])
4562
4563 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4564 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4565 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4566 UNSPEC_FIX_NOTRUNC))]
4567 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4568 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4569 [(set_attr "type" "ssecvt")
4570 (set_attr "prefix" "evex")
4571 (set_attr "mode" "<sseinsnmode>")])
4572
4573 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4574 [(set (match_operand:V2DI 0 "register_operand" "=v")
4575 (unspec:V2DI
4576 [(vec_select:V2SF
4577 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4578 (parallel [(const_int 0) (const_int 1)]))]
4579 UNSPEC_FIX_NOTRUNC))]
4580 "TARGET_AVX512DQ && TARGET_AVX512VL"
4581 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4582 [(set_attr "type" "ssecvt")
4583 (set_attr "prefix" "evex")
4584 (set_attr "mode" "TI")])
4585
4586 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4587 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4588 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4589 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4590 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4591 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4592 [(set_attr "type" "ssecvt")
4593 (set_attr "prefix" "evex")
4594 (set_attr "mode" "<sseinsnmode>")])
4595
4596 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4597 [(set (match_operand:V2DI 0 "register_operand" "=v")
4598 (unspec:V2DI
4599 [(vec_select:V2SF
4600 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4601 (parallel [(const_int 0) (const_int 1)]))]
4602 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4603 "TARGET_AVX512DQ && TARGET_AVX512VL"
4604 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4605 [(set_attr "type" "ssecvt")
4606 (set_attr "prefix" "evex")
4607 (set_attr "mode" "TI")])
4608
4609 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4610 [(set (match_operand:V16SI 0 "register_operand" "=v")
4611 (any_fix:V16SI
4612 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4613 "TARGET_AVX512F"
4614 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4615 [(set_attr "type" "ssecvt")
4616 (set_attr "prefix" "evex")
4617 (set_attr "mode" "XI")])
4618
4619 (define_insn "fix_truncv8sfv8si2<mask_name>"
4620 [(set (match_operand:V8SI 0 "register_operand" "=v")
4621 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4622 "TARGET_AVX && <mask_avx512vl_condition>"
4623 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4624 [(set_attr "type" "ssecvt")
4625 (set_attr "prefix" "<mask_prefix>")
4626 (set_attr "mode" "OI")])
4627
4628 (define_insn "fix_truncv4sfv4si2<mask_name>"
4629 [(set (match_operand:V4SI 0 "register_operand" "=v")
4630 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
4631 "TARGET_SSE2 && <mask_avx512vl_condition>"
4632 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4633 [(set_attr "type" "ssecvt")
4634 (set (attr "prefix_rep")
4635 (if_then_else
4636 (match_test "TARGET_AVX")
4637 (const_string "*")
4638 (const_string "1")))
4639 (set (attr "prefix_data16")
4640 (if_then_else
4641 (match_test "TARGET_AVX")
4642 (const_string "*")
4643 (const_string "0")))
4644 (set_attr "prefix_data16" "0")
4645 (set_attr "prefix" "<mask_prefix2>")
4646 (set_attr "mode" "TI")])
4647
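;; When no native unsigned truncation exists, the expander below leans on
;; ix86_expand_adjust_ufix_to_sfix_si, which is expected to bias lanes that
;; do not fit in a signed int and hand back a matching 0x80000000 correction
;; vector; a signed truncation plus the final xor then reconstructs the full
;; unsigned result.  Rough per-lane sketch: 2147483904.0 -> biased to 256.0
;; -> signed cvtt gives 256 -> xor 0x80000000 -> 2147483904.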
4648 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4649 [(match_operand:<sseintvecmode> 0 "register_operand")
4650 (match_operand:VF1 1 "register_operand")]
4651 "TARGET_SSE2"
4652 {
4653 if (<MODE>mode == V16SFmode)
4654 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4655 operands[1]));
4656 else
4657 {
4658 rtx tmp[3];
4659 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4660 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4661 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4662 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4663 }
4664 DONE;
4665 })
4666
4667 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4668 ;;
4669 ;; Parallel double-precision floating point conversion operations
4670 ;;
4671 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4672
4673 (define_insn "sse2_cvtpi2pd"
4674 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4675 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4676 "TARGET_SSE2"
4677 "cvtpi2pd\t{%1, %0|%0, %1}"
4678 [(set_attr "type" "ssecvt")
4679 (set_attr "unit" "mmx,*")
4680 (set_attr "prefix_data16" "1,*")
4681 (set_attr "mode" "V2DF")])
4682
4683 (define_insn "sse2_cvtpd2pi"
4684 [(set (match_operand:V2SI 0 "register_operand" "=y")
4685 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4686 UNSPEC_FIX_NOTRUNC))]
4687 "TARGET_SSE2"
4688 "cvtpd2pi\t{%1, %0|%0, %1}"
4689 [(set_attr "type" "ssecvt")
4690 (set_attr "unit" "mmx")
4691 (set_attr "bdver1_decode" "double")
4692 (set_attr "btver2_decode" "direct")
4693 (set_attr "prefix_data16" "1")
4694 (set_attr "mode" "DI")])
4695
4696 (define_insn "sse2_cvttpd2pi"
4697 [(set (match_operand:V2SI 0 "register_operand" "=y")
4698 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4699 "TARGET_SSE2"
4700 "cvttpd2pi\t{%1, %0|%0, %1}"
4701 [(set_attr "type" "ssecvt")
4702 (set_attr "unit" "mmx")
4703 (set_attr "bdver1_decode" "double")
4704 (set_attr "prefix_data16" "1")
4705 (set_attr "mode" "TI")])
4706
4707 (define_insn "sse2_cvtsi2sd"
4708 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4709 (vec_merge:V2DF
4710 (vec_duplicate:V2DF
4711 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4712 (match_operand:V2DF 1 "register_operand" "0,0,v")
4713 (const_int 1)))]
4714 "TARGET_SSE2"
4715 "@
4716 cvtsi2sd\t{%2, %0|%0, %2}
4717 cvtsi2sd\t{%2, %0|%0, %2}
4718 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4719 [(set_attr "isa" "noavx,noavx,avx")
4720 (set_attr "type" "sseicvt")
4721 (set_attr "athlon_decode" "double,direct,*")
4722 (set_attr "amdfam10_decode" "vector,double,*")
4723 (set_attr "bdver1_decode" "double,direct,*")
4724 (set_attr "btver2_decode" "double,double,double")
4725 (set_attr "znver1_decode" "double,double,double")
4726 (set_attr "prefix" "orig,orig,maybe_evex")
4727 (set_attr "mode" "DF")])
4728
4729 (define_insn "sse2_cvtsi2sdq<round_name>"
4730 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4731 (vec_merge:V2DF
4732 (vec_duplicate:V2DF
4733 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4734 (match_operand:V2DF 1 "register_operand" "0,0,v")
4735 (const_int 1)))]
4736 "TARGET_SSE2 && TARGET_64BIT"
4737 "@
4738 cvtsi2sdq\t{%2, %0|%0, %2}
4739 cvtsi2sdq\t{%2, %0|%0, %2}
4740 vcvtsi2sdq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4741 [(set_attr "isa" "noavx,noavx,avx")
4742 (set_attr "type" "sseicvt")
4743 (set_attr "athlon_decode" "double,direct,*")
4744 (set_attr "amdfam10_decode" "vector,double,*")
4745 (set_attr "bdver1_decode" "double,direct,*")
4746 (set_attr "length_vex" "*,*,4")
4747 (set_attr "prefix_rex" "1,1,*")
4748 (set_attr "prefix" "orig,orig,maybe_evex")
4749 (set_attr "mode" "DF")])
4750
4751 (define_insn "avx512f_vcvtss2usi<round_name>"
4752 [(set (match_operand:SI 0 "register_operand" "=r")
4753 (unspec:SI
4754 [(vec_select:SF
4755 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4756 (parallel [(const_int 0)]))]
4757 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4758 "TARGET_AVX512F"
4759 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4760 [(set_attr "type" "sseicvt")
4761 (set_attr "prefix" "evex")
4762 (set_attr "mode" "SI")])
4763
4764 (define_insn "avx512f_vcvtss2usiq<round_name>"
4765 [(set (match_operand:DI 0 "register_operand" "=r")
4766 (unspec:DI
4767 [(vec_select:SF
4768 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4769 (parallel [(const_int 0)]))]
4770 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4771 "TARGET_AVX512F && TARGET_64BIT"
4772 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4773 [(set_attr "type" "sseicvt")
4774 (set_attr "prefix" "evex")
4775 (set_attr "mode" "DI")])
4776
4777 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4778 [(set (match_operand:SI 0 "register_operand" "=r")
4779 (unsigned_fix:SI
4780 (vec_select:SF
4781 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4782 (parallel [(const_int 0)]))))]
4783 "TARGET_AVX512F"
4784 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4785 [(set_attr "type" "sseicvt")
4786 (set_attr "prefix" "evex")
4787 (set_attr "mode" "SI")])
4788
4789 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4790 [(set (match_operand:DI 0 "register_operand" "=r")
4791 (unsigned_fix:DI
4792 (vec_select:SF
4793 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4794 (parallel [(const_int 0)]))))]
4795 "TARGET_AVX512F && TARGET_64BIT"
4796 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4797 [(set_attr "type" "sseicvt")
4798 (set_attr "prefix" "evex")
4799 (set_attr "mode" "DI")])
4800
4801 (define_insn "avx512f_vcvtsd2usi<round_name>"
4802 [(set (match_operand:SI 0 "register_operand" "=r")
4803 (unspec:SI
4804 [(vec_select:DF
4805 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4806 (parallel [(const_int 0)]))]
4807 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4808 "TARGET_AVX512F"
4809 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4810 [(set_attr "type" "sseicvt")
4811 (set_attr "prefix" "evex")
4812 (set_attr "mode" "SI")])
4813
4814 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4815 [(set (match_operand:DI 0 "register_operand" "=r")
4816 (unspec:DI
4817 [(vec_select:DF
4818 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4819 (parallel [(const_int 0)]))]
4820 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4821 "TARGET_AVX512F && TARGET_64BIT"
4822 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4823 [(set_attr "type" "sseicvt")
4824 (set_attr "prefix" "evex")
4825 (set_attr "mode" "DI")])
4826
4827 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4828 [(set (match_operand:SI 0 "register_operand" "=r")
4829 (unsigned_fix:SI
4830 (vec_select:DF
4831 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4832 (parallel [(const_int 0)]))))]
4833 "TARGET_AVX512F"
4834 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4835 [(set_attr "type" "sseicvt")
4836 (set_attr "prefix" "evex")
4837 (set_attr "mode" "SI")])
4838
4839 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4840 [(set (match_operand:DI 0 "register_operand" "=r")
4841 (unsigned_fix:DI
4842 (vec_select:DF
4843 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4844 (parallel [(const_int 0)]))))]
4845 "TARGET_AVX512F && TARGET_64BIT"
4846 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4847 [(set_attr "type" "sseicvt")
4848 (set_attr "prefix" "evex")
4849 (set_attr "mode" "DI")])
4850
4851 (define_insn "sse2_cvtsd2si<round_name>"
4852 [(set (match_operand:SI 0 "register_operand" "=r,r")
4853 (unspec:SI
4854 [(vec_select:DF
4855 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4856 (parallel [(const_int 0)]))]
4857 UNSPEC_FIX_NOTRUNC))]
4858 "TARGET_SSE2"
4859 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4860 [(set_attr "type" "sseicvt")
4861 (set_attr "athlon_decode" "double,vector")
4862 (set_attr "bdver1_decode" "double,double")
4863 (set_attr "btver2_decode" "double,double")
4864 (set_attr "prefix_rep" "1")
4865 (set_attr "prefix" "maybe_vex")
4866 (set_attr "mode" "SI")])
4867
4868 (define_insn "sse2_cvtsd2si_2"
4869 [(set (match_operand:SI 0 "register_operand" "=r,r")
4870 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4871 UNSPEC_FIX_NOTRUNC))]
4872 "TARGET_SSE2"
4873 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4874 [(set_attr "type" "sseicvt")
4875 (set_attr "athlon_decode" "double,vector")
4876 (set_attr "amdfam10_decode" "double,double")
4877 (set_attr "bdver1_decode" "double,double")
4878 (set_attr "prefix_rep" "1")
4879 (set_attr "prefix" "maybe_vex")
4880 (set_attr "mode" "SI")])
4881
4882 (define_insn "sse2_cvtsd2siq<round_name>"
4883 [(set (match_operand:DI 0 "register_operand" "=r,r")
4884 (unspec:DI
4885 [(vec_select:DF
4886 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4887 (parallel [(const_int 0)]))]
4888 UNSPEC_FIX_NOTRUNC))]
4889 "TARGET_SSE2 && TARGET_64BIT"
4890 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4891 [(set_attr "type" "sseicvt")
4892 (set_attr "athlon_decode" "double,vector")
4893 (set_attr "bdver1_decode" "double,double")
4894 (set_attr "prefix_rep" "1")
4895 (set_attr "prefix" "maybe_vex")
4896 (set_attr "mode" "DI")])
4897
4898 (define_insn "sse2_cvtsd2siq_2"
4899 [(set (match_operand:DI 0 "register_operand" "=r,r")
4900 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4901 UNSPEC_FIX_NOTRUNC))]
4902 "TARGET_SSE2 && TARGET_64BIT"
4903 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4904 [(set_attr "type" "sseicvt")
4905 (set_attr "athlon_decode" "double,vector")
4906 (set_attr "amdfam10_decode" "double,double")
4907 (set_attr "bdver1_decode" "double,double")
4908 (set_attr "prefix_rep" "1")
4909 (set_attr "prefix" "maybe_vex")
4910 (set_attr "mode" "DI")])
4911
4912 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4913 [(set (match_operand:SI 0 "register_operand" "=r,r")
4914 (fix:SI
4915 (vec_select:DF
4916 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4917 (parallel [(const_int 0)]))))]
4918 "TARGET_SSE2"
4919 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4920 [(set_attr "type" "sseicvt")
4921 (set_attr "athlon_decode" "double,vector")
4922 (set_attr "amdfam10_decode" "double,double")
4923 (set_attr "bdver1_decode" "double,double")
4924 (set_attr "btver2_decode" "double,double")
4925 (set_attr "prefix_rep" "1")
4926 (set_attr "prefix" "maybe_vex")
4927 (set_attr "mode" "SI")])
4928
4929 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4930 [(set (match_operand:DI 0 "register_operand" "=r,r")
4931 (fix:DI
4932 (vec_select:DF
4933 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4934 (parallel [(const_int 0)]))))]
4935 "TARGET_SSE2 && TARGET_64BIT"
4936 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4937 [(set_attr "type" "sseicvt")
4938 (set_attr "athlon_decode" "double,vector")
4939 (set_attr "amdfam10_decode" "double,double")
4940 (set_attr "bdver1_decode" "double,double")
4941 (set_attr "prefix_rep" "1")
4942 (set_attr "prefix" "maybe_vex")
4943 (set_attr "mode" "DI")])
4944
4945 ;; For float<si2dfmodelower><mode>2 insn pattern
4946 (define_mode_attr si2dfmode
4947 [(V8DF "V8SI") (V4DF "V4SI")])
4948 (define_mode_attr si2dfmodelower
4949 [(V8DF "v8si") (V4DF "v4si")])
4950
4951 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4952 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4953 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4954 "TARGET_AVX && <mask_mode512bit_condition>"
4955 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4956 [(set_attr "type" "ssecvt")
4957 (set_attr "prefix" "maybe_vex")
4958 (set_attr "mode" "<MODE>")])
4959
4960 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4961 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4962 (any_float:VF2_AVX512VL
4963 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4964 "TARGET_AVX512DQ"
4965 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4966 [(set_attr "type" "ssecvt")
4967 (set_attr "prefix" "evex")
4968 (set_attr "mode" "<MODE>")])
4969
4970 ;; For <floatsuffix>float<sselongvecmodelower><mode>2 insn patterns
4971 (define_mode_attr qq2pssuff
4972 [(V8SF "") (V4SF "{y}")])
4973
4974 (define_mode_attr sselongvecmode
4975 [(V8SF "V8DI") (V4SF "V4DI")])
4976
4977 (define_mode_attr sselongvecmodelower
4978 [(V8SF "v8di") (V4SF "v4di")])
4979
4980 (define_mode_attr sseintvecmode3
4981 [(V8SF "XI") (V4SF "OI")
4982 (V8DF "OI") (V4DF "TI")])
4983
4984 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4985 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4986 (any_float:VF1_128_256VL
4987 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4988 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4989 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4990 [(set_attr "type" "ssecvt")
4991 (set_attr "prefix" "evex")
4992 (set_attr "mode" "<MODE>")])
4993
4994 (define_insn "*<floatsuffix>floatv2div2sf2"
4995 [(set (match_operand:V4SF 0 "register_operand" "=v")
4996 (vec_concat:V4SF
4997 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4998 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4999 "TARGET_AVX512DQ && TARGET_AVX512VL"
5000 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
5001 [(set_attr "type" "ssecvt")
5002 (set_attr "prefix" "evex")
5003 (set_attr "mode" "V4SF")])
5004
5005 (define_insn "<floatsuffix>floatv2div2sf2_mask"
5006 [(set (match_operand:V4SF 0 "register_operand" "=v")
5007 (vec_concat:V4SF
5008 (vec_merge:V2SF
5009 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5010 (vec_select:V2SF
5011 (match_operand:V4SF 2 "vector_move_operand" "0C")
5012 (parallel [(const_int 0) (const_int 1)]))
5013 (match_operand:QI 3 "register_operand" "Yk"))
5014 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5015 "TARGET_AVX512DQ && TARGET_AVX512VL"
5016 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5017 [(set_attr "type" "ssecvt")
5018 (set_attr "prefix" "evex")
5019 (set_attr "mode" "V4SF")])
5020
5021 (define_insn "*<floatsuffix>floatv2div2sf2_mask_1"
5022 [(set (match_operand:V4SF 0 "register_operand" "=v")
5023 (vec_concat:V4SF
5024 (vec_merge:V2SF
5025 (any_float:V2SF (match_operand:V2DI 1
5026 "nonimmediate_operand" "vm"))
5027 (const_vector:V2SF [(const_int 0) (const_int 0)])
5028 (match_operand:QI 2 "register_operand" "Yk"))
5029 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5030 "TARGET_AVX512DQ && TARGET_AVX512VL"
5031 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
5032 [(set_attr "type" "ssecvt")
5033 (set_attr "prefix" "evex")
5034 (set_attr "mode" "V4SF")])
5035
5036 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
5037 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
5038 (unsigned_float:VF2_512_256VL
5039 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5040 "TARGET_AVX512F"
5041 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5042 [(set_attr "type" "ssecvt")
5043 (set_attr "prefix" "evex")
5044 (set_attr "mode" "<MODE>")])
5045
5046 (define_insn "ufloatv2siv2df2<mask_name>"
5047 [(set (match_operand:V2DF 0 "register_operand" "=v")
5048 (unsigned_float:V2DF
5049 (vec_select:V2SI
5050 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5051 (parallel [(const_int 0) (const_int 1)]))))]
5052 "TARGET_AVX512VL"
5053 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5054 [(set_attr "type" "ssecvt")
5055 (set_attr "prefix" "evex")
5056 (set_attr "mode" "V2DF")])
5057
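;; The two patterns below consume only the low half of the integer source,
;; so the templates print operand 1 with the %t resp. %x modifier (the ymm
;; resp. xmm view of the register).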
5058 (define_insn "avx512f_cvtdq2pd512_2"
5059 [(set (match_operand:V8DF 0 "register_operand" "=v")
5060 (float:V8DF
5061 (vec_select:V8SI
5062 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5063 (parallel [(const_int 0) (const_int 1)
5064 (const_int 2) (const_int 3)
5065 (const_int 4) (const_int 5)
5066 (const_int 6) (const_int 7)]))))]
5067 "TARGET_AVX512F"
5068 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5069 [(set_attr "type" "ssecvt")
5070 (set_attr "prefix" "evex")
5071 (set_attr "mode" "V8DF")])
5072
5073 (define_insn "avx_cvtdq2pd256_2"
5074 [(set (match_operand:V4DF 0 "register_operand" "=v")
5075 (float:V4DF
5076 (vec_select:V4SI
5077 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
5078 (parallel [(const_int 0) (const_int 1)
5079 (const_int 2) (const_int 3)]))))]
5080 "TARGET_AVX"
5081 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5082 [(set_attr "type" "ssecvt")
5083 (set_attr "prefix" "maybe_evex")
5084 (set_attr "mode" "V4DF")])
5085
5086 (define_insn "sse2_cvtdq2pd<mask_name>"
5087 [(set (match_operand:V2DF 0 "register_operand" "=v")
5088 (float:V2DF
5089 (vec_select:V2SI
5090 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5091 (parallel [(const_int 0) (const_int 1)]))))]
5092 "TARGET_SSE2 && <mask_avx512vl_condition>"
5093 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5094 [(set_attr "type" "ssecvt")
5095 (set_attr "prefix" "maybe_vex")
5096 (set_attr "mode" "V2DF")])
5097
5098 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
5099 [(set (match_operand:V8SI 0 "register_operand" "=v")
5100 (unspec:V8SI
5101 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5102 UNSPEC_FIX_NOTRUNC))]
5103 "TARGET_AVX512F"
5104 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5105 [(set_attr "type" "ssecvt")
5106 (set_attr "prefix" "evex")
5107 (set_attr "mode" "OI")])
5108
5109 (define_insn "avx_cvtpd2dq256<mask_name>"
5110 [(set (match_operand:V4SI 0 "register_operand" "=v")
5111 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5112 UNSPEC_FIX_NOTRUNC))]
5113 "TARGET_AVX && <mask_avx512vl_condition>"
5114 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5115 [(set_attr "type" "ssecvt")
5116 (set_attr "prefix" "<mask_prefix>")
5117 (set_attr "mode" "OI")])
5118
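;; vcvtpd2dq{y} produces a 128-bit result and, being VEX encoded, zeroes the
;; upper half of the destination ymm register; the expander and insn below
;; model this by concatenating the V4SI result with a zero vector.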
5119 (define_expand "avx_cvtpd2dq256_2"
5120 [(set (match_operand:V8SI 0 "register_operand")
5121 (vec_concat:V8SI
5122 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
5123 UNSPEC_FIX_NOTRUNC)
5124 (match_dup 2)))]
5125 "TARGET_AVX"
5126 "operands[2] = CONST0_RTX (V4SImode);")
5127
5128 (define_insn "*avx_cvtpd2dq256_2"
5129 [(set (match_operand:V8SI 0 "register_operand" "=v")
5130 (vec_concat:V8SI
5131 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5132 UNSPEC_FIX_NOTRUNC)
5133 (match_operand:V4SI 2 "const0_operand")))]
5134 "TARGET_AVX"
5135 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5136 [(set_attr "type" "ssecvt")
5137 (set_attr "prefix" "vex")
5138 (set_attr "btver2_decode" "vector")
5139 (set_attr "mode" "OI")])
5140
5141 (define_insn "sse2_cvtpd2dq<mask_name>"
5142 [(set (match_operand:V4SI 0 "register_operand" "=v")
5143 (vec_concat:V4SI
5144 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
5145 UNSPEC_FIX_NOTRUNC)
5146 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5147 "TARGET_SSE2 && <mask_avx512vl_condition>"
5148 {
5149 if (TARGET_AVX)
5150 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5151 else
5152 return "cvtpd2dq\t{%1, %0|%0, %1}";
5153 }
5154 [(set_attr "type" "ssecvt")
5155 (set_attr "prefix_rep" "1")
5156 (set_attr "prefix_data16" "0")
5157 (set_attr "prefix" "maybe_vex")
5158 (set_attr "mode" "TI")
5159 (set_attr "amdfam10_decode" "double")
5160 (set_attr "athlon_decode" "vector")
5161 (set_attr "bdver1_decode" "double")])
5162
5163 ;; For ufix_notrunc* insn patterns
5164 (define_mode_attr pd2udqsuff
5165 [(V8DF "") (V4DF "{y}")])
5166
5167 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
5168 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
5169 (unspec:<si2dfmode>
5170 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
5171 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5172 "TARGET_AVX512F"
5173 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5174 [(set_attr "type" "ssecvt")
5175 (set_attr "prefix" "evex")
5176 (set_attr "mode" "<sseinsnmode>")])
5177
5178 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
5179 [(set (match_operand:V4SI 0 "register_operand" "=v")
5180 (vec_concat:V4SI
5181 (unspec:V2SI
5182 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5183 UNSPEC_UNSIGNED_FIX_NOTRUNC)
5184 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5185 "TARGET_AVX512VL"
5186 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5187 [(set_attr "type" "ssecvt")
5188 (set_attr "prefix" "evex")
5189 (set_attr "mode" "TI")])
5190
5191 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
5192 [(set (match_operand:V8SI 0 "register_operand" "=v")
5193 (any_fix:V8SI
5194 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5195 "TARGET_AVX512F"
5196 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5197 [(set_attr "type" "ssecvt")
5198 (set_attr "prefix" "evex")
5199 (set_attr "mode" "OI")])
5200
5201 (define_insn "ufix_truncv2dfv2si2<mask_name>"
5202 [(set (match_operand:V4SI 0 "register_operand" "=v")
5203 (vec_concat:V4SI
5204 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5205 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5206 "TARGET_AVX512VL"
5207 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5208 [(set_attr "type" "ssecvt")
5209 (set_attr "prefix" "evex")
5210 (set_attr "mode" "TI")])
5211
5212 (define_insn "fix_truncv4dfv4si2<mask_name>"
5213 [(set (match_operand:V4SI 0 "register_operand" "=v")
5214 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5215 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
5216 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5217 [(set_attr "type" "ssecvt")
5218 (set_attr "prefix" "maybe_evex")
5219 (set_attr "mode" "OI")])
5220
5221 (define_insn "ufix_truncv4dfv4si2<mask_name>"
5222 [(set (match_operand:V4SI 0 "register_operand" "=v")
5223 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5224 "TARGET_AVX512VL && TARGET_AVX512F"
5225 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5226 [(set_attr "type" "ssecvt")
5227 (set_attr "prefix" "maybe_evex")
5228 (set_attr "mode" "OI")])
5229
5230 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
5231 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5232 (any_fix:<sseintvecmode>
5233 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5234 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
5235 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5236 [(set_attr "type" "ssecvt")
5237 (set_attr "prefix" "evex")
5238 (set_attr "mode" "<sseintvecmode2>")])
5239
5240 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5241 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5242 (unspec:<sseintvecmode>
5243 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5244 UNSPEC_FIX_NOTRUNC))]
5245 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5246 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5247 [(set_attr "type" "ssecvt")
5248 (set_attr "prefix" "evex")
5249 (set_attr "mode" "<sseintvecmode2>")])
5250
5251 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5252 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5253 (unspec:<sseintvecmode>
5254 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
5255 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5256 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5257 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5258 [(set_attr "type" "ssecvt")
5259 (set_attr "prefix" "evex")
5260 (set_attr "mode" "<sseintvecmode2>")])
5261
5262 (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
5263 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
5264 (any_fix:<sselongvecmode>
5265 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5266 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
5267 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5268 [(set_attr "type" "ssecvt")
5269 (set_attr "prefix" "evex")
5270 (set_attr "mode" "<sseintvecmode3>")])
5271
5272 (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
5273 [(set (match_operand:V2DI 0 "register_operand" "=v")
5274 (any_fix:V2DI
5275 (vec_select:V2SF
5276 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5277 (parallel [(const_int 0) (const_int 1)]))))]
5278 "TARGET_AVX512DQ && TARGET_AVX512VL"
5279 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5280 [(set_attr "type" "ssecvt")
5281 (set_attr "prefix" "evex")
5282 (set_attr "mode" "TI")])
5283
5284 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
5285 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5286 (unsigned_fix:<sseintvecmode>
5287 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
5288 "TARGET_AVX512VL"
5289 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5290 [(set_attr "type" "ssecvt")
5291 (set_attr "prefix" "evex")
5292 (set_attr "mode" "<sseintvecmode2>")])
5293
5294 (define_expand "avx_cvttpd2dq256_2"
5295 [(set (match_operand:V8SI 0 "register_operand")
5296 (vec_concat:V8SI
5297 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
5298 (match_dup 2)))]
5299 "TARGET_AVX"
5300 "operands[2] = CONST0_RTX (V4SImode);")
5301
5302 (define_insn "sse2_cvttpd2dq<mask_name>"
5303 [(set (match_operand:V4SI 0 "register_operand" "=v")
5304 (vec_concat:V4SI
5305 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
5306 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5307 "TARGET_SSE2 && <mask_avx512vl_condition>"
5308 {
5309 if (TARGET_AVX)
5310 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5311 else
5312 return "cvttpd2dq\t{%1, %0|%0, %1}";
5313 }
5314 [(set_attr "type" "ssecvt")
5315 (set_attr "amdfam10_decode" "double")
5316 (set_attr "athlon_decode" "vector")
5317 (set_attr "bdver1_decode" "double")
5318 (set_attr "prefix" "maybe_vex")
5319 (set_attr "mode" "TI")])
5320
5321 (define_insn "sse2_cvtsd2ss<round_name>"
5322 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5323 (vec_merge:V4SF
5324 (vec_duplicate:V4SF
5325 (float_truncate:V2SF
5326 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
5327 (match_operand:V4SF 1 "register_operand" "0,0,v")
5328 (const_int 1)))]
5329 "TARGET_SSE2"
5330 "@
5331 cvtsd2ss\t{%2, %0|%0, %2}
5332 cvtsd2ss\t{%2, %0|%0, %q2}
5333 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
5334 [(set_attr "isa" "noavx,noavx,avx")
5335 (set_attr "type" "ssecvt")
5336 (set_attr "athlon_decode" "vector,double,*")
5337 (set_attr "amdfam10_decode" "vector,double,*")
5338 (set_attr "bdver1_decode" "direct,direct,*")
5339 (set_attr "btver2_decode" "double,double,double")
5340 (set_attr "prefix" "orig,orig,<round_prefix>")
5341 (set_attr "mode" "SF")])
5342
5343 (define_insn "*sse2_vd_cvtsd2ss"
5344 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5345 (vec_merge:V4SF
5346 (vec_duplicate:V4SF
5347 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
5348 (match_operand:V4SF 1 "register_operand" "0,0,v")
5349 (const_int 1)))]
5350 "TARGET_SSE2"
5351 "@
5352 cvtsd2ss\t{%2, %0|%0, %2}
5353 cvtsd2ss\t{%2, %0|%0, %2}
5354 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
5355 [(set_attr "isa" "noavx,noavx,avx")
5356 (set_attr "type" "ssecvt")
5357 (set_attr "athlon_decode" "vector,double,*")
5358 (set_attr "amdfam10_decode" "vector,double,*")
5359 (set_attr "bdver1_decode" "direct,direct,*")
5360 (set_attr "btver2_decode" "double,double,double")
5361 (set_attr "prefix" "orig,orig,vex")
5362 (set_attr "mode" "SF")])
5363
5364 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
5365 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5366 (vec_merge:V2DF
5367 (float_extend:V2DF
5368 (vec_select:V2SF
5369 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
5370 (parallel [(const_int 0) (const_int 1)])))
5371 (match_operand:V2DF 1 "register_operand" "0,0,v")
5372 (const_int 1)))]
5373 "TARGET_SSE2"
5374 "@
5375 cvtss2sd\t{%2, %0|%0, %2}
5376 cvtss2sd\t{%2, %0|%0, %k2}
5377 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
5378 [(set_attr "isa" "noavx,noavx,avx")
5379 (set_attr "type" "ssecvt")
5380 (set_attr "amdfam10_decode" "vector,double,*")
5381 (set_attr "athlon_decode" "direct,direct,*")
5382 (set_attr "bdver1_decode" "direct,direct,*")
5383 (set_attr "btver2_decode" "double,double,double")
5384 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
5385 (set_attr "mode" "DF")])
5386
5387 (define_insn "*sse2_vd_cvtss2sd"
5388 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5389 (vec_merge:V2DF
5390 (vec_duplicate:V2DF
5391 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
5392 (match_operand:V2DF 1 "register_operand" "0,0,v")
5393 (const_int 1)))]
5394 "TARGET_SSE2"
5395 "@
5396 cvtss2sd\t{%2, %0|%0, %2}
5397 cvtss2sd\t{%2, %0|%0, %2}
5398 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
5399 [(set_attr "isa" "noavx,noavx,avx")
5400 (set_attr "type" "ssecvt")
5401 (set_attr "amdfam10_decode" "vector,double,*")
5402 (set_attr "athlon_decode" "direct,direct,*")
5403 (set_attr "bdver1_decode" "direct,direct,*")
5404 (set_attr "btver2_decode" "double,double,double")
5405 (set_attr "prefix" "orig,orig,vex")
5406 (set_attr "mode" "DF")])
5407
5408 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
5409 [(set (match_operand:V8SF 0 "register_operand" "=v")
5410 (float_truncate:V8SF
5411 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
5412 "TARGET_AVX512F"
5413 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5414 [(set_attr "type" "ssecvt")
5415 (set_attr "prefix" "evex")
5416 (set_attr "mode" "V8SF")])
5417
5418 (define_insn "avx_cvtpd2ps256<mask_name>"
5419 [(set (match_operand:V4SF 0 "register_operand" "=v")
5420 (float_truncate:V4SF
5421 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5422 "TARGET_AVX && <mask_avx512vl_condition>"
5423 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5424 [(set_attr "type" "ssecvt")
5425 (set_attr "prefix" "maybe_evex")
5426 (set_attr "btver2_decode" "vector")
5427 (set_attr "mode" "V4SF")])
5428
5429 (define_expand "sse2_cvtpd2ps"
5430 [(set (match_operand:V4SF 0 "register_operand")
5431 (vec_concat:V4SF
5432 (float_truncate:V2SF
5433 (match_operand:V2DF 1 "vector_operand"))
5434 (match_dup 2)))]
5435 "TARGET_SSE2"
5436 "operands[2] = CONST0_RTX (V2SFmode);")
5437
5438 (define_expand "sse2_cvtpd2ps_mask"
5439 [(set (match_operand:V4SF 0 "register_operand")
5440 (vec_merge:V4SF
5441 (vec_concat:V4SF
5442 (float_truncate:V2SF
5443 (match_operand:V2DF 1 "vector_operand"))
5444 (match_dup 4))
5445 (match_operand:V4SF 2 "register_operand")
5446 (match_operand:QI 3 "register_operand")))]
5447 "TARGET_SSE2"
5448 "operands[4] = CONST0_RTX (V2SFmode);")
5449
5450 (define_insn "*sse2_cvtpd2ps<mask_name>"
5451 [(set (match_operand:V4SF 0 "register_operand" "=v")
5452 (vec_concat:V4SF
5453 (float_truncate:V2SF
5454 (match_operand:V2DF 1 "vector_operand" "vBm"))
5455 (match_operand:V2SF 2 "const0_operand")))]
5456 "TARGET_SSE2 && <mask_avx512vl_condition>"
5457 {
5458 if (TARGET_AVX)
5459 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
5460 else
5461 return "cvtpd2ps\t{%1, %0|%0, %1}";
5462 }
5463 [(set_attr "type" "ssecvt")
5464 (set_attr "amdfam10_decode" "double")
5465 (set_attr "athlon_decode" "vector")
5466 (set_attr "bdver1_decode" "double")
5467 (set_attr "prefix_data16" "1")
5468 (set_attr "prefix" "maybe_vex")
5469 (set_attr "mode" "V4SF")])
5470
5471 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
5472 (define_mode_attr sf2dfmode
5473 [(V8DF "V8SF") (V4DF "V4SF")])
5474
5475 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
5476 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5477 (float_extend:VF2_512_256
5478 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5479 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
5480 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5481 [(set_attr "type" "ssecvt")
5482 (set_attr "prefix" "maybe_vex")
5483 (set_attr "mode" "<MODE>")])
5484
5485 (define_insn "*avx_cvtps2pd256_2"
5486 [(set (match_operand:V4DF 0 "register_operand" "=v")
5487 (float_extend:V4DF
5488 (vec_select:V4SF
5489 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5490 (parallel [(const_int 0) (const_int 1)
5491 (const_int 2) (const_int 3)]))))]
5492 "TARGET_AVX"
5493 "vcvtps2pd\t{%x1, %0|%0, %x1}"
5494 [(set_attr "type" "ssecvt")
5495 (set_attr "prefix" "vex")
5496 (set_attr "mode" "V4DF")])
5497
5498 (define_insn "vec_unpacks_lo_v16sf"
5499 [(set (match_operand:V8DF 0 "register_operand" "=v")
5500 (float_extend:V8DF
5501 (vec_select:V8SF
5502 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5503 (parallel [(const_int 0) (const_int 1)
5504 (const_int 2) (const_int 3)
5505 (const_int 4) (const_int 5)
5506 (const_int 6) (const_int 7)]))))]
5507 "TARGET_AVX512F"
5508 "vcvtps2pd\t{%t1, %0|%0, %t1}"
5509 [(set_attr "type" "ssecvt")
5510 (set_attr "prefix" "evex")
5511 (set_attr "mode" "V8DF")])
5512
5513 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5514 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5515 (unspec:<avx512fmaskmode>
5516 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
5517 UNSPEC_CVTINT2MASK))]
5518 "TARGET_AVX512BW"
5519 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5520 [(set_attr "prefix" "evex")
5521 (set_attr "mode" "<sseinsnmode>")])
5522
5523 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5524 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5525 (unspec:<avx512fmaskmode>
5526 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
5527 UNSPEC_CVTINT2MASK))]
5528 "TARGET_AVX512DQ"
5529 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5530 [(set_attr "prefix" "evex")
5531 (set_attr "mode" "<sseinsnmode>")])
5532
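;; vpmovm2[bwdq] materializes a mask register as a vector of all-ones and
;; all-zeros elements, which the expanders below express as a vec_merge of
;; constant -1 and 0 under the mask operand.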
5533 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5534 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5535 (vec_merge:VI12_AVX512VL
5536 (match_dup 2)
5537 (match_dup 3)
5538 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5539 "TARGET_AVX512BW"
5540 {
5541 operands[2] = CONSTM1_RTX (<MODE>mode);
5542 operands[3] = CONST0_RTX (<MODE>mode);
5543 })
5544
5545 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5546 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5547 (vec_merge:VI12_AVX512VL
5548 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
5549 (match_operand:VI12_AVX512VL 3 "const0_operand")
5550 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5551 "TARGET_AVX512BW"
5552 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5553 [(set_attr "prefix" "evex")
5554 (set_attr "mode" "<sseinsnmode>")])
5555
5556 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5557 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5558 (vec_merge:VI48_AVX512VL
5559 (match_dup 2)
5560 (match_dup 3)
5561 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5562 "TARGET_AVX512DQ"
5563 {
5564 operands[2] = CONSTM1_RTX (<MODE>mode);
5565 operands[3] = CONST0_RTX (<MODE>mode);
5566 })
5567
5568 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5569 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5570 (vec_merge:VI48_AVX512VL
5571 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
5572 (match_operand:VI48_AVX512VL 3 "const0_operand")
5573 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5574 "TARGET_AVX512DQ"
5575 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5576 [(set_attr "prefix" "evex")
5577 (set_attr "mode" "<sseinsnmode>")])
5578
5579 (define_insn "sse2_cvtps2pd<mask_name>"
5580 [(set (match_operand:V2DF 0 "register_operand" "=v")
5581 (float_extend:V2DF
5582 (vec_select:V2SF
5583 (match_operand:V4SF 1 "vector_operand" "vm")
5584 (parallel [(const_int 0) (const_int 1)]))))]
5585 "TARGET_SSE2 && <mask_avx512vl_condition>"
5586 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5587 [(set_attr "type" "ssecvt")
5588 (set_attr "amdfam10_decode" "direct")
5589 (set_attr "athlon_decode" "double")
5590 (set_attr "bdver1_decode" "double")
5591 (set_attr "prefix_data16" "0")
5592 (set_attr "prefix" "maybe_vex")
5593 (set_attr "mode" "V2DF")])
5594
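;; The first set below is a movhlps-style shuffle that moves the two high
;; SFmode elements of operand 1 into the low half of a fresh register; the
;; second set then widens that low pair to V2DF.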
5595 (define_expand "vec_unpacks_hi_v4sf"
5596 [(set (match_dup 2)
5597 (vec_select:V4SF
5598 (vec_concat:V8SF
5599 (match_dup 2)
5600 (match_operand:V4SF 1 "vector_operand"))
5601 (parallel [(const_int 6) (const_int 7)
5602 (const_int 2) (const_int 3)])))
5603 (set (match_operand:V2DF 0 "register_operand")
5604 (float_extend:V2DF
5605 (vec_select:V2SF
5606 (match_dup 2)
5607 (parallel [(const_int 0) (const_int 1)]))))]
5608 "TARGET_SSE2"
5609 "operands[2] = gen_reg_rtx (V4SFmode);")
5610
5611 (define_expand "vec_unpacks_hi_v8sf"
5612 [(set (match_dup 2)
5613 (vec_select:V4SF
5614 (match_operand:V8SF 1 "register_operand")
5615 (parallel [(const_int 4) (const_int 5)
5616 (const_int 6) (const_int 7)])))
5617 (set (match_operand:V4DF 0 "register_operand")
5618 (float_extend:V4DF
5619 (match_dup 2)))]
5620 "TARGET_AVX"
5621 "operands[2] = gen_reg_rtx (V4SFmode);")
5622
5623 (define_expand "vec_unpacks_hi_v16sf"
5624 [(set (match_dup 2)
5625 (vec_select:V8SF
5626 (match_operand:V16SF 1 "register_operand")
5627 (parallel [(const_int 8) (const_int 9)
5628 (const_int 10) (const_int 11)
5629 (const_int 12) (const_int 13)
5630 (const_int 14) (const_int 15)])))
5631 (set (match_operand:V8DF 0 "register_operand")
5632 (float_extend:V8DF
5633 (match_dup 2)))]
5634 "TARGET_AVX512F"
5635 "operands[2] = gen_reg_rtx (V8SFmode);")
5636
5637 (define_expand "vec_unpacks_lo_v4sf"
5638 [(set (match_operand:V2DF 0 "register_operand")
5639 (float_extend:V2DF
5640 (vec_select:V2SF
5641 (match_operand:V4SF 1 "vector_operand")
5642 (parallel [(const_int 0) (const_int 1)]))))]
5643 "TARGET_SSE2")
5644
5645 (define_expand "vec_unpacks_lo_v8sf"
5646 [(set (match_operand:V4DF 0 "register_operand")
5647 (float_extend:V4DF
5648 (vec_select:V4SF
5649 (match_operand:V8SF 1 "nonimmediate_operand")
5650 (parallel [(const_int 0) (const_int 1)
5651 (const_int 2) (const_int 3)]))))]
5652 "TARGET_AVX")
5653
5654 (define_mode_attr sseunpackfltmode
5655 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5656 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
5657
5658 (define_expand "vec_unpacks_float_hi_<mode>"
5659 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5660 (match_operand:VI2_AVX512F 1 "register_operand")]
5661 "TARGET_SSE2"
5662 {
5663 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5664
5665 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
5666 emit_insn (gen_rtx_SET (operands[0],
5667 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5668 DONE;
5669 })
5670
5671 (define_expand "vec_unpacks_float_lo_<mode>"
5672 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5673 (match_operand:VI2_AVX512F 1 "register_operand")]
5674 "TARGET_SSE2"
5675 {
5676 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5677
5678 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
5679 emit_insn (gen_rtx_SET (operands[0],
5680 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5681 DONE;
5682 })
5683
5684 (define_expand "vec_unpacku_float_hi_<mode>"
5685 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5686 (match_operand:VI2_AVX512F 1 "register_operand")]
5687 "TARGET_SSE2"
5688 {
5689 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5690
5691 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5692 emit_insn (gen_rtx_SET (operands[0],
5693 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5694 DONE;
5695 })
5696
5697 (define_expand "vec_unpacku_float_lo_<mode>"
5698 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5699 (match_operand:VI2_AVX512F 1 "register_operand")]
5700 "TARGET_SSE2"
5701 {
5702 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5703
5704 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5705 emit_insn (gen_rtx_SET (operands[0],
5706 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5707 DONE;
5708 })
5709
5710 (define_expand "vec_unpacks_float_hi_v4si"
5711 [(set (match_dup 2)
5712 (vec_select:V4SI
5713 (match_operand:V4SI 1 "vector_operand")
5714 (parallel [(const_int 2) (const_int 3)
5715 (const_int 2) (const_int 3)])))
5716 (set (match_operand:V2DF 0 "register_operand")
5717 (float:V2DF
5718 (vec_select:V2SI
5719 (match_dup 2)
5720 (parallel [(const_int 0) (const_int 1)]))))]
5721 "TARGET_SSE2"
5722 "operands[2] = gen_reg_rtx (V4SImode);")
5723
5724 (define_expand "vec_unpacks_float_lo_v4si"
5725 [(set (match_operand:V2DF 0 "register_operand")
5726 (float:V2DF
5727 (vec_select:V2SI
5728 (match_operand:V4SI 1 "vector_operand")
5729 (parallel [(const_int 0) (const_int 1)]))))]
5730 "TARGET_SSE2")
5731
5732 (define_expand "vec_unpacks_float_hi_v8si"
5733 [(set (match_dup 2)
5734 (vec_select:V4SI
5735 (match_operand:V8SI 1 "vector_operand")
5736 (parallel [(const_int 4) (const_int 5)
5737 (const_int 6) (const_int 7)])))
5738 (set (match_operand:V4DF 0 "register_operand")
5739 (float:V4DF
5740 (match_dup 2)))]
5741 "TARGET_AVX"
5742 "operands[2] = gen_reg_rtx (V4SImode);")
5743
5744 (define_expand "vec_unpacks_float_lo_v8si"
5745 [(set (match_operand:V4DF 0 "register_operand")
5746 (float:V4DF
5747 (vec_select:V4SI
5748 (match_operand:V8SI 1 "nonimmediate_operand")
5749 (parallel [(const_int 0) (const_int 1)
5750 (const_int 2) (const_int 3)]))))]
5751 "TARGET_AVX")
5752
5753 (define_expand "vec_unpacks_float_hi_v16si"
5754 [(set (match_dup 2)
5755 (vec_select:V8SI
5756 (match_operand:V16SI 1 "nonimmediate_operand")
5757 (parallel [(const_int 8) (const_int 9)
5758 (const_int 10) (const_int 11)
5759 (const_int 12) (const_int 13)
5760 (const_int 14) (const_int 15)])))
5761 (set (match_operand:V8DF 0 "register_operand")
5762 (float:V8DF
5763 (match_dup 2)))]
5764 "TARGET_AVX512F"
5765 "operands[2] = gen_reg_rtx (V8SImode);")
5766
5767 (define_expand "vec_unpacks_float_lo_v16si"
5768 [(set (match_operand:V8DF 0 "register_operand")
5769 (float:V8DF
5770 (vec_select:V8SI
5771 (match_operand:V16SI 1 "nonimmediate_operand")
5772 (parallel [(const_int 0) (const_int 1)
5773 (const_int 2) (const_int 3)
5774 (const_int 4) (const_int 5)
5775 (const_int 6) (const_int 7)]))))]
5776 "TARGET_AVX512F")
5777
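;; Lacking native unsigned int->double conversions, the vec_unpacku_float_*
;; expanders below convert as signed and then repair lanes whose high bit was
;; set: the lt/and/plus sequence adds 2^32 (built via real_ldexp) wherever
;; the signed result came out negative.  E.g. 0xffffffff converts to -1.0,
;; and -1.0 + 2^32 = 4294967295.0 is the desired unsigned value.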
5778 (define_expand "vec_unpacku_float_hi_v4si"
5779 [(set (match_dup 5)
5780 (vec_select:V4SI
5781 (match_operand:V4SI 1 "vector_operand")
5782 (parallel [(const_int 2) (const_int 3)
5783 (const_int 2) (const_int 3)])))
5784 (set (match_dup 6)
5785 (float:V2DF
5786 (vec_select:V2SI
5787 (match_dup 5)
5788 (parallel [(const_int 0) (const_int 1)]))))
5789 (set (match_dup 7)
5790 (lt:V2DF (match_dup 6) (match_dup 3)))
5791 (set (match_dup 8)
5792 (and:V2DF (match_dup 7) (match_dup 4)))
5793 (set (match_operand:V2DF 0 "register_operand")
5794 (plus:V2DF (match_dup 6) (match_dup 8)))]
5795 "TARGET_SSE2"
5796 {
5797 REAL_VALUE_TYPE TWO32r;
5798 rtx x;
5799 int i;
5800
5801 real_ldexp (&TWO32r, &dconst1, 32);
5802 x = const_double_from_real_value (TWO32r, DFmode);
5803
5804 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5805 operands[4] = force_reg (V2DFmode,
5806 ix86_build_const_vector (V2DFmode, 1, x));
5807
5808 operands[5] = gen_reg_rtx (V4SImode);
5809
5810 for (i = 6; i < 9; i++)
5811 operands[i] = gen_reg_rtx (V2DFmode);
5812 })
5813
5814 (define_expand "vec_unpacku_float_lo_v4si"
5815 [(set (match_dup 5)
5816 (float:V2DF
5817 (vec_select:V2SI
5818 (match_operand:V4SI 1 "vector_operand")
5819 (parallel [(const_int 0) (const_int 1)]))))
5820 (set (match_dup 6)
5821 (lt:V2DF (match_dup 5) (match_dup 3)))
5822 (set (match_dup 7)
5823 (and:V2DF (match_dup 6) (match_dup 4)))
5824 (set (match_operand:V2DF 0 "register_operand")
5825 (plus:V2DF (match_dup 5) (match_dup 7)))]
5826 "TARGET_SSE2"
5827 {
5828 REAL_VALUE_TYPE TWO32r;
5829 rtx x;
5830 int i;
5831
5832 real_ldexp (&TWO32r, &dconst1, 32);
5833 x = const_double_from_real_value (TWO32r, DFmode);
5834
5835 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5836 operands[4] = force_reg (V2DFmode,
5837 ix86_build_const_vector (V2DFmode, 1, x));
5838
5839 for (i = 5; i < 8; i++)
5840 operands[i] = gen_reg_rtx (V2DFmode);
5841 })
5842
5843 (define_expand "vec_unpacku_float_hi_v8si"
5844 [(match_operand:V4DF 0 "register_operand")
5845 (match_operand:V8SI 1 "register_operand")]
5846 "TARGET_AVX"
5847 {
5848 REAL_VALUE_TYPE TWO32r;
5849 rtx x, tmp[6];
5850 int i;
5851
5852 real_ldexp (&TWO32r, &dconst1, 32);
5853 x = const_double_from_real_value (TWO32r, DFmode);
5854
5855 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5856 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5857 tmp[5] = gen_reg_rtx (V4SImode);
5858
5859 for (i = 2; i < 5; i++)
5860 tmp[i] = gen_reg_rtx (V4DFmode);
5861 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5862 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5863 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5864 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5865 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5866 DONE;
5867 })
5868
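;; The AVX512F variants apply the same 2^32 correction with a mask register:
;; the LT comparison produces a QImode mask and gen_addv8df3_mask adds the
;; constant only in the lanes it selects.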
5869 (define_expand "vec_unpacku_float_hi_v16si"
5870 [(match_operand:V8DF 0 "register_operand")
5871 (match_operand:V16SI 1 "register_operand")]
5872 "TARGET_AVX512F"
5873 {
5874 REAL_VALUE_TYPE TWO32r;
5875 rtx k, x, tmp[4];
5876
5877 real_ldexp (&TWO32r, &dconst1, 32);
5878 x = const_double_from_real_value (TWO32r, DFmode);
5879
5880 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5881 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5882 tmp[2] = gen_reg_rtx (V8DFmode);
5883 tmp[3] = gen_reg_rtx (V8SImode);
5884 k = gen_reg_rtx (QImode);
5885
5886 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5887 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5888 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5889 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5890 emit_move_insn (operands[0], tmp[2]);
5891 DONE;
5892 })
5893
5894 (define_expand "vec_unpacku_float_lo_v8si"
5895 [(match_operand:V4DF 0 "register_operand")
5896 (match_operand:V8SI 1 "nonimmediate_operand")]
5897 "TARGET_AVX"
5898 {
5899 REAL_VALUE_TYPE TWO32r;
5900 rtx x, tmp[5];
5901 int i;
5902
5903 real_ldexp (&TWO32r, &dconst1, 32);
5904 x = const_double_from_real_value (TWO32r, DFmode);
5905
5906 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5907 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5908
5909 for (i = 2; i < 5; i++)
5910 tmp[i] = gen_reg_rtx (V4DFmode);
5911 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5912 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5913 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5914 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5915 DONE;
5916 })
5917
5918 (define_expand "vec_unpacku_float_lo_v16si"
5919 [(match_operand:V8DF 0 "register_operand")
5920 (match_operand:V16SI 1 "nonimmediate_operand")]
5921 "TARGET_AVX512F"
5922 {
5923 REAL_VALUE_TYPE TWO32r;
5924 rtx k, x, tmp[3];
5925
5926 real_ldexp (&TWO32r, &dconst1, 32);
5927 x = const_double_from_real_value (TWO32r, DFmode);
5928
5929 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5930 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5931 tmp[2] = gen_reg_rtx (V8DFmode);
5932 k = gen_reg_rtx (QImode);
5933
5934 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5935 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5936 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5937 emit_move_insn (operands[0], tmp[2]);
5938 DONE;
5939 })
5940
5941 (define_expand "vec_pack_trunc_<mode>"
5942 [(set (match_dup 3)
5943 (float_truncate:<sf2dfmode>
5944 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5945 (set (match_dup 4)
5946 (float_truncate:<sf2dfmode>
5947 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5948 (set (match_operand:<ssePSmode> 0 "register_operand")
5949 (vec_concat:<ssePSmode>
5950 (match_dup 3)
5951 (match_dup 4)))]
5952 "TARGET_AVX"
5953 {
5954 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5955 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
5956 })
5957
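;; With AVX (and no 128-bit preference) the two V2DF inputs are concatenated
;; so a single vcvtpd2ps{y} performs the pack; otherwise each half is
;; converted separately and the two low pairs are merged with movlhps.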
5958 (define_expand "vec_pack_trunc_v2df"
5959 [(match_operand:V4SF 0 "register_operand")
5960 (match_operand:V2DF 1 "vector_operand")
5961 (match_operand:V2DF 2 "vector_operand")]
5962 "TARGET_SSE2"
5963 {
5964 rtx tmp0, tmp1;
5965
5966 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5967 {
5968 tmp0 = gen_reg_rtx (V4DFmode);
5969 tmp1 = force_reg (V2DFmode, operands[1]);
5970
5971 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5972 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5973 }
5974 else
5975 {
5976 tmp0 = gen_reg_rtx (V4SFmode);
5977 tmp1 = gen_reg_rtx (V4SFmode);
5978
5979 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5980 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5981 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
5982 }
5983 DONE;
5984 })
5985
5986 (define_expand "vec_pack_sfix_trunc_v8df"
5987 [(match_operand:V16SI 0 "register_operand")
5988 (match_operand:V8DF 1 "nonimmediate_operand")
5989 (match_operand:V8DF 2 "nonimmediate_operand")]
5990 "TARGET_AVX512F"
5991 {
5992 rtx r1, r2;
5993
5994 r1 = gen_reg_rtx (V8SImode);
5995 r2 = gen_reg_rtx (V8SImode);
5996
5997 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5998 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5999 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6000 DONE;
6001 })
6002
6003 (define_expand "vec_pack_sfix_trunc_v4df"
6004 [(match_operand:V8SI 0 "register_operand")
6005 (match_operand:V4DF 1 "nonimmediate_operand")
6006 (match_operand:V4DF 2 "nonimmediate_operand")]
6007 "TARGET_AVX"
6008 {
6009 rtx r1, r2;
6010
6011 r1 = gen_reg_rtx (V4SImode);
6012 r2 = gen_reg_rtx (V4SImode);
6013
6014 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
6015 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
6016 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6017 DONE;
6018 })
6019
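;; In the SSE2 path each cvttpd2dq leaves its two integers in the low 64 bits
;; of its result, so interleaving the low quadwords (vec_interleave_lowv2di,
;; i.e. punpcklqdq) packs all four values into one V4SI.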
6020 (define_expand "vec_pack_sfix_trunc_v2df"
6021 [(match_operand:V4SI 0 "register_operand")
6022 (match_operand:V2DF 1 "vector_operand")
6023 (match_operand:V2DF 2 "vector_operand")]
6024 "TARGET_SSE2"
6025 {
6026 rtx tmp0, tmp1, tmp2;
6027
6028 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6029 {
6030 tmp0 = gen_reg_rtx (V4DFmode);
6031 tmp1 = force_reg (V2DFmode, operands[1]);
6032
6033 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6034 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
6035 }
6036 else
6037 {
6038 tmp0 = gen_reg_rtx (V4SImode);
6039 tmp1 = gen_reg_rtx (V4SImode);
6040 tmp2 = gen_reg_rtx (V2DImode);
6041
6042 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
6043 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
6044 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6045 gen_lowpart (V2DImode, tmp0),
6046 gen_lowpart (V2DImode, tmp1)));
6047 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6048 }
6049 DONE;
6050 })
6051
6052 (define_mode_attr ssepackfltmode
6053 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
6054
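;; Unsigned pack-and-truncate: V8DF has a native unsigned conversion, while
;; the narrower modes reuse the signed vec_pack_sfix_trunc path on inputs
;; pre-adjusted by ix86_expand_adjust_ufix_to_sfix_si (presumably biasing
;; out-of-range lanes, as above) and then xor in the correction masks, taking
;; their even elements so they line up with the packed SImode result.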
6055 (define_expand "vec_pack_ufix_trunc_<mode>"
6056 [(match_operand:<ssepackfltmode> 0 "register_operand")
6057 (match_operand:VF2 1 "register_operand")
6058 (match_operand:VF2 2 "register_operand")]
6059 "TARGET_SSE2"
6060 {
6061 if (<MODE>mode == V8DFmode)
6062 {
6063 rtx r1, r2;
6064
6065 r1 = gen_reg_rtx (V8SImode);
6066 r2 = gen_reg_rtx (V8SImode);
6067
6068 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
6069 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
6070 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6071 }
6072 else
6073 {
6074 rtx tmp[7];
6075 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
6076 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
6077 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
6078 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
6079 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
6080 {
6081 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
6082 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
6083 }
6084 else
6085 {
6086 tmp[5] = gen_reg_rtx (V8SFmode);
6087 ix86_expand_vec_extract_even_odd (tmp[5],
6088 gen_lowpart (V8SFmode, tmp[2]),
6089 gen_lowpart (V8SFmode, tmp[3]), 0);
6090 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
6091 }
6092 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
6093 operands[0], 0, OPTAB_DIRECT);
6094 if (tmp[6] != operands[0])
6095 emit_move_insn (operands[0], tmp[6]);
6096 }
6097
6098 DONE;
6099 })
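
;; The non-AVX512F branch above has no unsigned conversion to use
;; directly, so it reuses the signed pack: ix86_expand_adjust_ufix_to_sfix_si
;; adjusts each input so the signed conversion is exact and returns a
;; per-element fixup vector, vec_pack_sfix_trunc_<mode> packs the adjusted
;; values, ix86_expand_vec_extract_even_odd merges the two fixup vectors,
;; and the final XOR restores the unsigned result.  The exact adjustment
;; lives in the helper in i386.c; only the pack/extract/XOR plumbing is
;; visible here.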
6100
6101 (define_expand "avx512f_vec_pack_sfix_v8df"
6102 [(match_operand:V16SI 0 "register_operand")
6103 (match_operand:V8DF 1 "nonimmediate_operand")
6104 (match_operand:V8DF 2 "nonimmediate_operand")]
6105 "TARGET_AVX512F"
6106 {
6107 rtx r1, r2;
6108
6109 r1 = gen_reg_rtx (V8SImode);
6110 r2 = gen_reg_rtx (V8SImode);
6111
6112 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
6113 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
6114 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6115 DONE;
6116 })
6117
6118 (define_expand "vec_pack_sfix_v4df"
6119 [(match_operand:V8SI 0 "register_operand")
6120 (match_operand:V4DF 1 "nonimmediate_operand")
6121 (match_operand:V4DF 2 "nonimmediate_operand")]
6122 "TARGET_AVX"
6123 {
6124 rtx r1, r2;
6125
6126 r1 = gen_reg_rtx (V4SImode);
6127 r2 = gen_reg_rtx (V4SImode);
6128
6129 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
6130 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
6131 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6132 DONE;
6133 })
6134
6135 (define_expand "vec_pack_sfix_v2df"
6136 [(match_operand:V4SI 0 "register_operand")
6137 (match_operand:V2DF 1 "vector_operand")
6138 (match_operand:V2DF 2 "vector_operand")]
6139 "TARGET_SSE2"
6140 {
6141 rtx tmp0, tmp1, tmp2;
6142
6143 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6144 {
6145 tmp0 = gen_reg_rtx (V4DFmode);
6146 tmp1 = force_reg (V2DFmode, operands[1]);
6147
6148 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6149 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
6150 }
6151 else
6152 {
6153 tmp0 = gen_reg_rtx (V4SImode);
6154 tmp1 = gen_reg_rtx (V4SImode);
6155 tmp2 = gen_reg_rtx (V2DImode);
6156
6157 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
6158 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
6159 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6160 gen_lowpart (V2DImode, tmp0),
6161 gen_lowpart (V2DImode, tmp1)));
6162 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6163 }
6164 DONE;
6165 })
6166
6167 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6168 ;;
6169 ;; Parallel single-precision floating point element swizzling
6170 ;;
6171 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6172
6173 (define_expand "sse_movhlps_exp"
6174 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6175 (vec_select:V4SF
6176 (vec_concat:V8SF
6177 (match_operand:V4SF 1 "nonimmediate_operand")
6178 (match_operand:V4SF 2 "nonimmediate_operand"))
6179 (parallel [(const_int 6)
6180 (const_int 7)
6181 (const_int 2)
6182 (const_int 3)])))]
6183 "TARGET_SSE"
6184 {
6185 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6186
6187 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
6188
6189 /* Fix up the destination if needed. */
6190 if (dst != operands[0])
6191 emit_move_insn (operands[0], dst);
6192
6193 DONE;
6194 })
6195
6196 (define_insn "sse_movhlps"
6197 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
6198 (vec_select:V4SF
6199 (vec_concat:V8SF
6200 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6201 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
6202 (parallel [(const_int 6)
6203 (const_int 7)
6204 (const_int 2)
6205 (const_int 3)])))]
6206 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6207 "@
6208 movhlps\t{%2, %0|%0, %2}
6209 vmovhlps\t{%2, %1, %0|%0, %1, %2}
6210 movlps\t{%H2, %0|%0, %H2}
6211 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
6212 %vmovhps\t{%2, %0|%q0, %2}"
6213 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6214 (set_attr "type" "ssemov")
6215 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6216 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6217
6218 (define_expand "sse_movlhps_exp"
6219 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6220 (vec_select:V4SF
6221 (vec_concat:V8SF
6222 (match_operand:V4SF 1 "nonimmediate_operand")
6223 (match_operand:V4SF 2 "nonimmediate_operand"))
6224 (parallel [(const_int 0)
6225 (const_int 1)
6226 (const_int 4)
6227 (const_int 5)])))]
6228 "TARGET_SSE"
6229 {
6230 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6231
6232 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
6233
6234 /* Fix up the destination if needed. */
6235 if (dst != operands[0])
6236 emit_move_insn (operands[0], dst);
6237
6238 DONE;
6239 })
6240
6241 (define_insn "sse_movlhps"
6242 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
6243 (vec_select:V4SF
6244 (vec_concat:V8SF
6245 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6246 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
6247 (parallel [(const_int 0)
6248 (const_int 1)
6249 (const_int 4)
6250 (const_int 5)])))]
6251 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
6252 "@
6253 movlhps\t{%2, %0|%0, %2}
6254 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6255 movhps\t{%2, %0|%0, %q2}
6256 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6257 %vmovlps\t{%2, %H0|%H0, %2}"
6258 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6259 (set_attr "type" "ssemov")
6260 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6261 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6262
6263 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
6264 [(set (match_operand:V16SF 0 "register_operand" "=v")
6265 (vec_select:V16SF
6266 (vec_concat:V32SF
6267 (match_operand:V16SF 1 "register_operand" "v")
6268 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6269 (parallel [(const_int 2) (const_int 18)
6270 (const_int 3) (const_int 19)
6271 (const_int 6) (const_int 22)
6272 (const_int 7) (const_int 23)
6273 (const_int 10) (const_int 26)
6274 (const_int 11) (const_int 27)
6275 (const_int 14) (const_int 30)
6276 (const_int 15) (const_int 31)])))]
6277 "TARGET_AVX512F"
6278 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6279 [(set_attr "type" "sselog")
6280 (set_attr "prefix" "evex")
6281 (set_attr "mode" "V16SF")])
6282
6283 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
6284 (define_insn "avx_unpckhps256<mask_name>"
6285 [(set (match_operand:V8SF 0 "register_operand" "=v")
6286 (vec_select:V8SF
6287 (vec_concat:V16SF
6288 (match_operand:V8SF 1 "register_operand" "v")
6289 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6290 (parallel [(const_int 2) (const_int 10)
6291 (const_int 3) (const_int 11)
6292 (const_int 6) (const_int 14)
6293 (const_int 7) (const_int 15)])))]
6294 "TARGET_AVX && <mask_avx512vl_condition>"
6295 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6296 [(set_attr "type" "sselog")
6297 (set_attr "prefix" "vex")
6298 (set_attr "mode" "V8SF")])
6299
6300 (define_expand "vec_interleave_highv8sf"
6301 [(set (match_dup 3)
6302 (vec_select:V8SF
6303 (vec_concat:V16SF
6304 (match_operand:V8SF 1 "register_operand")
6305 (match_operand:V8SF 2 "nonimmediate_operand"))
6306 (parallel [(const_int 0) (const_int 8)
6307 (const_int 1) (const_int 9)
6308 (const_int 4) (const_int 12)
6309 (const_int 5) (const_int 13)])))
6310 (set (match_dup 4)
6311 (vec_select:V8SF
6312 (vec_concat:V16SF
6313 (match_dup 1)
6314 (match_dup 2))
6315 (parallel [(const_int 2) (const_int 10)
6316 (const_int 3) (const_int 11)
6317 (const_int 6) (const_int 14)
6318 (const_int 7) (const_int 15)])))
6319 (set (match_operand:V8SF 0 "register_operand")
6320 (vec_select:V8SF
6321 (vec_concat:V16SF
6322 (match_dup 3)
6323 (match_dup 4))
6324 (parallel [(const_int 4) (const_int 5)
6325 (const_int 6) (const_int 7)
6326 (const_int 12) (const_int 13)
6327 (const_int 14) (const_int 15)])))]
6328 "TARGET_AVX"
6329 {
6330 operands[3] = gen_reg_rtx (V8SFmode);
6331 operands[4] = gen_reg_rtx (V8SFmode);
6332 })
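
;; The three steps above work around the in-lane behaviour of the 256-bit
;; unpck insns: two in-lane unpcks (unpcklps, then unpckhps) build the
;; interleaved pairs per 128-bit lane, and the final vec_select (a
;; vperm2f128-style lane shuffle) collects the two high lanes into the
;; result.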
6333
6334 (define_insn "vec_interleave_highv4sf<mask_name>"
6335 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6336 (vec_select:V4SF
6337 (vec_concat:V8SF
6338 (match_operand:V4SF 1 "register_operand" "0,v")
6339 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
6340 (parallel [(const_int 2) (const_int 6)
6341 (const_int 3) (const_int 7)])))]
6342 "TARGET_SSE && <mask_avx512vl_condition>"
6343 "@
6344 unpckhps\t{%2, %0|%0, %2}
6345 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6346 [(set_attr "isa" "noavx,avx")
6347 (set_attr "type" "sselog")
6348 (set_attr "prefix" "orig,vex")
6349 (set_attr "mode" "V4SF")])
6350
6351 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
6352 [(set (match_operand:V16SF 0 "register_operand" "=v")
6353 (vec_select:V16SF
6354 (vec_concat:V32SF
6355 (match_operand:V16SF 1 "register_operand" "v")
6356 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6357 (parallel [(const_int 0) (const_int 16)
6358 (const_int 1) (const_int 17)
6359 (const_int 4) (const_int 20)
6360 (const_int 5) (const_int 21)
6361 (const_int 8) (const_int 24)
6362 (const_int 9) (const_int 25)
6363 (const_int 12) (const_int 28)
6364 (const_int 13) (const_int 29)])))]
6365 "TARGET_AVX512F"
6366 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6367 [(set_attr "type" "sselog")
6368 (set_attr "prefix" "evex")
6369 (set_attr "mode" "V16SF")])
6370
6371 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
6372 (define_insn "avx_unpcklps256<mask_name>"
6373 [(set (match_operand:V8SF 0 "register_operand" "=v")
6374 (vec_select:V8SF
6375 (vec_concat:V16SF
6376 (match_operand:V8SF 1 "register_operand" "v")
6377 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6378 (parallel [(const_int 0) (const_int 8)
6379 (const_int 1) (const_int 9)
6380 (const_int 4) (const_int 12)
6381 (const_int 5) (const_int 13)])))]
6382 "TARGET_AVX && <mask_avx512vl_condition>"
6383 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6384 [(set_attr "type" "sselog")
6385 (set_attr "prefix" "vex")
6386 (set_attr "mode" "V8SF")])
6387
6388 (define_insn "unpcklps128_mask"
6389 [(set (match_operand:V4SF 0 "register_operand" "=v")
6390 (vec_merge:V4SF
6391 (vec_select:V4SF
6392 (vec_concat:V8SF
6393 (match_operand:V4SF 1 "register_operand" "v")
6394 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6395 (parallel [(const_int 0) (const_int 4)
6396 (const_int 1) (const_int 5)]))
6397 (match_operand:V4SF 3 "vector_move_operand" "0C")
6398 (match_operand:QI 4 "register_operand" "Yk")))]
6399 "TARGET_AVX512VL"
6400 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
6401 [(set_attr "type" "sselog")
6402 (set_attr "prefix" "evex")
6403 (set_attr "mode" "V4SF")])
6404
6405 (define_expand "vec_interleave_lowv8sf"
6406 [(set (match_dup 3)
6407 (vec_select:V8SF
6408 (vec_concat:V16SF
6409 (match_operand:V8SF 1 "register_operand")
6410 (match_operand:V8SF 2 "nonimmediate_operand"))
6411 (parallel [(const_int 0) (const_int 8)
6412 (const_int 1) (const_int 9)
6413 (const_int 4) (const_int 12)
6414 (const_int 5) (const_int 13)])))
6415 (set (match_dup 4)
6416 (vec_select:V8SF
6417 (vec_concat:V16SF
6418 (match_dup 1)
6419 (match_dup 2))
6420 (parallel [(const_int 2) (const_int 10)
6421 (const_int 3) (const_int 11)
6422 (const_int 6) (const_int 14)
6423 (const_int 7) (const_int 15)])))
6424 (set (match_operand:V8SF 0 "register_operand")
6425 (vec_select:V8SF
6426 (vec_concat:V16SF
6427 (match_dup 3)
6428 (match_dup 4))
6429 (parallel [(const_int 0) (const_int 1)
6430 (const_int 2) (const_int 3)
6431 (const_int 8) (const_int 9)
6432 (const_int 10) (const_int 11)])))]
6433 "TARGET_AVX"
6434 {
6435 operands[3] = gen_reg_rtx (V8SFmode);
6436 operands[4] = gen_reg_rtx (V8SFmode);
6437 })
6438
6439 (define_insn "vec_interleave_lowv4sf"
6440 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6441 (vec_select:V4SF
6442 (vec_concat:V8SF
6443 (match_operand:V4SF 1 "register_operand" "0,v")
6444 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
6445 (parallel [(const_int 0) (const_int 4)
6446 (const_int 1) (const_int 5)])))]
6447 "TARGET_SSE"
6448 "@
6449 unpcklps\t{%2, %0|%0, %2}
6450 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
6451 [(set_attr "isa" "noavx,avx")
6452 (set_attr "type" "sselog")
6453 (set_attr "prefix" "orig,maybe_evex")
6454 (set_attr "mode" "V4SF")])
6455
6456 ;; These are modeled with the same vec_concat as the others so that we
6457 ;; capture users of shufps that can use the new instructions.
6458 (define_insn "avx_movshdup256<mask_name>"
6459 [(set (match_operand:V8SF 0 "register_operand" "=v")
6460 (vec_select:V8SF
6461 (vec_concat:V16SF
6462 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6463 (match_dup 1))
6464 (parallel [(const_int 1) (const_int 1)
6465 (const_int 3) (const_int 3)
6466 (const_int 5) (const_int 5)
6467 (const_int 7) (const_int 7)])))]
6468 "TARGET_AVX && <mask_avx512vl_condition>"
6469 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6470 [(set_attr "type" "sse")
6471 (set_attr "prefix" "vex")
6472 (set_attr "mode" "V8SF")])
6473
6474 (define_insn "sse3_movshdup<mask_name>"
6475 [(set (match_operand:V4SF 0 "register_operand" "=v")
6476 (vec_select:V4SF
6477 (vec_concat:V8SF
6478 (match_operand:V4SF 1 "vector_operand" "vBm")
6479 (match_dup 1))
6480 (parallel [(const_int 1)
6481 (const_int 1)
6482 (const_int 7)
6483 (const_int 7)])))]
6484 "TARGET_SSE3 && <mask_avx512vl_condition>"
6485 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6486 [(set_attr "type" "sse")
6487 (set_attr "prefix_rep" "1")
6488 (set_attr "prefix" "maybe_vex")
6489 (set_attr "mode" "V4SF")])
6490
6491 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
6492 [(set (match_operand:V16SF 0 "register_operand" "=v")
6493 (vec_select:V16SF
6494 (vec_concat:V32SF
6495 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6496 (match_dup 1))
6497 (parallel [(const_int 1) (const_int 1)
6498 (const_int 3) (const_int 3)
6499 (const_int 5) (const_int 5)
6500 (const_int 7) (const_int 7)
6501 (const_int 9) (const_int 9)
6502 (const_int 11) (const_int 11)
6503 (const_int 13) (const_int 13)
6504 (const_int 15) (const_int 15)])))]
6505 "TARGET_AVX512F"
6506 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6507 [(set_attr "type" "sse")
6508 (set_attr "prefix" "evex")
6509 (set_attr "mode" "V16SF")])
6510
6511 (define_insn "avx_movsldup256<mask_name>"
6512 [(set (match_operand:V8SF 0 "register_operand" "=v")
6513 (vec_select:V8SF
6514 (vec_concat:V16SF
6515 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6516 (match_dup 1))
6517 (parallel [(const_int 0) (const_int 0)
6518 (const_int 2) (const_int 2)
6519 (const_int 4) (const_int 4)
6520 (const_int 6) (const_int 6)])))]
6521 "TARGET_AVX && <mask_avx512vl_condition>"
6522 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6523 [(set_attr "type" "sse")
6524 (set_attr "prefix" "vex")
6525 (set_attr "mode" "V8SF")])
6526
6527 (define_insn "sse3_movsldup<mask_name>"
6528 [(set (match_operand:V4SF 0 "register_operand" "=v")
6529 (vec_select:V4SF
6530 (vec_concat:V8SF
6531 (match_operand:V4SF 1 "vector_operand" "vBm")
6532 (match_dup 1))
6533 (parallel [(const_int 0)
6534 (const_int 0)
6535 (const_int 6)
6536 (const_int 6)])))]
6537 "TARGET_SSE3 && <mask_avx512vl_condition>"
6538 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6539 [(set_attr "type" "sse")
6540 (set_attr "prefix_rep" "1")
6541 (set_attr "prefix" "maybe_vex")
6542 (set_attr "mode" "V4SF")])
6543
6544 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
6545 [(set (match_operand:V16SF 0 "register_operand" "=v")
6546 (vec_select:V16SF
6547 (vec_concat:V32SF
6548 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6549 (match_dup 1))
6550 (parallel [(const_int 0) (const_int 0)
6551 (const_int 2) (const_int 2)
6552 (const_int 4) (const_int 4)
6553 (const_int 6) (const_int 6)
6554 (const_int 8) (const_int 8)
6555 (const_int 10) (const_int 10)
6556 (const_int 12) (const_int 12)
6557 (const_int 14) (const_int 14)])))]
6558 "TARGET_AVX512F"
6559 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6560 [(set_attr "type" "sse")
6561 (set_attr "prefix" "evex")
6562 (set_attr "mode" "V16SF")])
6563
6564 (define_expand "avx_shufps256<mask_expand4_name>"
6565 [(match_operand:V8SF 0 "register_operand")
6566 (match_operand:V8SF 1 "register_operand")
6567 (match_operand:V8SF 2 "nonimmediate_operand")
6568 (match_operand:SI 3 "const_int_operand")]
6569 "TARGET_AVX"
6570 {
6571 int mask = INTVAL (operands[3]);
6572 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6573 operands[1],
6574 operands[2],
6575 GEN_INT ((mask >> 0) & 3),
6576 GEN_INT ((mask >> 2) & 3),
6577 GEN_INT (((mask >> 4) & 3) + 8),
6578 GEN_INT (((mask >> 6) & 3) + 8),
6579 GEN_INT (((mask >> 0) & 3) + 4),
6580 GEN_INT (((mask >> 2) & 3) + 4),
6581 GEN_INT (((mask >> 4) & 3) + 12),
6582 GEN_INT (((mask >> 6) & 3) + 12)
6583 <mask_expand4_args>));
6584 DONE;
6585 })
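
;; As a worked example, an immediate of 0x1b decomposes into the selector
;;   (3 2 9 8 7 6 13 12)
;; for the V16SF concatenation of the two inputs: result elements 0-1 come
;; from the low lane of operand 1, elements 2-3 from the low lane of
;; operand 2 (offsets 8-11), and the +4/+12 terms repeat the pattern for
;; the high lanes.  avx_shufps256_1 below folds these constants back into
;; the vshufps immediate.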
6586
6587 ;; One bit in the mask selects 2 elements (the same 8-bit immediate is applied to both 128-bit lanes).
6588 (define_insn "avx_shufps256_1<mask_name>"
6589 [(set (match_operand:V8SF 0 "register_operand" "=v")
6590 (vec_select:V8SF
6591 (vec_concat:V16SF
6592 (match_operand:V8SF 1 "register_operand" "v")
6593 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6594 (parallel [(match_operand 3 "const_0_to_3_operand" )
6595 (match_operand 4 "const_0_to_3_operand" )
6596 (match_operand 5 "const_8_to_11_operand" )
6597 (match_operand 6 "const_8_to_11_operand" )
6598 (match_operand 7 "const_4_to_7_operand" )
6599 (match_operand 8 "const_4_to_7_operand" )
6600 (match_operand 9 "const_12_to_15_operand")
6601 (match_operand 10 "const_12_to_15_operand")])))]
6602 "TARGET_AVX
6603 && <mask_avx512vl_condition>
6604 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6605 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6606 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6607 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6608 {
6609 int mask;
6610 mask = INTVAL (operands[3]);
6611 mask |= INTVAL (operands[4]) << 2;
6612 mask |= (INTVAL (operands[5]) - 8) << 4;
6613 mask |= (INTVAL (operands[6]) - 8) << 6;
6614 operands[3] = GEN_INT (mask);
6615
6616 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6617 }
6618 [(set_attr "type" "sseshuf")
6619 (set_attr "length_immediate" "1")
6620 (set_attr "prefix" "<mask_prefix>")
6621 (set_attr "mode" "V8SF")])
6622
6623 (define_expand "sse_shufps<mask_expand4_name>"
6624 [(match_operand:V4SF 0 "register_operand")
6625 (match_operand:V4SF 1 "register_operand")
6626 (match_operand:V4SF 2 "vector_operand")
6627 (match_operand:SI 3 "const_int_operand")]
6628 "TARGET_SSE"
6629 {
6630 int mask = INTVAL (operands[3]);
6631 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6632 operands[1],
6633 operands[2],
6634 GEN_INT ((mask >> 0) & 3),
6635 GEN_INT ((mask >> 2) & 3),
6636 GEN_INT (((mask >> 4) & 3) + 4),
6637 GEN_INT (((mask >> 6) & 3) + 4)
6638 <mask_expand4_args>));
6639 DONE;
6640 })
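
;; The 128-bit variant follows the same scheme over a V8SF concatenation:
;; an immediate of 0x1b becomes the selector (3 2 5 4), i.e. result
;; elements 0-1 are taken from operand 1 and elements 2-3 from operand 2,
;; and the sse_shufps_<mode> and sse_shufps_v4sf_mask patterns below
;; reassemble 0x1b from those constants.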
6641
6642 (define_insn "sse_shufps_v4sf_mask"
6643 [(set (match_operand:V4SF 0 "register_operand" "=v")
6644 (vec_merge:V4SF
6645 (vec_select:V4SF
6646 (vec_concat:V8SF
6647 (match_operand:V4SF 1 "register_operand" "v")
6648 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6649 (parallel [(match_operand 3 "const_0_to_3_operand")
6650 (match_operand 4 "const_0_to_3_operand")
6651 (match_operand 5 "const_4_to_7_operand")
6652 (match_operand 6 "const_4_to_7_operand")]))
6653 (match_operand:V4SF 7 "vector_move_operand" "0C")
6654 (match_operand:QI 8 "register_operand" "Yk")))]
6655 "TARGET_AVX512VL"
6656 {
6657 int mask = 0;
6658 mask |= INTVAL (operands[3]) << 0;
6659 mask |= INTVAL (operands[4]) << 2;
6660 mask |= (INTVAL (operands[5]) - 4) << 4;
6661 mask |= (INTVAL (operands[6]) - 4) << 6;
6662 operands[3] = GEN_INT (mask);
6663
6664 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6665 }
6666 [(set_attr "type" "sseshuf")
6667 (set_attr "length_immediate" "1")
6668 (set_attr "prefix" "evex")
6669 (set_attr "mode" "V4SF")])
6670
6671 (define_insn "sse_shufps_<mode>"
6672 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
6673 (vec_select:VI4F_128
6674 (vec_concat:<ssedoublevecmode>
6675 (match_operand:VI4F_128 1 "register_operand" "0,v")
6676 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
6677 (parallel [(match_operand 3 "const_0_to_3_operand")
6678 (match_operand 4 "const_0_to_3_operand")
6679 (match_operand 5 "const_4_to_7_operand")
6680 (match_operand 6 "const_4_to_7_operand")])))]
6681 "TARGET_SSE"
6682 {
6683 int mask = 0;
6684 mask |= INTVAL (operands[3]) << 0;
6685 mask |= INTVAL (operands[4]) << 2;
6686 mask |= (INTVAL (operands[5]) - 4) << 4;
6687 mask |= (INTVAL (operands[6]) - 4) << 6;
6688 operands[3] = GEN_INT (mask);
6689
6690 switch (which_alternative)
6691 {
6692 case 0:
6693 return "shufps\t{%3, %2, %0|%0, %2, %3}";
6694 case 1:
6695 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6696 default:
6697 gcc_unreachable ();
6698 }
6699 }
6700 [(set_attr "isa" "noavx,avx")
6701 (set_attr "type" "sseshuf")
6702 (set_attr "length_immediate" "1")
6703 (set_attr "prefix" "orig,maybe_evex")
6704 (set_attr "mode" "V4SF")])
6705
6706 (define_insn "sse_storehps"
6707 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
6708 (vec_select:V2SF
6709 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
6710 (parallel [(const_int 2) (const_int 3)])))]
6711 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6712 "@
6713 %vmovhps\t{%1, %0|%q0, %1}
6714 %vmovhlps\t{%1, %d0|%d0, %1}
6715 %vmovlps\t{%H1, %d0|%d0, %H1}"
6716 [(set_attr "type" "ssemov")
6717 (set_attr "prefix" "maybe_vex")
6718 (set_attr "mode" "V2SF,V4SF,V2SF")])
6719
6720 (define_expand "sse_loadhps_exp"
6721 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6722 (vec_concat:V4SF
6723 (vec_select:V2SF
6724 (match_operand:V4SF 1 "nonimmediate_operand")
6725 (parallel [(const_int 0) (const_int 1)]))
6726 (match_operand:V2SF 2 "nonimmediate_operand")))]
6727 "TARGET_SSE"
6728 {
6729 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6730
6731 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6732
6733 /* Fix up the destination if needed. */
6734 if (dst != operands[0])
6735 emit_move_insn (operands[0], dst);
6736
6737 DONE;
6738 })
6739
6740 (define_insn "sse_loadhps"
6741 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
6742 (vec_concat:V4SF
6743 (vec_select:V2SF
6744 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6745 (parallel [(const_int 0) (const_int 1)]))
6746 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
6747 "TARGET_SSE"
6748 "@
6749 movhps\t{%2, %0|%0, %q2}
6750 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6751 movlhps\t{%2, %0|%0, %2}
6752 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6753 %vmovlps\t{%2, %H0|%H0, %2}"
6754 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6755 (set_attr "type" "ssemov")
6756 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6757 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
6758
6759 (define_insn "sse_storelps"
6760 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
6761 (vec_select:V2SF
6762 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
6763 (parallel [(const_int 0) (const_int 1)])))]
6764 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6765 "@
6766 %vmovlps\t{%1, %0|%q0, %1}
6767 %vmovaps\t{%1, %0|%0, %1}
6768 %vmovlps\t{%1, %d0|%d0, %q1}"
6769 [(set_attr "type" "ssemov")
6770 (set_attr "prefix" "maybe_vex")
6771 (set_attr "mode" "V2SF,V4SF,V2SF")])
6772
6773 (define_expand "sse_loadlps_exp"
6774 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6775 (vec_concat:V4SF
6776 (match_operand:V2SF 2 "nonimmediate_operand")
6777 (vec_select:V2SF
6778 (match_operand:V4SF 1 "nonimmediate_operand")
6779 (parallel [(const_int 2) (const_int 3)]))))]
6780 "TARGET_SSE"
6781 {
6782 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6783
6784 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6785
6786 /* Fix up the destination if needed. */
6787 if (dst != operands[0])
6788 emit_move_insn (operands[0], dst);
6789
6790 DONE;
6791 })
6792
6793 (define_insn "sse_loadlps"
6794 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
6795 (vec_concat:V4SF
6796 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
6797 (vec_select:V2SF
6798 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
6799 (parallel [(const_int 2) (const_int 3)]))))]
6800 "TARGET_SSE"
6801 "@
6802 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6803 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6804 movlps\t{%2, %0|%0, %q2}
6805 vmovlps\t{%2, %1, %0|%0, %1, %q2}
6806 %vmovlps\t{%2, %0|%q0, %2}"
6807 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6808 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6809 (set (attr "length_immediate")
6810 (if_then_else (eq_attr "alternative" "0,1")
6811 (const_string "1")
6812 (const_string "*")))
6813 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6814 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6815
6816 (define_insn "sse_movss"
6817 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6818 (vec_merge:V4SF
6819 (match_operand:V4SF 2 "register_operand" " x,v")
6820 (match_operand:V4SF 1 "register_operand" " 0,v")
6821 (const_int 1)))]
6822 "TARGET_SSE"
6823 "@
6824 movss\t{%2, %0|%0, %2}
6825 vmovss\t{%2, %1, %0|%0, %1, %2}"
6826 [(set_attr "isa" "noavx,avx")
6827 (set_attr "type" "ssemov")
6828 (set_attr "prefix" "orig,maybe_evex")
6829 (set_attr "mode" "SF")])
6830
6831 (define_insn "avx2_vec_dup<mode>"
6832 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
6833 (vec_duplicate:VF1_128_256
6834 (vec_select:SF
6835 (match_operand:V4SF 1 "register_operand" "v")
6836 (parallel [(const_int 0)]))))]
6837 "TARGET_AVX2"
6838 "vbroadcastss\t{%1, %0|%0, %1}"
6839 [(set_attr "type" "sselog1")
6840 (set_attr "prefix" "maybe_evex")
6841 (set_attr "mode" "<MODE>")])
6842
6843 (define_insn "avx2_vec_dupv8sf_1"
6844 [(set (match_operand:V8SF 0 "register_operand" "=v")
6845 (vec_duplicate:V8SF
6846 (vec_select:SF
6847 (match_operand:V8SF 1 "register_operand" "v")
6848 (parallel [(const_int 0)]))))]
6849 "TARGET_AVX2"
6850 "vbroadcastss\t{%x1, %0|%0, %x1}"
6851 [(set_attr "type" "sselog1")
6852 (set_attr "prefix" "maybe_evex")
6853 (set_attr "mode" "V8SF")])
6854
6855 (define_insn "avx512f_vec_dup<mode>_1"
6856 [(set (match_operand:VF_512 0 "register_operand" "=v")
6857 (vec_duplicate:VF_512
6858 (vec_select:<ssescalarmode>
6859 (match_operand:VF_512 1 "register_operand" "v")
6860 (parallel [(const_int 0)]))))]
6861 "TARGET_AVX512F"
6862 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
6863 [(set_attr "type" "sselog1")
6864 (set_attr "prefix" "evex")
6865 (set_attr "mode" "<MODE>")])
6866
6867 ;; Although insertps accepts a register source, we prefer the unpcklps
6868 ;; forms with a register source, since unpcklps has a shorter encoding.
6869 (define_insn "*vec_concatv2sf_sse4_1"
6870 [(set (match_operand:V2SF 0 "register_operand"
6871 "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
6872 (vec_concat:V2SF
6873 (match_operand:SF 1 "nonimmediate_operand"
6874 " 0, 0,Yv, 0,0, v,m, 0 , m")
6875 (match_operand:SF 2 "vector_move_operand"
6876 " Yr,*x,Yv, m,m, m,C,*ym, C")))]
6877 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6878 "@
6879 unpcklps\t{%2, %0|%0, %2}
6880 unpcklps\t{%2, %0|%0, %2}
6881 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6882 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6883 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6884 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6885 %vmovss\t{%1, %0|%0, %1}
6886 punpckldq\t{%2, %0|%0, %2}
6887 movd\t{%1, %0|%0, %1}"
6888 [(set (attr "isa")
6889 (cond [(eq_attr "alternative" "0,1,3,4")
6890 (const_string "noavx")
6891 (eq_attr "alternative" "2,5")
6892 (const_string "avx")
6893 ]
6894 (const_string "*")))
6895 (set (attr "type")
6896 (cond [(eq_attr "alternative" "6")
6897 (const_string "ssemov")
6898 (eq_attr "alternative" "7")
6899 (const_string "mmxcvt")
6900 (eq_attr "alternative" "8")
6901 (const_string "mmxmov")
6902 ]
6903 (const_string "sselog")))
6904 (set (attr "prefix_data16")
6905 (if_then_else (eq_attr "alternative" "3,4")
6906 (const_string "1")
6907 (const_string "*")))
6908 (set (attr "prefix_extra")
6909 (if_then_else (eq_attr "alternative" "3,4,5")
6910 (const_string "1")
6911 (const_string "*")))
6912 (set (attr "length_immediate")
6913 (if_then_else (eq_attr "alternative" "3,4,5")
6914 (const_string "1")
6915 (const_string "*")))
6916 (set (attr "prefix")
6917 (cond [(eq_attr "alternative" "2,5")
6918 (const_string "maybe_evex")
6919 (eq_attr "alternative" "6")
6920 (const_string "maybe_vex")
6921 ]
6922 (const_string "orig")))
6923 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6924
6925 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6926 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
6927 ;; alternatives pretty much forces the MMX alternative to be chosen.
6928 (define_insn "*vec_concatv2sf_sse"
6929 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6930 (vec_concat:V2SF
6931 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6932 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6933 "TARGET_SSE"
6934 "@
6935 unpcklps\t{%2, %0|%0, %2}
6936 movss\t{%1, %0|%0, %1}
6937 punpckldq\t{%2, %0|%0, %2}
6938 movd\t{%1, %0|%0, %1}"
6939 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6940 (set_attr "mode" "V4SF,SF,DI,DI")])
6941
6942 (define_insn "*vec_concatv4sf"
6943 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
6944 (vec_concat:V4SF
6945 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
6946 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
6947 "TARGET_SSE"
6948 "@
6949 movlhps\t{%2, %0|%0, %2}
6950 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6951 movhps\t{%2, %0|%0, %q2}
6952 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6953 [(set_attr "isa" "noavx,avx,noavx,avx")
6954 (set_attr "type" "ssemov")
6955 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
6956 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
6957
6958 ;; Avoid combining registers from different units in a single alternative;
6959 ;; see the comment above the inline_secondary_memory_needed function in i386.c.
6960 (define_insn "vec_set<mode>_0"
6961 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6962 "=Yr,*x,v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m")
6963 (vec_merge:VI4F_128
6964 (vec_duplicate:VI4F_128
6965 (match_operand:<ssescalarmode> 2 "general_operand"
6966 " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
6967 (match_operand:VI4F_128 1 "vector_move_operand"
6968 " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
6969 (const_int 1)))]
6970 "TARGET_SSE"
6971 "@
6972 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
6973 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
6974 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
6975 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6976 %vmovd\t{%2, %0|%0, %2}
6977 movss\t{%2, %0|%0, %2}
6978 movss\t{%2, %0|%0, %2}
6979 vmovss\t{%2, %1, %0|%0, %1, %2}
6980 pinsrd\t{$0, %2, %0|%0, %2, 0}
6981 pinsrd\t{$0, %2, %0|%0, %2, 0}
6982 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6983 #
6984 #
6985 #"
6986 [(set (attr "isa")
6987 (cond [(eq_attr "alternative" "0,1,8,9")
6988 (const_string "sse4_noavx")
6989 (eq_attr "alternative" "2,7,10")
6990 (const_string "avx")
6991 (eq_attr "alternative" "3,4")
6992 (const_string "sse2")
6993 (eq_attr "alternative" "5,6")
6994 (const_string "noavx")
6995 ]
6996 (const_string "*")))
6997 (set (attr "type")
6998 (cond [(eq_attr "alternative" "0,1,2,8,9,10")
6999 (const_string "sselog")
7000 (eq_attr "alternative" "12")
7001 (const_string "imov")
7002 (eq_attr "alternative" "13")
7003 (const_string "fmov")
7004 ]
7005 (const_string "ssemov")))
7006 (set (attr "prefix_extra")
7007 (if_then_else (eq_attr "alternative" "8,9,10")
7008 (const_string "1")
7009 (const_string "*")))
7010 (set (attr "length_immediate")
7011 (if_then_else (eq_attr "alternative" "8,9,10")
7012 (const_string "1")
7013 (const_string "*")))
7014 (set (attr "prefix")
7015 (cond [(eq_attr "alternative" "0,1,5,6,8,9")
7016 (const_string "orig")
7017 (eq_attr "alternative" "2")
7018 (const_string "maybe_evex")
7019 (eq_attr "alternative" "3,4")
7020 (const_string "maybe_vex")
7021 (eq_attr "alternative" "7,10")
7022 (const_string "vex")
7023 ]
7024 (const_string "*")))
7025 (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
7026
7027 ;; A subset is vec_setv4sf.
7028 (define_insn "*vec_setv4sf_sse4_1"
7029 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7030 (vec_merge:V4SF
7031 (vec_duplicate:V4SF
7032 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
7033 (match_operand:V4SF 1 "register_operand" "0,0,v")
7034 (match_operand:SI 3 "const_int_operand")))]
7035 "TARGET_SSE4_1
7036 && ((unsigned) exact_log2 (INTVAL (operands[3]))
7037 < GET_MODE_NUNITS (V4SFmode))"
7038 {
7039 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
7040 switch (which_alternative)
7041 {
7042 case 0:
7043 case 1:
7044 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7045 case 2:
7046 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7047 default:
7048 gcc_unreachable ();
7049 }
7050 }
7051 [(set_attr "isa" "noavx,noavx,avx")
7052 (set_attr "type" "sselog")
7053 (set_attr "prefix_data16" "1,1,*")
7054 (set_attr "prefix_extra" "1")
7055 (set_attr "length_immediate" "1")
7056 (set_attr "prefix" "orig,orig,maybe_evex")
7057 (set_attr "mode" "V4SF")])
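
;; Here operands[3] is the one-hot vec_merge mask, so exact_log2 recovers
;; the destination element index and the << 4 shift places it in the
;; destination-index field (bits 5:4) of the insertps immediate; a mask of
;; 4, for instance, selects element 2 and yields an immediate of 0x20.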
7058
7059 (define_insn "sse4_1_insertps"
7060 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7061 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
7062 (match_operand:V4SF 1 "register_operand" "0,0,v")
7063 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
7064 UNSPEC_INSERTPS))]
7065 "TARGET_SSE4_1"
7066 {
7067 if (MEM_P (operands[2]))
7068 {
7069 unsigned count_s = INTVAL (operands[3]) >> 6;
7070 if (count_s)
7071 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
7072 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
7073 }
7074 switch (which_alternative)
7075 {
7076 case 0:
7077 case 1:
7078 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7079 case 2:
7080 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7081 default:
7082 gcc_unreachable ();
7083 }
7084 }
7085 [(set_attr "isa" "noavx,noavx,avx")
7086 (set_attr "type" "sselog")
7087 (set_attr "prefix_data16" "1,1,*")
7088 (set_attr "prefix_extra" "1")
7089 (set_attr "length_immediate" "1")
7090 (set_attr "prefix" "orig,orig,maybe_evex")
7091 (set_attr "mode" "V4SF")])
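
;; Bits 7:6 of the insertps immediate select the source element from a
;; register operand but are ignored for a memory operand, which always
;; supplies a single 32-bit element.  The prologue above therefore folds
;; that field into the address (count_s * 4 bytes) and clears it from the
;; immediate before the instruction is emitted.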
7092
7093 (define_split
7094 [(set (match_operand:VI4F_128 0 "memory_operand")
7095 (vec_merge:VI4F_128
7096 (vec_duplicate:VI4F_128
7097 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
7098 (match_dup 0)
7099 (const_int 1)))]
7100 "TARGET_SSE && reload_completed"
7101 [(set (match_dup 0) (match_dup 1))]
7102 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
7103
7104 (define_expand "vec_set<mode>"
7105 [(match_operand:V 0 "register_operand")
7106 (match_operand:<ssescalarmode> 1 "register_operand")
7107 (match_operand 2 "const_int_operand")]
7108 "TARGET_SSE"
7109 {
7110 ix86_expand_vector_set (false, operands[0], operands[1],
7111 INTVAL (operands[2]));
7112 DONE;
7113 })
7114
7115 (define_insn_and_split "*vec_extractv4sf_0"
7116 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
7117 (vec_select:SF
7118 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
7119 (parallel [(const_int 0)])))]
7120 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7121 "#"
7122 "&& reload_completed"
7123 [(set (match_dup 0) (match_dup 1))]
7124 "operands[1] = gen_lowpart (SFmode, operands[1]);")
7125
7126 (define_insn_and_split "*sse4_1_extractps"
7127 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
7128 (vec_select:SF
7129 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
7130 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
7131 "TARGET_SSE4_1"
7132 "@
7133 extractps\t{%2, %1, %0|%0, %1, %2}
7134 extractps\t{%2, %1, %0|%0, %1, %2}
7135 vextractps\t{%2, %1, %0|%0, %1, %2}
7136 #
7137 #"
7138 "&& reload_completed && SSE_REG_P (operands[0])"
7139 [(const_int 0)]
7140 {
7141 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
7142 switch (INTVAL (operands[2]))
7143 {
7144 case 1:
7145 case 3:
7146 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
7147 operands[2], operands[2],
7148 GEN_INT (INTVAL (operands[2]) + 4),
7149 GEN_INT (INTVAL (operands[2]) + 4)));
7150 break;
7151 case 2:
7152 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
7153 break;
7154 default:
7155 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
7156 gcc_unreachable ();
7157 }
7158 DONE;
7159 }
7160 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
7161 (set_attr "type" "sselog,sselog,sselog,*,*")
7162 (set_attr "prefix_data16" "1,1,1,*,*")
7163 (set_attr "prefix_extra" "1,1,1,*,*")
7164 (set_attr "length_immediate" "1,1,1,*,*")
7165 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
7166 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
7167
7168 (define_insn_and_split "*vec_extractv4sf_mem"
7169 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
7170 (vec_select:SF
7171 (match_operand:V4SF 1 "memory_operand" "o,o,o")
7172 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
7173 "TARGET_SSE"
7174 "#"
7175 "&& reload_completed"
7176 [(set (match_dup 0) (match_dup 1))]
7177 {
7178 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
7179 })
7180
7181 (define_mode_attr extract_type
7182 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
7183
7184 (define_mode_attr extract_suf
7185 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
7186
7187 (define_mode_iterator AVX512_VEC
7188 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
7189
7190 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
7191 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
7192 (match_operand:AVX512_VEC 1 "register_operand")
7193 (match_operand:SI 2 "const_0_to_3_operand")
7194 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
7195 (match_operand:QI 4 "register_operand")]
7196 "TARGET_AVX512F"
7197 {
7198 int mask;
7199 mask = INTVAL (operands[2]);
7200 rtx dest = operands[0];
7201
7202 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
7203 dest = gen_reg_rtx (<ssequartermode>mode);
7204
7205 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
7206 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
7207 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
7208 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
7209 operands[4]));
7210 else
7211 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
7212 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
7213 operands[4]));
7214 if (dest != operands[0])
7215 emit_move_insn (operands[0], dest);
7216 DONE;
7217 })
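
;; operands[2] selects which 128-bit chunk is extracted; the expander
;; turns it into explicit element indices (mask * 4 .. mask * 4 + 3 for
;; 32-bit elements, mask * 2 and mask * 2 + 1 for 64-bit ones), and the
;; *_1 patterns below shift the first index back down to recover the
;; vextract{32x4,64x2} immediate.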
7218
7219 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
7220 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7221 (vec_merge:<ssequartermode>
7222 (vec_select:<ssequartermode>
7223 (match_operand:V8FI 1 "register_operand" "v")
7224 (parallel [(match_operand 2 "const_0_to_7_operand")
7225 (match_operand 3 "const_0_to_7_operand")]))
7226 (match_operand:<ssequartermode> 4 "memory_operand" "0")
7227 (match_operand:QI 5 "register_operand" "Yk")))]
7228 "TARGET_AVX512DQ
7229 && INTVAL (operands[2]) % 2 == 0
7230 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7231 && rtx_equal_p (operands[4], operands[0])"
7232 {
7233 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
7234 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
7235 }
7236 [(set_attr "type" "sselog")
7237 (set_attr "prefix_extra" "1")
7238 (set_attr "length_immediate" "1")
7239 (set_attr "memory" "store")
7240 (set_attr "prefix" "evex")
7241 (set_attr "mode" "<sseinsnmode>")])
7242
7243 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
7244 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7245 (vec_merge:<ssequartermode>
7246 (vec_select:<ssequartermode>
7247 (match_operand:V16FI 1 "register_operand" "v")
7248 (parallel [(match_operand 2 "const_0_to_15_operand")
7249 (match_operand 3 "const_0_to_15_operand")
7250 (match_operand 4 "const_0_to_15_operand")
7251 (match_operand 5 "const_0_to_15_operand")]))
7252 (match_operand:<ssequartermode> 6 "memory_operand" "0")
7253 (match_operand:QI 7 "register_operand" "Yk")))]
7254 "TARGET_AVX512F
7255 && INTVAL (operands[2]) % 4 == 0
7256 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7257 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
7258 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
7259 && rtx_equal_p (operands[6], operands[0])"
7260 {
7261 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
7262 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
7263 }
7264 [(set_attr "type" "sselog")
7265 (set_attr "prefix_extra" "1")
7266 (set_attr "length_immediate" "1")
7267 (set_attr "memory" "store")
7268 (set_attr "prefix" "evex")
7269 (set_attr "mode" "<sseinsnmode>")])
7270
7271 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
7272 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7273 (vec_select:<ssequartermode>
7274 (match_operand:V8FI 1 "register_operand" "v")
7275 (parallel [(match_operand 2 "const_0_to_7_operand")
7276 (match_operand 3 "const_0_to_7_operand")])))]
7277 "TARGET_AVX512DQ
7278 && INTVAL (operands[2]) % 2 == 0
7279 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
7280 {
7281 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
7282 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
7283 }
7284 [(set_attr "type" "sselog1")
7285 (set_attr "prefix_extra" "1")
7286 (set_attr "length_immediate" "1")
7287 (set_attr "prefix" "evex")
7288 (set_attr "mode" "<sseinsnmode>")])
7289
7290 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
7291 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7292 (vec_select:<ssequartermode>
7293 (match_operand:V16FI 1 "register_operand" "v")
7294 (parallel [(match_operand 2 "const_0_to_15_operand")
7295 (match_operand 3 "const_0_to_15_operand")
7296 (match_operand 4 "const_0_to_15_operand")
7297 (match_operand 5 "const_0_to_15_operand")])))]
7298 "TARGET_AVX512F
7299 && INTVAL (operands[2]) % 4 == 0
7300 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7301 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
7302 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
7303 {
7304 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
7305 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
7306 }
7307 [(set_attr "type" "sselog1")
7308 (set_attr "prefix_extra" "1")
7309 (set_attr "length_immediate" "1")
7310 (set_attr "prefix" "evex")
7311 (set_attr "mode" "<sseinsnmode>")])
7312
7313 (define_mode_attr extract_type_2
7314 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
7315
7316 (define_mode_attr extract_suf_2
7317 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
7318
7319 (define_mode_iterator AVX512_VEC_2
7320 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
7321
7322 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
7323 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7324 (match_operand:AVX512_VEC_2 1 "register_operand")
7325 (match_operand:SI 2 "const_0_to_1_operand")
7326 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
7327 (match_operand:QI 4 "register_operand")]
7328 "TARGET_AVX512F"
7329 {
7330 rtx (*insn)(rtx, rtx, rtx, rtx);
7331 rtx dest = operands[0];
7332
7333 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
7334 dest = gen_reg_rtx (<ssehalfvecmode>mode);
7335
7336 switch (INTVAL (operands[2]))
7337 {
7338 case 0:
7339 insn = gen_vec_extract_lo_<mode>_mask;
7340 break;
7341 case 1:
7342 insn = gen_vec_extract_hi_<mode>_mask;
7343 break;
7344 default:
7345 gcc_unreachable ();
7346 }
7347
7348 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
7349 if (dest != operands[0])
7350 emit_move_insn (operands[0], dest);
7351 DONE;
7352 })
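
;; Extracting a 256-bit half only requires a lo/hi choice, so this
;; expander dispatches on operands[2] to the vec_extract_lo/hi_<mode>_mask
;; patterns and, as above, falls back to a temporary register when the
;; memory destination does not match the merge operand, since the *_maskm
;; patterns require them to be the same location.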
7353
7354 (define_split
7355 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7356 (vec_select:<ssehalfvecmode>
7357 (match_operand:V8FI 1 "nonimmediate_operand")
7358 (parallel [(const_int 0) (const_int 1)
7359 (const_int 2) (const_int 3)])))]
7360 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7361 && reload_completed
7362 && (TARGET_AVX512VL
7363 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
7364 [(set (match_dup 0) (match_dup 1))]
7365 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7366
7367 (define_insn "vec_extract_lo_<mode>_maskm"
7368 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7369 (vec_merge:<ssehalfvecmode>
7370 (vec_select:<ssehalfvecmode>
7371 (match_operand:V8FI 1 "register_operand" "v")
7372 (parallel [(const_int 0) (const_int 1)
7373 (const_int 2) (const_int 3)]))
7374 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7375 (match_operand:QI 3 "register_operand" "Yk")))]
7376 "TARGET_AVX512F
7377 && rtx_equal_p (operands[2], operands[0])"
7378 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7379 [(set_attr "type" "sselog1")
7380 (set_attr "prefix_extra" "1")
7381 (set_attr "length_immediate" "1")
7382 (set_attr "prefix" "evex")
7383 (set_attr "mode" "<sseinsnmode>")])
7384
7385 (define_insn "vec_extract_lo_<mode><mask_name>"
7386 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
7387 (vec_select:<ssehalfvecmode>
7388 (match_operand:V8FI 1 "<store_mask_predicate>" "v,<store_mask_constraint>")
7389 (parallel [(const_int 0) (const_int 1)
7390 (const_int 2) (const_int 3)])))]
7391 "TARGET_AVX512F
7392 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7393 {
7394 if (<mask_applied> || (!TARGET_AVX512VL && !MEM_P (operands[1])))
7395 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7396 else
7397 return "#";
7398 }
7399 [(set_attr "type" "sselog1")
7400 (set_attr "prefix_extra" "1")
7401 (set_attr "length_immediate" "1")
7402 (set_attr "prefix" "evex")
7403 (set_attr "mode" "<sseinsnmode>")])
7404
7405 (define_insn "vec_extract_hi_<mode>_maskm"
7406 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7407 (vec_merge:<ssehalfvecmode>
7408 (vec_select:<ssehalfvecmode>
7409 (match_operand:V8FI 1 "register_operand" "v")
7410 (parallel [(const_int 4) (const_int 5)
7411 (const_int 6) (const_int 7)]))
7412 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7413 (match_operand:QI 3 "register_operand" "Yk")))]
7414 "TARGET_AVX512F
7415 && rtx_equal_p (operands[2], operands[0])"
7416 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7417 [(set_attr "type" "sselog")
7418 (set_attr "prefix_extra" "1")
7419 (set_attr "length_immediate" "1")
7420 (set_attr "memory" "store")
7421 (set_attr "prefix" "evex")
7422 (set_attr "mode" "<sseinsnmode>")])
7423
7424 (define_insn "vec_extract_hi_<mode><mask_name>"
7425 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7426 (vec_select:<ssehalfvecmode>
7427 (match_operand:V8FI 1 "register_operand" "v")
7428 (parallel [(const_int 4) (const_int 5)
7429 (const_int 6) (const_int 7)])))]
7430 "TARGET_AVX512F"
7431 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
7432 [(set_attr "type" "sselog1")
7433 (set_attr "prefix_extra" "1")
7434 (set_attr "length_immediate" "1")
7435 (set_attr "prefix" "evex")
7436 (set_attr "mode" "<sseinsnmode>")])
7437
7438 (define_insn "vec_extract_hi_<mode>_maskm"
7439 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7440 (vec_merge:<ssehalfvecmode>
7441 (vec_select:<ssehalfvecmode>
7442 (match_operand:V16FI 1 "register_operand" "v")
7443 (parallel [(const_int 8) (const_int 9)
7444 (const_int 10) (const_int 11)
7445 (const_int 12) (const_int 13)
7446 (const_int 14) (const_int 15)]))
7447 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7448 (match_operand:QI 3 "register_operand" "Yk")))]
7449 "TARGET_AVX512DQ
7450 && rtx_equal_p (operands[2], operands[0])"
7451 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7452 [(set_attr "type" "sselog1")
7453 (set_attr "prefix_extra" "1")
7454 (set_attr "length_immediate" "1")
7455 (set_attr "prefix" "evex")
7456 (set_attr "mode" "<sseinsnmode>")])
7457
7458 (define_insn "vec_extract_hi_<mode><mask_name>"
7459 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
7460 (vec_select:<ssehalfvecmode>
7461 (match_operand:V16FI 1 "register_operand" "v,v")
7462 (parallel [(const_int 8) (const_int 9)
7463 (const_int 10) (const_int 11)
7464 (const_int 12) (const_int 13)
7465 (const_int 14) (const_int 15)])))]
7466 "TARGET_AVX512F && <mask_avx512dq_condition>"
7467 "@
7468 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
7469 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7470 [(set_attr "type" "sselog1")
7471 (set_attr "prefix_extra" "1")
7472 (set_attr "isa" "avx512dq,noavx512dq")
7473 (set_attr "length_immediate" "1")
7474 (set_attr "prefix" "evex")
7475 (set_attr "mode" "<sseinsnmode>")])
7476
7477 (define_expand "avx512vl_vextractf128<mode>"
7478 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7479 (match_operand:VI48F_256 1 "register_operand")
7480 (match_operand:SI 2 "const_0_to_1_operand")
7481 (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
7482 (match_operand:QI 4 "register_operand")]
7483 "TARGET_AVX512DQ && TARGET_AVX512VL"
7484 {
7485 rtx (*insn)(rtx, rtx, rtx, rtx);
7486 rtx dest = operands[0];
7487
7488 if (MEM_P (dest)
7489 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
7490 /* For V8S[IF]mode there are maskm insns with =m and 0
7491 constraints. */
7492 ? !rtx_equal_p (dest, operands[3])
7493 /* For V4D[IF]mode, hi insns don't allow memory, and
7494 lo insns have =m and 0C constraints. */
7495 : (operands[2] != const0_rtx
7496 || (!rtx_equal_p (dest, operands[3])
7497 && GET_CODE (operands[3]) != CONST_VECTOR))))
7498 dest = gen_reg_rtx (<ssehalfvecmode>mode);
7499 switch (INTVAL (operands[2]))
7500 {
7501 case 0:
7502 insn = gen_vec_extract_lo_<mode>_mask;
7503 break;
7504 case 1:
7505 insn = gen_vec_extract_hi_<mode>_mask;
7506 break;
7507 default:
7508 gcc_unreachable ();
7509 }
7510
7511 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
7512 if (dest != operands[0])
7513 emit_move_insn (operands[0], dest);
7514 DONE;
7515 })
7516
7517 (define_expand "avx_vextractf128<mode>"
7518 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7519 (match_operand:V_256 1 "register_operand")
7520 (match_operand:SI 2 "const_0_to_1_operand")]
7521 "TARGET_AVX"
7522 {
7523 rtx (*insn)(rtx, rtx);
7524
7525 switch (INTVAL (operands[2]))
7526 {
7527 case 0:
7528 insn = gen_vec_extract_lo_<mode>;
7529 break;
7530 case 1:
7531 insn = gen_vec_extract_hi_<mode>;
7532 break;
7533 default:
7534 gcc_unreachable ();
7535 }
7536
7537 emit_insn (insn (operands[0], operands[1]));
7538 DONE;
7539 })
7540
7541 (define_insn "vec_extract_lo_<mode><mask_name>"
7542 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
7543 (vec_select:<ssehalfvecmode>
7544 (match_operand:V16FI 1 "<store_mask_predicate>"
7545 "<store_mask_constraint>,v")
7546 (parallel [(const_int 0) (const_int 1)
7547 (const_int 2) (const_int 3)
7548 (const_int 4) (const_int 5)
7549 (const_int 6) (const_int 7)])))]
7550 "TARGET_AVX512F
7551 && <mask_mode512bit_condition>
7552 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7553 {
7554 if (<mask_applied>)
7555 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7556 else
7557 return "#";
7558 })
7559
7560 (define_split
7561 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7562 (vec_select:<ssehalfvecmode>
7563 (match_operand:V16FI 1 "nonimmediate_operand")
7564 (parallel [(const_int 0) (const_int 1)
7565 (const_int 2) (const_int 3)
7566 (const_int 4) (const_int 5)
7567 (const_int 6) (const_int 7)])))]
7568 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7569 && reload_completed"
7570 [(set (match_dup 0) (match_dup 1))]
7571 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7572
7573 (define_insn "vec_extract_lo_<mode><mask_name>"
7574 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
7575 (vec_select:<ssehalfvecmode>
7576 (match_operand:VI8F_256 1 "<store_mask_predicate>"
7577 "<store_mask_constraint>,v")
7578 (parallel [(const_int 0) (const_int 1)])))]
7579 "TARGET_AVX
7580 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7581 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7582 {
7583 if (<mask_applied>)
7584 return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
7585 else
7586 return "#";
7587 }
7588 [(set_attr "type" "sselog")
7589 (set_attr "prefix_extra" "1")
7590 (set_attr "length_immediate" "1")
7591 (set_attr "memory" "none,store")
7592 (set_attr "prefix" "evex")
7593 (set_attr "mode" "XI")])
7594
7595 (define_split
7596 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7597 (vec_select:<ssehalfvecmode>
7598 (match_operand:VI8F_256 1 "nonimmediate_operand")
7599 (parallel [(const_int 0) (const_int 1)])))]
7600 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7601 && reload_completed"
7602 [(set (match_dup 0) (match_dup 1))]
7603 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7604
7605 (define_insn "vec_extract_hi_<mode><mask_name>"
7606 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
7607 (vec_select:<ssehalfvecmode>
7608 (match_operand:VI8F_256 1 "register_operand" "v,v")
7609 (parallel [(const_int 2) (const_int 3)])))]
7610 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7611 {
7612 if (TARGET_AVX512VL)
7613 {
7614 if (TARGET_AVX512DQ)
7615 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7616 else
7617 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
7618 }
7619 else
7620 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7621 }
7622 [(set_attr "type" "sselog")
7623 (set_attr "prefix_extra" "1")
7624 (set_attr "length_immediate" "1")
7625 (set_attr "memory" "none,store")
7626 (set_attr "prefix" "vex")
7627 (set_attr "mode" "<sseinsnmode>")])
7628
7629 (define_split
7630 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7631 (vec_select:<ssehalfvecmode>
7632 (match_operand:VI4F_256 1 "nonimmediate_operand")
7633 (parallel [(const_int 0) (const_int 1)
7634 (const_int 2) (const_int 3)])))]
7635 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7636 && reload_completed"
7637 [(set (match_dup 0) (match_dup 1))]
7638 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7639
7640 (define_insn "vec_extract_lo_<mode><mask_name>"
7641 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
7642 "=<store_mask_constraint>,v")
7643 (vec_select:<ssehalfvecmode>
7644 (match_operand:VI4F_256 1 "<store_mask_predicate>"
7645 "v,<store_mask_constraint>")
7646 (parallel [(const_int 0) (const_int 1)
7647 (const_int 2) (const_int 3)])))]
7648 "TARGET_AVX
7649 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7650 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7651 {
7652 if (<mask_applied>)
7653 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7654 else
7655 return "#";
7656 }
7657 [(set_attr "type" "sselog1")
7658 (set_attr "prefix_extra" "1")
7659 (set_attr "length_immediate" "1")
7660 (set_attr "prefix" "evex")
7661 (set_attr "mode" "<sseinsnmode>")])
7662
7663 (define_insn "vec_extract_lo_<mode>_maskm"
7664 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7665 (vec_merge:<ssehalfvecmode>
7666 (vec_select:<ssehalfvecmode>
7667 (match_operand:VI4F_256 1 "register_operand" "v")
7668 (parallel [(const_int 0) (const_int 1)
7669 (const_int 2) (const_int 3)]))
7670 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7671 (match_operand:QI 3 "register_operand" "Yk")))]
7672 "TARGET_AVX512VL && TARGET_AVX512F
7673 && rtx_equal_p (operands[2], operands[0])"
7674 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7675 [(set_attr "type" "sselog1")
7676 (set_attr "prefix_extra" "1")
7677 (set_attr "length_immediate" "1")
7678 (set_attr "prefix" "evex")
7679 (set_attr "mode" "<sseinsnmode>")])
7680
7681 (define_insn "vec_extract_hi_<mode>_maskm"
7682 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7683 (vec_merge:<ssehalfvecmode>
7684 (vec_select:<ssehalfvecmode>
7685 (match_operand:VI4F_256 1 "register_operand" "v")
7686 (parallel [(const_int 4) (const_int 5)
7687 (const_int 6) (const_int 7)]))
7688 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7689 (match_operand:QI 3 "register_operand" "Yk")))]
7690 "TARGET_AVX512F && TARGET_AVX512VL
7691 && rtx_equal_p (operands[2], operands[0])"
7692 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7693 [(set_attr "type" "sselog1")
7694 (set_attr "length_immediate" "1")
7695 (set_attr "prefix" "evex")
7696 (set_attr "mode" "<sseinsnmode>")])
7697
7698 (define_insn "vec_extract_hi_<mode>_mask"
7699 [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
7700 (vec_merge:<ssehalfvecmode>
7701 (vec_select:<ssehalfvecmode>
7702 (match_operand:VI4F_256 1 "register_operand" "v")
7703 (parallel [(const_int 4) (const_int 5)
7704 (const_int 6) (const_int 7)]))
7705 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "0C")
7706 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
7707 "TARGET_AVX512VL"
7708 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
7709 [(set_attr "type" "sselog1")
7710 (set_attr "length_immediate" "1")
7711 (set_attr "prefix" "evex")
7712 (set_attr "mode" "<sseinsnmode>")])
7713
7714 (define_insn "vec_extract_hi_<mode>"
7715 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
7716 (vec_select:<ssehalfvecmode>
7717 (match_operand:VI4F_256 1 "register_operand" "x, v")
7718 (parallel [(const_int 4) (const_int 5)
7719 (const_int 6) (const_int 7)])))]
7720 "TARGET_AVX"
7721 "@
7722 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
7723 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7724 [(set_attr "isa" "*, avx512vl")
7725 (set_attr "prefix" "vex, evex")
7726 (set_attr "type" "sselog1")
7727 (set_attr "length_immediate" "1")
7728 (set_attr "mode" "<sseinsnmode>")])
7729
7730 (define_insn_and_split "vec_extract_lo_v32hi"
7731 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7732 (vec_select:V16HI
7733 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
7734 (parallel [(const_int 0) (const_int 1)
7735 (const_int 2) (const_int 3)
7736 (const_int 4) (const_int 5)
7737 (const_int 6) (const_int 7)
7738 (const_int 8) (const_int 9)
7739 (const_int 10) (const_int 11)
7740 (const_int 12) (const_int 13)
7741 (const_int 14) (const_int 15)])))]
7742 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7743 "#"
7744 "&& reload_completed"
7745 [(set (match_dup 0) (match_dup 1))]
7746 "operands[1] = gen_lowpart (V16HImode, operands[1]);")
7747
7748 (define_insn "vec_extract_hi_v32hi"
7749 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7750 (vec_select:V16HI
7751 (match_operand:V32HI 1 "register_operand" "v,v")
7752 (parallel [(const_int 16) (const_int 17)
7753 (const_int 18) (const_int 19)
7754 (const_int 20) (const_int 21)
7755 (const_int 22) (const_int 23)
7756 (const_int 24) (const_int 25)
7757 (const_int 26) (const_int 27)
7758 (const_int 28) (const_int 29)
7759 (const_int 30) (const_int 31)])))]
7760 "TARGET_AVX512F"
7761 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7762 [(set_attr "type" "sselog")
7763 (set_attr "prefix_extra" "1")
7764 (set_attr "length_immediate" "1")
7765 (set_attr "memory" "none,store")
7766 (set_attr "prefix" "evex")
7767 (set_attr "mode" "XI")])
7768
7769 (define_insn_and_split "vec_extract_lo_v16hi"
7770 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
7771 (vec_select:V8HI
7772 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
7773 (parallel [(const_int 0) (const_int 1)
7774 (const_int 2) (const_int 3)
7775 (const_int 4) (const_int 5)
7776 (const_int 6) (const_int 7)])))]
7777 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7778 "#"
7779 "&& reload_completed"
7780 [(set (match_dup 0) (match_dup 1))]
7781 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
7782
7783 (define_insn "vec_extract_hi_v16hi"
7784 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
7785 (vec_select:V8HI
7786 (match_operand:V16HI 1 "register_operand" "x,x,v,v,v,v")
7787 (parallel [(const_int 8) (const_int 9)
7788 (const_int 10) (const_int 11)
7789 (const_int 12) (const_int 13)
7790 (const_int 14) (const_int 15)])))]
7791 "TARGET_AVX"
7792 "@
7793 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7794 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7795 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7796 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7797 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
7798 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
7799 [(set_attr "type" "sselog")
7800 (set_attr "prefix_extra" "1")
7801 (set_attr "length_immediate" "1")
7802 (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
7803 (set_attr "memory" "none,store,none,store,none,store")
7804 (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
7805 (set_attr "mode" "OI")])
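;; In the last two alternatives above only AVX512F is available, so the
;; extract has to operate on the full 512-bit register: %g1 prints the
;; zmm form of operand 1, and vextracti32x4 with immediate 0x1 then reads
;; bits 255:128, which are exactly the high half of the original ymm.
;; vec_extract_hi_v32qi below uses the same trick.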
7806
7807 (define_insn_and_split "vec_extract_lo_v64qi"
7808 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7809 (vec_select:V32QI
7810 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
7811 (parallel [(const_int 0) (const_int 1)
7812 (const_int 2) (const_int 3)
7813 (const_int 4) (const_int 5)
7814 (const_int 6) (const_int 7)
7815 (const_int 8) (const_int 9)
7816 (const_int 10) (const_int 11)
7817 (const_int 12) (const_int 13)
7818 (const_int 14) (const_int 15)
7819 (const_int 16) (const_int 17)
7820 (const_int 18) (const_int 19)
7821 (const_int 20) (const_int 21)
7822 (const_int 22) (const_int 23)
7823 (const_int 24) (const_int 25)
7824 (const_int 26) (const_int 27)
7825 (const_int 28) (const_int 29)
7826 (const_int 30) (const_int 31)])))]
7827 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7828 "#"
7829 "&& reload_completed"
7830 [(set (match_dup 0) (match_dup 1))]
7831 "operands[1] = gen_lowpart (V32QImode, operands[1]);")
7832
7833 (define_insn "vec_extract_hi_v64qi"
7834 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7835 (vec_select:V32QI
7836 (match_operand:V64QI 1 "register_operand" "v,v")
7837 (parallel [(const_int 32) (const_int 33)
7838 (const_int 34) (const_int 35)
7839 (const_int 36) (const_int 37)
7840 (const_int 38) (const_int 39)
7841 (const_int 40) (const_int 41)
7842 (const_int 42) (const_int 43)
7843 (const_int 44) (const_int 45)
7844 (const_int 46) (const_int 47)
7845 (const_int 48) (const_int 49)
7846 (const_int 50) (const_int 51)
7847 (const_int 52) (const_int 53)
7848 (const_int 54) (const_int 55)
7849 (const_int 56) (const_int 57)
7850 (const_int 58) (const_int 59)
7851 (const_int 60) (const_int 61)
7852 (const_int 62) (const_int 63)])))]
7853 "TARGET_AVX512F"
7854 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7855 [(set_attr "type" "sselog")
7856 (set_attr "prefix_extra" "1")
7857 (set_attr "length_immediate" "1")
7858 (set_attr "memory" "none,store")
7859 (set_attr "prefix" "evex")
7860 (set_attr "mode" "XI")])
7861
7862 (define_insn_and_split "vec_extract_lo_v32qi"
7863 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
7864 (vec_select:V16QI
7865 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
7866 (parallel [(const_int 0) (const_int 1)
7867 (const_int 2) (const_int 3)
7868 (const_int 4) (const_int 5)
7869 (const_int 6) (const_int 7)
7870 (const_int 8) (const_int 9)
7871 (const_int 10) (const_int 11)
7872 (const_int 12) (const_int 13)
7873 (const_int 14) (const_int 15)])))]
7874 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7875 "#"
7876 "&& reload_completed"
7877 [(set (match_dup 0) (match_dup 1))]
7878 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
7879
7880 (define_insn "vec_extract_hi_v32qi"
7881 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
7882 (vec_select:V16QI
7883 (match_operand:V32QI 1 "register_operand" "x,x,v,v,v,v")
7884 (parallel [(const_int 16) (const_int 17)
7885 (const_int 18) (const_int 19)
7886 (const_int 20) (const_int 21)
7887 (const_int 22) (const_int 23)
7888 (const_int 24) (const_int 25)
7889 (const_int 26) (const_int 27)
7890 (const_int 28) (const_int 29)
7891 (const_int 30) (const_int 31)])))]
7892 "TARGET_AVX"
7893 "@
7894 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7895 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7896 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7897 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7898 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
7899 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
7900 [(set_attr "type" "sselog")
7901 (set_attr "prefix_extra" "1")
7902 (set_attr "length_immediate" "1")
7903 (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
7904 (set_attr "memory" "none,store,none,store,none,store")
7905 (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
7906 (set_attr "mode" "OI")])
7907
7908 ;; Modes handled by vec_extract patterns.
7909 (define_mode_iterator VEC_EXTRACT_MODE
7910 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
7911 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
7912 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
7913 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
7914 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
7915 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
7916 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
7917
7918 (define_expand "vec_extract<mode><ssescalarmodelower>"
7919 [(match_operand:<ssescalarmode> 0 "register_operand")
7920 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
7921 (match_operand 2 "const_int_operand")]
7922 "TARGET_SSE"
7923 {
7924 ix86_expand_vector_extract (false, operands[0], operands[1],
7925 INTVAL (operands[2]));
7926 DONE;
7927 })
7928
7929 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
7930 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7931 (match_operand:V_512 1 "register_operand")
7932 (match_operand 2 "const_0_to_1_operand")]
7933 "TARGET_AVX512F"
7934 {
7935 if (INTVAL (operands[2]))
7936 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
7937 else
7938 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
7939 DONE;
7940 })
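;; For example, with a V16SF source this expander is named
;; vec_extractv16sfv8sf; operand 2 selects the 256-bit half, so
;; (const_int 1) dispatches to gen_vec_extract_hi_v16sf and
;; (const_int 0) to gen_vec_extract_lo_v16sf.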
7941
7942 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7943 ;;
7944 ;; Parallel double-precision floating point element swizzling
7945 ;;
7946 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7947
7948 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
7949 [(set (match_operand:V8DF 0 "register_operand" "=v")
7950 (vec_select:V8DF
7951 (vec_concat:V16DF
7952 (match_operand:V8DF 1 "register_operand" "v")
7953 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7954 (parallel [(const_int 1) (const_int 9)
7955 (const_int 3) (const_int 11)
7956 (const_int 5) (const_int 13)
7957 (const_int 7) (const_int 15)])))]
7958 "TARGET_AVX512F"
7959 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7960 [(set_attr "type" "sselog")
7961 (set_attr "prefix" "evex")
7962 (set_attr "mode" "V8DF")])
7963
7964 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7965 (define_insn "avx_unpckhpd256<mask_name>"
7966 [(set (match_operand:V4DF 0 "register_operand" "=v")
7967 (vec_select:V4DF
7968 (vec_concat:V8DF
7969 (match_operand:V4DF 1 "register_operand" "v")
7970 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7971 (parallel [(const_int 1) (const_int 5)
7972 (const_int 3) (const_int 7)])))]
7973 "TARGET_AVX && <mask_avx512vl_condition>"
7974 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7975 [(set_attr "type" "sselog")
7976 (set_attr "prefix" "vex")
7977 (set_attr "mode" "V4DF")])
7978
7979 (define_expand "vec_interleave_highv4df"
7980 [(set (match_dup 3)
7981 (vec_select:V4DF
7982 (vec_concat:V8DF
7983 (match_operand:V4DF 1 "register_operand")
7984 (match_operand:V4DF 2 "nonimmediate_operand"))
7985 (parallel [(const_int 0) (const_int 4)
7986 (const_int 2) (const_int 6)])))
7987 (set (match_dup 4)
7988 (vec_select:V4DF
7989 (vec_concat:V8DF
7990 (match_dup 1)
7991 (match_dup 2))
7992 (parallel [(const_int 1) (const_int 5)
7993 (const_int 3) (const_int 7)])))
7994 (set (match_operand:V4DF 0 "register_operand")
7995 (vec_select:V4DF
7996 (vec_concat:V8DF
7997 (match_dup 3)
7998 (match_dup 4))
7999 (parallel [(const_int 2) (const_int 3)
8000 (const_int 6) (const_int 7)])))]
8001 "TARGET_AVX"
8002 {
8003 operands[3] = gen_reg_rtx (V4DFmode);
8004 operands[4] = gen_reg_rtx (V4DFmode);
8005 })
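;; Illustration (element labels a0..a3/b0..b3 are purely symbolic): with
;; operands[1] = {a0,a1,a2,a3} and operands[2] = {b0,b1,b2,b3}, the two
;; in-lane unpacks give operands[3] = {a0,b0,a2,b2} and
;; operands[4] = {a1,b1,a3,b3}; selecting elements {2,3,6,7} of their
;; concatenation yields {a2,b2,a3,b3}, the true cross-lane high
;; interleave.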
8006
8007
8008 (define_insn "avx512vl_unpckhpd128_mask"
8009 [(set (match_operand:V2DF 0 "register_operand" "=v")
8010 (vec_merge:V2DF
8011 (vec_select:V2DF
8012 (vec_concat:V4DF
8013 (match_operand:V2DF 1 "register_operand" "v")
8014 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8015 (parallel [(const_int 1) (const_int 3)]))
8016 (match_operand:V2DF 3 "vector_move_operand" "0C")
8017 (match_operand:QI 4 "register_operand" "Yk")))]
8018 "TARGET_AVX512VL"
8019 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8020 [(set_attr "type" "sselog")
8021 (set_attr "prefix" "evex")
8022 (set_attr "mode" "V2DF")])
8023
8024 (define_expand "vec_interleave_highv2df"
8025 [(set (match_operand:V2DF 0 "register_operand")
8026 (vec_select:V2DF
8027 (vec_concat:V4DF
8028 (match_operand:V2DF 1 "nonimmediate_operand")
8029 (match_operand:V2DF 2 "nonimmediate_operand"))
8030 (parallel [(const_int 1)
8031 (const_int 3)])))]
8032 "TARGET_SSE2"
8033 {
8034 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
8035 operands[2] = force_reg (V2DFmode, operands[2]);
8036 })
8037
8038 (define_insn "*vec_interleave_highv2df"
8039 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
8040 (vec_select:V2DF
8041 (vec_concat:V4DF
8042 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
8043 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
8044 (parallel [(const_int 1)
8045 (const_int 3)])))]
8046 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
8047 "@
8048 unpckhpd\t{%2, %0|%0, %2}
8049 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
8050 %vmovddup\t{%H1, %0|%0, %H1}
8051 movlpd\t{%H1, %0|%0, %H1}
8052 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
8053 %vmovhpd\t{%1, %0|%q0, %1}"
8054 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8055 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8056 (set (attr "prefix_data16")
8057 (if_then_else (eq_attr "alternative" "3,5")
8058 (const_string "1")
8059 (const_string "*")))
8060 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8061 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
8062
8063 (define_expand "avx512f_movddup512<mask_name>"
8064 [(set (match_operand:V8DF 0 "register_operand")
8065 (vec_select:V8DF
8066 (vec_concat:V16DF
8067 (match_operand:V8DF 1 "nonimmediate_operand")
8068 (match_dup 1))
8069 (parallel [(const_int 0) (const_int 8)
8070 (const_int 2) (const_int 10)
8071 (const_int 4) (const_int 12)
8072 (const_int 6) (const_int 14)])))]
8073 "TARGET_AVX512F")
8074
8075 (define_expand "avx512f_unpcklpd512<mask_name>"
8076 [(set (match_operand:V8DF 0 "register_operand")
8077 (vec_select:V8DF
8078 (vec_concat:V16DF
8079 (match_operand:V8DF 1 "register_operand")
8080 (match_operand:V8DF 2 "nonimmediate_operand"))
8081 (parallel [(const_int 0) (const_int 8)
8082 (const_int 2) (const_int 10)
8083 (const_int 4) (const_int 12)
8084 (const_int 6) (const_int 14)])))]
8085 "TARGET_AVX512F")
8086
8087 (define_insn "*avx512f_unpcklpd512<mask_name>"
8088 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
8089 (vec_select:V8DF
8090 (vec_concat:V16DF
8091 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
8092 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
8093 (parallel [(const_int 0) (const_int 8)
8094 (const_int 2) (const_int 10)
8095 (const_int 4) (const_int 12)
8096 (const_int 6) (const_int 14)])))]
8097 "TARGET_AVX512F"
8098 "@
8099 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
8100 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8101 [(set_attr "type" "sselog")
8102 (set_attr "prefix" "evex")
8103 (set_attr "mode" "V8DF")])
8104
8105 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
8106 (define_expand "avx_movddup256<mask_name>"
8107 [(set (match_operand:V4DF 0 "register_operand")
8108 (vec_select:V4DF
8109 (vec_concat:V8DF
8110 (match_operand:V4DF 1 "nonimmediate_operand")
8111 (match_dup 1))
8112 (parallel [(const_int 0) (const_int 4)
8113 (const_int 2) (const_int 6)])))]
8114 "TARGET_AVX && <mask_avx512vl_condition>")
8115
8116 (define_expand "avx_unpcklpd256<mask_name>"
8117 [(set (match_operand:V4DF 0 "register_operand")
8118 (vec_select:V4DF
8119 (vec_concat:V8DF
8120 (match_operand:V4DF 1 "register_operand")
8121 (match_operand:V4DF 2 "nonimmediate_operand"))
8122 (parallel [(const_int 0) (const_int 4)
8123 (const_int 2) (const_int 6)])))]
8124 "TARGET_AVX && <mask_avx512vl_condition>")
8125
8126 (define_insn "*avx_unpcklpd256<mask_name>"
8127 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
8128 (vec_select:V4DF
8129 (vec_concat:V8DF
8130 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
8131 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
8132 (parallel [(const_int 0) (const_int 4)
8133 (const_int 2) (const_int 6)])))]
8134 "TARGET_AVX && <mask_avx512vl_condition>"
8135 "@
8136 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
8137 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
8138 [(set_attr "type" "sselog")
8139 (set_attr "prefix" "vex")
8140 (set_attr "mode" "V4DF")])
8141
8142 (define_expand "vec_interleave_lowv4df"
8143 [(set (match_dup 3)
8144 (vec_select:V4DF
8145 (vec_concat:V8DF
8146 (match_operand:V4DF 1 "register_operand")
8147 (match_operand:V4DF 2 "nonimmediate_operand"))
8148 (parallel [(const_int 0) (const_int 4)
8149 (const_int 2) (const_int 6)])))
8150 (set (match_dup 4)
8151 (vec_select:V4DF
8152 (vec_concat:V8DF
8153 (match_dup 1)
8154 (match_dup 2))
8155 (parallel [(const_int 1) (const_int 5)
8156 (const_int 3) (const_int 7)])))
8157 (set (match_operand:V4DF 0 "register_operand")
8158 (vec_select:V4DF
8159 (vec_concat:V8DF
8160 (match_dup 3)
8161 (match_dup 4))
8162 (parallel [(const_int 0) (const_int 1)
8163 (const_int 4) (const_int 5)])))]
8164 "TARGET_AVX"
8165 {
8166 operands[3] = gen_reg_rtx (V4DFmode);
8167 operands[4] = gen_reg_rtx (V4DFmode);
8168 })
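;; Same construction as vec_interleave_highv4df above: the temporaries
;; are again {a0,b0,a2,b2} and {a1,b1,a3,b3}, and selecting elements
;; {0,1,4,5} of their concatenation yields {a0,b0,a1,b1}, the cross-lane
;; low interleave.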
8169
8170 (define_insn "avx512vl_unpcklpd128_mask"
8171 [(set (match_operand:V2DF 0 "register_operand" "=v")
8172 (vec_merge:V2DF
8173 (vec_select:V2DF
8174 (vec_concat:V4DF
8175 (match_operand:V2DF 1 "register_operand" "v")
8176 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8177 (parallel [(const_int 0) (const_int 2)]))
8178 (match_operand:V2DF 3 "vector_move_operand" "0C")
8179 (match_operand:QI 4 "register_operand" "Yk")))]
8180 "TARGET_AVX512VL"
8181 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8182 [(set_attr "type" "sselog")
8183 (set_attr "prefix" "evex")
8184 (set_attr "mode" "V2DF")])
8185
8186 (define_expand "vec_interleave_lowv2df"
8187 [(set (match_operand:V2DF 0 "register_operand")
8188 (vec_select:V2DF
8189 (vec_concat:V4DF
8190 (match_operand:V2DF 1 "nonimmediate_operand")
8191 (match_operand:V2DF 2 "nonimmediate_operand"))
8192 (parallel [(const_int 0)
8193 (const_int 2)])))]
8194 "TARGET_SSE2"
8195 {
8196 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
8197 operands[1] = force_reg (V2DFmode, operands[1]);
8198 })
8199
8200 (define_insn "*vec_interleave_lowv2df"
8201 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
8202 (vec_select:V2DF
8203 (vec_concat:V4DF
8204 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
8205 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
8206 (parallel [(const_int 0)
8207 (const_int 2)])))]
8208 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
8209 "@
8210 unpcklpd\t{%2, %0|%0, %2}
8211 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8212 %vmovddup\t{%1, %0|%0, %q1}
8213 movhpd\t{%2, %0|%0, %q2}
8214 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
8215 %vmovlpd\t{%2, %H0|%H0, %2}"
8216 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8217 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8218 (set (attr "prefix_data16")
8219 (if_then_else (eq_attr "alternative" "3,5")
8220 (const_string "1")
8221 (const_string "*")))
8222 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8223 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
8224
8225 (define_split
8226 [(set (match_operand:V2DF 0 "memory_operand")
8227 (vec_select:V2DF
8228 (vec_concat:V4DF
8229 (match_operand:V2DF 1 "register_operand")
8230 (match_dup 1))
8231 (parallel [(const_int 0)
8232 (const_int 2)])))]
8233 "TARGET_SSE3 && reload_completed"
8234 [(const_int 0)]
8235 {
8236 rtx low = gen_lowpart (DFmode, operands[1]);
8237
8238 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
8239 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
8240 DONE;
8241 })
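;; The split above handles a store of a duplicated register value: rather
;; than materializing the V2DF, it stores the same DF twice, at byte
;; offsets 0 and 8 of the destination.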
8242
8243 (define_split
8244 [(set (match_operand:V2DF 0 "register_operand")
8245 (vec_select:V2DF
8246 (vec_concat:V4DF
8247 (match_operand:V2DF 1 "memory_operand")
8248 (match_dup 1))
8249 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
8250 (match_operand:SI 3 "const_int_operand")])))]
8251 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
8252 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
8253 {
8254 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
8255 })
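;; In the split above operand 1 is concatenated with itself, so selector
;; pairs (N, N+2) always name the same element; the pattern is therefore
;; just a vec_duplicate (movddup) of the DF at byte offset
;; INTVAL (operands[2]) * 8 of the memory operand.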
8256
8257 (define_insn "avx512f_vmscalef<mode><round_name>"
8258 [(set (match_operand:VF_128 0 "register_operand" "=v")
8259 (vec_merge:VF_128
8260 (unspec:VF_128
8261 [(match_operand:VF_128 1 "register_operand" "v")
8262 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
8263 UNSPEC_SCALEF)
8264 (match_dup 1)
8265 (const_int 1)))]
8266 "TARGET_AVX512F"
8267 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
8268 [(set_attr "prefix" "evex")
8269 (set_attr "mode" "<ssescalarmode>")])
8270
8271 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
8272 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8273 (unspec:VF_AVX512VL
8274 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
8275 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
8276 UNSPEC_SCALEF))]
8277 "TARGET_AVX512F"
8278 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
8279 [(set_attr "prefix" "evex")
8280 (set_attr "mode" "<MODE>")])
8281
8282 (define_expand "<avx512>_vternlog<mode>_maskz"
8283 [(match_operand:VI48_AVX512VL 0 "register_operand")
8284 (match_operand:VI48_AVX512VL 1 "register_operand")
8285 (match_operand:VI48_AVX512VL 2 "register_operand")
8286 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
8287 (match_operand:SI 4 "const_0_to_255_operand")
8288 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8289 "TARGET_AVX512F"
8290 {
8291 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
8292 operands[0], operands[1], operands[2], operands[3],
8293 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
8294 DONE;
8295 })
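;; Zero-masking has no insn of its own here: the expander above forwards
;; to the *_maskz_1 pattern with CONST0_RTX as the merge source, so
;; masked-off elements receive zero instead of the old destination value.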
8296
8297 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
8298 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8299 (unspec:VI48_AVX512VL
8300 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8301 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8302 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
8303 (match_operand:SI 4 "const_0_to_255_operand")]
8304 UNSPEC_VTERNLOG))]
8305 "TARGET_AVX512F"
8306 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
8307 [(set_attr "type" "sselog")
8308 (set_attr "prefix" "evex")
8309 (set_attr "mode" "<sseinsnmode>")])
8310
8311 (define_insn "<avx512>_vternlog<mode>_mask"
8312 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8313 (vec_merge:VI48_AVX512VL
8314 (unspec:VI48_AVX512VL
8315 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8316 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8317 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
8318 (match_operand:SI 4 "const_0_to_255_operand")]
8319 UNSPEC_VTERNLOG)
8320 (match_dup 1)
8321 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8322 "TARGET_AVX512F"
8323 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
8324 [(set_attr "type" "sselog")
8325 (set_attr "prefix" "evex")
8326 (set_attr "mode" "<sseinsnmode>")])
8327
8328 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
8329 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8330 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
8331 UNSPEC_GETEXP))]
8332 "TARGET_AVX512F"
8333 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
8334 [(set_attr "prefix" "evex")
8335 (set_attr "mode" "<MODE>")])
8336
8337 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
8338 [(set (match_operand:VF_128 0 "register_operand" "=v")
8339 (vec_merge:VF_128
8340 (unspec:VF_128
8341 [(match_operand:VF_128 1 "register_operand" "v")
8342 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
8343 UNSPEC_GETEXP)
8344 (match_dup 1)
8345 (const_int 1)))]
8346 "TARGET_AVX512F"
8347 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_saeonly_scalar_mask_op3>}";
8348 [(set_attr "prefix" "evex")
8349 (set_attr "mode" "<ssescalarmode>")])
8350
8351 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
8352 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8353 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
8354 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
8355 (match_operand:SI 3 "const_0_to_255_operand")]
8356 UNSPEC_ALIGN))]
8357 "TARGET_AVX512F"
8358 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
8359 [(set_attr "prefix" "evex")
8360 (set_attr "mode" "<sseinsnmode>")])
8361
8362 (define_expand "avx512f_shufps512_mask"
8363 [(match_operand:V16SF 0 "register_operand")
8364 (match_operand:V16SF 1 "register_operand")
8365 (match_operand:V16SF 2 "nonimmediate_operand")
8366 (match_operand:SI 3 "const_0_to_255_operand")
8367 (match_operand:V16SF 4 "register_operand")
8368 (match_operand:HI 5 "register_operand")]
8369 "TARGET_AVX512F"
8370 {
8371 int mask = INTVAL (operands[3]);
8372 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
8373 GEN_INT ((mask >> 0) & 3),
8374 GEN_INT ((mask >> 2) & 3),
8375 GEN_INT (((mask >> 4) & 3) + 16),
8376 GEN_INT (((mask >> 6) & 3) + 16),
8377 GEN_INT (((mask >> 0) & 3) + 4),
8378 GEN_INT (((mask >> 2) & 3) + 4),
8379 GEN_INT (((mask >> 4) & 3) + 20),
8380 GEN_INT (((mask >> 6) & 3) + 20),
8381 GEN_INT (((mask >> 0) & 3) + 8),
8382 GEN_INT (((mask >> 2) & 3) + 8),
8383 GEN_INT (((mask >> 4) & 3) + 24),
8384 GEN_INT (((mask >> 6) & 3) + 24),
8385 GEN_INT (((mask >> 0) & 3) + 12),
8386 GEN_INT (((mask >> 2) & 3) + 12),
8387 GEN_INT (((mask >> 4) & 3) + 28),
8388 GEN_INT (((mask >> 6) & 3) + 28),
8389 operands[4], operands[5]));
8390 DONE;
8391 })
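;; Worked example for the expander above: each 2-bit immediate field
;; picks one element per 128-bit lane, the low two fields from operand 1
;; and the high two from operand 2 (offset by 16 in the concatenation).
;; For immediate 0x1b the fields are 3,2,1,0, giving selectors
;; {3,2,17,16, 7,6,21,20, 11,10,25,24, 15,14,29,28}.  The condition of
;; avx512f_shufps512_1 below verifies that every lane repeats the same
;; pattern, and its output code re-packs the lane-0 selectors into the
;; vshufps immediate.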
8392
8393
8394 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
8395 [(match_operand:VF_AVX512VL 0 "register_operand")
8396 (match_operand:VF_AVX512VL 1 "register_operand")
8397 (match_operand:VF_AVX512VL 2 "register_operand")
8398 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
8399 (match_operand:SI 4 "const_0_to_255_operand")
8400 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8401 "TARGET_AVX512F"
8402 {
8403 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
8404 operands[0], operands[1], operands[2], operands[3],
8405 operands[4], CONST0_RTX (<MODE>mode), operands[5]
8406 <round_saeonly_expand_operand6>));
8407 DONE;
8408 })
8409
8410 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
8411 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8412 (unspec:VF_AVX512VL
8413 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
8414 (match_operand:VF_AVX512VL 2 "register_operand" "v")
8415 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
8416 (match_operand:SI 4 "const_0_to_255_operand")]
8417 UNSPEC_FIXUPIMM))]
8418 "TARGET_AVX512F"
8419 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
8420 [(set_attr "prefix" "evex")
8421 (set_attr "mode" "<MODE>")])
8422
8423 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
8424 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8425 (vec_merge:VF_AVX512VL
8426 (unspec:VF_AVX512VL
8427 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
8428 (match_operand:VF_AVX512VL 2 "register_operand" "v")
8429 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
8430 (match_operand:SI 4 "const_0_to_255_operand")]
8431 UNSPEC_FIXUPIMM)
8432 (match_dup 1)
8433 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8434 "TARGET_AVX512F"
8435 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
8436 [(set_attr "prefix" "evex")
8437 (set_attr "mode" "<MODE>")])
8438
8439 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
8440 [(match_operand:VF_128 0 "register_operand")
8441 (match_operand:VF_128 1 "register_operand")
8442 (match_operand:VF_128 2 "register_operand")
8443 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
8444 (match_operand:SI 4 "const_0_to_255_operand")
8445 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8446 "TARGET_AVX512F"
8447 {
8448 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
8449 operands[0], operands[1], operands[2], operands[3],
8450 operands[4], CONST0_RTX (<MODE>mode), operands[5]
8451 <round_saeonly_expand_operand6>));
8452 DONE;
8453 })
8454
8455 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
8456 [(set (match_operand:VF_128 0 "register_operand" "=v")
8457 (vec_merge:VF_128
8458 (unspec:VF_128
8459 [(match_operand:VF_128 1 "register_operand" "0")
8460 (match_operand:VF_128 2 "register_operand" "v")
8461 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8462 (match_operand:SI 4 "const_0_to_255_operand")]
8463 UNSPEC_FIXUPIMM)
8464 (match_dup 1)
8465 (const_int 1)))]
8466 "TARGET_AVX512F"
8467 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
8468 [(set_attr "prefix" "evex")
8469 (set_attr "mode" "<ssescalarmode>")])
8470
8471 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
8472 [(set (match_operand:VF_128 0 "register_operand" "=v")
8473 (vec_merge:VF_128
8474 (vec_merge:VF_128
8475 (unspec:VF_128
8476 [(match_operand:VF_128 1 "register_operand" "0")
8477 (match_operand:VF_128 2 "register_operand" "v")
8478 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8479 (match_operand:SI 4 "const_0_to_255_operand")]
8480 UNSPEC_FIXUPIMM)
8481 (match_dup 1)
8482 (const_int 1))
8483 (match_dup 1)
8484 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8485 "TARGET_AVX512F"
8486 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
8487 [(set_attr "prefix" "evex")
8488 (set_attr "mode" "<ssescalarmode>")])
8489
8490 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
8491 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8492 (unspec:VF_AVX512VL
8493 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
8494 (match_operand:SI 2 "const_0_to_255_operand")]
8495 UNSPEC_ROUND))]
8496 "TARGET_AVX512F"
8497 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
8498 [(set_attr "length_immediate" "1")
8499 (set_attr "prefix" "evex")
8500 (set_attr "mode" "<MODE>")])
8501
8502 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
8503 [(set (match_operand:VF_128 0 "register_operand" "=v")
8504 (vec_merge:VF_128
8505 (unspec:VF_128
8506 [(match_operand:VF_128 1 "register_operand" "v")
8507 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8508 (match_operand:SI 3 "const_0_to_255_operand")]
8509 UNSPEC_ROUND)
8510 (match_dup 1)
8511 (const_int 1)))]
8512 "TARGET_AVX512F"
8513 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
8514 [(set_attr "length_immediate" "1")
8515 (set_attr "prefix" "evex")
8516 (set_attr "mode" "<MODE>")])
8517
8518 ;; One bit in mask selects 2 elements.
8519 (define_insn "avx512f_shufps512_1<mask_name>"
8520 [(set (match_operand:V16SF 0 "register_operand" "=v")
8521 (vec_select:V16SF
8522 (vec_concat:V32SF
8523 (match_operand:V16SF 1 "register_operand" "v")
8524 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
8525 (parallel [(match_operand 3 "const_0_to_3_operand")
8526 (match_operand 4 "const_0_to_3_operand")
8527 (match_operand 5 "const_16_to_19_operand")
8528 (match_operand 6 "const_16_to_19_operand")
8529 (match_operand 7 "const_4_to_7_operand")
8530 (match_operand 8 "const_4_to_7_operand")
8531 (match_operand 9 "const_20_to_23_operand")
8532 (match_operand 10 "const_20_to_23_operand")
8533 (match_operand 11 "const_8_to_11_operand")
8534 (match_operand 12 "const_8_to_11_operand")
8535 (match_operand 13 "const_24_to_27_operand")
8536 (match_operand 14 "const_24_to_27_operand")
8537 (match_operand 15 "const_12_to_15_operand")
8538 (match_operand 16 "const_12_to_15_operand")
8539 (match_operand 17 "const_28_to_31_operand")
8540 (match_operand 18 "const_28_to_31_operand")])))]
8541 "TARGET_AVX512F
8542 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
8543 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
8544 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
8545 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
8546 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
8547 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
8548 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
8549 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
8550 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
8551 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
8552 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
8553 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
8554 {
8555 int mask;
8556 mask = INTVAL (operands[3]);
8557 mask |= INTVAL (operands[4]) << 2;
8558 mask |= (INTVAL (operands[5]) - 16) << 4;
8559 mask |= (INTVAL (operands[6]) - 16) << 6;
8560 operands[3] = GEN_INT (mask);
8561
8562 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
8563 }
8564 [(set_attr "type" "sselog")
8565 (set_attr "length_immediate" "1")
8566 (set_attr "prefix" "evex")
8567 (set_attr "mode" "V16SF")])
8568
8569 (define_expand "avx512f_shufpd512_mask"
8570 [(match_operand:V8DF 0 "register_operand")
8571 (match_operand:V8DF 1 "register_operand")
8572 (match_operand:V8DF 2 "nonimmediate_operand")
8573 (match_operand:SI 3 "const_0_to_255_operand")
8574 (match_operand:V8DF 4 "register_operand")
8575 (match_operand:QI 5 "register_operand")]
8576 "TARGET_AVX512F"
8577 {
8578 int mask = INTVAL (operands[3]);
8579 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
8580 GEN_INT (mask & 1),
8581 GEN_INT (mask & 2 ? 9 : 8),
8582 GEN_INT (mask & 4 ? 3 : 2),
8583 GEN_INT (mask & 8 ? 11 : 10),
8584 GEN_INT (mask & 16 ? 5 : 4),
8585 GEN_INT (mask & 32 ? 13 : 12),
8586 GEN_INT (mask & 64 ? 7 : 6),
8587 GEN_INT (mask & 128 ? 15 : 14),
8588 operands[4], operands[5]));
8589 DONE;
8590 })
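;; Worked example for the expander above: even immediate bits choose
;; between the two elements of each 128-bit pair of operand 1, odd bits
;; do the same for operand 2 (whose elements sit at 8..15 of the
;; concatenation).  Immediate 0x05 therefore yields the selector list
;; {1, 8, 3, 10, 4, 12, 6, 14}.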
8591
8592 (define_insn "avx512f_shufpd512_1<mask_name>"
8593 [(set (match_operand:V8DF 0 "register_operand" "=v")
8594 (vec_select:V8DF
8595 (vec_concat:V16DF
8596 (match_operand:V8DF 1 "register_operand" "v")
8597 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8598 (parallel [(match_operand 3 "const_0_to_1_operand")
8599 (match_operand 4 "const_8_to_9_operand")
8600 (match_operand 5 "const_2_to_3_operand")
8601 (match_operand 6 "const_10_to_11_operand")
8602 (match_operand 7 "const_4_to_5_operand")
8603 (match_operand 8 "const_12_to_13_operand")
8604 (match_operand 9 "const_6_to_7_operand")
8605 (match_operand 10 "const_14_to_15_operand")])))]
8606 "TARGET_AVX512F"
8607 {
8608 int mask;
8609 mask = INTVAL (operands[3]);
8610 mask |= (INTVAL (operands[4]) - 8) << 1;
8611 mask |= (INTVAL (operands[5]) - 2) << 2;
8612 mask |= (INTVAL (operands[6]) - 10) << 3;
8613 mask |= (INTVAL (operands[7]) - 4) << 4;
8614 mask |= (INTVAL (operands[8]) - 12) << 5;
8615 mask |= (INTVAL (operands[9]) - 6) << 6;
8616 mask |= (INTVAL (operands[10]) - 14) << 7;
8617 operands[3] = GEN_INT (mask);
8618
8619 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8620 }
8621 [(set_attr "type" "sselog")
8622 (set_attr "length_immediate" "1")
8623 (set_attr "prefix" "evex")
8624 (set_attr "mode" "V8DF")])
8625
8626 (define_expand "avx_shufpd256<mask_expand4_name>"
8627 [(match_operand:V4DF 0 "register_operand")
8628 (match_operand:V4DF 1 "register_operand")
8629 (match_operand:V4DF 2 "nonimmediate_operand")
8630 (match_operand:SI 3 "const_int_operand")]
8631 "TARGET_AVX"
8632 {
8633 int mask = INTVAL (operands[3]);
8634 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8635 operands[1],
8636 operands[2],
8637 GEN_INT (mask & 1),
8638 GEN_INT (mask & 2 ? 5 : 4),
8639 GEN_INT (mask & 4 ? 3 : 2),
8640 GEN_INT (mask & 8 ? 7 : 6)
8641 <mask_expand4_args>));
8642 DONE;
8643 })
8644
8645 (define_insn "avx_shufpd256_1<mask_name>"
8646 [(set (match_operand:V4DF 0 "register_operand" "=v")
8647 (vec_select:V4DF
8648 (vec_concat:V8DF
8649 (match_operand:V4DF 1 "register_operand" "v")
8650 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8651 (parallel [(match_operand 3 "const_0_to_1_operand")
8652 (match_operand 4 "const_4_to_5_operand")
8653 (match_operand 5 "const_2_to_3_operand")
8654 (match_operand 6 "const_6_to_7_operand")])))]
8655 "TARGET_AVX && <mask_avx512vl_condition>"
8656 {
8657 int mask;
8658 mask = INTVAL (operands[3]);
8659 mask |= (INTVAL (operands[4]) - 4) << 1;
8660 mask |= (INTVAL (operands[5]) - 2) << 2;
8661 mask |= (INTVAL (operands[6]) - 6) << 3;
8662 operands[3] = GEN_INT (mask);
8663
8664 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
8665 }
8666 [(set_attr "type" "sseshuf")
8667 (set_attr "length_immediate" "1")
8668 (set_attr "prefix" "vex")
8669 (set_attr "mode" "V4DF")])
8670
8671 (define_expand "sse2_shufpd<mask_expand4_name>"
8672 [(match_operand:V2DF 0 "register_operand")
8673 (match_operand:V2DF 1 "register_operand")
8674 (match_operand:V2DF 2 "vector_operand")
8675 (match_operand:SI 3 "const_int_operand")]
8676 "TARGET_SSE2"
8677 {
8678 int mask = INTVAL (operands[3]);
8679 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8680 operands[2], GEN_INT (mask & 1),
8681 GEN_INT (mask & 2 ? 3 : 2)
8682 <mask_expand4_args>));
8683 DONE;
8684 })
8685
8686 (define_insn "sse2_shufpd_v2df_mask"
8687 [(set (match_operand:V2DF 0 "register_operand" "=v")
8688 (vec_merge:V2DF
8689 (vec_select:V2DF
8690 (vec_concat:V4DF
8691 (match_operand:V2DF 1 "register_operand" "v")
8692 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8693 (parallel [(match_operand 3 "const_0_to_1_operand")
8694 (match_operand 4 "const_2_to_3_operand")]))
8695 (match_operand:V2DF 5 "vector_move_operand" "0C")
8696 (match_operand:QI 6 "register_operand" "Yk")))]
8697 "TARGET_AVX512VL"
8698 {
8699 int mask;
8700 mask = INTVAL (operands[3]);
8701 mask |= (INTVAL (operands[4]) - 2) << 1;
8702 operands[3] = GEN_INT (mask);
8703
8704 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{6%}%N5, %1, %2, %3}";
8705 }
8706 [(set_attr "type" "sseshuf")
8707 (set_attr "length_immediate" "1")
8708 (set_attr "prefix" "evex")
8709 (set_attr "mode" "V2DF")])
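;; In the masked variant above operand 5 is the merge source and operand
;; 6 the mask register: %{%6%} prints the {%k} writemask annotation, and
;; %N5 adds the {z} zero-masking marker when operand 5 is the all-zeros
;; vector (merge-masking otherwise).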
8710
8711 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
8712 (define_insn "avx2_interleave_highv4di<mask_name>"
8713 [(set (match_operand:V4DI 0 "register_operand" "=v")
8714 (vec_select:V4DI
8715 (vec_concat:V8DI
8716 (match_operand:V4DI 1 "register_operand" "v")
8717 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8718 (parallel [(const_int 1)
8719 (const_int 5)
8720 (const_int 3)
8721 (const_int 7)])))]
8722 "TARGET_AVX2 && <mask_avx512vl_condition>"
8723 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8724 [(set_attr "type" "sselog")
8725 (set_attr "prefix" "vex")
8726 (set_attr "mode" "OI")])
8727
8728 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
8729 [(set (match_operand:V8DI 0 "register_operand" "=v")
8730 (vec_select:V8DI
8731 (vec_concat:V16DI
8732 (match_operand:V8DI 1 "register_operand" "v")
8733 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8734 (parallel [(const_int 1) (const_int 9)
8735 (const_int 3) (const_int 11)
8736 (const_int 5) (const_int 13)
8737 (const_int 7) (const_int 15)])))]
8738 "TARGET_AVX512F"
8739 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8740 [(set_attr "type" "sselog")
8741 (set_attr "prefix" "evex")
8742 (set_attr "mode" "XI")])
8743
8744 (define_insn "vec_interleave_highv2di<mask_name>"
8745 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8746 (vec_select:V2DI
8747 (vec_concat:V4DI
8748 (match_operand:V2DI 1 "register_operand" "0,v")
8749 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
8750 (parallel [(const_int 1)
8751 (const_int 3)])))]
8752 "TARGET_SSE2 && <mask_avx512vl_condition>"
8753 "@
8754 punpckhqdq\t{%2, %0|%0, %2}
8755 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8756 [(set_attr "isa" "noavx,avx")
8757 (set_attr "type" "sselog")
8758 (set_attr "prefix_data16" "1,*")
8759 (set_attr "prefix" "orig,<mask_prefix>")
8760 (set_attr "mode" "TI")])
8761
8762 (define_insn "avx2_interleave_lowv4di<mask_name>"
8763 [(set (match_operand:V4DI 0 "register_operand" "=v")
8764 (vec_select:V4DI
8765 (vec_concat:V8DI
8766 (match_operand:V4DI 1 "register_operand" "v")
8767 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8768 (parallel [(const_int 0)
8769 (const_int 4)
8770 (const_int 2)
8771 (const_int 6)])))]
8772 "TARGET_AVX2 && <mask_avx512vl_condition>"
8773 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8774 [(set_attr "type" "sselog")
8775 (set_attr "prefix" "vex")
8776 (set_attr "mode" "OI")])
8777
8778 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
8779 [(set (match_operand:V8DI 0 "register_operand" "=v")
8780 (vec_select:V8DI
8781 (vec_concat:V16DI
8782 (match_operand:V8DI 1 "register_operand" "v")
8783 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8784 (parallel [(const_int 0) (const_int 8)
8785 (const_int 2) (const_int 10)
8786 (const_int 4) (const_int 12)
8787 (const_int 6) (const_int 14)])))]
8788 "TARGET_AVX512F"
8789 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8790 [(set_attr "type" "sselog")
8791 (set_attr "prefix" "evex")
8792 (set_attr "mode" "XI")])
8793
8794 (define_insn "vec_interleave_lowv2di<mask_name>"
8795 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8796 (vec_select:V2DI
8797 (vec_concat:V4DI
8798 (match_operand:V2DI 1 "register_operand" "0,v")
8799 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
8800 (parallel [(const_int 0)
8801 (const_int 2)])))]
8802 "TARGET_SSE2 && <mask_avx512vl_condition>"
8803 "@
8804 punpcklqdq\t{%2, %0|%0, %2}
8805 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8806 [(set_attr "isa" "noavx,avx")
8807 (set_attr "type" "sselog")
8808 (set_attr "prefix_data16" "1,*")
8809 (set_attr "prefix" "orig,vex")
8810 (set_attr "mode" "TI")])
8811
8812 (define_insn "sse2_shufpd_<mode>"
8813 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
8814 (vec_select:VI8F_128
8815 (vec_concat:<ssedoublevecmode>
8816 (match_operand:VI8F_128 1 "register_operand" "0,v")
8817 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
8818 (parallel [(match_operand 3 "const_0_to_1_operand")
8819 (match_operand 4 "const_2_to_3_operand")])))]
8820 "TARGET_SSE2"
8821 {
8822 int mask;
8823 mask = INTVAL (operands[3]);
8824 mask |= (INTVAL (operands[4]) - 2) << 1;
8825 operands[3] = GEN_INT (mask);
8826
8827 switch (which_alternative)
8828 {
8829 case 0:
8830 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
8831 case 1:
8832 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8833 default:
8834 gcc_unreachable ();
8835 }
8836 }
8837 [(set_attr "isa" "noavx,avx")
8838 (set_attr "type" "sseshuf")
8839 (set_attr "length_immediate" "1")
8840 (set_attr "prefix" "orig,maybe_evex")
8841 (set_attr "mode" "V2DF")])
8842
8843 ;; Avoid combining registers from different units in a single alternative,
8844 ;; see comment above inline_secondary_memory_needed function in i386.c
8845 (define_insn "sse2_storehpd"
8846 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
8847 (vec_select:DF
8848 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
8849 (parallel [(const_int 1)])))]
8850 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8851 "@
8852 %vmovhpd\t{%1, %0|%0, %1}
8853 unpckhpd\t%0, %0
8854 vunpckhpd\t{%d1, %0|%0, %d1}
8855 #
8856 #
8857 #"
8858 [(set_attr "isa" "*,noavx,avx,*,*,*")
8859 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
8860 (set (attr "prefix_data16")
8861 (if_then_else
8862 (and (eq_attr "alternative" "0")
8863 (not (match_test "TARGET_AVX")))
8864 (const_string "1")
8865 (const_string "*")))
8866 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
8867 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
8868
8869 (define_split
8870 [(set (match_operand:DF 0 "register_operand")
8871 (vec_select:DF
8872 (match_operand:V2DF 1 "memory_operand")
8873 (parallel [(const_int 1)])))]
8874 "TARGET_SSE2 && reload_completed"
8875 [(set (match_dup 0) (match_dup 1))]
8876 "operands[1] = adjust_address (operands[1], DFmode, 8);")
8877
8878 (define_insn "*vec_extractv2df_1_sse"
8879 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8880 (vec_select:DF
8881 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8882 (parallel [(const_int 1)])))]
8883 "!TARGET_SSE2 && TARGET_SSE
8884 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8885 "@
8886 movhps\t{%1, %0|%q0, %1}
8887 movhlps\t{%1, %0|%0, %1}
8888 movlps\t{%H1, %0|%0, %H1}"
8889 [(set_attr "type" "ssemov")
8890 (set_attr "mode" "V2SF,V4SF,V2SF")])
8891
8892 ;; Avoid combining registers from different units in a single alternative,
8893 ;; see comment above inline_secondary_memory_needed function in i386.c
8894 (define_insn "sse2_storelpd"
8895 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
8896 (vec_select:DF
8897 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
8898 (parallel [(const_int 0)])))]
8899 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8900 "@
8901 %vmovlpd\t{%1, %0|%0, %1}
8902 #
8903 #
8904 #
8905 #"
8906 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
8907 (set (attr "prefix_data16")
8908 (if_then_else (eq_attr "alternative" "0")
8909 (const_string "1")
8910 (const_string "*")))
8911 (set_attr "prefix" "maybe_vex")
8912 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
8913
8914 (define_split
8915 [(set (match_operand:DF 0 "register_operand")
8916 (vec_select:DF
8917 (match_operand:V2DF 1 "nonimmediate_operand")
8918 (parallel [(const_int 0)])))]
8919 "TARGET_SSE2 && reload_completed"
8920 [(set (match_dup 0) (match_dup 1))]
8921 "operands[1] = gen_lowpart (DFmode, operands[1]);")
8922
8923 (define_insn "*vec_extractv2df_0_sse"
8924 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8925 (vec_select:DF
8926 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8927 (parallel [(const_int 0)])))]
8928 "!TARGET_SSE2 && TARGET_SSE
8929 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8930 "@
8931 movlps\t{%1, %0|%0, %1}
8932 movaps\t{%1, %0|%0, %1}
8933 movlps\t{%1, %0|%0, %q1}"
8934 [(set_attr "type" "ssemov")
8935 (set_attr "mode" "V2SF,V4SF,V2SF")])
8936
8937 (define_expand "sse2_loadhpd_exp"
8938 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8939 (vec_concat:V2DF
8940 (vec_select:DF
8941 (match_operand:V2DF 1 "nonimmediate_operand")
8942 (parallel [(const_int 0)]))
8943 (match_operand:DF 2 "nonimmediate_operand")))]
8944 "TARGET_SSE2"
8945 {
8946 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8947
8948 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8949
8950 /* Fix up the destination if needed. */
8951 if (dst != operands[0])
8952 emit_move_insn (operands[0], dst);
8953
8954 DONE;
8955 })
8956
8957 ;; Avoid combining registers from different units in a single alternative,
8958 ;; see comment above inline_secondary_memory_needed function in i386.c
8959 (define_insn "sse2_loadhpd"
8960 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8961 "=x,v,x,v,o,o ,o")
8962 (vec_concat:V2DF
8963 (vec_select:DF
8964 (match_operand:V2DF 1 "nonimmediate_operand"
8965 " 0,v,0,v,0,0 ,0")
8966 (parallel [(const_int 0)]))
8967 (match_operand:DF 2 "nonimmediate_operand"
8968 " m,m,x,v,x,*f,r")))]
8969 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8970 "@
8971 movhpd\t{%2, %0|%0, %2}
8972 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8973 unpcklpd\t{%2, %0|%0, %2}
8974 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8975 #
8976 #
8977 #"
8978 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
8979 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8980 (set (attr "prefix_data16")
8981 (if_then_else (eq_attr "alternative" "0")
8982 (const_string "1")
8983 (const_string "*")))
8984 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
8985 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
8986
8987 (define_split
8988 [(set (match_operand:V2DF 0 "memory_operand")
8989 (vec_concat:V2DF
8990 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
8991 (match_operand:DF 1 "register_operand")))]
8992 "TARGET_SSE2 && reload_completed"
8993 [(set (match_dup 0) (match_dup 1))]
8994 "operands[0] = adjust_address (operands[0], DFmode, 8);")
8995
8996 (define_expand "sse2_loadlpd_exp"
8997 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8998 (vec_concat:V2DF
8999 (match_operand:DF 2 "nonimmediate_operand")
9000 (vec_select:DF
9001 (match_operand:V2DF 1 "nonimmediate_operand")
9002 (parallel [(const_int 1)]))))]
9003 "TARGET_SSE2"
9004 {
9005 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9006
9007 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
9008
9009 /* Fix up the destination if needed. */
9010 if (dst != operands[0])
9011 emit_move_insn (operands[0], dst);
9012
9013 DONE;
9014 })
9015
9016 ;; Avoid combining registers from different units in a single alternative,
9017 ;; see comment above inline_secondary_memory_needed function in i386.c
9018 (define_insn "sse2_loadlpd"
9019 [(set (match_operand:V2DF 0 "nonimmediate_operand"
9020 "=v,x,v,x,v,x,x,v,m,m ,m")
9021 (vec_concat:V2DF
9022 (match_operand:DF 2 "nonimmediate_operand"
9023 "vm,m,m,x,v,0,0,v,x,*f,r")
9024 (vec_select:DF
9025 (match_operand:V2DF 1 "vector_move_operand"
9026 " C,0,v,0,v,x,o,o,0,0 ,0")
9027 (parallel [(const_int 1)]))))]
9028 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9029 "@
9030 %vmovq\t{%2, %0|%0, %2}
9031 movlpd\t{%2, %0|%0, %2}
9032 vmovlpd\t{%2, %1, %0|%0, %1, %2}
9033 movsd\t{%2, %0|%0, %2}
9034 vmovsd\t{%2, %1, %0|%0, %1, %2}
9035 shufpd\t{$2, %1, %0|%0, %1, 2}
9036 movhpd\t{%H1, %0|%0, %H1}
9037 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
9038 #
9039 #
9040 #"
9041 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
9042 (set (attr "type")
9043 (cond [(eq_attr "alternative" "5")
9044 (const_string "sselog")
9045 (eq_attr "alternative" "9")
9046 (const_string "fmov")
9047 (eq_attr "alternative" "10")
9048 (const_string "imov")
9049 ]
9050 (const_string "ssemov")))
9051 (set (attr "prefix_data16")
9052 (if_then_else (eq_attr "alternative" "1,6")
9053 (const_string "1")
9054 (const_string "*")))
9055 (set (attr "length_immediate")
9056 (if_then_else (eq_attr "alternative" "5")
9057 (const_string "1")
9058 (const_string "*")))
9059 (set (attr "prefix")
9060 (cond [(eq_attr "alternative" "0")
9061 (const_string "maybe_vex")
9062 (eq_attr "alternative" "1,3,5,6")
9063 (const_string "orig")
9064 (eq_attr "alternative" "2,4,7")
9065 (const_string "maybe_evex")
9066 ]
9067 (const_string "*")))
9068 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
9069
9070 (define_split
9071 [(set (match_operand:V2DF 0 "memory_operand")
9072 (vec_concat:V2DF
9073 (match_operand:DF 1 "register_operand")
9074 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
9075 "TARGET_SSE2 && reload_completed"
9076 [(set (match_dup 0) (match_dup 1))]
9077 "operands[0] = adjust_address (operands[0], DFmode, 0);")
9078
9079 (define_insn "sse2_movsd"
9080 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
9081 (vec_merge:V2DF
9082 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
9083 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
9084 (const_int 1)))]
9085 "TARGET_SSE2"
9086 "@
9087 movsd\t{%2, %0|%0, %2}
9088 vmovsd\t{%2, %1, %0|%0, %1, %2}
9089 movlpd\t{%2, %0|%0, %q2}
9090 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
9091 %vmovlpd\t{%2, %0|%q0, %2}
9092 shufpd\t{$2, %1, %0|%0, %1, 2}
9093 movhps\t{%H1, %0|%0, %H1}
9094 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
9095 %vmovhps\t{%1, %H0|%H0, %1}"
9096 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
9097 (set (attr "type")
9098 (if_then_else
9099 (eq_attr "alternative" "5")
9100 (const_string "sselog")
9101 (const_string "ssemov")))
9102 (set (attr "prefix_data16")
9103 (if_then_else
9104 (and (eq_attr "alternative" "2,4")
9105 (not (match_test "TARGET_AVX")))
9106 (const_string "1")
9107 (const_string "*")))
9108 (set (attr "length_immediate")
9109 (if_then_else (eq_attr "alternative" "5")
9110 (const_string "1")
9111 (const_string "*")))
9112 (set (attr "prefix")
9113 (cond [(eq_attr "alternative" "1,3,7")
9114 (const_string "maybe_evex")
9115 (eq_attr "alternative" "4,8")
9116 (const_string "maybe_vex")
9117 ]
9118 (const_string "orig")))
9119 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
9120
9121 (define_insn "vec_dupv2df<mask_name>"
9122 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
9123 (vec_duplicate:V2DF
9124 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
9125 "TARGET_SSE2 && <mask_avx512vl_condition>"
9126 "@
9127 unpcklpd\t%0, %0
9128 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
9129 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
9130 [(set_attr "isa" "noavx,sse3,avx512vl")
9131 (set_attr "type" "sselog1")
9132 (set_attr "prefix" "orig,maybe_vex,evex")
9133 (set_attr "mode" "V2DF,DF,DF")])
9134
9135 (define_insn "vec_concatv2df"
9136 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
9137 (vec_concat:V2DF
9138 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,xm,0,0")
9139 (match_operand:DF 2 "vector_move_operand" " x,x,v,1,1,m,m, C,x,m")))]
9140 "TARGET_SSE
9141 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
9142 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
9143 "@
9144 unpcklpd\t{%2, %0|%0, %2}
9145 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9146 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9147 %vmovddup\t{%1, %0|%0, %1}
9148 vmovddup\t{%1, %0|%0, %1}
9149 movhpd\t{%2, %0|%0, %2}
9150 vmovhpd\t{%2, %1, %0|%0, %1, %2}
9151 %vmovq\t{%1, %0|%0, %1}
9152 movlhps\t{%2, %0|%0, %2}
9153 movhps\t{%2, %0|%0, %2}"
9154 [(set (attr "isa")
9155 (cond [(eq_attr "alternative" "0,5")
9156 (const_string "sse2_noavx")
9157 (eq_attr "alternative" "1,6")
9158 (const_string "avx")
9159 (eq_attr "alternative" "2,4")
9160 (const_string "avx512vl")
9161 (eq_attr "alternative" "3")
9162 (const_string "sse3")
9163 (eq_attr "alternative" "7")
9164 (const_string "sse2")
9165 ]
9166 (const_string "noavx")))
9167 (set (attr "type")
9168 (if_then_else
9169 (eq_attr "alternative" "0,1,2,3,4")
9170 (const_string "sselog")
9171 (const_string "ssemov")))
9172 (set (attr "prefix_data16")
9173 (if_then_else (eq_attr "alternative" "5")
9174 (const_string "1")
9175 (const_string "*")))
9176 (set (attr "prefix")
9177 (cond [(eq_attr "alternative" "1,6")
9178 (const_string "vex")
9179 (eq_attr "alternative" "2,4")
9180 (const_string "evex")
9181 (eq_attr "alternative" "3,7")
9182 (const_string "maybe_vex")
9183 ]
9184 (const_string "orig")))
9185 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
9186
9187 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9188 ;;
9189 ;; Parallel integer down-conversion operations
9190 ;;
9191 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9192
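;; PMOV_DST_MODE_1 covers the AVX-512F down-conversions from 512-bit
;; sources (vpmov[s|us]{db,dw,qd,qw}).  <code>/any_truncate selects the
;; truncating, signed-saturating or unsigned-saturating form; each has a
;; register and a store variant plus merge- and zero-masked versions.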
9193 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
9194 (define_mode_attr pmov_src_mode
9195 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
9196 (define_mode_attr pmov_src_lower
9197 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
9198 (define_mode_attr pmov_suff_1
9199 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
9200
9201 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
9202 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
9203 (any_truncate:PMOV_DST_MODE_1
9204 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
9205 "TARGET_AVX512F"
9206 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
9207 [(set_attr "type" "ssemov")
9208 (set_attr "memory" "none,store")
9209 (set_attr "prefix" "evex")
9210 (set_attr "mode" "<sseinsnmode>")])
9211
9212 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
9213 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
9214 (vec_merge:PMOV_DST_MODE_1
9215 (any_truncate:PMOV_DST_MODE_1
9216 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
9217 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
9218 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9219 "TARGET_AVX512F"
9220 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9221 [(set_attr "type" "ssemov")
9222 (set_attr "memory" "none,store")
9223 (set_attr "prefix" "evex")
9224 (set_attr "mode" "<sseinsnmode>")])
9225
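;; The _mask_store expander is the masked store form: the destination
;; memory also serves as the merge input via (match_dup 0), so elements
;; whose mask bit is clear are left untouched.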
9226 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
9227 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
9228 (vec_merge:PMOV_DST_MODE_1
9229 (any_truncate:PMOV_DST_MODE_1
9230 (match_operand:<pmov_src_mode> 1 "register_operand"))
9231 (match_dup 0)
9232 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9233 "TARGET_AVX512F")
9234
9235 (define_insn "avx512bw_<code>v32hiv32qi2"
9236 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9237 (any_truncate:V32QI
9238 (match_operand:V32HI 1 "register_operand" "v,v")))]
9239 "TARGET_AVX512BW"
9240 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
9241 [(set_attr "type" "ssemov")
9242 (set_attr "memory" "none,store")
9243 (set_attr "prefix" "evex")
9244 (set_attr "mode" "XI")])
9245
9246 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
9247 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9248 (vec_merge:V32QI
9249 (any_truncate:V32QI
9250 (match_operand:V32HI 1 "register_operand" "v,v"))
9251 (match_operand:V32QI 2 "vector_move_operand" "0C,0")
9252 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
9253 "TARGET_AVX512BW"
9254 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9255 [(set_attr "type" "ssemov")
9256 (set_attr "memory" "none,store")
9257 (set_attr "prefix" "evex")
9258 (set_attr "mode" "XI")])
9259
9260 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
9261 [(set (match_operand:V32QI 0 "nonimmediate_operand")
9262 (vec_merge:V32QI
9263 (any_truncate:V32QI
9264 (match_operand:V32HI 1 "register_operand"))
9265 (match_dup 0)
9266 (match_operand:SI 2 "register_operand")))]
9267 "TARGET_AVX512BW")
9268
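;; PMOV_DST_MODE_2 handles the AVX512VL narrowings from a 256-bit source
;; (<ssedoublemode>) to a full 128-bit result: wb, dw and qd.  The V16QI
;; destination (vpmov*wb) additionally requires AVX512BW.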
9269 (define_mode_iterator PMOV_DST_MODE_2
9270 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
9271 (define_mode_attr pmov_suff_2
9272 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
9273
9274 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
9275 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9276 (any_truncate:PMOV_DST_MODE_2
9277 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
9278 "TARGET_AVX512VL"
9279 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
9280 [(set_attr "type" "ssemov")
9281 (set_attr "memory" "none,store")
9282 (set_attr "prefix" "evex")
9283 (set_attr "mode" "<sseinsnmode>")])
9284
9285 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
9286 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9287 (vec_merge:PMOV_DST_MODE_2
9288 (any_truncate:PMOV_DST_MODE_2
9289 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
9290 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
9291 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9292 "TARGET_AVX512VL"
9293 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9294 [(set_attr "type" "ssemov")
9295 (set_attr "memory" "none,store")
9296 (set_attr "prefix" "evex")
9297 (set_attr "mode" "<sseinsnmode>")])
9298
9299 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
9300 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
9301 (vec_merge:PMOV_DST_MODE_2
9302 (any_truncate:PMOV_DST_MODE_2
9303 (match_operand:<ssedoublemode> 1 "register_operand"))
9304 (match_dup 0)
9305 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9306 "TARGET_AVX512VL")
9307
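;; When the narrowed result fills only part of an XMM register, the
;; patterns describe the whole V16QI destination: the low part is the
;; truncated value and the remainder is either explicit zeros (register
;; forms, mode pmov_dst_zeroed_3) or the untouched upper bytes of the
;; memory destination (store forms).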
9308 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
9309 (define_mode_attr pmov_dst_3
9310 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
9311 (define_mode_attr pmov_dst_zeroed_3
9312 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
9313 (define_mode_attr pmov_suff_3
9314 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
9315
9316 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
9317 [(set (match_operand:V16QI 0 "register_operand" "=v")
9318 (vec_concat:V16QI
9319 (any_truncate:<pmov_dst_3>
9320 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
9321 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
9322 "TARGET_AVX512VL"
9323 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9324 [(set_attr "type" "ssemov")
9325 (set_attr "prefix" "evex")
9326 (set_attr "mode" "TI")])
9327
9328 (define_insn "*avx512vl_<code>v2div2qi2_store"
9329 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9330 (vec_concat:V16QI
9331 (any_truncate:V2QI
9332 (match_operand:V2DI 1 "register_operand" "v"))
9333 (vec_select:V14QI
9334 (match_dup 0)
9335 (parallel [(const_int 2) (const_int 3)
9336 (const_int 4) (const_int 5)
9337 (const_int 6) (const_int 7)
9338 (const_int 8) (const_int 9)
9339 (const_int 10) (const_int 11)
9340 (const_int 12) (const_int 13)
9341 (const_int 14) (const_int 15)]))))]
9342 "TARGET_AVX512VL"
9343 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9344 [(set_attr "type" "ssemov")
9345 (set_attr "memory" "store")
9346 (set_attr "prefix" "evex")
9347 (set_attr "mode" "TI")])
9348
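;; The _mask patterns merge into operand 2 under a mask register; the
;; *_mask_1 patterns are the corresponding zero-masking ({z}) forms.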
9349 (define_insn "avx512vl_<code>v2div2qi2_mask"
9350 [(set (match_operand:V16QI 0 "register_operand" "=v")
9351 (vec_concat:V16QI
9352 (vec_merge:V2QI
9353 (any_truncate:V2QI
9354 (match_operand:V2DI 1 "register_operand" "v"))
9355 (vec_select:V2QI
9356 (match_operand:V16QI 2 "vector_move_operand" "0C")
9357 (parallel [(const_int 0) (const_int 1)]))
9358 (match_operand:QI 3 "register_operand" "Yk"))
9359 (const_vector:V14QI [(const_int 0) (const_int 0)
9360 (const_int 0) (const_int 0)
9361 (const_int 0) (const_int 0)
9362 (const_int 0) (const_int 0)
9363 (const_int 0) (const_int 0)
9364 (const_int 0) (const_int 0)
9365 (const_int 0) (const_int 0)])))]
9366 "TARGET_AVX512VL"
9367 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9368 [(set_attr "type" "ssemov")
9369 (set_attr "prefix" "evex")
9370 (set_attr "mode" "TI")])
9371
9372 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
9373 [(set (match_operand:V16QI 0 "register_operand" "=v")
9374 (vec_concat:V16QI
9375 (vec_merge:V2QI
9376 (any_truncate:V2QI
9377 (match_operand:V2DI 1 "register_operand" "v"))
9378 (const_vector:V2QI [(const_int 0) (const_int 0)])
9379 (match_operand:QI 2 "register_operand" "Yk"))
9380 (const_vector:V14QI [(const_int 0) (const_int 0)
9381 (const_int 0) (const_int 0)
9382 (const_int 0) (const_int 0)
9383 (const_int 0) (const_int 0)
9384 (const_int 0) (const_int 0)
9385 (const_int 0) (const_int 0)
9386 (const_int 0) (const_int 0)])))]
9387 "TARGET_AVX512VL"
9388 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9389 [(set_attr "type" "ssemov")
9390 (set_attr "prefix" "evex")
9391 (set_attr "mode" "TI")])
9392
9393 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
9394 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9395 (vec_concat:V16QI
9396 (vec_merge:V2QI
9397 (any_truncate:V2QI
9398 (match_operand:V2DI 1 "register_operand" "v"))
9399 (vec_select:V2QI
9400 (match_dup 0)
9401 (parallel [(const_int 0) (const_int 1)]))
9402 (match_operand:QI 2 "register_operand" "Yk"))
9403 (vec_select:V14QI
9404 (match_dup 0)
9405 (parallel [(const_int 2) (const_int 3)
9406 (const_int 4) (const_int 5)
9407 (const_int 6) (const_int 7)
9408 (const_int 8) (const_int 9)
9409 (const_int 10) (const_int 11)
9410 (const_int 12) (const_int 13)
9411 (const_int 14) (const_int 15)]))))]
9412 "TARGET_AVX512VL"
9413 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
9414 [(set_attr "type" "ssemov")
9415 (set_attr "memory" "store")
9416 (set_attr "prefix" "evex")
9417 (set_attr "mode" "TI")])
9418
9419 (define_insn "*avx512vl_<code><mode>v4qi2_store"
9420 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9421 (vec_concat:V16QI
9422 (any_truncate:V4QI
9423 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9424 (vec_select:V12QI
9425 (match_dup 0)
9426 (parallel [(const_int 4) (const_int 5)
9427 (const_int 6) (const_int 7)
9428 (const_int 8) (const_int 9)
9429 (const_int 10) (const_int 11)
9430 (const_int 12) (const_int 13)
9431 (const_int 14) (const_int 15)]))))]
9432 "TARGET_AVX512VL"
9433 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9434 [(set_attr "type" "ssemov")
9435 (set_attr "memory" "store")
9436 (set_attr "prefix" "evex")
9437 (set_attr "mode" "TI")])
9438
9439 (define_insn "avx512vl_<code><mode>v4qi2_mask"
9440 [(set (match_operand:V16QI 0 "register_operand" "=v")
9441 (vec_concat:V16QI
9442 (vec_merge:V4QI
9443 (any_truncate:V4QI
9444 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9445 (vec_select:V4QI
9446 (match_operand:V16QI 2 "vector_move_operand" "0C")
9447 (parallel [(const_int 0) (const_int 1)
9448 (const_int 2) (const_int 3)]))
9449 (match_operand:QI 3 "register_operand" "Yk"))
9450 (const_vector:V12QI [(const_int 0) (const_int 0)
9451 (const_int 0) (const_int 0)
9452 (const_int 0) (const_int 0)
9453 (const_int 0) (const_int 0)
9454 (const_int 0) (const_int 0)
9455 (const_int 0) (const_int 0)])))]
9456 "TARGET_AVX512VL"
9457 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9458 [(set_attr "type" "ssemov")
9459 (set_attr "prefix" "evex")
9460 (set_attr "mode" "TI")])
9461
9462 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
9463 [(set (match_operand:V16QI 0 "register_operand" "=v")
9464 (vec_concat:V16QI
9465 (vec_merge:V4QI
9466 (any_truncate:V4QI
9467 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9468 (const_vector:V4QI [(const_int 0) (const_int 0)
9469 (const_int 0) (const_int 0)])
9470 (match_operand:QI 2 "register_operand" "Yk"))
9471 (const_vector:V12QI [(const_int 0) (const_int 0)
9472 (const_int 0) (const_int 0)
9473 (const_int 0) (const_int 0)
9474 (const_int 0) (const_int 0)
9475 (const_int 0) (const_int 0)
9476 (const_int 0) (const_int 0)])))]
9477 "TARGET_AVX512VL"
9478 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9479 [(set_attr "type" "ssemov")
9480 (set_attr "prefix" "evex")
9481 (set_attr "mode" "TI")])
9482
9483 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
9484 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9485 (vec_concat:V16QI
9486 (vec_merge:V4QI
9487 (any_truncate:V4QI
9488 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9489 (vec_select:V4QI
9490 (match_dup 0)
9491 (parallel [(const_int 0) (const_int 1)
9492 (const_int 2) (const_int 3)]))
9493 (match_operand:QI 2 "register_operand" "Yk"))
9494 (vec_select:V12QI
9495 (match_dup 0)
9496 (parallel [(const_int 4) (const_int 5)
9497 (const_int 6) (const_int 7)
9498 (const_int 8) (const_int 9)
9499 (const_int 10) (const_int 11)
9500 (const_int 12) (const_int 13)
9501 (const_int 14) (const_int 15)]))))]
9502 "TARGET_AVX512VL"
9503 {
9504 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
9505 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}";
9506 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
9507 }
9508 [(set_attr "type" "ssemov")
9509 (set_attr "memory" "store")
9510 (set_attr "prefix" "evex")
9511 (set_attr "mode" "TI")])
9512
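;; V8HI (wb, AVX512BW only) and V8SI (db) sources whose eight-byte result
;; fills the low half of an XMM register.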
9513 (define_mode_iterator VI2_128_BW_4_256
9514 [(V8HI "TARGET_AVX512BW") V8SI])
9515
9516 (define_insn "*avx512vl_<code><mode>v8qi2_store"
9517 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9518 (vec_concat:V16QI
9519 (any_truncate:V8QI
9520 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9521 (vec_select:V8QI
9522 (match_dup 0)
9523 (parallel [(const_int 8) (const_int 9)
9524 (const_int 10) (const_int 11)
9525 (const_int 12) (const_int 13)
9526 (const_int 14) (const_int 15)]))))]
9527 "TARGET_AVX512VL"
9528 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9529 [(set_attr "type" "ssemov")
9530 (set_attr "memory" "store")
9531 (set_attr "prefix" "evex")
9532 (set_attr "mode" "TI")])
9533
9534 (define_insn "avx512vl_<code><mode>v8qi2_mask"
9535 [(set (match_operand:V16QI 0 "register_operand" "=v")
9536 (vec_concat:V16QI
9537 (vec_merge:V8QI
9538 (any_truncate:V8QI
9539 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9540 (vec_select:V8QI
9541 (match_operand:V16QI 2 "vector_move_operand" "0C")
9542 (parallel [(const_int 0) (const_int 1)
9543 (const_int 2) (const_int 3)
9544 (const_int 4) (const_int 5)
9545 (const_int 6) (const_int 7)]))
9546 (match_operand:QI 3 "register_operand" "Yk"))
9547 (const_vector:V8QI [(const_int 0) (const_int 0)
9548 (const_int 0) (const_int 0)
9549 (const_int 0) (const_int 0)
9550 (const_int 0) (const_int 0)])))]
9551 "TARGET_AVX512VL"
9552 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9553 [(set_attr "type" "ssemov")
9554 (set_attr "prefix" "evex")
9555 (set_attr "mode" "TI")])
9556
9557 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
9558 [(set (match_operand:V16QI 0 "register_operand" "=v")
9559 (vec_concat:V16QI
9560 (vec_merge:V8QI
9561 (any_truncate:V8QI
9562 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9563 (const_vector:V8QI [(const_int 0) (const_int 0)
9564 (const_int 0) (const_int 0)
9565 (const_int 0) (const_int 0)
9566 (const_int 0) (const_int 0)])
9567 (match_operand:QI 2 "register_operand" "Yk"))
9568 (const_vector:V8QI [(const_int 0) (const_int 0)
9569 (const_int 0) (const_int 0)
9570 (const_int 0) (const_int 0)
9571 (const_int 0) (const_int 0)])))]
9572 "TARGET_AVX512VL"
9573 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9574 [(set_attr "type" "ssemov")
9575 (set_attr "prefix" "evex")
9576 (set_attr "mode" "TI")])
9577
9578 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
9579 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9580 (vec_concat:V16QI
9581 (vec_merge:V8QI
9582 (any_truncate:V8QI
9583 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9584 (vec_select:V8QI
9585 (match_dup 0)
9586 (parallel [(const_int 0) (const_int 1)
9587 (const_int 2) (const_int 3)
9588 (const_int 4) (const_int 5)
9589 (const_int 6) (const_int 7)]))
9590 (match_operand:QI 2 "register_operand" "Yk"))
9591 (vec_select:V8QI
9592 (match_dup 0)
9593 (parallel [(const_int 8) (const_int 9)
9594 (const_int 10) (const_int 11)
9595 (const_int 12) (const_int 13)
9596 (const_int 14) (const_int 15)]))))]
9597 "TARGET_AVX512VL"
9598 {
9599 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
9600 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
9601 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
9602 }
9603 [(set_attr "type" "ssemov")
9604 (set_attr "memory" "store")
9605 (set_attr "prefix" "evex")
9606 (set_attr "mode" "TI")])
9607
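;; The same scheme for 16-bit results: qw and dw conversions whose result
;; fills only part of an XMM register.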
9608 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
9609 (define_mode_attr pmov_dst_4
9610 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
9611 (define_mode_attr pmov_dst_zeroed_4
9612 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
9613 (define_mode_attr pmov_suff_4
9614 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
9615
9616 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
9617 [(set (match_operand:V8HI 0 "register_operand" "=v")
9618 (vec_concat:V8HI
9619 (any_truncate:<pmov_dst_4>
9620 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
9621 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
9622 "TARGET_AVX512VL"
9623 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9624 [(set_attr "type" "ssemov")
9625 (set_attr "prefix" "evex")
9626 (set_attr "mode" "TI")])
9627
9628 (define_insn "*avx512vl_<code><mode>v4hi2_store"
9629 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9630 (vec_concat:V8HI
9631 (any_truncate:V4HI
9632 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9633 (vec_select:V4HI
9634 (match_dup 0)
9635 (parallel [(const_int 4) (const_int 5)
9636 (const_int 6) (const_int 7)]))))]
9637 "TARGET_AVX512VL"
9638 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9639 [(set_attr "type" "ssemov")
9640 (set_attr "memory" "store")
9641 (set_attr "prefix" "evex")
9642 (set_attr "mode" "TI")])
9643
9644 (define_insn "avx512vl_<code><mode>v4hi2_mask"
9645 [(set (match_operand:V8HI 0 "register_operand" "=v")
9646 (vec_concat:V8HI
9647 (vec_merge:V4HI
9648 (any_truncate:V4HI
9649 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9650 (vec_select:V4HI
9651 (match_operand:V8HI 2 "vector_move_operand" "0C")
9652 (parallel [(const_int 0) (const_int 1)
9653 (const_int 2) (const_int 3)]))
9654 (match_operand:QI 3 "register_operand" "Yk"))
9655 (const_vector:V4HI [(const_int 0) (const_int 0)
9656 (const_int 0) (const_int 0)])))]
9657 "TARGET_AVX512VL"
9658 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9659 [(set_attr "type" "ssemov")
9660 (set_attr "prefix" "evex")
9661 (set_attr "mode" "TI")])
9662
9663 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
9664 [(set (match_operand:V8HI 0 "register_operand" "=v")
9665 (vec_concat:V8HI
9666 (vec_merge:V4HI
9667 (any_truncate:V4HI
9668 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9669 (const_vector:V4HI [(const_int 0) (const_int 0)
9670 (const_int 0) (const_int 0)])
9671 (match_operand:QI 2 "register_operand" "Yk"))
9672 (const_vector:V4HI [(const_int 0) (const_int 0)
9673 (const_int 0) (const_int 0)])))]
9674 "TARGET_AVX512VL"
9675 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9676 [(set_attr "type" "ssemov")
9677 (set_attr "prefix" "evex")
9678 (set_attr "mode" "TI")])
9679
9680 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
9681 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9682 (vec_concat:V8HI
9683 (vec_merge:V4HI
9684 (any_truncate:V4HI
9685 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9686 (vec_select:V4HI
9687 (match_dup 0)
9688 (parallel [(const_int 0) (const_int 1)
9689 (const_int 2) (const_int 3)]))
9690 (match_operand:QI 2 "register_operand" "Yk"))
9691 (vec_select:V4HI
9692 (match_dup 0)
9693 (parallel [(const_int 4) (const_int 5)
9694 (const_int 6) (const_int 7)]))))]
9695 "TARGET_AVX512VL"
9696 {
9697 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
9698 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
9699 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
9700 }
9701 [(set_attr "type" "ssemov")
9702 (set_attr "memory" "store")
9703 (set_attr "prefix" "evex")
9704 (set_attr "mode" "TI")])
9705
9706 (define_insn "*avx512vl_<code>v2div2hi2_store"
9707 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9708 (vec_concat:V8HI
9709 (any_truncate:V2HI
9710 (match_operand:V2DI 1 "register_operand" "v"))
9711 (vec_select:V6HI
9712 (match_dup 0)
9713 (parallel [(const_int 2) (const_int 3)
9714 (const_int 4) (const_int 5)
9715 (const_int 6) (const_int 7)]))))]
9716 "TARGET_AVX512VL"
9717 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
9718 [(set_attr "type" "ssemov")
9719 (set_attr "memory" "store")
9720 (set_attr "prefix" "evex")
9721 (set_attr "mode" "TI")])
9722
9723 (define_insn "avx512vl_<code>v2div2hi2_mask"
9724 [(set (match_operand:V8HI 0 "register_operand" "=v")
9725 (vec_concat:V8HI
9726 (vec_merge:V2HI
9727 (any_truncate:V2HI
9728 (match_operand:V2DI 1 "register_operand" "v"))
9729 (vec_select:V2HI
9730 (match_operand:V8HI 2 "vector_move_operand" "0C")
9731 (parallel [(const_int 0) (const_int 1)]))
9732 (match_operand:QI 3 "register_operand" "Yk"))
9733 (const_vector:V6HI [(const_int 0) (const_int 0)
9734 (const_int 0) (const_int 0)
9735 (const_int 0) (const_int 0)])))]
9736 "TARGET_AVX512VL"
9737 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9738 [(set_attr "type" "ssemov")
9739 (set_attr "prefix" "evex")
9740 (set_attr "mode" "TI")])
9741
9742 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
9743 [(set (match_operand:V8HI 0 "register_operand" "=v")
9744 (vec_concat:V8HI
9745 (vec_merge:V2HI
9746 (any_truncate:V2HI
9747 (match_operand:V2DI 1 "register_operand" "v"))
9748 (const_vector:V2HI [(const_int 0) (const_int 0)])
9749 (match_operand:QI 2 "register_operand" "Yk"))
9750 (const_vector:V6HI [(const_int 0) (const_int 0)
9751 (const_int 0) (const_int 0)
9752 (const_int 0) (const_int 0)])))]
9753 "TARGET_AVX512VL"
9754 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9755 [(set_attr "type" "ssemov")
9756 (set_attr "prefix" "evex")
9757 (set_attr "mode" "TI")])
9758
9759 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
9760 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9761 (vec_concat:V8HI
9762 (vec_merge:V2HI
9763 (any_truncate:V2HI
9764 (match_operand:V2DI 1 "register_operand" "v"))
9765 (vec_select:V2HI
9766 (match_dup 0)
9767 (parallel [(const_int 0) (const_int 1)]))
9768 (match_operand:QI 2 "register_operand" "Yk"))
9769 (vec_select:V6HI
9770 (match_dup 0)
9771 (parallel [(const_int 2) (const_int 3)
9772 (const_int 4) (const_int 5)
9773 (const_int 6) (const_int 7)]))))]
9774 "TARGET_AVX512VL"
9775 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
9776 [(set_attr "type" "ssemov")
9777 (set_attr "memory" "store")
9778 (set_attr "prefix" "evex")
9779 (set_attr "mode" "TI")])
9780
9781 (define_insn "*avx512vl_<code>v2div2si2"
9782 [(set (match_operand:V4SI 0 "register_operand" "=v")
9783 (vec_concat:V4SI
9784 (any_truncate:V2SI
9785 (match_operand:V2DI 1 "register_operand" "v"))
9786 (match_operand:V2SI 2 "const0_operand")))]
9787 "TARGET_AVX512VL"
9788 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9789 [(set_attr "type" "ssemov")
9790 (set_attr "prefix" "evex")
9791 (set_attr "mode" "TI")])
9792
9793 (define_insn "*avx512vl_<code>v2div2si2_store"
9794 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9795 (vec_concat:V4SI
9796 (any_truncate:V2SI
9797 (match_operand:V2DI 1 "register_operand" "v"))
9798 (vec_select:V2SI
9799 (match_dup 0)
9800 (parallel [(const_int 2) (const_int 3)]))))]
9801 "TARGET_AVX512VL"
9802 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9803 [(set_attr "type" "ssemov")
9804 (set_attr "memory" "store")
9805 (set_attr "prefix" "evex")
9806 (set_attr "mode" "TI")])
9807
9808 (define_insn "avx512vl_<code>v2div2si2_mask"
9809 [(set (match_operand:V4SI 0 "register_operand" "=v")
9810 (vec_concat:V4SI
9811 (vec_merge:V2SI
9812 (any_truncate:V2SI
9813 (match_operand:V2DI 1 "register_operand" "v"))
9814 (vec_select:V2SI
9815 (match_operand:V4SI 2 "vector_move_operand" "0C")
9816 (parallel [(const_int 0) (const_int 1)]))
9817 (match_operand:QI 3 "register_operand" "Yk"))
9818 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9819 "TARGET_AVX512VL"
9820 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9821 [(set_attr "type" "ssemov")
9822 (set_attr "prefix" "evex")
9823 (set_attr "mode" "TI")])
9824
9825 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
9826 [(set (match_operand:V4SI 0 "register_operand" "=v")
9827 (vec_concat:V4SI
9828 (vec_merge:V2SI
9829 (any_truncate:V2SI
9830 (match_operand:V2DI 1 "register_operand" "v"))
9831 (const_vector:V2SI [(const_int 0) (const_int 0)])
9832 (match_operand:QI 2 "register_operand" "Yk"))
9833 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9834 "TARGET_AVX512VL"
9835 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9836 [(set_attr "type" "ssemov")
9837 (set_attr "prefix" "evex")
9838 (set_attr "mode" "TI")])
9839
9840 (define_insn "avx512vl_<code>v2div2si2_mask_store"
9841 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9842 (vec_concat:V4SI
9843 (vec_merge:V2SI
9844 (any_truncate:V2SI
9845 (match_operand:V2DI 1 "register_operand" "v"))
9846 (vec_select:V2SI
9847 (match_dup 0)
9848 (parallel [(const_int 0) (const_int 1)]))
9849 (match_operand:QI 2 "register_operand" "Yk"))
9850 (vec_select:V2SI
9851 (match_dup 0)
9852 (parallel [(const_int 2) (const_int 3)]))))]
9853 "TARGET_AVX512VL"
9854 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %t1}"
9855 [(set_attr "type" "ssemov")
9856 (set_attr "memory" "store")
9857 (set_attr "prefix" "evex")
9858 (set_attr "mode" "TI")])
9859
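;; V8DI sources narrowed to eight bytes need only AVX512F; the result
;; occupies the low half of an XMM register.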
9860 (define_insn "*avx512f_<code>v8div16qi2"
9861 [(set (match_operand:V16QI 0 "register_operand" "=v")
9862 (vec_concat:V16QI
9863 (any_truncate:V8QI
9864 (match_operand:V8DI 1 "register_operand" "v"))
9865 (const_vector:V8QI [(const_int 0) (const_int 0)
9866 (const_int 0) (const_int 0)
9867 (const_int 0) (const_int 0)
9868 (const_int 0) (const_int 0)])))]
9869 "TARGET_AVX512F"
9870 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9871 [(set_attr "type" "ssemov")
9872 (set_attr "prefix" "evex")
9873 (set_attr "mode" "TI")])
9874
9875 (define_insn "*avx512f_<code>v8div16qi2_store"
9876 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9877 (vec_concat:V16QI
9878 (any_truncate:V8QI
9879 (match_operand:V8DI 1 "register_operand" "v"))
9880 (vec_select:V8QI
9881 (match_dup 0)
9882 (parallel [(const_int 8) (const_int 9)
9883 (const_int 10) (const_int 11)
9884 (const_int 12) (const_int 13)
9885 (const_int 14) (const_int 15)]))))]
9886 "TARGET_AVX512F"
9887 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9888 [(set_attr "type" "ssemov")
9889 (set_attr "memory" "store")
9890 (set_attr "prefix" "evex")
9891 (set_attr "mode" "TI")])
9892
9893 (define_insn "avx512f_<code>v8div16qi2_mask"
9894 [(set (match_operand:V16QI 0 "register_operand" "=v")
9895 (vec_concat:V16QI
9896 (vec_merge:V8QI
9897 (any_truncate:V8QI
9898 (match_operand:V8DI 1 "register_operand" "v"))
9899 (vec_select:V8QI
9900 (match_operand:V16QI 2 "vector_move_operand" "0C")
9901 (parallel [(const_int 0) (const_int 1)
9902 (const_int 2) (const_int 3)
9903 (const_int 4) (const_int 5)
9904 (const_int 6) (const_int 7)]))
9905 (match_operand:QI 3 "register_operand" "Yk"))
9906 (const_vector:V8QI [(const_int 0) (const_int 0)
9907 (const_int 0) (const_int 0)
9908 (const_int 0) (const_int 0)
9909 (const_int 0) (const_int 0)])))]
9910 "TARGET_AVX512F"
9911 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9912 [(set_attr "type" "ssemov")
9913 (set_attr "prefix" "evex")
9914 (set_attr "mode" "TI")])
9915
9916 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
9917 [(set (match_operand:V16QI 0 "register_operand" "=v")
9918 (vec_concat:V16QI
9919 (vec_merge:V8QI
9920 (any_truncate:V8QI
9921 (match_operand:V8DI 1 "register_operand" "v"))
9922 (const_vector:V8QI [(const_int 0) (const_int 0)
9923 (const_int 0) (const_int 0)
9924 (const_int 0) (const_int 0)
9925 (const_int 0) (const_int 0)])
9926 (match_operand:QI 2 "register_operand" "Yk"))
9927 (const_vector:V8QI [(const_int 0) (const_int 0)
9928 (const_int 0) (const_int 0)
9929 (const_int 0) (const_int 0)
9930 (const_int 0) (const_int 0)])))]
9931 "TARGET_AVX512F"
9932 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9933 [(set_attr "type" "ssemov")
9934 (set_attr "prefix" "evex")
9935 (set_attr "mode" "TI")])
9936
9937 (define_insn "avx512f_<code>v8div16qi2_mask_store"
9938 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9939 (vec_concat:V16QI
9940 (vec_merge:V8QI
9941 (any_truncate:V8QI
9942 (match_operand:V8DI 1 "register_operand" "v"))
9943 (vec_select:V8QI
9944 (match_dup 0)
9945 (parallel [(const_int 0) (const_int 1)
9946 (const_int 2) (const_int 3)
9947 (const_int 4) (const_int 5)
9948 (const_int 6) (const_int 7)]))
9949 (match_operand:QI 2 "register_operand" "Yk"))
9950 (vec_select:V8QI
9951 (match_dup 0)
9952 (parallel [(const_int 8) (const_int 9)
9953 (const_int 10) (const_int 11)
9954 (const_int 12) (const_int 13)
9955 (const_int 14) (const_int 15)]))))]
9956 "TARGET_AVX512F"
9957 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
9958 [(set_attr "type" "ssemov")
9959 (set_attr "memory" "store")
9960 (set_attr "prefix" "evex")
9961 (set_attr "mode" "TI")])
9962
9963 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9964 ;;
9965 ;; Parallel integral arithmetic
9966 ;;
9967 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9968
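;; There is no vector integer negation instruction, so negate is expanded
;; as a subtraction from a zeroed register.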
9969 (define_expand "neg<mode>2"
9970 [(set (match_operand:VI_AVX2 0 "register_operand")
9971 (minus:VI_AVX2
9972 (match_dup 2)
9973 (match_operand:VI_AVX2 1 "vector_operand")))]
9974 "TARGET_SSE2"
9975 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
9976
9977 (define_expand "<plusminus_insn><mode>3"
9978 [(set (match_operand:VI_AVX2 0 "register_operand")
9979 (plusminus:VI_AVX2
9980 (match_operand:VI_AVX2 1 "vector_operand")
9981 (match_operand:VI_AVX2 2 "vector_operand")))]
9982 "TARGET_SSE2"
9983 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9984
9985 (define_expand "<plusminus_insn><mode>3_mask"
9986 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9987 (vec_merge:VI48_AVX512VL
9988 (plusminus:VI48_AVX512VL
9989 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
9990 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
9991 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
9992 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9993 "TARGET_AVX512F"
9994 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9995
9996 (define_expand "<plusminus_insn><mode>3_mask"
9997 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
9998 (vec_merge:VI12_AVX512VL
9999 (plusminus:VI12_AVX512VL
10000 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
10001 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10002 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
10003 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10004 "TARGET_AVX512BW"
10005 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10006
10007 (define_insn "*<plusminus_insn><mode>3"
10008 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
10009 (plusminus:VI_AVX2
10010 (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
10011 (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
10012 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10013 "@
10014 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10015 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10016 [(set_attr "isa" "noavx,avx")
10017 (set_attr "type" "sseiadd")
10018 (set_attr "prefix_data16" "1,*")
10019 (set_attr "prefix" "<mask_prefix3>")
10020 (set_attr "mode" "<sseinsnmode>")])
10021
10022 (define_insn "*<plusminus_insn><mode>3_mask"
10023 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10024 (vec_merge:VI48_AVX512VL
10025 (plusminus:VI48_AVX512VL
10026 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10027 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10028 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
10029 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10030 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10031 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10032 [(set_attr "type" "sseiadd")
10033 (set_attr "prefix" "evex")
10034 (set_attr "mode" "<sseinsnmode>")])
10035
10036 (define_insn "*<plusminus_insn><mode>3_mask"
10037 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10038 (vec_merge:VI12_AVX512VL
10039 (plusminus:VI12_AVX512VL
10040 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10041 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10042 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
10043 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10044 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10045 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10046 [(set_attr "type" "sseiadd")
10047 (set_attr "prefix" "evex")
10048 (set_attr "mode" "<sseinsnmode>")])
10049
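;; Saturating byte and word addition and subtraction:
;; padds[bw], paddus[bw], psubs[bw], psubus[bw].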
10050 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10051 [(set (match_operand:VI12_AVX2 0 "register_operand")
10052 (sat_plusminus:VI12_AVX2
10053 (match_operand:VI12_AVX2 1 "vector_operand")
10054 (match_operand:VI12_AVX2 2 "vector_operand")))]
10055 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10056 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10057
10058 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10059 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
10060 (sat_plusminus:VI12_AVX2
10061 (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
10062 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
10063 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
10064 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10065 "@
10066 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10067 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10068 [(set_attr "isa" "noavx,avx")
10069 (set_attr "type" "sseiadd")
10070 (set_attr "prefix_data16" "1,*")
10071 (set_attr "prefix" "orig,maybe_evex")
10072 (set_attr "mode" "TI")])
10073
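;; There is no byte vector multiply instruction; V*QI multiplication is
;; synthesized from word multiplies by ix86_expand_vecop_qihi.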
10074 (define_expand "mul<mode>3<mask_name>"
10075 [(set (match_operand:VI1_AVX512 0 "register_operand")
10076 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
10077 (match_operand:VI1_AVX512 2 "register_operand")))]
10078 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10079 {
10080 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
10081 DONE;
10082 })
10083
10084 (define_expand "mul<mode>3<mask_name>"
10085 [(set (match_operand:VI2_AVX2 0 "register_operand")
10086 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
10087 (match_operand:VI2_AVX2 2 "vector_operand")))]
10088 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10089 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
10090
10091 (define_insn "*mul<mode>3<mask_name>"
10092 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
10093 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
10094 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
10095 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
10096 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10097 "@
10098 pmullw\t{%2, %0|%0, %2}
10099 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10100 [(set_attr "isa" "noavx,avx")
10101 (set_attr "type" "sseimul")
10102 (set_attr "prefix_data16" "1,*")
10103 (set_attr "prefix" "orig,vex")
10104 (set_attr "mode" "<sseinsnmode>")])
10105
10106 (define_expand "<s>mul<mode>3_highpart<mask_name>"
10107 [(set (match_operand:VI2_AVX2 0 "register_operand")
10108 (truncate:VI2_AVX2
10109 (lshiftrt:<ssedoublemode>
10110 (mult:<ssedoublemode>
10111 (any_extend:<ssedoublemode>
10112 (match_operand:VI2_AVX2 1 "vector_operand"))
10113 (any_extend:<ssedoublemode>
10114 (match_operand:VI2_AVX2 2 "vector_operand")))
10115 (const_int 16))))]
10116 "TARGET_SSE2
10117 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10118 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
10119
10120 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
10121 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
10122 (truncate:VI2_AVX2
10123 (lshiftrt:<ssedoublemode>
10124 (mult:<ssedoublemode>
10125 (any_extend:<ssedoublemode>
10126 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
10127 (any_extend:<ssedoublemode>
10128 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
10129 (const_int 16))))]
10130 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
10131 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10132 "@
10133 pmulh<u>w\t{%2, %0|%0, %2}
10134 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10135 [(set_attr "isa" "noavx,avx")
10136 (set_attr "type" "sseimul")
10137 (set_attr "prefix_data16" "1,*")
10138 (set_attr "prefix" "orig,vex")
10139 (set_attr "mode" "<sseinsnmode>")])
10140
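;; Widening multiplies of the even elements map directly to pmuludq and
;; pmuldq, which multiply elements 0, 2, ... into double-width products.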
10141 (define_expand "vec_widen_umult_even_v16si<mask_name>"
10142 [(set (match_operand:V8DI 0 "register_operand")
10143 (mult:V8DI
10144 (zero_extend:V8DI
10145 (vec_select:V8SI
10146 (match_operand:V16SI 1 "nonimmediate_operand")
10147 (parallel [(const_int 0) (const_int 2)
10148 (const_int 4) (const_int 6)
10149 (const_int 8) (const_int 10)
10150 (const_int 12) (const_int 14)])))
10151 (zero_extend:V8DI
10152 (vec_select:V8SI
10153 (match_operand:V16SI 2 "nonimmediate_operand")
10154 (parallel [(const_int 0) (const_int 2)
10155 (const_int 4) (const_int 6)
10156 (const_int 8) (const_int 10)
10157 (const_int 12) (const_int 14)])))))]
10158 "TARGET_AVX512F"
10159 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
10160
10161 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
10162 [(set (match_operand:V8DI 0 "register_operand" "=v")
10163 (mult:V8DI
10164 (zero_extend:V8DI
10165 (vec_select:V8SI
10166 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
10167 (parallel [(const_int 0) (const_int 2)
10168 (const_int 4) (const_int 6)
10169 (const_int 8) (const_int 10)
10170 (const_int 12) (const_int 14)])))
10171 (zero_extend:V8DI
10172 (vec_select:V8SI
10173 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
10174 (parallel [(const_int 0) (const_int 2)
10175 (const_int 4) (const_int 6)
10176 (const_int 8) (const_int 10)
10177 (const_int 12) (const_int 14)])))))]
10178 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10179 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10180 [(set_attr "type" "sseimul")
10181 (set_attr "prefix_extra" "1")
10182 (set_attr "prefix" "evex")
10183 (set_attr "mode" "XI")])
10184
10185 (define_expand "vec_widen_umult_even_v8si<mask_name>"
10186 [(set (match_operand:V4DI 0 "register_operand")
10187 (mult:V4DI
10188 (zero_extend:V4DI
10189 (vec_select:V4SI
10190 (match_operand:V8SI 1 "nonimmediate_operand")
10191 (parallel [(const_int 0) (const_int 2)
10192 (const_int 4) (const_int 6)])))
10193 (zero_extend:V4DI
10194 (vec_select:V4SI
10195 (match_operand:V8SI 2 "nonimmediate_operand")
10196 (parallel [(const_int 0) (const_int 2)
10197 (const_int 4) (const_int 6)])))))]
10198 "TARGET_AVX2 && <mask_avx512vl_condition>"
10199 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
10200
10201 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
10202 [(set (match_operand:V4DI 0 "register_operand" "=v")
10203 (mult:V4DI
10204 (zero_extend:V4DI
10205 (vec_select:V4SI
10206 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
10207 (parallel [(const_int 0) (const_int 2)
10208 (const_int 4) (const_int 6)])))
10209 (zero_extend:V4DI
10210 (vec_select:V4SI
10211 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
10212 (parallel [(const_int 0) (const_int 2)
10213 (const_int 4) (const_int 6)])))))]
10214 "TARGET_AVX2 && <mask_avx512vl_condition>
10215 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10216 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10217 [(set_attr "type" "sseimul")
10218 (set_attr "prefix" "maybe_evex")
10219 (set_attr "mode" "OI")])
10220
10221 (define_expand "vec_widen_umult_even_v4si<mask_name>"
10222 [(set (match_operand:V2DI 0 "register_operand")
10223 (mult:V2DI
10224 (zero_extend:V2DI
10225 (vec_select:V2SI
10226 (match_operand:V4SI 1 "vector_operand")
10227 (parallel [(const_int 0) (const_int 2)])))
10228 (zero_extend:V2DI
10229 (vec_select:V2SI
10230 (match_operand:V4SI 2 "vector_operand")
10231 (parallel [(const_int 0) (const_int 2)])))))]
10232 "TARGET_SSE2 && <mask_avx512vl_condition>"
10233 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
10234
10235 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
10236 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10237 (mult:V2DI
10238 (zero_extend:V2DI
10239 (vec_select:V2SI
10240 (match_operand:V4SI 1 "vector_operand" "%0,v")
10241 (parallel [(const_int 0) (const_int 2)])))
10242 (zero_extend:V2DI
10243 (vec_select:V2SI
10244 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
10245 (parallel [(const_int 0) (const_int 2)])))))]
10246 "TARGET_SSE2 && <mask_avx512vl_condition>
10247 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10248 "@
10249 pmuludq\t{%2, %0|%0, %2}
10250 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10251 [(set_attr "isa" "noavx,avx")
10252 (set_attr "type" "sseimul")
10253 (set_attr "prefix_data16" "1,*")
10254 (set_attr "prefix" "orig,maybe_evex")
10255 (set_attr "mode" "TI")])
10256
10257 (define_expand "vec_widen_smult_even_v16si<mask_name>"
10258 [(set (match_operand:V8DI 0 "register_operand")
10259 (mult:V8DI
10260 (sign_extend:V8DI
10261 (vec_select:V8SI
10262 (match_operand:V16SI 1 "nonimmediate_operand")
10263 (parallel [(const_int 0) (const_int 2)
10264 (const_int 4) (const_int 6)
10265 (const_int 8) (const_int 10)
10266 (const_int 12) (const_int 14)])))
10267 (sign_extend:V8DI
10268 (vec_select:V8SI
10269 (match_operand:V16SI 2 "nonimmediate_operand")
10270 (parallel [(const_int 0) (const_int 2)
10271 (const_int 4) (const_int 6)
10272 (const_int 8) (const_int 10)
10273 (const_int 12) (const_int 14)])))))]
10274 "TARGET_AVX512F"
10275 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
10276
10277 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
10278 [(set (match_operand:V8DI 0 "register_operand" "=v")
10279 (mult:V8DI
10280 (sign_extend:V8DI
10281 (vec_select:V8SI
10282 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
10283 (parallel [(const_int 0) (const_int 2)
10284 (const_int 4) (const_int 6)
10285 (const_int 8) (const_int 10)
10286 (const_int 12) (const_int 14)])))
10287 (sign_extend:V8DI
10288 (vec_select:V8SI
10289 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
10290 (parallel [(const_int 0) (const_int 2)
10291 (const_int 4) (const_int 6)
10292 (const_int 8) (const_int 10)
10293 (const_int 12) (const_int 14)])))))]
10294 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10295 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10296 [(set_attr "type" "sseimul")
10297 (set_attr "prefix_extra" "1")
10298 (set_attr "prefix" "evex")
10299 (set_attr "mode" "XI")])
10300
10301 (define_expand "vec_widen_smult_even_v8si<mask_name>"
10302 [(set (match_operand:V4DI 0 "register_operand")
10303 (mult:V4DI
10304 (sign_extend:V4DI
10305 (vec_select:V4SI
10306 (match_operand:V8SI 1 "nonimmediate_operand")
10307 (parallel [(const_int 0) (const_int 2)
10308 (const_int 4) (const_int 6)])))
10309 (sign_extend:V4DI
10310 (vec_select:V4SI
10311 (match_operand:V8SI 2 "nonimmediate_operand")
10312 (parallel [(const_int 0) (const_int 2)
10313 (const_int 4) (const_int 6)])))))]
10314 "TARGET_AVX2 && <mask_avx512vl_condition>"
10315 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
10316
10317 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
10318 [(set (match_operand:V4DI 0 "register_operand" "=v")
10319 (mult:V4DI
10320 (sign_extend:V4DI
10321 (vec_select:V4SI
10322 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
10323 (parallel [(const_int 0) (const_int 2)
10324 (const_int 4) (const_int 6)])))
10325 (sign_extend:V4DI
10326 (vec_select:V4SI
10327 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
10328 (parallel [(const_int 0) (const_int 2)
10329 (const_int 4) (const_int 6)])))))]
10330 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10331 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10332 [(set_attr "type" "sseimul")
10333 (set_attr "prefix_extra" "1")
10334 (set_attr "prefix" "vex")
10335 (set_attr "mode" "OI")])
10336
10337 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
10338 [(set (match_operand:V2DI 0 "register_operand")
10339 (mult:V2DI
10340 (sign_extend:V2DI
10341 (vec_select:V2SI
10342 (match_operand:V4SI 1 "vector_operand")
10343 (parallel [(const_int 0) (const_int 2)])))
10344 (sign_extend:V2DI
10345 (vec_select:V2SI
10346 (match_operand:V4SI 2 "vector_operand")
10347 (parallel [(const_int 0) (const_int 2)])))))]
10348 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
10349 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
10350
10351 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
10352 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
10353 (mult:V2DI
10354 (sign_extend:V2DI
10355 (vec_select:V2SI
10356 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
10357 (parallel [(const_int 0) (const_int 2)])))
10358 (sign_extend:V2DI
10359 (vec_select:V2SI
10360 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
10361 (parallel [(const_int 0) (const_int 2)])))))]
10362 "TARGET_SSE4_1 && <mask_avx512vl_condition>
10363 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10364 "@
10365 pmuldq\t{%2, %0|%0, %2}
10366 pmuldq\t{%2, %0|%0, %2}
10367 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10368 [(set_attr "isa" "noavx,noavx,avx")
10369 (set_attr "type" "sseimul")
10370 (set_attr "prefix_data16" "1,1,*")
10371 (set_attr "prefix_extra" "1")
10372 (set_attr "prefix" "orig,orig,vex")
10373 (set_attr "mode" "TI")])
10374
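;; pmaddwd multiplies adjacent pairs of signed 16-bit elements and adds
;; each pair into a 32-bit result.  The 512-bit form is represented with
;; an unspec; the 128- and 256-bit forms below spell out the full RTL.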
10375 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
10376 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
10377 (unspec:<sseunpackmode>
10378 [(match_operand:VI2_AVX2 1 "register_operand" "v")
10379 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
10380 UNSPEC_PMADDWD512))]
10381 "TARGET_AVX512BW && <mask_mode512bit_condition>"
10382 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
10383 [(set_attr "type" "sseiadd")
10384 (set_attr "prefix" "evex")
10385 (set_attr "mode" "XI")])
10386
10387 (define_expand "avx2_pmaddwd"
10388 [(set (match_operand:V8SI 0 "register_operand")
10389 (plus:V8SI
10390 (mult:V8SI
10391 (sign_extend:V8SI
10392 (vec_select:V8HI
10393 (match_operand:V16HI 1 "nonimmediate_operand")
10394 (parallel [(const_int 0) (const_int 2)
10395 (const_int 4) (const_int 6)
10396 (const_int 8) (const_int 10)
10397 (const_int 12) (const_int 14)])))
10398 (sign_extend:V8SI
10399 (vec_select:V8HI
10400 (match_operand:V16HI 2 "nonimmediate_operand")
10401 (parallel [(const_int 0) (const_int 2)
10402 (const_int 4) (const_int 6)
10403 (const_int 8) (const_int 10)
10404 (const_int 12) (const_int 14)]))))
10405 (mult:V8SI
10406 (sign_extend:V8SI
10407 (vec_select:V8HI (match_dup 1)
10408 (parallel [(const_int 1) (const_int 3)
10409 (const_int 5) (const_int 7)
10410 (const_int 9) (const_int 11)
10411 (const_int 13) (const_int 15)])))
10412 (sign_extend:V8SI
10413 (vec_select:V8HI (match_dup 2)
10414 (parallel [(const_int 1) (const_int 3)
10415 (const_int 5) (const_int 7)
10416 (const_int 9) (const_int 11)
10417 (const_int 13) (const_int 15)]))))))]
10418 "TARGET_AVX2"
10419 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
10420
10421 (define_insn "*avx2_pmaddwd"
10422 [(set (match_operand:V8SI 0 "register_operand" "=x,v")
10423 (plus:V8SI
10424 (mult:V8SI
10425 (sign_extend:V8SI
10426 (vec_select:V8HI
10427 (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
10428 (parallel [(const_int 0) (const_int 2)
10429 (const_int 4) (const_int 6)
10430 (const_int 8) (const_int 10)
10431 (const_int 12) (const_int 14)])))
10432 (sign_extend:V8SI
10433 (vec_select:V8HI
10434 (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
10435 (parallel [(const_int 0) (const_int 2)
10436 (const_int 4) (const_int 6)
10437 (const_int 8) (const_int 10)
10438 (const_int 12) (const_int 14)]))))
10439 (mult:V8SI
10440 (sign_extend:V8SI
10441 (vec_select:V8HI (match_dup 1)
10442 (parallel [(const_int 1) (const_int 3)
10443 (const_int 5) (const_int 7)
10444 (const_int 9) (const_int 11)
10445 (const_int 13) (const_int 15)])))
10446 (sign_extend:V8SI
10447 (vec_select:V8HI (match_dup 2)
10448 (parallel [(const_int 1) (const_int 3)
10449 (const_int 5) (const_int 7)
10450 (const_int 9) (const_int 11)
10451 (const_int 13) (const_int 15)]))))))]
10452 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10453 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10454 [(set_attr "type" "sseiadd")
10455 (set_attr "isa" "*,avx512bw")
10456 (set_attr "prefix" "vex,evex")
10457 (set_attr "mode" "OI")])
10458
10459 (define_expand "sse2_pmaddwd"
10460 [(set (match_operand:V4SI 0 "register_operand")
10461 (plus:V4SI
10462 (mult:V4SI
10463 (sign_extend:V4SI
10464 (vec_select:V4HI
10465 (match_operand:V8HI 1 "vector_operand")
10466 (parallel [(const_int 0) (const_int 2)
10467 (const_int 4) (const_int 6)])))
10468 (sign_extend:V4SI
10469 (vec_select:V4HI
10470 (match_operand:V8HI 2 "vector_operand")
10471 (parallel [(const_int 0) (const_int 2)
10472 (const_int 4) (const_int 6)]))))
10473 (mult:V4SI
10474 (sign_extend:V4SI
10475 (vec_select:V4HI (match_dup 1)
10476 (parallel [(const_int 1) (const_int 3)
10477 (const_int 5) (const_int 7)])))
10478 (sign_extend:V4SI
10479 (vec_select:V4HI (match_dup 2)
10480 (parallel [(const_int 1) (const_int 3)
10481 (const_int 5) (const_int 7)]))))))]
10482 "TARGET_SSE2"
10483 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
10484
10485 (define_insn "*sse2_pmaddwd"
10486 [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
10487 (plus:V4SI
10488 (mult:V4SI
10489 (sign_extend:V4SI
10490 (vec_select:V4HI
10491 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
10492 (parallel [(const_int 0) (const_int 2)
10493 (const_int 4) (const_int 6)])))
10494 (sign_extend:V4SI
10495 (vec_select:V4HI
10496 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
10497 (parallel [(const_int 0) (const_int 2)
10498 (const_int 4) (const_int 6)]))))
10499 (mult:V4SI
10500 (sign_extend:V4SI
10501 (vec_select:V4HI (match_dup 1)
10502 (parallel [(const_int 1) (const_int 3)
10503 (const_int 5) (const_int 7)])))
10504 (sign_extend:V4SI
10505 (vec_select:V4HI (match_dup 2)
10506 (parallel [(const_int 1) (const_int 3)
10507 (const_int 5) (const_int 7)]))))))]
10508 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10509 "@
10510 pmaddwd\t{%2, %0|%0, %2}
10511 vpmaddwd\t{%2, %1, %0|%0, %1, %2}
10512 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10513 [(set_attr "isa" "noavx,avx,avx512bw")
10514 (set_attr "type" "sseiadd")
10515 (set_attr "atom_unit" "simul")
10516 (set_attr "prefix_data16" "1,*,*")
10517 (set_attr "prefix" "orig,vex,evex")
10518 (set_attr "mode" "TI")])
10519
10520 (define_insn "avx512dq_mul<mode>3<mask_name>"
10521 [(set (match_operand:VI8 0 "register_operand" "=v")
10522 (mult:VI8
10523 (match_operand:VI8 1 "register_operand" "v")
10524 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
10525 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
10526 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10527 [(set_attr "type" "sseimul")
10528 (set_attr "prefix" "evex")
10529 (set_attr "mode" "<sseinsnmode>")])
10530
10531 (define_expand "mul<mode>3<mask_name>"
10532 [(set (match_operand:VI4_AVX512F 0 "register_operand")
10533 (mult:VI4_AVX512F
10534 (match_operand:VI4_AVX512F 1 "general_vector_operand")
10535 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
10536 "TARGET_SSE2 && <mask_mode512bit_condition>"
10537 {
10538 if (TARGET_SSE4_1)
10539 {
10540 if (!vector_operand (operands[1], <MODE>mode))
10541 operands[1] = force_reg (<MODE>mode, operands[1]);
10542 if (!vector_operand (operands[2], <MODE>mode))
10543 operands[2] = force_reg (<MODE>mode, operands[2]);
10544 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
10545 }
10546 else
10547 {
10548 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
10549 DONE;
10550 }
10551 })
10552
10553 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
10554 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
10555 (mult:VI4_AVX512F
10556 (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
10557 (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
10558 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
10559 && <mask_mode512bit_condition>"
10560 "@
10561 pmulld\t{%2, %0|%0, %2}
10562 pmulld\t{%2, %0|%0, %2}
10563 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10564 [(set_attr "isa" "noavx,noavx,avx")
10565 (set_attr "type" "sseimul")
10566 (set_attr "prefix_extra" "1")
10567 (set_attr "prefix" "<mask_prefix4>")
10568 (set_attr "btver2_decode" "vector,vector,vector")
10569 (set_attr "mode" "<sseinsnmode>")])
10570
10571 (define_expand "mul<mode>3"
10572 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
10573 (mult:VI8_AVX2_AVX512F
10574 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
10575 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
10576 "TARGET_SSE2"
10577 {
10578 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
10579 DONE;
10580 })
10581
10582 (define_expand "vec_widen_<s>mult_hi_<mode>"
10583 [(match_operand:<sseunpackmode> 0 "register_operand")
10584 (any_extend:<sseunpackmode>
10585 (match_operand:VI124_AVX2 1 "register_operand"))
10586 (match_operand:VI124_AVX2 2 "register_operand")]
10587 "TARGET_SSE2"
10588 {
10589 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10590 <u_bool>, true);
10591 DONE;
10592 })
10593
10594 (define_expand "vec_widen_<s>mult_lo_<mode>"
10595 [(match_operand:<sseunpackmode> 0 "register_operand")
10596 (any_extend:<sseunpackmode>
10597 (match_operand:VI124_AVX2 1 "register_operand"))
10598 (match_operand:VI124_AVX2 2 "register_operand")]
10599 "TARGET_SSE2"
10600 {
10601 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10602 <u_bool>, false);
10603 DONE;
10604 })
10605
10606 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
10607 ;; named patterns, but signed V4SI needs special help for plain SSE2.
10608 (define_expand "vec_widen_smult_even_v4si"
10609 [(match_operand:V2DI 0 "register_operand")
10610 (match_operand:V4SI 1 "vector_operand")
10611 (match_operand:V4SI 2 "vector_operand")]
10612 "TARGET_SSE2"
10613 {
10614 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10615 false, false);
10616 DONE;
10617 })
10618
10619 (define_expand "vec_widen_<s>mult_odd_<mode>"
10620 [(match_operand:<sseunpackmode> 0 "register_operand")
10621 (any_extend:<sseunpackmode>
10622 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
10623 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
10624 "TARGET_SSE2"
10625 {
10626 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10627 <u_bool>, true);
10628 DONE;
10629 })
10630
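;; sdot_prod<mode> is one dot-product step: pmaddwd multiply-adds the
;; adjacent 16-bit pairs, then the result is added to accumulator
;; operand 3.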
10631 (define_mode_attr SDOT_PMADD_SUF
10632 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
10633
10634 (define_expand "sdot_prod<mode>"
10635 [(match_operand:<sseunpackmode> 0 "register_operand")
10636 (match_operand:VI2_AVX2 1 "register_operand")
10637 (match_operand:VI2_AVX2 2 "register_operand")
10638 (match_operand:<sseunpackmode> 3 "register_operand")]
10639 "TARGET_SSE2"
10640 {
10641 rtx t = gen_reg_rtx (<sseunpackmode>mode);
10642 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
10643 emit_insn (gen_rtx_SET (operands[0],
10644 gen_rtx_PLUS (<sseunpackmode>mode,
10645 operands[3], t)));
10646 DONE;
10647 })
10648
10649 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
10650 ;; back together when madd is available.
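;; pmacsdqh/pmacsdql multiply-accumulate the high resp. low signed dword of
;; each qword pair, so chaining them adds both 32x32->64 products of a pair
;; into the accumulator supplied in operand 3.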
10651 (define_expand "sdot_prodv4si"
10652 [(match_operand:V2DI 0 "register_operand")
10653 (match_operand:V4SI 1 "register_operand")
10654 (match_operand:V4SI 2 "register_operand")
10655 (match_operand:V2DI 3 "register_operand")]
10656 "TARGET_XOP"
10657 {
10658 rtx t = gen_reg_rtx (V2DImode);
10659 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
10660 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
10661 DONE;
10662 })
10663
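;; psadbw produces one 16-bit sum of absolute byte differences per 64-bit
;; half, zero-extended to 64 bits; the convert_move between the equal-sized
;; V2DI and V4SI amounts to a bit-cast, so the add below leaves the odd
;; dword lanes of operand 3 unchanged.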
10664 (define_expand "usadv16qi"
10665 [(match_operand:V4SI 0 "register_operand")
10666 (match_operand:V16QI 1 "register_operand")
10667 (match_operand:V16QI 2 "vector_operand")
10668 (match_operand:V4SI 3 "vector_operand")]
10669 "TARGET_SSE2"
10670 {
10671 rtx t1 = gen_reg_rtx (V2DImode);
10672 rtx t2 = gen_reg_rtx (V4SImode);
10673 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
10674 convert_move (t2, t1, 0);
10675 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
10676 DONE;
10677 })
10678
10679 (define_expand "usadv32qi"
10680 [(match_operand:V8SI 0 "register_operand")
10681 (match_operand:V32QI 1 "register_operand")
10682 (match_operand:V32QI 2 "nonimmediate_operand")
10683 (match_operand:V8SI 3 "nonimmediate_operand")]
10684 "TARGET_AVX2"
10685 {
10686 rtx t1 = gen_reg_rtx (V4DImode);
10687 rtx t2 = gen_reg_rtx (V8SImode);
10688 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
10689 convert_move (t2, t1, 0);
10690 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
10691 DONE;
10692 })
10693
10694 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
10695 [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
10696 (ashiftrt:VI248_AVX512BW_1
10697 (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
10698 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10699 "TARGET_AVX512VL"
10700 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10701 [(set_attr "type" "sseishft")
10702 (set (attr "length_immediate")
10703 (if_then_else (match_operand 2 "const_int_operand")
10704 (const_string "1")
10705 (const_string "0")))
10706 (set_attr "mode" "<sseinsnmode>")])
10707
10708 (define_insn "ashr<mode>3"
10709 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
10710 (ashiftrt:VI24_AVX2
10711 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
10712 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
10713 "TARGET_SSE2"
10714 "@
10715 psra<ssemodesuffix>\t{%2, %0|%0, %2}
10716 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10717 [(set_attr "isa" "noavx,avx")
10718 (set_attr "type" "sseishft")
10719 (set (attr "length_immediate")
10720 (if_then_else (match_operand 2 "const_int_operand")
10721 (const_string "1")
10722 (const_string "0")))
10723 (set_attr "prefix_data16" "1,*")
10724 (set_attr "prefix" "orig,vex")
10725 (set_attr "mode" "<sseinsnmode>")])
10726
10727 (define_insn "ashr<mode>3<mask_name>"
10728 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
10729 (ashiftrt:VI248_AVX512BW_AVX512VL
10730 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
10731 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10732 "TARGET_AVX512F"
10733 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10734 [(set_attr "type" "sseishft")
10735 (set (attr "length_immediate")
10736 (if_then_else (match_operand 2 "const_int_operand")
10737 (const_string "1")
10738 (const_string "0")))
10739 (set_attr "mode" "<sseinsnmode>")])
10740
10741 (define_insn "<mask_codefor><shift_insn><mode>3<mask_name>"
10742 [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
10743 (any_lshift:VI248_AVX512BW_2
10744 (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
10745 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10746 "TARGET_AVX512VL"
10747 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10748 [(set_attr "type" "sseishft")
10749 (set (attr "length_immediate")
10750 (if_then_else (match_operand 2 "const_int_operand")
10751 (const_string "1")
10752 (const_string "0")))
10753 (set_attr "mode" "<sseinsnmode>")])
10754
10755 (define_insn "<shift_insn><mode>3"
10756 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
10757 (any_lshift:VI248_AVX2
10758 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
10759 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
10760 "TARGET_SSE2"
10761 "@
10762 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10763 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10764 [(set_attr "isa" "noavx,avx")
10765 (set_attr "type" "sseishft")
10766 (set (attr "length_immediate")
10767 (if_then_else (match_operand 2 "const_int_operand")
10768 (const_string "1")
10769 (const_string "0")))
10770 (set_attr "prefix_data16" "1,*")
10771 (set_attr "prefix" "orig,vex")
10772 (set_attr "mode" "<sseinsnmode>")])
10773
10774 (define_insn "<shift_insn><mode>3<mask_name>"
10775 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
10776 (any_lshift:VI248_AVX512BW
10777 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
10778 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
10779 "TARGET_AVX512F"
10780 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10781 [(set_attr "type" "sseishft")
10782 (set (attr "length_immediate")
10783 (if_then_else (match_operand 2 "const_int_operand")
10784 (const_string "1")
10785 (const_string "0")))
10786 (set_attr "mode" "<sseinsnmode>")])
10787
10788
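;; vec_shr shifts the whole vector right as a single 128-bit value: the
;; operand is viewed as V1TI and shifted by a byte multiple, which matches
;; the psrldq patterns below.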
10789 (define_expand "vec_shr_<mode>"
10790 [(set (match_dup 3)
10791 (lshiftrt:V1TI
10792 (match_operand:VI_128 1 "register_operand")
10793 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10794 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10795 "TARGET_SSE2"
10796 {
10797 operands[1] = gen_lowpart (V1TImode, operands[1]);
10798 operands[3] = gen_reg_rtx (V1TImode);
10799 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10800 })
10801
10802 (define_insn "avx512bw_<shift_insn><mode>3"
10803 [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
10804 (any_lshift:VIMAX_AVX512VL
10805 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
10806 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
10807 "TARGET_AVX512BW"
10808 {
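/* The RTL shift count is in bits (a multiple of 8); vpslldq/vpsrldq
   take a byte count.  */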
10809 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10810 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
10811 }
10812 [(set_attr "type" "sseishft")
10813 (set_attr "length_immediate" "1")
10814 (set_attr "prefix" "maybe_evex")
10815 (set_attr "mode" "<sseinsnmode>")])
10816
10817 (define_insn "<sse2_avx2>_<shift_insn><mode>3"
10818 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10819 (any_lshift:VIMAX_AVX2
10820 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10821 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10822 "TARGET_SSE2"
10823 {
10824 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10825
10826 switch (which_alternative)
10827 {
10828 case 0:
10829 return "p<vshift>dq\t{%2, %0|%0, %2}";
10830 case 1:
10831 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
10832 default:
10833 gcc_unreachable ();
10834 }
10835 }
10836 [(set_attr "isa" "noavx,avx")
10837 (set_attr "type" "sseishft")
10838 (set_attr "length_immediate" "1")
10839 (set_attr "atom_unit" "sishuf")
10840 (set_attr "prefix_data16" "1,*")
10841 (set_attr "prefix" "orig,vex")
10842 (set_attr "mode" "<sseinsnmode>")])
10843
10844 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
10845 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10846 (any_rotate:VI48_AVX512VL
10847 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10848 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10849 "TARGET_AVX512F"
10850 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10851 [(set_attr "prefix" "evex")
10852 (set_attr "mode" "<sseinsnmode>")])
10853
10854 (define_insn "<avx512>_<rotate><mode><mask_name>"
10855 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10856 (any_rotate:VI48_AVX512VL
10857 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
10858 (match_operand:SI 2 "const_0_to_255_operand")))]
10859 "TARGET_AVX512F"
10860 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10861 [(set_attr "prefix" "evex")
10862 (set_attr "mode" "<sseinsnmode>")])
10863
10864 (define_expand "<code><mode>3"
10865 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
10866 (maxmin:VI124_256_AVX512F_AVX512BW
10867 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
10868 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
10869 "TARGET_AVX2"
10870 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10871
10872 (define_insn "*avx2_<code><mode>3"
10873 [(set (match_operand:VI124_256 0 "register_operand" "=v")
10874 (maxmin:VI124_256
10875 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10876 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10877 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10878 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10879 [(set_attr "type" "sseiadd")
10880 (set_attr "prefix_extra" "1")
10881 (set_attr "prefix" "vex")
10882 (set_attr "mode" "OI")])
10883
10884 (define_expand "<code><mode>3_mask"
10885 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10886 (vec_merge:VI48_AVX512VL
10887 (maxmin:VI48_AVX512VL
10888 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10889 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10890 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10891 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10892 "TARGET_AVX512F"
10893 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10894
10895 (define_insn "*avx512f_<code><mode>3<mask_name>"
10896 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10897 (maxmin:VI48_AVX512VL
10898 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10899 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10900 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10901 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10902 [(set_attr "type" "sseiadd")
10903 (set_attr "prefix_extra" "1")
10904 (set_attr "prefix" "maybe_evex")
10905 (set_attr "mode" "<sseinsnmode>")])
10906
10907 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10908 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10909 (maxmin:VI12_AVX512VL
10910 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10911 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10912 "TARGET_AVX512BW"
10913 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10914 [(set_attr "type" "sseiadd")
10915 (set_attr "prefix" "evex")
10916 (set_attr "mode" "<sseinsnmode>")])
10917
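;; 64-bit element min/max: AVX-512 provides vpmins/maxq and vpminu/maxuq;
;; otherwise the operation is synthesized as a compare and blend through
;; ix86_expand_int_vcond.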
10918 (define_expand "<code><mode>3"
10919 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
10920 (maxmin:VI8_AVX2_AVX512F
10921 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
10922 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
10923 "TARGET_SSE4_2"
10924 {
10925 if (TARGET_AVX512F
10926 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10927 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10928 else
10929 {
10930 enum rtx_code code;
10931 rtx xops[6];
10932 bool ok;
10933
10935 xops[0] = operands[0];
10936
10937 if (<CODE> == SMAX || <CODE> == UMAX)
10938 {
10939 xops[1] = operands[1];
10940 xops[2] = operands[2];
10941 }
10942 else
10943 {
10944 xops[1] = operands[2];
10945 xops[2] = operands[1];
10946 }
10947
10948 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
10949
10950 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10951 xops[4] = operands[1];
10952 xops[5] = operands[2];
10953
10954 ok = ix86_expand_int_vcond (xops);
10955 gcc_assert (ok);
10956 DONE;
10957 }
10958 })
10959
10960 (define_expand "<code><mode>3"
10961 [(set (match_operand:VI124_128 0 "register_operand")
10962 (smaxmin:VI124_128
10963 (match_operand:VI124_128 1 "vector_operand")
10964 (match_operand:VI124_128 2 "vector_operand")))]
10965 "TARGET_SSE2"
10966 {
10967 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10968 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10969 else
10970 {
10971 rtx xops[6];
10972 bool ok;
10973
10974 xops[0] = operands[0];
10975 operands[1] = force_reg (<MODE>mode, operands[1]);
10976 operands[2] = force_reg (<MODE>mode, operands[2]);
10977
10978 if (<CODE> == SMAX)
10979 {
10980 xops[1] = operands[1];
10981 xops[2] = operands[2];
10982 }
10983 else
10984 {
10985 xops[1] = operands[2];
10986 xops[2] = operands[1];
10987 }
10988
10989 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
10990 xops[4] = operands[1];
10991 xops[5] = operands[2];
10992
10993 ok = ix86_expand_int_vcond (xops);
10994 gcc_assert (ok);
10995 DONE;
10996 }
10997 })
10998
10999 (define_insn "*sse4_1_<code><mode>3<mask_name>"
11000 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
11001 (smaxmin:VI14_128
11002 (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
11003 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11004 "TARGET_SSE4_1
11005 && <mask_mode512bit_condition>
11006 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11007 "@
11008 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11009 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11010 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11011 [(set_attr "isa" "noavx,noavx,avx")
11012 (set_attr "type" "sseiadd")
11013 (set_attr "prefix_extra" "1,1,*")
11014 (set_attr "prefix" "orig,orig,vex")
11015 (set_attr "mode" "TI")])
11016
11017 (define_insn "*<code>v8hi3"
11018 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
11019 (smaxmin:V8HI
11020 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
11021 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
11022 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11023 "@
11024 p<maxmin_int>w\t{%2, %0|%0, %2}
11025 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
11026 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
11027 [(set_attr "isa" "noavx,avx,avx512bw")
11028 (set_attr "type" "sseiadd")
11029 (set_attr "prefix_data16" "1,*,*")
11030 (set_attr "prefix_extra" "*,1,1")
11031 (set_attr "prefix" "orig,vex,evex")
11032 (set_attr "mode" "TI")])
11033
11034 (define_expand "<code><mode>3"
11035 [(set (match_operand:VI124_128 0 "register_operand")
11036 (umaxmin:VI124_128
11037 (match_operand:VI124_128 1 "vector_operand")
11038 (match_operand:VI124_128 2 "vector_operand")))]
11039 "TARGET_SSE2"
11040 {
11041 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
11042 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11043 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
11044 {
11045 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
11046 operands[1] = force_reg (<MODE>mode, operands[1]);
11047 if (rtx_equal_p (op3, op2))
11048 op3 = gen_reg_rtx (V8HImode);
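/* umax (x, y) == (x -us y) + y, using unsigned saturating subtraction.  */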
11049 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
11050 emit_insn (gen_addv8hi3 (op0, op3, op2));
11051 DONE;
11052 }
11053 else
11054 {
11055 rtx xops[6];
11056 bool ok;
11057
11058 operands[1] = force_reg (<MODE>mode, operands[1]);
11059 operands[2] = force_reg (<MODE>mode, operands[2]);
11060
11061 xops[0] = operands[0];
11062
11063 if (<CODE> == UMAX)
11064 {
11065 xops[1] = operands[1];
11066 xops[2] = operands[2];
11067 }
11068 else
11069 {
11070 xops[1] = operands[2];
11071 xops[2] = operands[1];
11072 }
11073
11074 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
11075 xops[4] = operands[1];
11076 xops[5] = operands[2];
11077
11078 ok = ix86_expand_int_vcond (xops);
11079 gcc_assert (ok);
11080 DONE;
11081 }
11082 })
11083
11084 (define_insn "*sse4_1_<code><mode>3<mask_name>"
11085 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
11086 (umaxmin:VI24_128
11087 (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
11088 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11089 "TARGET_SSE4_1
11090 && <mask_mode512bit_condition>
11091 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11092 "@
11093 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11094 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11095 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11096 [(set_attr "isa" "noavx,noavx,avx")
11097 (set_attr "type" "sseiadd")
11098 (set_attr "prefix_extra" "1,1,*")
11099 (set_attr "prefix" "orig,orig,vex")
11100 (set_attr "mode" "TI")])
11101
11102 (define_insn "*<code>v16qi3"
11103 [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
11104 (umaxmin:V16QI
11105 (match_operand:V16QI 1 "vector_operand" "%0,x,v")
11106 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
11107 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11108 "@
11109 p<maxmin_int>b\t{%2, %0|%0, %2}
11110 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
11111 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
11112 [(set_attr "isa" "noavx,avx,avx512bw")
11113 (set_attr "type" "sseiadd")
11114 (set_attr "prefix_data16" "1,*,*")
11115 (set_attr "prefix_extra" "*,1,1")
11116 (set_attr "prefix" "orig,vex,evex")
11117 (set_attr "mode" "TI")])
11118
11119 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11120 ;;
11121 ;; Parallel integral comparisons
11122 ;;
11123 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11124
11125 (define_expand "avx2_eq<mode>3"
11126 [(set (match_operand:VI_256 0 "register_operand")
11127 (eq:VI_256
11128 (match_operand:VI_256 1 "nonimmediate_operand")
11129 (match_operand:VI_256 2 "nonimmediate_operand")))]
11130 "TARGET_AVX2"
11131 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11132
11133 (define_insn "*avx2_eq<mode>3"
11134 [(set (match_operand:VI_256 0 "register_operand" "=x")
11135 (eq:VI_256
11136 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
11137 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
11138 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11139 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11140 [(set_attr "type" "ssecmp")
11141 (set_attr "prefix_extra" "1")
11142 (set_attr "prefix" "vex")
11143 (set_attr "mode" "OI")])
11144
11145 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
11146 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
11147 (unspec:<avx512fmaskmode>
11148 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
11149 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
11150 UNSPEC_MASKED_EQ))]
11151 "TARGET_AVX512BW"
11152 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11153
11154 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
11155 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
11156 (unspec:<avx512fmaskmode>
11157 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11158 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
11159 UNSPEC_MASKED_EQ))]
11160 "TARGET_AVX512F"
11161 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11162
11163 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
11164 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11165 (unspec:<avx512fmaskmode>
11166 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "%v")
11167 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11168 UNSPEC_MASKED_EQ))]
11169 "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11170 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11171 [(set_attr "type" "ssecmp")
11172 (set_attr "prefix_extra" "1")
11173 (set_attr "prefix" "evex")
11174 (set_attr "mode" "<sseinsnmode>")])
11175
11176 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
11177 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11178 (unspec:<avx512fmaskmode>
11179 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
11180 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11181 UNSPEC_MASKED_EQ))]
11182 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11183 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11184 [(set_attr "type" "ssecmp")
11185 (set_attr "prefix_extra" "1")
11186 (set_attr "prefix" "evex")
11187 (set_attr "mode" "<sseinsnmode>")])
11188
11189 (define_insn "*sse4_1_eqv2di3"
11190 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
11191 (eq:V2DI
11192 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
11193 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
11194 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11195 "@
11196 pcmpeqq\t{%2, %0|%0, %2}
11197 pcmpeqq\t{%2, %0|%0, %2}
11198 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
11199 [(set_attr "isa" "noavx,noavx,avx")
11200 (set_attr "type" "ssecmp")
11201 (set_attr "prefix_extra" "1")
11202 (set_attr "prefix" "orig,orig,vex")
11203 (set_attr "mode" "TI")])
11204
11205 (define_insn "*sse2_eq<mode>3"
11206 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
11207 (eq:VI124_128
11208 (match_operand:VI124_128 1 "vector_operand" "%0,x")
11209 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
11210 "TARGET_SSE2 && !TARGET_XOP
11211 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11212 "@
11213 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
11214 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11215 [(set_attr "isa" "noavx,avx")
11216 (set_attr "type" "ssecmp")
11217 (set_attr "prefix_data16" "1,*")
11218 (set_attr "prefix" "orig,vex")
11219 (set_attr "mode" "TI")])
11220
11221 (define_expand "sse2_eq<mode>3"
11222 [(set (match_operand:VI124_128 0 "register_operand")
11223 (eq:VI124_128
11224 (match_operand:VI124_128 1 "vector_operand")
11225 (match_operand:VI124_128 2 "vector_operand")))]
11226 "TARGET_SSE2 && !TARGET_XOP"
11227 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11228
11229 (define_expand "sse4_1_eqv2di3"
11230 [(set (match_operand:V2DI 0 "register_operand")
11231 (eq:V2DI
11232 (match_operand:V2DI 1 "vector_operand")
11233 (match_operand:V2DI 2 "vector_operand")))]
11234 "TARGET_SSE4_1"
11235 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
11236
11237 (define_insn "sse4_2_gtv2di3"
11238 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
11239 (gt:V2DI
11240 (match_operand:V2DI 1 "register_operand" "0,0,x")
11241 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
11242 "TARGET_SSE4_2"
11243 "@
11244 pcmpgtq\t{%2, %0|%0, %2}
11245 pcmpgtq\t{%2, %0|%0, %2}
11246 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
11247 [(set_attr "isa" "noavx,noavx,avx")
11248 (set_attr "type" "ssecmp")
11249 (set_attr "prefix_extra" "1")
11250 (set_attr "prefix" "orig,orig,vex")
11251 (set_attr "mode" "TI")])
11252
11253 (define_insn "avx2_gt<mode>3"
11254 [(set (match_operand:VI_256 0 "register_operand" "=x")
11255 (gt:VI_256
11256 (match_operand:VI_256 1 "register_operand" "x")
11257 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
11258 "TARGET_AVX2"
11259 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11260 [(set_attr "type" "ssecmp")
11261 (set_attr "prefix_extra" "1")
11262 (set_attr "prefix" "vex")
11263 (set_attr "mode" "OI")])
11264
11265 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
11266 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11267 (unspec:<avx512fmaskmode>
11268 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11269 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
11270 "TARGET_AVX512F"
11271 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11272 [(set_attr "type" "ssecmp")
11273 (set_attr "prefix_extra" "1")
11274 (set_attr "prefix" "evex")
11275 (set_attr "mode" "<sseinsnmode>")])
11276
11277 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
11278 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11279 (unspec:<avx512fmaskmode>
11280 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11281 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
11282 "TARGET_AVX512BW"
11283 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11284 [(set_attr "type" "ssecmp")
11285 (set_attr "prefix_extra" "1")
11286 (set_attr "prefix" "evex")
11287 (set_attr "mode" "<sseinsnmode>")])
11288
11289 (define_insn "sse2_gt<mode>3"
11290 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
11291 (gt:VI124_128
11292 (match_operand:VI124_128 1 "register_operand" "0,x")
11293 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
11294 "TARGET_SSE2 && !TARGET_XOP"
11295 "@
11296 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
11297 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11298 [(set_attr "isa" "noavx,avx")
11299 (set_attr "type" "ssecmp")
11300 (set_attr "prefix_data16" "1,*")
11301 (set_attr "prefix" "orig,vex")
11302 (set_attr "mode" "TI")])
11303
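;; The vcond/vcondu expanders all go through ix86_expand_int_vcond, which
;; typically lowers the select to a vector compare followed by a blend
;; (or an AVX-512 mask move).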
11304 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
11305 [(set (match_operand:V_512 0 "register_operand")
11306 (if_then_else:V_512
11307 (match_operator 3 ""
11308 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
11309 (match_operand:VI_AVX512BW 5 "general_operand")])
11310 (match_operand:V_512 1)
11311 (match_operand:V_512 2)))]
11312 "TARGET_AVX512F
11313 && (GET_MODE_NUNITS (<V_512:MODE>mode)
11314 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
11315 {
11316 bool ok = ix86_expand_int_vcond (operands);
11317 gcc_assert (ok);
11318 DONE;
11319 })
11320
11321 (define_expand "vcond<V_256:mode><VI_256:mode>"
11322 [(set (match_operand:V_256 0 "register_operand")
11323 (if_then_else:V_256
11324 (match_operator 3 ""
11325 [(match_operand:VI_256 4 "nonimmediate_operand")
11326 (match_operand:VI_256 5 "general_operand")])
11327 (match_operand:V_256 1)
11328 (match_operand:V_256 2)))]
11329 "TARGET_AVX2
11330 && (GET_MODE_NUNITS (<V_256:MODE>mode)
11331 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11332 {
11333 bool ok = ix86_expand_int_vcond (operands);
11334 gcc_assert (ok);
11335 DONE;
11336 })
11337
11338 (define_expand "vcond<V_128:mode><VI124_128:mode>"
11339 [(set (match_operand:V_128 0 "register_operand")
11340 (if_then_else:V_128
11341 (match_operator 3 ""
11342 [(match_operand:VI124_128 4 "vector_operand")
11343 (match_operand:VI124_128 5 "general_operand")])
11344 (match_operand:V_128 1)
11345 (match_operand:V_128 2)))]
11346 "TARGET_SSE2
11347 && (GET_MODE_NUNITS (<V_128:MODE>mode)
11348 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
11349 {
11350 bool ok = ix86_expand_int_vcond (operands);
11351 gcc_assert (ok);
11352 DONE;
11353 })
11354
11355 (define_expand "vcond<VI8F_128:mode>v2di"
11356 [(set (match_operand:VI8F_128 0 "register_operand")
11357 (if_then_else:VI8F_128
11358 (match_operator 3 ""
11359 [(match_operand:V2DI 4 "vector_operand")
11360 (match_operand:V2DI 5 "general_operand")])
11361 (match_operand:VI8F_128 1)
11362 (match_operand:VI8F_128 2)))]
11363 "TARGET_SSE4_2"
11364 {
11365 bool ok = ix86_expand_int_vcond (operands);
11366 gcc_assert (ok);
11367 DONE;
11368 })
11369
11370 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
11371 [(set (match_operand:V_512 0 "register_operand")
11372 (if_then_else:V_512
11373 (match_operator 3 ""
11374 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
11375 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
11376 (match_operand:V_512 1 "general_operand")
11377 (match_operand:V_512 2 "general_operand")))]
11378 "TARGET_AVX512F
11379 && (GET_MODE_NUNITS (<V_512:MODE>mode)
11380 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
11381 {
11382 bool ok = ix86_expand_int_vcond (operands);
11383 gcc_assert (ok);
11384 DONE;
11385 })
11386
11387 (define_expand "vcondu<V_256:mode><VI_256:mode>"
11388 [(set (match_operand:V_256 0 "register_operand")
11389 (if_then_else:V_256
11390 (match_operator 3 ""
11391 [(match_operand:VI_256 4 "nonimmediate_operand")
11392 (match_operand:VI_256 5 "nonimmediate_operand")])
11393 (match_operand:V_256 1 "general_operand")
11394 (match_operand:V_256 2 "general_operand")))]
11395 "TARGET_AVX2
11396 && (GET_MODE_NUNITS (<V_256:MODE>mode)
11397 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11398 {
11399 bool ok = ix86_expand_int_vcond (operands);
11400 gcc_assert (ok);
11401 DONE;
11402 })
11403
11404 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
11405 [(set (match_operand:V_128 0 "register_operand")
11406 (if_then_else:V_128
11407 (match_operator 3 ""
11408 [(match_operand:VI124_128 4 "vector_operand")
11409 (match_operand:VI124_128 5 "vector_operand")])
11410 (match_operand:V_128 1 "general_operand")
11411 (match_operand:V_128 2 "general_operand")))]
11412 "TARGET_SSE2
11413 && (GET_MODE_NUNITS (<V_128:MODE>mode)
11414 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
11415 {
11416 bool ok = ix86_expand_int_vcond (operands);
11417 gcc_assert (ok);
11418 DONE;
11419 })
11420
11421 (define_expand "vcondu<VI8F_128:mode>v2di"
11422 [(set (match_operand:VI8F_128 0 "register_operand")
11423 (if_then_else:VI8F_128
11424 (match_operator 3 ""
11425 [(match_operand:V2DI 4 "vector_operand")
11426 (match_operand:V2DI 5 "vector_operand")])
11427 (match_operand:VI8F_128 1 "general_operand")
11428 (match_operand:VI8F_128 2 "general_operand")))]
11429 "TARGET_SSE4_2"
11430 {
11431 bool ok = ix86_expand_int_vcond (operands);
11432 gcc_assert (ok);
11433 DONE;
11434 })
11435
11436 (define_expand "vcondeq<VI8F_128:mode>v2di"
11437 [(set (match_operand:VI8F_128 0 "register_operand")
11438 (if_then_else:VI8F_128
11439 (match_operator 3 ""
11440 [(match_operand:V2DI 4 "vector_operand")
11441 (match_operand:V2DI 5 "general_operand")])
11442 (match_operand:VI8F_128 1)
11443 (match_operand:VI8F_128 2)))]
11444 "TARGET_SSE4_1"
11445 {
11446 bool ok = ix86_expand_int_vcond (operands);
11447 gcc_assert (ok);
11448 DONE;
11449 })
11450
11451 (define_mode_iterator VEC_PERM_AVX2
11452 [V16QI V8HI V4SI V2DI V4SF V2DF
11453 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
11454 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
11455 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
11456 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
11457 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
11458 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
11459
11460 (define_expand "vec_perm<mode>"
11461 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
11462 (match_operand:VEC_PERM_AVX2 1 "register_operand")
11463 (match_operand:VEC_PERM_AVX2 2 "register_operand")
11464 (match_operand:<sseintvecmode> 3 "register_operand")]
11465 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
11466 {
11467 ix86_expand_vec_perm (operands);
11468 DONE;
11469 })
11470
11471 (define_mode_iterator VEC_PERM_CONST
11472 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
11473 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
11474 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
11475 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
11476 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
11477 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
11478 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
11479 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
11480 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
11481
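;; If no fixed instruction sequence is found for a constant permutation,
;; FAIL lets the middle end fall back to the variable-selector vec_perm
;; expansion above.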
11482 (define_expand "vec_perm_const<mode>"
11483 [(match_operand:VEC_PERM_CONST 0 "register_operand")
11484 (match_operand:VEC_PERM_CONST 1 "register_operand")
11485 (match_operand:VEC_PERM_CONST 2 "register_operand")
11486 (match_operand:<sseintvecmode> 3)]
11487 ""
11488 {
11489 if (ix86_expand_vec_perm_const (operands))
11490 DONE;
11491 else
11492 FAIL;
11493 })
11494
11495 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11496 ;;
11497 ;; Parallel bitwise logical operations
11498 ;;
11499 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11500
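;; One's complement is expanded as xor with an all-ones constant, so it can
;; reuse the vector logic patterns below.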
11501 (define_expand "one_cmpl<mode>2"
11502 [(set (match_operand:VI 0 "register_operand")
11503 (xor:VI (match_operand:VI 1 "vector_operand")
11504 (match_dup 2)))]
11505 "TARGET_SSE"
11506 {
11507 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
11508 })
11509
11510 (define_expand "<sse2_avx2>_andnot<mode>3"
11511 [(set (match_operand:VI_AVX2 0 "register_operand")
11512 (and:VI_AVX2
11513 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
11514 (match_operand:VI_AVX2 2 "vector_operand")))]
11515 "TARGET_SSE2")
11516
11517 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
11518 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11519 (vec_merge:VI48_AVX512VL
11520 (and:VI48_AVX512VL
11521 (not:VI48_AVX512VL
11522 (match_operand:VI48_AVX512VL 1 "register_operand"))
11523 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11524 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
11525 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11526 "TARGET_AVX512F")
11527
11528 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
11529 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
11530 (vec_merge:VI12_AVX512VL
11531 (and:VI12_AVX512VL
11532 (not:VI12_AVX512VL
11533 (match_operand:VI12_AVX512VL 1 "register_operand"))
11534 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
11535 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
11536 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11537 "TARGET_AVX512BW")
11538
11539 (define_insn "*andnot<mode>3"
11540 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
11541 (and:VI
11542 (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
11543 (match_operand:VI 2 "vector_operand" "xBm,xm,vm")))]
11544 "TARGET_SSE"
11545 {
11546 static char buf[64];
11547 const char *ops;
11548 const char *tmp;
11549 const char *ssesuffix;
11550
11551 switch (get_attr_mode (insn))
11552 {
11553 case MODE_XI:
11554 gcc_assert (TARGET_AVX512F);
11555 /* FALLTHRU */
11556 case MODE_OI:
11557 gcc_assert (TARGET_AVX2);
11558 /* FALLTHRU */
11559 case MODE_TI:
11560 gcc_assert (TARGET_SSE2);
11561 tmp = "pandn";
11562 switch (<MODE>mode)
11563 {
11564 case E_V64QImode:
11565 case E_V32HImode:
11566 /* There is no vpandnb or vpandnw instruction, nor vpandn for
11567 512-bit vectors. Use vpandnq instead. */
11568 ssesuffix = "q";
11569 break;
11570 case E_V16SImode:
11571 case E_V8DImode:
11572 ssesuffix = "<ssemodesuffix>";
11573 break;
11574 case E_V8SImode:
11575 case E_V4DImode:
11576 case E_V4SImode:
11577 case E_V2DImode:
11578 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
11579 ? "<ssemodesuffix>" : "");
11580 break;
11581 default:
11582 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
11583 }
11584 break;
11585
11586 case MODE_V16SF:
11587 gcc_assert (TARGET_AVX512F);
11588 /* FALLTHRU */
11589 case MODE_V8SF:
11590 gcc_assert (TARGET_AVX);
11591 /* FALLTHRU */
11592 case MODE_V4SF:
11593 gcc_assert (TARGET_SSE);
11594 tmp = "andn";
11595 ssesuffix = "ps";
11596 break;
11597
11598 default:
11599 gcc_unreachable ();
11600 }
11601
11602 switch (which_alternative)
11603 {
11604 case 0:
11605 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11606 break;
11607 case 1:
11608 case 2:
11609 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11610 break;
11611 default:
11612 gcc_unreachable ();
11613 }
11614
11615 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11616 return buf;
11617 }
11618 [(set_attr "isa" "noavx,avx,avx")
11619 (set_attr "type" "sselog")
11620 (set (attr "prefix_data16")
11621 (if_then_else
11622 (and (eq_attr "alternative" "0")
11623 (eq_attr "mode" "TI"))
11624 (const_string "1")
11625 (const_string "*")))
11626 (set_attr "prefix" "orig,vex,evex")
11627 (set (attr "mode")
11628 (cond [(and (match_test "<MODE_SIZE> == 16")
11629 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11630 (const_string "<ssePSmode>")
11631 (match_test "TARGET_AVX2")
11632 (const_string "<sseinsnmode>")
11633 (match_test "TARGET_AVX")
11634 (if_then_else
11635 (match_test "<MODE_SIZE> > 16")
11636 (const_string "V8SF")
11637 (const_string "<sseinsnmode>"))
11638 (ior (not (match_test "TARGET_SSE2"))
11639 (match_test "optimize_function_for_size_p (cfun)"))
11640 (const_string "V4SF")
11641 ]
11642 (const_string "<sseinsnmode>")))])
11643
11644 (define_insn "*andnot<mode>3_mask"
11645 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11646 (vec_merge:VI48_AVX512VL
11647 (and:VI48_AVX512VL
11648 (not:VI48_AVX512VL
11649 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
11650 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
11651 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
11652 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11653 "TARGET_AVX512F"
11654 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11655 [(set_attr "type" "sselog")
11656 (set_attr "prefix" "evex")
11657 (set_attr "mode" "<sseinsnmode>")])
11658
11659 (define_expand "<code><mode>3"
11660 [(set (match_operand:VI 0 "register_operand")
11661 (any_logic:VI
11662 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
11663 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
11664 "TARGET_SSE"
11665 {
11666 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
11667 DONE;
11668 })
11669
11670 (define_insn "<mask_codefor><code><mode>3<mask_name>"
11671 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
11672 (any_logic:VI48_AVX_AVX512F
11673 (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,x,v")
11674 (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
11675 "TARGET_SSE && <mask_mode512bit_condition>
11676 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11677 {
11678 static char buf[64];
11679 const char *ops;
11680 const char *tmp;
11681 const char *ssesuffix;
11682
11683 switch (get_attr_mode (insn))
11684 {
11685 case MODE_XI:
11686 gcc_assert (TARGET_AVX512F);
11687 /* FALLTHRU */
11688 case MODE_OI:
11689 gcc_assert (TARGET_AVX2);
11690 /* FALLTHRU */
11691 case MODE_TI:
11692 gcc_assert (TARGET_SSE2);
11693 tmp = "p<logic>";
11694 switch (<MODE>mode)
11695 {
11696 case E_V16SImode:
11697 case E_V8DImode:
11698 ssesuffix = "<ssemodesuffix>";
11699 break;
11700 case E_V8SImode:
11701 case E_V4DImode:
11702 case E_V4SImode:
11703 case E_V2DImode:
11704 ssesuffix = (TARGET_AVX512VL
11705 && (<mask_applied> || which_alternative == 2)
11706 ? "<ssemodesuffix>" : "");
11707 break;
11708 default:
11709 gcc_unreachable ();
11710 }
11711 break;
11712
11713 case MODE_V8SF:
11714 gcc_assert (TARGET_AVX);
11715 /* FALLTHRU */
11716 case MODE_V4SF:
11717 gcc_assert (TARGET_SSE);
11718 tmp = "<logic>";
11719 ssesuffix = "ps";
11720 break;
11721
11722 default:
11723 gcc_unreachable ();
11724 }
11725
11726 switch (which_alternative)
11727 {
11728 case 0:
11729 if (<mask_applied>)
11730 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
11731 else
11732 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11733 break;
11734 case 1:
11735 case 2:
11736 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
11737 break;
11738 default:
11739 gcc_unreachable ();
11740 }
11741
11742 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11743 return buf;
11744 }
11745 [(set_attr "isa" "noavx,avx,avx")
11746 (set_attr "type" "sselog")
11747 (set (attr "prefix_data16")
11748 (if_then_else
11749 (and (eq_attr "alternative" "0")
11750 (eq_attr "mode" "TI"))
11751 (const_string "1")
11752 (const_string "*")))
11753 (set_attr "prefix" "<mask_prefix3>,evex,evex")
11754 (set (attr "mode")
11755 (cond [(and (match_test "<MODE_SIZE> == 16")
11756 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11757 (const_string "<ssePSmode>")
11758 (match_test "TARGET_AVX2")
11759 (const_string "<sseinsnmode>")
11760 (match_test "TARGET_AVX")
11761 (if_then_else
11762 (match_test "<MODE_SIZE> > 16")
11763 (const_string "V8SF")
11764 (const_string "<sseinsnmode>"))
11765 (ior (not (match_test "TARGET_SSE2"))
11766 (match_test "optimize_function_for_size_p (cfun)"))
11767 (const_string "V4SF")
11768 ]
11769 (const_string "<sseinsnmode>")))])
11770
11771 (define_insn "*<code><mode>3"
11772 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
11773 (any_logic:VI12_AVX_AVX512F
11774 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
11775 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
11776 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11777 {
11778 static char buf[64];
11779 const char *ops;
11780 const char *tmp;
11781 const char *ssesuffix;
11782
11783 switch (get_attr_mode (insn))
11784 {
11785 case MODE_XI:
11786 gcc_assert (TARGET_AVX512F);
11787 /* FALLTHRU */
11788 case MODE_OI:
11789 gcc_assert (TARGET_AVX2);
11790 /* FALLTHRU */
11791 case MODE_TI:
11792 gcc_assert (TARGET_SSE2);
11793 tmp = "p<logic>";
11794 switch (<MODE>mode)
11795 {
11796 case E_V64QImode:
11797 case E_V32HImode:
11798 ssesuffix = "q";
11799 break;
11800 case E_V32QImode:
11801 case E_V16HImode:
11802 case E_V16QImode:
11803 case E_V8HImode:
11804 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
11805 break;
11806 default:
11807 gcc_unreachable ();
11808 }
11809 break;
11810
11811 case MODE_V8SF:
11812 gcc_assert (TARGET_AVX);
11813 /* FALLTHRU */
11814 case MODE_V4SF:
11815 gcc_assert (TARGET_SSE);
11816 tmp = "<logic>";
11817 ssesuffix = "ps";
11818 break;
11819
11820 default:
11821 gcc_unreachable ();
11822 }
11823
11824 switch (which_alternative)
11825 {
11826 case 0:
11827 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11828 break;
11829 case 1:
11830 case 2:
11831 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11832 break;
11833 default:
11834 gcc_unreachable ();
11835 }
11836
11837 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11838 return buf;
11839 }
11840 [(set_attr "isa" "noavx,avx,avx")
11841 (set_attr "type" "sselog")
11842 (set (attr "prefix_data16")
11843 (if_then_else
11844 (and (eq_attr "alternative" "0")
11845 (eq_attr "mode" "TI"))
11846 (const_string "1")
11847 (const_string "*")))
11848 (set_attr "prefix" "<mask_prefix3>,evex,evex")
11849 (set (attr "mode")
11850 (cond [(and (match_test "<MODE_SIZE> == 16")
11851 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11852 (const_string "<ssePSmode>")
11853 (match_test "TARGET_AVX2")
11854 (const_string "<sseinsnmode>")
11855 (match_test "TARGET_AVX")
11856 (if_then_else
11857 (match_test "<MODE_SIZE> > 16")
11858 (const_string "V8SF")
11859 (const_string "<sseinsnmode>"))
11860 (ior (not (match_test "TARGET_SSE2"))
11861 (match_test "optimize_function_for_size_p (cfun)"))
11862 (const_string "V4SF")
11863 ]
11864 (const_string "<sseinsnmode>")))])
11865
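;; vptestm sets a mask bit when (op1 & op2) is nonzero in that element;
;; vptestnm sets it when the AND is zero.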
11866 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11867 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11868 (unspec:<avx512fmaskmode>
11869 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11870 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11871 UNSPEC_TESTM))]
11872 "TARGET_AVX512BW"
11873 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11874 [(set_attr "prefix" "evex")
11875 (set_attr "mode" "<sseinsnmode>")])
11876
11877 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11878 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11879 (unspec:<avx512fmaskmode>
11880 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11881 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11882 UNSPEC_TESTM))]
11883 "TARGET_AVX512F"
11884 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11885 [(set_attr "prefix" "evex")
11886 (set_attr "mode" "<sseinsnmode>")])
11887
11888 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11889 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11890 (unspec:<avx512fmaskmode>
11891 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11892 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11893 UNSPEC_TESTNM))]
11894 "TARGET_AVX512BW"
11895 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11896 [(set_attr "prefix" "evex")
11897 (set_attr "mode" "<sseinsnmode>")])
11898
11899 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11900 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11901 (unspec:<avx512fmaskmode>
11902 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11903 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11904 UNSPEC_TESTNM))]
11905 "TARGET_AVX512F"
11906 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11907 [(set_attr "prefix" "evex")
11908 (set_attr "mode" "<sseinsnmode>")])
11909
11910 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11911 ;;
11912 ;; Parallel integral element swizzling
11913 ;;
11914 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11915
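;; Truncating pack: both inputs are viewed in the narrow result mode and the
;; even-numbered elements are kept, which on little-endian are exactly the
;; low halves of the wide elements.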
11916 (define_expand "vec_pack_trunc_<mode>"
11917 [(match_operand:<ssepackmode> 0 "register_operand")
11918 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
11919 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
11920 "TARGET_SSE2"
11921 {
11922 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
11923 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
11924 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
11925 DONE;
11926 })
11927
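;; Packs of scalar mask modes: shift the second mask left by the number of
;; source mask bits and ior in the zero-extended first one.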
11928 (define_expand "vec_pack_trunc_qi"
11929 [(set (match_operand:HI 0 ("register_operand"))
11930 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 ("register_operand")))
11931 (const_int 8))
11932 (zero_extend:HI (match_operand:QI 1 ("register_operand")))))]
11933 "TARGET_AVX512F")
11934
11935 (define_expand "vec_pack_trunc_<mode>"
11936 [(set (match_operand:<DOUBLEMASKMODE> 0 ("register_operand"))
11937 (ior:<DOUBLEMASKMODE> (ashift:<DOUBLEMASKMODE> (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 2 ("register_operand")))
11938 (match_dup 3))
11939 (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 1 ("register_operand")))))]
11940 "TARGET_AVX512BW"
11941 {
11942 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
11943 })
11944
11945 (define_insn "<sse2_avx2>_packsswb<mask_name>"
11946 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
11947 (vec_concat:VI1_AVX512
11948 (ss_truncate:<ssehalfvecmode>
11949 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
11950 (ss_truncate:<ssehalfvecmode>
11951 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
11952 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11953 "@
11954 packsswb\t{%2, %0|%0, %2}
11955 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
11956 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11957 [(set_attr "isa" "noavx,avx,avx512bw")
11958 (set_attr "type" "sselog")
11959 (set_attr "prefix_data16" "1,*,*")
11960 (set_attr "prefix" "orig,<mask_prefix>,evex")
11961 (set_attr "mode" "<sseinsnmode>")])
11962
11963 (define_insn "<sse2_avx2>_packssdw<mask_name>"
11964 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
11965 (vec_concat:VI2_AVX2
11966 (ss_truncate:<ssehalfvecmode>
11967 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
11968 (ss_truncate:<ssehalfvecmode>
11969 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
11970 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11971 "@
11972 packssdw\t{%2, %0|%0, %2}
11973 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
11974 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11975 [(set_attr "isa" "noavx,avx,avx512bw")
11976 (set_attr "type" "sselog")
11977 (set_attr "prefix_data16" "1,*,*")
11978 (set_attr "prefix" "orig,<mask_prefix>,evex")
11979 (set_attr "mode" "<sseinsnmode>")])
11980
11981 (define_insn "<sse2_avx2>_packuswb<mask_name>"
11982 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
11983 (vec_concat:VI1_AVX512
11984 (us_truncate:<ssehalfvecmode>
11985 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
11986 (us_truncate:<ssehalfvecmode>
11987 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
11988 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11989 "@
11990 packuswb\t{%2, %0|%0, %2}
11991 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
11992 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11993 [(set_attr "isa" "noavx,avx,avx512bw")
11994 (set_attr "type" "sselog")
11995 (set_attr "prefix_data16" "1,*,*")
11996 (set_attr "prefix" "orig,<mask_prefix>,evex")
11997 (set_attr "mode" "<sseinsnmode>")])
11998
11999 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
12000 [(set (match_operand:V64QI 0 "register_operand" "=v")
12001 (vec_select:V64QI
12002 (vec_concat:V128QI
12003 (match_operand:V64QI 1 "register_operand" "v")
12004 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
12005 (parallel [(const_int 8) (const_int 72)
12006 (const_int 9) (const_int 73)
12007 (const_int 10) (const_int 74)
12008 (const_int 11) (const_int 75)
12009 (const_int 12) (const_int 76)
12010 (const_int 13) (const_int 77)
12011 (const_int 14) (const_int 78)
12012 (const_int 15) (const_int 79)
12013 (const_int 24) (const_int 88)
12014 (const_int 25) (const_int 89)
12015 (const_int 26) (const_int 90)
12016 (const_int 27) (const_int 91)
12017 (const_int 28) (const_int 92)
12018 (const_int 29) (const_int 93)
12019 (const_int 30) (const_int 94)
12020 (const_int 31) (const_int 95)
12021 (const_int 40) (const_int 104)
12022 (const_int 41) (const_int 105)
12023 (const_int 42) (const_int 106)
12024 (const_int 43) (const_int 107)
12025 (const_int 44) (const_int 108)
12026 (const_int 45) (const_int 109)
12027 (const_int 46) (const_int 110)
12028 (const_int 47) (const_int 111)
12029 (const_int 56) (const_int 120)
12030 (const_int 57) (const_int 121)
12031 (const_int 58) (const_int 122)
12032 (const_int 59) (const_int 123)
12033 (const_int 60) (const_int 124)
12034 (const_int 61) (const_int 125)
12035 (const_int 62) (const_int 126)
12036 (const_int 63) (const_int 127)])))]
12037 "TARGET_AVX512BW"
12038 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12039 [(set_attr "type" "sselog")
12040 (set_attr "prefix" "evex")
12041 (set_attr "mode" "XI")])
12042
12043 (define_insn "avx2_interleave_highv32qi<mask_name>"
12044 [(set (match_operand:V32QI 0 "register_operand" "=v")
12045 (vec_select:V32QI
12046 (vec_concat:V64QI
12047 (match_operand:V32QI 1 "register_operand" "v")
12048 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
12049 (parallel [(const_int 8) (const_int 40)
12050 (const_int 9) (const_int 41)
12051 (const_int 10) (const_int 42)
12052 (const_int 11) (const_int 43)
12053 (const_int 12) (const_int 44)
12054 (const_int 13) (const_int 45)
12055 (const_int 14) (const_int 46)
12056 (const_int 15) (const_int 47)
12057 (const_int 24) (const_int 56)
12058 (const_int 25) (const_int 57)
12059 (const_int 26) (const_int 58)
12060 (const_int 27) (const_int 59)
12061 (const_int 28) (const_int 60)
12062 (const_int 29) (const_int 61)
12063 (const_int 30) (const_int 62)
12064 (const_int 31) (const_int 63)])))]
12065 "TARGET_AVX2 && <mask_avx512vl_condition>"
12066 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12067 [(set_attr "type" "sselog")
12068 (set_attr "prefix" "<mask_prefix>")
12069 (set_attr "mode" "OI")])
12070
12071 (define_insn "vec_interleave_highv16qi<mask_name>"
12072 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
12073 (vec_select:V16QI
12074 (vec_concat:V32QI
12075 (match_operand:V16QI 1 "register_operand" "0,v")
12076 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
12077 (parallel [(const_int 8) (const_int 24)
12078 (const_int 9) (const_int 25)
12079 (const_int 10) (const_int 26)
12080 (const_int 11) (const_int 27)
12081 (const_int 12) (const_int 28)
12082 (const_int 13) (const_int 29)
12083 (const_int 14) (const_int 30)
12084 (const_int 15) (const_int 31)])))]
12085 "TARGET_SSE2 && <mask_avx512vl_condition>"
12086 "@
12087 punpckhbw\t{%2, %0|%0, %2}
12088 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12089 [(set_attr "isa" "noavx,avx")
12090 (set_attr "type" "sselog")
12091 (set_attr "prefix_data16" "1,*")
12092 (set_attr "prefix" "orig,<mask_prefix>")
12093 (set_attr "mode" "TI")])
12094
12095 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
12096 [(set (match_operand:V64QI 0 "register_operand" "=v")
12097 (vec_select:V64QI
12098 (vec_concat:V128QI
12099 (match_operand:V64QI 1 "register_operand" "v")
12100 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
12101 (parallel [(const_int 0) (const_int 64)
12102 (const_int 1) (const_int 65)
12103 (const_int 2) (const_int 66)
12104 (const_int 3) (const_int 67)
12105 (const_int 4) (const_int 68)
12106 (const_int 5) (const_int 69)
12107 (const_int 6) (const_int 70)
12108 (const_int 7) (const_int 71)
12109 (const_int 16) (const_int 80)
12110 (const_int 17) (const_int 81)
12111 (const_int 18) (const_int 82)
12112 (const_int 19) (const_int 83)
12113 (const_int 20) (const_int 84)
12114 (const_int 21) (const_int 85)
12115 (const_int 22) (const_int 86)
12116 (const_int 23) (const_int 87)
12117 (const_int 32) (const_int 96)
12118 (const_int 33) (const_int 97)
12119 (const_int 34) (const_int 98)
12120 (const_int 35) (const_int 99)
12121 (const_int 36) (const_int 100)
12122 (const_int 37) (const_int 101)
12123 (const_int 38) (const_int 102)
12124 (const_int 39) (const_int 103)
12125 (const_int 48) (const_int 112)
12126 (const_int 49) (const_int 113)
12127 (const_int 50) (const_int 114)
12128 (const_int 51) (const_int 115)
12129 (const_int 52) (const_int 116)
12130 (const_int 53) (const_int 117)
12131 (const_int 54) (const_int 118)
12132 (const_int 55) (const_int 119)])))]
12133 "TARGET_AVX512BW"
12134 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12135 [(set_attr "type" "sselog")
12136 (set_attr "prefix" "evex")
12137 (set_attr "mode" "XI")])
12138
12139 (define_insn "avx2_interleave_lowv32qi<mask_name>"
12140 [(set (match_operand:V32QI 0 "register_operand" "=v")
12141 (vec_select:V32QI
12142 (vec_concat:V64QI
12143 (match_operand:V32QI 1 "register_operand" "v")
12144 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
12145 (parallel [(const_int 0) (const_int 32)
12146 (const_int 1) (const_int 33)
12147 (const_int 2) (const_int 34)
12148 (const_int 3) (const_int 35)
12149 (const_int 4) (const_int 36)
12150 (const_int 5) (const_int 37)
12151 (const_int 6) (const_int 38)
12152 (const_int 7) (const_int 39)
12153 (const_int 16) (const_int 48)
12154 (const_int 17) (const_int 49)
12155 (const_int 18) (const_int 50)
12156 (const_int 19) (const_int 51)
12157 (const_int 20) (const_int 52)
12158 (const_int 21) (const_int 53)
12159 (const_int 22) (const_int 54)
12160 (const_int 23) (const_int 55)])))]
12161 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12162 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12163 [(set_attr "type" "sselog")
12164 (set_attr "prefix" "maybe_vex")
12165 (set_attr "mode" "OI")])
12166
12167 (define_insn "vec_interleave_lowv16qi<mask_name>"
12168 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
12169 (vec_select:V16QI
12170 (vec_concat:V32QI
12171 (match_operand:V16QI 1 "register_operand" "0,v")
12172 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
12173 (parallel [(const_int 0) (const_int 16)
12174 (const_int 1) (const_int 17)
12175 (const_int 2) (const_int 18)
12176 (const_int 3) (const_int 19)
12177 (const_int 4) (const_int 20)
12178 (const_int 5) (const_int 21)
12179 (const_int 6) (const_int 22)
12180 (const_int 7) (const_int 23)])))]
12181 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12182 "@
12183 punpcklbw\t{%2, %0|%0, %2}
12184 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12185 [(set_attr "isa" "noavx,avx")
12186 (set_attr "type" "sselog")
12187 (set_attr "prefix_data16" "1,*")
12188 (set_attr "prefix" "orig,vex")
12189 (set_attr "mode" "TI")])
12190
12191 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
12192 [(set (match_operand:V32HI 0 "register_operand" "=v")
12193 (vec_select:V32HI
12194 (vec_concat:V64HI
12195 (match_operand:V32HI 1 "register_operand" "v")
12196 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
12197 (parallel [(const_int 4) (const_int 36)
12198 (const_int 5) (const_int 37)
12199 (const_int 6) (const_int 38)
12200 (const_int 7) (const_int 39)
12201 (const_int 12) (const_int 44)
12202 (const_int 13) (const_int 45)
12203 (const_int 14) (const_int 46)
12204 (const_int 15) (const_int 47)
12205 (const_int 20) (const_int 52)
12206 (const_int 21) (const_int 53)
12207 (const_int 22) (const_int 54)
12208 (const_int 23) (const_int 55)
12209 (const_int 28) (const_int 60)
12210 (const_int 29) (const_int 61)
12211 (const_int 30) (const_int 62)
12212 (const_int 31) (const_int 63)])))]
12213 "TARGET_AVX512BW"
12214 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12215 [(set_attr "type" "sselog")
12216 (set_attr "prefix" "evex")
12217 (set_attr "mode" "XI")])
12218
12219 (define_insn "avx2_interleave_highv16hi<mask_name>"
12220 [(set (match_operand:V16HI 0 "register_operand" "=v")
12221 (vec_select:V16HI
12222 (vec_concat:V32HI
12223 (match_operand:V16HI 1 "register_operand" "v")
12224 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
12225 (parallel [(const_int 4) (const_int 20)
12226 (const_int 5) (const_int 21)
12227 (const_int 6) (const_int 22)
12228 (const_int 7) (const_int 23)
12229 (const_int 12) (const_int 28)
12230 (const_int 13) (const_int 29)
12231 (const_int 14) (const_int 30)
12232 (const_int 15) (const_int 31)])))]
12233 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12234 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12235 [(set_attr "type" "sselog")
12236 (set_attr "prefix" "maybe_evex")
12237 (set_attr "mode" "OI")])
12238
12239 (define_insn "vec_interleave_highv8hi<mask_name>"
12240 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
12241 (vec_select:V8HI
12242 (vec_concat:V16HI
12243 (match_operand:V8HI 1 "register_operand" "0,v")
12244 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
12245 (parallel [(const_int 4) (const_int 12)
12246 (const_int 5) (const_int 13)
12247 (const_int 6) (const_int 14)
12248 (const_int 7) (const_int 15)])))]
12249 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12250 "@
12251 punpckhwd\t{%2, %0|%0, %2}
12252 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12253 [(set_attr "isa" "noavx,avx")
12254 (set_attr "type" "sselog")
12255 (set_attr "prefix_data16" "1,*")
12256 (set_attr "prefix" "orig,maybe_vex")
12257 (set_attr "mode" "TI")])
12258
12259 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
12260 [(set (match_operand:V32HI 0 "register_operand" "=v")
12261 (vec_select:V32HI
12262 (vec_concat:V64HI
12263 (match_operand:V32HI 1 "register_operand" "v")
12264 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
12265 (parallel [(const_int 0) (const_int 32)
12266 (const_int 1) (const_int 33)
12267 (const_int 2) (const_int 34)
12268 (const_int 3) (const_int 35)
12269 (const_int 8) (const_int 40)
12270 (const_int 9) (const_int 41)
12271 (const_int 10) (const_int 42)
12272 (const_int 11) (const_int 43)
12273 (const_int 16) (const_int 48)
12274 (const_int 17) (const_int 49)
12275 (const_int 18) (const_int 50)
12276 (const_int 19) (const_int 51)
12277 (const_int 24) (const_int 56)
12278 (const_int 25) (const_int 57)
12279 (const_int 26) (const_int 58)
12280 (const_int 27) (const_int 59)])))]
12281 "TARGET_AVX512BW"
12282 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12283 [(set_attr "type" "sselog")
12284 (set_attr "prefix" "evex")
12285 (set_attr "mode" "XI")])
12286
12287 (define_insn "avx2_interleave_lowv16hi<mask_name>"
12288 [(set (match_operand:V16HI 0 "register_operand" "=v")
12289 (vec_select:V16HI
12290 (vec_concat:V32HI
12291 (match_operand:V16HI 1 "register_operand" "v")
12292 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
12293 (parallel [(const_int 0) (const_int 16)
12294 (const_int 1) (const_int 17)
12295 (const_int 2) (const_int 18)
12296 (const_int 3) (const_int 19)
12297 (const_int 8) (const_int 24)
12298 (const_int 9) (const_int 25)
12299 (const_int 10) (const_int 26)
12300 (const_int 11) (const_int 27)])))]
12301 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12302 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12303 [(set_attr "type" "sselog")
12304 (set_attr "prefix" "maybe_evex")
12305 (set_attr "mode" "OI")])
12306
12307 (define_insn "vec_interleave_lowv8hi<mask_name>"
12308 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
12309 (vec_select:V8HI
12310 (vec_concat:V16HI
12311 (match_operand:V8HI 1 "register_operand" "0,v")
12312 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
12313 (parallel [(const_int 0) (const_int 8)
12314 (const_int 1) (const_int 9)
12315 (const_int 2) (const_int 10)
12316 (const_int 3) (const_int 11)])))]
12317 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12318 "@
12319 punpcklwd\t{%2, %0|%0, %2}
12320 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12321 [(set_attr "isa" "noavx,avx")
12322 (set_attr "type" "sselog")
12323 (set_attr "prefix_data16" "1,*")
12324 (set_attr "prefix" "orig,maybe_evex")
12325 (set_attr "mode" "TI")])
12326
12327 (define_insn "avx2_interleave_highv8si<mask_name>"
12328 [(set (match_operand:V8SI 0 "register_operand" "=v")
12329 (vec_select:V8SI
12330 (vec_concat:V16SI
12331 (match_operand:V8SI 1 "register_operand" "v")
12332 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
12333 (parallel [(const_int 2) (const_int 10)
12334 (const_int 3) (const_int 11)
12335 (const_int 6) (const_int 14)
12336 (const_int 7) (const_int 15)])))]
12337 "TARGET_AVX2 && <mask_avx512vl_condition>"
12338 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12339 [(set_attr "type" "sselog")
12340 (set_attr "prefix" "maybe_evex")
12341 (set_attr "mode" "OI")])
12342
12343 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
12344 [(set (match_operand:V16SI 0 "register_operand" "=v")
12345 (vec_select:V16SI
12346 (vec_concat:V32SI
12347 (match_operand:V16SI 1 "register_operand" "v")
12348 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
12349 (parallel [(const_int 2) (const_int 18)
12350 (const_int 3) (const_int 19)
12351 (const_int 6) (const_int 22)
12352 (const_int 7) (const_int 23)
12353 (const_int 10) (const_int 26)
12354 (const_int 11) (const_int 27)
12355 (const_int 14) (const_int 30)
12356 (const_int 15) (const_int 31)])))]
12357 "TARGET_AVX512F"
12358 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12359 [(set_attr "type" "sselog")
12360 (set_attr "prefix" "evex")
12361 (set_attr "mode" "XI")])
12362
12363
12364 (define_insn "vec_interleave_highv4si<mask_name>"
12365 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
12366 (vec_select:V4SI
12367 (vec_concat:V8SI
12368 (match_operand:V4SI 1 "register_operand" "0,v")
12369 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
12370 (parallel [(const_int 2) (const_int 6)
12371 (const_int 3) (const_int 7)])))]
12372 "TARGET_SSE2 && <mask_avx512vl_condition>"
12373 "@
12374 punpckhdq\t{%2, %0|%0, %2}
12375 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12376 [(set_attr "isa" "noavx,avx")
12377 (set_attr "type" "sselog")
12378 (set_attr "prefix_data16" "1,*")
12379 (set_attr "prefix" "orig,maybe_vex")
12380 (set_attr "mode" "TI")])
12381
12382 (define_insn "avx2_interleave_lowv8si<mask_name>"
12383 [(set (match_operand:V8SI 0 "register_operand" "=v")
12384 (vec_select:V8SI
12385 (vec_concat:V16SI
12386 (match_operand:V8SI 1 "register_operand" "v")
12387 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
12388 (parallel [(const_int 0) (const_int 8)
12389 (const_int 1) (const_int 9)
12390 (const_int 4) (const_int 12)
12391 (const_int 5) (const_int 13)])))]
12392 "TARGET_AVX2 && <mask_avx512vl_condition>"
12393 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12394 [(set_attr "type" "sselog")
12395 (set_attr "prefix" "maybe_evex")
12396 (set_attr "mode" "OI")])
12397
12398 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
12399 [(set (match_operand:V16SI 0 "register_operand" "=v")
12400 (vec_select:V16SI
12401 (vec_concat:V32SI
12402 (match_operand:V16SI 1 "register_operand" "v")
12403 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
12404 (parallel [(const_int 0) (const_int 16)
12405 (const_int 1) (const_int 17)
12406 (const_int 4) (const_int 20)
12407 (const_int 5) (const_int 21)
12408 (const_int 8) (const_int 24)
12409 (const_int 9) (const_int 25)
12410 (const_int 12) (const_int 28)
12411 (const_int 13) (const_int 29)])))]
12412 "TARGET_AVX512F"
12413 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12414 [(set_attr "type" "sselog")
12415 (set_attr "prefix" "evex")
12416 (set_attr "mode" "XI")])
12417
12418 (define_insn "vec_interleave_lowv4si<mask_name>"
12419 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
12420 (vec_select:V4SI
12421 (vec_concat:V8SI
12422 (match_operand:V4SI 1 "register_operand" "0,v")
12423 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
12424 (parallel [(const_int 0) (const_int 4)
12425 (const_int 1) (const_int 5)])))]
12426 "TARGET_SSE2 && <mask_avx512vl_condition>"
12427 "@
12428 punpckldq\t{%2, %0|%0, %2}
12429 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12430 [(set_attr "isa" "noavx,avx")
12431 (set_attr "type" "sselog")
12432 (set_attr "prefix_data16" "1,*")
12433 (set_attr "prefix" "orig,vex")
12434 (set_attr "mode" "TI")])
12435
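;; The AVX2 unpack instructions interleave only within 128-bit lanes, so the
;; full 256-bit interleave is built from the in-lane low and high unpacks
;; followed by a vperm2i128 that gathers the two matching lanes.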
12436 (define_expand "vec_interleave_high<mode>"
12437 [(match_operand:VI_256 0 "register_operand")
12438 (match_operand:VI_256 1 "register_operand")
12439 (match_operand:VI_256 2 "nonimmediate_operand")]
12440 "TARGET_AVX2"
12441 {
12442 rtx t1 = gen_reg_rtx (<MODE>mode);
12443 rtx t2 = gen_reg_rtx (<MODE>mode);
12444 rtx t3 = gen_reg_rtx (V4DImode);
12445 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
12446 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
12447 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
12448 gen_lowpart (V4DImode, t2),
12449 GEN_INT (1 + (3 << 4))));
12450 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
12451 DONE;
12452 })
12453
12454 (define_expand "vec_interleave_low<mode>"
12455 [(match_operand:VI_256 0 "register_operand")
12456 (match_operand:VI_256 1 "register_operand")
12457 (match_operand:VI_256 2 "nonimmediate_operand")]
12458 "TARGET_AVX2"
12459 {
12460 rtx t1 = gen_reg_rtx (<MODE>mode);
12461 rtx t2 = gen_reg_rtx (<MODE>mode);
12462 rtx t3 = gen_reg_rtx (V4DImode);
12463 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
12464 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
12465 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
12466 gen_lowpart (V4DImode, t2),
12467 GEN_INT (0 + (2 << 4))));
12468 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
12469 DONE;
12470 })
12471
12472 ;; Modes handled by pinsr patterns.
12473 (define_mode_iterator PINSR_MODE
12474 [(V16QI "TARGET_SSE4_1") V8HI
12475 (V4SI "TARGET_SSE4_1")
12476 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
12477
12478 (define_mode_attr sse2p4_1
12479 [(V16QI "sse4_1") (V8HI "sse2")
12480 (V4SI "sse4_1") (V2DI "sse4_1")])
12481
12482 (define_mode_attr pinsr_evex_isa
12483 [(V16QI "avx512bw") (V8HI "avx512bw")
12484 (V4SI "avx512dq") (V2DI "avx512dq")])
12485
12486 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
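;; Operand 3 of the pinsr pattern is a vec_merge mask with exactly one bit
;; set; the output code converts it back into the element index with
;; exact_log2.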
12487 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
12488 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
12489 (vec_merge:PINSR_MODE
12490 (vec_duplicate:PINSR_MODE
12491 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
12492 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
12493 (match_operand:SI 3 "const_int_operand")))]
12494 "TARGET_SSE2
12495 && ((unsigned) exact_log2 (INTVAL (operands[3]))
12496 < GET_MODE_NUNITS (<MODE>mode))"
12497 {
12498 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
12499
12500 switch (which_alternative)
12501 {
12502 case 0:
12503 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
12504 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
12505 /* FALLTHRU */
12506 case 1:
12507 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
12508 case 2:
12509 case 4:
12510 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
12511 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
12512 /* FALLTHRU */
12513 case 3:
12514 case 5:
12515 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12516 default:
12517 gcc_unreachable ();
12518 }
12519 }
12520 [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
12521 (set_attr "type" "sselog")
12522 (set (attr "prefix_rex")
12523 (if_then_else
12524 (and (not (match_test "TARGET_AVX"))
12525 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
12526 (const_string "1")
12527 (const_string "*")))
12528 (set (attr "prefix_data16")
12529 (if_then_else
12530 (and (not (match_test "TARGET_AVX"))
12531 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12532 (const_string "1")
12533 (const_string "*")))
12534 (set (attr "prefix_extra")
12535 (if_then_else
12536 (and (not (match_test "TARGET_AVX"))
12537 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12538 (const_string "*")
12539 (const_string "1")))
12540 (set_attr "length_immediate" "1")
12541 (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
12542 (set_attr "mode" "TI")])
12543
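;; Insertion of a 128-bit quarter into a 512-bit vector.  The expander turns
;; the quarter number in operand 3 into a vec_merge selector; the *_1 insn
;; below maps that selector back onto the vinsert{32x4,64x2} immediate.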
12544 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
12545 [(match_operand:AVX512_VEC 0 "register_operand")
12546 (match_operand:AVX512_VEC 1 "register_operand")
12547 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
12548 (match_operand:SI 3 "const_0_to_3_operand")
12549 (match_operand:AVX512_VEC 4 "register_operand")
12550 (match_operand:<avx512fmaskmode> 5 "register_operand")]
12551 "TARGET_AVX512F"
12552 {
12553 int mask, selector;
12554 mask = INTVAL (operands[3]);
12555 selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
12556 ? 0xFFFF ^ (0xF000 >> mask * 4)
12557 : 0xFF ^ (0xC0 >> mask * 2));
12558 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
12559 (operands[0], operands[1], operands[2], GEN_INT (selector),
12560 operands[4], operands[5]));
12561 DONE;
12562 })
12563
12564 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
12565 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
12566 (vec_merge:AVX512_VEC
12567 (match_operand:AVX512_VEC 1 "register_operand" "v")
12568 (vec_duplicate:AVX512_VEC
12569 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
12570 (match_operand:SI 3 "const_int_operand" "n")))]
12571 "TARGET_AVX512F"
12572 {
12573 int mask;
12574 int selector = INTVAL (operands[3]);
12575
12576 if (selector == 0xFFF || selector == 0x3F)
12577 mask = 0;
12578 else if (selector == 0xF0FF || selector == 0xCF)
12579 mask = 1;
12580 else if (selector == 0xFF0F || selector == 0xF3)
12581 mask = 2;
12582 else if (selector == 0xFFF0 || selector == 0xFC)
12583 mask = 3;
12584 else
12585 gcc_unreachable ();
12586
12587 operands[3] = GEN_INT (mask);
12588
12589 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
12590 }
12591 [(set_attr "type" "sselog")
12592 (set_attr "length_immediate" "1")
12593 (set_attr "prefix" "evex")
12594 (set_attr "mode" "<sseinsnmode>")])
12595
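;; Insertion of a 256-bit half into a 512-bit vector; operand 3 selects the
;; low or high half and the expander dispatches to the vec_set_lo/vec_set_hi
;; patterns below.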
12596 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
12597 [(match_operand:AVX512_VEC_2 0 "register_operand")
12598 (match_operand:AVX512_VEC_2 1 "register_operand")
12599 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
12600 (match_operand:SI 3 "const_0_to_1_operand")
12601 (match_operand:AVX512_VEC_2 4 "register_operand")
12602 (match_operand:<avx512fmaskmode> 5 "register_operand")]
12603 "TARGET_AVX512F"
12604 {
12605 int mask = INTVAL (operands[3]);
12606 if (mask == 0)
12607 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
12608 operands[2], operands[4],
12609 operands[5]));
12610 else
12611 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
12612 operands[2], operands[4],
12613 operands[5]));
12614 DONE;
12615 })
12616
12617 (define_insn "vec_set_lo_<mode><mask_name>"
12618 [(set (match_operand:V16FI 0 "register_operand" "=v")
12619 (vec_concat:V16FI
12620 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12621 (vec_select:<ssehalfvecmode>
12622 (match_operand:V16FI 1 "register_operand" "v")
12623 (parallel [(const_int 8) (const_int 9)
12624 (const_int 10) (const_int 11)
12625 (const_int 12) (const_int 13)
12626 (const_int 14) (const_int 15)]))))]
12627 "TARGET_AVX512DQ"
12628 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
12629 [(set_attr "type" "sselog")
12630 (set_attr "length_immediate" "1")
12631 (set_attr "prefix" "evex")
12632 (set_attr "mode" "<sseinsnmode>")])
12633
12634 (define_insn "vec_set_hi_<mode><mask_name>"
12635 [(set (match_operand:V16FI 0 "register_operand" "=v")
12636 (vec_concat:V16FI
12637 (vec_select:<ssehalfvecmode>
12638 (match_operand:V16FI 1 "register_operand" "v")
12639 (parallel [(const_int 0) (const_int 1)
12640 (const_int 2) (const_int 3)
12641 (const_int 4) (const_int 5)
12642 (const_int 6) (const_int 7)]))
12643 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12644 "TARGET_AVX512DQ"
12645 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
12646 [(set_attr "type" "sselog")
12647 (set_attr "length_immediate" "1")
12648 (set_attr "prefix" "evex")
12649 (set_attr "mode" "<sseinsnmode>")])
12650
12651 (define_insn "vec_set_lo_<mode><mask_name>"
12652 [(set (match_operand:V8FI 0 "register_operand" "=v")
12653 (vec_concat:V8FI
12654 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12655 (vec_select:<ssehalfvecmode>
12656 (match_operand:V8FI 1 "register_operand" "v")
12657 (parallel [(const_int 4) (const_int 5)
12658 (const_int 6) (const_int 7)]))))]
12659 "TARGET_AVX512F"
12660 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
12661 [(set_attr "type" "sselog")
12662 (set_attr "length_immediate" "1")
12663 (set_attr "prefix" "evex")
12664 (set_attr "mode" "XI")])
12665
12666 (define_insn "vec_set_hi_<mode><mask_name>"
12667 [(set (match_operand:V8FI 0 "register_operand" "=v")
12668 (vec_concat:V8FI
12669 (vec_select:<ssehalfvecmode>
12670 (match_operand:V8FI 1 "register_operand" "v")
12671 (parallel [(const_int 0) (const_int 1)
12672 (const_int 2) (const_int 3)]))
12673 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12674 "TARGET_AVX512F"
12675 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
12676 [(set_attr "type" "sselog")
12677 (set_attr "length_immediate" "1")
12678 (set_attr "prefix" "evex")
12679 (set_attr "mode" "XI")])
12680
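;; 256-bit vshuf{f,i}64x2.  The expander expands the 2-bit immediate into
;; explicit vec_select indices and the *_1 insn re-encodes them into the
;; instruction immediate.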
12681 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
12682 [(match_operand:VI8F_256 0 "register_operand")
12683 (match_operand:VI8F_256 1 "register_operand")
12684 (match_operand:VI8F_256 2 "nonimmediate_operand")
12685 (match_operand:SI 3 "const_0_to_3_operand")
12686 (match_operand:VI8F_256 4 "register_operand")
12687 (match_operand:QI 5 "register_operand")]
12688 "TARGET_AVX512DQ"
12689 {
12690 int mask = INTVAL (operands[3]);
12691 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
12692 (operands[0], operands[1], operands[2],
12693 GEN_INT (((mask >> 0) & 1) * 2 + 0),
12694 GEN_INT (((mask >> 0) & 1) * 2 + 1),
12695 GEN_INT (((mask >> 1) & 1) * 2 + 4),
12696 GEN_INT (((mask >> 1) & 1) * 2 + 5),
12697 operands[4], operands[5]));
12698 DONE;
12699 })
12700
12701 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
12702 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
12703 (vec_select:VI8F_256
12704 (vec_concat:<ssedoublemode>
12705 (match_operand:VI8F_256 1 "register_operand" "v")
12706 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
12707 (parallel [(match_operand 3 "const_0_to_3_operand")
12708 (match_operand 4 "const_0_to_3_operand")
12709 (match_operand 5 "const_4_to_7_operand")
12710 (match_operand 6 "const_4_to_7_operand")])))]
12711 "TARGET_AVX512VL
12712 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12713 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
12714 {
12715 int mask;
12716 mask = INTVAL (operands[3]) / 2;
12717 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
12718 operands[3] = GEN_INT (mask);
12719 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
12720 }
12721 [(set_attr "type" "sselog")
12722 (set_attr "length_immediate" "1")
12723 (set_attr "prefix" "evex")
12724 (set_attr "mode" "XI")])
12725
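;; 512-bit vshuf{f,i}64x2: each 2-bit field of the immediate selects a
;; 128-bit chunk, the low two fields from operand 1 and the high two from
;; operand 2.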
12726 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
12727 [(match_operand:V8FI 0 "register_operand")
12728 (match_operand:V8FI 1 "register_operand")
12729 (match_operand:V8FI 2 "nonimmediate_operand")
12730 (match_operand:SI 3 "const_0_to_255_operand")
12731 (match_operand:V8FI 4 "register_operand")
12732 (match_operand:QI 5 "register_operand")]
12733 "TARGET_AVX512F"
12734 {
12735 int mask = INTVAL (operands[3]);
12736 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
12737 (operands[0], operands[1], operands[2],
12738 GEN_INT (((mask >> 0) & 3) * 2),
12739 GEN_INT (((mask >> 0) & 3) * 2 + 1),
12740 GEN_INT (((mask >> 2) & 3) * 2),
12741 GEN_INT (((mask >> 2) & 3) * 2 + 1),
12742 GEN_INT (((mask >> 4) & 3) * 2 + 8),
12743 GEN_INT (((mask >> 4) & 3) * 2 + 9),
12744 GEN_INT (((mask >> 6) & 3) * 2 + 8),
12745 GEN_INT (((mask >> 6) & 3) * 2 + 9),
12746 operands[4], operands[5]));
12747 DONE;
12748 })
12749
12750 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
12751 [(set (match_operand:V8FI 0 "register_operand" "=v")
12752 (vec_select:V8FI
12753 (vec_concat:<ssedoublemode>
12754 (match_operand:V8FI 1 "register_operand" "v")
12755 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
12756 (parallel [(match_operand 3 "const_0_to_7_operand")
12757 (match_operand 4 "const_0_to_7_operand")
12758 (match_operand 5 "const_0_to_7_operand")
12759 (match_operand 6 "const_0_to_7_operand")
12760 (match_operand 7 "const_8_to_15_operand")
12761 (match_operand 8 "const_8_to_15_operand")
12762 (match_operand 9 "const_8_to_15_operand")
12763 (match_operand 10 "const_8_to_15_operand")])))]
12764 "TARGET_AVX512F
12765 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12766 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
12767 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12768 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
12769 {
12770 int mask;
12771 mask = INTVAL (operands[3]) / 2;
12772 mask |= INTVAL (operands[5]) / 2 << 2;
12773 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
12774 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
12775 operands[3] = GEN_INT (mask);
12776
12777 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12778 }
12779 [(set_attr "type" "sselog")
12780 (set_attr "length_immediate" "1")
12781 (set_attr "prefix" "evex")
12782 (set_attr "mode" "<sseinsnmode>")])
12783
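;; 256-bit vshuf{f,i}32x4: bit 0 of the immediate selects the 128-bit chunk
;; taken from operand 1, bit 1 the chunk taken from operand 2.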
12784 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
12785 [(match_operand:VI4F_256 0 "register_operand")
12786 (match_operand:VI4F_256 1 "register_operand")
12787 (match_operand:VI4F_256 2 "nonimmediate_operand")
12788 (match_operand:SI 3 "const_0_to_3_operand")
12789 (match_operand:VI4F_256 4 "register_operand")
12790 (match_operand:QI 5 "register_operand")]
12791 "TARGET_AVX512VL"
12792 {
12793 int mask = INTVAL (operands[3]);
12794 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
12795 (operands[0], operands[1], operands[2],
12796 GEN_INT (((mask >> 0) & 1) * 4 + 0),
12797 GEN_INT (((mask >> 0) & 1) * 4 + 1),
12798 GEN_INT (((mask >> 0) & 1) * 4 + 2),
12799 GEN_INT (((mask >> 0) & 1) * 4 + 3),
12800 GEN_INT (((mask >> 1) & 1) * 4 + 8),
12801 GEN_INT (((mask >> 1) & 1) * 4 + 9),
12802 GEN_INT (((mask >> 1) & 1) * 4 + 10),
12803 GEN_INT (((mask >> 1) & 1) * 4 + 11),
12804 operands[4], operands[5]));
12805 DONE;
12806 })
12807
12808 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
12809 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
12810 (vec_select:VI4F_256
12811 (vec_concat:<ssedoublemode>
12812 (match_operand:VI4F_256 1 "register_operand" "v")
12813 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
12814 (parallel [(match_operand 3 "const_0_to_7_operand")
12815 (match_operand 4 "const_0_to_7_operand")
12816 (match_operand 5 "const_0_to_7_operand")
12817 (match_operand 6 "const_0_to_7_operand")
12818 (match_operand 7 "const_8_to_15_operand")
12819 (match_operand 8 "const_8_to_15_operand")
12820 (match_operand 9 "const_8_to_15_operand")
12821 (match_operand 10 "const_8_to_15_operand")])))]
12822 "TARGET_AVX512VL
12823 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12824 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12825 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12826 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12827 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12828 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
12829 {
12830 int mask;
12831 mask = INTVAL (operands[3]) / 4;
12832 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
12833 operands[3] = GEN_INT (mask);
12834
12835 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12836 }
12837 [(set_attr "type" "sselog")
12838 (set_attr "length_immediate" "1")
12839 (set_attr "prefix" "evex")
12840 (set_attr "mode" "<sseinsnmode>")])
12841
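;; 512-bit vshuf{f,i}32x4, analogous to the 64x2 form above but selecting
;; 4-element chunks.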
12842 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
12843 [(match_operand:V16FI 0 "register_operand")
12844 (match_operand:V16FI 1 "register_operand")
12845 (match_operand:V16FI 2 "nonimmediate_operand")
12846 (match_operand:SI 3 "const_0_to_255_operand")
12847 (match_operand:V16FI 4 "register_operand")
12848 (match_operand:HI 5 "register_operand")]
12849 "TARGET_AVX512F"
12850 {
12851 int mask = INTVAL (operands[3]);
12852 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
12853 (operands[0], operands[1], operands[2],
12854 GEN_INT (((mask >> 0) & 3) * 4),
12855 GEN_INT (((mask >> 0) & 3) * 4 + 1),
12856 GEN_INT (((mask >> 0) & 3) * 4 + 2),
12857 GEN_INT (((mask >> 0) & 3) * 4 + 3),
12858 GEN_INT (((mask >> 2) & 3) * 4),
12859 GEN_INT (((mask >> 2) & 3) * 4 + 1),
12860 GEN_INT (((mask >> 2) & 3) * 4 + 2),
12861 GEN_INT (((mask >> 2) & 3) * 4 + 3),
12862 GEN_INT (((mask >> 4) & 3) * 4 + 16),
12863 GEN_INT (((mask >> 4) & 3) * 4 + 17),
12864 GEN_INT (((mask >> 4) & 3) * 4 + 18),
12865 GEN_INT (((mask >> 4) & 3) * 4 + 19),
12866 GEN_INT (((mask >> 6) & 3) * 4 + 16),
12867 GEN_INT (((mask >> 6) & 3) * 4 + 17),
12868 GEN_INT (((mask >> 6) & 3) * 4 + 18),
12869 GEN_INT (((mask >> 6) & 3) * 4 + 19),
12870 operands[4], operands[5]));
12871 DONE;
12872 })
12873
12874 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
12875 [(set (match_operand:V16FI 0 "register_operand" "=v")
12876 (vec_select:V16FI
12877 (vec_concat:<ssedoublemode>
12878 (match_operand:V16FI 1 "register_operand" "v")
12879 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
12880 (parallel [(match_operand 3 "const_0_to_15_operand")
12881 (match_operand 4 "const_0_to_15_operand")
12882 (match_operand 5 "const_0_to_15_operand")
12883 (match_operand 6 "const_0_to_15_operand")
12884 (match_operand 7 "const_0_to_15_operand")
12885 (match_operand 8 "const_0_to_15_operand")
12886 (match_operand 9 "const_0_to_15_operand")
12887 (match_operand 10 "const_0_to_15_operand")
12888 (match_operand 11 "const_16_to_31_operand")
12889 (match_operand 12 "const_16_to_31_operand")
12890 (match_operand 13 "const_16_to_31_operand")
12891 (match_operand 14 "const_16_to_31_operand")
12892 (match_operand 15 "const_16_to_31_operand")
12893 (match_operand 16 "const_16_to_31_operand")
12894 (match_operand 17 "const_16_to_31_operand")
12895 (match_operand 18 "const_16_to_31_operand")])))]
12896 "TARGET_AVX512F
12897 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12898 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12899 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12900 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12901 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12902 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
12903 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
12904 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
12905 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
12906 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
12907 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
12908 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
12909 {
12910 int mask;
12911 mask = INTVAL (operands[3]) / 4;
12912 mask |= INTVAL (operands[7]) / 4 << 2;
12913 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
12914 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
12915 operands[3] = GEN_INT (mask);
12916
12917 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
12918 }
12919 [(set_attr "type" "sselog")
12920 (set_attr "length_immediate" "1")
12921 (set_attr "prefix" "evex")
12922 (set_attr "mode" "<sseinsnmode>")])
12923
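;; vpshufd applies the same four 2-bit selectors to every 128-bit lane; the
;; expanders below turn the immediate into explicit element indices, offset
;; by 4 for each higher lane.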
12924 (define_expand "avx512f_pshufdv3_mask"
12925 [(match_operand:V16SI 0 "register_operand")
12926 (match_operand:V16SI 1 "nonimmediate_operand")
12927 (match_operand:SI 2 "const_0_to_255_operand")
12928 (match_operand:V16SI 3 "register_operand")
12929 (match_operand:HI 4 "register_operand")]
12930 "TARGET_AVX512F"
12931 {
12932 int mask = INTVAL (operands[2]);
12933 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
12934 GEN_INT ((mask >> 0) & 3),
12935 GEN_INT ((mask >> 2) & 3),
12936 GEN_INT ((mask >> 4) & 3),
12937 GEN_INT ((mask >> 6) & 3),
12938 GEN_INT (((mask >> 0) & 3) + 4),
12939 GEN_INT (((mask >> 2) & 3) + 4),
12940 GEN_INT (((mask >> 4) & 3) + 4),
12941 GEN_INT (((mask >> 6) & 3) + 4),
12942 GEN_INT (((mask >> 0) & 3) + 8),
12943 GEN_INT (((mask >> 2) & 3) + 8),
12944 GEN_INT (((mask >> 4) & 3) + 8),
12945 GEN_INT (((mask >> 6) & 3) + 8),
12946 GEN_INT (((mask >> 0) & 3) + 12),
12947 GEN_INT (((mask >> 2) & 3) + 12),
12948 GEN_INT (((mask >> 4) & 3) + 12),
12949 GEN_INT (((mask >> 6) & 3) + 12),
12950 operands[3], operands[4]));
12951 DONE;
12952 })
12953
12954 (define_insn "avx512f_pshufd_1<mask_name>"
12955 [(set (match_operand:V16SI 0 "register_operand" "=v")
12956 (vec_select:V16SI
12957 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
12958 (parallel [(match_operand 2 "const_0_to_3_operand")
12959 (match_operand 3 "const_0_to_3_operand")
12960 (match_operand 4 "const_0_to_3_operand")
12961 (match_operand 5 "const_0_to_3_operand")
12962 (match_operand 6 "const_4_to_7_operand")
12963 (match_operand 7 "const_4_to_7_operand")
12964 (match_operand 8 "const_4_to_7_operand")
12965 (match_operand 9 "const_4_to_7_operand")
12966 (match_operand 10 "const_8_to_11_operand")
12967 (match_operand 11 "const_8_to_11_operand")
12968 (match_operand 12 "const_8_to_11_operand")
12969 (match_operand 13 "const_8_to_11_operand")
12970 (match_operand 14 "const_12_to_15_operand")
12971 (match_operand 15 "const_12_to_15_operand")
12972 (match_operand 16 "const_12_to_15_operand")
12973 (match_operand 17 "const_12_to_15_operand")])))]
12974 "TARGET_AVX512F
12975 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12976 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12977 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12978 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
12979 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
12980 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
12981 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
12982 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
12983 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
12984 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
12985 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
12986 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
12987 {
12988 int mask = 0;
12989 mask |= INTVAL (operands[2]) << 0;
12990 mask |= INTVAL (operands[3]) << 2;
12991 mask |= INTVAL (operands[4]) << 4;
12992 mask |= INTVAL (operands[5]) << 6;
12993 operands[2] = GEN_INT (mask);
12994
12995 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
12996 }
12997 [(set_attr "type" "sselog1")
12998 (set_attr "prefix" "evex")
12999 (set_attr "length_immediate" "1")
13000 (set_attr "mode" "XI")])
13001
13002 (define_expand "avx512vl_pshufdv3_mask"
13003 [(match_operand:V8SI 0 "register_operand")
13004 (match_operand:V8SI 1 "nonimmediate_operand")
13005 (match_operand:SI 2 "const_0_to_255_operand")
13006 (match_operand:V8SI 3 "register_operand")
13007 (match_operand:QI 4 "register_operand")]
13008 "TARGET_AVX512VL"
13009 {
13010 int mask = INTVAL (operands[2]);
13011 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
13012 GEN_INT ((mask >> 0) & 3),
13013 GEN_INT ((mask >> 2) & 3),
13014 GEN_INT ((mask >> 4) & 3),
13015 GEN_INT ((mask >> 6) & 3),
13016 GEN_INT (((mask >> 0) & 3) + 4),
13017 GEN_INT (((mask >> 2) & 3) + 4),
13018 GEN_INT (((mask >> 4) & 3) + 4),
13019 GEN_INT (((mask >> 6) & 3) + 4),
13020 operands[3], operands[4]));
13021 DONE;
13022 })
13023
13024 (define_expand "avx2_pshufdv3"
13025 [(match_operand:V8SI 0 "register_operand")
13026 (match_operand:V8SI 1 "nonimmediate_operand")
13027 (match_operand:SI 2 "const_0_to_255_operand")]
13028 "TARGET_AVX2"
13029 {
13030 int mask = INTVAL (operands[2]);
13031 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
13032 GEN_INT ((mask >> 0) & 3),
13033 GEN_INT ((mask >> 2) & 3),
13034 GEN_INT ((mask >> 4) & 3),
13035 GEN_INT ((mask >> 6) & 3),
13036 GEN_INT (((mask >> 0) & 3) + 4),
13037 GEN_INT (((mask >> 2) & 3) + 4),
13038 GEN_INT (((mask >> 4) & 3) + 4),
13039 GEN_INT (((mask >> 6) & 3) + 4)));
13040 DONE;
13041 })
13042
13043 (define_insn "avx2_pshufd_1<mask_name>"
13044 [(set (match_operand:V8SI 0 "register_operand" "=v")
13045 (vec_select:V8SI
13046 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
13047 (parallel [(match_operand 2 "const_0_to_3_operand")
13048 (match_operand 3 "const_0_to_3_operand")
13049 (match_operand 4 "const_0_to_3_operand")
13050 (match_operand 5 "const_0_to_3_operand")
13051 (match_operand 6 "const_4_to_7_operand")
13052 (match_operand 7 "const_4_to_7_operand")
13053 (match_operand 8 "const_4_to_7_operand")
13054 (match_operand 9 "const_4_to_7_operand")])))]
13055 "TARGET_AVX2
13056 && <mask_avx512vl_condition>
13057 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
13058 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
13059 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
13060 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
13061 {
13062 int mask = 0;
13063 mask |= INTVAL (operands[2]) << 0;
13064 mask |= INTVAL (operands[3]) << 2;
13065 mask |= INTVAL (operands[4]) << 4;
13066 mask |= INTVAL (operands[5]) << 6;
13067 operands[2] = GEN_INT (mask);
13068
13069 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13070 }
13071 [(set_attr "type" "sselog1")
13072 (set_attr "prefix" "maybe_evex")
13073 (set_attr "length_immediate" "1")
13074 (set_attr "mode" "OI")])
13075
13076 (define_expand "avx512vl_pshufd_mask"
13077 [(match_operand:V4SI 0 "register_operand")
13078 (match_operand:V4SI 1 "nonimmediate_operand")
13079 (match_operand:SI 2 "const_0_to_255_operand")
13080 (match_operand:V4SI 3 "register_operand")
13081 (match_operand:QI 4 "register_operand")]
13082 "TARGET_AVX512VL"
13083 {
13084 int mask = INTVAL (operands[2]);
13085 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
13086 GEN_INT ((mask >> 0) & 3),
13087 GEN_INT ((mask >> 2) & 3),
13088 GEN_INT ((mask >> 4) & 3),
13089 GEN_INT ((mask >> 6) & 3),
13090 operands[3], operands[4]));
13091 DONE;
13092 })
13093
13094 (define_expand "sse2_pshufd"
13095 [(match_operand:V4SI 0 "register_operand")
13096 (match_operand:V4SI 1 "vector_operand")
13097 (match_operand:SI 2 "const_int_operand")]
13098 "TARGET_SSE2"
13099 {
13100 int mask = INTVAL (operands[2]);
13101 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
13102 GEN_INT ((mask >> 0) & 3),
13103 GEN_INT ((mask >> 2) & 3),
13104 GEN_INT ((mask >> 4) & 3),
13105 GEN_INT ((mask >> 6) & 3)));
13106 DONE;
13107 })
13108
13109 (define_insn "sse2_pshufd_1<mask_name>"
13110 [(set (match_operand:V4SI 0 "register_operand" "=v")
13111 (vec_select:V4SI
13112 (match_operand:V4SI 1 "vector_operand" "vBm")
13113 (parallel [(match_operand 2 "const_0_to_3_operand")
13114 (match_operand 3 "const_0_to_3_operand")
13115 (match_operand 4 "const_0_to_3_operand")
13116 (match_operand 5 "const_0_to_3_operand")])))]
13117 "TARGET_SSE2 && <mask_avx512vl_condition>"
13118 {
13119 int mask = 0;
13120 mask |= INTVAL (operands[2]) << 0;
13121 mask |= INTVAL (operands[3]) << 2;
13122 mask |= INTVAL (operands[4]) << 4;
13123 mask |= INTVAL (operands[5]) << 6;
13124 operands[2] = GEN_INT (mask);
13125
13126 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13127 }
13128 [(set_attr "type" "sselog1")
13129 (set_attr "prefix_data16" "1")
13130 (set_attr "prefix" "<mask_prefix2>")
13131 (set_attr "length_immediate" "1")
13132 (set_attr "mode" "TI")])
13133
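;; vpshuflw shuffles the four low words of each 128-bit lane and leaves the
;; four high words unchanged.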
13134 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
13135 [(set (match_operand:V32HI 0 "register_operand" "=v")
13136 (unspec:V32HI
13137 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
13138 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13139 UNSPEC_PSHUFLW))]
13140 "TARGET_AVX512BW"
13141 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13142 [(set_attr "type" "sselog")
13143 (set_attr "prefix" "evex")
13144 (set_attr "mode" "XI")])
13145
13146 (define_expand "avx512vl_pshuflwv3_mask"
13147 [(match_operand:V16HI 0 "register_operand")
13148 (match_operand:V16HI 1 "nonimmediate_operand")
13149 (match_operand:SI 2 "const_0_to_255_operand")
13150 (match_operand:V16HI 3 "register_operand")
13151 (match_operand:HI 4 "register_operand")]
13152 "TARGET_AVX512VL && TARGET_AVX512BW"
13153 {
13154 int mask = INTVAL (operands[2]);
13155 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
13156 GEN_INT ((mask >> 0) & 3),
13157 GEN_INT ((mask >> 2) & 3),
13158 GEN_INT ((mask >> 4) & 3),
13159 GEN_INT ((mask >> 6) & 3),
13160 GEN_INT (((mask >> 0) & 3) + 8),
13161 GEN_INT (((mask >> 2) & 3) + 8),
13162 GEN_INT (((mask >> 4) & 3) + 8),
13163 GEN_INT (((mask >> 6) & 3) + 8),
13164 operands[3], operands[4]));
13165 DONE;
13166 })
13167
13168 (define_expand "avx2_pshuflwv3"
13169 [(match_operand:V16HI 0 "register_operand")
13170 (match_operand:V16HI 1 "nonimmediate_operand")
13171 (match_operand:SI 2 "const_0_to_255_operand")]
13172 "TARGET_AVX2"
13173 {
13174 int mask = INTVAL (operands[2]);
13175 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
13176 GEN_INT ((mask >> 0) & 3),
13177 GEN_INT ((mask >> 2) & 3),
13178 GEN_INT ((mask >> 4) & 3),
13179 GEN_INT ((mask >> 6) & 3),
13180 GEN_INT (((mask >> 0) & 3) + 8),
13181 GEN_INT (((mask >> 2) & 3) + 8),
13182 GEN_INT (((mask >> 4) & 3) + 8),
13183 GEN_INT (((mask >> 6) & 3) + 8)));
13184 DONE;
13185 })
13186
13187 (define_insn "avx2_pshuflw_1<mask_name>"
13188 [(set (match_operand:V16HI 0 "register_operand" "=v")
13189 (vec_select:V16HI
13190 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
13191 (parallel [(match_operand 2 "const_0_to_3_operand")
13192 (match_operand 3 "const_0_to_3_operand")
13193 (match_operand 4 "const_0_to_3_operand")
13194 (match_operand 5 "const_0_to_3_operand")
13195 (const_int 4)
13196 (const_int 5)
13197 (const_int 6)
13198 (const_int 7)
13199 (match_operand 6 "const_8_to_11_operand")
13200 (match_operand 7 "const_8_to_11_operand")
13201 (match_operand 8 "const_8_to_11_operand")
13202 (match_operand 9 "const_8_to_11_operand")
13203 (const_int 12)
13204 (const_int 13)
13205 (const_int 14)
13206 (const_int 15)])))]
13207 "TARGET_AVX2
13208 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
13209 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13210 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13211 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13212 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
13213 {
13214 int mask = 0;
13215 mask |= INTVAL (operands[2]) << 0;
13216 mask |= INTVAL (operands[3]) << 2;
13217 mask |= INTVAL (operands[4]) << 4;
13218 mask |= INTVAL (operands[5]) << 6;
13219 operands[2] = GEN_INT (mask);
13220
13221 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13222 }
13223 [(set_attr "type" "sselog")
13224 (set_attr "prefix" "maybe_evex")
13225 (set_attr "length_immediate" "1")
13226 (set_attr "mode" "OI")])
13227
13228 (define_expand "avx512vl_pshuflw_mask"
13229 [(match_operand:V8HI 0 "register_operand")
13230 (match_operand:V8HI 1 "nonimmediate_operand")
13231 (match_operand:SI 2 "const_0_to_255_operand")
13232 (match_operand:V8HI 3 "register_operand")
13233 (match_operand:QI 4 "register_operand")]
13234 "TARGET_AVX512VL && TARGET_AVX512BW"
13235 {
13236 int mask = INTVAL (operands[2]);
13237 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
13238 GEN_INT ((mask >> 0) & 3),
13239 GEN_INT ((mask >> 2) & 3),
13240 GEN_INT ((mask >> 4) & 3),
13241 GEN_INT ((mask >> 6) & 3),
13242 operands[3], operands[4]));
13243 DONE;
13244 })
13245
13246 (define_expand "sse2_pshuflw"
13247 [(match_operand:V8HI 0 "register_operand")
13248 (match_operand:V8HI 1 "vector_operand")
13249 (match_operand:SI 2 "const_int_operand")]
13250 "TARGET_SSE2"
13251 {
13252 int mask = INTVAL (operands[2]);
13253 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
13254 GEN_INT ((mask >> 0) & 3),
13255 GEN_INT ((mask >> 2) & 3),
13256 GEN_INT ((mask >> 4) & 3),
13257 GEN_INT ((mask >> 6) & 3)));
13258 DONE;
13259 })
13260
13261 (define_insn "sse2_pshuflw_1<mask_name>"
13262 [(set (match_operand:V8HI 0 "register_operand" "=v")
13263 (vec_select:V8HI
13264 (match_operand:V8HI 1 "vector_operand" "vBm")
13265 (parallel [(match_operand 2 "const_0_to_3_operand")
13266 (match_operand 3 "const_0_to_3_operand")
13267 (match_operand 4 "const_0_to_3_operand")
13268 (match_operand 5 "const_0_to_3_operand")
13269 (const_int 4)
13270 (const_int 5)
13271 (const_int 6)
13272 (const_int 7)])))]
13273 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
13274 {
13275 int mask = 0;
13276 mask |= INTVAL (operands[2]) << 0;
13277 mask |= INTVAL (operands[3]) << 2;
13278 mask |= INTVAL (operands[4]) << 4;
13279 mask |= INTVAL (operands[5]) << 6;
13280 operands[2] = GEN_INT (mask);
13281
13282 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13283 }
13284 [(set_attr "type" "sselog")
13285 (set_attr "prefix_data16" "0")
13286 (set_attr "prefix_rep" "1")
13287 (set_attr "prefix" "maybe_vex")
13288 (set_attr "length_immediate" "1")
13289 (set_attr "mode" "TI")])
13290
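;; vpshufhw shuffles the four high words of each 128-bit lane and leaves the
;; four low words unchanged.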
13291 (define_expand "avx2_pshufhwv3"
13292 [(match_operand:V16HI 0 "register_operand")
13293 (match_operand:V16HI 1 "nonimmediate_operand")
13294 (match_operand:SI 2 "const_0_to_255_operand")]
13295 "TARGET_AVX2"
13296 {
13297 int mask = INTVAL (operands[2]);
13298 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
13299 GEN_INT (((mask >> 0) & 3) + 4),
13300 GEN_INT (((mask >> 2) & 3) + 4),
13301 GEN_INT (((mask >> 4) & 3) + 4),
13302 GEN_INT (((mask >> 6) & 3) + 4),
13303 GEN_INT (((mask >> 0) & 3) + 12),
13304 GEN_INT (((mask >> 2) & 3) + 12),
13305 GEN_INT (((mask >> 4) & 3) + 12),
13306 GEN_INT (((mask >> 6) & 3) + 12)));
13307 DONE;
13308 })
13309
13310 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
13311 [(set (match_operand:V32HI 0 "register_operand" "=v")
13312 (unspec:V32HI
13313 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
13314 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13315 UNSPEC_PSHUFHW))]
13316 "TARGET_AVX512BW"
13317 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13318 [(set_attr "type" "sselog")
13319 (set_attr "prefix" "evex")
13320 (set_attr "mode" "XI")])
13321
13322 (define_expand "avx512vl_pshufhwv3_mask"
13323 [(match_operand:V16HI 0 "register_operand")
13324 (match_operand:V16HI 1 "nonimmediate_operand")
13325 (match_operand:SI 2 "const_0_to_255_operand")
13326 (match_operand:V16HI 3 "register_operand")
13327 (match_operand:HI 4 "register_operand")]
13328 "TARGET_AVX512VL && TARGET_AVX512BW"
13329 {
13330 int mask = INTVAL (operands[2]);
13331 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
13332 GEN_INT (((mask >> 0) & 3) + 4),
13333 GEN_INT (((mask >> 2) & 3) + 4),
13334 GEN_INT (((mask >> 4) & 3) + 4),
13335 GEN_INT (((mask >> 6) & 3) + 4),
13336 GEN_INT (((mask >> 0) & 3) + 12),
13337 GEN_INT (((mask >> 2) & 3) + 12),
13338 GEN_INT (((mask >> 4) & 3) + 12),
13339 GEN_INT (((mask >> 6) & 3) + 12),
13340 operands[3], operands[4]));
13341 DONE;
13342 })
13343
13344 (define_insn "avx2_pshufhw_1<mask_name>"
13345 [(set (match_operand:V16HI 0 "register_operand" "=v")
13346 (vec_select:V16HI
13347 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
13348 (parallel [(const_int 0)
13349 (const_int 1)
13350 (const_int 2)
13351 (const_int 3)
13352 (match_operand 2 "const_4_to_7_operand")
13353 (match_operand 3 "const_4_to_7_operand")
13354 (match_operand 4 "const_4_to_7_operand")
13355 (match_operand 5 "const_4_to_7_operand")
13356 (const_int 8)
13357 (const_int 9)
13358 (const_int 10)
13359 (const_int 11)
13360 (match_operand 6 "const_12_to_15_operand")
13361 (match_operand 7 "const_12_to_15_operand")
13362 (match_operand 8 "const_12_to_15_operand")
13363 (match_operand 9 "const_12_to_15_operand")])))]
13364 "TARGET_AVX2
13365 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
13366 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13367 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13368 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13369 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
13370 {
13371 int mask = 0;
13372 mask |= (INTVAL (operands[2]) - 4) << 0;
13373 mask |= (INTVAL (operands[3]) - 4) << 2;
13374 mask |= (INTVAL (operands[4]) - 4) << 4;
13375 mask |= (INTVAL (operands[5]) - 4) << 6;
13376 operands[2] = GEN_INT (mask);
13377
13378 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13379 }
13380 [(set_attr "type" "sselog")
13381 (set_attr "prefix" "maybe_evex")
13382 (set_attr "length_immediate" "1")
13383 (set_attr "mode" "OI")])
13384
13385 (define_expand "avx512vl_pshufhw_mask"
13386 [(match_operand:V8HI 0 "register_operand")
13387 (match_operand:V8HI 1 "nonimmediate_operand")
13388 (match_operand:SI 2 "const_0_to_255_operand")
13389 (match_operand:V8HI 3 "register_operand")
13390 (match_operand:QI 4 "register_operand")]
13391 "TARGET_AVX512VL && TARGET_AVX512BW"
13392 {
13393 int mask = INTVAL (operands[2]);
13394 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
13395 GEN_INT (((mask >> 0) & 3) + 4),
13396 GEN_INT (((mask >> 2) & 3) + 4),
13397 GEN_INT (((mask >> 4) & 3) + 4),
13398 GEN_INT (((mask >> 6) & 3) + 4),
13399 operands[3], operands[4]));
13400 DONE;
13401 })
13402
13403 (define_expand "sse2_pshufhw"
13404 [(match_operand:V8HI 0 "register_operand")
13405 (match_operand:V8HI 1 "vector_operand")
13406 (match_operand:SI 2 "const_int_operand")]
13407 "TARGET_SSE2"
13408 {
13409 int mask = INTVAL (operands[2]);
13410 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
13411 GEN_INT (((mask >> 0) & 3) + 4),
13412 GEN_INT (((mask >> 2) & 3) + 4),
13413 GEN_INT (((mask >> 4) & 3) + 4),
13414 GEN_INT (((mask >> 6) & 3) + 4)));
13415 DONE;
13416 })
13417
13418 (define_insn "sse2_pshufhw_1<mask_name>"
13419 [(set (match_operand:V8HI 0 "register_operand" "=v")
13420 (vec_select:V8HI
13421 (match_operand:V8HI 1 "vector_operand" "vBm")
13422 (parallel [(const_int 0)
13423 (const_int 1)
13424 (const_int 2)
13425 (const_int 3)
13426 (match_operand 2 "const_4_to_7_operand")
13427 (match_operand 3 "const_4_to_7_operand")
13428 (match_operand 4 "const_4_to_7_operand")
13429 (match_operand 5 "const_4_to_7_operand")])))]
13430 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
13431 {
13432 int mask = 0;
13433 mask |= (INTVAL (operands[2]) - 4) << 0;
13434 mask |= (INTVAL (operands[3]) - 4) << 2;
13435 mask |= (INTVAL (operands[4]) - 4) << 4;
13436 mask |= (INTVAL (operands[5]) - 4) << 6;
13437 operands[2] = GEN_INT (mask);
13438
13439 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13440 }
13441 [(set_attr "type" "sselog")
13442 (set_attr "prefix_rep" "1")
13443 (set_attr "prefix_data16" "0")
13444 (set_attr "prefix" "maybe_vex")
13445 (set_attr "length_immediate" "1")
13446 (set_attr "mode" "TI")])
13447
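;; Scalar loads into element 0: sse2_loadd zeroes the upper elements, while
;; sse2_loadld can also merge with an existing vector.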
13448 (define_expand "sse2_loadd"
13449 [(set (match_operand:V4SI 0 "register_operand")
13450 (vec_merge:V4SI
13451 (vec_duplicate:V4SI
13452 (match_operand:SI 1 "nonimmediate_operand"))
13453 (match_dup 2)
13454 (const_int 1)))]
13455 "TARGET_SSE"
13456 "operands[2] = CONST0_RTX (V4SImode);")
13457
13458 (define_insn "sse2_loadld"
13459 [(set (match_operand:V4SI 0 "register_operand" "=v,Yi,x,x,v")
13460 (vec_merge:V4SI
13461 (vec_duplicate:V4SI
13462 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
13463 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
13464 (const_int 1)))]
13465 "TARGET_SSE"
13466 "@
13467 %vmovd\t{%2, %0|%0, %2}
13468 %vmovd\t{%2, %0|%0, %2}
13469 movss\t{%2, %0|%0, %2}
13470 movss\t{%2, %0|%0, %2}
13471 vmovss\t{%2, %1, %0|%0, %1, %2}"
13472 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
13473 (set_attr "type" "ssemov")
13474 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
13475 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
13476
13477 ;; QI and HI modes handled by pextr patterns.
13478 (define_mode_iterator PEXTR_MODE12
13479 [(V16QI "TARGET_SSE4_1") V8HI])
13480
13481 (define_insn "*vec_extract<mode>"
13482 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
13483 (vec_select:<ssescalarmode>
13484 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
13485 (parallel
13486 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
13487 "TARGET_SSE2"
13488 "@
13489 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13490 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
13491 vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13492 vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13493 [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
13494 (set_attr "type" "sselog1")
13495 (set_attr "prefix_data16" "1")
13496 (set (attr "prefix_extra")
13497 (if_then_else
13498 (and (eq_attr "alternative" "0,2")
13499 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13500 (const_string "*")
13501 (const_string "1")))
13502 (set_attr "length_immediate" "1")
13503 (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
13504 (set_attr "mode" "TI")])
13505
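;; pextr with the result zero-extended to SImode or DImode.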
13506 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
13507 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
13508 (zero_extend:SWI48
13509 (vec_select:<PEXTR_MODE12:ssescalarmode>
13510 (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
13511 (parallel
13512 [(match_operand:SI 2
13513 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
13514 "TARGET_SSE2"
13515 "@
13516 %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13517 vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
13518 [(set_attr "isa" "*,avx512bw")
13519 (set_attr "type" "sselog1")
13520 (set_attr "prefix_data16" "1")
13521 (set (attr "prefix_extra")
13522 (if_then_else
13523 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
13524 (const_string "*")
13525 (const_string "1")))
13526 (set_attr "length_immediate" "1")
13527 (set_attr "prefix" "maybe_vex")
13528 (set_attr "mode" "TI")])
13529
13530 (define_insn "*vec_extract<mode>_mem"
13531 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
13532 (vec_select:<ssescalarmode>
13533 (match_operand:VI12_128 1 "memory_operand" "o")
13534 (parallel
13535 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13536 "TARGET_SSE"
13537 "#")
13538
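;; Extraction of element 0 is emitted as "#" and split after reload into a
;; plain scalar move.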
13539 (define_insn "*vec_extract<ssevecmodelower>_0"
13540 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,v ,m")
13541 (vec_select:SWI48
13542 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,vm,v")
13543 (parallel [(const_int 0)])))]
13544 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13545 "#")
13546
13547 (define_insn "*vec_extractv2di_0_sse"
13548 [(set (match_operand:DI 0 "nonimmediate_operand" "=v,m")
13549 (vec_select:DI
13550 (match_operand:V2DI 1 "nonimmediate_operand" "vm,v")
13551 (parallel [(const_int 0)])))]
13552 "TARGET_SSE && !TARGET_64BIT
13553 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13554 "#")
13555
13556 (define_split
13557 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13558 (vec_select:SWI48x
13559 (match_operand:<ssevecmode> 1 "register_operand")
13560 (parallel [(const_int 0)])))]
13561 "TARGET_SSE && reload_completed"
13562 [(set (match_dup 0) (match_dup 1))]
13563 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
13564
13565 (define_insn "*vec_extractv4si_0_zext_sse4"
13566 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
13567 (zero_extend:DI
13568 (vec_select:SI
13569 (match_operand:V4SI 1 "register_operand" "Yj,x,v")
13570 (parallel [(const_int 0)]))))]
13571 "TARGET_SSE4_1"
13572 "#"
13573 [(set_attr "isa" "x64,*,avx512f")])
13574
13575 (define_insn "*vec_extractv4si_0_zext"
13576 [(set (match_operand:DI 0 "register_operand" "=r")
13577 (zero_extend:DI
13578 (vec_select:SI
13579 (match_operand:V4SI 1 "register_operand" "x")
13580 (parallel [(const_int 0)]))))]
13581 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
13582 "#")
13583
13584 (define_split
13585 [(set (match_operand:DI 0 "register_operand")
13586 (zero_extend:DI
13587 (vec_select:SI
13588 (match_operand:V4SI 1 "register_operand")
13589 (parallel [(const_int 0)]))))]
13590 "TARGET_SSE2 && reload_completed"
13591 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13592 "operands[1] = gen_lowpart (SImode, operands[1]);")
13593
13594 (define_insn "*vec_extractv4si"
13595 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
13596 (vec_select:SI
13597 (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
13598 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
13599 "TARGET_SSE4_1"
13600 {
13601 switch (which_alternative)
13602 {
13603 case 0:
13604 case 1:
13605 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
13606
13607 case 2:
13608 case 3:
13609 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
13610 return "psrldq\t{%2, %0|%0, %2}";
13611
13612 case 4:
13613 case 5:
13614 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
13615 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
13616
13617 default:
13618 gcc_unreachable ();
13619 }
13620 }
13621 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
13622 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
13623 (set (attr "prefix_extra")
13624 (if_then_else (eq_attr "alternative" "0,1")
13625 (const_string "1")
13626 (const_string "*")))
13627 (set_attr "length_immediate" "1")
13628 (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
13629 (set_attr "mode" "TI")])
13630
13631 (define_insn "*vec_extractv4si_zext"
13632 [(set (match_operand:DI 0 "register_operand" "=r,r")
13633 (zero_extend:DI
13634 (vec_select:SI
13635 (match_operand:V4SI 1 "register_operand" "x,v")
13636 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13637 "TARGET_64BIT && TARGET_SSE4_1"
13638 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
13639 [(set_attr "isa" "*,avx512dq")
13640 (set_attr "type" "sselog1")
13641 (set_attr "prefix_extra" "1")
13642 (set_attr "length_immediate" "1")
13643 (set_attr "prefix" "maybe_vex")
13644 (set_attr "mode" "TI")])
13645
13646 (define_insn "*vec_extractv4si_mem"
13647 [(set (match_operand:SI 0 "register_operand" "=x,r")
13648 (vec_select:SI
13649 (match_operand:V4SI 1 "memory_operand" "o,o")
13650 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
13651 "TARGET_SSE"
13652 "#")
13653
13654 (define_insn_and_split "*vec_extractv4si_zext_mem"
13655 [(set (match_operand:DI 0 "register_operand" "=x,r")
13656 (zero_extend:DI
13657 (vec_select:SI
13658 (match_operand:V4SI 1 "memory_operand" "o,o")
13659 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13660 "TARGET_64BIT && TARGET_SSE"
13661 "#"
13662 "&& reload_completed"
13663 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13664 {
13665 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
13666 })
13667
13668 (define_insn "*vec_extractv2di_1"
13669 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
13670 (vec_select:DI
13671 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
13672 (parallel [(const_int 1)])))]
13673 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13674 "@
13675 %vpextrq\t{$1, %1, %0|%0, %1, 1}
13676 vpextrq\t{$1, %1, %0|%0, %1, 1}
13677 %vmovhps\t{%1, %0|%0, %1}
13678 psrldq\t{$8, %0|%0, 8}
13679 vpsrldq\t{$8, %1, %0|%0, %1, 8}
13680 vpsrldq\t{$8, %1, %0|%0, %1, 8}
13681 movhlps\t{%1, %0|%0, %1}
13682 #
13683 #"
13684 [(set (attr "isa")
13685 (cond [(eq_attr "alternative" "0")
13686 (const_string "x64_sse4")
13687 (eq_attr "alternative" "1")
13688 (const_string "x64_avx512dq")
13689 (eq_attr "alternative" "3")
13690 (const_string "sse2_noavx")
13691 (eq_attr "alternative" "4")
13692 (const_string "avx")
13693 (eq_attr "alternative" "5")
13694 (const_string "avx512bw")
13695 (eq_attr "alternative" "6")
13696 (const_string "noavx")
13697 (eq_attr "alternative" "8")
13698 (const_string "x64")
13699 ]
13700 (const_string "*")))
13701 (set (attr "type")
13702 (cond [(eq_attr "alternative" "2,6,7")
13703 (const_string "ssemov")
13704 (eq_attr "alternative" "3,4,5")
13705 (const_string "sseishft1")
13706 (eq_attr "alternative" "8")
13707 (const_string "imov")
13708 ]
13709 (const_string "sselog1")))
13710 (set (attr "length_immediate")
13711 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
13712 (const_string "1")
13713 (const_string "*")))
13714 (set (attr "prefix_rex")
13715 (if_then_else (eq_attr "alternative" "0,1")
13716 (const_string "1")
13717 (const_string "*")))
13718 (set (attr "prefix_extra")
13719 (if_then_else (eq_attr "alternative" "0,1")
13720 (const_string "1")
13721 (const_string "*")))
13722 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
13723 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
13724
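;; The split below replaces a vec_select from memory by a scalar load of the
;; selected element.  Illustrative arithmetic only: for a V8HI vector and
;; element index 3, offs = 3 * GET_MODE_SIZE (HImode) = 6 bytes.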
13725 (define_split
13726 [(set (match_operand:<ssescalarmode> 0 "register_operand")
13727 (vec_select:<ssescalarmode>
13728 (match_operand:VI_128 1 "memory_operand")
13729 (parallel
13730 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13731 "TARGET_SSE && reload_completed"
13732 [(set (match_dup 0) (match_dup 1))]
13733 {
13734 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
13735
13736 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
13737 })
13738
13739 (define_insn "*vec_extractv2ti"
13740 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
13741 (vec_select:TI
13742 (match_operand:V2TI 1 "register_operand" "x,v")
13743 (parallel
13744 [(match_operand:SI 2 "const_0_to_1_operand")])))]
13745 "TARGET_AVX"
13746 "@
13747 vextract%~128\t{%2, %1, %0|%0, %1, %2}
13748 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
13749 [(set_attr "type" "sselog")
13750 (set_attr "prefix_extra" "1")
13751 (set_attr "length_immediate" "1")
13752 (set_attr "prefix" "vex,evex")
13753 (set_attr "mode" "OI")])
13754
13755 (define_insn "*vec_extractv4ti"
13756 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
13757 (vec_select:TI
13758 (match_operand:V4TI 1 "register_operand" "v")
13759 (parallel
13760 [(match_operand:SI 2 "const_0_to_3_operand")])))]
13761 "TARGET_AVX512F"
13762 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
13763 [(set_attr "type" "sselog")
13764 (set_attr "prefix_extra" "1")
13765 (set_attr "length_immediate" "1")
13766 (set_attr "prefix" "evex")
13767 (set_attr "mode" "XI")])
13768
13769 (define_mode_iterator VEXTRACTI128_MODE
13770 [(V4TI "TARGET_AVX512F") V2TI])
13771
13772 (define_split
13773 [(set (match_operand:TI 0 "nonimmediate_operand")
13774 (vec_select:TI
13775 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
13776 (parallel [(const_int 0)])))]
13777 "TARGET_AVX
13778 && reload_completed
13779 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
13780 [(set (match_dup 0) (match_dup 1))]
13781 "operands[1] = gen_lowpart (TImode, operands[1]);")
13782
13783 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
13784 ;; vector modes into vec_extract*.
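;; A rough sketch of the lowering done below (pseudo-code, illustrative
;; only), for an SImode extract from a 512-bit vector:
;;   v16si x;            /* 512-bit input              */
;;   v8si  t1 = lo (x);  /* gen_vec_extract_lo_v16si   */
;;   v4si  t2 = lo (t1); /* gen_vec_extract_lo_v8si    */
;;   int   r  = t2[0];   /* final vec_select of lane 0 */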
13785 (define_split
13786 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13787 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
13788 "can_create_pseudo_p ()
13789 && REG_P (operands[1])
13790 && VECTOR_MODE_P (GET_MODE (operands[1]))
13791 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
13792 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
13793 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
13794 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
13795 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
13796 (parallel [(const_int 0)])))]
13797 {
13798 rtx tmp;
13799
13800 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
13801 {
13802 case 64:
13803 if (<MODE>mode == SImode)
13804 {
13805 tmp = gen_reg_rtx (V8SImode);
13806 emit_insn (gen_vec_extract_lo_v16si (tmp,
13807 gen_lowpart (V16SImode,
13808 operands[1])));
13809 }
13810 else
13811 {
13812 tmp = gen_reg_rtx (V4DImode);
13813 emit_insn (gen_vec_extract_lo_v8di (tmp,
13814 gen_lowpart (V8DImode,
13815 operands[1])));
13816 }
13817 operands[1] = tmp;
13818 /* FALLTHRU */
13819 case 32:
13820 tmp = gen_reg_rtx (<ssevecmode>mode);
13821 if (<MODE>mode == SImode)
13822 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
13823 operands[1])));
13824 else
13825 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
13826 operands[1])));
13827 operands[1] = tmp;
13828 break;
13829 case 16:
13830 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
13831 break;
13832 }
13833 })
13834
13835 (define_insn "*vec_concatv2si_sse4_1"
13836 [(set (match_operand:V2SI 0 "register_operand"
13837 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
13838 (vec_concat:V2SI
13839 (match_operand:SI 1 "nonimmediate_operand"
13840 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
13841 (match_operand:SI 2 "vector_move_operand"
13842 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
13843 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13844 "@
13845 pinsrd\t{$1, %2, %0|%0, %2, 1}
13846 pinsrd\t{$1, %2, %0|%0, %2, 1}
13847 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
13848 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
13849 punpckldq\t{%2, %0|%0, %2}
13850 punpckldq\t{%2, %0|%0, %2}
13851 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
13852 %vmovd\t{%1, %0|%0, %1}
13853 punpckldq\t{%2, %0|%0, %2}
13854 movd\t{%1, %0|%0, %1}"
13855 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
13856 (set (attr "type")
13857 (cond [(eq_attr "alternative" "7")
13858 (const_string "ssemov")
13859 (eq_attr "alternative" "8")
13860 (const_string "mmxcvt")
13861 (eq_attr "alternative" "9")
13862 (const_string "mmxmov")
13863 ]
13864 (const_string "sselog")))
13865 (set (attr "prefix_extra")
13866 (if_then_else (eq_attr "alternative" "0,1,2,3")
13867 (const_string "1")
13868 (const_string "*")))
13869 (set (attr "length_immediate")
13870 (if_then_else (eq_attr "alternative" "0,1,2,3")
13871 (const_string "1")
13872 (const_string "*")))
13873 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
13874 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
13875
13876 ;; ??? In theory we can match memory for the MMX alternative, but allowing
13877 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
13878 ;; alternatives pretty much forces the MMX alternative to be chosen.
13879 (define_insn "*vec_concatv2si"
13880 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
13881 (vec_concat:V2SI
13882 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
13883 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
13884 "TARGET_SSE && !TARGET_SSE4_1"
13885 "@
13886 punpckldq\t{%2, %0|%0, %2}
13887 movd\t{%1, %0|%0, %1}
13888 movd\t{%1, %0|%0, %1}
13889 unpcklps\t{%2, %0|%0, %2}
13890 movss\t{%1, %0|%0, %1}
13891 punpckldq\t{%2, %0|%0, %2}
13892 movd\t{%1, %0|%0, %1}"
13893 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
13894 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
13895 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
13896
13897 (define_insn "*vec_concatv4si"
13898 [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v")
13899 (vec_concat:V4SI
13900 (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v")
13901 (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
13902 "TARGET_SSE"
13903 "@
13904 punpcklqdq\t{%2, %0|%0, %2}
13905 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
13906 movlhps\t{%2, %0|%0, %2}
13907 movhps\t{%2, %0|%0, %q2}
13908 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
13909 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
13910 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
13911 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
13912 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
13913
13914 ;; movd instead of movq is required to handle broken assemblers.
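;; Concretely (registers chosen only for illustration): with an assembler
;; that knows the inter-unit form, the GPR->XMM copy below comes out as
;;   movq %rax, %xmm0
;; while with !HAVE_AS_IX86_INTERUNIT_MOVQ it is spelled
;;   movd %rax, %xmm0
;; which such assemblers do accept for a 64-bit general register.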
13915 (define_insn "vec_concatv2di"
13916 [(set (match_operand:V2DI 0 "register_operand"
13917 "=Yr,*x,x ,v ,Yi,v ,x ,x,v ,x,x,v")
13918 (vec_concat:V2DI
13919 (match_operand:DI 1 "nonimmediate_operand"
13920 " 0, 0,x ,Yv,r ,vm,?!*Yn,0,Yv,0,0,v")
13921 (match_operand:DI 2 "vector_move_operand"
13922 " rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))]
13923 "TARGET_SSE"
13924 "@
13925 pinsrq\t{$1, %2, %0|%0, %2, 1}
13926 pinsrq\t{$1, %2, %0|%0, %2, 1}
13927 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
13928 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
13929 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
13930 %vmovq\t{%1, %0|%0, %1}
13931 movq2dq\t{%1, %0|%0, %1}
13932 punpcklqdq\t{%2, %0|%0, %2}
13933 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
13934 movlhps\t{%2, %0|%0, %2}
13935 movhps\t{%2, %0|%0, %2}
13936 vmovhps\t{%2, %1, %0|%0, %1, %2}"
13937 [(set (attr "isa")
13938 (cond [(eq_attr "alternative" "0,1")
13939 (const_string "x64_sse4_noavx")
13940 (eq_attr "alternative" "2")
13941 (const_string "x64_avx")
13942 (eq_attr "alternative" "3")
13943 (const_string "x64_avx512dq")
13944 (eq_attr "alternative" "4")
13945 (const_string "x64")
13946 (eq_attr "alternative" "5,6")
13947 (const_string "sse2")
13948 (eq_attr "alternative" "7")
13949 (const_string "sse2_noavx")
13950 (eq_attr "alternative" "8,11")
13951 (const_string "avx")
13952 ]
13953 (const_string "noavx")))
13954 (set (attr "type")
13955 (if_then_else
13956 (eq_attr "alternative" "0,1,2,3,7,8")
13957 (const_string "sselog")
13958 (const_string "ssemov")))
13959 (set (attr "prefix_rex")
13960 (if_then_else (eq_attr "alternative" "0,1,2,3,4")
13961 (const_string "1")
13962 (const_string "*")))
13963 (set (attr "prefix_extra")
13964 (if_then_else (eq_attr "alternative" "0,1,2,3")
13965 (const_string "1")
13966 (const_string "*")))
13967 (set (attr "length_immediate")
13968 (if_then_else (eq_attr "alternative" "0,1,2,3")
13969 (const_string "1")
13970 (const_string "*")))
13971 (set (attr "prefix")
13972 (cond [(eq_attr "alternative" "2")
13973 (const_string "vex")
13974 (eq_attr "alternative" "3")
13975 (const_string "evex")
13976 (eq_attr "alternative" "4,5")
13977 (const_string "maybe_vex")
13978 (eq_attr "alternative" "8,11")
13979 (const_string "maybe_evex")
13980 ]
13981 (const_string "orig")))
13982 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
13983
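;; The vec_unpack expanders below all defer to ix86_expand_sse_unpack.
;; Judging from the pattern names (a reading, not a specification), the
;; third argument selects unsigned extension and the fourth the high half:
;;   ix86_expand_sse_unpack (dst, src, false, false);  /* signed,   low  */
;;   ix86_expand_sse_unpack (dst, src, false, true);   /* signed,   high */
;;   ix86_expand_sse_unpack (dst, src, true,  false);  /* unsigned, low  */
;;   ix86_expand_sse_unpack (dst, src, true,  true);   /* unsigned, high */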
13984 (define_expand "vec_unpacks_lo_<mode>"
13985 [(match_operand:<sseunpackmode> 0 "register_operand")
13986 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13987 "TARGET_SSE2"
13988 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
13989
13990 (define_expand "vec_unpacks_hi_<mode>"
13991 [(match_operand:<sseunpackmode> 0 "register_operand")
13992 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13993 "TARGET_SSE2"
13994 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
13995
13996 (define_expand "vec_unpacku_lo_<mode>"
13997 [(match_operand:<sseunpackmode> 0 "register_operand")
13998 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13999 "TARGET_SSE2"
14000 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
14001
14002 (define_expand "vec_unpacks_lo_hi"
14003 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
14004 (match_operand:HI 1 "register_operand"))]
14005 "TARGET_AVX512F")
14006
14007 (define_expand "vec_unpacks_lo_si"
14008 [(set (match_operand:HI 0 "register_operand")
14009 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
14010 "TARGET_AVX512F")
14011
14012 (define_expand "vec_unpacks_lo_di"
14013 [(set (match_operand:SI 0 "register_operand")
14014 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
14015 "TARGET_AVX512BW")
14016
14017 (define_expand "vec_unpacku_hi_<mode>"
14018 [(match_operand:<sseunpackmode> 0 "register_operand")
14019 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14020 "TARGET_SSE2"
14021 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
14022
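;; For the AVX-512 mask modes, the low half of a mask is just a lowpart
;; subreg (see vec_unpacks_lo_hi/si/di above), while the high half is a
;; plain right shift of the mask register.  Illustrative arithmetic only:
;; unpacking the high half of a DImode mask into an SImode mask is
;;   k_hi = k64 >> 32;   /* shift count = GET_MODE_BITSIZE (SImode) */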
14023 (define_expand "vec_unpacks_hi_hi"
14024 [(parallel
14025 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
14026 (lshiftrt:HI (match_operand:HI 1 "register_operand")
14027 (const_int 8)))
14028 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
14029 "TARGET_AVX512F")
14030
14031 (define_expand "vec_unpacks_hi_<mode>"
14032 [(parallel
14033 [(set (subreg:SWI48x
14034 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
14035 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
14036 (match_dup 2)))
14037 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
14038 "TARGET_AVX512BW"
14039 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
14040
14041 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14042 ;;
14043 ;; Miscellaneous
14044 ;;
14045 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14046
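;; The RTL below spells out the rounded average computed by pavgb/pavgw.
;; Per element, in double-width arithmetic (illustrative formula only):
;;   dst = (zext (a) + zext (b) + 1) >> 1
;; The constant-1 vector is match_dup operand 3 in the unmasked case and
;; operand 5 once the mask operands are appended, which appears to be why
;; the expander below juggles operands[3] and operands[5].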
14047 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
14048 [(set (match_operand:VI12_AVX2 0 "register_operand")
14049 (truncate:VI12_AVX2
14050 (lshiftrt:<ssedoublemode>
14051 (plus:<ssedoublemode>
14052 (plus:<ssedoublemode>
14053 (zero_extend:<ssedoublemode>
14054 (match_operand:VI12_AVX2 1 "vector_operand"))
14055 (zero_extend:<ssedoublemode>
14056 (match_operand:VI12_AVX2 2 "vector_operand")))
14057 (match_dup <mask_expand_op3>))
14058 (const_int 1))))]
14059 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14060 {
14061 rtx tmp;
14062 if (<mask_applied>)
14063 tmp = operands[3];
14064 operands[3] = CONST1_RTX(<MODE>mode);
14065 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
14066
14067 if (<mask_applied>)
14068 {
14069 operands[5] = operands[3];
14070 operands[3] = tmp;
14071 }
14072 })
14073
14074 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
14075 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
14076 (truncate:VI12_AVX2
14077 (lshiftrt:<ssedoublemode>
14078 (plus:<ssedoublemode>
14079 (plus:<ssedoublemode>
14080 (zero_extend:<ssedoublemode>
14081 (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
14082 (zero_extend:<ssedoublemode>
14083 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
14084 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
14085 (const_int 1))))]
14086 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14087 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14088 "@
14089 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
14090 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14091 [(set_attr "isa" "noavx,avx")
14092 (set_attr "type" "sseiadd")
14093 (set_attr "prefix_data16" "1,*")
14094 (set_attr "prefix" "orig,<mask_prefix>")
14095 (set_attr "mode" "<sseinsnmode>")])
14096
14097 ;; The correct representation for this is absolutely enormous, and
14098 ;; surely not generally useful.
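;; For reference (descriptive comment only, not a pattern): psadbw computes,
;; per 64-bit lane, the sum of absolute differences of the eight byte pairs,
;; zero-extended into the lane:
;;   dst.qword[i] = |a.byte[8i+0] - b.byte[8i+0]| + ...
;;                  + |a.byte[8i+7] - b.byte[8i+7]|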
14099 (define_insn "<sse2_avx2>_psadbw"
14100 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
14101 (unspec:VI8_AVX2_AVX512BW
14102 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
14103 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
14104 UNSPEC_PSADBW))]
14105 "TARGET_SSE2"
14106 "@
14107 psadbw\t{%2, %0|%0, %2}
14108 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
14109 [(set_attr "isa" "noavx,avx")
14110 (set_attr "type" "sseiadd")
14111 (set_attr "atom_unit" "simul")
14112 (set_attr "prefix_data16" "1,*")
14113 (set_attr "prefix" "orig,maybe_evex")
14114 (set_attr "mode" "<sseinsnmode>")])
14115
14116 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
14117 [(set (match_operand:SI 0 "register_operand" "=r")
14118 (unspec:SI
14119 [(match_operand:VF_128_256 1 "register_operand" "x")]
14120 UNSPEC_MOVMSK))]
14121 "TARGET_SSE"
14122 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
14123 [(set_attr "type" "ssemov")
14124 (set_attr "prefix" "maybe_vex")
14125 (set_attr "mode" "<MODE>")])
14126
14127 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
14128 [(set (match_operand:DI 0 "register_operand" "=r")
14129 (zero_extend:DI
14130 (unspec:SI
14131 [(match_operand:VF_128_256 1 "register_operand" "x")]
14132 UNSPEC_MOVMSK)))]
14133 "TARGET_64BIT && TARGET_SSE"
14134 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
14135 [(set_attr "type" "ssemov")
14136 (set_attr "prefix" "maybe_vex")
14137 (set_attr "mode" "<MODE>")])
14138
14139 (define_insn "<sse2_avx2>_pmovmskb"
14140 [(set (match_operand:SI 0 "register_operand" "=r")
14141 (unspec:SI
14142 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
14143 UNSPEC_MOVMSK))]
14144 "TARGET_SSE2"
14145 "%vpmovmskb\t{%1, %0|%0, %1}"
14146 [(set_attr "type" "ssemov")
14147 (set (attr "prefix_data16")
14148 (if_then_else
14149 (match_test "TARGET_AVX")
14150 (const_string "*")
14151 (const_string "1")))
14152 (set_attr "prefix" "maybe_vex")
14153 (set_attr "mode" "SI")])
14154
14155 (define_insn "*<sse2_avx2>_pmovmskb_zext"
14156 [(set (match_operand:DI 0 "register_operand" "=r")
14157 (zero_extend:DI
14158 (unspec:SI
14159 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
14160 UNSPEC_MOVMSK)))]
14161 "TARGET_64BIT && TARGET_SSE2"
14162 "%vpmovmskb\t{%1, %k0|%k0, %1}"
14163 [(set_attr "type" "ssemov")
14164 (set (attr "prefix_data16")
14165 (if_then_else
14166 (match_test "TARGET_AVX")
14167 (const_string "*")
14168 (const_string "1")))
14169 (set_attr "prefix" "maybe_vex")
14170 (set_attr "mode" "SI")])
14171
14172 (define_expand "sse2_maskmovdqu"
14173 [(set (match_operand:V16QI 0 "memory_operand")
14174 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
14175 (match_operand:V16QI 2 "register_operand")
14176 (match_dup 0)]
14177 UNSPEC_MASKMOV))]
14178 "TARGET_SSE2")
14179
14180 (define_insn "*sse2_maskmovdqu"
14181 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
14182 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
14183 (match_operand:V16QI 2 "register_operand" "x")
14184 (mem:V16QI (match_dup 0))]
14185 UNSPEC_MASKMOV))]
14186 "TARGET_SSE2"
14187 {
14188 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
14189 that requires %v to be at the beginning of the opcode name. */
14190 if (Pmode != word_mode)
14191 fputs ("\taddr32", asm_out_file);
14192 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
14193 }
14194 [(set_attr "type" "ssemov")
14195 (set_attr "prefix_data16" "1")
14196 (set (attr "length_address")
14197 (symbol_ref ("Pmode != word_mode")))
14198 ;; The implicit %rdi operand confuses default length_vex computation.
14199 (set (attr "length_vex")
14200 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
14201 (set_attr "prefix" "maybe_vex")
14202 (set_attr "znver1_decode" "vector")
14203 (set_attr "mode" "TI")])
14204
14205 (define_insn "sse_ldmxcsr"
14206 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
14207 UNSPECV_LDMXCSR)]
14208 "TARGET_SSE"
14209 "%vldmxcsr\t%0"
14210 [(set_attr "type" "sse")
14211 (set_attr "atom_sse_attr" "mxcsr")
14212 (set_attr "prefix" "maybe_vex")
14213 (set_attr "memory" "load")])
14214
14215 (define_insn "sse_stmxcsr"
14216 [(set (match_operand:SI 0 "memory_operand" "=m")
14217 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
14218 "TARGET_SSE"
14219 "%vstmxcsr\t%0"
14220 [(set_attr "type" "sse")
14221 (set_attr "atom_sse_attr" "mxcsr")
14222 (set_attr "prefix" "maybe_vex")
14223 (set_attr "memory" "store")])
14224
14225 (define_insn "sse2_clflush"
14226 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
14227 UNSPECV_CLFLUSH)]
14228 "TARGET_SSE2"
14229 "clflush\t%a0"
14230 [(set_attr "type" "sse")
14231 (set_attr "atom_sse_attr" "fence")
14232 (set_attr "memory" "unknown")])
14233
14234 ;; As per the AMD and Intel ISA manuals, the first operand holds the
14235 ;; extensions and goes to %ecx, while the second operand holds the hints
14236 ;; and goes to %eax.
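;; A hedged usage sketch of the argument order (the builtin name is the
;; usual GCC one, not defined in this file):
;;   __builtin_ia32_mwait (extensions /* -> %ecx */, hints /* -> %eax */);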
14237 (define_insn "sse3_mwait"
14238 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
14239 (match_operand:SI 1 "register_operand" "a")]
14240 UNSPECV_MWAIT)]
14241 "TARGET_SSE3"
14242 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
14243 ;; Since 32bit register operands are implicitly zero extended to 64bit,
14244 ;; we only need to set up 32bit registers.
14245 "mwait"
14246 [(set_attr "length" "3")])
14247
14248 (define_insn "sse3_monitor_<mode>"
14249 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
14250 (match_operand:SI 1 "register_operand" "c")
14251 (match_operand:SI 2 "register_operand" "d")]
14252 UNSPECV_MONITOR)]
14253 "TARGET_SSE3"
14254 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
14255 ;; RCX and RDX are used. Since 32bit register operands are implicitly
14256 ;; zero extended to 64bit, we only need to set up 32bit registers.
14257 "%^monitor"
14258 [(set (attr "length")
14259 (symbol_ref ("(Pmode != word_mode) + 3")))])
14260
14261 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14262 ;;
14263 ;; SSSE3 instructions
14264 ;;
14265 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14266
14267 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
14268
14269 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
14270 [(set (match_operand:V16HI 0 "register_operand" "=x")
14271 (vec_concat:V16HI
14272 (vec_concat:V8HI
14273 (vec_concat:V4HI
14274 (vec_concat:V2HI
14275 (ssse3_plusminus:HI
14276 (vec_select:HI
14277 (match_operand:V16HI 1 "register_operand" "x")
14278 (parallel [(const_int 0)]))
14279 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14280 (ssse3_plusminus:HI
14281 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14282 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14283 (vec_concat:V2HI
14284 (ssse3_plusminus:HI
14285 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
14286 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
14287 (ssse3_plusminus:HI
14288 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
14289 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
14290 (vec_concat:V4HI
14291 (vec_concat:V2HI
14292 (ssse3_plusminus:HI
14293 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
14294 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
14295 (ssse3_plusminus:HI
14296 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
14297 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
14298 (vec_concat:V2HI
14299 (ssse3_plusminus:HI
14300 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
14301 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
14302 (ssse3_plusminus:HI
14303 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
14304 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
14305 (vec_concat:V8HI
14306 (vec_concat:V4HI
14307 (vec_concat:V2HI
14308 (ssse3_plusminus:HI
14309 (vec_select:HI
14310 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14311 (parallel [(const_int 0)]))
14312 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14313 (ssse3_plusminus:HI
14314 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14315 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
14316 (vec_concat:V2HI
14317 (ssse3_plusminus:HI
14318 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
14319 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
14320 (ssse3_plusminus:HI
14321 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
14322 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
14323 (vec_concat:V4HI
14324 (vec_concat:V2HI
14325 (ssse3_plusminus:HI
14326 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
14327 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
14328 (ssse3_plusminus:HI
14329 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
14330 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
14331 (vec_concat:V2HI
14332 (ssse3_plusminus:HI
14333 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
14334 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
14335 (ssse3_plusminus:HI
14336 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
14337 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
14338 "TARGET_AVX2"
14339 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
14340 [(set_attr "type" "sseiadd")
14341 (set_attr "prefix_extra" "1")
14342 (set_attr "prefix" "vex")
14343 (set_attr "mode" "OI")])
14344
14345 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
14346 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
14347 (vec_concat:V8HI
14348 (vec_concat:V4HI
14349 (vec_concat:V2HI
14350 (ssse3_plusminus:HI
14351 (vec_select:HI
14352 (match_operand:V8HI 1 "register_operand" "0,x")
14353 (parallel [(const_int 0)]))
14354 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14355 (ssse3_plusminus:HI
14356 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14357 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14358 (vec_concat:V2HI
14359 (ssse3_plusminus:HI
14360 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
14361 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
14362 (ssse3_plusminus:HI
14363 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
14364 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
14365 (vec_concat:V4HI
14366 (vec_concat:V2HI
14367 (ssse3_plusminus:HI
14368 (vec_select:HI
14369 (match_operand:V8HI 2 "vector_operand" "xBm,xm")
14370 (parallel [(const_int 0)]))
14371 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14372 (ssse3_plusminus:HI
14373 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14374 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
14375 (vec_concat:V2HI
14376 (ssse3_plusminus:HI
14377 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
14378 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
14379 (ssse3_plusminus:HI
14380 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
14381 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
14382 "TARGET_SSSE3"
14383 "@
14384 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
14385 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
14386 [(set_attr "isa" "noavx,avx")
14387 (set_attr "type" "sseiadd")
14388 (set_attr "atom_unit" "complex")
14389 (set_attr "prefix_data16" "1,*")
14390 (set_attr "prefix_extra" "1")
14391 (set_attr "prefix" "orig,vex")
14392 (set_attr "mode" "TI")])
14393
14394 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
14395 [(set (match_operand:V4HI 0 "register_operand" "=y")
14396 (vec_concat:V4HI
14397 (vec_concat:V2HI
14398 (ssse3_plusminus:HI
14399 (vec_select:HI
14400 (match_operand:V4HI 1 "register_operand" "0")
14401 (parallel [(const_int 0)]))
14402 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14403 (ssse3_plusminus:HI
14404 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14405 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14406 (vec_concat:V2HI
14407 (ssse3_plusminus:HI
14408 (vec_select:HI
14409 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
14410 (parallel [(const_int 0)]))
14411 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14412 (ssse3_plusminus:HI
14413 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14414 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
14415 "TARGET_SSSE3"
14416 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
14417 [(set_attr "type" "sseiadd")
14418 (set_attr "atom_unit" "complex")
14419 (set_attr "prefix_extra" "1")
14420 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14421 (set_attr "mode" "DI")])
14422
14423 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
14424 [(set (match_operand:V8SI 0 "register_operand" "=x")
14425 (vec_concat:V8SI
14426 (vec_concat:V4SI
14427 (vec_concat:V2SI
14428 (plusminus:SI
14429 (vec_select:SI
14430 (match_operand:V8SI 1 "register_operand" "x")
14431 (parallel [(const_int 0)]))
14432 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14433 (plusminus:SI
14434 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
14435 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
14436 (vec_concat:V2SI
14437 (plusminus:SI
14438 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
14439 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
14440 (plusminus:SI
14441 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
14442 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
14443 (vec_concat:V4SI
14444 (vec_concat:V2SI
14445 (plusminus:SI
14446 (vec_select:SI
14447 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
14448 (parallel [(const_int 0)]))
14449 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
14450 (plusminus:SI
14451 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
14452 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
14453 (vec_concat:V2SI
14454 (plusminus:SI
14455 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
14456 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
14457 (plusminus:SI
14458 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
14459 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
14460 "TARGET_AVX2"
14461 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
14462 [(set_attr "type" "sseiadd")
14463 (set_attr "prefix_extra" "1")
14464 (set_attr "prefix" "vex")
14465 (set_attr "mode" "OI")])
14466
14467 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
14468 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
14469 (vec_concat:V4SI
14470 (vec_concat:V2SI
14471 (plusminus:SI
14472 (vec_select:SI
14473 (match_operand:V4SI 1 "register_operand" "0,x")
14474 (parallel [(const_int 0)]))
14475 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14476 (plusminus:SI
14477 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
14478 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
14479 (vec_concat:V2SI
14480 (plusminus:SI
14481 (vec_select:SI
14482 (match_operand:V4SI 2 "vector_operand" "xBm,xm")
14483 (parallel [(const_int 0)]))
14484 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
14485 (plusminus:SI
14486 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
14487 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
14488 "TARGET_SSSE3"
14489 "@
14490 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
14491 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
14492 [(set_attr "isa" "noavx,avx")
14493 (set_attr "type" "sseiadd")
14494 (set_attr "atom_unit" "complex")
14495 (set_attr "prefix_data16" "1,*")
14496 (set_attr "prefix_extra" "1")
14497 (set_attr "prefix" "orig,vex")
14498 (set_attr "mode" "TI")])
14499
14500 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
14501 [(set (match_operand:V2SI 0 "register_operand" "=y")
14502 (vec_concat:V2SI
14503 (plusminus:SI
14504 (vec_select:SI
14505 (match_operand:V2SI 1 "register_operand" "0")
14506 (parallel [(const_int 0)]))
14507 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14508 (plusminus:SI
14509 (vec_select:SI
14510 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
14511 (parallel [(const_int 0)]))
14512 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
14513 "TARGET_SSSE3"
14514 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
14515 [(set_attr "type" "sseiadd")
14516 (set_attr "atom_unit" "complex")
14517 (set_attr "prefix_extra" "1")
14518 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14519 (set_attr "mode" "DI")])
14520
14521 (define_insn "avx2_pmaddubsw256"
14522 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
14523 (ss_plus:V16HI
14524 (mult:V16HI
14525 (zero_extend:V16HI
14526 (vec_select:V16QI
14527 (match_operand:V32QI 1 "register_operand" "x,v")
14528 (parallel [(const_int 0) (const_int 2)
14529 (const_int 4) (const_int 6)
14530 (const_int 8) (const_int 10)
14531 (const_int 12) (const_int 14)
14532 (const_int 16) (const_int 18)
14533 (const_int 20) (const_int 22)
14534 (const_int 24) (const_int 26)
14535 (const_int 28) (const_int 30)])))
14536 (sign_extend:V16HI
14537 (vec_select:V16QI
14538 (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
14539 (parallel [(const_int 0) (const_int 2)
14540 (const_int 4) (const_int 6)
14541 (const_int 8) (const_int 10)
14542 (const_int 12) (const_int 14)
14543 (const_int 16) (const_int 18)
14544 (const_int 20) (const_int 22)
14545 (const_int 24) (const_int 26)
14546 (const_int 28) (const_int 30)]))))
14547 (mult:V16HI
14548 (zero_extend:V16HI
14549 (vec_select:V16QI (match_dup 1)
14550 (parallel [(const_int 1) (const_int 3)
14551 (const_int 5) (const_int 7)
14552 (const_int 9) (const_int 11)
14553 (const_int 13) (const_int 15)
14554 (const_int 17) (const_int 19)
14555 (const_int 21) (const_int 23)
14556 (const_int 25) (const_int 27)
14557 (const_int 29) (const_int 31)])))
14558 (sign_extend:V16HI
14559 (vec_select:V16QI (match_dup 2)
14560 (parallel [(const_int 1) (const_int 3)
14561 (const_int 5) (const_int 7)
14562 (const_int 9) (const_int 11)
14563 (const_int 13) (const_int 15)
14564 (const_int 17) (const_int 19)
14565 (const_int 21) (const_int 23)
14566 (const_int 25) (const_int 27)
14567 (const_int 29) (const_int 31)]))))))]
14568 "TARGET_AVX2"
14569 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14570 [(set_attr "isa" "*,avx512bw")
14571 (set_attr "type" "sseiadd")
14572 (set_attr "prefix_extra" "1")
14573 (set_attr "prefix" "vex,evex")
14574 (set_attr "mode" "OI")])
14575
14576 ;; The correct representation for this is absolutely enormous, and
14577 ;; surely not generally useful.
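;; For reference (descriptive comment only): pmaddubsw multiplies unsigned
;; bytes of operand 1 with the corresponding signed bytes of operand 2 and
;; adds adjacent products with signed saturation, per 16-bit element:
;;   dst.word[i] = ss_sat ((u8) a.byte[2i]     * (s8) b.byte[2i]
;;                         + (u8) a.byte[2i+1] * (s8) b.byte[2i+1])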
14578 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
14579 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
14580 (unspec:VI2_AVX512VL
14581 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
14582 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
14583 UNSPEC_PMADDUBSW512))]
14584 "TARGET_AVX512BW"
14585 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
14586 [(set_attr "type" "sseiadd")
14587 (set_attr "prefix" "evex")
14588 (set_attr "mode" "XI")])
14589
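;; The pattern below spells out pmulhrsw's rounding.  Per 16-bit element,
;; carried out in 32-bit arithmetic (illustrative formula only):
;;   dst = (((s32) a * (s32) b >> 14) + 1) >> 1
;; i.e. a high multiply with round-to-nearest scaling.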
14590 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
14591 [(set (match_operand:V32HI 0 "register_operand" "=v")
14592 (truncate:V32HI
14593 (lshiftrt:V32SI
14594 (plus:V32SI
14595 (lshiftrt:V32SI
14596 (mult:V32SI
14597 (sign_extend:V32SI
14598 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
14599 (sign_extend:V32SI
14600 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
14601 (const_int 14))
14602 (const_vector:V32HI [(const_int 1) (const_int 1)
14603 (const_int 1) (const_int 1)
14604 (const_int 1) (const_int 1)
14605 (const_int 1) (const_int 1)
14606 (const_int 1) (const_int 1)
14607 (const_int 1) (const_int 1)
14608 (const_int 1) (const_int 1)
14609 (const_int 1) (const_int 1)
14610 (const_int 1) (const_int 1)
14611 (const_int 1) (const_int 1)
14612 (const_int 1) (const_int 1)
14613 (const_int 1) (const_int 1)
14614 (const_int 1) (const_int 1)
14615 (const_int 1) (const_int 1)
14616 (const_int 1) (const_int 1)
14617 (const_int 1) (const_int 1)]))
14618 (const_int 1))))]
14619 "TARGET_AVX512BW"
14620 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14621 [(set_attr "type" "sseimul")
14622 (set_attr "prefix" "evex")
14623 (set_attr "mode" "XI")])
14624
14625 (define_insn "ssse3_pmaddubsw128"
14626 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
14627 (ss_plus:V8HI
14628 (mult:V8HI
14629 (zero_extend:V8HI
14630 (vec_select:V8QI
14631 (match_operand:V16QI 1 "register_operand" "0,x,v")
14632 (parallel [(const_int 0) (const_int 2)
14633 (const_int 4) (const_int 6)
14634 (const_int 8) (const_int 10)
14635 (const_int 12) (const_int 14)])))
14636 (sign_extend:V8HI
14637 (vec_select:V8QI
14638 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
14639 (parallel [(const_int 0) (const_int 2)
14640 (const_int 4) (const_int 6)
14641 (const_int 8) (const_int 10)
14642 (const_int 12) (const_int 14)]))))
14643 (mult:V8HI
14644 (zero_extend:V8HI
14645 (vec_select:V8QI (match_dup 1)
14646 (parallel [(const_int 1) (const_int 3)
14647 (const_int 5) (const_int 7)
14648 (const_int 9) (const_int 11)
14649 (const_int 13) (const_int 15)])))
14650 (sign_extend:V8HI
14651 (vec_select:V8QI (match_dup 2)
14652 (parallel [(const_int 1) (const_int 3)
14653 (const_int 5) (const_int 7)
14654 (const_int 9) (const_int 11)
14655 (const_int 13) (const_int 15)]))))))]
14656 "TARGET_SSSE3"
14657 "@
14658 pmaddubsw\t{%2, %0|%0, %2}
14659 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
14660 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14661 [(set_attr "isa" "noavx,avx,avx512bw")
14662 (set_attr "type" "sseiadd")
14663 (set_attr "atom_unit" "simul")
14664 (set_attr "prefix_data16" "1,*,*")
14665 (set_attr "prefix_extra" "1")
14666 (set_attr "prefix" "orig,vex,evex")
14667 (set_attr "mode" "TI")])
14668
14669 (define_insn "ssse3_pmaddubsw"
14670 [(set (match_operand:V4HI 0 "register_operand" "=y")
14671 (ss_plus:V4HI
14672 (mult:V4HI
14673 (zero_extend:V4HI
14674 (vec_select:V4QI
14675 (match_operand:V8QI 1 "register_operand" "0")
14676 (parallel [(const_int 0) (const_int 2)
14677 (const_int 4) (const_int 6)])))
14678 (sign_extend:V4HI
14679 (vec_select:V4QI
14680 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
14681 (parallel [(const_int 0) (const_int 2)
14682 (const_int 4) (const_int 6)]))))
14683 (mult:V4HI
14684 (zero_extend:V4HI
14685 (vec_select:V4QI (match_dup 1)
14686 (parallel [(const_int 1) (const_int 3)
14687 (const_int 5) (const_int 7)])))
14688 (sign_extend:V4HI
14689 (vec_select:V4QI (match_dup 2)
14690 (parallel [(const_int 1) (const_int 3)
14691 (const_int 5) (const_int 7)]))))))]
14692 "TARGET_SSSE3"
14693 "pmaddubsw\t{%2, %0|%0, %2}"
14694 [(set_attr "type" "sseiadd")
14695 (set_attr "atom_unit" "simul")
14696 (set_attr "prefix_extra" "1")
14697 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14698 (set_attr "mode" "DI")])
14699
14700 (define_mode_iterator PMULHRSW
14701 [V4HI V8HI (V16HI "TARGET_AVX2")])
14702
14703 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
14704 [(set (match_operand:PMULHRSW 0 "register_operand")
14705 (vec_merge:PMULHRSW
14706 (truncate:PMULHRSW
14707 (lshiftrt:<ssedoublemode>
14708 (plus:<ssedoublemode>
14709 (lshiftrt:<ssedoublemode>
14710 (mult:<ssedoublemode>
14711 (sign_extend:<ssedoublemode>
14712 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14713 (sign_extend:<ssedoublemode>
14714 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14715 (const_int 14))
14716 (match_dup 5))
14717 (const_int 1)))
14718 (match_operand:PMULHRSW 3 "register_operand")
14719 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
14720 "TARGET_AVX512BW && TARGET_AVX512VL"
14721 {
14722 operands[5] = CONST1_RTX(<MODE>mode);
14723 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14724 })
14725
14726 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
14727 [(set (match_operand:PMULHRSW 0 "register_operand")
14728 (truncate:PMULHRSW
14729 (lshiftrt:<ssedoublemode>
14730 (plus:<ssedoublemode>
14731 (lshiftrt:<ssedoublemode>
14732 (mult:<ssedoublemode>
14733 (sign_extend:<ssedoublemode>
14734 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14735 (sign_extend:<ssedoublemode>
14736 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14737 (const_int 14))
14738 (match_dup 3))
14739 (const_int 1))))]
14740 "TARGET_AVX2"
14741 {
14742 operands[3] = CONST1_RTX(<MODE>mode);
14743 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14744 })
14745
14746 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
14747 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
14748 (truncate:VI2_AVX2
14749 (lshiftrt:<ssedoublemode>
14750 (plus:<ssedoublemode>
14751 (lshiftrt:<ssedoublemode>
14752 (mult:<ssedoublemode>
14753 (sign_extend:<ssedoublemode>
14754 (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
14755 (sign_extend:<ssedoublemode>
14756 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
14757 (const_int 14))
14758 (match_operand:VI2_AVX2 3 "const1_operand"))
14759 (const_int 1))))]
14760 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14761 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14762 "@
14763 pmulhrsw\t{%2, %0|%0, %2}
14764 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
14765 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
14766 [(set_attr "isa" "noavx,avx,avx512bw")
14767 (set_attr "type" "sseimul")
14768 (set_attr "prefix_data16" "1,*,*")
14769 (set_attr "prefix_extra" "1")
14770 (set_attr "prefix" "orig,maybe_evex,evex")
14771 (set_attr "mode" "<sseinsnmode>")])
14772
14773 (define_insn "*ssse3_pmulhrswv4hi3"
14774 [(set (match_operand:V4HI 0 "register_operand" "=y")
14775 (truncate:V4HI
14776 (lshiftrt:V4SI
14777 (plus:V4SI
14778 (lshiftrt:V4SI
14779 (mult:V4SI
14780 (sign_extend:V4SI
14781 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
14782 (sign_extend:V4SI
14783 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
14784 (const_int 14))
14785 (match_operand:V4HI 3 "const1_operand"))
14786 (const_int 1))))]
14787 "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14788 "pmulhrsw\t{%2, %0|%0, %2}"
14789 [(set_attr "type" "sseimul")
14790 (set_attr "prefix_extra" "1")
14791 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14792 (set_attr "mode" "DI")])
14793
14794 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
14795 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
14796 (unspec:VI1_AVX512
14797 [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
14798 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
14799 UNSPEC_PSHUFB))]
14800 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14801 "@
14802 pshufb\t{%2, %0|%0, %2}
14803 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
14804 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14805 [(set_attr "isa" "noavx,avx,avx512bw")
14806 (set_attr "type" "sselog1")
14807 (set_attr "prefix_data16" "1,*,*")
14808 (set_attr "prefix_extra" "1")
14809 (set_attr "prefix" "orig,maybe_evex,evex")
14810 (set_attr "btver2_decode" "vector")
14811 (set_attr "mode" "<sseinsnmode>")])
14812
14813 (define_insn "ssse3_pshufbv8qi3"
14814 [(set (match_operand:V8QI 0 "register_operand" "=y")
14815 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
14816 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
14817 UNSPEC_PSHUFB))]
14818 "TARGET_SSSE3"
14819 "pshufb\t{%2, %0|%0, %2}";
14820 [(set_attr "type" "sselog1")
14821 (set_attr "prefix_extra" "1")
14822 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14823 (set_attr "mode" "DI")])
14824
14825 (define_insn "<ssse3_avx2>_psign<mode>3"
14826 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
14827 (unspec:VI124_AVX2
14828 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
14829 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
14830 UNSPEC_PSIGN))]
14831 "TARGET_SSSE3"
14832 "@
14833 psign<ssemodesuffix>\t{%2, %0|%0, %2}
14834 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14835 [(set_attr "isa" "noavx,avx")
14836 (set_attr "type" "sselog1")
14837 (set_attr "prefix_data16" "1,*")
14838 (set_attr "prefix_extra" "1")
14839 (set_attr "prefix" "orig,vex")
14840 (set_attr "mode" "<sseinsnmode>")])
14841
14842 (define_insn "ssse3_psign<mode>3"
14843 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
14844 (unspec:MMXMODEI
14845 [(match_operand:MMXMODEI 1 "register_operand" "0")
14846 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
14847 UNSPEC_PSIGN))]
14848 "TARGET_SSSE3"
14849 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
14850 [(set_attr "type" "sselog1")
14851 (set_attr "prefix_extra" "1")
14852 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14853 (set_attr "mode" "DI")])
14854
14855 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
14856 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
14857 (vec_merge:VI1_AVX512
14858 (unspec:VI1_AVX512
14859 [(match_operand:VI1_AVX512 1 "register_operand" "v")
14860 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
14861 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14862 UNSPEC_PALIGNR)
14863 (match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
14864 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
14865 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
14866 {
14867 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14868 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
14869 }
14870 [(set_attr "type" "sseishft")
14871 (set_attr "atom_unit" "sishuf")
14872 (set_attr "prefix_extra" "1")
14873 (set_attr "length_immediate" "1")
14874 (set_attr "prefix" "evex")
14875 (set_attr "mode" "<sseinsnmode>")])
14876
14877 (define_insn "<ssse3_avx2>_palignr<mode>"
14878 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
14879 (unspec:SSESCALARMODE
14880 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
14881 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
14882 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
14883 UNSPEC_PALIGNR))]
14884 "TARGET_SSSE3"
14885 {
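  /* The immediate comes in as a bit count (a multiple of 8, enforced by the
     const_0_to_255_mul_8_operand predicate); palignr wants a byte count,
     hence the division by 8.  */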
14886 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14887
14888 switch (which_alternative)
14889 {
14890 case 0:
14891 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14892 case 1:
14893 case 2:
14894 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14895 default:
14896 gcc_unreachable ();
14897 }
14898 }
14899 [(set_attr "isa" "noavx,avx,avx512bw")
14900 (set_attr "type" "sseishft")
14901 (set_attr "atom_unit" "sishuf")
14902 (set_attr "prefix_data16" "1,*,*")
14903 (set_attr "prefix_extra" "1")
14904 (set_attr "length_immediate" "1")
14905 (set_attr "prefix" "orig,vex,evex")
14906 (set_attr "mode" "<sseinsnmode>")])
14907
14908 (define_insn "ssse3_palignrdi"
14909 [(set (match_operand:DI 0 "register_operand" "=y")
14910 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
14911 (match_operand:DI 2 "nonimmediate_operand" "ym")
14912 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14913 UNSPEC_PALIGNR))]
14914 "TARGET_SSSE3"
14915 {
14916 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14917 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14918 }
14919 [(set_attr "type" "sseishft")
14920 (set_attr "atom_unit" "sishuf")
14921 (set_attr "prefix_extra" "1")
14922 (set_attr "length_immediate" "1")
14923 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14924 (set_attr "mode" "DI")])
14925
14926 ;; Mode iterator to handle the irregularity that the V2DI and V4DI
14927 ;; modes of the abs instruction are only available on AVX-512 targets.
14928 (define_mode_iterator VI1248_AVX512VL_AVX512BW
14929 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
14930 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
14931 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
14932 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
14933
14934 (define_insn "*abs<mode>2"
14935 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
14936 (abs:VI1248_AVX512VL_AVX512BW
14937 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
14938 "TARGET_SSSE3"
14939 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
14940 [(set_attr "type" "sselog1")
14941 (set_attr "prefix_data16" "1")
14942 (set_attr "prefix_extra" "1")
14943 (set_attr "prefix" "maybe_vex")
14944 (set_attr "mode" "<sseinsnmode>")])
14945
14946 (define_insn "abs<mode>2_mask"
14947 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
14948 (vec_merge:VI48_AVX512VL
14949 (abs:VI48_AVX512VL
14950 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
14951 (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
14952 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14953 "TARGET_AVX512F"
14954 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14955 [(set_attr "type" "sselog1")
14956 (set_attr "prefix" "evex")
14957 (set_attr "mode" "<sseinsnmode>")])
14958
14959 (define_insn "abs<mode>2_mask"
14960 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
14961 (vec_merge:VI12_AVX512VL
14962 (abs:VI12_AVX512VL
14963 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
14964 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
14965 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14966 "TARGET_AVX512BW"
14967 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14968 [(set_attr "type" "sselog1")
14969 (set_attr "prefix" "evex")
14970 (set_attr "mode" "<sseinsnmode>")])
14971
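;; Generic absolute-value expander.  With SSSE3 the pabs* patterns above
;; match directly; without it ix86_expand_sse2_abs open-codes the operation
;; (roughly, an illustrative reading: a shift-derived sign mask followed by
;; xor and subtract, or a min/max of A and 0 - A, depending on the mode).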
14972 (define_expand "abs<mode>2"
14973 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
14974 (abs:VI1248_AVX512VL_AVX512BW
14975 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand")))]
14976 "TARGET_SSE2"
14977 {
14978 if (!TARGET_SSSE3)
14979 {
14980 ix86_expand_sse2_abs (operands[0], operands[1]);
14981 DONE;
14982 }
14983 })
14984
14985 (define_insn "abs<mode>2"
14986 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
14987 (abs:MMXMODEI
14988 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
14989 "TARGET_SSSE3"
14990 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
14991 [(set_attr "type" "sselog1")
14992 (set_attr "prefix_rep" "0")
14993 (set_attr "prefix_extra" "1")
14994 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14995 (set_attr "mode" "DI")])
14996
14997 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14998 ;;
14999 ;; AMD SSE4A instructions
15000 ;;
15001 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15002
15003 (define_insn "sse4a_movnt<mode>"
15004 [(set (match_operand:MODEF 0 "memory_operand" "=m")
15005 (unspec:MODEF
15006 [(match_operand:MODEF 1 "register_operand" "x")]
15007 UNSPEC_MOVNT))]
15008 "TARGET_SSE4A"
15009 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
15010 [(set_attr "type" "ssemov")
15011 (set_attr "mode" "<MODE>")])
15012
15013 (define_insn "sse4a_vmmovnt<mode>"
15014 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
15015 (unspec:<ssescalarmode>
15016 [(vec_select:<ssescalarmode>
15017 (match_operand:VF_128 1 "register_operand" "x")
15018 (parallel [(const_int 0)]))]
15019 UNSPEC_MOVNT))]
15020 "TARGET_SSE4A"
15021 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
15022 [(set_attr "type" "ssemov")
15023 (set_attr "mode" "<ssescalarmode>")])
15024
15025 (define_insn "sse4a_extrqi"
15026 [(set (match_operand:V2DI 0 "register_operand" "=x")
15027 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15028 (match_operand 2 "const_0_to_255_operand")
15029 (match_operand 3 "const_0_to_255_operand")]
15030 UNSPEC_EXTRQI))]
15031 "TARGET_SSE4A"
15032 "extrq\t{%3, %2, %0|%0, %2, %3}"
15033 [(set_attr "type" "sse")
15034 (set_attr "prefix_data16" "1")
15035 (set_attr "length_immediate" "2")
15036 (set_attr "mode" "TI")])
15037
15038 (define_insn "sse4a_extrq"
15039 [(set (match_operand:V2DI 0 "register_operand" "=x")
15040 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15041 (match_operand:V16QI 2 "register_operand" "x")]
15042 UNSPEC_EXTRQ))]
15043 "TARGET_SSE4A"
15044 "extrq\t{%2, %0|%0, %2}"
15045 [(set_attr "type" "sse")
15046 (set_attr "prefix_data16" "1")
15047 (set_attr "mode" "TI")])
15048
15049 (define_insn "sse4a_insertqi"
15050 [(set (match_operand:V2DI 0 "register_operand" "=x")
15051 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15052 (match_operand:V2DI 2 "register_operand" "x")
15053 (match_operand 3 "const_0_to_255_operand")
15054 (match_operand 4 "const_0_to_255_operand")]
15055 UNSPEC_INSERTQI))]
15056 "TARGET_SSE4A"
15057 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
15058 [(set_attr "type" "sseins")
15059 (set_attr "prefix_data16" "0")
15060 (set_attr "prefix_rep" "1")
15061 (set_attr "length_immediate" "2")
15062 (set_attr "mode" "TI")])
15063
15064 (define_insn "sse4a_insertq"
15065 [(set (match_operand:V2DI 0 "register_operand" "=x")
15066 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15067 (match_operand:V2DI 2 "register_operand" "x")]
15068 UNSPEC_INSERTQ))]
15069 "TARGET_SSE4A"
15070 "insertq\t{%2, %0|%0, %2}"
15071 [(set_attr "type" "sseins")
15072 (set_attr "prefix_data16" "0")
15073 (set_attr "prefix_rep" "1")
15074 (set_attr "mode" "TI")])
15075
15076 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15077 ;;
15078 ;; Intel SSE4.1 instructions
15079 ;;
15080 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15081
15082 ;; Mapping of immediate bits for blend instructions
15083 (define_mode_attr blendbits
15084 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
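;; Each immediate bit picks one element: bit i set selects element i of
;; operand 2, bit i clear keeps element i of operand 1.  A hypothetical
;; V4SF example (register names purely illustrative):
;;   blendps $5, %xmm1, %xmm0   ; imm 0b0101
;;   ; %xmm0 = { %xmm1[0], %xmm0[1], %xmm1[2], %xmm0[3] }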
15085
15086 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
15087 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15088 (vec_merge:VF_128_256
15089 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15090 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
15091 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
15092 "TARGET_SSE4_1"
15093 "@
15094 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15095 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15096 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15097 [(set_attr "isa" "noavx,noavx,avx")
15098 (set_attr "type" "ssemov")
15099 (set_attr "length_immediate" "1")
15100 (set_attr "prefix_data16" "1,1,*")
15101 (set_attr "prefix_extra" "1")
15102 (set_attr "prefix" "orig,orig,vex")
15103 (set_attr "mode" "<MODE>")])
15104
15105 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
15106 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15107 (unspec:VF_128_256
15108 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
15109 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15110 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
15111 UNSPEC_BLENDV))]
15112 "TARGET_SSE4_1"
15113 "@
15114 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15115 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15116 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15117 [(set_attr "isa" "noavx,noavx,avx")
15118 (set_attr "type" "ssemov")
15119 (set_attr "length_immediate" "1")
15120 (set_attr "prefix_data16" "1,1,*")
15121 (set_attr "prefix_extra" "1")
15122 (set_attr "prefix" "orig,orig,vex")
15123 (set_attr "btver2_decode" "vector,vector,vector")
15124 (set_attr "mode" "<MODE>")])
15125
15126 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
15127 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15128 (unspec:VF_128_256
15129 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
15130 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15131 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
15132 UNSPEC_DP))]
15133 "TARGET_SSE4_1"
15134 "@
15135 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15136 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15137 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15138 [(set_attr "isa" "noavx,noavx,avx")
15139 (set_attr "type" "ssemul")
15140 (set_attr "length_immediate" "1")
15141 (set_attr "prefix_data16" "1,1,*")
15142 (set_attr "prefix_extra" "1")
15143 (set_attr "prefix" "orig,orig,vex")
15144 (set_attr "btver2_decode" "vector,vector,vector")
15145 (set_attr "znver1_decode" "vector,vector,vector")
15146 (set_attr "mode" "<MODE>")])
15147
15148 ;; Mode attribute used by `vmovntdqa' pattern
15149 (define_mode_attr vi8_sse4_1_avx2_avx512
15150 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
15151
15152 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
15153 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
15154 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
15155 UNSPEC_MOVNTDQA))]
15156 "TARGET_SSE4_1"
15157 "%vmovntdqa\t{%1, %0|%0, %1}"
15158 [(set_attr "isa" "noavx,noavx,avx")
15159 (set_attr "type" "ssemov")
15160 (set_attr "prefix_extra" "1,1,*")
15161 (set_attr "prefix" "orig,orig,maybe_evex")
15162 (set_attr "mode" "<sseinsnmode>")])
15163
15164 (define_insn "<sse4_1_avx2>_mpsadbw"
15165 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
15166 (unspec:VI1_AVX2
15167 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
15168 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
15169 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
15170 UNSPEC_MPSADBW))]
15171 "TARGET_SSE4_1"
15172 "@
15173 mpsadbw\t{%3, %2, %0|%0, %2, %3}
15174 mpsadbw\t{%3, %2, %0|%0, %2, %3}
15175 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15176 [(set_attr "isa" "noavx,noavx,avx")
15177 (set_attr "type" "sselog1")
15178 (set_attr "length_immediate" "1")
15179 (set_attr "prefix_extra" "1")
15180 (set_attr "prefix" "orig,orig,vex")
15181 (set_attr "btver2_decode" "vector,vector,vector")
15182 (set_attr "znver1_decode" "vector,vector,vector")
15183 (set_attr "mode" "<sseinsnmode>")])
15184
15185 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
15186 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
15187 (vec_concat:VI2_AVX2
15188 (us_truncate:<ssehalfvecmode>
15189 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
15190 (us_truncate:<ssehalfvecmode>
15191 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
15192 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15193 "@
15194 packusdw\t{%2, %0|%0, %2}
15195 packusdw\t{%2, %0|%0, %2}
15196 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
15197 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15198 [(set_attr "isa" "noavx,noavx,avx,avx512bw")
15199 (set_attr "type" "sselog")
15200 (set_attr "prefix_extra" "1")
15201 (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
15202 (set_attr "mode" "<sseinsnmode>")])
15203
15204 (define_insn "<sse4_1_avx2>_pblendvb"
15205 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
15206 (unspec:VI1_AVX2
15207 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
15208 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
15209 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
15210 UNSPEC_BLENDV))]
15211 "TARGET_SSE4_1"
15212 "@
15213 pblendvb\t{%3, %2, %0|%0, %2, %3}
15214 pblendvb\t{%3, %2, %0|%0, %2, %3}
15215 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15216 [(set_attr "isa" "noavx,noavx,avx")
15217 (set_attr "type" "ssemov")
15218 (set_attr "prefix_extra" "1")
15219 (set_attr "length_immediate" "*,*,1")
15220 (set_attr "prefix" "orig,orig,vex")
15221 (set_attr "btver2_decode" "vector,vector,vector")
15222 (set_attr "mode" "<sseinsnmode>")])
15223
15224 (define_insn "sse4_1_pblendw"
15225 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
15226 (vec_merge:V8HI
15227 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
15228 (match_operand:V8HI 1 "register_operand" "0,0,x")
15229 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
15230 "TARGET_SSE4_1"
15231 "@
15232 pblendw\t{%3, %2, %0|%0, %2, %3}
15233 pblendw\t{%3, %2, %0|%0, %2, %3}
15234 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15235 [(set_attr "isa" "noavx,noavx,avx")
15236 (set_attr "type" "ssemov")
15237 (set_attr "prefix_extra" "1")
15238 (set_attr "length_immediate" "1")
15239 (set_attr "prefix" "orig,orig,vex")
15240 (set_attr "mode" "TI")])
15241
15242 ;; The builtin uses an 8-bit immediate.  Expand it to the 16-bit vec_merge mask.
15243 (define_expand "avx2_pblendw"
15244 [(set (match_operand:V16HI 0 "register_operand")
15245 (vec_merge:V16HI
15246 (match_operand:V16HI 2 "nonimmediate_operand")
15247 (match_operand:V16HI 1 "register_operand")
15248 (match_operand:SI 3 "const_0_to_255_operand")))]
15249 "TARGET_AVX2"
15250 {
15251 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
15252 operands[3] = GEN_INT (val << 8 | val);
15253 })
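;; For example, a builtin immediate of 0x0f becomes 0x0f0f here: bit i of the
;; hardware imm8 selects word i in both 128-bit lanes, so the RTL vec_merge
;; mask for V16HI simply repeats the 8-bit pattern.  The *avx2_pblendw insn
;; below masks the value back down to 8 bits when printing the instruction.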
15254
15255 (define_insn "*avx2_pblendw"
15256 [(set (match_operand:V16HI 0 "register_operand" "=x")
15257 (vec_merge:V16HI
15258 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
15259 (match_operand:V16HI 1 "register_operand" "x")
15260 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
15261 "TARGET_AVX2"
15262 {
15263 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
15264 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
15265 }
15266 [(set_attr "type" "ssemov")
15267 (set_attr "prefix_extra" "1")
15268 (set_attr "length_immediate" "1")
15269 (set_attr "prefix" "vex")
15270 (set_attr "mode" "OI")])
15271
15272 (define_insn "avx2_pblendd<mode>"
15273 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
15274 (vec_merge:VI4_AVX2
15275 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
15276 (match_operand:VI4_AVX2 1 "register_operand" "x")
15277 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
15278 "TARGET_AVX2"
15279 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15280 [(set_attr "type" "ssemov")
15281 (set_attr "prefix_extra" "1")
15282 (set_attr "length_immediate" "1")
15283 (set_attr "prefix" "vex")
15284 (set_attr "mode" "<sseinsnmode>")])
15285
15286 (define_insn "sse4_1_phminposuw"
15287 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
15288 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
15289 UNSPEC_PHMINPOSUW))]
15290 "TARGET_SSE4_1"
15291 "%vphminposuw\t{%1, %0|%0, %1}"
15292 [(set_attr "isa" "noavx,noavx,avx")
15293 (set_attr "type" "sselog1")
15294 (set_attr "prefix_extra" "1")
15295 (set_attr "prefix" "orig,orig,vex")
15296 (set_attr "mode" "TI")])
15297
15298 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
15299 [(set (match_operand:V16HI 0 "register_operand" "=v")
15300 (any_extend:V16HI
15301 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
15302 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15303 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15304 [(set_attr "type" "ssemov")
15305 (set_attr "prefix_extra" "1")
15306 (set_attr "prefix" "maybe_evex")
15307 (set_attr "mode" "OI")])
15308
15309 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
15310 [(set (match_operand:V32HI 0 "register_operand" "=v")
15311 (any_extend:V32HI
15312 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
15313 "TARGET_AVX512BW"
15314 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15315 [(set_attr "type" "ssemov")
15316 (set_attr "prefix_extra" "1")
15317 (set_attr "prefix" "evex")
15318 (set_attr "mode" "XI")])
15319
15320 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
15321 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
15322 (any_extend:V8HI
15323 (vec_select:V8QI
15324 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15325 (parallel [(const_int 0) (const_int 1)
15326 (const_int 2) (const_int 3)
15327 (const_int 4) (const_int 5)
15328 (const_int 6) (const_int 7)]))))]
15329 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15330 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15331 [(set_attr "isa" "noavx,noavx,avx")
15332 (set_attr "type" "ssemov")
15333 (set_attr "prefix_extra" "1")
15334 (set_attr "prefix" "orig,orig,maybe_evex")
15335 (set_attr "mode" "TI")])
15336
15337 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
15338 [(set (match_operand:V16SI 0 "register_operand" "=v")
15339 (any_extend:V16SI
15340 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
15341 "TARGET_AVX512F"
15342 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15343 [(set_attr "type" "ssemov")
15344 (set_attr "prefix" "evex")
15345 (set_attr "mode" "XI")])
15346
15347 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
15348 [(set (match_operand:V8SI 0 "register_operand" "=v")
15349 (any_extend:V8SI
15350 (vec_select:V8QI
15351 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15352 (parallel [(const_int 0) (const_int 1)
15353 (const_int 2) (const_int 3)
15354 (const_int 4) (const_int 5)
15355 (const_int 6) (const_int 7)]))))]
15356 "TARGET_AVX2 && <mask_avx512vl_condition>"
15357 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15358 [(set_attr "type" "ssemov")
15359 (set_attr "prefix_extra" "1")
15360 (set_attr "prefix" "maybe_evex")
15361 (set_attr "mode" "OI")])
15362
15363 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
15364 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
15365 (any_extend:V4SI
15366 (vec_select:V4QI
15367 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15368 (parallel [(const_int 0) (const_int 1)
15369 (const_int 2) (const_int 3)]))))]
15370 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15371 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15372 [(set_attr "isa" "noavx,noavx,avx")
15373 (set_attr "type" "ssemov")
15374 (set_attr "prefix_extra" "1")
15375 (set_attr "prefix" "orig,orig,maybe_evex")
15376 (set_attr "mode" "TI")])
15377
15378 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
15379 [(set (match_operand:V16SI 0 "register_operand" "=v")
15380 (any_extend:V16SI
15381 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
15382 "TARGET_AVX512F"
15383 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15384 [(set_attr "type" "ssemov")
15385 (set_attr "prefix" "evex")
15386 (set_attr "mode" "XI")])
15387
15388 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
15389 [(set (match_operand:V8SI 0 "register_operand" "=v")
15390 (any_extend:V8SI
15391 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
15392 "TARGET_AVX2 && <mask_avx512vl_condition>"
15393 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15394 [(set_attr "type" "ssemov")
15395 (set_attr "prefix_extra" "1")
15396 (set_attr "prefix" "maybe_evex")
15397 (set_attr "mode" "OI")])
15398
15399 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
15400 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
15401 (any_extend:V4SI
15402 (vec_select:V4HI
15403 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15404 (parallel [(const_int 0) (const_int 1)
15405 (const_int 2) (const_int 3)]))))]
15406 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15407 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15408 [(set_attr "isa" "noavx,noavx,avx")
15409 (set_attr "type" "ssemov")
15410 (set_attr "prefix_extra" "1")
15411 (set_attr "prefix" "orig,orig,maybe_evex")
15412 (set_attr "mode" "TI")])
15413
15414 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
15415 [(set (match_operand:V8DI 0 "register_operand" "=v")
15416 (any_extend:V8DI
15417 (vec_select:V8QI
15418 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15419 (parallel [(const_int 0) (const_int 1)
15420 (const_int 2) (const_int 3)
15421 (const_int 4) (const_int 5)
15422 (const_int 6) (const_int 7)]))))]
15423 "TARGET_AVX512F"
15424 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15425 [(set_attr "type" "ssemov")
15426 (set_attr "prefix" "evex")
15427 (set_attr "mode" "XI")])
15428
15429 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
15430 [(set (match_operand:V4DI 0 "register_operand" "=v")
15431 (any_extend:V4DI
15432 (vec_select:V4QI
15433 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15434 (parallel [(const_int 0) (const_int 1)
15435 (const_int 2) (const_int 3)]))))]
15436 "TARGET_AVX2 && <mask_avx512vl_condition>"
15437 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15438 [(set_attr "type" "ssemov")
15439 (set_attr "prefix_extra" "1")
15440 (set_attr "prefix" "maybe_evex")
15441 (set_attr "mode" "OI")])
15442
15443 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
15444 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15445 (any_extend:V2DI
15446 (vec_select:V2QI
15447 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15448 (parallel [(const_int 0) (const_int 1)]))))]
15449 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15450 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
15451 [(set_attr "isa" "noavx,noavx,avx")
15452 (set_attr "type" "ssemov")
15453 (set_attr "prefix_extra" "1")
15454 (set_attr "prefix" "orig,orig,maybe_evex")
15455 (set_attr "mode" "TI")])
15456
15457 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
15458 [(set (match_operand:V8DI 0 "register_operand" "=v")
15459 (any_extend:V8DI
15460 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
15461 "TARGET_AVX512F"
15462 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15463 [(set_attr "type" "ssemov")
15464 (set_attr "prefix" "evex")
15465 (set_attr "mode" "XI")])
15466
15467 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
15468 [(set (match_operand:V4DI 0 "register_operand" "=v")
15469 (any_extend:V4DI
15470 (vec_select:V4HI
15471 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
15472 (parallel [(const_int 0) (const_int 1)
15473 (const_int 2) (const_int 3)]))))]
15474 "TARGET_AVX2 && <mask_avx512vl_condition>"
15475 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15476 [(set_attr "type" "ssemov")
15477 (set_attr "prefix_extra" "1")
15478 (set_attr "prefix" "maybe_evex")
15479 (set_attr "mode" "OI")])
15480
15481 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
15482 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15483 (any_extend:V2DI
15484 (vec_select:V2HI
15485 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15486 (parallel [(const_int 0) (const_int 1)]))))]
15487 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15488 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15489 [(set_attr "isa" "noavx,noavx,avx")
15490 (set_attr "type" "ssemov")
15491 (set_attr "prefix_extra" "1")
15492 (set_attr "prefix" "orig,orig,maybe_evex")
15493 (set_attr "mode" "TI")])
15494
15495 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
15496 [(set (match_operand:V8DI 0 "register_operand" "=v")
15497 (any_extend:V8DI
15498 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
15499 "TARGET_AVX512F"
15500 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15501 [(set_attr "type" "ssemov")
15502 (set_attr "prefix" "evex")
15503 (set_attr "mode" "XI")])
15504
15505 (define_insn "avx2_<code>v4siv4di2<mask_name>"
15506 [(set (match_operand:V4DI 0 "register_operand" "=v")
15507 (any_extend:V4DI
15508 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
15509 "TARGET_AVX2 && <mask_avx512vl_condition>"
15510 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15511 [(set_attr "type" "ssemov")
15512 (set_attr "prefix" "maybe_evex")
15513 (set_attr "prefix_extra" "1")
15514 (set_attr "mode" "OI")])
15515
15516 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
15517 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15518 (any_extend:V2DI
15519 (vec_select:V2SI
15520 (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15521 (parallel [(const_int 0) (const_int 1)]))))]
15522 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15523 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15524 [(set_attr "isa" "noavx,noavx,avx")
15525 (set_attr "type" "ssemov")
15526 (set_attr "prefix_extra" "1")
15527 (set_attr "prefix" "orig,orig,maybe_evex")
15528 (set_attr "mode" "TI")])
15529
15530 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
15531 ;; setting FLAGS_REG.  But they are not really compare instructions.
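;; Roughly: vtestps/vtestpd examine only the sign bit of each element;
;; ZF is set when the ANDed sign bits are all zero and CF when the
;; AND-NOT sign bits are all zero, mirroring the ptest flag behaviour
;; described below.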
15532 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
15533 [(set (reg:CC FLAGS_REG)
15534 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
15535 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
15536 UNSPEC_VTESTP))]
15537 "TARGET_AVX"
15538 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
15539 [(set_attr "type" "ssecomi")
15540 (set_attr "prefix_extra" "1")
15541 (set_attr "prefix" "vex")
15542 (set_attr "mode" "<MODE>")])
15543
15544 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
15545 ;; but it is not really a compare instruction.
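;; In short: ZF is set when (operand 0 AND operand 1) is all zeros and CF is
;; set when (NOT operand 0 AND operand 1) is all zeros; the other arithmetic
;; flags are cleared, which is why the pattern only describes a CC set.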
15546 (define_insn "<sse4_1>_ptest<mode>"
15547 [(set (reg:CC FLAGS_REG)
15548 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
15549 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
15550 UNSPEC_PTEST))]
15551 "TARGET_SSE4_1"
15552 "%vptest\t{%1, %0|%0, %1}"
15553 [(set_attr "isa" "noavx,noavx,avx")
15554 (set_attr "type" "ssecomi")
15555 (set_attr "prefix_extra" "1")
15556 (set_attr "prefix" "orig,orig,vex")
15557 (set (attr "btver2_decode")
15558 (if_then_else
15559 (match_test "<sseinsnmode>mode==OImode")
15560 (const_string "vector")
15561 (const_string "*")))
15562 (set_attr "mode" "<sseinsnmode>")])
15563
15564 (define_insn "ptesttf2"
15565 [(set (reg:CC FLAGS_REG)
15566 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
15567 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
15568 UNSPEC_PTEST))]
15569 "TARGET_SSE4_1"
15570 "%vptest\t{%1, %0|%0, %1}"
15571 [(set_attr "isa" "noavx,noavx,avx")
15572 (set_attr "type" "ssecomi")
15573 (set_attr "prefix_extra" "1")
15574 (set_attr "prefix" "orig,orig,vex")
15575 (set_attr "mode" "TI")])
15576
15577 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
15578 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15579 (unspec:VF_128_256
15580 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
15581 (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
15582 UNSPEC_ROUND))]
15583 "TARGET_SSE4_1"
15584 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15585 [(set_attr "isa" "noavx,noavx,avx")
15586 (set_attr "type" "ssecvt")
15587 (set_attr "prefix_data16" "1,1,*")
15588 (set_attr "prefix_extra" "1")
15589 (set_attr "length_immediate" "1")
15590 (set_attr "prefix" "orig,orig,vex")
15591 (set_attr "mode" "<MODE>")])
15592
15593 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
15594 [(match_operand:<sseintvecmode> 0 "register_operand")
15595 (match_operand:VF1_128_256 1 "vector_operand")
15596 (match_operand:SI 2 "const_0_to_15_operand")]
15597 "TARGET_SSE4_1"
15598 {
15599 rtx tmp = gen_reg_rtx (<MODE>mode);
15600
15601 emit_insn
15602 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
15603 operands[2]));
15604 emit_insn
15605 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
15606 DONE;
15607 })
15608
15609 (define_expand "avx512f_round<castmode>512"
15610 [(match_operand:VF_512 0 "register_operand")
15611 (match_operand:VF_512 1 "nonimmediate_operand")
15612 (match_operand:SI 2 "const_0_to_15_operand")]
15613 "TARGET_AVX512F"
15614 {
15615 emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
15616 DONE;
15617 })
15618
15619 (define_expand "avx512f_roundps512_sfix"
15620 [(match_operand:V16SI 0 "register_operand")
15621 (match_operand:V16SF 1 "nonimmediate_operand")
15622 (match_operand:SI 2 "const_0_to_15_operand")]
15623 "TARGET_AVX512F"
15624 {
15625 rtx tmp = gen_reg_rtx (V16SFmode);
15626 emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
15627 emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
15628 DONE;
15629 })
15630
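;; For the V2DF case under AVX, the expander below concatenates the two
;; 128-bit inputs into a single V4DF so that one 256-bit vroundpd plus one
;; vcvttpd2dq conversion produces the packed V4SI result, instead of
;; rounding and converting each half separately.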
15631 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
15632 [(match_operand:<ssepackfltmode> 0 "register_operand")
15633 (match_operand:VF2 1 "vector_operand")
15634 (match_operand:VF2 2 "vector_operand")
15635 (match_operand:SI 3 "const_0_to_15_operand")]
15636 "TARGET_SSE4_1"
15637 {
15638 rtx tmp0, tmp1;
15639
15640 if (<MODE>mode == V2DFmode
15641 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
15642 {
15643 rtx tmp2 = gen_reg_rtx (V4DFmode);
15644
15645 tmp0 = gen_reg_rtx (V4DFmode);
15646 tmp1 = force_reg (V2DFmode, operands[1]);
15647
15648 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
15649 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
15650 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
15651 }
15652 else
15653 {
15654 tmp0 = gen_reg_rtx (<MODE>mode);
15655 tmp1 = gen_reg_rtx (<MODE>mode);
15656
15657 emit_insn
15658 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
15659 operands[3]));
15660 emit_insn
15661 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
15662 operands[3]));
15663 emit_insn
15664 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
15665 }
15666 DONE;
15667 })
15668
15669 (define_insn "sse4_1_round<ssescalarmodesuffix>"
15670 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
15671 (vec_merge:VF_128
15672 (unspec:VF_128
15673 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
15674 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
15675 UNSPEC_ROUND)
15676 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
15677 (const_int 1)))]
15678 "TARGET_SSE4_1"
15679 "@
15680 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15681 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15682 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
15683 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15684 [(set_attr "isa" "noavx,noavx,avx,avx512f")
15685 (set_attr "type" "ssecvt")
15686 (set_attr "length_immediate" "1")
15687 (set_attr "prefix_data16" "1,1,*,*")
15688 (set_attr "prefix_extra" "1")
15689 (set_attr "prefix" "orig,orig,vex,evex")
15690 (set_attr "mode" "<MODE>")])
15691
15692 (define_expand "round<mode>2"
15693 [(set (match_dup 3)
15694 (plus:VF
15695 (match_operand:VF 1 "register_operand")
15696 (match_dup 2)))
15697 (set (match_operand:VF 0 "register_operand")
15698 (unspec:VF
15699 [(match_dup 3) (match_dup 4)]
15700 UNSPEC_ROUND))]
15701 "TARGET_SSE4_1 && !flag_trapping_math"
15702 {
15703 machine_mode scalar_mode;
15704 const struct real_format *fmt;
15705 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
15706 rtx half, vec_half;
15707
15708 scalar_mode = GET_MODE_INNER (<MODE>mode);
15709
15710 /* Load nextafter (0.5, 0.0).  */
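  /* Using the value just below 0.5 (rather than 0.5 itself) keeps
     x + copysign (half, x) from rounding up across an integer boundary
     when x is itself just below 0.5: for the largest double smaller
     than 0.5, x + 0.5 rounds to 1.0 and would truncate to 1.0, while
     x + pred_half stays below 1.0 and truncates to 0.0 as expected.  */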
15711 fmt = REAL_MODE_FORMAT (scalar_mode);
15712 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
15713 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
15714 half = const_double_from_real_value (pred_half, scalar_mode);
15715
15716 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
15717 vec_half = force_reg (<MODE>mode, vec_half);
15718
15719 operands[2] = gen_reg_rtx (<MODE>mode);
15720 emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
15721
15722 operands[3] = gen_reg_rtx (<MODE>mode);
15723 operands[4] = GEN_INT (ROUND_TRUNC);
15724 })
15725
15726 (define_expand "round<mode>2_sfix"
15727 [(match_operand:<sseintvecmode> 0 "register_operand")
15728 (match_operand:VF1 1 "register_operand")]
15729 "TARGET_SSE4_1 && !flag_trapping_math"
15730 {
15731 rtx tmp = gen_reg_rtx (<MODE>mode);
15732
15733 emit_insn (gen_round<mode>2 (tmp, operands[1]));
15734
15735 emit_insn
15736 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
15737 DONE;
15738 })
15739
15740 (define_expand "round<mode>2_vec_pack_sfix"
15741 [(match_operand:<ssepackfltmode> 0 "register_operand")
15742 (match_operand:VF2 1 "register_operand")
15743 (match_operand:VF2 2 "register_operand")]
15744 "TARGET_SSE4_1 && !flag_trapping_math"
15745 {
15746 rtx tmp0, tmp1;
15747
15748 if (<MODE>mode == V2DFmode
15749 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
15750 {
15751 rtx tmp2 = gen_reg_rtx (V4DFmode);
15752
15753 tmp0 = gen_reg_rtx (V4DFmode);
15754 tmp1 = force_reg (V2DFmode, operands[1]);
15755
15756 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
15757 emit_insn (gen_roundv4df2 (tmp2, tmp0));
15758 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
15759 }
15760 else
15761 {
15762 tmp0 = gen_reg_rtx (<MODE>mode);
15763 tmp1 = gen_reg_rtx (<MODE>mode);
15764
15765 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
15766 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
15767
15768 emit_insn
15769 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
15770 }
15771 DONE;
15772 })
15773
15774 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15775 ;;
15776 ;; Intel SSE4.2 string/text processing instructions
15777 ;;
15778 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15779
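;; The compound pcmpestr/pcmpistr patterns below describe all three results
;; of the string compare (the ECX index, the XMM0 mask and the flags).  At
;; split time the REG_UNUSED notes tell us which results are actually live,
;; and only the matching single-result patterns are emitted; the index and
;; mask forms also set the flags, so the flags-only _cconly variant is used
;; only when nothing else is needed, and the insn is deleted outright when
;; no result is used at all.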
15780 (define_insn_and_split "sse4_2_pcmpestr"
15781 [(set (match_operand:SI 0 "register_operand" "=c,c")
15782 (unspec:SI
15783 [(match_operand:V16QI 2 "register_operand" "x,x")
15784 (match_operand:SI 3 "register_operand" "a,a")
15785 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
15786 (match_operand:SI 5 "register_operand" "d,d")
15787 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
15788 UNSPEC_PCMPESTR))
15789 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15790 (unspec:V16QI
15791 [(match_dup 2)
15792 (match_dup 3)
15793 (match_dup 4)
15794 (match_dup 5)
15795 (match_dup 6)]
15796 UNSPEC_PCMPESTR))
15797 (set (reg:CC FLAGS_REG)
15798 (unspec:CC
15799 [(match_dup 2)
15800 (match_dup 3)
15801 (match_dup 4)
15802 (match_dup 5)
15803 (match_dup 6)]
15804 UNSPEC_PCMPESTR))]
15805 "TARGET_SSE4_2
15806 && can_create_pseudo_p ()"
15807 "#"
15808 "&& 1"
15809 [(const_int 0)]
15810 {
15811 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15812 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15813 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15814
15815 if (ecx)
15816 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
15817 operands[3], operands[4],
15818 operands[5], operands[6]));
15819 if (xmm0)
15820 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
15821 operands[3], operands[4],
15822 operands[5], operands[6]));
15823 if (flags && !(ecx || xmm0))
15824 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
15825 operands[2], operands[3],
15826 operands[4], operands[5],
15827 operands[6]));
15828 if (!(flags || ecx || xmm0))
15829 emit_note (NOTE_INSN_DELETED);
15830
15831 DONE;
15832 }
15833 [(set_attr "type" "sselog")
15834 (set_attr "prefix_data16" "1")
15835 (set_attr "prefix_extra" "1")
15836 (set_attr "length_immediate" "1")
15837 (set_attr "memory" "none,load")
15838 (set_attr "mode" "TI")])
15839
15840 (define_insn "sse4_2_pcmpestri"
15841 [(set (match_operand:SI 0 "register_operand" "=c,c")
15842 (unspec:SI
15843 [(match_operand:V16QI 1 "register_operand" "x,x")
15844 (match_operand:SI 2 "register_operand" "a,a")
15845 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15846 (match_operand:SI 4 "register_operand" "d,d")
15847 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15848 UNSPEC_PCMPESTR))
15849 (set (reg:CC FLAGS_REG)
15850 (unspec:CC
15851 [(match_dup 1)
15852 (match_dup 2)
15853 (match_dup 3)
15854 (match_dup 4)
15855 (match_dup 5)]
15856 UNSPEC_PCMPESTR))]
15857 "TARGET_SSE4_2"
15858 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
15859 [(set_attr "type" "sselog")
15860 (set_attr "prefix_data16" "1")
15861 (set_attr "prefix_extra" "1")
15862 (set_attr "prefix" "maybe_vex")
15863 (set_attr "length_immediate" "1")
15864 (set_attr "btver2_decode" "vector")
15865 (set_attr "memory" "none,load")
15866 (set_attr "mode" "TI")])
15867
15868 (define_insn "sse4_2_pcmpestrm"
15869 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15870 (unspec:V16QI
15871 [(match_operand:V16QI 1 "register_operand" "x,x")
15872 (match_operand:SI 2 "register_operand" "a,a")
15873 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15874 (match_operand:SI 4 "register_operand" "d,d")
15875 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15876 UNSPEC_PCMPESTR))
15877 (set (reg:CC FLAGS_REG)
15878 (unspec:CC
15879 [(match_dup 1)
15880 (match_dup 2)
15881 (match_dup 3)
15882 (match_dup 4)
15883 (match_dup 5)]
15884 UNSPEC_PCMPESTR))]
15885 "TARGET_SSE4_2"
15886 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
15887 [(set_attr "type" "sselog")
15888 (set_attr "prefix_data16" "1")
15889 (set_attr "prefix_extra" "1")
15890 (set_attr "length_immediate" "1")
15891 (set_attr "prefix" "maybe_vex")
15892 (set_attr "btver2_decode" "vector")
15893 (set_attr "memory" "none,load")
15894 (set_attr "mode" "TI")])
15895
15896 (define_insn "sse4_2_pcmpestr_cconly"
15897 [(set (reg:CC FLAGS_REG)
15898 (unspec:CC
15899 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15900 (match_operand:SI 3 "register_operand" "a,a,a,a")
15901 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
15902 (match_operand:SI 5 "register_operand" "d,d,d,d")
15903 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
15904 UNSPEC_PCMPESTR))
15905 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15906 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
15907 "TARGET_SSE4_2"
15908 "@
15909 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15910 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15911 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
15912 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
15913 [(set_attr "type" "sselog")
15914 (set_attr "prefix_data16" "1")
15915 (set_attr "prefix_extra" "1")
15916 (set_attr "length_immediate" "1")
15917 (set_attr "memory" "none,load,none,load")
15918 (set_attr "btver2_decode" "vector,vector,vector,vector")
15919 (set_attr "prefix" "maybe_vex")
15920 (set_attr "mode" "TI")])
15921
15922 (define_insn_and_split "sse4_2_pcmpistr"
15923 [(set (match_operand:SI 0 "register_operand" "=c,c")
15924 (unspec:SI
15925 [(match_operand:V16QI 2 "register_operand" "x,x")
15926 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15927 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
15928 UNSPEC_PCMPISTR))
15929 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15930 (unspec:V16QI
15931 [(match_dup 2)
15932 (match_dup 3)
15933 (match_dup 4)]
15934 UNSPEC_PCMPISTR))
15935 (set (reg:CC FLAGS_REG)
15936 (unspec:CC
15937 [(match_dup 2)
15938 (match_dup 3)
15939 (match_dup 4)]
15940 UNSPEC_PCMPISTR))]
15941 "TARGET_SSE4_2
15942 && can_create_pseudo_p ()"
15943 "#"
15944 "&& 1"
15945 [(const_int 0)]
15946 {
15947 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15948 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15949 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15950
15951 if (ecx)
15952 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
15953 operands[3], operands[4]));
15954 if (xmm0)
15955 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
15956 operands[3], operands[4]));
15957 if (flags && !(ecx || xmm0))
15958 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
15959 operands[2], operands[3],
15960 operands[4]));
15961 if (!(flags || ecx || xmm0))
15962 emit_note (NOTE_INSN_DELETED);
15963
15964 DONE;
15965 }
15966 [(set_attr "type" "sselog")
15967 (set_attr "prefix_data16" "1")
15968 (set_attr "prefix_extra" "1")
15969 (set_attr "length_immediate" "1")
15970 (set_attr "memory" "none,load")
15971 (set_attr "mode" "TI")])
15972
15973 (define_insn "sse4_2_pcmpistri"
15974 [(set (match_operand:SI 0 "register_operand" "=c,c")
15975 (unspec:SI
15976 [(match_operand:V16QI 1 "register_operand" "x,x")
15977 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15978 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15979 UNSPEC_PCMPISTR))
15980 (set (reg:CC FLAGS_REG)
15981 (unspec:CC
15982 [(match_dup 1)
15983 (match_dup 2)
15984 (match_dup 3)]
15985 UNSPEC_PCMPISTR))]
15986 "TARGET_SSE4_2"
15987 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
15988 [(set_attr "type" "sselog")
15989 (set_attr "prefix_data16" "1")
15990 (set_attr "prefix_extra" "1")
15991 (set_attr "length_immediate" "1")
15992 (set_attr "prefix" "maybe_vex")
15993 (set_attr "memory" "none,load")
15994 (set_attr "btver2_decode" "vector")
15995 (set_attr "mode" "TI")])
15996
15997 (define_insn "sse4_2_pcmpistrm"
15998 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15999 (unspec:V16QI
16000 [(match_operand:V16QI 1 "register_operand" "x,x")
16001 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16002 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16003 UNSPEC_PCMPISTR))
16004 (set (reg:CC FLAGS_REG)
16005 (unspec:CC
16006 [(match_dup 1)
16007 (match_dup 2)
16008 (match_dup 3)]
16009 UNSPEC_PCMPISTR))]
16010 "TARGET_SSE4_2"
16011 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
16012 [(set_attr "type" "sselog")
16013 (set_attr "prefix_data16" "1")
16014 (set_attr "prefix_extra" "1")
16015 (set_attr "length_immediate" "1")
16016 (set_attr "prefix" "maybe_vex")
16017 (set_attr "memory" "none,load")
16018 (set_attr "btver2_decode" "vector")
16019 (set_attr "mode" "TI")])
16020
16021 (define_insn "sse4_2_pcmpistr_cconly"
16022 [(set (reg:CC FLAGS_REG)
16023 (unspec:CC
16024 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
16025 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
16026 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
16027 UNSPEC_PCMPISTR))
16028 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
16029 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
16030 "TARGET_SSE4_2"
16031 "@
16032 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
16033 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
16034 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
16035 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
16036 [(set_attr "type" "sselog")
16037 (set_attr "prefix_data16" "1")
16038 (set_attr "prefix_extra" "1")
16039 (set_attr "length_immediate" "1")
16040 (set_attr "memory" "none,load,none,load")
16041 (set_attr "prefix" "maybe_vex")
16042 (set_attr "btver2_decode" "vector,vector,vector,vector")
16043 (set_attr "mode" "TI")])
16044
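;; AVX-512PF gather/scatter prefetches.  Operand 4 is the locality hint and
;; is assumed to follow the _mm_hint encoding used by the intrinsics: 3
;; (_MM_HINT_T0) selects the *pf0* forms and 2 (_MM_HINT_T1) the *pf1*
;; forms; the scatter variants additionally accept 7 and 6 for the
;; exclusive (ET0/ET1) hints.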
16045 ;; Packed float variants
16046 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
16047 [(V8DI "V8SF") (V16SI "V16SF")])
16048
16049 (define_expand "avx512pf_gatherpf<mode>sf"
16050 [(unspec
16051 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16052 (mem:<GATHER_SCATTER_SF_MEM_MODE>
16053 (match_par_dup 5
16054 [(match_operand 2 "vsib_address_operand")
16055 (match_operand:VI48_512 1 "register_operand")
16056 (match_operand:SI 3 "const1248_operand")]))
16057 (match_operand:SI 4 "const_2_to_3_operand")]
16058 UNSPEC_GATHER_PREFETCH)]
16059 "TARGET_AVX512PF"
16060 {
16061 operands[5]
16062 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16063 operands[3]), UNSPEC_VSIBADDR);
16064 })
16065
16066 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
16067 [(unspec
16068 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16069 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
16070 [(unspec:P
16071 [(match_operand:P 2 "vsib_address_operand" "Tv")
16072 (match_operand:VI48_512 1 "register_operand" "v")
16073 (match_operand:SI 3 "const1248_operand" "n")]
16074 UNSPEC_VSIBADDR)])
16075 (match_operand:SI 4 "const_2_to_3_operand" "n")]
16076 UNSPEC_GATHER_PREFETCH)]
16077 "TARGET_AVX512PF"
16078 {
16079 switch (INTVAL (operands[4]))
16080 {
16081 case 3:
16082 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16083 case 2:
16084 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16085 default:
16086 gcc_unreachable ();
16087 }
16088 }
16089 [(set_attr "type" "sse")
16090 (set_attr "prefix" "evex")
16091 (set_attr "mode" "XI")])
16092
16093 ;; Packed double variants
16094 (define_expand "avx512pf_gatherpf<mode>df"
16095 [(unspec
16096 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16097 (mem:V8DF
16098 (match_par_dup 5
16099 [(match_operand 2 "vsib_address_operand")
16100 (match_operand:VI4_256_8_512 1 "register_operand")
16101 (match_operand:SI 3 "const1248_operand")]))
16102 (match_operand:SI 4 "const_2_to_3_operand")]
16103 UNSPEC_GATHER_PREFETCH)]
16104 "TARGET_AVX512PF"
16105 {
16106 operands[5]
16107 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16108 operands[3]), UNSPEC_VSIBADDR);
16109 })
16110
16111 (define_insn "*avx512pf_gatherpf<mode>df_mask"
16112 [(unspec
16113 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16114 (match_operator:V8DF 5 "vsib_mem_operator"
16115 [(unspec:P
16116 [(match_operand:P 2 "vsib_address_operand" "Tv")
16117 (match_operand:VI4_256_8_512 1 "register_operand" "v")
16118 (match_operand:SI 3 "const1248_operand" "n")]
16119 UNSPEC_VSIBADDR)])
16120 (match_operand:SI 4 "const_2_to_3_operand" "n")]
16121 UNSPEC_GATHER_PREFETCH)]
16122 "TARGET_AVX512PF"
16123 {
16124 switch (INTVAL (operands[4]))
16125 {
16126 case 3:
16127 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16128 case 2:
16129 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16130 default:
16131 gcc_unreachable ();
16132 }
16133 }
16134 [(set_attr "type" "sse")
16135 (set_attr "prefix" "evex")
16136 (set_attr "mode" "XI")])
16137
16138 ;; Packed float variants
16139 (define_expand "avx512pf_scatterpf<mode>sf"
16140 [(unspec
16141 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16142 (mem:<GATHER_SCATTER_SF_MEM_MODE>
16143 (match_par_dup 5
16144 [(match_operand 2 "vsib_address_operand")
16145 (match_operand:VI48_512 1 "register_operand")
16146 (match_operand:SI 3 "const1248_operand")]))
16147 (match_operand:SI 4 "const2367_operand")]
16148 UNSPEC_SCATTER_PREFETCH)]
16149 "TARGET_AVX512PF"
16150 {
16151 operands[5]
16152 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16153 operands[3]), UNSPEC_VSIBADDR);
16154 })
16155
16156 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
16157 [(unspec
16158 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16159 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
16160 [(unspec:P
16161 [(match_operand:P 2 "vsib_address_operand" "Tv")
16162 (match_operand:VI48_512 1 "register_operand" "v")
16163 (match_operand:SI 3 "const1248_operand" "n")]
16164 UNSPEC_VSIBADDR)])
16165 (match_operand:SI 4 "const2367_operand" "n")]
16166 UNSPEC_SCATTER_PREFETCH)]
16167 "TARGET_AVX512PF"
16168 {
16169 switch (INTVAL (operands[4]))
16170 {
16171 case 3:
16172 case 7:
16173 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16174 case 2:
16175 case 6:
16176 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16177 default:
16178 gcc_unreachable ();
16179 }
16180 }
16181 [(set_attr "type" "sse")
16182 (set_attr "prefix" "evex")
16183 (set_attr "mode" "XI")])
16184
16185 ;; Packed double variants
16186 (define_expand "avx512pf_scatterpf<mode>df"
16187 [(unspec
16188 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16189 (mem:V8DF
16190 (match_par_dup 5
16191 [(match_operand 2 "vsib_address_operand")
16192 (match_operand:VI4_256_8_512 1 "register_operand")
16193 (match_operand:SI 3 "const1248_operand")]))
16194 (match_operand:SI 4 "const2367_operand")]
16195 UNSPEC_SCATTER_PREFETCH)]
16196 "TARGET_AVX512PF"
16197 {
16198 operands[5]
16199 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16200 operands[3]), UNSPEC_VSIBADDR);
16201 })
16202
16203 (define_insn "*avx512pf_scatterpf<mode>df_mask"
16204 [(unspec
16205 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16206 (match_operator:V8DF 5 "vsib_mem_operator"
16207 [(unspec:P
16208 [(match_operand:P 2 "vsib_address_operand" "Tv")
16209 (match_operand:VI4_256_8_512 1 "register_operand" "v")
16210 (match_operand:SI 3 "const1248_operand" "n")]
16211 UNSPEC_VSIBADDR)])
16212 (match_operand:SI 4 "const2367_operand" "n")]
16213 UNSPEC_SCATTER_PREFETCH)]
16214 "TARGET_AVX512PF"
16215 {
16216 switch (INTVAL (operands[4]))
16217 {
16218 case 3:
16219 case 7:
16220 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16221 case 2:
16222 case 6:
16223 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16224 default:
16225 gcc_unreachable ();
16226 }
16227 }
16228 [(set_attr "type" "sse")
16229 (set_attr "prefix" "evex")
16230 (set_attr "mode" "XI")])
16231
16232 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
16233 [(set (match_operand:VF_512 0 "register_operand" "=v")
16234 (unspec:VF_512
16235 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16236 UNSPEC_EXP2))]
16237 "TARGET_AVX512ER"
16238 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16239 [(set_attr "prefix" "evex")
16240 (set_attr "type" "sse")
16241 (set_attr "mode" "<MODE>")])
16242
16243 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
16244 [(set (match_operand:VF_512 0 "register_operand" "=v")
16245 (unspec:VF_512
16246 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16247 UNSPEC_RCP28))]
16248 "TARGET_AVX512ER"
16249 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16250 [(set_attr "prefix" "evex")
16251 (set_attr "type" "sse")
16252 (set_attr "mode" "<MODE>")])
16253
16254 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
16255 [(set (match_operand:VF_128 0 "register_operand" "=v")
16256 (vec_merge:VF_128
16257 (unspec:VF_128
16258 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16259 UNSPEC_RCP28)
16260 (match_operand:VF_128 2 "register_operand" "v")
16261 (const_int 1)))]
16262 "TARGET_AVX512ER"
16263 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
16264 [(set_attr "length_immediate" "1")
16265 (set_attr "prefix" "evex")
16266 (set_attr "type" "sse")
16267 (set_attr "mode" "<MODE>")])
16268
16269 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
16270 [(set (match_operand:VF_512 0 "register_operand" "=v")
16271 (unspec:VF_512
16272 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16273 UNSPEC_RSQRT28))]
16274 "TARGET_AVX512ER"
16275 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16276 [(set_attr "prefix" "evex")
16277 (set_attr "type" "sse")
16278 (set_attr "mode" "<MODE>")])
16279
16280 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
16281 [(set (match_operand:VF_128 0 "register_operand" "=v")
16282 (vec_merge:VF_128
16283 (unspec:VF_128
16284 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16285 UNSPEC_RSQRT28)
16286 (match_operand:VF_128 2 "register_operand" "v")
16287 (const_int 1)))]
16288 "TARGET_AVX512ER"
16289 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
16290 [(set_attr "length_immediate" "1")
16291 (set_attr "type" "sse")
16292 (set_attr "prefix" "evex")
16293 (set_attr "mode" "<MODE>")])
16294
16295 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16296 ;;
16297 ;; XOP instructions
16298 ;;
16299 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16300
16301 (define_code_iterator xop_plus [plus ss_plus])
16302
16303 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
16304 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
16305
16306 ;; XOP parallel integer multiply/add instructions.
16307
16308 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
16309 [(set (match_operand:VI24_128 0 "register_operand" "=x")
16310 (xop_plus:VI24_128
16311 (mult:VI24_128
16312 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
16313 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
16314 (match_operand:VI24_128 3 "register_operand" "x")))]
16315 "TARGET_XOP"
16316 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16317 [(set_attr "type" "ssemuladd")
16318 (set_attr "mode" "TI")])
16319
16320 (define_insn "xop_p<macs>dql"
16321 [(set (match_operand:V2DI 0 "register_operand" "=x")
16322 (xop_plus:V2DI
16323 (mult:V2DI
16324 (sign_extend:V2DI
16325 (vec_select:V2SI
16326 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
16327 (parallel [(const_int 0) (const_int 2)])))
16328 (sign_extend:V2DI
16329 (vec_select:V2SI
16330 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
16331 (parallel [(const_int 0) (const_int 2)]))))
16332 (match_operand:V2DI 3 "register_operand" "x")))]
16333 "TARGET_XOP"
16334 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16335 [(set_attr "type" "ssemuladd")
16336 (set_attr "mode" "TI")])
16337
16338 (define_insn "xop_p<macs>dqh"
16339 [(set (match_operand:V2DI 0 "register_operand" "=x")
16340 (xop_plus:V2DI
16341 (mult:V2DI
16342 (sign_extend:V2DI
16343 (vec_select:V2SI
16344 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
16345 (parallel [(const_int 1) (const_int 3)])))
16346 (sign_extend:V2DI
16347 (vec_select:V2SI
16348 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
16349 (parallel [(const_int 1) (const_int 3)]))))
16350 (match_operand:V2DI 3 "register_operand" "x")))]
16351 "TARGET_XOP"
16352 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16353 [(set_attr "type" "ssemuladd")
16354 (set_attr "mode" "TI")])
16355
16356 ;; XOP parallel integer multiply/add instructions for the intrinsics.
16357 (define_insn "xop_p<macs>wd"
16358 [(set (match_operand:V4SI 0 "register_operand" "=x")
16359 (xop_plus:V4SI
16360 (mult:V4SI
16361 (sign_extend:V4SI
16362 (vec_select:V4HI
16363 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
16364 (parallel [(const_int 1) (const_int 3)
16365 (const_int 5) (const_int 7)])))
16366 (sign_extend:V4SI
16367 (vec_select:V4HI
16368 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
16369 (parallel [(const_int 1) (const_int 3)
16370 (const_int 5) (const_int 7)]))))
16371 (match_operand:V4SI 3 "register_operand" "x")))]
16372 "TARGET_XOP"
16373 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16374 [(set_attr "type" "ssemuladd")
16375 (set_attr "mode" "TI")])
16376
16377 (define_insn "xop_p<madcs>wd"
16378 [(set (match_operand:V4SI 0 "register_operand" "=x")
16379 (xop_plus:V4SI
16380 (plus:V4SI
16381 (mult:V4SI
16382 (sign_extend:V4SI
16383 (vec_select:V4HI
16384 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
16385 (parallel [(const_int 0) (const_int 2)
16386 (const_int 4) (const_int 6)])))
16387 (sign_extend:V4SI
16388 (vec_select:V4HI
16389 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
16390 (parallel [(const_int 0) (const_int 2)
16391 (const_int 4) (const_int 6)]))))
16392 (mult:V4SI
16393 (sign_extend:V4SI
16394 (vec_select:V4HI
16395 (match_dup 1)
16396 (parallel [(const_int 1) (const_int 3)
16397 (const_int 5) (const_int 7)])))
16398 (sign_extend:V4SI
16399 (vec_select:V4HI
16400 (match_dup 2)
16401 (parallel [(const_int 1) (const_int 3)
16402 (const_int 5) (const_int 7)])))))
16403 (match_operand:V4SI 3 "register_operand" "x")))]
16404 "TARGET_XOP"
16405 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16406 [(set_attr "type" "ssemuladd")
16407 (set_attr "mode" "TI")])
16408
16409 ;; XOP parallel XMM conditional moves
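;; vpcmov performs a bit-wise select: each destination bit comes from
;; operand 1 where the corresponding bit of operand 3 is set, and from
;; operand 2 where it is clear.  Expressing it as if_then_else therefore
;; assumes operand 3 holds element-wise all-ones/all-zeros masks, as
;; produced by vector comparisons.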
16410 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
16411 [(set (match_operand:V 0 "register_operand" "=x,x")
16412 (if_then_else:V
16413 (match_operand:V 3 "nonimmediate_operand" "x,m")
16414 (match_operand:V 1 "register_operand" "x,x")
16415 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
16416 "TARGET_XOP"
16417 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16418 [(set_attr "type" "sse4arg")])
16419
16420 ;; XOP horizontal add/subtract instructions
16421 (define_insn "xop_phadd<u>bw"
16422 [(set (match_operand:V8HI 0 "register_operand" "=x")
16423 (plus:V8HI
16424 (any_extend:V8HI
16425 (vec_select:V8QI
16426 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16427 (parallel [(const_int 0) (const_int 2)
16428 (const_int 4) (const_int 6)
16429 (const_int 8) (const_int 10)
16430 (const_int 12) (const_int 14)])))
16431 (any_extend:V8HI
16432 (vec_select:V8QI
16433 (match_dup 1)
16434 (parallel [(const_int 1) (const_int 3)
16435 (const_int 5) (const_int 7)
16436 (const_int 9) (const_int 11)
16437 (const_int 13) (const_int 15)])))))]
16438 "TARGET_XOP"
16439 "vphadd<u>bw\t{%1, %0|%0, %1}"
16440 [(set_attr "type" "sseiadd1")])
16441
16442 (define_insn "xop_phadd<u>bd"
16443 [(set (match_operand:V4SI 0 "register_operand" "=x")
16444 (plus:V4SI
16445 (plus:V4SI
16446 (any_extend:V4SI
16447 (vec_select:V4QI
16448 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16449 (parallel [(const_int 0) (const_int 4)
16450 (const_int 8) (const_int 12)])))
16451 (any_extend:V4SI
16452 (vec_select:V4QI
16453 (match_dup 1)
16454 (parallel [(const_int 1) (const_int 5)
16455 (const_int 9) (const_int 13)]))))
16456 (plus:V4SI
16457 (any_extend:V4SI
16458 (vec_select:V4QI
16459 (match_dup 1)
16460 (parallel [(const_int 2) (const_int 6)
16461 (const_int 10) (const_int 14)])))
16462 (any_extend:V4SI
16463 (vec_select:V4QI
16464 (match_dup 1)
16465 (parallel [(const_int 3) (const_int 7)
16466 (const_int 11) (const_int 15)]))))))]
16467 "TARGET_XOP"
16468 "vphadd<u>bd\t{%1, %0|%0, %1}"
16469 [(set_attr "type" "sseiadd1")])
16470
16471 (define_insn "xop_phadd<u>bq"
16472 [(set (match_operand:V2DI 0 "register_operand" "=x")
16473 (plus:V2DI
16474 (plus:V2DI
16475 (plus:V2DI
16476 (any_extend:V2DI
16477 (vec_select:V2QI
16478 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16479 (parallel [(const_int 0) (const_int 8)])))
16480 (any_extend:V2DI
16481 (vec_select:V2QI
16482 (match_dup 1)
16483 (parallel [(const_int 1) (const_int 9)]))))
16484 (plus:V2DI
16485 (any_extend:V2DI
16486 (vec_select:V2QI
16487 (match_dup 1)
16488 (parallel [(const_int 2) (const_int 10)])))
16489 (any_extend:V2DI
16490 (vec_select:V2QI
16491 (match_dup 1)
16492 (parallel [(const_int 3) (const_int 11)])))))
16493 (plus:V2DI
16494 (plus:V2DI
16495 (any_extend:V2DI
16496 (vec_select:V2QI
16497 (match_dup 1)
16498 (parallel [(const_int 4) (const_int 12)])))
16499 (any_extend:V2DI
16500 (vec_select:V2QI
16501 (match_dup 1)
16502 (parallel [(const_int 5) (const_int 13)]))))
16503 (plus:V2DI
16504 (any_extend:V2DI
16505 (vec_select:V2QI
16506 (match_dup 1)
16507 (parallel [(const_int 6) (const_int 14)])))
16508 (any_extend:V2DI
16509 (vec_select:V2QI
16510 (match_dup 1)
16511 (parallel [(const_int 7) (const_int 15)])))))))]
16512 "TARGET_XOP"
16513 "vphadd<u>bq\t{%1, %0|%0, %1}"
16514 [(set_attr "type" "sseiadd1")])
16515
16516 (define_insn "xop_phadd<u>wd"
16517 [(set (match_operand:V4SI 0 "register_operand" "=x")
16518 (plus:V4SI
16519 (any_extend:V4SI
16520 (vec_select:V4HI
16521 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16522 (parallel [(const_int 0) (const_int 2)
16523 (const_int 4) (const_int 6)])))
16524 (any_extend:V4SI
16525 (vec_select:V4HI
16526 (match_dup 1)
16527 (parallel [(const_int 1) (const_int 3)
16528 (const_int 5) (const_int 7)])))))]
16529 "TARGET_XOP"
16530 "vphadd<u>wd\t{%1, %0|%0, %1}"
16531 [(set_attr "type" "sseiadd1")])
16532
16533 (define_insn "xop_phadd<u>wq"
16534 [(set (match_operand:V2DI 0 "register_operand" "=x")
16535 (plus:V2DI
16536 (plus:V2DI
16537 (any_extend:V2DI
16538 (vec_select:V2HI
16539 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16540 (parallel [(const_int 0) (const_int 4)])))
16541 (any_extend:V2DI
16542 (vec_select:V2HI
16543 (match_dup 1)
16544 (parallel [(const_int 1) (const_int 5)]))))
16545 (plus:V2DI
16546 (any_extend:V2DI
16547 (vec_select:V2HI
16548 (match_dup 1)
16549 (parallel [(const_int 2) (const_int 6)])))
16550 (any_extend:V2DI
16551 (vec_select:V2HI
16552 (match_dup 1)
16553 (parallel [(const_int 3) (const_int 7)]))))))]
16554 "TARGET_XOP"
16555 "vphadd<u>wq\t{%1, %0|%0, %1}"
16556 [(set_attr "type" "sseiadd1")])
16557
16558 (define_insn "xop_phadd<u>dq"
16559 [(set (match_operand:V2DI 0 "register_operand" "=x")
16560 (plus:V2DI
16561 (any_extend:V2DI
16562 (vec_select:V2SI
16563 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
16564 (parallel [(const_int 0) (const_int 2)])))
16565 (any_extend:V2DI
16566 (vec_select:V2SI
16567 (match_dup 1)
16568 (parallel [(const_int 1) (const_int 3)])))))]
16569 "TARGET_XOP"
16570 "vphadd<u>dq\t{%1, %0|%0, %1}"
16571 [(set_attr "type" "sseiadd1")])
16572
16573 (define_insn "xop_phsubbw"
16574 [(set (match_operand:V8HI 0 "register_operand" "=x")
16575 (minus:V8HI
16576 (sign_extend:V8HI
16577 (vec_select:V8QI
16578 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16579 (parallel [(const_int 0) (const_int 2)
16580 (const_int 4) (const_int 6)
16581 (const_int 8) (const_int 10)
16582 (const_int 12) (const_int 14)])))
16583 (sign_extend:V8HI
16584 (vec_select:V8QI
16585 (match_dup 1)
16586 (parallel [(const_int 1) (const_int 3)
16587 (const_int 5) (const_int 7)
16588 (const_int 9) (const_int 11)
16589 (const_int 13) (const_int 15)])))))]
16590 "TARGET_XOP"
16591 "vphsubbw\t{%1, %0|%0, %1}"
16592 [(set_attr "type" "sseiadd1")])
16593
16594 (define_insn "xop_phsubwd"
16595 [(set (match_operand:V4SI 0 "register_operand" "=x")
16596 (minus:V4SI
16597 (sign_extend:V4SI
16598 (vec_select:V4HI
16599 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16600 (parallel [(const_int 0) (const_int 2)
16601 (const_int 4) (const_int 6)])))
16602 (sign_extend:V4SI
16603 (vec_select:V4HI
16604 (match_dup 1)
16605 (parallel [(const_int 1) (const_int 3)
16606 (const_int 5) (const_int 7)])))))]
16607 "TARGET_XOP"
16608 "vphsubwd\t{%1, %0|%0, %1}"
16609 [(set_attr "type" "sseiadd1")])
16610
16611 (define_insn "xop_phsubdq"
16612 [(set (match_operand:V2DI 0 "register_operand" "=x")
16613 (minus:V2DI
16614 (sign_extend:V2DI
16615 (vec_select:V2SI
16616 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
16617 (parallel [(const_int 0) (const_int 2)])))
16618 (sign_extend:V2DI
16619 (vec_select:V2SI
16620 (match_dup 1)
16621 (parallel [(const_int 1) (const_int 3)])))))]
16622 "TARGET_XOP"
16623 "vphsubdq\t{%1, %0|%0, %1}"
16624 [(set_attr "type" "sseiadd1")])
16625
16626 ;; XOP permute instructions
16627 (define_insn "xop_pperm"
16628 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
16629 (unspec:V16QI
16630 [(match_operand:V16QI 1 "register_operand" "x,x")
16631 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16632 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
16633 UNSPEC_XOP_PERMUTE))]
16634 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16635 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16636 [(set_attr "type" "sse4arg")
16637 (set_attr "mode" "TI")])
16638
16639 ;; XOP pack instructions that combine two vectors into a smaller vector
16640 (define_insn "xop_pperm_pack_v2di_v4si"
16641 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
16642 (vec_concat:V4SI
16643 (truncate:V2SI
16644 (match_operand:V2DI 1 "register_operand" "x,x"))
16645 (truncate:V2SI
16646 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
16647 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16648 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16649 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16650 [(set_attr "type" "sse4arg")
16651 (set_attr "mode" "TI")])
16652
16653 (define_insn "xop_pperm_pack_v4si_v8hi"
16654 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
16655 (vec_concat:V8HI
16656 (truncate:V4HI
16657 (match_operand:V4SI 1 "register_operand" "x,x"))
16658 (truncate:V4HI
16659 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
16660 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16661 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16662 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16663 [(set_attr "type" "sse4arg")
16664 (set_attr "mode" "TI")])
16665
16666 (define_insn "xop_pperm_pack_v8hi_v16qi"
16667 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
16668 (vec_concat:V16QI
16669 (truncate:V8QI
16670 (match_operand:V8HI 1 "register_operand" "x,x"))
16671 (truncate:V8QI
16672 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
16673 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16674 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16675 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16676 [(set_attr "type" "sse4arg")
16677 (set_attr "mode" "TI")])
16678
16679 ;; XOP packed rotate instructions
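;; The expanders below accept either an immediate or a variable rotate
;; count.  An in-range immediate maps directly to vprot with an 8-bit
;; immediate; a variable scalar count is broadcast into a vector and fed
;; to xop_vrotl<mode>3, which rotates left for positive element counts and
;; right for negative ones, so the rotate-right expanders simply negate
;; the count first.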
16680 (define_expand "rotl<mode>3"
16681 [(set (match_operand:VI_128 0 "register_operand")
16682 (rotate:VI_128
16683 (match_operand:VI_128 1 "nonimmediate_operand")
16684 (match_operand:SI 2 "general_operand")))]
16685 "TARGET_XOP"
16686 {
16687 /* If we were given a scalar count, broadcast it into a parallel.  */
16688 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
16689 {
16690 rtvec vs = rtvec_alloc (<ssescalarnum>);
16691 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
16692 rtx reg = gen_reg_rtx (<MODE>mode);
16693 rtx op2 = operands[2];
16694 int i;
16695
16696 if (GET_MODE (op2) != <ssescalarmode>mode)
16697 {
16698 op2 = gen_reg_rtx (<ssescalarmode>mode);
16699 convert_move (op2, operands[2], false);
16700 }
16701
16702 for (i = 0; i < <ssescalarnum>; i++)
16703 RTVEC_ELT (vs, i) = op2;
16704
16705 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
16706 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
16707 DONE;
16708 }
16709 })
16710
16711 (define_expand "rotr<mode>3"
16712 [(set (match_operand:VI_128 0 "register_operand")
16713 (rotatert:VI_128
16714 (match_operand:VI_128 1 "nonimmediate_operand")
16715 (match_operand:SI 2 "general_operand")))]
16716 "TARGET_XOP"
16717 {
16718 /* If we were given a scalar count, broadcast it into a parallel.  */
16719 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
16720 {
16721 rtvec vs = rtvec_alloc (<ssescalarnum>);
16722 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
16723 rtx neg = gen_reg_rtx (<MODE>mode);
16724 rtx reg = gen_reg_rtx (<MODE>mode);
16725 rtx op2 = operands[2];
16726 int i;
16727
16728 if (GET_MODE (op2) != <ssescalarmode>mode)
16729 {
16730 op2 = gen_reg_rtx (<ssescalarmode>mode);
16731 convert_move (op2, operands[2], false);
16732 }
16733
16734 for (i = 0; i < <ssescalarnum>; i++)
16735 RTVEC_ELT (vs, i) = op2;
16736
16737 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
16738 emit_insn (gen_neg<mode>2 (neg, reg));
16739 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
16740 DONE;
16741 }
16742 })
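;; The two expanders above splat a non-constant scalar count into a vector
;; (negating it for the right-rotate case) and fall back to xop_vrotl<mode>3.
;; A rough source-level equivalent, assuming the usual XOP rotate intrinsics
;; from <x86intrin.h> and -mxop (illustrative only):
;;
;;   #include <x86intrin.h>
;;   __m128i
;;   rotl5 (__m128i a)
;;   {
;;     return _mm_roti_epi32 (a, 5);               /* constant count */
;;   }
;;   __m128i
;;   rotr_var (__m128i a, int n)
;;   {
;;     /* Right rotate == left rotate by the negated, splatted count.  */
;;     return _mm_rot_epi32 (a, _mm_set1_epi32 (-n));
;;   }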
16743
16744 (define_insn "xop_rotl<mode>3"
16745 [(set (match_operand:VI_128 0 "register_operand" "=x")
16746 (rotate:VI_128
16747 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
16748 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
16749 "TARGET_XOP"
16750 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16751 [(set_attr "type" "sseishft")
16752 (set_attr "length_immediate" "1")
16753 (set_attr "mode" "TI")])
16754
16755 (define_insn "xop_rotr<mode>3"
16756 [(set (match_operand:VI_128 0 "register_operand" "=x")
16757 (rotatert:VI_128
16758 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
16759 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
16760 "TARGET_XOP"
16761 {
16762 operands[3]
16763 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
16764 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
16765 }
16766 [(set_attr "type" "sseishft")
16767 (set_attr "length_immediate" "1")
16768 (set_attr "mode" "TI")])
16769
16770 (define_expand "vrotr<mode>3"
16771 [(match_operand:VI_128 0 "register_operand")
16772 (match_operand:VI_128 1 "register_operand")
16773 (match_operand:VI_128 2 "register_operand")]
16774 "TARGET_XOP"
16775 {
16776 rtx reg = gen_reg_rtx (<MODE>mode);
16777 emit_insn (gen_neg<mode>2 (reg, operands[2]));
16778 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
16779 DONE;
16780 })
16781
16782 (define_expand "vrotl<mode>3"
16783 [(match_operand:VI_128 0 "register_operand")
16784 (match_operand:VI_128 1 "register_operand")
16785 (match_operand:VI_128 2 "register_operand")]
16786 "TARGET_XOP"
16787 {
16788 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
16789 DONE;
16790 })
16791
16792 (define_insn "xop_vrotl<mode>3"
16793 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16794 (if_then_else:VI_128
16795 (ge:VI_128
16796 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16797 (const_int 0))
16798 (rotate:VI_128
16799 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16800 (match_dup 2))
16801 (rotatert:VI_128
16802 (match_dup 1)
16803 (neg:VI_128 (match_dup 2)))))]
16804 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16805 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16806 [(set_attr "type" "sseishft")
16807 (set_attr "prefix_data16" "0")
16808 (set_attr "prefix_extra" "2")
16809 (set_attr "mode" "TI")])
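;; The if_then_else above encodes the per-element VPROT semantics: a
;; non-negative count rotates left, a negative count rotates right by its
;; absolute value.  A scalar C sketch of one 32-bit element (illustrative
;; only, ignoring how the count byte is extracted from the element):
;;
;;   unsigned int
;;   vprotd_element (unsigned int x, int count)
;;   {
;;     unsigned int n = (unsigned int) (count < 0 ? -count : count) & 31;
;;     return count < 0 ? (x >> n) | (x << ((32 - n) & 31))
;;                      : (x << n) | (x >> ((32 - n) & 31));
;;   }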
16810
16811 ;; XOP packed shift instructions.
16812 (define_expand "vlshr<mode>3"
16813 [(set (match_operand:VI12_128 0 "register_operand")
16814 (lshiftrt:VI12_128
16815 (match_operand:VI12_128 1 "register_operand")
16816 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16817 "TARGET_XOP"
16818 {
16819 rtx neg = gen_reg_rtx (<MODE>mode);
16820 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16821 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
16822 DONE;
16823 })
16824
16825 (define_expand "vlshr<mode>3"
16826 [(set (match_operand:VI48_128 0 "register_operand")
16827 (lshiftrt:VI48_128
16828 (match_operand:VI48_128 1 "register_operand")
16829 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16830 "TARGET_AVX2 || TARGET_XOP"
16831 {
16832 if (!TARGET_AVX2)
16833 {
16834 rtx neg = gen_reg_rtx (<MODE>mode);
16835 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16836 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
16837 DONE;
16838 }
16839 })
16840
16841 (define_expand "vlshr<mode>3"
16842 [(set (match_operand:VI48_512 0 "register_operand")
16843 (lshiftrt:VI48_512
16844 (match_operand:VI48_512 1 "register_operand")
16845 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16846 "TARGET_AVX512F")
16847
16848 (define_expand "vlshr<mode>3"
16849 [(set (match_operand:VI48_256 0 "register_operand")
16850 (lshiftrt:VI48_256
16851 (match_operand:VI48_256 1 "register_operand")
16852 (match_operand:VI48_256 2 "nonimmediate_operand")))]
16853 "TARGET_AVX2")
16854
16855 (define_expand "vashrv8hi3<mask_name>"
16856 [(set (match_operand:V8HI 0 "register_operand")
16857 (ashiftrt:V8HI
16858 (match_operand:V8HI 1 "register_operand")
16859 (match_operand:V8HI 2 "nonimmediate_operand")))]
16860 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
16861 {
16862 if (TARGET_XOP)
16863 {
16864 rtx neg = gen_reg_rtx (V8HImode);
16865 emit_insn (gen_negv8hi2 (neg, operands[2]));
16866 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
16867 DONE;
16868 }
16869 })
16870
16871 (define_expand "vashrv16qi3"
16872 [(set (match_operand:V16QI 0 "register_operand")
16873 (ashiftrt:V16QI
16874 (match_operand:V16QI 1 "register_operand")
16875 (match_operand:V16QI 2 "nonimmediate_operand")))]
16876 "TARGET_XOP"
16877 {
16878 rtx neg = gen_reg_rtx (V16QImode);
16879 emit_insn (gen_negv16qi2 (neg, operands[2]));
16880 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
16881 DONE;
16882 })
16883
16884 (define_expand "vashrv2di3<mask_name>"
16885 [(set (match_operand:V2DI 0 "register_operand")
16886 (ashiftrt:V2DI
16887 (match_operand:V2DI 1 "register_operand")
16888 (match_operand:V2DI 2 "nonimmediate_operand")))]
16889 "TARGET_XOP || TARGET_AVX512VL"
16890 {
16891 if (TARGET_XOP)
16892 {
16893 rtx neg = gen_reg_rtx (V2DImode);
16894 emit_insn (gen_negv2di2 (neg, operands[2]));
16895 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
16896 DONE;
16897 }
16898 })
16899
16900 (define_expand "vashrv4si3"
16901 [(set (match_operand:V4SI 0 "register_operand")
16902 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
16903 (match_operand:V4SI 2 "nonimmediate_operand")))]
16904 "TARGET_AVX2 || TARGET_XOP"
16905 {
16906 if (!TARGET_AVX2)
16907 {
16908 rtx neg = gen_reg_rtx (V4SImode);
16909 emit_insn (gen_negv4si2 (neg, operands[2]));
16910 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
16911 DONE;
16912 }
16913 })
16914
16915 (define_expand "vashrv16si3"
16916 [(set (match_operand:V16SI 0 "register_operand")
16917 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
16918 (match_operand:V16SI 2 "nonimmediate_operand")))]
16919 "TARGET_AVX512F")
16920
16921 (define_expand "vashrv8si3"
16922 [(set (match_operand:V8SI 0 "register_operand")
16923 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
16924 (match_operand:V8SI 2 "nonimmediate_operand")))]
16925 "TARGET_AVX2")
16926
16927 (define_expand "vashl<mode>3"
16928 [(set (match_operand:VI12_128 0 "register_operand")
16929 (ashift:VI12_128
16930 (match_operand:VI12_128 1 "register_operand")
16931 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16932 "TARGET_XOP"
16933 {
16934 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16935 DONE;
16936 })
16937
16938 (define_expand "vashl<mode>3"
16939 [(set (match_operand:VI48_128 0 "register_operand")
16940 (ashift:VI48_128
16941 (match_operand:VI48_128 1 "register_operand")
16942 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16943 "TARGET_AVX2 || TARGET_XOP"
16944 {
16945 if (!TARGET_AVX2)
16946 {
16947 operands[2] = force_reg (<MODE>mode, operands[2]);
16948 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16949 DONE;
16950 }
16951 })
16952
16953 (define_expand "vashl<mode>3"
16954 [(set (match_operand:VI48_512 0 "register_operand")
16955 (ashift:VI48_512
16956 (match_operand:VI48_512 1 "register_operand")
16957 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16958 "TARGET_AVX512F")
16959
16960 (define_expand "vashl<mode>3"
16961 [(set (match_operand:VI48_256 0 "register_operand")
16962 (ashift:VI48_256
16963 (match_operand:VI48_256 1 "register_operand")
16964 (match_operand:VI48_256 2 "nonimmediate_operand")))]
16965 "TARGET_AVX2")
16966
16967 (define_insn "xop_sha<mode>3"
16968 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16969 (if_then_else:VI_128
16970 (ge:VI_128
16971 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16972 (const_int 0))
16973 (ashift:VI_128
16974 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16975 (match_dup 2))
16976 (ashiftrt:VI_128
16977 (match_dup 1)
16978 (neg:VI_128 (match_dup 2)))))]
16979 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16980 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16981 [(set_attr "type" "sseishft")
16982 (set_attr "prefix_data16" "0")
16983 (set_attr "prefix_extra" "2")
16984 (set_attr "mode" "TI")])
16985
16986 (define_insn "xop_shl<mode>3"
16987 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16988 (if_then_else:VI_128
16989 (ge:VI_128
16990 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16991 (const_int 0))
16992 (ashift:VI_128
16993 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16994 (match_dup 2))
16995 (lshiftrt:VI_128
16996 (match_dup 1)
16997 (neg:VI_128 (match_dup 2)))))]
16998 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16999 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17000 [(set_attr "type" "sseishft")
17001 (set_attr "prefix_data16" "0")
17002 (set_attr "prefix_extra" "2")
17003 (set_attr "mode" "TI")])
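;; vpsha* and vpshl* differ only for negative counts: both shift left for a
;; non-negative per-element count, while a negative count selects an
;; arithmetic vs. a logical right shift.  A rough source-level view via the
;; usual XOP shift intrinsics from <x86intrin.h>, -mxop (illustrative only):
;;
;;   #include <x86intrin.h>
;;   __m128i
;;   shift_arith (__m128i a, __m128i cnt)
;;   {
;;     return _mm_sha_epi32 (a, cnt);   /* arithmetic right when cnt < 0 */
;;   }
;;   __m128i
;;   shift_logic (__m128i a, __m128i cnt)
;;   {
;;     return _mm_shl_epi32 (a, cnt);   /* logical right when cnt < 0 */
;;   }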
17004
17005 (define_expand "<shift_insn><mode>3"
17006 [(set (match_operand:VI1_AVX512 0 "register_operand")
17007 (any_shift:VI1_AVX512
17008 (match_operand:VI1_AVX512 1 "register_operand")
17009 (match_operand:SI 2 "nonmemory_operand")))]
17010 "TARGET_SSE2"
17011 {
17012 if (TARGET_XOP && <MODE>mode == V16QImode)
17013 {
17014 bool negate = false;
17015 rtx (*gen) (rtx, rtx, rtx);
17016 rtx tmp, par;
17017 int i;
17018
17019 if (<CODE> != ASHIFT)
17020 {
17021 if (CONST_INT_P (operands[2]))
17022 operands[2] = GEN_INT (-INTVAL (operands[2]));
17023 else
17024 negate = true;
17025 }
17026 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
17027 for (i = 0; i < 16; i++)
17028 XVECEXP (par, 0, i) = operands[2];
17029
17030 tmp = gen_reg_rtx (V16QImode);
17031 emit_insn (gen_vec_initv16qiqi (tmp, par));
17032
17033 if (negate)
17034 emit_insn (gen_negv16qi2 (tmp, tmp));
17035
17036 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
17037 emit_insn (gen (operands[0], operands[1], tmp));
17038 }
17039 else
17040 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
17041 DONE;
17042 })
17043
17044 (define_expand "ashrv2di3"
17045 [(set (match_operand:V2DI 0 "register_operand")
17046 (ashiftrt:V2DI
17047 (match_operand:V2DI 1 "register_operand")
17048 (match_operand:DI 2 "nonmemory_operand")))]
17049 "TARGET_XOP || TARGET_AVX512VL"
17050 {
17051 if (!TARGET_AVX512VL)
17052 {
17053 rtx reg = gen_reg_rtx (V2DImode);
17054 rtx par;
17055 bool negate = false;
17056 int i;
17057
17058 if (CONST_INT_P (operands[2]))
17059 operands[2] = GEN_INT (-INTVAL (operands[2]));
17060 else
17061 negate = true;
17062
17063 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
17064 for (i = 0; i < 2; i++)
17065 XVECEXP (par, 0, i) = operands[2];
17066
17067 emit_insn (gen_vec_initv2didi (reg, par));
17068
17069 if (negate)
17070 emit_insn (gen_negv2di2 (reg, reg));
17071
17072 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
17073 DONE;
17074 }
17075 })
17076
17077 ;; XOP FRCZ support
17078 (define_insn "xop_frcz<mode>2"
17079 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
17080 (unspec:FMAMODE
17081 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
17082 UNSPEC_FRCZ))]
17083 "TARGET_XOP"
17084 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
17085 [(set_attr "type" "ssecvt1")
17086 (set_attr "mode" "<MODE>")])
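;; VFRCZ extracts the fractional part of each element, i.e. x - trunc(x).
;; A scalar sketch of the intended per-element result (illustrative only,
;; ignoring rounding-mode and special-value details):
;;
;;   #include <math.h>
;;   double
;;   frcz_element (double x)
;;   {
;;     return x - trunc (x);
;;   }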
17087
17088 (define_expand "xop_vmfrcz<mode>2"
17089 [(set (match_operand:VF_128 0 "register_operand")
17090 (vec_merge:VF_128
17091 (unspec:VF_128
17092 [(match_operand:VF_128 1 "nonimmediate_operand")]
17093 UNSPEC_FRCZ)
17094 (match_dup 2)
17095 (const_int 1)))]
17096 "TARGET_XOP"
17097 "operands[2] = CONST0_RTX (<MODE>mode);")
17098
17099 (define_insn "*xop_vmfrcz<mode>2"
17100 [(set (match_operand:VF_128 0 "register_operand" "=x")
17101 (vec_merge:VF_128
17102 (unspec:VF_128
17103 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
17104 UNSPEC_FRCZ)
17105 (match_operand:VF_128 2 "const0_operand")
17106 (const_int 1)))]
17107 "TARGET_XOP"
17108 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
17109 [(set_attr "type" "ssecvt1")
17110 (set_attr "mode" "<MODE>")])
17111
17112 (define_insn "xop_maskcmp<mode>3"
17113 [(set (match_operand:VI_128 0 "register_operand" "=x")
17114 (match_operator:VI_128 1 "ix86_comparison_int_operator"
17115 [(match_operand:VI_128 2 "register_operand" "x")
17116 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
17117 "TARGET_XOP"
17118 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17119 [(set_attr "type" "sse4arg")
17120 (set_attr "prefix_data16" "0")
17121 (set_attr "prefix_rep" "0")
17122 (set_attr "prefix_extra" "2")
17123 (set_attr "length_immediate" "1")
17124 (set_attr "mode" "TI")])
17125
17126 (define_insn "xop_maskcmp_uns<mode>3"
17127 [(set (match_operand:VI_128 0 "register_operand" "=x")
17128 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
17129 [(match_operand:VI_128 2 "register_operand" "x")
17130 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
17131 "TARGET_XOP"
17132 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17133 [(set_attr "type" "ssecmp")
17134 (set_attr "prefix_data16" "0")
17135 (set_attr "prefix_rep" "0")
17136 (set_attr "prefix_extra" "2")
17137 (set_attr "length_immediate" "1")
17138 (set_attr "mode" "TI")])
17139
17140 ;; Version of pcom*u* called from the intrinsics.  It keeps pcomequ* and
17141 ;; pcomneu* from being converted to the signed forms, in case somebody needs
17142 ;; the exact instruction generated for the intrinsic.
17143 (define_insn "xop_maskcmp_uns2<mode>3"
17144 [(set (match_operand:VI_128 0 "register_operand" "=x")
17145 (unspec:VI_128
17146 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
17147 [(match_operand:VI_128 2 "register_operand" "x")
17148 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
17149 UNSPEC_XOP_UNSIGNED_CMP))]
17150 "TARGET_XOP"
17151 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17152 [(set_attr "type" "ssecmp")
17153 (set_attr "prefix_data16" "0")
17154 (set_attr "prefix_extra" "2")
17155 (set_attr "length_immediate" "1")
17156 (set_attr "mode" "TI")])
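;; A minimal usage sketch for the unsigned-compare patterns above, assuming
;; the usual XOP comparison intrinsics from <x86intrin.h> and -mxop
;; (illustrative of the vpcomltuw form):
;;
;;   #include <x86intrin.h>
;;   __m128i
;;   below_u16 (__m128i a, __m128i b)
;;   {
;;     return _mm_comlt_epu16 (a, b);
;;   }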
17157
17158 ;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
17159 ;; included here for completeness.
17160 (define_insn "xop_pcom_tf<mode>3"
17161 [(set (match_operand:VI_128 0 "register_operand" "=x")
17162 (unspec:VI_128
17163 [(match_operand:VI_128 1 "register_operand" "x")
17164 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
17165 (match_operand:SI 3 "const_int_operand" "n")]
17166 UNSPEC_XOP_TRUEFALSE))]
17167 "TARGET_XOP"
17168 {
17169 return ((INTVAL (operands[3]) != 0)
17170 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17171 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
17172 }
17173 [(set_attr "type" "ssecmp")
17174 (set_attr "prefix_data16" "0")
17175 (set_attr "prefix_extra" "2")
17176 (set_attr "length_immediate" "1")
17177 (set_attr "mode" "TI")])
17178
17179 (define_insn "xop_vpermil2<mode>3"
17180 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
17181 (unspec:VF_128_256
17182 [(match_operand:VF_128_256 1 "register_operand" "x,x")
17183 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
17184 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
17185 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
17186 UNSPEC_VPERMIL2))]
17187 "TARGET_XOP"
17188 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
17189 [(set_attr "type" "sse4arg")
17190 (set_attr "length_immediate" "1")
17191 (set_attr "mode" "<MODE>")])
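;; A minimal usage sketch for xop_vpermil2<mode>3, assuming the usual
;; _mm_permute2_ps intrinsic from <x86intrin.h> and -mxop; the trailing 0..3
;; immediate is the instruction's zero-control field (illustrative only):
;;
;;   #include <x86intrin.h>
;;   __m128
;;   permute2 (__m128 a, __m128 b, __m128i sel)
;;   {
;;     return _mm_permute2_ps (a, b, sel, 0);
;;   }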
17192
17193 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17194
17195 (define_insn "aesenc"
17196 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17197 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17198 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17199 UNSPEC_AESENC))]
17200 "TARGET_AES"
17201 "@
17202 aesenc\t{%2, %0|%0, %2}
17203 vaesenc\t{%2, %1, %0|%0, %1, %2}"
17204 [(set_attr "isa" "noavx,avx")
17205 (set_attr "type" "sselog1")
17206 (set_attr "prefix_extra" "1")
17207 (set_attr "prefix" "orig,vex")
17208 (set_attr "btver2_decode" "double,double")
17209 (set_attr "mode" "TI")])
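;; A minimal usage sketch for the aesenc pattern above, using the standard
;; _mm_aesenc_si128 intrinsic from <wmmintrin.h> with -maes (illustrative):
;;
;;   #include <wmmintrin.h>
;;   __m128i
;;   aes_round (__m128i state, __m128i round_key)
;;   {
;;     /* One full AES round: ShiftRows, SubBytes, MixColumns, AddRoundKey.  */
;;     return _mm_aesenc_si128 (state, round_key);
;;   }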
17210
17211 (define_insn "aesenclast"
17212 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17213 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17214 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17215 UNSPEC_AESENCLAST))]
17216 "TARGET_AES"
17217 "@
17218 aesenclast\t{%2, %0|%0, %2}
17219 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
17220 [(set_attr "isa" "noavx,avx")
17221 (set_attr "type" "sselog1")
17222 (set_attr "prefix_extra" "1")
17223 (set_attr "prefix" "orig,vex")
17224 (set_attr "btver2_decode" "double,double")
17225 (set_attr "mode" "TI")])
17226
17227 (define_insn "aesdec"
17228 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17229 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17230 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17231 UNSPEC_AESDEC))]
17232 "TARGET_AES"
17233 "@
17234 aesdec\t{%2, %0|%0, %2}
17235 vaesdec\t{%2, %1, %0|%0, %1, %2}"
17236 [(set_attr "isa" "noavx,avx")
17237 (set_attr "type" "sselog1")
17238 (set_attr "prefix_extra" "1")
17239 (set_attr "prefix" "orig,vex")
17240 (set_attr "btver2_decode" "double,double")
17241 (set_attr "mode" "TI")])
17242
17243 (define_insn "aesdeclast"
17244 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17245 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17246 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17247 UNSPEC_AESDECLAST))]
17248 "TARGET_AES"
17249 "@
17250 aesdeclast\t{%2, %0|%0, %2}
17251 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
17252 [(set_attr "isa" "noavx,avx")
17253 (set_attr "type" "sselog1")
17254 (set_attr "prefix_extra" "1")
17255 (set_attr "prefix" "orig,vex")
17256 (set_attr "btver2_decode" "double,double")
17257 (set_attr "mode" "TI")])
17258
17259 (define_insn "aesimc"
17260 [(set (match_operand:V2DI 0 "register_operand" "=x")
17261 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
17262 UNSPEC_AESIMC))]
17263 "TARGET_AES"
17264 "%vaesimc\t{%1, %0|%0, %1}"
17265 [(set_attr "type" "sselog1")
17266 (set_attr "prefix_extra" "1")
17267 (set_attr "prefix" "maybe_vex")
17268 (set_attr "mode" "TI")])
17269
17270 (define_insn "aeskeygenassist"
17271 [(set (match_operand:V2DI 0 "register_operand" "=x")
17272 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
17273 (match_operand:SI 2 "const_0_to_255_operand" "n")]
17274 UNSPEC_AESKEYGENASSIST))]
17275 "TARGET_AES"
17276 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
17277 [(set_attr "type" "sselog1")
17278 (set_attr "prefix_extra" "1")
17279 (set_attr "length_immediate" "1")
17280 (set_attr "prefix" "maybe_vex")
17281 (set_attr "mode" "TI")])
17282
17283 (define_insn "pclmulqdq"
17284 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17285 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17286 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
17287 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
17288 UNSPEC_PCLMUL))]
17289 "TARGET_PCLMUL"
17290 "@
17291 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
17292 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17293 [(set_attr "isa" "noavx,avx")
17294 (set_attr "type" "sselog1")
17295 (set_attr "prefix_extra" "1")
17296 (set_attr "length_immediate" "1")
17297 (set_attr "prefix" "orig,vex")
17298 (set_attr "mode" "TI")])
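;; A minimal usage sketch for the pclmulqdq pattern above, using the standard
;; _mm_clmulepi64_si128 intrinsic from <wmmintrin.h> with -mpclmul; the
;; immediate selects which 64-bit half of each operand is multiplied:
;;
;;   #include <wmmintrin.h>
;;   __m128i
;;   clmul_low (__m128i a, __m128i b)
;;   {
;;     /* Carry-less multiply of the low qwords of a and b.  */
;;     return _mm_clmulepi64_si128 (a, b, 0x00);
;;   }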
17299
17300 (define_expand "avx_vzeroall"
17301 [(match_par_dup 0 [(const_int 0)])]
17302 "TARGET_AVX"
17303 {
17304 int nregs = TARGET_64BIT ? 16 : 8;
17305 int regno;
17306
17307 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
17308
17309 XVECEXP (operands[0], 0, 0)
17310 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
17311 UNSPECV_VZEROALL);
17312
17313 for (regno = 0; regno < nregs; regno++)
17314 XVECEXP (operands[0], 0, regno + 1)
17315 = gen_rtx_SET (gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
17316 CONST0_RTX (V8SImode));
17317 })
17318
17319 (define_insn "*avx_vzeroall"
17320 [(match_parallel 0 "vzeroall_operation"
17321 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
17322 "TARGET_AVX"
17323 "vzeroall"
17324 [(set_attr "type" "sse")
17325 (set_attr "modrm" "0")
17326 (set_attr "memory" "none")
17327 (set_attr "prefix" "vex")
17328 (set_attr "btver2_decode" "vector")
17329 (set_attr "mode" "OI")])
17330
17331 ;; Clear the upper 128 bits of the AVX registers; equivalent to a NOP
17332 ;; if the upper 128 bits are unused.
17333 (define_insn "avx_vzeroupper"
17334 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
17335 "TARGET_AVX"
17336 "vzeroupper"
17337 [(set_attr "type" "sse")
17338 (set_attr "modrm" "0")
17339 (set_attr "memory" "none")
17340 (set_attr "prefix" "vex")
17341 (set_attr "btver2_decode" "vector")
17342 (set_attr "mode" "OI")])
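;; A minimal usage sketch: _mm256_zeroupper (from <immintrin.h>) maps onto
;; the pattern above and is typically placed before a transition from 256-bit
;; AVX code to legacy SSE code (illustrative only):
;;
;;   #include <immintrin.h>
;;   void
;;   end_of_avx_region (void)
;;   {
;;     _mm256_zeroupper ();   /* avoid the AVX->SSE transition penalty */
;;   }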
17343
17344 (define_mode_attr pbroadcast_evex_isa
17345 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
17346 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
17347 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
17348 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
17349
17350 (define_insn "avx2_pbroadcast<mode>"
17351 [(set (match_operand:VI 0 "register_operand" "=x,v")
17352 (vec_duplicate:VI
17353 (vec_select:<ssescalarmode>
17354 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
17355 (parallel [(const_int 0)]))))]
17356 "TARGET_AVX2"
17357 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
17358 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
17359 (set_attr "type" "ssemov")
17360 (set_attr "prefix_extra" "1")
17361 (set_attr "prefix" "vex,evex")
17362 (set_attr "mode" "<sseinsnmode>")])
17363
17364 (define_insn "avx2_pbroadcast<mode>_1"
17365 [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
17366 (vec_duplicate:VI_256
17367 (vec_select:<ssescalarmode>
17368 (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
17369 (parallel [(const_int 0)]))))]
17370 "TARGET_AVX2"
17371 "@
17372 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
17373 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
17374 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
17375 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
17376 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
17377 (set_attr "type" "ssemov")
17378 (set_attr "prefix_extra" "1")
17379 (set_attr "prefix" "vex")
17380 (set_attr "mode" "<sseinsnmode>")])
17381
17382 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
17383 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
17384 (unspec:VI48F_256_512
17385 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
17386 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17387 UNSPEC_VPERMVAR))]
17388 "TARGET_AVX2 && <mask_mode512bit_condition>"
17389 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17390 [(set_attr "type" "sselog")
17391 (set_attr "prefix" "<mask_prefix2>")
17392 (set_attr "mode" "<sseinsnmode>")])
17393
17394 (define_insn "<avx512>_permvar<mode><mask_name>"
17395 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17396 (unspec:VI1_AVX512VL
17397 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
17398 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17399 UNSPEC_VPERMVAR))]
17400 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
17401 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17402 [(set_attr "type" "sselog")
17403 (set_attr "prefix" "<mask_prefix2>")
17404 (set_attr "mode" "<sseinsnmode>")])
17405
17406 (define_insn "<avx512>_permvar<mode><mask_name>"
17407 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17408 (unspec:VI2_AVX512VL
17409 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
17410 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17411 UNSPEC_VPERMVAR))]
17412 "TARGET_AVX512BW && <mask_mode512bit_condition>"
17413 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17414 [(set_attr "type" "sselog")
17415 (set_attr "prefix" "<mask_prefix2>")
17416 (set_attr "mode" "<sseinsnmode>")])
17417
17418 (define_expand "avx2_perm<mode>"
17419 [(match_operand:VI8F_256 0 "register_operand")
17420 (match_operand:VI8F_256 1 "nonimmediate_operand")
17421 (match_operand:SI 2 "const_0_to_255_operand")]
17422 "TARGET_AVX2"
17423 {
17424 int mask = INTVAL (operands[2]);
17425 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
17426 GEN_INT ((mask >> 0) & 3),
17427 GEN_INT ((mask >> 2) & 3),
17428 GEN_INT ((mask >> 4) & 3),
17429 GEN_INT ((mask >> 6) & 3)));
17430 DONE;
17431 })
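;; The expander above splits the 8-bit immediate into four 2-bit lane
;; selectors.  A rough source-level equivalent, using the standard
;; _mm256_permute4x64_epi64 intrinsic from <immintrin.h> with -mavx2:
;;
;;   #include <immintrin.h>
;;   __m256i
;;   reverse_qwords (__m256i a)
;;   {
;;     /* _MM_SHUFFLE(0,1,2,3) == 0x1b: element i of the result is
;;        element (3 - i) of the source.  */
;;     return _mm256_permute4x64_epi64 (a, _MM_SHUFFLE (0, 1, 2, 3));
;;   }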
17432
17433 (define_expand "avx512vl_perm<mode>_mask"
17434 [(match_operand:VI8F_256 0 "register_operand")
17435 (match_operand:VI8F_256 1 "nonimmediate_operand")
17436 (match_operand:SI 2 "const_0_to_255_operand")
17437 (match_operand:VI8F_256 3 "vector_move_operand")
17438 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17439 "TARGET_AVX512VL"
17440 {
17441 int mask = INTVAL (operands[2]);
17442 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
17443 GEN_INT ((mask >> 0) & 3),
17444 GEN_INT ((mask >> 2) & 3),
17445 GEN_INT ((mask >> 4) & 3),
17446 GEN_INT ((mask >> 6) & 3),
17447 operands[3], operands[4]));
17448 DONE;
17449 })
17450
17451 (define_insn "avx2_perm<mode>_1<mask_name>"
17452 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17453 (vec_select:VI8F_256
17454 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
17455 (parallel [(match_operand 2 "const_0_to_3_operand")
17456 (match_operand 3 "const_0_to_3_operand")
17457 (match_operand 4 "const_0_to_3_operand")
17458 (match_operand 5 "const_0_to_3_operand")])))]
17459 "TARGET_AVX2 && <mask_mode512bit_condition>"
17460 {
17461 int mask = 0;
17462 mask |= INTVAL (operands[2]) << 0;
17463 mask |= INTVAL (operands[3]) << 2;
17464 mask |= INTVAL (operands[4]) << 4;
17465 mask |= INTVAL (operands[5]) << 6;
17466 operands[2] = GEN_INT (mask);
17467 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
17468 }
17469 [(set_attr "type" "sselog")
17470 (set_attr "prefix" "<mask_prefix2>")
17471 (set_attr "mode" "<sseinsnmode>")])
17472
17473 (define_expand "avx512f_perm<mode>"
17474 [(match_operand:V8FI 0 "register_operand")
17475 (match_operand:V8FI 1 "nonimmediate_operand")
17476 (match_operand:SI 2 "const_0_to_255_operand")]
17477 "TARGET_AVX512F"
17478 {
17479 int mask = INTVAL (operands[2]);
17480 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
17481 GEN_INT ((mask >> 0) & 3),
17482 GEN_INT ((mask >> 2) & 3),
17483 GEN_INT ((mask >> 4) & 3),
17484 GEN_INT ((mask >> 6) & 3),
17485 GEN_INT (((mask >> 0) & 3) + 4),
17486 GEN_INT (((mask >> 2) & 3) + 4),
17487 GEN_INT (((mask >> 4) & 3) + 4),
17488 GEN_INT (((mask >> 6) & 3) + 4)));
17489 DONE;
17490 })
17491
17492 (define_expand "avx512f_perm<mode>_mask"
17493 [(match_operand:V8FI 0 "register_operand")
17494 (match_operand:V8FI 1 "nonimmediate_operand")
17495 (match_operand:SI 2 "const_0_to_255_operand")
17496 (match_operand:V8FI 3 "vector_move_operand")
17497 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17498 "TARGET_AVX512F"
17499 {
17500 int mask = INTVAL (operands[2]);
17501 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
17502 GEN_INT ((mask >> 0) & 3),
17503 GEN_INT ((mask >> 2) & 3),
17504 GEN_INT ((mask >> 4) & 3),
17505 GEN_INT ((mask >> 6) & 3),
17506 GEN_INT (((mask >> 0) & 3) + 4),
17507 GEN_INT (((mask >> 2) & 3) + 4),
17508 GEN_INT (((mask >> 4) & 3) + 4),
17509 GEN_INT (((mask >> 6) & 3) + 4),
17510 operands[3], operands[4]));
17511 DONE;
17512 })
17513
17514 (define_insn "avx512f_perm<mode>_1<mask_name>"
17515 [(set (match_operand:V8FI 0 "register_operand" "=v")
17516 (vec_select:V8FI
17517 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
17518 (parallel [(match_operand 2 "const_0_to_3_operand")
17519 (match_operand 3 "const_0_to_3_operand")
17520 (match_operand 4 "const_0_to_3_operand")
17521 (match_operand 5 "const_0_to_3_operand")
17522 (match_operand 6 "const_4_to_7_operand")
17523 (match_operand 7 "const_4_to_7_operand")
17524 (match_operand 8 "const_4_to_7_operand")
17525 (match_operand 9 "const_4_to_7_operand")])))]
17526 "TARGET_AVX512F && <mask_mode512bit_condition>
17527 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
17528 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
17529 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
17530 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
17531 {
17532 int mask = 0;
17533 mask |= INTVAL (operands[2]) << 0;
17534 mask |= INTVAL (operands[3]) << 2;
17535 mask |= INTVAL (operands[4]) << 4;
17536 mask |= INTVAL (operands[5]) << 6;
17537 operands[2] = GEN_INT (mask);
17538 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
17539 }
17540 [(set_attr "type" "sselog")
17541 (set_attr "prefix" "<mask_prefix2>")
17542 (set_attr "mode" "<sseinsnmode>")])
17543
17544 (define_insn "avx2_permv2ti"
17545 [(set (match_operand:V4DI 0 "register_operand" "=x")
17546 (unspec:V4DI
17547 [(match_operand:V4DI 1 "register_operand" "x")
17548 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
17549 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17550 UNSPEC_VPERMTI))]
17551 "TARGET_AVX2"
17552 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17553 [(set_attr "type" "sselog")
17554 (set_attr "prefix" "vex")
17555 (set_attr "mode" "OI")])
17556
17557 (define_insn "avx2_vec_dupv4df"
17558 [(set (match_operand:V4DF 0 "register_operand" "=v")
17559 (vec_duplicate:V4DF
17560 (vec_select:DF
17561 (match_operand:V2DF 1 "register_operand" "v")
17562 (parallel [(const_int 0)]))))]
17563 "TARGET_AVX2"
17564 "vbroadcastsd\t{%1, %0|%0, %1}"
17565 [(set_attr "type" "sselog1")
17566 (set_attr "prefix" "maybe_evex")
17567 (set_attr "mode" "V4DF")])
17568
17569 (define_insn "<avx512>_vec_dup<mode>_1"
17570 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
17571 (vec_duplicate:VI_AVX512BW
17572 (vec_select:<ssescalarmode>
17573 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
17574 (parallel [(const_int 0)]))))]
17575 "TARGET_AVX512F"
17576 "@
17577 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
17578 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
17579 [(set_attr "type" "ssemov")
17580 (set_attr "prefix" "evex")
17581 (set_attr "mode" "<sseinsnmode>")])
17582
17583 (define_insn "<avx512>_vec_dup<mode><mask_name>"
17584 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
17585 (vec_duplicate:V48_AVX512VL
17586 (vec_select:<ssescalarmode>
17587 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17588 (parallel [(const_int 0)]))))]
17589 "TARGET_AVX512F"
17590 {
17591 /* There is no DF broadcast (in AVX-512*) to a 128-bit register.
17592 Mimic it with the integer variant.  */
17593 if (<MODE>mode == V2DFmode)
17594 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
17595
17596 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
17597 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}";
17598 else
17599 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
17600 }
17601 [(set_attr "type" "ssemov")
17602 (set_attr "prefix" "evex")
17603 (set_attr "mode" "<sseinsnmode>")])
17604
17605 (define_insn "<avx512>_vec_dup<mode><mask_name>"
17606 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
17607 (vec_duplicate:VI12_AVX512VL
17608 (vec_select:<ssescalarmode>
17609 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17610 (parallel [(const_int 0)]))))]
17611 "TARGET_AVX512BW"
17612 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17613 [(set_attr "type" "ssemov")
17614 (set_attr "prefix" "evex")
17615 (set_attr "mode" "<sseinsnmode>")])
17616
17617 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
17618 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17619 (vec_duplicate:V16FI
17620 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
17621 "TARGET_AVX512F"
17622 "@
17623 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
17624 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17625 [(set_attr "type" "ssemov")
17626 (set_attr "prefix" "evex")
17627 (set_attr "mode" "<sseinsnmode>")])
17628
17629 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
17630 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
17631 (vec_duplicate:V8FI
17632 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17633 "TARGET_AVX512F"
17634 "@
17635 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17636 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17637 [(set_attr "type" "ssemov")
17638 (set_attr "prefix" "evex")
17639 (set_attr "mode" "<sseinsnmode>")])
17640
17641 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
17642 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
17643 (vec_duplicate:VI12_AVX512VL
17644 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
17645 "TARGET_AVX512BW"
17646 "@
17647 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
17648 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
17649 [(set_attr "type" "ssemov")
17650 (set_attr "prefix" "evex")
17651 (set_attr "mode" "<sseinsnmode>")])
17652
17653 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
17654 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
17655 (vec_duplicate:V48_AVX512VL
17656 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
17657 "TARGET_AVX512F"
17658 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17659 [(set_attr "type" "ssemov")
17660 (set_attr "prefix" "evex")
17661 (set_attr "mode" "<sseinsnmode>")
17662 (set (attr "enabled")
17663 (if_then_else (eq_attr "alternative" "1")
17664 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
17665 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
17666 (const_int 1)))])
17667
17668 (define_insn "vec_dupv4sf"
17669 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
17670 (vec_duplicate:V4SF
17671 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
17672 "TARGET_SSE"
17673 "@
17674 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
17675 vbroadcastss\t{%1, %0|%0, %1}
17676 shufps\t{$0, %0, %0|%0, %0, 0}"
17677 [(set_attr "isa" "avx,avx,noavx")
17678 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
17679 (set_attr "length_immediate" "1,0,1")
17680 (set_attr "prefix_extra" "0,1,*")
17681 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
17682 (set_attr "mode" "V4SF")])
17683
17684 (define_insn "*vec_dupv4si"
17685 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
17686 (vec_duplicate:V4SI
17687 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
17688 "TARGET_SSE"
17689 "@
17690 %vpshufd\t{$0, %1, %0|%0, %1, 0}
17691 vbroadcastss\t{%1, %0|%0, %1}
17692 shufps\t{$0, %0, %0|%0, %0, 0}"
17693 [(set_attr "isa" "sse2,avx,noavx")
17694 (set_attr "type" "sselog1,ssemov,sselog1")
17695 (set_attr "length_immediate" "1,0,1")
17696 (set_attr "prefix_extra" "0,1,*")
17697 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
17698 (set_attr "mode" "TI,V4SF,V4SF")])
17699
17700 (define_insn "*vec_dupv2di"
17701 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
17702 (vec_duplicate:V2DI
17703 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,m,0")))]
17704 "TARGET_SSE"
17705 "@
17706 punpcklqdq\t%0, %0
17707 vpunpcklqdq\t{%d1, %0|%0, %d1}
17708 %vmovddup\t{%1, %0|%0, %1}
17709 movlhps\t%0, %0"
17710 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
17711 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
17712 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
17713 (set_attr "mode" "TI,TI,DF,V4SF")])
17714
17715 (define_insn "avx2_vbroadcasti128_<mode>"
17716 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
17717 (vec_concat:VI_256
17718 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
17719 (match_dup 1)))]
17720 "TARGET_AVX2"
17721 "@
17722 vbroadcasti128\t{%1, %0|%0, %1}
17723 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
17724 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
17725 [(set_attr "isa" "*,avx512dq,avx512vl")
17726 (set_attr "type" "ssemov")
17727 (set_attr "prefix_extra" "1")
17728 (set_attr "prefix" "vex,evex,evex")
17729 (set_attr "mode" "OI")])
17730
17731 ;; Modes handled by AVX vec_dup patterns.
17732 (define_mode_iterator AVX_VEC_DUP_MODE
17733 [V8SI V8SF V4DI V4DF])
17734 ;; Modes handled by AVX2 vec_dup patterns.
17735 (define_mode_iterator AVX2_VEC_DUP_MODE
17736 [V32QI V16QI V16HI V8HI V8SI V4SI])
17737
17738 (define_insn "*vec_dup<mode>"
17739 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
17740 (vec_duplicate:AVX2_VEC_DUP_MODE
17741 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
17742 "TARGET_AVX2"
17743 "@
17744 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17745 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17746 #"
17747 [(set_attr "isa" "*,*,noavx512vl")
17748 (set_attr "type" "ssemov")
17749 (set_attr "prefix_extra" "1")
17750 (set_attr "prefix" "maybe_evex")
17751 (set_attr "mode" "<sseinsnmode>")])
17752
17753 (define_insn "vec_dup<mode>"
17754 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
17755 (vec_duplicate:AVX_VEC_DUP_MODE
17756 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
17757 "TARGET_AVX"
17758 "@
17759 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17760 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
17761 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17762 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
17763 #"
17764 [(set_attr "type" "ssemov")
17765 (set_attr "prefix_extra" "1")
17766 (set_attr "prefix" "maybe_evex")
17767 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
17768 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
17769
17770 (define_split
17771 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
17772 (vec_duplicate:AVX2_VEC_DUP_MODE
17773 (match_operand:<ssescalarmode> 1 "register_operand")))]
17774 "TARGET_AVX2
17775 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
17776 available, because then we can broadcast from GPRs directly.
17777 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
17778 for V*SI mode it requires just -mavx512vl. */
17779 && !(TARGET_AVX512VL
17780 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
17781 && reload_completed && GENERAL_REG_P (operands[1])"
17782 [(const_int 0)]
17783 {
17784 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
17785 CONST0_RTX (V4SImode),
17786 gen_lowpart (SImode, operands[1])));
17787 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
17788 gen_lowpart (<ssexmmmode>mode,
17789 operands[0])));
17790 DONE;
17791 })
17792
17793 (define_split
17794 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
17795 (vec_duplicate:AVX_VEC_DUP_MODE
17796 (match_operand:<ssescalarmode> 1 "register_operand")))]
17797 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
17798 [(set (match_dup 2)
17799 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
17800 (set (match_dup 0)
17801 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
17802 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
17803
17804 (define_insn "avx_vbroadcastf128_<mode>"
17805 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
17806 (vec_concat:V_256
17807 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
17808 (match_dup 1)))]
17809 "TARGET_AVX"
17810 "@
17811 vbroadcast<i128>\t{%1, %0|%0, %1}
17812 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
17813 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
17814 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
17815 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
17816 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
17817 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
17818 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
17819 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
17820 (set_attr "prefix_extra" "1")
17821 (set_attr "length_immediate" "0,1,1,0,1,0,1")
17822 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
17823 (set_attr "mode" "<sseinsnmode>")])
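;; A minimal usage sketch for avx_vbroadcastf128_<mode>, using the standard
;; _mm256_broadcast_ps intrinsic from <immintrin.h> with -mavx; the memory
;; source corresponds to alternative 0 above (illustrative only):
;;
;;   #include <immintrin.h>
;;   __m256
;;   dup_lane (const __m128 *p)
;;   {
;;     /* Load 128 bits and replicate them into both 128-bit lanes.  */
;;     return _mm256_broadcast_ps (p);
;;   }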
17824
17825 ;; For broadcast[i|f]32x2.  Note that there is no v4sf variant, only v4si.
17826 (define_mode_iterator VI4F_BRCST32x2
17827 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
17828 V16SF (V8SF "TARGET_AVX512VL")])
17829
17830 (define_mode_attr 64x2mode
17831 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
17832
17833 (define_mode_attr 32x2mode
17834 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
17835 (V8SF "V2SF") (V4SI "V2SI")])
17836
17837 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
17838 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
17839 (vec_duplicate:VI4F_BRCST32x2
17840 (vec_select:<32x2mode>
17841 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17842 (parallel [(const_int 0) (const_int 1)]))))]
17843 "TARGET_AVX512DQ"
17844 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
17845 [(set_attr "type" "ssemov")
17846 (set_attr "prefix_extra" "1")
17847 (set_attr "prefix" "evex")
17848 (set_attr "mode" "<sseinsnmode>")])
17849
17850 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
17851 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
17852 (vec_duplicate:VI4F_256
17853 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
17854 "TARGET_AVX512VL"
17855 "@
17856 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
17857 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17858 [(set_attr "type" "ssemov")
17859 (set_attr "prefix_extra" "1")
17860 (set_attr "prefix" "evex")
17861 (set_attr "mode" "<sseinsnmode>")])
17862
17863 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
17864 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17865 (vec_duplicate:V16FI
17866 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17867 "TARGET_AVX512DQ"
17868 "@
17869 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17870 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17871 [(set_attr "type" "ssemov")
17872 (set_attr "prefix_extra" "1")
17873 (set_attr "prefix" "evex")
17874 (set_attr "mode" "<sseinsnmode>")])
17875
17876 ;; For broadcast[i|f]64x2
17877 (define_mode_iterator VI8F_BRCST64x2
17878 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
17879
17880 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
17881 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
17882 (vec_duplicate:VI8F_BRCST64x2
17883 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
17884 "TARGET_AVX512DQ"
17885 "@
17886 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
17887 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17888 [(set_attr "type" "ssemov")
17889 (set_attr "prefix_extra" "1")
17890 (set_attr "prefix" "evex")
17891 (set_attr "mode" "<sseinsnmode>")])
17892
17893 (define_insn "avx512cd_maskb_vec_dup<mode>"
17894 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
17895 (vec_duplicate:VI8_AVX512VL
17896 (zero_extend:DI
17897 (match_operand:QI 1 "register_operand" "Yk"))))]
17898 "TARGET_AVX512CD"
17899 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
17900 [(set_attr "type" "mskmov")
17901 (set_attr "prefix" "evex")
17902 (set_attr "mode" "XI")])
17903
17904 (define_insn "avx512cd_maskw_vec_dup<mode>"
17905 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
17906 (vec_duplicate:VI4_AVX512VL
17907 (zero_extend:SI
17908 (match_operand:HI 1 "register_operand" "Yk"))))]
17909 "TARGET_AVX512CD"
17910 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
17911 [(set_attr "type" "mskmov")
17912 (set_attr "prefix" "evex")
17913 (set_attr "mode" "XI")])
17914
17915 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
17916 ;; If the input happens to be in memory, use vbroadcast.
17917 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
17918 (define_insn "*avx_vperm_broadcast_v4sf"
17919 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
17920 (vec_select:V4SF
17921 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
17922 (match_parallel 2 "avx_vbroadcast_operand"
17923 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17924 "TARGET_AVX"
17925 {
17926 int elt = INTVAL (operands[3]);
17927 switch (which_alternative)
17928 {
17929 case 0:
17930 case 1:
17931 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
17932 return "vbroadcastss\t{%1, %0|%0, %k1}";
17933 case 2:
17934 operands[2] = GEN_INT (elt * 0x55);
17935 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
17936 default:
17937 gcc_unreachable ();
17938 }
17939 }
17940 [(set_attr "type" "ssemov,ssemov,sselog1")
17941 (set_attr "prefix_extra" "1")
17942 (set_attr "length_immediate" "0,0,1")
17943 (set_attr "prefix" "maybe_evex")
17944 (set_attr "mode" "SF,SF,V4SF")])
17945
17946 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
17947 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
17948 (vec_select:VF_256
17949 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
17950 (match_parallel 2 "avx_vbroadcast_operand"
17951 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17952 "TARGET_AVX"
17953 "#"
17954 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
17955 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
17956 {
17957 rtx op0 = operands[0], op1 = operands[1];
17958 int elt = INTVAL (operands[3]);
17959
17960 if (REG_P (op1))
17961 {
17962 int mask;
17963
17964 if (TARGET_AVX2 && elt == 0)
17965 {
17966 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
17967 op1)));
17968 DONE;
17969 }
17970
17971 /* Shuffle the element we care about into all elements of the 128-bit lane.
17972 The other lane gets shuffled too, but we don't care. */
17973 if (<MODE>mode == V4DFmode)
17974 mask = (elt & 1 ? 15 : 0);
17975 else
17976 mask = (elt & 3) * 0x55;
17977 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
17978
17979 /* Shuffle the lane we care about into both lanes of the dest. */
17980 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
17981 if (EXT_REX_SSE_REG_P (op0))
17982 {
17983 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
17984 or VSHUFF128. */
17985 gcc_assert (<MODE>mode == V8SFmode);
17986 if ((mask & 1) == 0)
17987 emit_insn (gen_avx2_vec_dupv8sf (op0,
17988 gen_lowpart (V4SFmode, op0)));
17989 else
17990 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
17991 GEN_INT (4), GEN_INT (5),
17992 GEN_INT (6), GEN_INT (7),
17993 GEN_INT (12), GEN_INT (13),
17994 GEN_INT (14), GEN_INT (15)));
17995 DONE;
17996 }
17997
17998 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
17999 DONE;
18000 }
18001
18002 operands[1] = adjust_address (op1, <ssescalarmode>mode,
18003 elt * GET_MODE_SIZE (<ssescalarmode>mode));
18004 })
18005
18006 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
18007 [(set (match_operand:VF2 0 "register_operand")
18008 (vec_select:VF2
18009 (match_operand:VF2 1 "nonimmediate_operand")
18010 (match_operand:SI 2 "const_0_to_255_operand")))]
18011 "TARGET_AVX && <mask_mode512bit_condition>"
18012 {
18013 int mask = INTVAL (operands[2]);
18014 rtx perm[<ssescalarnum>];
18015
18016 int i;
18017 for (i = 0; i < <ssescalarnum>; i = i + 2)
18018 {
18019 perm[i] = GEN_INT (((mask >> i) & 1) + i);
18020 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
18021 }
18022
18023 operands[2]
18024 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
18025 })
18026
18027 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
18028 [(set (match_operand:VF1 0 "register_operand")
18029 (vec_select:VF1
18030 (match_operand:VF1 1 "nonimmediate_operand")
18031 (match_operand:SI 2 "const_0_to_255_operand")))]
18032 "TARGET_AVX && <mask_mode512bit_condition>"
18033 {
18034 int mask = INTVAL (operands[2]);
18035 rtx perm[<ssescalarnum>];
18036
18037 int i;
18038 for (i = 0; i < <ssescalarnum>; i = i + 4)
18039 {
18040 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
18041 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
18042 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
18043 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
18044 }
18045
18046 operands[2]
18047 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
18048 })
18049
18050 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
18051 [(set (match_operand:VF 0 "register_operand" "=v")
18052 (vec_select:VF
18053 (match_operand:VF 1 "nonimmediate_operand" "vm")
18054 (match_parallel 2 ""
18055 [(match_operand 3 "const_int_operand")])))]
18056 "TARGET_AVX && <mask_mode512bit_condition>
18057 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
18058 {
18059 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
18060 operands[2] = GEN_INT (mask);
18061 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
18062 }
18063 [(set_attr "type" "sselog")
18064 (set_attr "prefix_extra" "1")
18065 (set_attr "length_immediate" "1")
18066 (set_attr "prefix" "<mask_prefix>")
18067 (set_attr "mode" "<sseinsnmode>")])
18068
18069 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
18070 [(set (match_operand:VF 0 "register_operand" "=v")
18071 (unspec:VF
18072 [(match_operand:VF 1 "register_operand" "v")
18073 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
18074 UNSPEC_VPERMIL))]
18075 "TARGET_AVX && <mask_mode512bit_condition>"
18076 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18077 [(set_attr "type" "sselog")
18078 (set_attr "prefix_extra" "1")
18079 (set_attr "btver2_decode" "vector")
18080 (set_attr "prefix" "<mask_prefix>")
18081 (set_attr "mode" "<sseinsnmode>")])
18082
18083 (define_mode_iterator VPERMI2
18084 [V16SI V16SF V8DI V8DF
18085 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
18086 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
18087 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
18088 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
18089 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
18090 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
18091 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
18092 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
18093
18094 (define_mode_iterator VPERMI2I
18095 [V16SI V8DI
18096 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
18097 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
18098 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
18099 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
18100 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
18101 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
18102
18103 (define_expand "<avx512>_vpermi2var<mode>3_mask"
18104 [(set (match_operand:VPERMI2 0 "register_operand")
18105 (vec_merge:VPERMI2
18106 (unspec:VPERMI2
18107 [(match_operand:<sseintvecmode> 2 "register_operand")
18108 (match_operand:VPERMI2 1 "register_operand")
18109 (match_operand:VPERMI2 3 "nonimmediate_operand")]
18110 UNSPEC_VPERMT2)
18111 (match_dup 5)
18112 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
18113 "TARGET_AVX512F"
18114 "operands[5] = gen_lowpart (<MODE>mode, operands[2]);")
18115
18116 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
18117 [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
18118 (vec_merge:VPERMI2I
18119 (unspec:VPERMI2I
18120 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
18121 (match_operand:VPERMI2I 1 "register_operand" "v")
18122 (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
18123 UNSPEC_VPERMT2)
18124 (match_dup 2)
18125 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18126 "TARGET_AVX512F"
18127 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18128 [(set_attr "type" "sselog")
18129 (set_attr "prefix" "evex")
18130 (set_attr "mode" "<sseinsnmode>")])
18131
18132 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
18133 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18134 (vec_merge:VF_AVX512VL
18135 (unspec:VF_AVX512VL
18136 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
18137 (match_operand:VF_AVX512VL 1 "register_operand" "v")
18138 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
18139 UNSPEC_VPERMT2)
18140 (subreg:VF_AVX512VL (match_dup 2) 0)
18141 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18142 "TARGET_AVX512F"
18143 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18144 [(set_attr "type" "sselog")
18145 (set_attr "prefix" "evex")
18146 (set_attr "mode" "<sseinsnmode>")])
18147
18148 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
18149 [(match_operand:VPERMI2 0 "register_operand")
18150 (match_operand:<sseintvecmode> 1 "register_operand")
18151 (match_operand:VPERMI2 2 "register_operand")
18152 (match_operand:VPERMI2 3 "nonimmediate_operand")
18153 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18154 "TARGET_AVX512F"
18155 {
18156 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
18157 operands[0], operands[1], operands[2], operands[3],
18158 CONST0_RTX (<MODE>mode), operands[4]));
18159 DONE;
18160 })
18161
18162 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
18163 [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
18164 (unspec:VPERMI2
18165 [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
18166 (match_operand:VPERMI2 2 "register_operand" "0,v")
18167 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
18168 UNSPEC_VPERMT2))]
18169 "TARGET_AVX512F"
18170 "@
18171 vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
18172 vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
18173 [(set_attr "type" "sselog")
18174 (set_attr "prefix" "evex")
18175 (set_attr "mode" "<sseinsnmode>")])
18176
18177 (define_insn "<avx512>_vpermt2var<mode>3_mask"
18178 [(set (match_operand:VPERMI2 0 "register_operand" "=v")
18179 (vec_merge:VPERMI2
18180 (unspec:VPERMI2
18181 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18182 (match_operand:VPERMI2 2 "register_operand" "0")
18183 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
18184 UNSPEC_VPERMT2)
18185 (match_dup 2)
18186 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18187 "TARGET_AVX512F"
18188 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18189 [(set_attr "type" "sselog")
18190 (set_attr "prefix" "evex")
18191 (set_attr "mode" "<sseinsnmode>")])
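;; A minimal usage sketch for the two-source permutes above, using the
;; standard _mm512_permutex2var_epi32 intrinsic from <immintrin.h> with
;; -mavx512f (illustrative only):
;;
;;   #include <immintrin.h>
;;   __m512i
;;   pick_dwords (__m512i a, __m512i idx, __m512i b)
;;   {
;;     /* Each index selects one of the 32 dwords of the {a,b} pair.  */
;;     return _mm512_permutex2var_epi32 (a, idx, b);
;;   }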
18192
18193 (define_expand "avx_vperm2f128<mode>3"
18194 [(set (match_operand:AVX256MODE2P 0 "register_operand")
18195 (unspec:AVX256MODE2P
18196 [(match_operand:AVX256MODE2P 1 "register_operand")
18197 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
18198 (match_operand:SI 3 "const_0_to_255_operand")]
18199 UNSPEC_VPERMIL2F128))]
18200 "TARGET_AVX"
18201 {
18202 int mask = INTVAL (operands[3]);
18203 if ((mask & 0x88) == 0)
18204 {
18205 rtx perm[<ssescalarnum>], t1, t2;
18206 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
18207
18208 base = (mask & 3) * nelt2;
18209 for (i = 0; i < nelt2; ++i)
18210 perm[i] = GEN_INT (base + i);
18211
18212 base = ((mask >> 4) & 3) * nelt2;
18213 for (i = 0; i < nelt2; ++i)
18214 perm[i + nelt2] = GEN_INT (base + i);
18215
18216 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
18217 operands[1], operands[2]);
18218 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
18219 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
18220 t2 = gen_rtx_SET (operands[0], t2);
18221 emit_insn (t2);
18222 DONE;
18223 }
18224 })
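
;; For reference, the vperm2f128 imm8 is decoded as follows: bits 1:0
;; select the 128-bit lane placed in the low half of the result (0/1 are
;; the lanes of operand 1, 2/3 the lanes of operand 2), bits 5:4 do the
;; same for the high half, and bits 3 and 7 zero the corresponding half
;; instead.  E.g. for V8SF and imm8 0x21 the expander above builds the
;; selection { 4 5 6 7 8 9 10 11 } over the 16-element concatenation of
;; the two operands, i.e. { op1 high lane, op2 low lane }.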
18225
18226 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
18227 ;; means that in order to represent this properly in rtl we'd have to
18228 ;; nest *another* vec_concat with a zero operand and do the select from
18229 ;; a 4x wide vector. That doesn't seem very nice.
18230 (define_insn "*avx_vperm2f128<mode>_full"
18231 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
18232 (unspec:AVX256MODE2P
18233 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
18234 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
18235 (match_operand:SI 3 "const_0_to_255_operand" "n")]
18236 UNSPEC_VPERMIL2F128))]
18237 "TARGET_AVX"
18238 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18239 [(set_attr "type" "sselog")
18240 (set_attr "prefix_extra" "1")
18241 (set_attr "length_immediate" "1")
18242 (set_attr "prefix" "vex")
18243 (set_attr "mode" "<sseinsnmode>")])
18244
18245 (define_insn "*avx_vperm2f128<mode>_nozero"
18246 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
18247 (vec_select:AVX256MODE2P
18248 (vec_concat:<ssedoublevecmode>
18249 (match_operand:AVX256MODE2P 1 "register_operand" "x")
18250 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
18251 (match_parallel 3 ""
18252 [(match_operand 4 "const_int_operand")])))]
18253 "TARGET_AVX
18254 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
18255 {
18256 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
18257 if (mask == 0x12)
18258 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
18259 if (mask == 0x20)
18260 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
18261 operands[3] = GEN_INT (mask);
18262 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
18263 }
18264 [(set_attr "type" "sselog")
18265 (set_attr "prefix_extra" "1")
18266 (set_attr "length_immediate" "1")
18267 (set_attr "prefix" "vex")
18268 (set_attr "mode" "<sseinsnmode>")])
18269
18270 (define_insn "*ssse3_palignr<mode>_perm"
18271 [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
18272 (vec_select:V_128
18273 (match_operand:V_128 1 "register_operand" "0,x,v")
18274 (match_parallel 2 "palignr_operand"
18275 [(match_operand 3 "const_int_operand" "n,n,n")])))]
18276 "TARGET_SSSE3"
18277 {
18278 operands[2] = (GEN_INT (INTVAL (operands[3])
18279 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
18280
18281 switch (which_alternative)
18282 {
18283 case 0:
18284 return "palignr\t{%2, %1, %0|%0, %1, %2}";
18285 case 1:
18286 case 2:
18287 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
18288 default:
18289 gcc_unreachable ();
18290 }
18291 }
18292 [(set_attr "isa" "noavx,avx,avx512bw")
18293 (set_attr "type" "sseishft")
18294 (set_attr "atom_unit" "sishuf")
18295 (set_attr "prefix_data16" "1,*,*")
18296 (set_attr "prefix_extra" "1")
18297 (set_attr "length_immediate" "1")
18298 (set_attr "prefix" "orig,vex,evex")])
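
;; In the pattern above operand 3 is the element offset taken from the
;; selection parallel (a whole-element rotate of operand 1); it is
;; scaled by the element size because (v)palignr takes a byte count,
;; e.g. an offset of 3 on V8HI becomes a 6-byte shift.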
18299
18300 (define_expand "avx512vl_vinsert<mode>"
18301 [(match_operand:VI48F_256 0 "register_operand")
18302 (match_operand:VI48F_256 1 "register_operand")
18303 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18304 (match_operand:SI 3 "const_0_to_1_operand")
18305 (match_operand:VI48F_256 4 "register_operand")
18306 (match_operand:<avx512fmaskmode> 5 "register_operand")]
18307 "TARGET_AVX512VL"
18308 {
18309 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18310
18311 switch (INTVAL (operands[3]))
18312 {
18313 case 0:
18314 insn = gen_vec_set_lo_<mode>_mask;
18315 break;
18316 case 1:
18317 insn = gen_vec_set_hi_<mode>_mask;
18318 break;
18319 default:
18320 gcc_unreachable ();
18321 }
18322
18323 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
18324 operands[5]));
18325 DONE;
18326 })
18327
18328 (define_expand "avx_vinsertf128<mode>"
18329 [(match_operand:V_256 0 "register_operand")
18330 (match_operand:V_256 1 "register_operand")
18331 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18332 (match_operand:SI 3 "const_0_to_1_operand")]
18333 "TARGET_AVX"
18334 {
18335 rtx (*insn)(rtx, rtx, rtx);
18336
18337 switch (INTVAL (operands[3]))
18338 {
18339 case 0:
18340 insn = gen_vec_set_lo_<mode>;
18341 break;
18342 case 1:
18343 insn = gen_vec_set_hi_<mode>;
18344 break;
18345 default:
18346 gcc_unreachable ();
18347 }
18348
18349 emit_insn (insn (operands[0], operands[1], operands[2]));
18350 DONE;
18351 })
18352
18353 (define_insn "vec_set_lo_<mode><mask_name>"
18354 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
18355 (vec_concat:VI8F_256
18356 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
18357 (vec_select:<ssehalfvecmode>
18358 (match_operand:VI8F_256 1 "register_operand" "v")
18359 (parallel [(const_int 2) (const_int 3)]))))]
18360 "TARGET_AVX && <mask_avx512dq_condition>"
18361 {
18362 if (TARGET_AVX512DQ)
18363 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18364 else if (TARGET_AVX512VL)
18365 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18366 else
18367 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
18368 }
18369 [(set_attr "type" "sselog")
18370 (set_attr "prefix_extra" "1")
18371 (set_attr "length_immediate" "1")
18372 (set_attr "prefix" "vex")
18373 (set_attr "mode" "<sseinsnmode>")])
18374
18375 (define_insn "vec_set_hi_<mode><mask_name>"
18376 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
18377 (vec_concat:VI8F_256
18378 (vec_select:<ssehalfvecmode>
18379 (match_operand:VI8F_256 1 "register_operand" "v")
18380 (parallel [(const_int 0) (const_int 1)]))
18381 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
18382 "TARGET_AVX && <mask_avx512dq_condition>"
18383 {
18384 if (TARGET_AVX512DQ)
18385 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18386 else if (TARGET_AVX512VL)
18387 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18388 else
18389 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
18390 }
18391 [(set_attr "type" "sselog")
18392 (set_attr "prefix_extra" "1")
18393 (set_attr "length_immediate" "1")
18394 (set_attr "prefix" "vex")
18395 (set_attr "mode" "<sseinsnmode>")])
18396
18397 (define_insn "vec_set_lo_<mode><mask_name>"
18398 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
18399 (vec_concat:VI4F_256
18400 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
18401 (vec_select:<ssehalfvecmode>
18402 (match_operand:VI4F_256 1 "register_operand" "v")
18403 (parallel [(const_int 4) (const_int 5)
18404 (const_int 6) (const_int 7)]))))]
18405 "TARGET_AVX"
18406 {
18407 if (TARGET_AVX512VL)
18408 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18409 else
18410 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
18411 }
18412 [(set_attr "type" "sselog")
18413 (set_attr "prefix_extra" "1")
18414 (set_attr "length_immediate" "1")
18415 (set_attr "prefix" "vex")
18416 (set_attr "mode" "<sseinsnmode>")])
18417
18418 (define_insn "vec_set_hi_<mode><mask_name>"
18419 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
18420 (vec_concat:VI4F_256
18421 (vec_select:<ssehalfvecmode>
18422 (match_operand:VI4F_256 1 "register_operand" "v")
18423 (parallel [(const_int 0) (const_int 1)
18424 (const_int 2) (const_int 3)]))
18425 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
18426 "TARGET_AVX"
18427 {
18428 if (TARGET_AVX512VL)
18429 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18430 else
18431 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
18432 }
18433 [(set_attr "type" "sselog")
18434 (set_attr "prefix_extra" "1")
18435 (set_attr "length_immediate" "1")
18436 (set_attr "prefix" "vex")
18437 (set_attr "mode" "<sseinsnmode>")])
18438
18439 (define_insn "vec_set_lo_v16hi"
18440 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
18441 (vec_concat:V16HI
18442 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
18443 (vec_select:V8HI
18444 (match_operand:V16HI 1 "register_operand" "x,v")
18445 (parallel [(const_int 8) (const_int 9)
18446 (const_int 10) (const_int 11)
18447 (const_int 12) (const_int 13)
18448 (const_int 14) (const_int 15)]))))]
18449 "TARGET_AVX"
18450 "@
18451 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
18452 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
18453 [(set_attr "type" "sselog")
18454 (set_attr "prefix_extra" "1")
18455 (set_attr "length_immediate" "1")
18456 (set_attr "prefix" "vex,evex")
18457 (set_attr "mode" "OI")])
18458
18459 (define_insn "vec_set_hi_v16hi"
18460 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
18461 (vec_concat:V16HI
18462 (vec_select:V8HI
18463 (match_operand:V16HI 1 "register_operand" "x,v")
18464 (parallel [(const_int 0) (const_int 1)
18465 (const_int 2) (const_int 3)
18466 (const_int 4) (const_int 5)
18467 (const_int 6) (const_int 7)]))
18468 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
18469 "TARGET_AVX"
18470 "@
18471 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
18472 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
18473 [(set_attr "type" "sselog")
18474 (set_attr "prefix_extra" "1")
18475 (set_attr "length_immediate" "1")
18476 (set_attr "prefix" "vex,evex")
18477 (set_attr "mode" "OI")])
18478
18479 (define_insn "vec_set_lo_v32qi"
18480 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
18481 (vec_concat:V32QI
18482 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")
18483 (vec_select:V16QI
18484 (match_operand:V32QI 1 "register_operand" "x,v")
18485 (parallel [(const_int 16) (const_int 17)
18486 (const_int 18) (const_int 19)
18487 (const_int 20) (const_int 21)
18488 (const_int 22) (const_int 23)
18489 (const_int 24) (const_int 25)
18490 (const_int 26) (const_int 27)
18491 (const_int 28) (const_int 29)
18492 (const_int 30) (const_int 31)]))))]
18493 "TARGET_AVX"
18494 "@
18495 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
18496 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
18497 [(set_attr "type" "sselog")
18498 (set_attr "prefix_extra" "1")
18499 (set_attr "length_immediate" "1")
18500 (set_attr "prefix" "vex,evex")
18501 (set_attr "mode" "OI")])
18502
18503 (define_insn "vec_set_hi_v32qi"
18504 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
18505 (vec_concat:V32QI
18506 (vec_select:V16QI
18507 (match_operand:V32QI 1 "register_operand" "x,v")
18508 (parallel [(const_int 0) (const_int 1)
18509 (const_int 2) (const_int 3)
18510 (const_int 4) (const_int 5)
18511 (const_int 6) (const_int 7)
18512 (const_int 8) (const_int 9)
18513 (const_int 10) (const_int 11)
18514 (const_int 12) (const_int 13)
18515 (const_int 14) (const_int 15)]))
18516 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
18517 "TARGET_AVX"
18518 "@
18519 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
18520 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
18521 [(set_attr "type" "sselog")
18522 (set_attr "prefix_extra" "1")
18523 (set_attr "length_immediate" "1")
18524 (set_attr "prefix" "vex,evex")
18525 (set_attr "mode" "OI")])
18526
18527 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
18528 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
18529 (unspec:V48_AVX2
18530 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
18531 (match_operand:V48_AVX2 1 "memory_operand" "m")]
18532 UNSPEC_MASKMOV))]
18533 "TARGET_AVX"
18534 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
18535 [(set_attr "type" "sselog1")
18536 (set_attr "prefix_extra" "1")
18537 (set_attr "prefix" "vex")
18538 (set_attr "btver2_decode" "vector")
18539 (set_attr "mode" "<sseinsnmode>")])
18540
18541 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
18542 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
18543 (unspec:V48_AVX2
18544 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
18545 (match_operand:V48_AVX2 2 "register_operand" "x")
18546 (match_dup 0)]
18547 UNSPEC_MASKMOV))]
18548 "TARGET_AVX"
18549 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18550 [(set_attr "type" "sselog1")
18551 (set_attr "prefix_extra" "1")
18552 (set_attr "prefix" "vex")
18553 (set_attr "btver2_decode" "vector")
18554 (set_attr "mode" "<sseinsnmode>")])
18555
18556 (define_expand "maskload<mode><sseintvecmodelower>"
18557 [(set (match_operand:V48_AVX2 0 "register_operand")
18558 (unspec:V48_AVX2
18559 [(match_operand:<sseintvecmode> 2 "register_operand")
18560 (match_operand:V48_AVX2 1 "memory_operand")]
18561 UNSPEC_MASKMOV))]
18562 "TARGET_AVX")
18563
18564 (define_expand "maskload<mode><avx512fmaskmodelower>"
18565 [(set (match_operand:V48_AVX512VL 0 "register_operand")
18566 (vec_merge:V48_AVX512VL
18567 (match_operand:V48_AVX512VL 1 "memory_operand")
18568 (match_dup 0)
18569 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18570 "TARGET_AVX512F")
18571
18572 (define_expand "maskload<mode><avx512fmaskmodelower>"
18573 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
18574 (vec_merge:VI12_AVX512VL
18575 (match_operand:VI12_AVX512VL 1 "memory_operand")
18576 (match_dup 0)
18577 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18578 "TARGET_AVX512BW")
18579
18580 (define_expand "maskstore<mode><sseintvecmodelower>"
18581 [(set (match_operand:V48_AVX2 0 "memory_operand")
18582 (unspec:V48_AVX2
18583 [(match_operand:<sseintvecmode> 2 "register_operand")
18584 (match_operand:V48_AVX2 1 "register_operand")
18585 (match_dup 0)]
18586 UNSPEC_MASKMOV))]
18587 "TARGET_AVX")
18588
18589 (define_expand "maskstore<mode><avx512fmaskmodelower>"
18590 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
18591 (vec_merge:V48_AVX512VL
18592 (match_operand:V48_AVX512VL 1 "register_operand")
18593 (match_dup 0)
18594 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18595 "TARGET_AVX512F")
18596
18597 (define_expand "maskstore<mode><avx512fmaskmodelower>"
18598 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
18599 (vec_merge:VI12_AVX512VL
18600 (match_operand:VI12_AVX512VL 1 "register_operand")
18601 (match_dup 0)
18602 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18603 "TARGET_AVX512BW")
18604
18605 (define_expand "cbranch<mode>4"
18606 [(set (reg:CC FLAGS_REG)
18607 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
18608 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
18609 (set (pc) (if_then_else
18610 (match_operator 0 "bt_comparison_operator"
18611 [(reg:CC FLAGS_REG) (const_int 0)])
18612 (label_ref (match_operand 3))
18613 (pc)))]
18614 "TARGET_SSE4_1"
18615 {
18616 ix86_expand_branch (GET_CODE (operands[0]),
18617 operands[1], operands[2], operands[3]);
18618 DONE;
18619 })
18620
18621
18622 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
18623 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
18624 (unspec:AVX256MODE2P
18625 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
18626 UNSPEC_CAST))]
18627 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
18628 "#"
18629 "&& reload_completed"
18630 [(set (match_dup 0) (match_dup 1))]
18631 {
18632 if (REG_P (operands[0]))
18633 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
18634 else
18635 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
18636 <ssehalfvecmode>mode);
18637 })
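
;; The cast pattern above (and its AVX512 counterparts further down)
;; models reinterpreting a 128-bit vector as the low part of a 256-bit
;; one, as done by the _mm256_cast*128_*256 style intrinsics; it is pure
;; paperwork and splits into a plain lowpart move after reload.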
18638
18639 ;; Modes handled by vec_init expanders.
18640 (define_mode_iterator VEC_INIT_MODE
18641 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
18642 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
18643 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
18644 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
18645 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
18646 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
18647 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
18648
18649 ;; Likewise, but for initialization from half sized vectors.
18650 ;; Thus, these are all VEC_INIT_MODE modes except the two-element V2?? modes.
18651 (define_mode_iterator VEC_INIT_HALF_MODE
18652 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
18653 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
18654 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
18655 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
18656 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
18657 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
18658 (V4TI "TARGET_AVX512F")])
18659
18660 (define_expand "vec_init<mode><ssescalarmodelower>"
18661 [(match_operand:VEC_INIT_MODE 0 "register_operand")
18662 (match_operand 1)]
18663 "TARGET_SSE"
18664 {
18665 ix86_expand_vector_init (false, operands[0], operands[1]);
18666 DONE;
18667 })
18668
18669 (define_expand "vec_init<mode><ssehalfvecmodelower>"
18670 [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
18671 (match_operand 1)]
18672 "TARGET_SSE"
18673 {
18674 ix86_expand_vector_init (false, operands[0], operands[1]);
18675 DONE;
18676 })
18677
18678 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
18679 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
18680 (ashiftrt:VI48_AVX512F_AVX512VL
18681 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
18682 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
18683 "TARGET_AVX2 && <mask_mode512bit_condition>"
18684 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18685 [(set_attr "type" "sseishft")
18686 (set_attr "prefix" "maybe_evex")
18687 (set_attr "mode" "<sseinsnmode>")])
18688
18689 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
18690 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18691 (ashiftrt:VI2_AVX512VL
18692 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18693 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18694 "TARGET_AVX512BW"
18695 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18696 [(set_attr "type" "sseishft")
18697 (set_attr "prefix" "maybe_evex")
18698 (set_attr "mode" "<sseinsnmode>")])
18699
18700 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
18701 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
18702 (any_lshift:VI48_AVX512F
18703 (match_operand:VI48_AVX512F 1 "register_operand" "v")
18704 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
18705 "TARGET_AVX2 && <mask_mode512bit_condition>"
18706 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18707 [(set_attr "type" "sseishft")
18708 (set_attr "prefix" "maybe_evex")
18709 (set_attr "mode" "<sseinsnmode>")])
18710
18711 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
18712 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18713 (any_lshift:VI2_AVX512VL
18714 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18715 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18716 "TARGET_AVX512BW"
18717 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18718 [(set_attr "type" "sseishft")
18719 (set_attr "prefix" "maybe_evex")
18720 (set_attr "mode" "<sseinsnmode>")])
18721
18722 (define_insn "avx_vec_concat<mode>"
18723 [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
18724 (vec_concat:V_256_512
18725 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,v,x,v")
18726 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,vm,C,C")))]
18727 "TARGET_AVX"
18728 {
18729 switch (which_alternative)
18730 {
18731 case 0:
18732 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18733 case 1:
18734 if (<MODE_SIZE> == 64)
18735 {
18736 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
18737 return "vinsert<shuffletype>32x8\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18738 else
18739 return "vinsert<shuffletype>64x4\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18740 }
18741 else
18742 {
18743 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18744 return "vinsert<shuffletype>64x2\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18745 else
18746 return "vinsert<shuffletype>32x4\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18747 }
18748 case 2:
18749 case 3:
18750 switch (get_attr_mode (insn))
18751 {
18752 case MODE_V16SF:
18753 return "vmovaps\t{%1, %t0|%t0, %1}";
18754 case MODE_V8DF:
18755 return "vmovapd\t{%1, %t0|%t0, %1}";
18756 case MODE_V8SF:
18757 return "vmovaps\t{%1, %x0|%x0, %1}";
18758 case MODE_V4DF:
18759 return "vmovapd\t{%1, %x0|%x0, %1}";
18760 case MODE_XI:
18761 if (which_alternative == 2)
18762 return "vmovdqa\t{%1, %t0|%t0, %1}";
18763 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18764 return "vmovdqa64\t{%1, %t0|%t0, %1}";
18765 else
18766 return "vmovdqa32\t{%1, %t0|%t0, %1}";
18767 case MODE_OI:
18768 if (which_alternative == 2)
18769 return "vmovdqa\t{%1, %x0|%x0, %1}";
18770 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18771 return "vmovdqa64\t{%1, %x0|%x0, %1}";
18772 else
18773 return "vmovdqa32\t{%1, %x0|%x0, %1}";
18774 default:
18775 gcc_unreachable ();
18776 }
18777 default:
18778 gcc_unreachable ();
18779 }
18780 }
18781 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
18782 (set_attr "prefix_extra" "1,1,*,*")
18783 (set_attr "length_immediate" "1,1,*,*")
18784 (set_attr "prefix" "maybe_evex")
18785 (set_attr "mode" "<sseinsnmode>")])
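
;; In avx_vec_concat, alternatives 0 and 1 join two registers with the
;; appropriate vinsert* instruction, while alternatives 2 and 3 handle
;; concatenation with zero: since VEX/EVEX-encoded moves clear the upper
;; bits of the full register, writing the low half with a plain
;; vmovaps/vmovapd/vmovdqa* is sufficient.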
18786
18787 (define_insn "vcvtph2ps<mask_name>"
18788 [(set (match_operand:V4SF 0 "register_operand" "=v")
18789 (vec_select:V4SF
18790 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
18791 UNSPEC_VCVTPH2PS)
18792 (parallel [(const_int 0) (const_int 1)
18793 (const_int 2) (const_int 3)])))]
18794 "TARGET_F16C || TARGET_AVX512VL"
18795 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18796 [(set_attr "type" "ssecvt")
18797 (set_attr "prefix" "maybe_evex")
18798 (set_attr "mode" "V4SF")])
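
;; vcvtph2ps converts IEEE half-precision values to single precision
;; (F16C, or AVX512VL for the masked forms).  The 128-bit pattern above
;; converts only the low four halves of operand 1, hence the vec_select
;; of elements 0-3 from the V8SF unspec; roughly what the _mm_cvtph_ps
;; intrinsic provides.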
18799
18800 (define_insn "*vcvtph2ps_load<mask_name>"
18801 [(set (match_operand:V4SF 0 "register_operand" "=v")
18802 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
18803 UNSPEC_VCVTPH2PS))]
18804 "TARGET_F16C || TARGET_AVX512VL"
18805 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18806 [(set_attr "type" "ssecvt")
18807 (set_attr "prefix" "vex")
18808 (set_attr "mode" "V8SF")])
18809
18810 (define_insn "vcvtph2ps256<mask_name>"
18811 [(set (match_operand:V8SF 0 "register_operand" "=v")
18812 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
18813 UNSPEC_VCVTPH2PS))]
18814 "TARGET_F16C || TARGET_AVX512VL"
18815 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18816 [(set_attr "type" "ssecvt")
18817 (set_attr "prefix" "vex")
18818 (set_attr "btver2_decode" "double")
18819 (set_attr "mode" "V8SF")])
18820
18821 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
18822 [(set (match_operand:V16SF 0 "register_operand" "=v")
18823 (unspec:V16SF
18824 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18825 UNSPEC_VCVTPH2PS))]
18826 "TARGET_AVX512F"
18827 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18828 [(set_attr "type" "ssecvt")
18829 (set_attr "prefix" "evex")
18830 (set_attr "mode" "V16SF")])
18831
18832 (define_expand "vcvtps2ph_mask"
18833 [(set (match_operand:V8HI 0 "register_operand")
18834 (vec_merge:V8HI
18835 (vec_concat:V8HI
18836 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
18837 (match_operand:SI 2 "const_0_to_255_operand")]
18838 UNSPEC_VCVTPS2PH)
18839 (match_dup 5))
18840 (match_operand:V8HI 3 "vector_move_operand")
18841 (match_operand:QI 4 "register_operand")))]
18842 "TARGET_AVX512VL"
18843 "operands[5] = CONST0_RTX (V4HImode);")
18844
18845 (define_expand "vcvtps2ph"
18846 [(set (match_operand:V8HI 0 "register_operand")
18847 (vec_concat:V8HI
18848 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
18849 (match_operand:SI 2 "const_0_to_255_operand")]
18850 UNSPEC_VCVTPS2PH)
18851 (match_dup 3)))]
18852 "TARGET_F16C"
18853 "operands[3] = CONST0_RTX (V4HImode);")
18854
18855 (define_insn "*vcvtps2ph<mask_name>"
18856 [(set (match_operand:V8HI 0 "register_operand" "=v")
18857 (vec_concat:V8HI
18858 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
18859 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18860 UNSPEC_VCVTPS2PH)
18861 (match_operand:V4HI 3 "const0_operand")))]
18862 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
18863 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
18864 [(set_attr "type" "ssecvt")
18865 (set_attr "prefix" "maybe_evex")
18866 (set_attr "mode" "V4SF")])
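
;; For vcvtps2ph, operand 2 is the imm8 rounding control.  The 128-bit
;; register form produces four half-precision values in the low 64 bits
;; of the destination and zeroes the upper 64 bits, which is why the RTL
;; concatenates the converted V4HI with a zero V4HI; this is roughly the
;; pattern behind the _mm_cvtps_ph intrinsic.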
18867
18868 (define_insn "*vcvtps2ph_store<mask_name>"
18869 [(set (match_operand:V4HI 0 "memory_operand" "=m")
18870 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
18871 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18872 UNSPEC_VCVTPS2PH))]
18873 "TARGET_F16C || TARGET_AVX512VL"
18874 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18875 [(set_attr "type" "ssecvt")
18876 (set_attr "prefix" "maybe_evex")
18877 (set_attr "mode" "V4SF")])
18878
18879 (define_insn "vcvtps2ph256<mask_name>"
18880 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
18881 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
18882 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18883 UNSPEC_VCVTPS2PH))]
18884 "TARGET_F16C || TARGET_AVX512VL"
18885 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18886 [(set_attr "type" "ssecvt")
18887 (set_attr "prefix" "maybe_evex")
18888 (set_attr "btver2_decode" "vector")
18889 (set_attr "mode" "V8SF")])
18890
18891 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
18892 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
18893 (unspec:V16HI
18894 [(match_operand:V16SF 1 "register_operand" "v")
18895 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18896 UNSPEC_VCVTPS2PH))]
18897 "TARGET_AVX512F"
18898 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18899 [(set_attr "type" "ssecvt")
18900 (set_attr "prefix" "evex")
18901 (set_attr "mode" "V16SF")])
18902
18903 ;; For gather* insn patterns
18904 (define_mode_iterator VEC_GATHER_MODE
18905 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
18906 (define_mode_attr VEC_GATHER_IDXSI
18907 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
18908 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
18909 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
18910 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
18911
18912 (define_mode_attr VEC_GATHER_IDXDI
18913 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18914 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
18915 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
18916 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
18917
18918 (define_mode_attr VEC_GATHER_SRCDI
18919 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18920 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
18921 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
18922 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
18923
18924 (define_expand "avx2_gathersi<mode>"
18925 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
18926 (unspec:VEC_GATHER_MODE
18927 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
18928 (mem:<ssescalarmode>
18929 (match_par_dup 6
18930 [(match_operand 2 "vsib_address_operand")
18931 (match_operand:<VEC_GATHER_IDXSI>
18932 3 "register_operand")
18933 (match_operand:SI 5 "const1248_operand")]))
18934 (mem:BLK (scratch))
18935 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
18936 UNSPEC_GATHER))
18937 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
18938 "TARGET_AVX2"
18939 {
18940 operands[6]
18941 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18942 operands[5]), UNSPEC_VSIBADDR);
18943 })
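
;; In the gather expanders, the address is rewritten as an
;; UNSPEC_VSIBADDR wrapping the base, the index vector and the scale
;; (1, 2, 4 or 8), which the insn patterns match through
;; vsib_mem_operator and print as a VSIB memory operand.  For the AVX2
;; forms the mask is a vector whose element sign bits select the active
;; lanes; the instruction clears the mask as it completes, which is why
;; the mask input is tied to a clobbered scratch.  A rough C-level
;; illustration (intrinsics, not part of this file):
;;   #include <immintrin.h>
;;   __m256 g = _mm256_mask_i32gather_ps (src, base, vindex, mask, 4);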
18944
18945 (define_insn "*avx2_gathersi<mode>"
18946 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18947 (unspec:VEC_GATHER_MODE
18948 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
18949 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18950 [(unspec:P
18951 [(match_operand:P 3 "vsib_address_operand" "Tv")
18952 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
18953 (match_operand:SI 6 "const1248_operand" "n")]
18954 UNSPEC_VSIBADDR)])
18955 (mem:BLK (scratch))
18956 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
18957 UNSPEC_GATHER))
18958 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18959 "TARGET_AVX2"
18960 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
18961 [(set_attr "type" "ssemov")
18962 (set_attr "prefix" "vex")
18963 (set_attr "mode" "<sseinsnmode>")])
18964
18965 (define_insn "*avx2_gathersi<mode>_2"
18966 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18967 (unspec:VEC_GATHER_MODE
18968 [(pc)
18969 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18970 [(unspec:P
18971 [(match_operand:P 2 "vsib_address_operand" "Tv")
18972 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
18973 (match_operand:SI 5 "const1248_operand" "n")]
18974 UNSPEC_VSIBADDR)])
18975 (mem:BLK (scratch))
18976 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
18977 UNSPEC_GATHER))
18978 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18979 "TARGET_AVX2"
18980 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
18981 [(set_attr "type" "ssemov")
18982 (set_attr "prefix" "vex")
18983 (set_attr "mode" "<sseinsnmode>")])
18984
18985 (define_expand "avx2_gatherdi<mode>"
18986 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
18987 (unspec:VEC_GATHER_MODE
18988 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18989 (mem:<ssescalarmode>
18990 (match_par_dup 6
18991 [(match_operand 2 "vsib_address_operand")
18992 (match_operand:<VEC_GATHER_IDXDI>
18993 3 "register_operand")
18994 (match_operand:SI 5 "const1248_operand")]))
18995 (mem:BLK (scratch))
18996 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
18997 UNSPEC_GATHER))
18998 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
18999 "TARGET_AVX2"
19000 {
19001 operands[6]
19002 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19003 operands[5]), UNSPEC_VSIBADDR);
19004 })
19005
19006 (define_insn "*avx2_gatherdi<mode>"
19007 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19008 (unspec:VEC_GATHER_MODE
19009 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
19010 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
19011 [(unspec:P
19012 [(match_operand:P 3 "vsib_address_operand" "Tv")
19013 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
19014 (match_operand:SI 6 "const1248_operand" "n")]
19015 UNSPEC_VSIBADDR)])
19016 (mem:BLK (scratch))
19017 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
19018 UNSPEC_GATHER))
19019 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19020 "TARGET_AVX2"
19021 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
19022 [(set_attr "type" "ssemov")
19023 (set_attr "prefix" "vex")
19024 (set_attr "mode" "<sseinsnmode>")])
19025
19026 (define_insn "*avx2_gatherdi<mode>_2"
19027 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19028 (unspec:VEC_GATHER_MODE
19029 [(pc)
19030 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19031 [(unspec:P
19032 [(match_operand:P 2 "vsib_address_operand" "Tv")
19033 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
19034 (match_operand:SI 5 "const1248_operand" "n")]
19035 UNSPEC_VSIBADDR)])
19036 (mem:BLK (scratch))
19037 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
19038 UNSPEC_GATHER))
19039 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19040 "TARGET_AVX2"
19041 {
19042 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
19043 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
19044 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
19045 }
19046 [(set_attr "type" "ssemov")
19047 (set_attr "prefix" "vex")
19048 (set_attr "mode" "<sseinsnmode>")])
19049
19050 (define_insn "*avx2_gatherdi<mode>_3"
19051 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
19052 (vec_select:<VEC_GATHER_SRCDI>
19053 (unspec:VI4F_256
19054 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
19055 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
19056 [(unspec:P
19057 [(match_operand:P 3 "vsib_address_operand" "Tv")
19058 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
19059 (match_operand:SI 6 "const1248_operand" "n")]
19060 UNSPEC_VSIBADDR)])
19061 (mem:BLK (scratch))
19062 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
19063 UNSPEC_GATHER)
19064 (parallel [(const_int 0) (const_int 1)
19065 (const_int 2) (const_int 3)])))
19066 (clobber (match_scratch:VI4F_256 1 "=&x"))]
19067 "TARGET_AVX2"
19068 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
19069 [(set_attr "type" "ssemov")
19070 (set_attr "prefix" "vex")
19071 (set_attr "mode" "<sseinsnmode>")])
19072
19073 (define_insn "*avx2_gatherdi<mode>_4"
19074 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
19075 (vec_select:<VEC_GATHER_SRCDI>
19076 (unspec:VI4F_256
19077 [(pc)
19078 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19079 [(unspec:P
19080 [(match_operand:P 2 "vsib_address_operand" "Tv")
19081 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
19082 (match_operand:SI 5 "const1248_operand" "n")]
19083 UNSPEC_VSIBADDR)])
19084 (mem:BLK (scratch))
19085 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
19086 UNSPEC_GATHER)
19087 (parallel [(const_int 0) (const_int 1)
19088 (const_int 2) (const_int 3)])))
19089 (clobber (match_scratch:VI4F_256 1 "=&x"))]
19090 "TARGET_AVX2"
19091 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
19092 [(set_attr "type" "ssemov")
19093 (set_attr "prefix" "vex")
19094 (set_attr "mode" "<sseinsnmode>")])
19095
19096 ;; Memory operand override for -masm=intel of the v*gatherq* patterns.
19097 (define_mode_attr gatherq_mode
19098 [(V4SI "q") (V2DI "x") (V4SF "q") (V2DF "x")
19099 (V8SI "x") (V4DI "t") (V8SF "x") (V4DF "t")
19100 (V16SI "t") (V8DI "g") (V16SF "t") (V8DF "g")])
19101
19102 (define_expand "<avx512>_gathersi<mode>"
19103 [(parallel [(set (match_operand:VI48F 0 "register_operand")
19104 (unspec:VI48F
19105 [(match_operand:VI48F 1 "register_operand")
19106 (match_operand:<avx512fmaskmode> 4 "register_operand")
19107 (mem:<ssescalarmode>
19108 (match_par_dup 6
19109 [(match_operand 2 "vsib_address_operand")
19110 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
19111 (match_operand:SI 5 "const1248_operand")]))]
19112 UNSPEC_GATHER))
19113 (clobber (match_scratch:<avx512fmaskmode> 7))])]
19114 "TARGET_AVX512F"
19115 {
19116 operands[6]
19117 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19118 operands[5]), UNSPEC_VSIBADDR);
19119 })
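
;; The AVX512 gathers differ from the AVX2 ones above in that the mask
;; is a k register ("Yk"); elements whose mask bit is clear keep the old
;; destination value, and the mask register is zeroed by the instruction
;; (hence the tied scratch clobber).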
19120
19121 (define_insn "*avx512f_gathersi<mode>"
19122 [(set (match_operand:VI48F 0 "register_operand" "=&v")
19123 (unspec:VI48F
19124 [(match_operand:VI48F 1 "register_operand" "0")
19125 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
19126 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19127 [(unspec:P
19128 [(match_operand:P 4 "vsib_address_operand" "Tv")
19129 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
19130 (match_operand:SI 5 "const1248_operand" "n")]
19131 UNSPEC_VSIBADDR)])]
19132 UNSPEC_GATHER))
19133 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
19134 "TARGET_AVX512F"
19135 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %<xtg_mode>6}"
19136 [(set_attr "type" "ssemov")
19137 (set_attr "prefix" "evex")
19138 (set_attr "mode" "<sseinsnmode>")])
19139
19140 (define_insn "*avx512f_gathersi<mode>_2"
19141 [(set (match_operand:VI48F 0 "register_operand" "=&v")
19142 (unspec:VI48F
19143 [(pc)
19144 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
19145 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
19146 [(unspec:P
19147 [(match_operand:P 3 "vsib_address_operand" "Tv")
19148 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
19149 (match_operand:SI 4 "const1248_operand" "n")]
19150 UNSPEC_VSIBADDR)])]
19151 UNSPEC_GATHER))
19152 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
19153 "TARGET_AVX512F"
19154 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<xtg_mode>5}"
19155 [(set_attr "type" "ssemov")
19156 (set_attr "prefix" "evex")
19157 (set_attr "mode" "<sseinsnmode>")])
19158
19159
19160 (define_expand "<avx512>_gatherdi<mode>"
19161 [(parallel [(set (match_operand:VI48F 0 "register_operand")
19162 (unspec:VI48F
19163 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
19164 (match_operand:QI 4 "register_operand")
19165 (mem:<ssescalarmode>
19166 (match_par_dup 6
19167 [(match_operand 2 "vsib_address_operand")
19168 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
19169 (match_operand:SI 5 "const1248_operand")]))]
19170 UNSPEC_GATHER))
19171 (clobber (match_scratch:QI 7))])]
19172 "TARGET_AVX512F"
19173 {
19174 operands[6]
19175 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19176 operands[5]), UNSPEC_VSIBADDR);
19177 })
19178
19179 (define_insn "*avx512f_gatherdi<mode>"
19180 [(set (match_operand:VI48F 0 "register_operand" "=&v")
19181 (unspec:VI48F
19182 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
19183 (match_operand:QI 7 "register_operand" "2")
19184 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19185 [(unspec:P
19186 [(match_operand:P 4 "vsib_address_operand" "Tv")
19187 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
19188 (match_operand:SI 5 "const1248_operand" "n")]
19189 UNSPEC_VSIBADDR)])]
19190 UNSPEC_GATHER))
19191 (clobber (match_scratch:QI 2 "=&Yk"))]
19192 "TARGET_AVX512F"
19193 {
19194 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %<gatherq_mode>6}";
19195 }
19196 [(set_attr "type" "ssemov")
19197 (set_attr "prefix" "evex")
19198 (set_attr "mode" "<sseinsnmode>")])
19199
19200 (define_insn "*avx512f_gatherdi<mode>_2"
19201 [(set (match_operand:VI48F 0 "register_operand" "=&v")
19202 (unspec:VI48F
19203 [(pc)
19204 (match_operand:QI 6 "register_operand" "1")
19205 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
19206 [(unspec:P
19207 [(match_operand:P 3 "vsib_address_operand" "Tv")
19208 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
19209 (match_operand:SI 4 "const1248_operand" "n")]
19210 UNSPEC_VSIBADDR)])]
19211 UNSPEC_GATHER))
19212 (clobber (match_scratch:QI 1 "=&Yk"))]
19213 "TARGET_AVX512F"
19214 {
19215 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
19216 {
19217 if (<MODE_SIZE> != 64)
19218 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %<gatherq_mode>5}";
19219 else
19220 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %t5}";
19221 }
19222 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<gatherq_mode>5}";
19223 }
19224 [(set_attr "type" "ssemov")
19225 (set_attr "prefix" "evex")
19226 (set_attr "mode" "<sseinsnmode>")])
19227
19228 (define_expand "<avx512>_scattersi<mode>"
19229 [(parallel [(set (mem:VI48F
19230 (match_par_dup 5
19231 [(match_operand 0 "vsib_address_operand")
19232 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
19233 (match_operand:SI 4 "const1248_operand")]))
19234 (unspec:VI48F
19235 [(match_operand:<avx512fmaskmode> 1 "register_operand")
19236 (match_operand:VI48F 3 "register_operand")]
19237 UNSPEC_SCATTER))
19238 (clobber (match_scratch:<avx512fmaskmode> 6))])]
19239 "TARGET_AVX512F"
19240 {
19241 operands[5]
19242 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
19243 operands[4]), UNSPEC_VSIBADDR);
19244 })
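
;; Scatters are the store-side counterpart: each element of operand 3
;; whose mask bit is set is stored to base + index * scale, and the mask
;; register is consumed in the process.  Roughly what e.g. the
;; _mm512_i32scatter_ps intrinsic expands to.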
19245
19246 (define_insn "*avx512f_scattersi<mode>"
19247 [(set (match_operator:VI48F 5 "vsib_mem_operator"
19248 [(unspec:P
19249 [(match_operand:P 0 "vsib_address_operand" "Tv")
19250 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
19251 (match_operand:SI 4 "const1248_operand" "n")]
19252 UNSPEC_VSIBADDR)])
19253 (unspec:VI48F
19254 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
19255 (match_operand:VI48F 3 "register_operand" "v")]
19256 UNSPEC_SCATTER))
19257 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
19258 "TARGET_AVX512F"
19259 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
19260 [(set_attr "type" "ssemov")
19261 (set_attr "prefix" "evex")
19262 (set_attr "mode" "<sseinsnmode>")])
19263
19264 (define_expand "<avx512>_scatterdi<mode>"
19265 [(parallel [(set (mem:VI48F
19266 (match_par_dup 5
19267 [(match_operand 0 "vsib_address_operand")
19268 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
19269 (match_operand:SI 4 "const1248_operand")]))
19270 (unspec:VI48F
19271 [(match_operand:QI 1 "register_operand")
19272 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
19273 UNSPEC_SCATTER))
19274 (clobber (match_scratch:QI 6))])]
19275 "TARGET_AVX512F"
19276 {
19277 operands[5]
19278 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
19279 operands[4]), UNSPEC_VSIBADDR);
19280 })
19281
19282 (define_insn "*avx512f_scatterdi<mode>"
19283 [(set (match_operator:VI48F 5 "vsib_mem_operator"
19284 [(unspec:P
19285 [(match_operand:P 0 "vsib_address_operand" "Tv")
19286 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
19287 (match_operand:SI 4 "const1248_operand" "n")]
19288 UNSPEC_VSIBADDR)])
19289 (unspec:VI48F
19290 [(match_operand:QI 6 "register_operand" "1")
19291 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
19292 UNSPEC_SCATTER))
19293 (clobber (match_scratch:QI 1 "=&Yk"))]
19294 "TARGET_AVX512F"
19295 {
19296 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
19297 return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}";
19298 return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%t5%{%1%}, %3}";
19299 }
19300 [(set_attr "type" "ssemov")
19301 (set_attr "prefix" "evex")
19302 (set_attr "mode" "<sseinsnmode>")])
19303
19304 (define_insn "<avx512>_compress<mode>_mask"
19305 [(set (match_operand:VI48F 0 "register_operand" "=v")
19306 (unspec:VI48F
19307 [(match_operand:VI48F 1 "register_operand" "v")
19308 (match_operand:VI48F 2 "vector_move_operand" "0C")
19309 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
19310 UNSPEC_COMPRESS))]
19311 "TARGET_AVX512F"
19312 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19313 [(set_attr "type" "ssemov")
19314 (set_attr "prefix" "evex")
19315 (set_attr "mode" "<sseinsnmode>")])
19316
19317 (define_insn "compress<mode>_mask"
19318 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
19319 (unspec:VI12_AVX512VLBW
19320 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
19321 (match_operand:VI12_AVX512VLBW 2 "vector_move_operand" "0C")
19322 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
19323 UNSPEC_COMPRESS))]
19324 "TARGET_AVX512VBMI2"
19325 "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19326 [(set_attr "type" "ssemov")
19327 (set_attr "prefix" "evex")
19328 (set_attr "mode" "<sseinsnmode>")])
19329
19330 (define_insn "<avx512>_compressstore<mode>_mask"
19331 [(set (match_operand:VI48F 0 "memory_operand" "=m")
19332 (unspec:VI48F
19333 [(match_operand:VI48F 1 "register_operand" "x")
19334 (match_dup 0)
19335 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
19336 UNSPEC_COMPRESS_STORE))]
19337 "TARGET_AVX512F"
19338 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
19339 [(set_attr "type" "ssemov")
19340 (set_attr "prefix" "evex")
19341 (set_attr "memory" "store")
19342 (set_attr "mode" "<sseinsnmode>")])
19343
19344 (define_insn "compressstore<mode>_mask"
19345 [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
19346 (unspec:VI12_AVX512VLBW
19347 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
19348 (match_dup 0)
19349 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
19350 UNSPEC_COMPRESS_STORE))]
19351 "TARGET_AVX512VBMI2"
19352 "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
19353 [(set_attr "type" "ssemov")
19354 (set_attr "prefix" "evex")
19355 (set_attr "memory" "store")
19356 (set_attr "mode" "<sseinsnmode>")])
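
;; vpcompress*/vcompressps pack the elements selected by the mask into
;; contiguous positions at the bottom of the destination (or, for the
;; compressstore forms, into consecutive memory locations); the expand
;; patterns below perform the inverse, distributing consecutive source
;; elements into the lanes selected by the mask.  Roughly the
;; _mm512_mask_compress_* / _mm512_mask_expandloadu_* intrinsic
;; families.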
19357
19358 (define_expand "<avx512>_expand<mode>_maskz"
19359 [(set (match_operand:VI48F 0 "register_operand")
19360 (unspec:VI48F
19361 [(match_operand:VI48F 1 "nonimmediate_operand")
19362 (match_operand:VI48F 2 "vector_move_operand")
19363 (match_operand:<avx512fmaskmode> 3 "register_operand")]
19364 UNSPEC_EXPAND))]
19365 "TARGET_AVX512F"
19366 "operands[2] = CONST0_RTX (<MODE>mode);")
19367
19368 (define_insn "<avx512>_expand<mode>_mask"
19369 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
19370 (unspec:VI48F
19371 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
19372 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
19373 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
19374 UNSPEC_EXPAND))]
19375 "TARGET_AVX512F"
19376 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19377 [(set_attr "type" "ssemov")
19378 (set_attr "prefix" "evex")
19379 (set_attr "memory" "none,load")
19380 (set_attr "mode" "<sseinsnmode>")])
19381
19382 (define_insn "expand<mode>_mask"
19383 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
19384 (unspec:VI12_AVX512VLBW
19385 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
19386 (match_operand:VI12_AVX512VLBW 2 "vector_move_operand" "0C,0C")
19387 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
19388 UNSPEC_EXPAND))]
19389 "TARGET_AVX512VBMI2"
19390 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19391 [(set_attr "type" "ssemov")
19392 (set_attr "prefix" "evex")
19393 (set_attr "memory" "none,load")
19394 (set_attr "mode" "<sseinsnmode>")])
19395
19396 (define_expand "expand<mode>_maskz"
19397 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
19398 (unspec:VI12_AVX512VLBW
19399 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
19400 (match_operand:VI12_AVX512VLBW 2 "vector_move_operand")
19401 (match_operand:<avx512fmaskmode> 3 "register_operand")]
19402 UNSPEC_EXPAND))]
19403 "TARGET_AVX512VBMI2"
19404 "operands[2] = CONST0_RTX (<MODE>mode);")
19405
19406 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
19407 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
19408 (unspec:VF_AVX512VL
19409 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
19410 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
19411 (match_operand:SI 3 "const_0_to_15_operand")]
19412 UNSPEC_RANGE))]
19413 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
19414 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
19415 [(set_attr "type" "sse")
19416 (set_attr "prefix" "evex")
19417 (set_attr "mode" "<MODE>")])
19418
19419 (define_insn "avx512dq_ranges<mode><round_saeonly_name>"
19420 [(set (match_operand:VF_128 0 "register_operand" "=v")
19421 (vec_merge:VF_128
19422 (unspec:VF_128
19423 [(match_operand:VF_128 1 "register_operand" "v")
19424 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
19425 (match_operand:SI 3 "const_0_to_15_operand")]
19426 UNSPEC_RANGE)
19427 (match_dup 1)
19428 (const_int 1)))]
19429 "TARGET_AVX512DQ"
19430 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
19431 [(set_attr "type" "sse")
19432 (set_attr "prefix" "evex")
19433 (set_attr "mode" "<MODE>")])
19434
19435 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
19436 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
19437 (unspec:<avx512fmaskmode>
19438 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
19439 (match_operand:QI 2 "const_0_to_255_operand" "n")]
19440 UNSPEC_FPCLASS))]
19441 "TARGET_AVX512DQ"
19442 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
19443 [(set_attr "type" "sse")
19444 (set_attr "length_immediate" "1")
19445 (set_attr "prefix" "evex")
19446 (set_attr "mode" "<MODE>")])
19447
19448 (define_insn "avx512dq_vmfpclass<mode>"
19449 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
19450 (and:<avx512fmaskmode>
19451 (unspec:<avx512fmaskmode>
19452 [(match_operand:VF_128 1 "register_operand" "v")
19453 (match_operand:QI 2 "const_0_to_255_operand" "n")]
19454 UNSPEC_FPCLASS)
19455 (const_int 1)))]
19456 "TARGET_AVX512DQ"
19457 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
19458 [(set_attr "type" "sse")
19459 (set_attr "length_immediate" "1")
19460 (set_attr "prefix" "evex")
19461 (set_attr "mode" "<MODE>")])
19462
19463 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
19464 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
19465 (unspec:VF_AVX512VL
19466 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
19467 (match_operand:SI 2 "const_0_to_15_operand")]
19468 UNSPEC_GETMANT))]
19469 "TARGET_AVX512F"
19470 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
19471 [(set_attr "prefix" "evex")
19472 (set_attr "mode" "<MODE>")])
19473
19474 (define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
19475 [(set (match_operand:VF_128 0 "register_operand" "=v")
19476 (vec_merge:VF_128
19477 (unspec:VF_128
19478 [(match_operand:VF_128 1 "register_operand" "v")
19479 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
19480 (match_operand:SI 3 "const_0_to_15_operand")]
19481 UNSPEC_GETMANT)
19482 (match_dup 1)
19483 (const_int 1)))]
19484 "TARGET_AVX512F"
19485 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2<round_saeonly_scalar_mask_op4>, %3}";
19486 [(set_attr "prefix" "evex")
19487 (set_attr "mode" "<ssescalarmode>")])
19488
19489 ;; The correct representation for this is absolutely enormous, and
19490 ;; surely not generally useful.
19491 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
19492 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
19493 (unspec:VI2_AVX512VL
19494 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
19495 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
19496 (match_operand:SI 3 "const_0_to_255_operand")]
19497 UNSPEC_DBPSADBW))]
19498 "TARGET_AVX512BW"
19499 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
19500 [(set_attr "type" "sselog1")
19501 (set_attr "length_immediate" "1")
19502 (set_attr "prefix" "evex")
19503 (set_attr "mode" "<sseinsnmode>")])
19504
19505 (define_insn "clz<mode>2<mask_name>"
19506 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
19507 (clz:VI48_AVX512VL
19508 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
19509 "TARGET_AVX512CD"
19510 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19511 [(set_attr "type" "sse")
19512 (set_attr "prefix" "evex")
19513 (set_attr "mode" "<sseinsnmode>")])
19514
19515 (define_insn "<mask_codefor>conflict<mode><mask_name>"
19516 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
19517 (unspec:VI48_AVX512VL
19518 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
19519 UNSPEC_CONFLICT))]
19520 "TARGET_AVX512CD"
19521 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19522 [(set_attr "type" "sse")
19523 (set_attr "prefix" "evex")
19524 (set_attr "mode" "<sseinsnmode>")])
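
;; AVX512CD: vplzcnt<d,q> is a per-element leading-zero count (the clz
;; rtx above), and vpconflict<d,q> produces, for each element, a bit
;; mask of the lower-numbered elements that hold the same value; the
;; latter is what lets loops with possible address conflicts (e.g.
;; histogram updates) be vectorized.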
19525
19526 (define_insn "sha1msg1"
19527 [(set (match_operand:V4SI 0 "register_operand" "=x")
19528 (unspec:V4SI
19529 [(match_operand:V4SI 1 "register_operand" "0")
19530 (match_operand:V4SI 2 "vector_operand" "xBm")]
19531 UNSPEC_SHA1MSG1))]
19532 "TARGET_SHA"
19533 "sha1msg1\t{%2, %0|%0, %2}"
19534 [(set_attr "type" "sselog1")
19535 (set_attr "mode" "TI")])
19536
19537 (define_insn "sha1msg2"
19538 [(set (match_operand:V4SI 0 "register_operand" "=x")
19539 (unspec:V4SI
19540 [(match_operand:V4SI 1 "register_operand" "0")
19541 (match_operand:V4SI 2 "vector_operand" "xBm")]
19542 UNSPEC_SHA1MSG2))]
19543 "TARGET_SHA"
19544 "sha1msg2\t{%2, %0|%0, %2}"
19545 [(set_attr "type" "sselog1")
19546 (set_attr "mode" "TI")])
19547
19548 (define_insn "sha1nexte"
19549 [(set (match_operand:V4SI 0 "register_operand" "=x")
19550 (unspec:V4SI
19551 [(match_operand:V4SI 1 "register_operand" "0")
19552 (match_operand:V4SI 2 "vector_operand" "xBm")]
19553 UNSPEC_SHA1NEXTE))]
19554 "TARGET_SHA"
19555 "sha1nexte\t{%2, %0|%0, %2}"
19556 [(set_attr "type" "sselog1")
19557 (set_attr "mode" "TI")])
19558
19559 (define_insn "sha1rnds4"
19560 [(set (match_operand:V4SI 0 "register_operand" "=x")
19561 (unspec:V4SI
19562 [(match_operand:V4SI 1 "register_operand" "0")
19563 (match_operand:V4SI 2 "vector_operand" "xBm")
19564 (match_operand:SI 3 "const_0_to_3_operand" "n")]
19565 UNSPEC_SHA1RNDS4))]
19566 "TARGET_SHA"
19567 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
19568 [(set_attr "type" "sselog1")
19569 (set_attr "length_immediate" "1")
19570 (set_attr "mode" "TI")])
19571
19572 (define_insn "sha256msg1"
19573 [(set (match_operand:V4SI 0 "register_operand" "=x")
19574 (unspec:V4SI
19575 [(match_operand:V4SI 1 "register_operand" "0")
19576 (match_operand:V4SI 2 "vector_operand" "xBm")]
19577 UNSPEC_SHA256MSG1))]
19578 "TARGET_SHA"
19579 "sha256msg1\t{%2, %0|%0, %2}"
19580 [(set_attr "type" "sselog1")
19581 (set_attr "mode" "TI")])
19582
19583 (define_insn "sha256msg2"
19584 [(set (match_operand:V4SI 0 "register_operand" "=x")
19585 (unspec:V4SI
19586 [(match_operand:V4SI 1 "register_operand" "0")
19587 (match_operand:V4SI 2 "vector_operand" "xBm")]
19588 UNSPEC_SHA256MSG2))]
19589 "TARGET_SHA"
19590 "sha256msg2\t{%2, %0|%0, %2}"
19591 [(set_attr "type" "sselog1")
19592 (set_attr "mode" "TI")])
19593
19594 (define_insn "sha256rnds2"
19595 [(set (match_operand:V4SI 0 "register_operand" "=x")
19596 (unspec:V4SI
19597 [(match_operand:V4SI 1 "register_operand" "0")
19598 (match_operand:V4SI 2 "vector_operand" "xBm")
19599 (match_operand:V4SI 3 "register_operand" "Yz")]
19600 UNSPEC_SHA256RNDS2))]
19601 "TARGET_SHA"
19602 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
19603 [(set_attr "type" "sselog1")
19604 (set_attr "length_immediate" "1")
19605 (set_attr "mode" "TI")])
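
;; The SHA patterns map one-to-one onto the SHA-NI instructions:
;; sha1rnds4 performs four SHA-1 rounds with the immediate selecting the
;; round function/constant group, and sha256rnds2 performs two SHA-256
;; rounds with its third input implicitly in %xmm0, which the "Yz"
;; constraint enforces.  At the source level these are the
;; _mm_sha1*_epu32 and _mm_sha256*_epu32 intrinsics.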
19606
19607 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
19608 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
19609 (unspec:AVX512MODE2P
19610 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
19611 UNSPEC_CAST))]
19612 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
19613 "#"
19614 "&& reload_completed"
19615 [(set (match_dup 0) (match_dup 1))]
19616 {
19617 if (REG_P (operands[0]))
19618 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
19619 else
19620 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
19621 <ssequartermode>mode);
19622 })
19623
19624 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
19625 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
19626 (unspec:AVX512MODE2P
19627 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
19628 UNSPEC_CAST))]
19629 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
19630 "#"
19631 "&& reload_completed"
19632 [(set (match_dup 0) (match_dup 1))]
19633 {
19634 if (REG_P (operands[0]))
19635 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
19636 else
19637 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
19638 <ssehalfvecmode>mode);
19639 })
19640
19641 (define_int_iterator VPMADD52
19642 [UNSPEC_VPMADD52LUQ
19643 UNSPEC_VPMADD52HUQ])
19644
19645 (define_int_attr vpmadd52type
19646 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
19647
(define_expand "vpamdd52huq<mode>_maskz"
  [(match_operand:VI8_AVX512VL 0 "register_operand")
   (match_operand:VI8_AVX512VL 1 "register_operand")
   (match_operand:VI8_AVX512VL 2 "register_operand")
   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512IFMA"
{
  emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})

(define_expand "vpamdd52luq<mode>_maskz"
  [(match_operand:VI8_AVX512VL 0 "register_operand")
   (match_operand:VI8_AVX512VL 1 "register_operand")
   (match_operand:VI8_AVX512VL 2 "register_operand")
   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512IFMA"
{
  emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})

(define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
        (unspec:VI8_AVX512VL
          [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
           (match_operand:VI8_AVX512VL 2 "register_operand" "v")
           (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
          VPMADD52))]
  "TARGET_AVX512IFMA"
  "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "vpamdd52<vpmadd52type><mode>_mask"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI8_AVX512VL
          (unspec:VI8_AVX512VL
            [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
             (match_operand:VI8_AVX512VL 2 "register_operand" "v")
             (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
            VPMADD52)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512IFMA"
  "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

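;; AVX512VBMI.  vpmultishiftqb extracts, for every destination byte, an
;; unaligned 8-bit field from the corresponding quadword of operand 2,
;; starting at the bit offset selected by the matching control byte in
;; operand 1.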
(define_insn "vpmultishiftqb<mode><mask_name>"
  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
        (unspec:VI1_AVX512VL
          [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
           (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
          UNSPEC_VPMULTISHIFT))]
  "TARGET_AVX512VBMI"
  "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

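;; The AVX5124FMAPS and AVX5124VNNIW instructions read a block of four
;; consecutive ZMM registers.  The block is modeled as a single 2048-bit
;; V64SF/V64SI value whose moves are split after reload into four 512-bit
;; (<imod4_narrow>) moves.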
(define_mode_iterator IMOD4
  [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])

(define_mode_attr imod4_narrow
  [(V64SF "V16SF") (V64SI "V16SI")])

(define_expand "mov<mode>"
  [(set (match_operand:IMOD4 0 "nonimmediate_operand")
        (match_operand:IMOD4 1 "vector_move_operand"))]
  "TARGET_AVX512F"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})

(define_insn_and_split "*mov<mode>_internal"
  [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
        (match_operand:IMOD4 1 "vector_move_operand" " C,vm,v"))]
  "TARGET_AVX512F
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1;
  int i;

  for (i = 0; i < 4; i++)
    {
      op0 = simplify_subreg
             (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
      op1 = simplify_subreg
             (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
      emit_move_insn (op0, op1);
    }
  DONE;
})

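;; v4fmaddps and its scalar/negated variants perform four iterations of a
;; single-precision multiply-add: the accumulator is tied to the
;; destination, the V64SF operand names the four-register source block
;; (only its first register is encoded), and the V4SF memory operand
;; supplies one element per iteration.  The _mask forms merge with the
;; previous destination contents, the _maskz forms zero the masked-off
;; elements.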
(define_insn "avx5124fmaddps_4fmaddps"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (unspec:V16SF
          [(match_operand:V16SF 1 "register_operand" "0")
           (match_operand:V64SF 2 "register_operand" "Yh")
           (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])

(define_insn "avx5124fmaddps_4fmaddps_mask"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (vec_merge:V16SF
          (unspec:V16SF
            [(match_operand:V64SF 1 "register_operand" "Yh")
             (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
          (match_operand:V16SF 3 "register_operand" "0")
          (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])

(define_insn "avx5124fmaddps_4fmaddps_maskz"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (vec_merge:V16SF
          (unspec:V16SF
            [(match_operand:V16SF 1 "register_operand" "0")
             (match_operand:V64SF 2 "register_operand" "Yh")
             (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
          (match_operand:V16SF 4 "const0_operand" "C")
          (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])

(define_insn "avx5124fmaddps_4fmaddss"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (unspec:V4SF
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V64SF 2 "register_operand" "Yh")
           (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])

(define_insn "avx5124fmaddps_4fmaddss_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_merge:V4SF
          (unspec:V4SF
            [(match_operand:V64SF 1 "register_operand" "Yh")
             (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
          (match_operand:V4SF 3 "register_operand" "0")
          (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])

(define_insn "avx5124fmaddps_4fmaddss_maskz"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_merge:V4SF
          (unspec:V4SF
            [(match_operand:V4SF 1 "register_operand" "0")
             (match_operand:V64SF 2 "register_operand" "Yh")
             (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
          (match_operand:V4SF 4 "const0_operand" "C")
          (match_operand:QI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])

(define_insn "avx5124fmaddps_4fnmaddps"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (unspec:V16SF
          [(match_operand:V16SF 1 "register_operand" "0")
           (match_operand:V64SF 2 "register_operand" "Yh")
           (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])

(define_insn "avx5124fmaddps_4fnmaddps_mask"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (vec_merge:V16SF
          (unspec:V16SF
            [(match_operand:V64SF 1 "register_operand" "Yh")
             (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
          (match_operand:V16SF 3 "register_operand" "0")
          (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])

(define_insn "avx5124fmaddps_4fnmaddps_maskz"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (vec_merge:V16SF
          (unspec:V16SF
            [(match_operand:V16SF 1 "register_operand" "0")
             (match_operand:V64SF 2 "register_operand" "Yh")
             (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
          (match_operand:V16SF 4 "const0_operand" "C")
          (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])

(define_insn "avx5124fmaddps_4fnmaddss"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (unspec:V4SF
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V64SF 2 "register_operand" "Yh")
           (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])

(define_insn "avx5124fmaddps_4fnmaddss_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_merge:V4SF
          (unspec:V4SF
            [(match_operand:V64SF 1 "register_operand" "Yh")
             (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
          (match_operand:V4SF 3 "register_operand" "0")
          (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])

(define_insn "avx5124fmaddps_4fnmaddss_maskz"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_merge:V4SF
          (unspec:V4SF
            [(match_operand:V4SF 1 "register_operand" "0")
             (match_operand:V64SF 2 "register_operand" "Yh")
             (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
          (match_operand:V4SF 4 "const0_operand" "C")
          (match_operand:QI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])

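;; vp4dpwssd and vp4dpwssds perform four iterations of the signed-word
;; dot-product with doubleword accumulation; the "s" form saturates the
;; result.  The operand layout mirrors the v4fmaddps patterns above.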
(define_insn "avx5124vnniw_vp4dpwssd"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (unspec:V16SI
          [(match_operand:V16SI 1 "register_operand" "0")
           (match_operand:V64SI 2 "register_operand" "Yh")
           (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

(define_insn "avx5124vnniw_vp4dpwssd_mask"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_merge:V16SI
          (unspec:V16SI
            [(match_operand:V64SI 1 "register_operand" "Yh")
             (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
          (match_operand:V16SI 3 "register_operand" "0")
          (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

(define_insn "avx5124vnniw_vp4dpwssd_maskz"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_merge:V16SI
          (unspec:V16SI
            [(match_operand:V16SI 1 "register_operand" "0")
             (match_operand:V64SI 2 "register_operand" "Yh")
             (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
          (match_operand:V16SI 4 "const0_operand" "C")
          (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

(define_insn "avx5124vnniw_vp4dpwssds"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (unspec:V16SI
          [(match_operand:V16SI 1 "register_operand" "0")
           (match_operand:V64SI 2 "register_operand" "Yh")
           (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

(define_insn "avx5124vnniw_vp4dpwssds_mask"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_merge:V16SI
          (unspec:V16SI
            [(match_operand:V64SI 1 "register_operand" "Yh")
             (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
          (match_operand:V16SI 3 "register_operand" "0")
          (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

(define_insn "avx5124vnniw_vp4dpwssds_maskz"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_merge:V16SI
          (unspec:V16SI
            [(match_operand:V16SI 1 "register_operand" "0")
             (match_operand:V64SI 2 "register_operand" "Yh")
             (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
          (match_operand:V16SI 4 "const0_operand" "C")
          (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

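;; AVX512VPOPCNTDQ: per-element population count on 512-bit vectors of
;; doublewords and quadwords.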
(define_insn "vpopcount<mode><mask_name>"
  [(set (match_operand:VI48_512 0 "register_operand" "=v")
        (popcount:VI48_512
          (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512VPOPCNTDQ"
  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")

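;; Out-of-line prologue/epilogue stubs (see -mcall-ms2sysv-xlogues): an
;; ms_abi function that calls System V ABI code saves and restores the
;; clobbered SSE registers through stubs in libgcc instead of emitting the
;; saves and restores inline.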
;; Save multiple registers out-of-line.
(define_insn "save_multiple<mode>"
  [(match_parallel 0 "save_multiple"
     [(use (match_operand:P 1 "symbol_operand"))])]
  "TARGET_SSE && TARGET_64BIT"
  "call\t%P1")

;; Restore multiple registers out-of-line.
(define_insn "restore_multiple<mode>"
  [(match_parallel 0 "restore_multiple"
     [(use (match_operand:P 1 "symbol_operand"))])]
  "TARGET_SSE && TARGET_64BIT"
  "call\t%P1")

;; Restore multiple registers out-of-line and return.
(define_insn "restore_multiple_and_return<mode>"
  [(match_parallel 0 "restore_multiple"
     [(return)
      (use (match_operand:P 1 "symbol_operand"))
      (set (reg:DI SP_REG) (reg:DI R10_REG))
     ])]
  "TARGET_SSE && TARGET_64BIT"
  "jmp\t%P1")

;; Restore multiple registers out-of-line when the hard frame pointer is
;; used, performing the leave operation prior to returning from the
;; function.
(define_insn "restore_multiple_leave_return<mode>"
  [(match_parallel 0 "restore_multiple"
     [(return)
      (use (match_operand:P 1 "symbol_operand"))
      (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
      (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
      (clobber (mem:BLK (scratch)))
     ])]
  "TARGET_SSE && TARGET_64BIT"
  "jmp\t%P1")

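;; GFNI.  gf2p8affineqb computes an affine transformation of each byte (an
;; 8x8 bit-matrix multiply plus the 8-bit constant in the immediate),
;; gf2p8affineinvqb first replaces each byte by its inverse in GF(2^8), and
;; gf2p8mulb multiplies bytes in GF(2^8), the latter two using the
;; reduction polynomial x^8 + x^4 + x^3 + x + 1.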
(define_insn "vgf2p8affineinvqb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
        (unspec:VI1_AVX512F
          [(match_operand:VI1_AVX512F 1 "register_operand" "0,x,v")
           (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
           (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
          UNSPEC_GF2P8AFFINEINV))]
  "TARGET_GFNI"
  "@
   gf2p8affineinvqb\t{%3, %2, %0|%0, %2, %3}
   vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}
   vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "vgf2p8affineqb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
        (unspec:VI1_AVX512F
          [(match_operand:VI1_AVX512F 1 "register_operand" "0,x,v")
           (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
           (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
          UNSPEC_GF2P8AFFINE))]
  "TARGET_GFNI"
  "@
   gf2p8affineqb\t{%3, %2, %0|%0, %2, %3}
   vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}
   vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "vgf2p8mulb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
        (unspec:VI1_AVX512F
          [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
           (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")]
          UNSPEC_GF2P8MUL))]
  "TARGET_GFNI"
  "@
   gf2p8mulb\t{%2, %0|%0, %2}
   vgf2p8mulb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
   vgf2p8mulb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "<sseinsnmode>")])

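;; AVX512VBMI2.  vpshld and vpshrd form a double-width value from each pair
;; of corresponding elements of the two sources, shift it left resp. right
;; by the immediate bit count, and write one half of the result to the
;; destination.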
(define_insn "vpshrd_<mode><mask_name>"
  [(set (match_operand:VI248_VLBW 0 "register_operand" "=v")
        (unspec:VI248_VLBW
          [(match_operand:VI248_VLBW 1 "register_operand" "v")
           (match_operand:VI248_VLBW 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPSHRD))]
  "TARGET_AVX512VBMI2"
  "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "prefix" "evex")])

(define_insn "vpshld_<mode><mask_name>"
  [(set (match_operand:VI248_VLBW 0 "register_operand" "=v")
        (unspec:VI248_VLBW
          [(match_operand:VI248_VLBW 1 "register_operand" "v")
           (match_operand:VI248_VLBW 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPSHLD))]
  "TARGET_AVX512VBMI2"
  "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "prefix" "evex")])