1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
20 (define_c_enum "unspec" [
21 ;; SSE
22 UNSPEC_MOVNT
23
24 ;; SSE3
25 UNSPEC_LDDQU
26
27 ;; SSSE3
28 UNSPEC_PSHUFB
29 UNSPEC_PSIGN
30 UNSPEC_PALIGNR
31
32 ;; For SSE4A support
33 UNSPEC_EXTRQI
34 UNSPEC_EXTRQ
35 UNSPEC_INSERTQI
36 UNSPEC_INSERTQ
37
38 ;; For SSE4.1 support
39 UNSPEC_BLENDV
40 UNSPEC_INSERTPS
41 UNSPEC_DP
42 UNSPEC_MOVNTDQA
43 UNSPEC_MPSADBW
44 UNSPEC_PHMINPOSUW
45 UNSPEC_PTEST
46
47 ;; For SSE4.2 support
48 UNSPEC_PCMPESTR
49 UNSPEC_PCMPISTR
50
51 ;; For FMA4 support
52 UNSPEC_FMADDSUB
53 UNSPEC_XOP_UNSIGNED_CMP
54 UNSPEC_XOP_TRUEFALSE
55 UNSPEC_XOP_PERMUTE
56 UNSPEC_FRCZ
57
58 ;; For AES support
59 UNSPEC_AESENC
60 UNSPEC_AESENCLAST
61 UNSPEC_AESDEC
62 UNSPEC_AESDECLAST
63 UNSPEC_AESIMC
64 UNSPEC_AESKEYGENASSIST
65
66 ;; For PCLMUL support
67 UNSPEC_PCLMUL
68
69 ;; For AVX support
70 UNSPEC_PCMP
71 UNSPEC_VPERMIL
72 UNSPEC_VPERMIL2
73 UNSPEC_VPERMIL2F128
74 UNSPEC_CAST
75 UNSPEC_VTESTP
76 UNSPEC_VCVTPH2PS
77 UNSPEC_VCVTPS2PH
78
79 ;; For AVX2 support
80 UNSPEC_VPERMVAR
81 UNSPEC_VPERMTI
82 UNSPEC_GATHER
83 UNSPEC_VSIBADDR
84
85 ;; For AVX512F support
86 UNSPEC_VPERMI2
87 UNSPEC_VPERMT2
88 UNSPEC_VPERMI2_MASK
89 UNSPEC_UNSIGNED_FIX_NOTRUNC
90 UNSPEC_UNSIGNED_PCMP
91 UNSPEC_TESTM
92 UNSPEC_TESTNM
93 UNSPEC_SCATTER
94 UNSPEC_RCP14
95 UNSPEC_RSQRT14
96 UNSPEC_FIXUPIMM
97 UNSPEC_SCALEF
98 UNSPEC_VTERNLOG
99 UNSPEC_GETEXP
100 UNSPEC_GETMANT
101 UNSPEC_ALIGN
102 UNSPEC_CONFLICT
103 UNSPEC_COMPRESS
104 UNSPEC_COMPRESS_STORE
105 UNSPEC_EXPAND
106 UNSPEC_MASKED_EQ
107 UNSPEC_MASKED_GT
108
109 ;; For embed. rounding feature
110 UNSPEC_EMBEDDED_ROUNDING
111
112 ;; For AVX512PF support
113 UNSPEC_GATHER_PREFETCH
114 UNSPEC_SCATTER_PREFETCH
115
116 ;; For AVX512ER support
117 UNSPEC_EXP2
118 UNSPEC_RCP28
119 UNSPEC_RSQRT28
120
121 ;; For SHA support
122 UNSPEC_SHA1MSG1
123 UNSPEC_SHA1MSG2
124 UNSPEC_SHA1NEXTE
125 UNSPEC_SHA1RNDS4
126 UNSPEC_SHA256MSG1
127 UNSPEC_SHA256MSG2
128 UNSPEC_SHA256RNDS2
129
130 ;; For AVX512BW support
131 UNSPEC_DBPSADBW
132 UNSPEC_PMADDUBSW512
133 UNSPEC_PMADDWD512
134 UNSPEC_PSHUFHW
135 UNSPEC_PSHUFLW
136 UNSPEC_CVTINT2MASK
137
138 ;; For AVX512DQ support
139 UNSPEC_REDUCE
140 UNSPEC_FPCLASS
141 UNSPEC_RANGE
142
143 ;; For AVX512IFMA support
144 UNSPEC_VPMADD52LUQ
145 UNSPEC_VPMADD52HUQ
146
147 ;; For AVX512VBMI support
148 UNSPEC_VPMULTISHIFT
149 ])
150
151 (define_c_enum "unspecv" [
152 UNSPECV_LDMXCSR
153 UNSPECV_STMXCSR
154 UNSPECV_CLFLUSH
155 UNSPECV_MONITOR
156 UNSPECV_MWAIT
157 UNSPECV_VZEROALL
158 UNSPECV_VZEROUPPER
159 ])
160
161 ;; All vector modes including V?TImode, used in move patterns.
162 (define_mode_iterator VMOVE
163 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
164 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
165 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
166 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
167 (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
168 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
169 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
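;; An entry such as (V64QI "TARGET_AVX512F") enables that mode only when the
;; named condition holds, so the mov<mode> expander below produces movv64qi
;; only for TARGET_AVX512F, while the unconditional V16QI entry is available
;; whenever TARGET_SSE itself is.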
170
171 ;; All AVX-512{F,VL} vector modes. Assumes TARGET_AVX512F as baseline.
172 (define_mode_iterator V48_AVX512VL
173 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
174 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
175 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
176 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
177
178 ;; 1,2 byte AVX-512{BW,VL} vector modes. Assumes TARGET_AVX512BW as baseline.
179 (define_mode_iterator VI12_AVX512VL
180 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
181 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
182
183 (define_mode_iterator VI1_AVX512VL
184 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
185
186 ;; All vector modes
187 (define_mode_iterator V
188 [(V32QI "TARGET_AVX") V16QI
189 (V16HI "TARGET_AVX") V8HI
190 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
191 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
192 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
193 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
194
195 ;; All 128bit vector modes
196 (define_mode_iterator V_128
197 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
198
199 ;; All 256bit vector modes
200 (define_mode_iterator V_256
201 [V32QI V16HI V8SI V4DI V8SF V4DF])
202
203 ;; All 512bit vector modes
204 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
205
206 ;; All 256bit and 512bit vector modes
207 (define_mode_iterator V_256_512
208 [V32QI V16HI V8SI V4DI V8SF V4DF
209 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
210 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
211
212 ;; All vector float modes
213 (define_mode_iterator VF
214 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
215 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
216
217 ;; 128- and 256-bit float vector modes
218 (define_mode_iterator VF_128_256
219 [(V8SF "TARGET_AVX") V4SF
220 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
221
222 ;; All SFmode vector float modes
223 (define_mode_iterator VF1
224 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
225
226 ;; 128- and 256-bit SF vector modes
227 (define_mode_iterator VF1_128_256
228 [(V8SF "TARGET_AVX") V4SF])
229
230 (define_mode_iterator VF1_128_256VL
231 [V8SF (V4SF "TARGET_AVX512VL")])
232
233 ;; All DFmode vector float modes
234 (define_mode_iterator VF2
235 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
236
237 ;; 128- and 256-bit DF vector modes
238 (define_mode_iterator VF2_128_256
239 [(V4DF "TARGET_AVX") V2DF])
240
241 (define_mode_iterator VF2_512_256
242 [(V8DF "TARGET_AVX512F") V4DF])
243
244 (define_mode_iterator VF2_512_256VL
245 [V8DF (V4DF "TARGET_AVX512VL")])
246
247 ;; All 128bit vector float modes
248 (define_mode_iterator VF_128
249 [V4SF (V2DF "TARGET_SSE2")])
250
251 ;; All 256bit vector float modes
252 (define_mode_iterator VF_256
253 [V8SF V4DF])
254
255 ;; All 512bit vector float modes
256 (define_mode_iterator VF_512
257 [V16SF V8DF])
258
259 (define_mode_iterator VI48_AVX512VL
260 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
261 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
262
263 (define_mode_iterator VF_AVX512VL
264 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
265 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
266
267 (define_mode_iterator VF2_AVX512VL
268 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
269
270 (define_mode_iterator VF1_AVX512VL
271 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
272
273 ;; All vector integer modes
274 (define_mode_iterator VI
275 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
276 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
277 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
278 (V8SI "TARGET_AVX") V4SI
279 (V4DI "TARGET_AVX") V2DI])
280
281 (define_mode_iterator VI_AVX2
282 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
283 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
284 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
285 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
286
287 ;; All QImode vector integer modes
288 (define_mode_iterator VI1
289 [(V32QI "TARGET_AVX") V16QI])
290
291 ;; All 128bit vector modes, plus their 256bit variants for TARGET_AVX
292 (define_mode_iterator V_AVX
293 [V16QI V8HI V4SI V2DI V4SF V2DF
294 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
295 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
296 (V8SF "TARGET_AVX") (V4DF"TARGET_AVX")])
297
298 (define_mode_iterator VI48_AVX
299 [V4SI V2DI
300 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
301
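;; All DImode vector integer modes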
302 (define_mode_iterator VI8
303 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
304
305 (define_mode_iterator VI8_AVX512VL
306 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
307
308 (define_mode_iterator VI8_256_512
309 [V8DI (V4DI "TARGET_AVX512VL")])
310
311 (define_mode_iterator VI1_AVX2
312 [(V32QI "TARGET_AVX2") V16QI])
313
314 (define_mode_iterator VI1_AVX512
315 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
316
317 (define_mode_iterator VI2_AVX2
318 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
319
320 (define_mode_iterator VI2_AVX512F
321 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
322
323 (define_mode_iterator VI4_AVX
324 [(V8SI "TARGET_AVX") V4SI])
325
326 (define_mode_iterator VI4_AVX2
327 [(V8SI "TARGET_AVX2") V4SI])
328
329 (define_mode_iterator VI4_AVX512F
330 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
331
332 (define_mode_iterator VI4_AVX512VL
333 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
334
335 (define_mode_iterator VI48_AVX512F_AVX512VL
336 [V4SI V8SI (V16SI "TARGET_AVX512F")
337 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
338
339 (define_mode_iterator VI2_AVX512VL
340 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
341
342 (define_mode_iterator VI8_AVX2_AVX512BW
343 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
344
345 (define_mode_iterator VI8_AVX2
346 [(V4DI "TARGET_AVX2") V2DI])
347
348 (define_mode_iterator VI8_AVX2_AVX512F
349 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
350
351 (define_mode_iterator VI4_128_8_256
352 [V4SI V4DI])
353
354 ;; All V8D* modes
355 (define_mode_iterator V8FI
356 [V8DF V8DI])
357
358 ;; All V16S* modes
359 (define_mode_iterator V16FI
360 [V16SF V16SI])
361
362 ;; ??? We should probably use TImode instead.
363 (define_mode_iterator VIMAX_AVX2
364 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
365
366 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
367 (define_mode_iterator SSESCALARMODE
368 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
369
370 (define_mode_iterator VI12_AVX2
371 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
372 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
373
374 (define_mode_iterator VI24_AVX2
375 [(V16HI "TARGET_AVX2") V8HI
376 (V8SI "TARGET_AVX2") V4SI])
377
378 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
379 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
380 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
381 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
382
383 (define_mode_iterator VI124_AVX2
384 [(V32QI "TARGET_AVX2") V16QI
385 (V16HI "TARGET_AVX2") V8HI
386 (V8SI "TARGET_AVX2") V4SI])
387
388 (define_mode_iterator VI2_AVX2_AVX512BW
389 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
390
391 (define_mode_iterator VI48_AVX2
392 [(V8SI "TARGET_AVX2") V4SI
393 (V4DI "TARGET_AVX2") V2DI])
394
395 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
396 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
397 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
398 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
399
400 (define_mode_iterator VI248_AVX512BW_AVX512VL
401 [(V32HI "TARGET_AVX512BW")
402 (V4DI "TARGET_AVX512VL") V16SI V8DI])
403
404 ;; Assumes TARGET_AVX512VL as baseline
405 (define_mode_iterator VI24_AVX512BW_1
406 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
407 V8SI V4SI])
408
409 (define_mode_iterator VI48_AVX512F
410 [(V16SI "TARGET_AVX512F") V8SI V4SI
411 (V8DI "TARGET_AVX512F") V4DI V2DI])
412
413 (define_mode_iterator VI48_AVX_AVX512F
414 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
415 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
416
417 (define_mode_iterator VI12_AVX_AVX512F
418 [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
419 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
420
421 (define_mode_iterator V48_AVX2
422 [V4SF V2DF
423 V8SF V4DF
424 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
425 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
426
427 (define_mode_attr avx512
428 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
429 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
430 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
431 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
432 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
433 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
434
435 (define_mode_attr sse2_avx_avx512f
436 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
437 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
438 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
439 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
440 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
441 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
442
443 (define_mode_attr sse2_avx2
444 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
445 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
446 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
447 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
448 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
449
450 (define_mode_attr ssse3_avx2
451 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
452 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
453 (V4SI "ssse3") (V8SI "avx2")
454 (V2DI "ssse3") (V4DI "avx2")
455 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
456
457 (define_mode_attr sse4_1_avx2
458 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
459 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
460 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
461 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
462
463 (define_mode_attr avx_avx2
464 [(V4SF "avx") (V2DF "avx")
465 (V8SF "avx") (V4DF "avx")
466 (V4SI "avx2") (V2DI "avx2")
467 (V8SI "avx2") (V4DI "avx2")])
468
469 (define_mode_attr vec_avx2
470 [(V16QI "vec") (V32QI "avx2")
471 (V8HI "vec") (V16HI "avx2")
472 (V4SI "vec") (V8SI "avx2")
473 (V2DI "vec") (V4DI "avx2")])
474
475 (define_mode_attr avx2_avx512
476 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
477 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
478 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
479 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
480 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
481
482 (define_mode_attr shuffletype
483 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
484 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
485 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
486 (V32HI "i") (V16HI "i") (V8HI "i")
487 (V64QI "i") (V32QI "i") (V16QI "i")
488 (V4TI "i") (V2TI "i") (V1TI "i")])
489
490 (define_mode_attr ssequartermode
491 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
492
493 (define_mode_attr ssedoublemodelower
494 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
495 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
496 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
497
498 (define_mode_attr ssedoublemode
499 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
500 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
501 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
502 (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
503 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
504 (V4DI "V8DI") (V8DI "V16DI")])
505
506 (define_mode_attr ssebytemode
507 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
508
509 ;; All 128bit vector integer modes
510 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
511
512 ;; All 256bit vector integer modes
513 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
514
515 ;; All 512bit vector integer modes
516 (define_mode_iterator VI_512
517 [(V64QI "TARGET_AVX512BW")
518 (V32HI "TARGET_AVX512BW")
519 V16SI V8DI])
520
521 ;; Various 128bit vector integer mode combinations
522 (define_mode_iterator VI12_128 [V16QI V8HI])
523 (define_mode_iterator VI14_128 [V16QI V4SI])
524 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
525 (define_mode_iterator VI24_128 [V8HI V4SI])
526 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
527 (define_mode_iterator VI48_128 [V4SI V2DI])
528
529 ;; Various 256bit and 512bit vector integer mode combinations
530 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
531 (define_mode_iterator VI124_256_AVX512F_AVX512BW
532 [V32QI V16HI V8SI
533 (V64QI "TARGET_AVX512BW")
534 (V32HI "TARGET_AVX512BW")
535 (V16SI "TARGET_AVX512F")])
536 (define_mode_iterator VI48_256 [V8SI V4DI])
537 (define_mode_iterator VI48_512 [V16SI V8DI])
538 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
539 (define_mode_iterator VI_AVX512BW
540 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
541
542 ;; Int-float size matches
543 (define_mode_iterator VI4F_128 [V4SI V4SF])
544 (define_mode_iterator VI8F_128 [V2DI V2DF])
545 (define_mode_iterator VI4F_256 [V8SI V8SF])
546 (define_mode_iterator VI8F_256 [V4DI V4DF])
547 (define_mode_iterator VI8F_256_512
548 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
549 (define_mode_iterator VI48F_256_512
550 [V8SI V8SF
551 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
552 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
553 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
554 (define_mode_iterator VF48_I1248
555 [V16SI V16SF V8DI V8DF V32HI V64QI])
556 (define_mode_iterator VI48F
557 [V16SI V16SF V8DI V8DF
558 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
559 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
560 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
561 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
562 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
563
564 ;; Mapping from float mode to required SSE level
565 (define_mode_attr sse
566 [(SF "sse") (DF "sse2")
567 (V4SF "sse") (V2DF "sse2")
568 (V16SF "avx512f") (V8SF "avx")
569 (V8DF "avx512f") (V4DF "avx")])
570
571 (define_mode_attr sse2
572 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
573 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
574
575 (define_mode_attr sse3
576 [(V16QI "sse3") (V32QI "avx")])
577
578 (define_mode_attr sse4_1
579 [(V4SF "sse4_1") (V2DF "sse4_1")
580 (V8SF "avx") (V4DF "avx")
581 (V8DF "avx512f")
582 (V4DI "avx") (V2DI "sse4_1")
583 (V8SI "avx") (V4SI "sse4_1")
584 (V16QI "sse4_1") (V32QI "avx")
585 (V8HI "sse4_1") (V16HI "avx")])
586
587 (define_mode_attr avxsizesuffix
588 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
589 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
590 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
591 (V16SF "512") (V8DF "512")
592 (V8SF "256") (V4DF "256")
593 (V4SF "") (V2DF "")])
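;; E.g. the lddqu pattern below is named sse3_lddqu for V16QI and
;; avx_lddqu256 for V32QI, combining <sse3> with <avxsizesuffix>.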
594
595 ;; SSE instruction mode
596 (define_mode_attr sseinsnmode
597 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
598 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
599 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
600 (V16SF "V16SF") (V8DF "V8DF")
601 (V8SF "V8SF") (V4DF "V4DF")
602 (V4SF "V4SF") (V2DF "V2DF")
603 (TI "TI")])
604
605 ;; Mapping of vector modes to corresponding mask size
606 (define_mode_attr avx512fmaskmode
607 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
608 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
609 (V16SI "HI") (V8SI "QI") (V4SI "QI")
610 (V8DI "QI") (V4DI "QI") (V2DI "QI")
611 (V16SF "HI") (V8SF "QI") (V4SF "QI")
612 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
613
614 ;; Mapping of vector modes to corresponding mask size (lower case)
615 (define_mode_attr avx512fmaskmodelower
616 [(V64QI "di") (V32QI "si") (V16QI "hi")
617 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
618 (V16SI "hi") (V8SI "qi") (V4SI "qi")
619 (V8DI "qi") (V4DI "qi") (V2DI "qi")
620 (V16SF "hi") (V8SF "qi") (V4SF "qi")
621 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
622
623 ;; Mapping of vector float modes to an integer mode of the same size
624 (define_mode_attr sseintvecmode
625 [(V16SF "V16SI") (V8DF "V8DI")
626 (V8SF "V8SI") (V4DF "V4DI")
627 (V4SF "V4SI") (V2DF "V2DI")
628 (V16SI "V16SI") (V8DI "V8DI")
629 (V8SI "V8SI") (V4DI "V4DI")
630 (V4SI "V4SI") (V2DI "V2DI")
631 (V16HI "V16HI") (V8HI "V8HI")
632 (V32HI "V32HI") (V64QI "V64QI")
633 (V32QI "V32QI") (V16QI "V16QI")])
634
635 (define_mode_attr sseintvecmode2
636 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
637 (V8SF "OI") (V4SF "TI")])
638
639 (define_mode_attr sseintvecmodelower
640 [(V16SF "v16si") (V8DF "v8di")
641 (V8SF "v8si") (V4DF "v4di")
642 (V4SF "v4si") (V2DF "v2di")
643 (V8SI "v8si") (V4DI "v4di")
644 (V4SI "v4si") (V2DI "v2di")
645 (V16HI "v16hi") (V8HI "v8hi")
646 (V32QI "v32qi") (V16QI "v16qi")])
647
648 ;; Mapping of vector modes to a vector mode of double size
649 (define_mode_attr ssedoublevecmode
650 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
651 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
652 (V8SF "V16SF") (V4DF "V8DF")
653 (V4SF "V8SF") (V2DF "V4DF")])
654
655 ;; Mapping of vector modes to a vector mode of half size
656 (define_mode_attr ssehalfvecmode
657 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
658 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
659 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
660 (V16SF "V8SF") (V8DF "V4DF")
661 (V8SF "V4SF") (V4DF "V2DF")
662 (V4SF "V2SF")])
663
664 ;; Mapping of vector modes to packed single mode of the same size
665 (define_mode_attr ssePSmode
666 [(V16SI "V16SF") (V8DF "V16SF")
667 (V16SF "V16SF") (V8DI "V16SF")
668 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
669 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
670 (V8SI "V8SF") (V4SI "V4SF")
671 (V4DI "V8SF") (V2DI "V4SF")
672 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
673 (V8SF "V8SF") (V4SF "V4SF")
674 (V4DF "V8SF") (V2DF "V4SF")])
675
676 (define_mode_attr ssePSmode2
677 [(V8DI "V8SF") (V4DI "V4SF")])
678
679 ;; Mapping of vector modes back to the scalar modes
680 (define_mode_attr ssescalarmode
681 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
682 (V32HI "HI") (V16HI "HI") (V8HI "HI")
683 (V16SI "SI") (V8SI "SI") (V4SI "SI")
684 (V8DI "DI") (V4DI "DI") (V2DI "DI")
685 (V16SF "SF") (V8SF "SF") (V4SF "SF")
686 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
687
688 ;; Mapping of vector modes to the 128bit modes
689 (define_mode_attr ssexmmmode
690 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
691 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
692 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
693 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
694 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
695 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
696
697 ;; Pointer size override for scalar modes (Intel asm dialect)
698 (define_mode_attr iptr
699 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
700 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
701 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
702 (V8SF "k") (V4DF "q")
703 (V4SF "k") (V2DF "q")
704 (SF "k") (DF "q")])
705
706 ;; Number of scalar elements in each vector type
707 (define_mode_attr ssescalarnum
708 [(V64QI "64") (V16SI "16") (V8DI "8")
709 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
710 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
711 (V16SF "16") (V8DF "8")
712 (V8SF "8") (V4DF "4")
713 (V4SF "4") (V2DF "2")])
714
715 ;; Mask of scalar elements in each vector type
716 (define_mode_attr ssescalarnummask
717 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
718 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
719 (V8SF "7") (V4DF "3")
720 (V4SF "3") (V2DF "1")])
721
722 (define_mode_attr ssescalarsize
723 [(V4TI "64") (V2TI "64") (V1TI "64")
724 (V8DI "64") (V4DI "64") (V2DI "64")
725 (V64QI "8") (V32QI "8") (V16QI "8")
726 (V32HI "16") (V16HI "16") (V8HI "16")
727 (V16SI "32") (V8SI "32") (V4SI "32")
728 (V16SF "32") (V8SF "32") (V4SF "32")
729 (V8DF "64") (V4DF "64") (V2DF "64")])
730
731 ;; SSE prefix for integer vector modes
732 (define_mode_attr sseintprefix
733 [(V2DI "p") (V2DF "")
734 (V4DI "p") (V4DF "")
735 (V8DI "p") (V8DF "")
736 (V4SI "p") (V4SF "")
737 (V8SI "p") (V8SF "")
738 (V16SI "p") (V16SF "")
739 (V16QI "p") (V8HI "p")
740 (V32QI "p") (V16HI "p")
741 (V64QI "p") (V32HI "p")])
742
743 ;; SSE scalar suffix for vector modes
744 (define_mode_attr ssescalarmodesuffix
745 [(SF "ss") (DF "sd")
746 (V8SF "ss") (V4DF "sd")
747 (V4SF "ss") (V2DF "sd")
748 (V8SI "ss") (V4DI "sd")
749 (V4SI "d")])
750
751 ;; Pack/unpack vector modes
752 (define_mode_attr sseunpackmode
753 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
754 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
755 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
756
757 (define_mode_attr ssepackmode
758 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
759 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
760 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
761
762 ;; Mapping of the max integer size for xop rotate immediate constraint
763 (define_mode_attr sserotatemax
764 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
765
766 ;; Mapping of mode to cast intrinsic name
767 (define_mode_attr castmode
768 [(V8SI "si") (V8SF "ps") (V4DF "pd")
769 (V16SI "si") (V16SF "ps") (V8DF "pd")])
770
771 ;; Instruction suffix for sign and zero extensions.
772 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
773
774 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
775 ;; i64x4 or f64x4 for 512bit modes.
776 (define_mode_attr i128
777 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
778 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
779 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
780
781 ;; Mix-n-match
782 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
783 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
784
785 ;; Mapping for dbpsadbw modes
786 (define_mode_attr dbpsadbwmode
787 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
788
789 ;; Mapping suffixes for broadcast
790 (define_mode_attr bcstscalarsuff
791 [(V64QI "b") (V32QI "b") (V16QI "b")
792 (V32HI "w") (V16HI "w") (V8HI "w")
793 (V16SI "d") (V8SI "d") (V4SI "d")
794 (V8DI "q") (V4DI "q") (V2DI "q")
795 (V16SF "ss") (V8SF "ss") (V4SF "ss")
796 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
797
798 ;; Tie mode of assembler operand to mode iterator
799 (define_mode_attr concat_tg_mode
800 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
801 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
802
803 ;; Half mask mode for unpacks
804 (define_mode_attr HALFMASKMODE
805 [(DI "SI") (SI "HI")])
806
807 ;; Double mask mode for packs
808 (define_mode_attr DOUBLEMASKMODE
809 [(HI "SI") (SI "DI")])
810
811
812 ;; Include define_subst patterns for instructions with mask
813 (include "subst.md")
814
815 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
816
817 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
818 ;;
819 ;; Move patterns
820 ;;
821 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
822
823 ;; All of these patterns are enabled for SSE1 as well as SSE2.
824 ;; This is essential for maintaining stable calling conventions.
825
826 (define_expand "mov<mode>"
827 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
828 (match_operand:VMOVE 1 "nonimmediate_operand"))]
829 "TARGET_SSE"
830 {
831 ix86_expand_vector_move (<MODE>mode, operands);
832 DONE;
833 })
834
835 (define_insn "mov<mode>_internal"
836 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
837 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))]
838 "TARGET_SSE
839 && (register_operand (operands[0], <MODE>mode)
840 || register_operand (operands[1], <MODE>mode))"
841 {
842 switch (get_attr_type (insn))
843 {
844 case TYPE_SSELOG1:
845 return standard_sse_constant_opcode (insn, operands[1]);
846
847 case TYPE_SSEMOV:
848 /* There is no EVEX-encoded vmov* for sizes smaller than 64 bytes
849 in AVX512F, so we need workarounds to access SSE registers
850 16-31, which are EVEX-only. With AVX512VL no workarounds are needed. */
851 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
852 && (EXT_REX_SSE_REG_P (operands[0])
853 || EXT_REX_SSE_REG_P (operands[1])))
854 {
855 if (memory_operand (operands[0], <MODE>mode))
856 {
857 if (<MODE_SIZE> == 32)
858 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
859 else if (<MODE_SIZE> == 16)
860 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
861 else
862 gcc_unreachable ();
863 }
864 else if (memory_operand (operands[1], <MODE>mode))
865 {
866 if (<MODE_SIZE> == 32)
867 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
868 else if (<MODE_SIZE> == 16)
869 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
870 else
871 gcc_unreachable ();
872 }
873 else
874 /* Reg -> reg move is always aligned. Just use wider move. */
875 switch (get_attr_mode (insn))
876 {
877 case MODE_V8SF:
878 case MODE_V4SF:
879 return "vmovaps\t{%g1, %g0|%g0, %g1}";
880 case MODE_V4DF:
881 case MODE_V2DF:
882 return "vmovapd\t{%g1, %g0|%g0, %g1}";
883 case MODE_OI:
884 case MODE_TI:
885 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
886 default:
887 gcc_unreachable ();
888 }
889 }
890
891 switch (get_attr_mode (insn))
892 {
893 case MODE_V16SF:
894 case MODE_V8SF:
895 case MODE_V4SF:
896 if (misaligned_operand (operands[0], <MODE>mode)
897 || misaligned_operand (operands[1], <MODE>mode))
898 return "%vmovups\t{%1, %0|%0, %1}";
899 else
900 return "%vmovaps\t{%1, %0|%0, %1}";
901
902 case MODE_V8DF:
903 case MODE_V4DF:
904 case MODE_V2DF:
905 if (misaligned_operand (operands[0], <MODE>mode)
906 || misaligned_operand (operands[1], <MODE>mode))
907 return "%vmovupd\t{%1, %0|%0, %1}";
908 else
909 return "%vmovapd\t{%1, %0|%0, %1}";
910
911 case MODE_OI:
912 case MODE_TI:
913 if (misaligned_operand (operands[0], <MODE>mode)
914 || misaligned_operand (operands[1], <MODE>mode))
915 return TARGET_AVX512VL ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
916 : "%vmovdqu\t{%1, %0|%0, %1}";
917 else
918 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
919 : "%vmovdqa\t{%1, %0|%0, %1}";
920 case MODE_XI:
921 if (misaligned_operand (operands[0], <MODE>mode)
922 || misaligned_operand (operands[1], <MODE>mode))
923 return (<MODE>mode == V16SImode
924 || <MODE>mode == V8DImode
925 || TARGET_AVX512BW)
926 ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
927 : "vmovdqu64\t{%1, %0|%0, %1}";
928 else
929 return "vmovdqa64\t{%1, %0|%0, %1}";
930
931 default:
932 gcc_unreachable ();
933 }
934
935 default:
936 gcc_unreachable ();
937 }
938 }
939 [(set_attr "type" "sselog1,ssemov,ssemov")
940 (set_attr "prefix" "maybe_vex")
941 (set (attr "mode")
942 (cond [(and (eq_attr "alternative" "0")
943 (and (match_test "TARGET_AVX512VL")
944 (match_operand 1 "vector_all_ones_operand")))
945 (const_string "XI")
946 (and (match_test "<MODE_SIZE> == 16")
947 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
948 (and (eq_attr "alternative" "2")
949 (match_test "TARGET_SSE_TYPELESS_STORES"))))
950 (const_string "<ssePSmode>")
951 (match_test "TARGET_AVX")
952 (const_string "<sseinsnmode>")
953 (ior (not (match_test "TARGET_SSE2"))
954 (match_test "optimize_function_for_size_p (cfun)"))
955 (const_string "V4SF")
956 (and (eq_attr "alternative" "0")
957 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
958 (const_string "TI")
959 ]
960 (const_string "<sseinsnmode>")))])
961
962 (define_insn "<avx512>_load<mode>_mask"
963 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
964 (vec_merge:V48_AVX512VL
965 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
966 (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
967 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
968 "TARGET_AVX512F"
969 {
970 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
971 {
972 if (misaligned_operand (operands[1], <MODE>mode))
973 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
974 else
975 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
976 }
977 else
978 {
979 if (misaligned_operand (operands[1], <MODE>mode))
980 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
981 else
982 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
983 }
984 }
985 [(set_attr "type" "ssemov")
986 (set_attr "prefix" "evex")
987 (set_attr "memory" "none,load")
988 (set_attr "mode" "<sseinsnmode>")])
989
990 (define_insn "<avx512>_load<mode>_mask"
991 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
992 (vec_merge:VI12_AVX512VL
993 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
994 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
995 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
996 "TARGET_AVX512BW"
997 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
998 [(set_attr "type" "ssemov")
999 (set_attr "prefix" "evex")
1000 (set_attr "memory" "none,load")
1001 (set_attr "mode" "<sseinsnmode>")])
1002
1003 (define_insn "<avx512>_blendm<mode>"
1004 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1005 (vec_merge:V48_AVX512VL
1006 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1007 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1008 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1009 "TARGET_AVX512F"
1010 "vblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1011 [(set_attr "type" "ssemov")
1012 (set_attr "prefix" "evex")
1013 (set_attr "mode" "<sseinsnmode>")])
1014
1015 (define_insn "<avx512>_blendm<mode>"
1016 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1017 (vec_merge:VI12_AVX512VL
1018 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1019 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1020 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1021 "TARGET_AVX512BW"
1022 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1023 [(set_attr "type" "ssemov")
1024 (set_attr "prefix" "evex")
1025 (set_attr "mode" "<sseinsnmode>")])
1026
1027 (define_insn "<avx512>_store<mode>_mask"
1028 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1029 (vec_merge:V48_AVX512VL
1030 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1031 (match_dup 0)
1032 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1033 "TARGET_AVX512F"
1034 {
1035 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1036 {
1037 if (misaligned_operand (operands[0], <MODE>mode))
1038 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1039 else
1040 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1041 }
1042 else
1043 {
1044 if (misaligned_operand (operands[0], <MODE>mode))
1045 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1046 else
1047 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1048 }
1049 }
1050 [(set_attr "type" "ssemov")
1051 (set_attr "prefix" "evex")
1052 (set_attr "memory" "store")
1053 (set_attr "mode" "<sseinsnmode>")])
1054
1055 (define_insn "<avx512>_store<mode>_mask"
1056 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1057 (vec_merge:VI12_AVX512VL
1058 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1059 (match_dup 0)
1060 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1061 "TARGET_AVX512BW"
1062 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1063 [(set_attr "type" "ssemov")
1064 (set_attr "prefix" "evex")
1065 (set_attr "memory" "store")
1066 (set_attr "mode" "<sseinsnmode>")])
1067
1068 (define_insn "sse2_movq128"
1069 [(set (match_operand:V2DI 0 "register_operand" "=x")
1070 (vec_concat:V2DI
1071 (vec_select:DI
1072 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
1073 (parallel [(const_int 0)]))
1074 (const_int 0)))]
1075 "TARGET_SSE2"
1076 "%vmovq\t{%1, %0|%0, %q1}"
1077 [(set_attr "type" "ssemov")
1078 (set_attr "prefix" "maybe_vex")
1079 (set_attr "mode" "TI")])
1080
1081 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1082 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1083 ;; from memory, we'd prefer to load the memory directly into the %xmm
1084 ;; register. To facilitate this happy circumstance, this pattern won't
1085 ;; split until after register allocation. If the 64-bit value didn't
1086 ;; come from memory, this is the best we can do. This is much better
1087 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1088 ;; from there.
1089
1090 (define_insn_and_split "movdi_to_sse"
1091 [(parallel
1092 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1093 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1094 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1095 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1096 "#"
1097 "&& reload_completed"
1098 [(const_int 0)]
1099 {
1100 if (register_operand (operands[1], DImode))
1101 {
1102 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1103 Assemble the 64-bit DImode value in an xmm register. */
1104 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1105 gen_lowpart (SImode, operands[1])));
1106 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1107 gen_highpart (SImode, operands[1])));
1108 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1109 operands[2]));
1110 }
1111 else if (memory_operand (operands[1], DImode))
1112 {
1113 rtx tmp = gen_reg_rtx (V2DImode);
1114 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
1115 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
1116 }
1117 else
1118 gcc_unreachable ();
1119 })
1120
1121 (define_split
1122 [(set (match_operand:V4SF 0 "register_operand")
1123 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1124 "TARGET_SSE && reload_completed"
1125 [(set (match_dup 0)
1126 (vec_merge:V4SF
1127 (vec_duplicate:V4SF (match_dup 1))
1128 (match_dup 2)
1129 (const_int 1)))]
1130 {
1131 operands[1] = gen_lowpart (SFmode, operands[1]);
1132 operands[2] = CONST0_RTX (V4SFmode);
1133 })
1134
1135 (define_split
1136 [(set (match_operand:V2DF 0 "register_operand")
1137 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1138 "TARGET_SSE2 && reload_completed"
1139 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1140 {
1141 operands[1] = gen_lowpart (DFmode, operands[1]);
1142 operands[2] = CONST0_RTX (DFmode);
1143 })
1144
1145 (define_expand "movmisalign<mode>"
1146 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1147 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1148 "TARGET_SSE"
1149 {
1150 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1151 DONE;
1152 })
1153
1154 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
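;; For instance (illustrative registers and addresses), the pair
;;	movsd	(%rax), %xmm0
;;	movhpd	8(%rax), %xmm0
;; is replaced by a single
;;	movupd	(%rax), %xmm0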
1155 (define_peephole2
1156 [(set (match_operand:V2DF 0 "register_operand")
1157 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1158 (match_operand:DF 4 "const0_operand")))
1159 (set (match_operand:V2DF 2 "register_operand")
1160 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1161 (parallel [(const_int 0)]))
1162 (match_operand:DF 3 "memory_operand")))]
1163 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1164 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1165 [(set (match_dup 2) (match_dup 4))]
1166 "operands[4] = adjust_address (operands[1], V2DFmode, 0);")
1167
1168 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
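;; For instance (illustrative registers and addresses), the pair
;;	movlpd	%xmm0, (%rax)
;;	movhpd	%xmm0, 8(%rax)
;; is replaced by a single
;;	movupd	%xmm0, (%rax)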
1169 (define_peephole2
1170 [(set (match_operand:DF 0 "memory_operand")
1171 (vec_select:DF (match_operand:V2DF 1 "register_operand")
1172 (parallel [(const_int 0)])))
1173 (set (match_operand:DF 2 "memory_operand")
1174 (vec_select:DF (match_operand:V2DF 3 "register_operand")
1175 (parallel [(const_int 1)])))]
1176 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1177 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1178 [(set (match_dup 4) (match_dup 1))]
1179 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1180
1181 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1182 [(set (match_operand:VI1 0 "register_operand" "=x")
1183 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1184 UNSPEC_LDDQU))]
1185 "TARGET_SSE3"
1186 "%vlddqu\t{%1, %0|%0, %1}"
1187 [(set_attr "type" "ssemov")
1188 (set_attr "movu" "1")
1189 (set (attr "prefix_data16")
1190 (if_then_else
1191 (match_test "TARGET_AVX")
1192 (const_string "*")
1193 (const_string "0")))
1194 (set (attr "prefix_rep")
1195 (if_then_else
1196 (match_test "TARGET_AVX")
1197 (const_string "*")
1198 (const_string "1")))
1199 (set_attr "prefix" "maybe_vex")
1200 (set_attr "mode" "<sseinsnmode>")])
1201
1202 (define_insn "sse2_movnti<mode>"
1203 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1204 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1205 UNSPEC_MOVNT))]
1206 "TARGET_SSE2"
1207 "movnti\t{%1, %0|%0, %1}"
1208 [(set_attr "type" "ssemov")
1209 (set_attr "prefix_data16" "0")
1210 (set_attr "mode" "<MODE>")])
1211
1212 (define_insn "<sse>_movnt<mode>"
1213 [(set (match_operand:VF 0 "memory_operand" "=m")
1214 (unspec:VF
1215 [(match_operand:VF 1 "register_operand" "v")]
1216 UNSPEC_MOVNT))]
1217 "TARGET_SSE"
1218 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1219 [(set_attr "type" "ssemov")
1220 (set_attr "prefix" "maybe_vex")
1221 (set_attr "mode" "<MODE>")])
1222
1223 (define_insn "<sse2>_movnt<mode>"
1224 [(set (match_operand:VI8 0 "memory_operand" "=m")
1225 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1226 UNSPEC_MOVNT))]
1227 "TARGET_SSE2"
1228 "%vmovntdq\t{%1, %0|%0, %1}"
1229 [(set_attr "type" "ssecvt")
1230 (set (attr "prefix_data16")
1231 (if_then_else
1232 (match_test "TARGET_AVX")
1233 (const_string "*")
1234 (const_string "1")))
1235 (set_attr "prefix" "maybe_vex")
1236 (set_attr "mode" "<sseinsnmode>")])
1237
1238 ; Expand patterns for non-temporal stores. At the moment, only those
1239 ; that directly map to insns are defined; it would be possible to
1240 ; define patterns for other modes that would expand to several insns.
1241
1242 ;; Modes handled by storent patterns.
1243 (define_mode_iterator STORENT_MODE
1244 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1245 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1246 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1247 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1248 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1249
1250 (define_expand "storent<mode>"
1251 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1252 (unspec:STORENT_MODE
1253 [(match_operand:STORENT_MODE 1 "register_operand")]
1254 UNSPEC_MOVNT))]
1255 "TARGET_SSE")
1256
1257 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1258 ;;
1259 ;; Parallel floating point arithmetic
1260 ;;
1261 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1262
1263 (define_expand "<code><mode>2"
1264 [(set (match_operand:VF 0 "register_operand")
1265 (absneg:VF
1266 (match_operand:VF 1 "register_operand")))]
1267 "TARGET_SSE"
1268 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1269
1270 (define_insn_and_split "*absneg<mode>2"
1271 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1272 (match_operator:VF 3 "absneg_operator"
1273 [(match_operand:VF 1 "vector_operand" "0, xBm,v, m")]))
1274 (use (match_operand:VF 2 "vector_operand" "xBm,0, vm,v"))]
1275 "TARGET_SSE"
1276 "#"
1277 "&& reload_completed"
1278 [(const_int 0)]
1279 {
1280 enum rtx_code absneg_op;
1281 rtx op1, op2;
1282 rtx t;
1283
1284 if (TARGET_AVX)
1285 {
1286 if (MEM_P (operands[1]))
1287 op1 = operands[2], op2 = operands[1];
1288 else
1289 op1 = operands[1], op2 = operands[2];
1290 }
1291 else
1292 {
1293 op1 = operands[0];
1294 if (rtx_equal_p (operands[0], operands[1]))
1295 op2 = operands[2];
1296 else
1297 op2 = operands[1];
1298 }
1299
1300 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1301 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1302 t = gen_rtx_SET (operands[0], t);
1303 emit_insn (t);
1304 DONE;
1305 }
1306 [(set_attr "isa" "noavx,noavx,avx,avx")])
1307
1308 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1309 [(set (match_operand:VF 0 "register_operand")
1310 (plusminus:VF
1311 (match_operand:VF 1 "<round_nimm_predicate>")
1312 (match_operand:VF 2 "<round_nimm_predicate>")))]
1313 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1314 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1315
1316 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1317 [(set (match_operand:VF 0 "register_operand" "=x,v")
1318 (plusminus:VF
1319 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1320 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1321 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1322 "@
1323 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1324 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1325 [(set_attr "isa" "noavx,avx")
1326 (set_attr "type" "sseadd")
1327 (set_attr "prefix" "<mask_prefix3>")
1328 (set_attr "mode" "<MODE>")])
1329
1330 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1331 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1332 (vec_merge:VF_128
1333 (plusminus:VF_128
1334 (match_operand:VF_128 1 "register_operand" "0,v")
1335 (match_operand:VF_128 2 "vector_operand" "xBm,<round_constraint>"))
1336 (match_dup 1)
1337 (const_int 1)))]
1338 "TARGET_SSE"
1339 "@
1340 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1341 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1342 [(set_attr "isa" "noavx,avx")
1343 (set_attr "type" "sseadd")
1344 (set_attr "prefix" "<round_prefix>")
1345 (set_attr "mode" "<ssescalarmode>")])
1346
1347 (define_expand "mul<mode>3<mask_name><round_name>"
1348 [(set (match_operand:VF 0 "register_operand")
1349 (mult:VF
1350 (match_operand:VF 1 "<round_nimm_predicate>")
1351 (match_operand:VF 2 "<round_nimm_predicate>")))]
1352 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1353 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1354
1355 (define_insn "*mul<mode>3<mask_name><round_name>"
1356 [(set (match_operand:VF 0 "register_operand" "=x,v")
1357 (mult:VF
1358 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1359 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1360 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1361 "@
1362 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1363 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1364 [(set_attr "isa" "noavx,avx")
1365 (set_attr "type" "ssemul")
1366 (set_attr "prefix" "<mask_prefix3>")
1367 (set_attr "btver2_decode" "direct,double")
1368 (set_attr "mode" "<MODE>")])
1369
1370 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1371 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1372 (vec_merge:VF_128
1373 (multdiv:VF_128
1374 (match_operand:VF_128 1 "register_operand" "0,v")
1375 (match_operand:VF_128 2 "vector_operand" "xBm,<round_constraint>"))
1376 (match_dup 1)
1377 (const_int 1)))]
1378 "TARGET_SSE"
1379 "@
1380 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1381 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1382 [(set_attr "isa" "noavx,avx")
1383 (set_attr "type" "sse<multdiv_mnemonic>")
1384 (set_attr "prefix" "<round_prefix>")
1385 (set_attr "btver2_decode" "direct,double")
1386 (set_attr "mode" "<ssescalarmode>")])
1387
1388 (define_expand "div<mode>3"
1389 [(set (match_operand:VF2 0 "register_operand")
1390 (div:VF2 (match_operand:VF2 1 "register_operand")
1391 (match_operand:VF2 2 "vector_operand")))]
1392 "TARGET_SSE2"
1393 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1394
1395 (define_expand "div<mode>3"
1396 [(set (match_operand:VF1 0 "register_operand")
1397 (div:VF1 (match_operand:VF1 1 "register_operand")
1398 (match_operand:VF1 2 "vector_operand")))]
1399 "TARGET_SSE"
1400 {
1401 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1402
1403 if (TARGET_SSE_MATH
1404 && TARGET_RECIP_VEC_DIV
1405 && !optimize_insn_for_size_p ()
1406 && flag_finite_math_only && !flag_trapping_math
1407 && flag_unsafe_math_optimizations)
1408 {
1409 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1410 DONE;
1411 }
1412 })
1413
1414 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1415 [(set (match_operand:VF 0 "register_operand" "=x,v")
1416 (div:VF
1417 (match_operand:VF 1 "register_operand" "0,v")
1418 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1419 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1420 "@
1421 div<ssemodesuffix>\t{%2, %0|%0, %2}
1422 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1423 [(set_attr "isa" "noavx,avx")
1424 (set_attr "type" "ssediv")
1425 (set_attr "prefix" "<mask_prefix3>")
1426 (set_attr "mode" "<MODE>")])
1427
1428 (define_insn "<sse>_rcp<mode>2"
1429 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1430 (unspec:VF1_128_256
1431 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
1432 "TARGET_SSE"
1433 "%vrcpps\t{%1, %0|%0, %1}"
1434 [(set_attr "type" "sse")
1435 (set_attr "atom_sse_attr" "rcp")
1436 (set_attr "btver2_sse_attr" "rcp")
1437 (set_attr "prefix" "maybe_vex")
1438 (set_attr "mode" "<MODE>")])
1439
1440 (define_insn "sse_vmrcpv4sf2"
1441 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1442 (vec_merge:V4SF
1443 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1444 UNSPEC_RCP)
1445 (match_operand:V4SF 2 "register_operand" "0,x")
1446 (const_int 1)))]
1447 "TARGET_SSE"
1448 "@
1449 rcpss\t{%1, %0|%0, %k1}
1450 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1451 [(set_attr "isa" "noavx,avx")
1452 (set_attr "type" "sse")
1453 (set_attr "atom_sse_attr" "rcp")
1454 (set_attr "btver2_sse_attr" "rcp")
1455 (set_attr "prefix" "orig,vex")
1456 (set_attr "mode" "SF")])
1457
1458 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1459 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1460 (unspec:VF_AVX512VL
1461 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1462 UNSPEC_RCP14))]
1463 "TARGET_AVX512F"
1464 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1465 [(set_attr "type" "sse")
1466 (set_attr "prefix" "evex")
1467 (set_attr "mode" "<MODE>")])
1468
1469 (define_insn "srcp14<mode>"
1470 [(set (match_operand:VF_128 0 "register_operand" "=v")
1471 (vec_merge:VF_128
1472 (unspec:VF_128
1473 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1474 UNSPEC_RCP14)
1475 (match_operand:VF_128 2 "register_operand" "v")
1476 (const_int 1)))]
1477 "TARGET_AVX512F"
1478 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1479 [(set_attr "type" "sse")
1480 (set_attr "prefix" "evex")
1481 (set_attr "mode" "<MODE>")])
1482
1483 (define_expand "sqrt<mode>2"
1484 [(set (match_operand:VF2 0 "register_operand")
1485 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
1486 "TARGET_SSE2")
1487
1488 (define_expand "sqrt<mode>2"
1489 [(set (match_operand:VF1 0 "register_operand")
1490 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
1491 "TARGET_SSE"
1492 {
1493 if (TARGET_SSE_MATH
1494 && TARGET_RECIP_VEC_SQRT
1495 && !optimize_insn_for_size_p ()
1496 && flag_finite_math_only && !flag_trapping_math
1497 && flag_unsafe_math_optimizations)
1498 {
1499 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
1500 DONE;
1501 }
1502 })
1503
1504 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1505 [(set (match_operand:VF 0 "register_operand" "=x,v")
1506 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1507 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1508 "@
1509 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
1510 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1511 [(set_attr "isa" "noavx,avx")
1512 (set_attr "type" "sse")
1513 (set_attr "atom_sse_attr" "sqrt")
1514 (set_attr "btver2_sse_attr" "sqrt")
1515 (set_attr "prefix" "maybe_vex")
1516 (set_attr "mode" "<MODE>")])
1517
1518 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1519 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1520 (vec_merge:VF_128
1521 (sqrt:VF_128
1522 (match_operand:VF_128 1 "vector_operand" "xBm,<round_constraint>"))
1523 (match_operand:VF_128 2 "register_operand" "0,v")
1524 (const_int 1)))]
1525 "TARGET_SSE"
1526 "@
1527 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1528 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1529 [(set_attr "isa" "noavx,avx")
1530 (set_attr "type" "sse")
1531 (set_attr "atom_sse_attr" "sqrt")
1532 (set_attr "prefix" "<round_prefix>")
1533 (set_attr "btver2_sse_attr" "sqrt")
1534 (set_attr "mode" "<ssescalarmode>")])
1535
1536 (define_expand "rsqrt<mode>2"
1537 [(set (match_operand:VF1_128_256 0 "register_operand")
1538 (unspec:VF1_128_256
1539 [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
1540 "TARGET_SSE_MATH"
1541 {
1542 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1543 DONE;
1544 })
1545
1546 (define_insn "<sse>_rsqrt<mode>2"
1547 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1548 (unspec:VF1_128_256
1549 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
1550 "TARGET_SSE"
1551 "%vrsqrtps\t{%1, %0|%0, %1}"
1552 [(set_attr "type" "sse")
1553 (set_attr "prefix" "maybe_vex")
1554 (set_attr "mode" "<MODE>")])
1555
1556 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1557 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1558 (unspec:VF_AVX512VL
1559 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1560 UNSPEC_RSQRT14))]
1561 "TARGET_AVX512F"
1562 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1563 [(set_attr "type" "sse")
1564 (set_attr "prefix" "evex")
1565 (set_attr "mode" "<MODE>")])
1566
1567 (define_insn "rsqrt14<mode>"
1568 [(set (match_operand:VF_128 0 "register_operand" "=v")
1569 (vec_merge:VF_128
1570 (unspec:VF_128
1571 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1572 UNSPEC_RSQRT14)
1573 (match_operand:VF_128 2 "register_operand" "v")
1574 (const_int 1)))]
1575 "TARGET_AVX512F"
1576 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1577 [(set_attr "type" "sse")
1578 (set_attr "prefix" "evex")
1579 (set_attr "mode" "<MODE>")])
1580
1581 (define_insn "sse_vmrsqrtv4sf2"
1582 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1583 (vec_merge:V4SF
1584 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1585 UNSPEC_RSQRT)
1586 (match_operand:V4SF 2 "register_operand" "0,x")
1587 (const_int 1)))]
1588 "TARGET_SSE"
1589 "@
1590 rsqrtss\t{%1, %0|%0, %k1}
1591 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1592 [(set_attr "isa" "noavx,avx")
1593 (set_attr "type" "sse")
1594 (set_attr "prefix" "orig,vex")
1595 (set_attr "mode" "SF")])
1596
1597 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1598 ;; isn't really correct, as those rtl operators aren't defined when
1599 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1600
1601 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1602 [(set (match_operand:VF 0 "register_operand")
1603 (smaxmin:VF
1604 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1605 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1606 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1607 {
1608 if (!flag_finite_math_only)
1609 operands[1] = force_reg (<MODE>mode, operands[1]);
1610 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1611 })
1612
1613 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1614 [(set (match_operand:VF 0 "register_operand" "=x,v")
1615 (smaxmin:VF
1616 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1617 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
1618 "TARGET_SSE && flag_finite_math_only
1619 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1620 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1621 "@
1622 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1623 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1624 [(set_attr "isa" "noavx,avx")
1625 (set_attr "type" "sseadd")
1626 (set_attr "btver2_sse_attr" "maxmin")
1627 (set_attr "prefix" "<mask_prefix3>")
1628 (set_attr "mode" "<MODE>")])
1629
1630 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1631 [(set (match_operand:VF 0 "register_operand" "=x,v")
1632 (smaxmin:VF
1633 (match_operand:VF 1 "register_operand" "0,v")
1634 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
1635 "TARGET_SSE && !flag_finite_math_only
1636 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1637 "@
1638 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1639 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1640 [(set_attr "isa" "noavx,avx")
1641 (set_attr "type" "sseadd")
1642 (set_attr "btver2_sse_attr" "maxmin")
1643 (set_attr "prefix" "<mask_prefix3>")
1644 (set_attr "mode" "<MODE>")])
1645
1646 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1647 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1648 (vec_merge:VF_128
1649 (smaxmin:VF_128
1650 (match_operand:VF_128 1 "register_operand" "0,v")
1651 (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_constraint>"))
1652 (match_dup 1)
1653 (const_int 1)))]
1654 "TARGET_SSE"
1655 "@
1656 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1657 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1658 [(set_attr "isa" "noavx,avx")
1659 (set_attr "type" "sse")
1660 (set_attr "btver2_sse_attr" "maxmin")
1661 (set_attr "prefix" "<round_saeonly_prefix>")
1662 (set_attr "mode" "<ssescalarmode>")])
1663
1664 ;; These versions of the min/max patterns implement exactly the operations
1665 ;; min = (op1 < op2 ? op1 : op2)
1666 ;; max = (!(op1 < op2) ? op1 : op2)
1667 ;; They are not commutative, and thus they may be used in the
1668 ;; presence of -0.0 and NaN.
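;; For instance, using the definitions above:
;;   min (-0.0, +0.0) -> +0.0    min (+0.0, -0.0) -> -0.0
;;   min (x, NaN)     -> NaN     min (NaN, x)     -> x
;; so swapping the operands changes the result.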
1669
1670 (define_insn "*ieee_smin<mode>3"
1671 [(set (match_operand:VF 0 "register_operand" "=x,v")
1672 (unspec:VF
1673 [(match_operand:VF 1 "register_operand" "0,v")
1674 (match_operand:VF 2 "vector_operand" "xBm,vm")]
1675 UNSPEC_IEEE_MIN))]
1676 "TARGET_SSE"
1677 "@
1678 min<ssemodesuffix>\t{%2, %0|%0, %2}
1679 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1680 [(set_attr "isa" "noavx,avx")
1681 (set_attr "type" "sseadd")
1682 (set_attr "prefix" "orig,vex")
1683 (set_attr "mode" "<MODE>")])
1684
1685 (define_insn "*ieee_smax<mode>3"
1686 [(set (match_operand:VF 0 "register_operand" "=x,v")
1687 (unspec:VF
1688 [(match_operand:VF 1 "register_operand" "0,v")
1689 (match_operand:VF 2 "vector_operand" "xBm,vm")]
1690 UNSPEC_IEEE_MAX))]
1691 "TARGET_SSE"
1692 "@
1693 max<ssemodesuffix>\t{%2, %0|%0, %2}
1694 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1695 [(set_attr "isa" "noavx,avx")
1696 (set_attr "type" "sseadd")
1697 (set_attr "prefix" "orig,vex")
1698 (set_attr "mode" "<MODE>")])
1699
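;; In the four ADDSUB insns below the first vec_merge operand is the MINUS
;; and the second the PLUS, and a set mask bit selects the MINUS result for
;; that element.  Hence the masks 1 (0b1) for V2DF, 5 (0b0101) for V4DF and
;; V4SF, and 85 (0b01010101) for V8SF: subtract in the even elements, add in
;; the odd ones.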
1700 (define_insn "avx_addsubv4df3"
1701 [(set (match_operand:V4DF 0 "register_operand" "=x")
1702 (vec_merge:V4DF
1703 (minus:V4DF
1704 (match_operand:V4DF 1 "register_operand" "x")
1705 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1706 (plus:V4DF (match_dup 1) (match_dup 2))
1707 (const_int 5)))]
1708 "TARGET_AVX"
1709 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1710 [(set_attr "type" "sseadd")
1711 (set_attr "prefix" "vex")
1712 (set_attr "mode" "V4DF")])
1713
1714 (define_insn "sse3_addsubv2df3"
1715 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1716 (vec_merge:V2DF
1717 (minus:V2DF
1718 (match_operand:V2DF 1 "register_operand" "0,x")
1719 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
1720 (plus:V2DF (match_dup 1) (match_dup 2))
1721 (const_int 1)))]
1722 "TARGET_SSE3"
1723 "@
1724 addsubpd\t{%2, %0|%0, %2}
1725 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1726 [(set_attr "isa" "noavx,avx")
1727 (set_attr "type" "sseadd")
1728 (set_attr "atom_unit" "complex")
1729 (set_attr "prefix" "orig,vex")
1730 (set_attr "mode" "V2DF")])
1731
1732 (define_insn "avx_addsubv8sf3"
1733 [(set (match_operand:V8SF 0 "register_operand" "=x")
1734 (vec_merge:V8SF
1735 (minus:V8SF
1736 (match_operand:V8SF 1 "register_operand" "x")
1737 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1738 (plus:V8SF (match_dup 1) (match_dup 2))
1739 (const_int 85)))]
1740 "TARGET_AVX"
1741 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1742 [(set_attr "type" "sseadd")
1743 (set_attr "prefix" "vex")
1744 (set_attr "mode" "V8SF")])
1745
1746 (define_insn "sse3_addsubv4sf3"
1747 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1748 (vec_merge:V4SF
1749 (minus:V4SF
1750 (match_operand:V4SF 1 "register_operand" "0,x")
1751 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
1752 (plus:V4SF (match_dup 1) (match_dup 2))
1753 (const_int 5)))]
1754 "TARGET_SSE3"
1755 "@
1756 addsubps\t{%2, %0|%0, %2}
1757 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1758 [(set_attr "isa" "noavx,avx")
1759 (set_attr "type" "sseadd")
1760 (set_attr "prefix" "orig,vex")
1761 (set_attr "prefix_rep" "1,*")
1762 (set_attr "mode" "V4SF")])
1763
1764 (define_split
1765 [(set (match_operand:VF_128_256 0 "register_operand")
1766 (match_operator:VF_128_256 6 "addsub_vm_operator"
1767 [(minus:VF_128_256
1768 (match_operand:VF_128_256 1 "register_operand")
1769 (match_operand:VF_128_256 2 "vector_operand"))
1770 (plus:VF_128_256
1771 (match_operand:VF_128_256 3 "vector_operand")
1772 (match_operand:VF_128_256 4 "vector_operand"))
1773 (match_operand 5 "const_int_operand")]))]
1774 "TARGET_SSE3
1775 && can_create_pseudo_p ()
1776 && ((rtx_equal_p (operands[1], operands[3])
1777 && rtx_equal_p (operands[2], operands[4]))
1778 || (rtx_equal_p (operands[1], operands[4])
1779 && rtx_equal_p (operands[2], operands[3])))"
1780 [(set (match_dup 0)
1781 (vec_merge:VF_128_256
1782 (minus:VF_128_256 (match_dup 1) (match_dup 2))
1783 (plus:VF_128_256 (match_dup 1) (match_dup 2))
1784 (match_dup 5)))])
1785
1786 (define_split
1787 [(set (match_operand:VF_128_256 0 "register_operand")
1788 (match_operator:VF_128_256 6 "addsub_vm_operator"
1789 [(plus:VF_128_256
1790 (match_operand:VF_128_256 1 "vector_operand")
1791 (match_operand:VF_128_256 2 "vector_operand"))
1792 (minus:VF_128_256
1793 (match_operand:VF_128_256 3 "register_operand")
1794 (match_operand:VF_128_256 4 "vector_operand"))
1795 (match_operand 5 "const_int_operand")]))]
1796 "TARGET_SSE3
1797 && can_create_pseudo_p ()
1798 && ((rtx_equal_p (operands[1], operands[3])
1799 && rtx_equal_p (operands[2], operands[4]))
1800 || (rtx_equal_p (operands[1], operands[4])
1801 && rtx_equal_p (operands[2], operands[3])))"
1802 [(set (match_dup 0)
1803 (vec_merge:VF_128_256
1804 (minus:VF_128_256 (match_dup 3) (match_dup 4))
1805 (plus:VF_128_256 (match_dup 3) (match_dup 4))
1806 (match_dup 5)))]
1807 {
1808 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
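/* For example, in V4SFmode a mask of 5 (0b0101) becomes ~5 & 0xf = 10 (0b1010). */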
1809 operands[5]
1810 = GEN_INT (~INTVAL (operands[5])
1811 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
1812 })
1813
1814 (define_split
1815 [(set (match_operand:VF_128_256 0 "register_operand")
1816 (match_operator:VF_128_256 7 "addsub_vs_operator"
1817 [(vec_concat:<ssedoublemode>
1818 (minus:VF_128_256
1819 (match_operand:VF_128_256 1 "register_operand")
1820 (match_operand:VF_128_256 2 "vector_operand"))
1821 (plus:VF_128_256
1822 (match_operand:VF_128_256 3 "vector_operand")
1823 (match_operand:VF_128_256 4 "vector_operand")))
1824 (match_parallel 5 "addsub_vs_parallel"
1825 [(match_operand 6 "const_int_operand")])]))]
1826 "TARGET_SSE3
1827 && can_create_pseudo_p ()
1828 && ((rtx_equal_p (operands[1], operands[3])
1829 && rtx_equal_p (operands[2], operands[4]))
1830 || (rtx_equal_p (operands[1], operands[4])
1831 && rtx_equal_p (operands[2], operands[3])))"
1832 [(set (match_dup 0)
1833 (vec_merge:VF_128_256
1834 (minus:VF_128_256 (match_dup 1) (match_dup 2))
1835 (plus:VF_128_256 (match_dup 1) (match_dup 2))
1836 (match_dup 5)))]
1837 {
1838 int i, nelt = XVECLEN (operands[5], 0);
1839 HOST_WIDE_INT ival = 0;
1840
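/* Selection indexes below the element count pick from the MINUS half of the vec_concat; record those lanes in the vec_merge mask. */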
1841 for (i = 0; i < nelt; i++)
1842 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
1843 ival |= HOST_WIDE_INT_1 << i;
1844
1845 operands[5] = GEN_INT (ival);
1846 })
1847
1848 (define_split
1849 [(set (match_operand:VF_128_256 0 "register_operand")
1850 (match_operator:VF_128_256 7 "addsub_vs_operator"
1851 [(vec_concat:<ssedoublemode>
1852 (plus:VF_128_256
1853 (match_operand:VF_128_256 1 "vector_operand")
1854 (match_operand:VF_128_256 2 "vector_operand"))
1855 (minus:VF_128_256
1856 (match_operand:VF_128_256 3 "register_operand")
1857 (match_operand:VF_128_256 4 "vector_operand")))
1858 (match_parallel 5 "addsub_vs_parallel"
1859 [(match_operand 6 "const_int_operand")])]))]
1860 "TARGET_SSE3
1861 && can_create_pseudo_p ()
1862 && ((rtx_equal_p (operands[1], operands[3])
1863 && rtx_equal_p (operands[2], operands[4]))
1864 || (rtx_equal_p (operands[1], operands[4])
1865 && rtx_equal_p (operands[2], operands[3])))"
1866 [(set (match_dup 0)
1867 (vec_merge:VF_128_256
1868 (minus:VF_128_256 (match_dup 3) (match_dup 4))
1869 (plus:VF_128_256 (match_dup 3) (match_dup 4))
1870 (match_dup 5)))]
1871 {
1872 int i, nelt = XVECLEN (operands[5], 0);
1873 HOST_WIDE_INT ival = 0;
1874
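/* Here the PLUS half comes first in the vec_concat, so indexes of at least the element count pick from the MINUS half. */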
1875 for (i = 0; i < nelt; i++)
1876 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
1877 ival |= HOST_WIDE_INT_1 << i;
1878
1879 operands[5] = GEN_INT (ival);
1880 })
1881
1882 (define_insn "avx_h<plusminus_insn>v4df3"
1883 [(set (match_operand:V4DF 0 "register_operand" "=x")
1884 (vec_concat:V4DF
1885 (vec_concat:V2DF
1886 (plusminus:DF
1887 (vec_select:DF
1888 (match_operand:V4DF 1 "register_operand" "x")
1889 (parallel [(const_int 0)]))
1890 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1891 (plusminus:DF
1892 (vec_select:DF
1893 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1894 (parallel [(const_int 0)]))
1895 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1896 (vec_concat:V2DF
1897 (plusminus:DF
1898 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1899 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1900 (plusminus:DF
1901 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1902 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1903 "TARGET_AVX"
1904 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1905 [(set_attr "type" "sseadd")
1906 (set_attr "prefix" "vex")
1907 (set_attr "mode" "V4DF")])
1908
1909 (define_expand "sse3_haddv2df3"
1910 [(set (match_operand:V2DF 0 "register_operand")
1911 (vec_concat:V2DF
1912 (plus:DF
1913 (vec_select:DF
1914 (match_operand:V2DF 1 "register_operand")
1915 (parallel [(const_int 0)]))
1916 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1917 (plus:DF
1918 (vec_select:DF
1919 (match_operand:V2DF 2 "vector_operand")
1920 (parallel [(const_int 0)]))
1921 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1922 "TARGET_SSE3")
1923
1924 (define_insn "*sse3_haddv2df3"
1925 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1926 (vec_concat:V2DF
1927 (plus:DF
1928 (vec_select:DF
1929 (match_operand:V2DF 1 "register_operand" "0,x")
1930 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1931 (vec_select:DF
1932 (match_dup 1)
1933 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1934 (plus:DF
1935 (vec_select:DF
1936 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
1937 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1938 (vec_select:DF
1939 (match_dup 2)
1940 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1941 "TARGET_SSE3
1942 && INTVAL (operands[3]) != INTVAL (operands[4])
1943 && INTVAL (operands[5]) != INTVAL (operands[6])"
1944 "@
1945 haddpd\t{%2, %0|%0, %2}
1946 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1947 [(set_attr "isa" "noavx,avx")
1948 (set_attr "type" "sseadd")
1949 (set_attr "prefix" "orig,vex")
1950 (set_attr "mode" "V2DF")])
1951
1952 (define_insn "sse3_hsubv2df3"
1953 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1954 (vec_concat:V2DF
1955 (minus:DF
1956 (vec_select:DF
1957 (match_operand:V2DF 1 "register_operand" "0,x")
1958 (parallel [(const_int 0)]))
1959 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1960 (minus:DF
1961 (vec_select:DF
1962 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
1963 (parallel [(const_int 0)]))
1964 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1965 "TARGET_SSE3"
1966 "@
1967 hsubpd\t{%2, %0|%0, %2}
1968 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
1969 [(set_attr "isa" "noavx,avx")
1970 (set_attr "type" "sseadd")
1971 (set_attr "prefix" "orig,vex")
1972 (set_attr "mode" "V2DF")])
1973
1974 (define_insn "*sse3_haddv2df3_low"
1975 [(set (match_operand:DF 0 "register_operand" "=x,x")
1976 (plus:DF
1977 (vec_select:DF
1978 (match_operand:V2DF 1 "register_operand" "0,x")
1979 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
1980 (vec_select:DF
1981 (match_dup 1)
1982 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
1983 "TARGET_SSE3
1984 && INTVAL (operands[2]) != INTVAL (operands[3])"
1985 "@
1986 haddpd\t{%0, %0|%0, %0}
1987 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
1988 [(set_attr "isa" "noavx,avx")
1989 (set_attr "type" "sseadd1")
1990 (set_attr "prefix" "orig,vex")
1991 (set_attr "mode" "V2DF")])
1992
1993 (define_insn "*sse3_hsubv2df3_low"
1994 [(set (match_operand:DF 0 "register_operand" "=x,x")
1995 (minus:DF
1996 (vec_select:DF
1997 (match_operand:V2DF 1 "register_operand" "0,x")
1998 (parallel [(const_int 0)]))
1999 (vec_select:DF
2000 (match_dup 1)
2001 (parallel [(const_int 1)]))))]
2002 "TARGET_SSE3"
2003 "@
2004 hsubpd\t{%0, %0|%0, %0}
2005 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2006 [(set_attr "isa" "noavx,avx")
2007 (set_attr "type" "sseadd1")
2008 (set_attr "prefix" "orig,vex")
2009 (set_attr "mode" "V2DF")])
2010
2011 (define_insn "avx_h<plusminus_insn>v8sf3"
2012 [(set (match_operand:V8SF 0 "register_operand" "=x")
2013 (vec_concat:V8SF
2014 (vec_concat:V4SF
2015 (vec_concat:V2SF
2016 (plusminus:SF
2017 (vec_select:SF
2018 (match_operand:V8SF 1 "register_operand" "x")
2019 (parallel [(const_int 0)]))
2020 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2021 (plusminus:SF
2022 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2023 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2024 (vec_concat:V2SF
2025 (plusminus:SF
2026 (vec_select:SF
2027 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2028 (parallel [(const_int 0)]))
2029 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2030 (plusminus:SF
2031 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2032 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2033 (vec_concat:V4SF
2034 (vec_concat:V2SF
2035 (plusminus:SF
2036 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2037 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2038 (plusminus:SF
2039 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2040 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2041 (vec_concat:V2SF
2042 (plusminus:SF
2043 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2044 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2045 (plusminus:SF
2046 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2047 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2048 "TARGET_AVX"
2049 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2050 [(set_attr "type" "sseadd")
2051 (set_attr "prefix" "vex")
2052 (set_attr "mode" "V8SF")])
2053
2054 (define_insn "sse3_h<plusminus_insn>v4sf3"
2055 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2056 (vec_concat:V4SF
2057 (vec_concat:V2SF
2058 (plusminus:SF
2059 (vec_select:SF
2060 (match_operand:V4SF 1 "register_operand" "0,x")
2061 (parallel [(const_int 0)]))
2062 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2063 (plusminus:SF
2064 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2065 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2066 (vec_concat:V2SF
2067 (plusminus:SF
2068 (vec_select:SF
2069 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2070 (parallel [(const_int 0)]))
2071 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2072 (plusminus:SF
2073 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2074 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2075 "TARGET_SSE3"
2076 "@
2077 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2078 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2079 [(set_attr "isa" "noavx,avx")
2080 (set_attr "type" "sseadd")
2081 (set_attr "atom_unit" "complex")
2082 (set_attr "prefix" "orig,vex")
2083 (set_attr "prefix_rep" "1,*")
2084 (set_attr "mode" "V4SF")])
2085
2086 (define_expand "reduc_plus_scal_v8df"
2087 [(match_operand:DF 0 "register_operand")
2088 (match_operand:V8DF 1 "register_operand")]
2089 "TARGET_AVX512F"
2090 {
2091 rtx tmp = gen_reg_rtx (V8DFmode);
2092 ix86_expand_reduc (gen_addv8df3, tmp, operands[1]);
2093 emit_insn (gen_vec_extractv8df (operands[0], tmp, const0_rtx));
2094 DONE;
2095 })
2096
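;; vhaddpd/vhaddps only add within 128-bit lanes, so the 256-bit reductions
;; below follow the horizontal add(s) with a vperm2f128 that swaps the two
;; lanes and a final full-width add; the sum is then extracted from element 0.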
2097 (define_expand "reduc_plus_scal_v4df"
2098 [(match_operand:DF 0 "register_operand")
2099 (match_operand:V4DF 1 "register_operand")]
2100 "TARGET_AVX"
2101 {
2102 rtx tmp = gen_reg_rtx (V4DFmode);
2103 rtx tmp2 = gen_reg_rtx (V4DFmode);
2104 rtx vec_res = gen_reg_rtx (V4DFmode);
2105 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2106 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2107 emit_insn (gen_addv4df3 (vec_res, tmp, tmp2));
2108 emit_insn (gen_vec_extractv4df (operands[0], vec_res, const0_rtx));
2109 DONE;
2110 })
2111
2112 (define_expand "reduc_plus_scal_v2df"
2113 [(match_operand:DF 0 "register_operand")
2114 (match_operand:V2DF 1 "register_operand")]
2115 "TARGET_SSE3"
2116 {
2117 rtx tmp = gen_reg_rtx (V2DFmode);
2118 emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1]));
2119 emit_insn (gen_vec_extractv2df (operands[0], tmp, const0_rtx));
2120 DONE;
2121 })
2122
2123 (define_expand "reduc_plus_scal_v16sf"
2124 [(match_operand:SF 0 "register_operand")
2125 (match_operand:V16SF 1 "register_operand")]
2126 "TARGET_AVX512F"
2127 {
2128 rtx tmp = gen_reg_rtx (V16SFmode);
2129 ix86_expand_reduc (gen_addv16sf3, tmp, operands[1]);
2130 emit_insn (gen_vec_extractv16sf (operands[0], tmp, const0_rtx));
2131 DONE;
2132 })
2133
2134 (define_expand "reduc_plus_scal_v8sf"
2135 [(match_operand:SF 0 "register_operand")
2136 (match_operand:V8SF 1 "register_operand")]
2137 "TARGET_AVX"
2138 {
2139 rtx tmp = gen_reg_rtx (V8SFmode);
2140 rtx tmp2 = gen_reg_rtx (V8SFmode);
2141 rtx vec_res = gen_reg_rtx (V8SFmode);
2142 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2143 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2144 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2145 emit_insn (gen_addv8sf3 (vec_res, tmp, tmp2));
2146 emit_insn (gen_vec_extractv8sf (operands[0], vec_res, const0_rtx));
2147 DONE;
2148 })
2149
2150 (define_expand "reduc_plus_scal_v4sf"
2151 [(match_operand:SF 0 "register_operand")
2152 (match_operand:V4SF 1 "register_operand")]
2153 "TARGET_SSE"
2154 {
2155 rtx vec_res = gen_reg_rtx (V4SFmode);
2156 if (TARGET_SSE3)
2157 {
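/* Two haddps passes leave the full sum in every element:
   {a0+a1, a2+a3, a0+a1, a2+a3} after the first, then {s, s, s, s}.  */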
2158 rtx tmp = gen_reg_rtx (V4SFmode);
2159 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2160 emit_insn (gen_sse3_haddv4sf3 (vec_res, tmp, tmp));
2161 }
2162 else
2163 ix86_expand_reduc (gen_addv4sf3, vec_res, operands[1]);
2164 emit_insn (gen_vec_extractv4sf (operands[0], vec_res, const0_rtx));
2165 DONE;
2166 })
2167
2168 ;; Modes handled by reduc_sm{in,ax}* patterns.
2169 (define_mode_iterator REDUC_SMINMAX_MODE
2170 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2171 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2172 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2173 (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2174 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2175 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2176 (V8DF "TARGET_AVX512F")])
2177
2178 (define_expand "reduc_<code>_scal_<mode>"
2179 [(smaxmin:REDUC_SMINMAX_MODE
2180 (match_operand:<ssescalarmode> 0 "register_operand")
2181 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2182 ""
2183 {
2184 rtx tmp = gen_reg_rtx (<MODE>mode);
2185 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2186 emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
2187 DONE;
2188 })
2189
2190 (define_expand "reduc_<code>_scal_<mode>"
2191 [(umaxmin:VI_AVX512BW
2192 (match_operand:<ssescalarmode> 0 "register_operand")
2193 (match_operand:VI_AVX512BW 1 "register_operand"))]
2194 "TARGET_AVX512F"
2195 {
2196 rtx tmp = gen_reg_rtx (<MODE>mode);
2197 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2198 emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
2199 DONE;
2200 })
2201
2202 (define_expand "reduc_<code>_scal_<mode>"
2203 [(umaxmin:VI_256
2204 (match_operand:<ssescalarmode> 0 "register_operand")
2205 (match_operand:VI_256 1 "register_operand"))]
2206 "TARGET_AVX2"
2207 {
2208 rtx tmp = gen_reg_rtx (<MODE>mode);
2209 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2210 emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
2211 DONE;
2212 })
2213
2214 (define_expand "reduc_umin_scal_v8hi"
2215 [(umin:V8HI
2216 (match_operand:HI 0 "register_operand")
2217 (match_operand:V8HI 1 "register_operand"))]
2218 "TARGET_SSE4_1"
2219 {
2220 rtx tmp = gen_reg_rtx (V8HImode);
2221 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2222 emit_insn (gen_vec_extractv8hi (operands[0], tmp, const0_rtx));
2223 DONE;
2224 })
2225
2226 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2227 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2228 (unspec:VF_AVX512VL
2229 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2230 (match_operand:SI 2 "const_0_to_255_operand")]
2231 UNSPEC_REDUCE))]
2232 "TARGET_AVX512DQ"
2233 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2234 [(set_attr "type" "sse")
2235 (set_attr "prefix" "evex")
2236 (set_attr "mode" "<MODE>")])
2237
2238 (define_insn "reduces<mode>"
2239 [(set (match_operand:VF_128 0 "register_operand" "=v")
2240 (vec_merge:VF_128
2241 (unspec:VF_128
2242 [(match_operand:VF_128 1 "register_operand" "v")
2243 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2244 (match_operand:SI 3 "const_0_to_255_operand")]
2245 UNSPEC_REDUCE)
2246 (match_dup 1)
2247 (const_int 1)))]
2248 "TARGET_AVX512DQ"
2249 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2250 [(set_attr "type" "sse")
2251 (set_attr "prefix" "evex")
2252 (set_attr "mode" "<MODE>")])
2253
2254 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2255 ;;
2256 ;; Parallel floating point comparisons
2257 ;;
2258 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2259
2260 (define_insn "avx_cmp<mode>3"
2261 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2262 (unspec:VF_128_256
2263 [(match_operand:VF_128_256 1 "register_operand" "x")
2264 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2265 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2266 UNSPEC_PCMP))]
2267 "TARGET_AVX"
2268 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2269 [(set_attr "type" "ssecmp")
2270 (set_attr "length_immediate" "1")
2271 (set_attr "prefix" "vex")
2272 (set_attr "mode" "<MODE>")])
2273
2274 (define_insn "avx_vmcmp<mode>3"
2275 [(set (match_operand:VF_128 0 "register_operand" "=x")
2276 (vec_merge:VF_128
2277 (unspec:VF_128
2278 [(match_operand:VF_128 1 "register_operand" "x")
2279 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2280 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2281 UNSPEC_PCMP)
2282 (match_dup 1)
2283 (const_int 1)))]
2284 "TARGET_AVX"
2285 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2286 [(set_attr "type" "ssecmp")
2287 (set_attr "length_immediate" "1")
2288 (set_attr "prefix" "vex")
2289 (set_attr "mode" "<ssescalarmode>")])
2290
2291 (define_insn "*<sse>_maskcmp<mode>3_comm"
2292 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2293 (match_operator:VF_128_256 3 "sse_comparison_operator"
2294 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2295 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2296 "TARGET_SSE
2297 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2298 "@
2299 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2300 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2301 [(set_attr "isa" "noavx,avx")
2302 (set_attr "type" "ssecmp")
2303 (set_attr "length_immediate" "1")
2304 (set_attr "prefix" "orig,vex")
2305 (set_attr "mode" "<MODE>")])
2306
2307 (define_insn "<sse>_maskcmp<mode>3"
2308 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2309 (match_operator:VF_128_256 3 "sse_comparison_operator"
2310 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2311 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2312 "TARGET_SSE"
2313 "@
2314 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2315 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2316 [(set_attr "isa" "noavx,avx")
2317 (set_attr "type" "ssecmp")
2318 (set_attr "length_immediate" "1")
2319 (set_attr "prefix" "orig,vex")
2320 (set_attr "mode" "<MODE>")])
2321
2322 (define_insn "<sse>_vmmaskcmp<mode>3"
2323 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2324 (vec_merge:VF_128
2325 (match_operator:VF_128 3 "sse_comparison_operator"
2326 [(match_operand:VF_128 1 "register_operand" "0,x")
2327 (match_operand:VF_128 2 "vector_operand" "xBm,xm")])
2328 (match_dup 1)
2329 (const_int 1)))]
2330 "TARGET_SSE"
2331 "@
2332 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2333 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2334 [(set_attr "isa" "noavx,avx")
2335 (set_attr "type" "ssecmp")
2336 (set_attr "length_immediate" "1,*")
2337 (set_attr "prefix" "orig,vex")
2338 (set_attr "mode" "<ssescalarmode>")])
2339
2340 (define_mode_attr cmp_imm_predicate
2341 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2342 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2343 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2344 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2345 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2346 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2347 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2348 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2349 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
2350
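;; The AVX-512 comparisons below write a mask register rather than a vector
;; of element-wide 0/-1 results; the immediate selects the predicate, with
;; 32 encodings for the FP compares and 8 for the integer ones (hence the
;; cmp_imm_predicate attribute above).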
2351 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2352 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2353 (unspec:<avx512fmaskmode>
2354 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2355 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2356 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2357 UNSPEC_PCMP))]
2358 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2359 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2360 [(set_attr "type" "ssecmp")
2361 (set_attr "length_immediate" "1")
2362 (set_attr "prefix" "evex")
2363 (set_attr "mode" "<sseinsnmode>")])
2364
2365 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2366 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2367 (unspec:<avx512fmaskmode>
2368 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2369 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2370 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2371 UNSPEC_PCMP))]
2372 "TARGET_AVX512BW"
2373 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2374 [(set_attr "type" "ssecmp")
2375 (set_attr "length_immediate" "1")
2376 (set_attr "prefix" "evex")
2377 (set_attr "mode" "<sseinsnmode>")])
2378
2379 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2380 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2381 (unspec:<avx512fmaskmode>
2382 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2383 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2384 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2385 UNSPEC_UNSIGNED_PCMP))]
2386 "TARGET_AVX512BW"
2387 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2388 [(set_attr "type" "ssecmp")
2389 (set_attr "length_immediate" "1")
2390 (set_attr "prefix" "evex")
2391 (set_attr "mode" "<sseinsnmode>")])
2392
2393 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2394 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2395 (unspec:<avx512fmaskmode>
2396 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2397 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2398 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2399 UNSPEC_UNSIGNED_PCMP))]
2400 "TARGET_AVX512F"
2401 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2402 [(set_attr "type" "ssecmp")
2403 (set_attr "length_immediate" "1")
2404 (set_attr "prefix" "evex")
2405 (set_attr "mode" "<sseinsnmode>")])
2406
2407 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2408 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2409 (and:<avx512fmaskmode>
2410 (unspec:<avx512fmaskmode>
2411 [(match_operand:VF_128 1 "register_operand" "v")
2412 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2413 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2414 UNSPEC_PCMP)
2415 (const_int 1)))]
2416 "TARGET_AVX512F"
2417 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2418 [(set_attr "type" "ssecmp")
2419 (set_attr "length_immediate" "1")
2420 (set_attr "prefix" "evex")
2421 (set_attr "mode" "<ssescalarmode>")])
2422
2423 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2424 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2425 (and:<avx512fmaskmode>
2426 (unspec:<avx512fmaskmode>
2427 [(match_operand:VF_128 1 "register_operand" "v")
2428 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2429 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2430 UNSPEC_PCMP)
2431 (and:<avx512fmaskmode>
2432 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2433 (const_int 1))))]
2434 "TARGET_AVX512F"
2435 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2436 [(set_attr "type" "ssecmp")
2437 (set_attr "length_immediate" "1")
2438 (set_attr "prefix" "evex")
2439 (set_attr "mode" "<ssescalarmode>")])
2440
2441 (define_insn "avx512f_maskcmp<mode>3"
2442 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2443 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2444 [(match_operand:VF 1 "register_operand" "v")
2445 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2446 "TARGET_AVX512F"
2447 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2448 [(set_attr "type" "ssecmp")
2449 (set_attr "length_immediate" "1")
2450 (set_attr "prefix" "evex")
2451 (set_attr "mode" "<sseinsnmode>")])
2452
2453 (define_insn "<sse>_comi<round_saeonly_name>"
2454 [(set (reg:CCFP FLAGS_REG)
2455 (compare:CCFP
2456 (vec_select:MODEF
2457 (match_operand:<ssevecmode> 0 "register_operand" "v")
2458 (parallel [(const_int 0)]))
2459 (vec_select:MODEF
2460 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2461 (parallel [(const_int 0)]))))]
2462 "SSE_FLOAT_MODE_P (<MODE>mode)"
2463 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2464 [(set_attr "type" "ssecomi")
2465 (set_attr "prefix" "maybe_vex")
2466 (set_attr "prefix_rep" "0")
2467 (set (attr "prefix_data16")
2468 (if_then_else (eq_attr "mode" "DF")
2469 (const_string "1")
2470 (const_string "0")))
2471 (set_attr "mode" "<MODE>")])
2472
2473 (define_insn "<sse>_ucomi<round_saeonly_name>"
2474 [(set (reg:CCFPU FLAGS_REG)
2475 (compare:CCFPU
2476 (vec_select:MODEF
2477 (match_operand:<ssevecmode> 0 "register_operand" "v")
2478 (parallel [(const_int 0)]))
2479 (vec_select:MODEF
2480 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2481 (parallel [(const_int 0)]))))]
2482 "SSE_FLOAT_MODE_P (<MODE>mode)"
2483 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2484 [(set_attr "type" "ssecomi")
2485 (set_attr "prefix" "maybe_vex")
2486 (set_attr "prefix_rep" "0")
2487 (set (attr "prefix_data16")
2488 (if_then_else (eq_attr "mode" "DF")
2489 (const_string "1")
2490 (const_string "0")))
2491 (set_attr "mode" "<MODE>")])
2492
2493 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2494 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2495 (match_operator:<avx512fmaskmode> 1 ""
2496 [(match_operand:V48_AVX512VL 2 "register_operand")
2497 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
2498 "TARGET_AVX512F"
2499 {
2500 bool ok = ix86_expand_mask_vec_cmp (operands);
2501 gcc_assert (ok);
2502 DONE;
2503 })
2504
2505 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2506 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2507 (match_operator:<avx512fmaskmode> 1 ""
2508 [(match_operand:VI12_AVX512VL 2 "register_operand")
2509 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2510 "TARGET_AVX512BW"
2511 {
2512 bool ok = ix86_expand_mask_vec_cmp (operands);
2513 gcc_assert (ok);
2514 DONE;
2515 })
2516
2517 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2518 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2519 (match_operator:<sseintvecmode> 1 ""
2520 [(match_operand:VI_256 2 "register_operand")
2521 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2522 "TARGET_AVX2"
2523 {
2524 bool ok = ix86_expand_int_vec_cmp (operands);
2525 gcc_assert (ok);
2526 DONE;
2527 })
2528
2529 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2530 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2531 (match_operator:<sseintvecmode> 1 ""
2532 [(match_operand:VI124_128 2 "register_operand")
2533 (match_operand:VI124_128 3 "vector_operand")]))]
2534 "TARGET_SSE2"
2535 {
2536 bool ok = ix86_expand_int_vec_cmp (operands);
2537 gcc_assert (ok);
2538 DONE;
2539 })
2540
2541 (define_expand "vec_cmpv2div2di"
2542 [(set (match_operand:V2DI 0 "register_operand")
2543 (match_operator:V2DI 1 ""
2544 [(match_operand:V2DI 2 "register_operand")
2545 (match_operand:V2DI 3 "vector_operand")]))]
2546 "TARGET_SSE4_2"
2547 {
2548 bool ok = ix86_expand_int_vec_cmp (operands);
2549 gcc_assert (ok);
2550 DONE;
2551 })
2552
2553 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2554 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2555 (match_operator:<sseintvecmode> 1 ""
2556 [(match_operand:VF_256 2 "register_operand")
2557 (match_operand:VF_256 3 "nonimmediate_operand")]))]
2558 "TARGET_AVX"
2559 {
2560 bool ok = ix86_expand_fp_vec_cmp (operands);
2561 gcc_assert (ok);
2562 DONE;
2563 })
2564
2565 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2566 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2567 (match_operator:<sseintvecmode> 1 ""
2568 [(match_operand:VF_128 2 "register_operand")
2569 (match_operand:VF_128 3 "vector_operand")]))]
2570 "TARGET_SSE"
2571 {
2572 bool ok = ix86_expand_fp_vec_cmp (operands);
2573 gcc_assert (ok);
2574 DONE;
2575 })
2576
2577 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2578 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2579 (match_operator:<avx512fmaskmode> 1 ""
2580 [(match_operand:VI48_AVX512VL 2 "register_operand")
2581 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
2582 "TARGET_AVX512F"
2583 {
2584 bool ok = ix86_expand_mask_vec_cmp (operands);
2585 gcc_assert (ok);
2586 DONE;
2587 })
2588
2589 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2590 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2591 (match_operator:<avx512fmaskmode> 1 ""
2592 [(match_operand:VI12_AVX512VL 2 "register_operand")
2593 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2594 "TARGET_AVX512BW"
2595 {
2596 bool ok = ix86_expand_mask_vec_cmp (operands);
2597 gcc_assert (ok);
2598 DONE;
2599 })
2600
2601 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
2602 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2603 (match_operator:<sseintvecmode> 1 ""
2604 [(match_operand:VI_256 2 "register_operand")
2605 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2606 "TARGET_AVX2"
2607 {
2608 bool ok = ix86_expand_int_vec_cmp (operands);
2609 gcc_assert (ok);
2610 DONE;
2611 })
2612
2613 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
2614 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2615 (match_operator:<sseintvecmode> 1 ""
2616 [(match_operand:VI124_128 2 "register_operand")
2617 (match_operand:VI124_128 3 "vector_operand")]))]
2618 "TARGET_SSE2"
2619 {
2620 bool ok = ix86_expand_int_vec_cmp (operands);
2621 gcc_assert (ok);
2622 DONE;
2623 })
2624
2625 (define_expand "vec_cmpuv2div2di"
2626 [(set (match_operand:V2DI 0 "register_operand")
2627 (match_operator:V2DI 1 ""
2628 [(match_operand:V2DI 2 "register_operand")
2629 (match_operand:V2DI 3 "vector_operand")]))]
2630 "TARGET_SSE4_2"
2631 {
2632 bool ok = ix86_expand_int_vec_cmp (operands);
2633 gcc_assert (ok);
2634 DONE;
2635 })
2636
2637 (define_expand "vcond<V_512:mode><VF_512:mode>"
2638 [(set (match_operand:V_512 0 "register_operand")
2639 (if_then_else:V_512
2640 (match_operator 3 ""
2641 [(match_operand:VF_512 4 "nonimmediate_operand")
2642 (match_operand:VF_512 5 "nonimmediate_operand")])
2643 (match_operand:V_512 1 "general_operand")
2644 (match_operand:V_512 2 "general_operand")))]
2645 "TARGET_AVX512F
2646 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2647 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2648 {
2649 bool ok = ix86_expand_fp_vcond (operands);
2650 gcc_assert (ok);
2651 DONE;
2652 })
2653
2654 (define_expand "vcond<V_256:mode><VF_256:mode>"
2655 [(set (match_operand:V_256 0 "register_operand")
2656 (if_then_else:V_256
2657 (match_operator 3 ""
2658 [(match_operand:VF_256 4 "nonimmediate_operand")
2659 (match_operand:VF_256 5 "nonimmediate_operand")])
2660 (match_operand:V_256 1 "general_operand")
2661 (match_operand:V_256 2 "general_operand")))]
2662 "TARGET_AVX
2663 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2664 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2665 {
2666 bool ok = ix86_expand_fp_vcond (operands);
2667 gcc_assert (ok);
2668 DONE;
2669 })
2670
2671 (define_expand "vcond<V_128:mode><VF_128:mode>"
2672 [(set (match_operand:V_128 0 "register_operand")
2673 (if_then_else:V_128
2674 (match_operator 3 ""
2675 [(match_operand:VF_128 4 "vector_operand")
2676 (match_operand:VF_128 5 "vector_operand")])
2677 (match_operand:V_128 1 "general_operand")
2678 (match_operand:V_128 2 "general_operand")))]
2679 "TARGET_SSE
2680 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2681 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2682 {
2683 bool ok = ix86_expand_fp_vcond (operands);
2684 gcc_assert (ok);
2685 DONE;
2686 })
2687
2688 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
2689 [(set (match_operand:V48_AVX512VL 0 "register_operand")
2690 (vec_merge:V48_AVX512VL
2691 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
2692 (match_operand:V48_AVX512VL 2 "vector_move_operand")
2693 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
2694 "TARGET_AVX512F")
2695
2696 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
2697 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
2698 (vec_merge:VI12_AVX512VL
2699 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
2700 (match_operand:VI12_AVX512VL 2 "vector_move_operand")
2701 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
2702 "TARGET_AVX512BW")
2703
2704 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
2705 [(set (match_operand:VI_256 0 "register_operand")
2706 (vec_merge:VI_256
2707 (match_operand:VI_256 1 "nonimmediate_operand")
2708 (match_operand:VI_256 2 "vector_move_operand")
2709 (match_operand:<sseintvecmode> 3 "register_operand")))]
2710 "TARGET_AVX2"
2711 {
2712 ix86_expand_sse_movcc (operands[0], operands[3],
2713 operands[1], operands[2]);
2714 DONE;
2715 })
2716
2717 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
2718 [(set (match_operand:VI124_128 0 "register_operand")
2719 (vec_merge:VI124_128
2720 (match_operand:VI124_128 1 "vector_operand")
2721 (match_operand:VI124_128 2 "vector_move_operand")
2722 (match_operand:<sseintvecmode> 3 "register_operand")))]
2723 "TARGET_SSE2"
2724 {
2725 ix86_expand_sse_movcc (operands[0], operands[3],
2726 operands[1], operands[2]);
2727 DONE;
2728 })
2729
2730 (define_expand "vcond_mask_v2div2di"
2731 [(set (match_operand:V2DI 0 "register_operand")
2732 (vec_merge:V2DI
2733 (match_operand:V2DI 1 "vector_operand")
2734 (match_operand:V2DI 2 "vector_move_operand")
2735 (match_operand:V2DI 3 "register_operand")))]
2736 "TARGET_SSE4_2"
2737 {
2738 ix86_expand_sse_movcc (operands[0], operands[3],
2739 operands[1], operands[2]);
2740 DONE;
2741 })
2742
2743 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
2744 [(set (match_operand:VF_256 0 "register_operand")
2745 (vec_merge:VF_256
2746 (match_operand:VF_256 1 "nonimmediate_operand")
2747 (match_operand:VF_256 2 "vector_move_operand")
2748 (match_operand:<sseintvecmode> 3 "register_operand")))]
2749 "TARGET_AVX"
2750 {
2751 ix86_expand_sse_movcc (operands[0], operands[3],
2752 operands[1], operands[2]);
2753 DONE;
2754 })
2755
2756 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
2757 [(set (match_operand:VF_128 0 "register_operand")
2758 (vec_merge:VF_128
2759 (match_operand:VF_128 1 "vector_operand")
2760 (match_operand:VF_128 2 "vector_move_operand")
2761 (match_operand:<sseintvecmode> 3 "register_operand")))]
2762 "TARGET_SSE"
2763 {
2764 ix86_expand_sse_movcc (operands[0], operands[3],
2765 operands[1], operands[2]);
2766 DONE;
2767 })
2768
2769 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2770 ;;
2771 ;; Parallel floating point logical operations
2772 ;;
2773 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2774
2775 (define_insn "<sse>_andnot<mode>3<mask_name>"
2776 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2777 (and:VF_128_256
2778 (not:VF_128_256
2779 (match_operand:VF_128_256 1 "register_operand" "0,v"))
2780 (match_operand:VF_128_256 2 "vector_operand" "xBm,vm")))]
2781 "TARGET_SSE && <mask_avx512vl_condition>"
2782 {
2783 static char buf[128];
2784 const char *ops;
2785 const char *suffix;
2786
2787 switch (get_attr_mode (insn))
2788 {
2789 case MODE_V8SF:
2790 case MODE_V4SF:
2791 suffix = "ps";
2792 break;
2793 default:
2794 suffix = "<ssemodesuffix>";
2795 }
2796
2797 switch (which_alternative)
2798 {
2799 case 0:
2800 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2801 break;
2802 case 1:
2803 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2804 break;
2805 default:
2806 gcc_unreachable ();
2807 }
2808
2809 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
2810 if (<mask_applied> && !TARGET_AVX512DQ)
2811 {
2812 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2813 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2814 }
2815
2816 snprintf (buf, sizeof (buf), ops, suffix);
2817 return buf;
2818 }
2819 [(set_attr "isa" "noavx,avx")
2820 (set_attr "type" "sselog")
2821 (set_attr "prefix" "orig,maybe_evex")
2822 (set (attr "mode")
2823 (cond [(and (match_test "<MODE_SIZE> == 16")
2824 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2825 (const_string "<ssePSmode>")
2826 (match_test "TARGET_AVX")
2827 (const_string "<MODE>")
2828 (match_test "optimize_function_for_size_p (cfun)")
2829 (const_string "V4SF")
2830 ]
2831 (const_string "<MODE>")))])
2832
2833
2834 (define_insn "<sse>_andnot<mode>3<mask_name>"
2835 [(set (match_operand:VF_512 0 "register_operand" "=v")
2836 (and:VF_512
2837 (not:VF_512
2838 (match_operand:VF_512 1 "register_operand" "v"))
2839 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2840 "TARGET_AVX512F"
2841 {
2842 static char buf[128];
2843 const char *ops;
2844 const char *suffix;
2845
2846 suffix = "<ssemodesuffix>";
2847 ops = "";
2848
2849 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
2850 if (!TARGET_AVX512DQ)
2851 {
2852 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2853 ops = "p";
2854 }
2855
2856 snprintf (buf, sizeof (buf),
2857 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
2858 ops, suffix);
2859 return buf;
2860 }
2861 [(set_attr "type" "sselog")
2862 (set_attr "prefix" "evex")
2863 (set_attr "mode" "<sseinsnmode>")])
2864
2865 (define_expand "<code><mode>3<mask_name>"
2866 [(set (match_operand:VF_128_256 0 "register_operand")
2867 (any_logic:VF_128_256
2868 (match_operand:VF_128_256 1 "vector_operand")
2869 (match_operand:VF_128_256 2 "vector_operand")))]
2870 "TARGET_SSE && <mask_avx512vl_condition>"
2871 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
2872
2873 (define_expand "<code><mode>3<mask_name>"
2874 [(set (match_operand:VF_512 0 "register_operand")
2875 (any_logic:VF_512
2876 (match_operand:VF_512 1 "nonimmediate_operand")
2877 (match_operand:VF_512 2 "nonimmediate_operand")))]
2878 "TARGET_AVX512F"
2879 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
2880
2881 (define_insn "*<code><mode>3<mask_name>"
2882 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2883 (any_logic:VF_128_256
2884 (match_operand:VF_128_256 1 "vector_operand" "%0,v")
2885 (match_operand:VF_128_256 2 "vector_operand" "xBm,vm")))]
2886 "TARGET_SSE && <mask_avx512vl_condition>
2887 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2888 {
2889 static char buf[128];
2890 const char *ops;
2891 const char *suffix;
2892
2893 switch (get_attr_mode (insn))
2894 {
2895 case MODE_V8SF:
2896 case MODE_V4SF:
2897 suffix = "ps";
2898 break;
2899 default:
2900 suffix = "<ssemodesuffix>";
2901 }
2902
2903 switch (which_alternative)
2904 {
2905 case 0:
2906 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2907 break;
2908 case 1:
2909 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2910 break;
2911 default:
2912 gcc_unreachable ();
2913 }
2914
2915 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
2916 if (<mask_applied> && !TARGET_AVX512DQ)
2917 {
2918 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2919 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2920 }
2921
2922 snprintf (buf, sizeof (buf), ops, suffix);
2923 return buf;
2924 }
2925 [(set_attr "isa" "noavx,avx")
2926 (set_attr "type" "sselog")
2927 (set_attr "prefix" "orig,maybe_evex")
2928 (set (attr "mode")
2929 (cond [(and (match_test "<MODE_SIZE> == 16")
2930 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2931 (const_string "<ssePSmode>")
2932 (match_test "TARGET_AVX")
2933 (const_string "<MODE>")
2934 (match_test "optimize_function_for_size_p (cfun)")
2935 (const_string "V4SF")
2936 ]
2937 (const_string "<MODE>")))])
2938
2939 (define_insn "*<code><mode>3<mask_name>"
2940 [(set (match_operand:VF_512 0 "register_operand" "=v")
2941 (any_logic:VF_512
2942 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
2943 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2944 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2945 {
2946 static char buf[128];
2947 const char *ops;
2948 const char *suffix;
2949
2950 suffix = "<ssemodesuffix>";
2951 ops = "";
2952
2953 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
2954 if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ)
2955 {
2956 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2957 ops = "p";
2958 }
2959
2960 snprintf (buf, sizeof (buf),
2961 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
2962 ops, suffix);
2963 return buf;
2964 }
2965 [(set_attr "type" "sselog")
2966 (set_attr "prefix" "evex")
2967 (set_attr "mode" "<sseinsnmode>")])
2968
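;; copysign is open-coded with the sign-bit mask built by
;; ix86_build_signbit_mask: clear the sign of operand 1, isolate the sign of
;; operand 2, and OR the two, i.e. roughly
;;   result = (op1 & ~signmask) | (op2 & signmask).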
2969 (define_expand "copysign<mode>3"
2970 [(set (match_dup 4)
2971 (and:VF
2972 (not:VF (match_dup 3))
2973 (match_operand:VF 1 "vector_operand")))
2974 (set (match_dup 5)
2975 (and:VF (match_dup 3)
2976 (match_operand:VF 2 "vector_operand")))
2977 (set (match_operand:VF 0 "register_operand")
2978 (ior:VF (match_dup 4) (match_dup 5)))]
2979 "TARGET_SSE"
2980 {
2981 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2982
2983 operands[4] = gen_reg_rtx (<MODE>mode);
2984 operands[5] = gen_reg_rtx (<MODE>mode);
2985 })
2986
2987 ;; Also define scalar versions. These are used for abs, neg, and
2988 ;; conditional move. Using subregs into vector modes causes register
2989 ;; allocation lossage. These patterns do not allow memory operands
2990 ;; because the native instructions read the full 128 bits.
2991
2992 (define_insn "*andnot<mode>3"
2993 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2994 (and:MODEF
2995 (not:MODEF
2996 (match_operand:MODEF 1 "register_operand" "0,x"))
2997 (match_operand:MODEF 2 "register_operand" "x,x")))]
2998 "SSE_FLOAT_MODE_P (<MODE>mode)"
2999 {
3000 static char buf[32];
3001 const char *ops;
3002 const char *suffix
3003 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3004
3005 switch (which_alternative)
3006 {
3007 case 0:
3008 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3009 break;
3010 case 1:
3011 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3012 break;
3013 default:
3014 gcc_unreachable ();
3015 }
3016
3017 snprintf (buf, sizeof (buf), ops, suffix);
3018 return buf;
3019 }
3020 [(set_attr "isa" "noavx,avx")
3021 (set_attr "type" "sselog")
3022 (set_attr "prefix" "orig,vex")
3023 (set (attr "mode")
3024 (cond [(and (match_test "<MODE_SIZE> == 16")
3025 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3026 (const_string "V4SF")
3027 (match_test "TARGET_AVX")
3028 (const_string "<ssevecmode>")
3029 (match_test "optimize_function_for_size_p (cfun)")
3030 (const_string "V4SF")
3031 ]
3032 (const_string "<ssevecmode>")))])
3033
3034 (define_insn "*andnottf3"
3035 [(set (match_operand:TF 0 "register_operand" "=x,x")
3036 (and:TF
3037 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
3038 (match_operand:TF 2 "vector_operand" "xBm,xm")))]
3039 "TARGET_SSE"
3040 {
3041 static char buf[32];
3042 const char *ops;
3043 const char *tmp
3044 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
3045
3046 switch (which_alternative)
3047 {
3048 case 0:
3049 ops = "%s\t{%%2, %%0|%%0, %%2}";
3050 break;
3051 case 1:
3052 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3053 break;
3054 default:
3055 gcc_unreachable ();
3056 }
3057
3058 snprintf (buf, sizeof (buf), ops, tmp);
3059 return buf;
3060 }
3061 [(set_attr "isa" "noavx,avx")
3062 (set_attr "type" "sselog")
3063 (set (attr "prefix_data16")
3064 (if_then_else
3065 (and (eq_attr "alternative" "0")
3066 (eq_attr "mode" "TI"))
3067 (const_string "1")
3068 (const_string "*")))
3069 (set_attr "prefix" "orig,vex")
3070 (set (attr "mode")
3071 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3072 (const_string "V4SF")
3073 (match_test "TARGET_AVX")
3074 (const_string "TI")
3075 (ior (not (match_test "TARGET_SSE2"))
3076 (match_test "optimize_function_for_size_p (cfun)"))
3077 (const_string "V4SF")
3078 ]
3079 (const_string "TI")))])
3080
3081 (define_insn "*<code><mode>3"
3082 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
3083 (any_logic:MODEF
3084 (match_operand:MODEF 1 "register_operand" "%0,x")
3085 (match_operand:MODEF 2 "register_operand" "x,x")))]
3086 "SSE_FLOAT_MODE_P (<MODE>mode)"
3087 {
3088 static char buf[32];
3089 const char *ops;
3090 const char *suffix
3091 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3092
3093 switch (which_alternative)
3094 {
3095 case 0:
3096 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3097 break;
3098 case 1:
3099 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3100 break;
3101 default:
3102 gcc_unreachable ();
3103 }
3104
3105 snprintf (buf, sizeof (buf), ops, suffix);
3106 return buf;
3107 }
3108 [(set_attr "isa" "noavx,avx")
3109 (set_attr "type" "sselog")
3110 (set_attr "prefix" "orig,vex")
3111 (set (attr "mode")
3112 (cond [(and (match_test "<MODE_SIZE> == 16")
3113 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3114 (const_string "V4SF")
3115 (match_test "TARGET_AVX")
3116 (const_string "<ssevecmode>")
3117 (match_test "optimize_function_for_size_p (cfun)")
3118 (const_string "V4SF")
3119 ]
3120 (const_string "<ssevecmode>")))])
3121
3122 (define_expand "<code>tf3"
3123 [(set (match_operand:TF 0 "register_operand")
3124 (any_logic:TF
3125 (match_operand:TF 1 "vector_operand")
3126 (match_operand:TF 2 "vector_operand")))]
3127 "TARGET_SSE"
3128 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3129
3130 (define_insn "*<code>tf3"
3131 [(set (match_operand:TF 0 "register_operand" "=x,x")
3132 (any_logic:TF
3133 (match_operand:TF 1 "vector_operand" "%0,x")
3134 (match_operand:TF 2 "vector_operand" "xBm,xm")))]
3135 "TARGET_SSE
3136 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3137 {
3138 static char buf[32];
3139 const char *ops;
3140 const char *tmp
3141 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
3142
3143 switch (which_alternative)
3144 {
3145 case 0:
3146 ops = "%s\t{%%2, %%0|%%0, %%2}";
3147 break;
3148 case 1:
3149 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3150 break;
3151 default:
3152 gcc_unreachable ();
3153 }
3154
3155 snprintf (buf, sizeof (buf), ops, tmp);
3156 return buf;
3157 }
3158 [(set_attr "isa" "noavx,avx")
3159 (set_attr "type" "sselog")
3160 (set (attr "prefix_data16")
3161 (if_then_else
3162 (and (eq_attr "alternative" "0")
3163 (eq_attr "mode" "TI"))
3164 (const_string "1")
3165 (const_string "*")))
3166 (set_attr "prefix" "orig,vex")
3167 (set (attr "mode")
3168 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3169 (const_string "V4SF")
3170 (match_test "TARGET_AVX")
3171 (const_string "TI")
3172 (ior (not (match_test "TARGET_SSE2"))
3173 (match_test "optimize_function_for_size_p (cfun)"))
3174 (const_string "V4SF")
3175 ]
3176 (const_string "TI")))])
3177
3178 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3179 ;;
3180 ;; FMA floating point multiply/accumulate instructions. These include
3181 ;; scalar versions of the instructions as well as vector versions.
3182 ;;
3183 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3184
3185 ;; The standard names for scalar FMA are only available with SSE math enabled.
3186 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3187 ;; care about the FMA bit, so we enable fma for TARGET_AVX512F even when
3188 ;; TARGET_FMA and TARGET_FMA4 are both false.
3189 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3190 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3191 ;; GAS to allow proper prefix selection. However, all current hardware that
3192 ;; supports AVX512F also supports FMA, so we can ignore this for now.
3193 (define_mode_iterator FMAMODEM
3194 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3195 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3196 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3197 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3198 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3199 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3200 (V16SF "TARGET_AVX512F")
3201 (V8DF "TARGET_AVX512F")])
3202
3203 (define_expand "fma<mode>4"
3204 [(set (match_operand:FMAMODEM 0 "register_operand")
3205 (fma:FMAMODEM
3206 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3207 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3208 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3209
3210 (define_expand "fms<mode>4"
3211 [(set (match_operand:FMAMODEM 0 "register_operand")
3212 (fma:FMAMODEM
3213 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3214 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3215 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3216
3217 (define_expand "fnma<mode>4"
3218 [(set (match_operand:FMAMODEM 0 "register_operand")
3219 (fma:FMAMODEM
3220 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3221 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3222 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3223
3224 (define_expand "fnms<mode>4"
3225 [(set (match_operand:FMAMODEM 0 "register_operand")
3226 (fma:FMAMODEM
3227 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3228 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3229 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3230
3231 ;; The builtins for intrinsics are not constrained by SSE math being enabled.
3232 (define_mode_iterator FMAMODE_AVX512
3233 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3234 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3235 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3236 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3237 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3238 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3239 (V16SF "TARGET_AVX512F")
3240 (V8DF "TARGET_AVX512F")])
3241
3242 (define_mode_iterator FMAMODE
3243 [SF DF V4SF V2DF V8SF V4DF])
3244
3245 (define_expand "fma4i_fmadd_<mode>"
3246 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3247 (fma:FMAMODE_AVX512
3248 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3249 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3250 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3251
3252 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3253 [(match_operand:VF_AVX512VL 0 "register_operand")
3254 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3255 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3256 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3257 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3258 "TARGET_AVX512F && <round_mode512bit_condition>"
3259 {
3260 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3261 operands[0], operands[1], operands[2], operands[3],
3262 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3263 DONE;
3264 })
3265
3266 (define_insn "*fma_fmadd_<mode>"
3267 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3268 (fma:FMAMODE
3269 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3270 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3271 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3272 "TARGET_FMA || TARGET_FMA4"
3273 "@
3274 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3275 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3276 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3277 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3278 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3279 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3280 (set_attr "type" "ssemuladd")
3281 (set_attr "mode" "<MODE>")])
3282
3283 ;; Assume AVX-512F as the baseline
3284 (define_mode_iterator VF_SF_AVX512VL
3285 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3286 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3287
3288 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3289 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3290 (fma:VF_SF_AVX512VL
3291 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3292 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3293 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3294 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3295 "@
3296 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3297 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3298 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3299 [(set_attr "type" "ssemuladd")
3300 (set_attr "mode" "<MODE>")])
3301
3302 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3303 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3304 (vec_merge:VF_AVX512VL
3305 (fma:VF_AVX512VL
3306 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3307 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3308 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3309 (match_dup 1)
3310 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3311 "TARGET_AVX512F && <round_mode512bit_condition>"
3312 "@
3313 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3314 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3315 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3316 (set_attr "type" "ssemuladd")
3317 (set_attr "mode" "<MODE>")])
3318
3319 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3320 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3321 (vec_merge:VF_AVX512VL
3322 (fma:VF_AVX512VL
3323 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3324 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3325 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3326 (match_dup 3)
3327 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3328 "TARGET_AVX512F"
3329 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3330 [(set_attr "isa" "fma_avx512f")
3331 (set_attr "type" "ssemuladd")
3332 (set_attr "mode" "<MODE>")])
3333
3334 (define_insn "*fma_fmsub_<mode>"
3335 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3336 (fma:FMAMODE
3337 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3338 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3339 (neg:FMAMODE
3340 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3341 "TARGET_FMA || TARGET_FMA4"
3342 "@
3343 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3344 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3345 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3346 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3347 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3348 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3349 (set_attr "type" "ssemuladd")
3350 (set_attr "mode" "<MODE>")])
3351
3352 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3353 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3354 (fma:VF_SF_AVX512VL
3355 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3356 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3357 (neg:VF_SF_AVX512VL
3358 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3359 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3360 "@
3361 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3362 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3363 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3364 [(set_attr "type" "ssemuladd")
3365 (set_attr "mode" "<MODE>")])
3366
3367 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3368 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3369 (vec_merge:VF_AVX512VL
3370 (fma:VF_AVX512VL
3371 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3372 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3373 (neg:VF_AVX512VL
3374 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3375 (match_dup 1)
3376 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3377 "TARGET_AVX512F"
3378 "@
3379 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3380 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3381 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3382 (set_attr "type" "ssemuladd")
3383 (set_attr "mode" "<MODE>")])
3384
3385 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3386 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3387 (vec_merge:VF_AVX512VL
3388 (fma:VF_AVX512VL
3389 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3390 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3391 (neg:VF_AVX512VL
3392 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3393 (match_dup 3)
3394 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3395 "TARGET_AVX512F && <round_mode512bit_condition>"
3396 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3397 [(set_attr "isa" "fma_avx512f")
3398 (set_attr "type" "ssemuladd")
3399 (set_attr "mode" "<MODE>")])
3400
3401 (define_insn "*fma_fnmadd_<mode>"
3402 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3403 (fma:FMAMODE
3404 (neg:FMAMODE
3405 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3406 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3407 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3408 "TARGET_FMA || TARGET_FMA4"
3409 "@
3410 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3411 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3412 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3413 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3414 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3415 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3416 (set_attr "type" "ssemuladd")
3417 (set_attr "mode" "<MODE>")])
3418
3419 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3420 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3421 (fma:VF_SF_AVX512VL
3422 (neg:VF_SF_AVX512VL
3423 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3424 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3425 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3426 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3427 "@
3428 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3429 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3430 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3431 [(set_attr "type" "ssemuladd")
3432 (set_attr "mode" "<MODE>")])
3433
3434 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3435 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3436 (vec_merge:VF_AVX512VL
3437 (fma:VF_AVX512VL
3438 (neg:VF_AVX512VL
3439 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3440 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3441 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3442 (match_dup 1)
3443 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3444 "TARGET_AVX512F && <round_mode512bit_condition>"
3445 "@
3446 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3447 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3448 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3449 (set_attr "type" "ssemuladd")
3450 (set_attr "mode" "<MODE>")])
3451
3452 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3453 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3454 (vec_merge:VF_AVX512VL
3455 (fma:VF_AVX512VL
3456 (neg:VF_AVX512VL
3457 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3458 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3459 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3460 (match_dup 3)
3461 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3462 "TARGET_AVX512F && <round_mode512bit_condition>"
3463 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3464 [(set_attr "isa" "fma_avx512f")
3465 (set_attr "type" "ssemuladd")
3466 (set_attr "mode" "<MODE>")])
3467
3468 (define_insn "*fma_fnmsub_<mode>"
3469 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3470 (fma:FMAMODE
3471 (neg:FMAMODE
3472 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3473 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3474 (neg:FMAMODE
3475 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3476 "TARGET_FMA || TARGET_FMA4"
3477 "@
3478 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3479 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3480 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3481 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3482 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3483 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3484 (set_attr "type" "ssemuladd")
3485 (set_attr "mode" "<MODE>")])
3486
3487 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3488 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3489 (fma:VF_SF_AVX512VL
3490 (neg:VF_SF_AVX512VL
3491 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3492 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3493 (neg:VF_SF_AVX512VL
3494 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3495 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3496 "@
3497 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3498 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3499 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3500 [(set_attr "type" "ssemuladd")
3501 (set_attr "mode" "<MODE>")])
3502
3503 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3504 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3505 (vec_merge:VF_AVX512VL
3506 (fma:VF_AVX512VL
3507 (neg:VF_AVX512VL
3508 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3509 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3510 (neg:VF_AVX512VL
3511 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3512 (match_dup 1)
3513 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3514 "TARGET_AVX512F && <round_mode512bit_condition>"
3515 "@
3516 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3517 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3518 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3519 (set_attr "type" "ssemuladd")
3520 (set_attr "mode" "<MODE>")])
3521
3522 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3523 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3524 (vec_merge:VF_AVX512VL
3525 (fma:VF_AVX512VL
3526 (neg:VF_AVX512VL
3527 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3528 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3529 (neg:VF_AVX512VL
3530 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3531 (match_dup 3)
3532 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3533 "TARGET_AVX512F"
3534 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3535 [(set_attr "isa" "fma_avx512f")
3536 (set_attr "type" "ssemuladd")
3537 (set_attr "mode" "<MODE>")])
3538
3539 ;; FMA parallel floating point multiply addsub and subadd operations.
3540
3541 ;; It would be possible to represent these without the UNSPEC as
3542 ;;
3543 ;; (vec_merge
3544 ;; (fma op1 op2 op3)
3545 ;; (fma op1 op2 (neg op3))
3546 ;; (merge-const))
3547 ;;
3548 ;; But this doesn't seem useful in practice.
3549
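;; For reference, the addsub operation alternates per element: even-indexed
;; lanes subtract the third operand and odd-indexed lanes add it, i.e. for
;; each lane i (a sketch in C, lane 0 being the lowest element):
;;
;;   r[i] = (i & 1) ? a[i] * b[i] + c[i] : a[i] * b[i] - c[i];
;;
;; The fmsubadd patterns below negate operand 3, which swaps the add and
;; subtract lanes.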
3550 (define_expand "fmaddsub_<mode>"
3551 [(set (match_operand:VF 0 "register_operand")
3552 (unspec:VF
3553 [(match_operand:VF 1 "nonimmediate_operand")
3554 (match_operand:VF 2 "nonimmediate_operand")
3555 (match_operand:VF 3 "nonimmediate_operand")]
3556 UNSPEC_FMADDSUB))]
3557 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3558
3559 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3560 [(match_operand:VF_AVX512VL 0 "register_operand")
3561 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3562 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3563 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3564 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3565 "TARGET_AVX512F"
3566 {
3567 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3568 operands[0], operands[1], operands[2], operands[3],
3569 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3570 DONE;
3571 })
3572
3573 (define_insn "*fma_fmaddsub_<mode>"
3574 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3575 (unspec:VF_128_256
3576 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3577 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3578 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
3579 UNSPEC_FMADDSUB))]
3580 "TARGET_FMA || TARGET_FMA4"
3581 "@
3582 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3583 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3584 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3585 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3586 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3587 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3588 (set_attr "type" "ssemuladd")
3589 (set_attr "mode" "<MODE>")])
3590
3591 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3592 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3593 (unspec:VF_SF_AVX512VL
3594 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3595 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3596 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3597 UNSPEC_FMADDSUB))]
3598 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3599 "@
3600 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3601 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3602 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3603 [(set_attr "type" "ssemuladd")
3604 (set_attr "mode" "<MODE>")])
3605
3606 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
3607 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3608 (vec_merge:VF_AVX512VL
3609 (unspec:VF_AVX512VL
3610 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3611 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3612 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
3613 UNSPEC_FMADDSUB)
3614 (match_dup 1)
3615 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3616 "TARGET_AVX512F"
3617 "@
3618 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3619 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3620 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3621 (set_attr "type" "ssemuladd")
3622 (set_attr "mode" "<MODE>")])
3623
3624 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
3625 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3626 (vec_merge:VF_AVX512VL
3627 (unspec:VF_AVX512VL
3628 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3629 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3630 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
3631 UNSPEC_FMADDSUB)
3632 (match_dup 3)
3633 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3634 "TARGET_AVX512F"
3635 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3636 [(set_attr "isa" "fma_avx512f")
3637 (set_attr "type" "ssemuladd")
3638 (set_attr "mode" "<MODE>")])
3639
3640 (define_insn "*fma_fmsubadd_<mode>"
3641 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3642 (unspec:VF_128_256
3643 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3644 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3645 (neg:VF_128_256
3646 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
3647 UNSPEC_FMADDSUB))]
3648 "TARGET_FMA || TARGET_FMA4"
3649 "@
3650 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3651 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3652 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3653 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3654 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3655 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3656 (set_attr "type" "ssemuladd")
3657 (set_attr "mode" "<MODE>")])
3658
3659 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3660 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3661 (unspec:VF_SF_AVX512VL
3662 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3663 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3664 (neg:VF_SF_AVX512VL
3665 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
3666 UNSPEC_FMADDSUB))]
3667 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3668 "@
3669 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3670 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3671 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3672 [(set_attr "type" "ssemuladd")
3673 (set_attr "mode" "<MODE>")])
3674
3675 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
3676 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3677 (vec_merge:VF_AVX512VL
3678 (unspec:VF_AVX512VL
3679 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3680 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3681 (neg:VF_AVX512VL
3682 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
3683 UNSPEC_FMADDSUB)
3684 (match_dup 1)
3685 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3686 "TARGET_AVX512F"
3687 "@
3688 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3689 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3690 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3691 (set_attr "type" "ssemuladd")
3692 (set_attr "mode" "<MODE>")])
3693
3694 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
3695 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3696 (vec_merge:VF_AVX512VL
3697 (unspec:VF_AVX512VL
3698 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3699 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3700 (neg:VF_AVX512VL
3701 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
3702 UNSPEC_FMADDSUB)
3703 (match_dup 3)
3704 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3705 "TARGET_AVX512F"
3706 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3707 [(set_attr "isa" "fma_avx512f")
3708 (set_attr "type" "ssemuladd")
3709 (set_attr "mode" "<MODE>")])
3710
3711 ;; FMA3 floating point scalar intrinsics. These merge the result with the
3712 ;; high-order elements of the destination register.
3713
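;; For example, the scalar intrinsic behaves roughly as sketched below
;; (element 0 is computed, the remaining elements come from operand 1):
;;
;;   __m128 r = _mm_fmadd_ss (a, b, c);
;;   /* r[0] = a[0] * b[0] + c[0];  r[1..3] = a[1..3]  */
;;
;; which is why operand 1 is both an fma input and the vec_merge fall-through
;; in the patterns that follow.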
3714 (define_expand "fmai_vmfmadd_<mode><round_name>"
3715 [(set (match_operand:VF_128 0 "register_operand")
3716 (vec_merge:VF_128
3717 (fma:VF_128
3718 (match_operand:VF_128 1 "<round_nimm_predicate>")
3719 (match_operand:VF_128 2 "<round_nimm_predicate>")
3720 (match_operand:VF_128 3 "<round_nimm_predicate>"))
3721 (match_dup 1)
3722 (const_int 1)))]
3723 "TARGET_FMA")
3724
3725 (define_insn "*fmai_fmadd_<mode><round_name>"
3726 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3727 (vec_merge:VF_128
3728 (fma:VF_128
3729 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3730 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3731 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3732 (match_dup 1)
3733 (const_int 1)))]
3734 "TARGET_FMA || TARGET_AVX512F"
3735 "@
3736 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3737 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3738 [(set_attr "type" "ssemuladd")
3739 (set_attr "mode" "<MODE>")])
3740
3741 (define_insn "*fmai_fmsub_<mode><round_name>"
3742 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3743 (vec_merge:VF_128
3744 (fma:VF_128
3745 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3746 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
3747 (neg:VF_128
3748 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3749 (match_dup 1)
3750 (const_int 1)))]
3751 "TARGET_FMA || TARGET_AVX512F"
3752 "@
3753 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3754 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3755 [(set_attr "type" "ssemuladd")
3756 (set_attr "mode" "<MODE>")])
3757
3758 (define_insn "*fmai_fnmadd_<mode><round_name>"
3759 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3760 (vec_merge:VF_128
3761 (fma:VF_128
3762 (neg:VF_128
3763 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3764 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3765 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3766 (match_dup 1)
3767 (const_int 1)))]
3768 "TARGET_FMA || TARGET_AVX512F"
3769 "@
3770 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3771 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3772 [(set_attr "type" "ssemuladd")
3773 (set_attr "mode" "<MODE>")])
3774
3775 (define_insn "*fmai_fnmsub_<mode><round_name>"
3776 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3777 (vec_merge:VF_128
3778 (fma:VF_128
3779 (neg:VF_128
3780 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3781 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3782 (neg:VF_128
3783 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3784 (match_dup 1)
3785 (const_int 1)))]
3786 "TARGET_FMA || TARGET_AVX512F"
3787 "@
3788 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3789 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3790 [(set_attr "type" "ssemuladd")
3791 (set_attr "mode" "<MODE>")])
3792
3793 ;; FMA4 floating point scalar intrinsics. These write the
3794 ;; entire destination register, with the high-order elements zeroed.
3795
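;; In contrast to the FMA3 forms above, the FMA4 scalar instructions zero the
;; upper elements, which the patterns below model by merging with a zero
;; vector (operand 4), roughly:
;;
;;   r[0] = a[0] * b[0] + c[0];  r[1..3] = 0.0f;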
3796 (define_expand "fma4i_vmfmadd_<mode>"
3797 [(set (match_operand:VF_128 0 "register_operand")
3798 (vec_merge:VF_128
3799 (fma:VF_128
3800 (match_operand:VF_128 1 "nonimmediate_operand")
3801 (match_operand:VF_128 2 "nonimmediate_operand")
3802 (match_operand:VF_128 3 "nonimmediate_operand"))
3803 (match_dup 4)
3804 (const_int 1)))]
3805 "TARGET_FMA4"
3806 "operands[4] = CONST0_RTX (<MODE>mode);")
3807
3808 (define_insn "*fma4i_vmfmadd_<mode>"
3809 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3810 (vec_merge:VF_128
3811 (fma:VF_128
3812 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3813 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3814 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3815 (match_operand:VF_128 4 "const0_operand")
3816 (const_int 1)))]
3817 "TARGET_FMA4"
3818 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3819 [(set_attr "type" "ssemuladd")
3820 (set_attr "mode" "<MODE>")])
3821
3822 (define_insn "*fma4i_vmfmsub_<mode>"
3823 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3824 (vec_merge:VF_128
3825 (fma:VF_128
3826 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3827 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3828 (neg:VF_128
3829 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3830 (match_operand:VF_128 4 "const0_operand")
3831 (const_int 1)))]
3832 "TARGET_FMA4"
3833 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3834 [(set_attr "type" "ssemuladd")
3835 (set_attr "mode" "<MODE>")])
3836
3837 (define_insn "*fma4i_vmfnmadd_<mode>"
3838 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3839 (vec_merge:VF_128
3840 (fma:VF_128
3841 (neg:VF_128
3842 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3843 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3844 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3845 (match_operand:VF_128 4 "const0_operand")
3846 (const_int 1)))]
3847 "TARGET_FMA4"
3848 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3849 [(set_attr "type" "ssemuladd")
3850 (set_attr "mode" "<MODE>")])
3851
3852 (define_insn "*fma4i_vmfnmsub_<mode>"
3853 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3854 (vec_merge:VF_128
3855 (fma:VF_128
3856 (neg:VF_128
3857 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3858 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3859 (neg:VF_128
3860 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3861 (match_operand:VF_128 4 "const0_operand")
3862 (const_int 1)))]
3863 "TARGET_FMA4"
3864 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3865 [(set_attr "type" "ssemuladd")
3866 (set_attr "mode" "<MODE>")])
3867
3868 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3869 ;;
3870 ;; Parallel single-precision floating point conversion operations
3871 ;;
3872 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3873
3874 (define_insn "sse_cvtpi2ps"
3875 [(set (match_operand:V4SF 0 "register_operand" "=x")
3876 (vec_merge:V4SF
3877 (vec_duplicate:V4SF
3878 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3879 (match_operand:V4SF 1 "register_operand" "0")
3880 (const_int 3)))]
3881 "TARGET_SSE"
3882 "cvtpi2ps\t{%2, %0|%0, %2}"
3883 [(set_attr "type" "ssecvt")
3884 (set_attr "mode" "V4SF")])
3885
3886 (define_insn "sse_cvtps2pi"
3887 [(set (match_operand:V2SI 0 "register_operand" "=y")
3888 (vec_select:V2SI
3889 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3890 UNSPEC_FIX_NOTRUNC)
3891 (parallel [(const_int 0) (const_int 1)])))]
3892 "TARGET_SSE"
3893 "cvtps2pi\t{%1, %0|%0, %q1}"
3894 [(set_attr "type" "ssecvt")
3895 (set_attr "unit" "mmx")
3896 (set_attr "mode" "DI")])
3897
3898 (define_insn "sse_cvttps2pi"
3899 [(set (match_operand:V2SI 0 "register_operand" "=y")
3900 (vec_select:V2SI
3901 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3902 (parallel [(const_int 0) (const_int 1)])))]
3903 "TARGET_SSE"
3904 "cvttps2pi\t{%1, %0|%0, %q1}"
3905 [(set_attr "type" "ssecvt")
3906 (set_attr "unit" "mmx")
3907 (set_attr "prefix_rep" "0")
3908 (set_attr "mode" "SF")])
3909
3910 (define_insn "sse_cvtsi2ss<round_name>"
3911 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3912 (vec_merge:V4SF
3913 (vec_duplicate:V4SF
3914 (float:SF (match_operand:SI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
3915 (match_operand:V4SF 1 "register_operand" "0,0,v")
3916 (const_int 1)))]
3917 "TARGET_SSE"
3918 "@
3919 cvtsi2ss\t{%2, %0|%0, %2}
3920 cvtsi2ss\t{%2, %0|%0, %2}
3921 vcvtsi2ss\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
3922 [(set_attr "isa" "noavx,noavx,avx")
3923 (set_attr "type" "sseicvt")
3924 (set_attr "athlon_decode" "vector,double,*")
3925 (set_attr "amdfam10_decode" "vector,double,*")
3926 (set_attr "bdver1_decode" "double,direct,*")
3927 (set_attr "btver2_decode" "double,double,double")
3928 (set_attr "znver1_decode" "double,double,double")
3929 (set_attr "prefix" "orig,orig,maybe_evex")
3930 (set_attr "mode" "SF")])
3931
3932 (define_insn "sse_cvtsi2ssq<round_name>"
3933 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3934 (vec_merge:V4SF
3935 (vec_duplicate:V4SF
3936 (float:SF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
3937 (match_operand:V4SF 1 "register_operand" "0,0,v")
3938 (const_int 1)))]
3939 "TARGET_SSE && TARGET_64BIT"
3940 "@
3941 cvtsi2ssq\t{%2, %0|%0, %2}
3942 cvtsi2ssq\t{%2, %0|%0, %2}
3943 vcvtsi2ssq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
3944 [(set_attr "isa" "noavx,noavx,avx")
3945 (set_attr "type" "sseicvt")
3946 (set_attr "athlon_decode" "vector,double,*")
3947 (set_attr "amdfam10_decode" "vector,double,*")
3948 (set_attr "bdver1_decode" "double,direct,*")
3949 (set_attr "btver2_decode" "double,double,double")
3950 (set_attr "length_vex" "*,*,4")
3951 (set_attr "prefix_rex" "1,1,*")
3952 (set_attr "prefix" "orig,orig,maybe_evex")
3953 (set_attr "mode" "SF")])
3954
3955 (define_insn "sse_cvtss2si<round_name>"
3956 [(set (match_operand:SI 0 "register_operand" "=r,r")
3957 (unspec:SI
3958 [(vec_select:SF
3959 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
3960 (parallel [(const_int 0)]))]
3961 UNSPEC_FIX_NOTRUNC))]
3962 "TARGET_SSE"
3963 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3964 [(set_attr "type" "sseicvt")
3965 (set_attr "athlon_decode" "double,vector")
3966 (set_attr "bdver1_decode" "double,double")
3967 (set_attr "prefix_rep" "1")
3968 (set_attr "prefix" "maybe_vex")
3969 (set_attr "mode" "SI")])
3970
3971 (define_insn "sse_cvtss2si_2"
3972 [(set (match_operand:SI 0 "register_operand" "=r,r")
3973 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3974 UNSPEC_FIX_NOTRUNC))]
3975 "TARGET_SSE"
3976 "%vcvtss2si\t{%1, %0|%0, %k1}"
3977 [(set_attr "type" "sseicvt")
3978 (set_attr "athlon_decode" "double,vector")
3979 (set_attr "amdfam10_decode" "double,double")
3980 (set_attr "bdver1_decode" "double,double")
3981 (set_attr "prefix_rep" "1")
3982 (set_attr "prefix" "maybe_vex")
3983 (set_attr "mode" "SI")])
3984
3985 (define_insn "sse_cvtss2siq<round_name>"
3986 [(set (match_operand:DI 0 "register_operand" "=r,r")
3987 (unspec:DI
3988 [(vec_select:SF
3989 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
3990 (parallel [(const_int 0)]))]
3991 UNSPEC_FIX_NOTRUNC))]
3992 "TARGET_SSE && TARGET_64BIT"
3993 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3994 [(set_attr "type" "sseicvt")
3995 (set_attr "athlon_decode" "double,vector")
3996 (set_attr "bdver1_decode" "double,double")
3997 (set_attr "prefix_rep" "1")
3998 (set_attr "prefix" "maybe_vex")
3999 (set_attr "mode" "DI")])
4000
4001 (define_insn "sse_cvtss2siq_2"
4002 [(set (match_operand:DI 0 "register_operand" "=r,r")
4003 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4004 UNSPEC_FIX_NOTRUNC))]
4005 "TARGET_SSE && TARGET_64BIT"
4006 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
4007 [(set_attr "type" "sseicvt")
4008 (set_attr "athlon_decode" "double,vector")
4009 (set_attr "amdfam10_decode" "double,double")
4010 (set_attr "bdver1_decode" "double,double")
4011 (set_attr "prefix_rep" "1")
4012 (set_attr "prefix" "maybe_vex")
4013 (set_attr "mode" "DI")])
4014
4015 (define_insn "sse_cvttss2si<round_saeonly_name>"
4016 [(set (match_operand:SI 0 "register_operand" "=r,r")
4017 (fix:SI
4018 (vec_select:SF
4019 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4020 (parallel [(const_int 0)]))))]
4021 "TARGET_SSE"
4022 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4023 [(set_attr "type" "sseicvt")
4024 (set_attr "athlon_decode" "double,vector")
4025 (set_attr "amdfam10_decode" "double,double")
4026 (set_attr "bdver1_decode" "double,double")
4027 (set_attr "prefix_rep" "1")
4028 (set_attr "prefix" "maybe_vex")
4029 (set_attr "mode" "SI")])
4030
4031 (define_insn "sse_cvttss2siq<round_saeonly_name>"
4032 [(set (match_operand:DI 0 "register_operand" "=r,r")
4033 (fix:DI
4034 (vec_select:SF
4035 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
4036 (parallel [(const_int 0)]))))]
4037 "TARGET_SSE && TARGET_64BIT"
4038 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4039 [(set_attr "type" "sseicvt")
4040 (set_attr "athlon_decode" "double,vector")
4041 (set_attr "amdfam10_decode" "double,double")
4042 (set_attr "bdver1_decode" "double,double")
4043 (set_attr "prefix_rep" "1")
4044 (set_attr "prefix" "maybe_vex")
4045 (set_attr "mode" "DI")])
4046
4047 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
4048 [(set (match_operand:VF_128 0 "register_operand" "=v")
4049 (vec_merge:VF_128
4050 (vec_duplicate:VF_128
4051 (unsigned_float:<ssescalarmode>
4052 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4053 (match_operand:VF_128 1 "register_operand" "v")
4054 (const_int 1)))]
4055 "TARGET_AVX512F && <round_modev4sf_condition>"
4056 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4057 [(set_attr "type" "sseicvt")
4058 (set_attr "prefix" "evex")
4059 (set_attr "mode" "<ssescalarmode>")])
4060
4061 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
4062 [(set (match_operand:VF_128 0 "register_operand" "=v")
4063 (vec_merge:VF_128
4064 (vec_duplicate:VF_128
4065 (unsigned_float:<ssescalarmode>
4066 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4067 (match_operand:VF_128 1 "register_operand" "v")
4068 (const_int 1)))]
4069 "TARGET_AVX512F && TARGET_64BIT"
4070 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4071 [(set_attr "type" "sseicvt")
4072 (set_attr "prefix" "evex")
4073 (set_attr "mode" "<ssescalarmode>")])
4074
4075 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4076 [(set (match_operand:VF1 0 "register_operand" "=x,v")
4077 (float:VF1
4078 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
4079 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4080 "@
4081 cvtdq2ps\t{%1, %0|%0, %1}
4082 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4083 [(set_attr "isa" "noavx,avx")
4084 (set_attr "type" "ssecvt")
4085 (set_attr "prefix" "maybe_vex")
4086 (set_attr "mode" "<sseinsnmode>")])
4087
4088 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4089 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4090 (unsigned_float:VF1_AVX512VL
4091 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4092 "TARGET_AVX512F"
4093 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4094 [(set_attr "type" "ssecvt")
4095 (set_attr "prefix" "evex")
4096 (set_attr "mode" "<MODE>")])
4097
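;; Unsigned int->float conversion: use the native vcvtudq2ps forms when
;; AVX512F/AVX512VL provide them, otherwise fall back to
;; ix86_expand_vector_convert_uns_vsivsf, which (as assumed here) builds the
;; result from signed converts.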
4098 (define_expand "floatuns<sseintvecmodelower><mode>2"
4099 [(match_operand:VF1 0 "register_operand")
4100 (match_operand:<sseintvecmode> 1 "register_operand")]
4101 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4102 {
4103 if (<MODE>mode == V16SFmode)
4104 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4105 else
4106 if (TARGET_AVX512VL)
4107 {
4108 if (<MODE>mode == V4SFmode)
4109 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4110 else
4111 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4112 }
4113 else
4114 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4115
4116 DONE;
4117 })
4118
4119
4120 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
4121 (define_mode_attr sf2simodelower
4122 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
4123
4124 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4125 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4126 (unspec:VI4_AVX
4127 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
4128 UNSPEC_FIX_NOTRUNC))]
4129 "TARGET_SSE2 && <mask_mode512bit_condition>"
4130 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4131 [(set_attr "type" "ssecvt")
4132 (set (attr "prefix_data16")
4133 (if_then_else
4134 (match_test "TARGET_AVX")
4135 (const_string "*")
4136 (const_string "1")))
4137 (set_attr "prefix" "maybe_vex")
4138 (set_attr "mode" "<sseinsnmode>")])
4139
4140 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4141 [(set (match_operand:V16SI 0 "register_operand" "=v")
4142 (unspec:V16SI
4143 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4144 UNSPEC_FIX_NOTRUNC))]
4145 "TARGET_AVX512F"
4146 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4147 [(set_attr "type" "ssecvt")
4148 (set_attr "prefix" "evex")
4149 (set_attr "mode" "XI")])
4150
4151 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4152 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4153 (unspec:VI4_AVX512VL
4154 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4155 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4156 "TARGET_AVX512F"
4157 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4158 [(set_attr "type" "ssecvt")
4159 (set_attr "prefix" "evex")
4160 (set_attr "mode" "<sseinsnmode>")])
4161
4162 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4163 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4164 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4165 UNSPEC_FIX_NOTRUNC))]
4166 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4167 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4168 [(set_attr "type" "ssecvt")
4169 (set_attr "prefix" "evex")
4170 (set_attr "mode" "<sseinsnmode>")])
4171
4172 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4173 [(set (match_operand:V2DI 0 "register_operand" "=v")
4174 (unspec:V2DI
4175 [(vec_select:V2SF
4176 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4177 (parallel [(const_int 0) (const_int 1)]))]
4178 UNSPEC_FIX_NOTRUNC))]
4179 "TARGET_AVX512DQ && TARGET_AVX512VL"
4180 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4181 [(set_attr "type" "ssecvt")
4182 (set_attr "prefix" "evex")
4183 (set_attr "mode" "TI")])
4184
4185 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4186 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4187 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4188 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4189 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4190 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4191 [(set_attr "type" "ssecvt")
4192 (set_attr "prefix" "evex")
4193 (set_attr "mode" "<sseinsnmode>")])
4194
4195 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4196 [(set (match_operand:V2DI 0 "register_operand" "=v")
4197 (unspec:V2DI
4198 [(vec_select:V2SF
4199 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4200 (parallel [(const_int 0) (const_int 1)]))]
4201 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4202 "TARGET_AVX512DQ && TARGET_AVX512VL"
4203 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4204 [(set_attr "type" "ssecvt")
4205 (set_attr "prefix" "evex")
4206 (set_attr "mode" "TI")])
4207
4208 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4209 [(set (match_operand:V16SI 0 "register_operand" "=v")
4210 (any_fix:V16SI
4211 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4212 "TARGET_AVX512F"
4213 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4214 [(set_attr "type" "ssecvt")
4215 (set_attr "prefix" "evex")
4216 (set_attr "mode" "XI")])
4217
4218 (define_insn "fix_truncv8sfv8si2<mask_name>"
4219 [(set (match_operand:V8SI 0 "register_operand" "=v")
4220 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4221 "TARGET_AVX && <mask_avx512vl_condition>"
4222 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4223 [(set_attr "type" "ssecvt")
4224 (set_attr "prefix" "<mask_prefix>")
4225 (set_attr "mode" "OI")])
4226
4227 (define_insn "fix_truncv4sfv4si2<mask_name>"
4228 [(set (match_operand:V4SI 0 "register_operand" "=v")
4229 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
4230 "TARGET_SSE2 && <mask_avx512vl_condition>"
4231 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4232 [(set_attr "type" "ssecvt")
4233 (set (attr "prefix_rep")
4234 (if_then_else
4235 (match_test "TARGET_AVX")
4236 (const_string "*")
4237 (const_string "1")))
4238 (set (attr "prefix_data16")
4239 (if_then_else
4240 (match_test "TARGET_AVX")
4241 (const_string "*")
4242 (const_string "0")))
4243 (set_attr "prefix_data16" "0")
4244 (set_attr "prefix" "<mask_prefix2>")
4245 (set_attr "mode" "TI")])
4246
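;; No unsigned packed truncating conversion exists before AVX512, so the
;; expander below goes through the signed fix_trunc pattern: the adjusted
;; value returned by ix86_expand_adjust_ufix_to_sfix_si is converted with the
;; signed instruction and the correction mask it hands back in tmp[2] is
;; xor'ed into the result.  The usual trick (assumed here) is to subtract
;; 2^31 from lanes that do not fit a signed conversion and restore them via
;; the 0x80000000 xor.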
4247 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4248 [(match_operand:<sseintvecmode> 0 "register_operand")
4249 (match_operand:VF1 1 "register_operand")]
4250 "TARGET_SSE2"
4251 {
4252 if (<MODE>mode == V16SFmode)
4253 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4254 operands[1]));
4255 else
4256 {
4257 rtx tmp[3];
4258 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4259 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4260 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4261 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4262 }
4263 DONE;
4264 })
4265
4266 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4267 ;;
4268 ;; Parallel double-precision floating point conversion operations
4269 ;;
4270 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4271
4272 (define_insn "sse2_cvtpi2pd"
4273 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4274 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4275 "TARGET_SSE2"
4276 "cvtpi2pd\t{%1, %0|%0, %1}"
4277 [(set_attr "type" "ssecvt")
4278 (set_attr "unit" "mmx,*")
4279 (set_attr "prefix_data16" "1,*")
4280 (set_attr "mode" "V2DF")])
4281
4282 (define_insn "sse2_cvtpd2pi"
4283 [(set (match_operand:V2SI 0 "register_operand" "=y")
4284 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4285 UNSPEC_FIX_NOTRUNC))]
4286 "TARGET_SSE2"
4287 "cvtpd2pi\t{%1, %0|%0, %1}"
4288 [(set_attr "type" "ssecvt")
4289 (set_attr "unit" "mmx")
4290 (set_attr "bdver1_decode" "double")
4291 (set_attr "btver2_decode" "direct")
4292 (set_attr "prefix_data16" "1")
4293 (set_attr "mode" "DI")])
4294
4295 (define_insn "sse2_cvttpd2pi"
4296 [(set (match_operand:V2SI 0 "register_operand" "=y")
4297 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4298 "TARGET_SSE2"
4299 "cvttpd2pi\t{%1, %0|%0, %1}"
4300 [(set_attr "type" "ssecvt")
4301 (set_attr "unit" "mmx")
4302 (set_attr "bdver1_decode" "double")
4303 (set_attr "prefix_data16" "1")
4304 (set_attr "mode" "TI")])
4305
4306 (define_insn "sse2_cvtsi2sd"
4307 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4308 (vec_merge:V2DF
4309 (vec_duplicate:V2DF
4310 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4311 (match_operand:V2DF 1 "register_operand" "0,0,v")
4312 (const_int 1)))]
4313 "TARGET_SSE2"
4314 "@
4315 cvtsi2sd\t{%2, %0|%0, %2}
4316 cvtsi2sd\t{%2, %0|%0, %2}
4317 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4318 [(set_attr "isa" "noavx,noavx,avx")
4319 (set_attr "type" "sseicvt")
4320 (set_attr "athlon_decode" "double,direct,*")
4321 (set_attr "amdfam10_decode" "vector,double,*")
4322 (set_attr "bdver1_decode" "double,direct,*")
4323 (set_attr "btver2_decode" "double,double,double")
4324 (set_attr "znver1_decode" "double,double,double")
4325 (set_attr "prefix" "orig,orig,maybe_evex")
4326 (set_attr "mode" "DF")])
4327
4328 (define_insn "sse2_cvtsi2sdq<round_name>"
4329 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4330 (vec_merge:V2DF
4331 (vec_duplicate:V2DF
4332 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4333 (match_operand:V2DF 1 "register_operand" "0,0,v")
4334 (const_int 1)))]
4335 "TARGET_SSE2 && TARGET_64BIT"
4336 "@
4337 cvtsi2sdq\t{%2, %0|%0, %2}
4338 cvtsi2sdq\t{%2, %0|%0, %2}
4339 vcvtsi2sdq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4340 [(set_attr "isa" "noavx,noavx,avx")
4341 (set_attr "type" "sseicvt")
4342 (set_attr "athlon_decode" "double,direct,*")
4343 (set_attr "amdfam10_decode" "vector,double,*")
4344 (set_attr "bdver1_decode" "double,direct,*")
4345 (set_attr "length_vex" "*,*,4")
4346 (set_attr "prefix_rex" "1,1,*")
4347 (set_attr "prefix" "orig,orig,maybe_evex")
4348 (set_attr "mode" "DF")])
4349
4350 (define_insn "avx512f_vcvtss2usi<round_name>"
4351 [(set (match_operand:SI 0 "register_operand" "=r")
4352 (unspec:SI
4353 [(vec_select:SF
4354 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4355 (parallel [(const_int 0)]))]
4356 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4357 "TARGET_AVX512F"
4358 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4359 [(set_attr "type" "sseicvt")
4360 (set_attr "prefix" "evex")
4361 (set_attr "mode" "SI")])
4362
4363 (define_insn "avx512f_vcvtss2usiq<round_name>"
4364 [(set (match_operand:DI 0 "register_operand" "=r")
4365 (unspec:DI
4366 [(vec_select:SF
4367 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4368 (parallel [(const_int 0)]))]
4369 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4370 "TARGET_AVX512F && TARGET_64BIT"
4371 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4372 [(set_attr "type" "sseicvt")
4373 (set_attr "prefix" "evex")
4374 (set_attr "mode" "DI")])
4375
4376 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4377 [(set (match_operand:SI 0 "register_operand" "=r")
4378 (unsigned_fix:SI
4379 (vec_select:SF
4380 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4381 (parallel [(const_int 0)]))))]
4382 "TARGET_AVX512F"
4383 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4384 [(set_attr "type" "sseicvt")
4385 (set_attr "prefix" "evex")
4386 (set_attr "mode" "SI")])
4387
4388 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4389 [(set (match_operand:DI 0 "register_operand" "=r")
4390 (unsigned_fix:DI
4391 (vec_select:SF
4392 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4393 (parallel [(const_int 0)]))))]
4394 "TARGET_AVX512F && TARGET_64BIT"
4395 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4396 [(set_attr "type" "sseicvt")
4397 (set_attr "prefix" "evex")
4398 (set_attr "mode" "DI")])
4399
4400 (define_insn "avx512f_vcvtsd2usi<round_name>"
4401 [(set (match_operand:SI 0 "register_operand" "=r")
4402 (unspec:SI
4403 [(vec_select:DF
4404 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4405 (parallel [(const_int 0)]))]
4406 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4407 "TARGET_AVX512F"
4408 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4409 [(set_attr "type" "sseicvt")
4410 (set_attr "prefix" "evex")
4411 (set_attr "mode" "SI")])
4412
4413 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4414 [(set (match_operand:DI 0 "register_operand" "=r")
4415 (unspec:DI
4416 [(vec_select:DF
4417 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4418 (parallel [(const_int 0)]))]
4419 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4420 "TARGET_AVX512F && TARGET_64BIT"
4421 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4422 [(set_attr "type" "sseicvt")
4423 (set_attr "prefix" "evex")
4424 (set_attr "mode" "DI")])
4425
4426 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4427 [(set (match_operand:SI 0 "register_operand" "=r")
4428 (unsigned_fix:SI
4429 (vec_select:DF
4430 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4431 (parallel [(const_int 0)]))))]
4432 "TARGET_AVX512F"
4433 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4434 [(set_attr "type" "sseicvt")
4435 (set_attr "prefix" "evex")
4436 (set_attr "mode" "SI")])
4437
4438 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4439 [(set (match_operand:DI 0 "register_operand" "=r")
4440 (unsigned_fix:DI
4441 (vec_select:DF
4442 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4443 (parallel [(const_int 0)]))))]
4444 "TARGET_AVX512F && TARGET_64BIT"
4445 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4446 [(set_attr "type" "sseicvt")
4447 (set_attr "prefix" "evex")
4448 (set_attr "mode" "DI")])
4449
4450 (define_insn "sse2_cvtsd2si<round_name>"
4451 [(set (match_operand:SI 0 "register_operand" "=r,r")
4452 (unspec:SI
4453 [(vec_select:DF
4454 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4455 (parallel [(const_int 0)]))]
4456 UNSPEC_FIX_NOTRUNC))]
4457 "TARGET_SSE2"
4458 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4459 [(set_attr "type" "sseicvt")
4460 (set_attr "athlon_decode" "double,vector")
4461 (set_attr "bdver1_decode" "double,double")
4462 (set_attr "btver2_decode" "double,double")
4463 (set_attr "prefix_rep" "1")
4464 (set_attr "prefix" "maybe_vex")
4465 (set_attr "mode" "SI")])
4466
4467 (define_insn "sse2_cvtsd2si_2"
4468 [(set (match_operand:SI 0 "register_operand" "=r,r")
4469 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4470 UNSPEC_FIX_NOTRUNC))]
4471 "TARGET_SSE2"
4472 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4473 [(set_attr "type" "sseicvt")
4474 (set_attr "athlon_decode" "double,vector")
4475 (set_attr "amdfam10_decode" "double,double")
4476 (set_attr "bdver1_decode" "double,double")
4477 (set_attr "prefix_rep" "1")
4478 (set_attr "prefix" "maybe_vex")
4479 (set_attr "mode" "SI")])
4480
4481 (define_insn "sse2_cvtsd2siq<round_name>"
4482 [(set (match_operand:DI 0 "register_operand" "=r,r")
4483 (unspec:DI
4484 [(vec_select:DF
4485 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4486 (parallel [(const_int 0)]))]
4487 UNSPEC_FIX_NOTRUNC))]
4488 "TARGET_SSE2 && TARGET_64BIT"
4489 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4490 [(set_attr "type" "sseicvt")
4491 (set_attr "athlon_decode" "double,vector")
4492 (set_attr "bdver1_decode" "double,double")
4493 (set_attr "prefix_rep" "1")
4494 (set_attr "prefix" "maybe_vex")
4495 (set_attr "mode" "DI")])
4496
4497 (define_insn "sse2_cvtsd2siq_2"
4498 [(set (match_operand:DI 0 "register_operand" "=r,r")
4499 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4500 UNSPEC_FIX_NOTRUNC))]
4501 "TARGET_SSE2 && TARGET_64BIT"
4502 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4503 [(set_attr "type" "sseicvt")
4504 (set_attr "athlon_decode" "double,vector")
4505 (set_attr "amdfam10_decode" "double,double")
4506 (set_attr "bdver1_decode" "double,double")
4507 (set_attr "prefix_rep" "1")
4508 (set_attr "prefix" "maybe_vex")
4509 (set_attr "mode" "DI")])
4510
4511 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4512 [(set (match_operand:SI 0 "register_operand" "=r,r")
4513 (fix:SI
4514 (vec_select:DF
4515 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4516 (parallel [(const_int 0)]))))]
4517 "TARGET_SSE2"
4518 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4519 [(set_attr "type" "sseicvt")
4520 (set_attr "athlon_decode" "double,vector")
4521 (set_attr "amdfam10_decode" "double,double")
4522 (set_attr "bdver1_decode" "double,double")
4523 (set_attr "btver2_decode" "double,double")
4524 (set_attr "prefix_rep" "1")
4525 (set_attr "prefix" "maybe_vex")
4526 (set_attr "mode" "SI")])
4527
4528 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4529 [(set (match_operand:DI 0 "register_operand" "=r,r")
4530 (fix:DI
4531 (vec_select:DF
4532 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4533 (parallel [(const_int 0)]))))]
4534 "TARGET_SSE2 && TARGET_64BIT"
4535 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4536 [(set_attr "type" "sseicvt")
4537 (set_attr "athlon_decode" "double,vector")
4538 (set_attr "amdfam10_decode" "double,double")
4539 (set_attr "bdver1_decode" "double,double")
4540 (set_attr "prefix_rep" "1")
4541 (set_attr "prefix" "maybe_vex")
4542 (set_attr "mode" "DI")])
4543
4544 ;; For float<si2dfmodelower><mode>2 insn pattern
4545 (define_mode_attr si2dfmode
4546 [(V8DF "V8SI") (V4DF "V4SI")])
4547 (define_mode_attr si2dfmodelower
4548 [(V8DF "v8si") (V4DF "v4si")])
4549
4550 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4551 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4552 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4553 "TARGET_AVX && <mask_mode512bit_condition>"
4554 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4555 [(set_attr "type" "ssecvt")
4556 (set_attr "prefix" "maybe_vex")
4557 (set_attr "mode" "<MODE>")])
4558
4559 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4560 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4561 (any_float:VF2_AVX512VL
4562 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))]
4563 "TARGET_AVX512DQ"
4564 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4565 [(set_attr "type" "ssecvt")
4566 (set_attr "prefix" "evex")
4567 (set_attr "mode" "<MODE>")])
4568
4569 ;; For <floatsuffix>float<sselongvecmodelower><mode>2 insn patterns
4570 (define_mode_attr qq2pssuff
4571 [(V8SF "") (V4SF "{y}")])
4572
4573 (define_mode_attr sselongvecmode
4574 [(V8SF "V8DI") (V4SF "V4DI")])
4575
4576 (define_mode_attr sselongvecmodelower
4577 [(V8SF "v8di") (V4SF "v4di")])
4578
4579 (define_mode_attr sseintvecmode3
4580 [(V8SF "XI") (V4SF "OI")
4581 (V8DF "OI") (V4DF "TI")])
4582
4583 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4584 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4585 (any_float:VF1_128_256VL
4586 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4587 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4588 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4589 [(set_attr "type" "ssecvt")
4590 (set_attr "prefix" "evex")
4591 (set_attr "mode" "<MODE>")])
4592
4593 (define_insn "*<floatsuffix>floatv2div2sf2"
4594 [(set (match_operand:V4SF 0 "register_operand" "=v")
4595 (vec_concat:V4SF
4596 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4597 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4598 "TARGET_AVX512DQ && TARGET_AVX512VL"
4599 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4600 [(set_attr "type" "ssecvt")
4601 (set_attr "prefix" "evex")
4602 (set_attr "mode" "V4SF")])
4603
4604 (define_insn "<floatsuffix>floatv2div2sf2_mask"
4605 [(set (match_operand:V4SF 0 "register_operand" "=v")
4606 (vec_concat:V4SF
4607 (vec_merge:V2SF
4608 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4609 (vec_select:V2SF
4610 (match_operand:V4SF 2 "vector_move_operand" "0C")
4611 (parallel [(const_int 0) (const_int 1)]))
4612 (match_operand:QI 3 "register_operand" "Yk"))
4613 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4614 "TARGET_AVX512DQ && TARGET_AVX512VL"
4615 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
4616 [(set_attr "type" "ssecvt")
4617 (set_attr "prefix" "evex")
4618 (set_attr "mode" "V4SF")])
4619
4620 (define_insn "*<floatsuffix>floatv2div2sf2_mask_1"
4621 [(set (match_operand:V4SF 0 "register_operand" "=v")
4622 (vec_concat:V4SF
4623 (vec_merge:V2SF
4624 (any_float:V2SF (match_operand:V2DI 1
4625 "nonimmediate_operand" "vm"))
4626 (const_vector:V2SF [(const_int 0) (const_int 0)])
4627 (match_operand:QI 2 "register_operand" "Yk"))
4628 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4629 "TARGET_AVX512DQ && TARGET_AVX512VL"
4630 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
4631 [(set_attr "type" "ssecvt")
4632 (set_attr "prefix" "evex")
4633 (set_attr "mode" "V4SF")])
4634
4635 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
4636 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
4637 (unsigned_float:VF2_512_256VL
4638 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4639 "TARGET_AVX512F"
4640 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4641 [(set_attr "type" "ssecvt")
4642 (set_attr "prefix" "evex")
4643 (set_attr "mode" "<MODE>")])
4644
4645 (define_insn "ufloatv2siv2df2<mask_name>"
4646 [(set (match_operand:V2DF 0 "register_operand" "=v")
4647 (unsigned_float:V2DF
4648 (vec_select:V2SI
4649 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4650 (parallel [(const_int 0) (const_int 1)]))))]
4651 "TARGET_AVX512VL"
4652 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4653 [(set_attr "type" "ssecvt")
4654 (set_attr "prefix" "evex")
4655 (set_attr "mode" "V2DF")])
4656
4657 (define_insn "avx512f_cvtdq2pd512_2"
4658 [(set (match_operand:V8DF 0 "register_operand" "=v")
4659 (float:V8DF
4660 (vec_select:V8SI
4661 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
4662 (parallel [(const_int 0) (const_int 1)
4663 (const_int 2) (const_int 3)
4664 (const_int 4) (const_int 5)
4665 (const_int 6) (const_int 7)]))))]
4666 "TARGET_AVX512F"
4667 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
4668 [(set_attr "type" "ssecvt")
4669 (set_attr "prefix" "evex")
4670 (set_attr "mode" "V8DF")])
4671
4672 (define_insn "avx_cvtdq2pd256_2"
4673 [(set (match_operand:V4DF 0 "register_operand" "=v")
4674 (float:V4DF
4675 (vec_select:V4SI
4676 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
4677 (parallel [(const_int 0) (const_int 1)
4678 (const_int 2) (const_int 3)]))))]
4679 "TARGET_AVX"
4680 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
4681 [(set_attr "type" "ssecvt")
4682 (set_attr "prefix" "maybe_evex")
4683 (set_attr "mode" "V4DF")])
4684
4685 (define_insn "sse2_cvtdq2pd<mask_name>"
4686 [(set (match_operand:V2DF 0 "register_operand" "=v")
4687 (float:V2DF
4688 (vec_select:V2SI
4689 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4690 (parallel [(const_int 0) (const_int 1)]))))]
4691 "TARGET_SSE2 && <mask_avx512vl_condition>"
4692 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4693 [(set_attr "type" "ssecvt")
4694 (set_attr "prefix" "maybe_vex")
4695 (set_attr "mode" "V2DF")])
4696
4697 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
4698 [(set (match_operand:V8SI 0 "register_operand" "=v")
4699 (unspec:V8SI
4700 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4701 UNSPEC_FIX_NOTRUNC))]
4702 "TARGET_AVX512F"
4703 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4704 [(set_attr "type" "ssecvt")
4705 (set_attr "prefix" "evex")
4706 (set_attr "mode" "OI")])
4707
4708 (define_insn "avx_cvtpd2dq256<mask_name>"
4709 [(set (match_operand:V4SI 0 "register_operand" "=v")
4710 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
4711 UNSPEC_FIX_NOTRUNC))]
4712 "TARGET_AVX && <mask_avx512vl_condition>"
4713 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4714 [(set_attr "type" "ssecvt")
4715 (set_attr "prefix" "<mask_prefix>")
4716 (set_attr "mode" "OI")])
4717
4718 (define_expand "avx_cvtpd2dq256_2"
4719 [(set (match_operand:V8SI 0 "register_operand")
4720 (vec_concat:V8SI
4721 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
4722 UNSPEC_FIX_NOTRUNC)
4723 (match_dup 2)))]
4724 "TARGET_AVX"
4725 "operands[2] = CONST0_RTX (V4SImode);")
4726
4727 (define_insn "*avx_cvtpd2dq256_2"
4728 [(set (match_operand:V8SI 0 "register_operand" "=x")
4729 (vec_concat:V8SI
4730 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4731 UNSPEC_FIX_NOTRUNC)
4732 (match_operand:V4SI 2 "const0_operand")))]
4733 "TARGET_AVX"
4734 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
4735 [(set_attr "type" "ssecvt")
4736 (set_attr "prefix" "vex")
4737 (set_attr "btver2_decode" "vector")
4738 (set_attr "mode" "OI")])
4739
4740 (define_insn "sse2_cvtpd2dq<mask_name>"
4741 [(set (match_operand:V4SI 0 "register_operand" "=v")
4742 (vec_concat:V4SI
4743 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
4744 UNSPEC_FIX_NOTRUNC)
4745 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4746 "TARGET_SSE2 && <mask_avx512vl_condition>"
4747 {
4748 if (TARGET_AVX)
4749 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4750 else
4751 return "cvtpd2dq\t{%1, %0|%0, %1}";
4752 }
4753 [(set_attr "type" "ssecvt")
4754 (set_attr "prefix_rep" "1")
4755 (set_attr "prefix_data16" "0")
4756 (set_attr "prefix" "maybe_vex")
4757 (set_attr "mode" "TI")
4758 (set_attr "amdfam10_decode" "double")
4759 (set_attr "athlon_decode" "vector")
4760 (set_attr "bdver1_decode" "double")])
4761
4762 ;; For ufix_notrunc* insn patterns
4763 (define_mode_attr pd2udqsuff
4764 [(V8DF "") (V4DF "{y}")])
4765
4766 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
4767 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
4768 (unspec:<si2dfmode>
4769 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
4770 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4771 "TARGET_AVX512F"
4772 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4773 [(set_attr "type" "ssecvt")
4774 (set_attr "prefix" "evex")
4775 (set_attr "mode" "<sseinsnmode>")])
4776
4777 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
4778 [(set (match_operand:V4SI 0 "register_operand" "=v")
4779 (vec_concat:V4SI
4780 (unspec:V2SI
4781 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4782 UNSPEC_UNSIGNED_FIX_NOTRUNC)
4783 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4784 "TARGET_AVX512VL"
4785 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4786 [(set_attr "type" "ssecvt")
4787 (set_attr "prefix" "evex")
4788 (set_attr "mode" "TI")])
4789
4790 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
4791 [(set (match_operand:V8SI 0 "register_operand" "=v")
4792 (any_fix:V8SI
4793 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4794 "TARGET_AVX512F"
4795 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4796 [(set_attr "type" "ssecvt")
4797 (set_attr "prefix" "evex")
4798 (set_attr "mode" "OI")])
4799
4800 (define_insn "ufix_truncv2dfv2si2<mask_name>"
4801 [(set (match_operand:V4SI 0 "register_operand" "=v")
4802 (vec_concat:V4SI
4803 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4804 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4805 "TARGET_AVX512VL"
4806 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4807 [(set_attr "type" "ssecvt")
4808 (set_attr "prefix" "evex")
4809 (set_attr "mode" "TI")])
4810
4811 (define_insn "fix_truncv4dfv4si2<mask_name>"
4812 [(set (match_operand:V4SI 0 "register_operand" "=v")
4813 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4814 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
4815 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4816 [(set_attr "type" "ssecvt")
4817 (set_attr "prefix" "maybe_evex")
4818 (set_attr "mode" "OI")])
4819
4820 (define_insn "ufix_truncv4dfv4si2<mask_name>"
4821 [(set (match_operand:V4SI 0 "register_operand" "=v")
4822 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4823 "TARGET_AVX512VL && TARGET_AVX512F"
4824 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4825 [(set_attr "type" "ssecvt")
4826 (set_attr "prefix" "maybe_evex")
4827 (set_attr "mode" "OI")])
4828
4829 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
4830 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4831 (any_fix:<sseintvecmode>
4832 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4833 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
4834 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4835 [(set_attr "type" "ssecvt")
4836 (set_attr "prefix" "evex")
4837 (set_attr "mode" "<sseintvecmode2>")])
4838
4839 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4840 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4841 (unspec:<sseintvecmode>
4842 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
4843 UNSPEC_FIX_NOTRUNC))]
4844 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4845 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4846 [(set_attr "type" "ssecvt")
4847 (set_attr "prefix" "evex")
4848 (set_attr "mode" "<sseintvecmode2>")])
4849
4850 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4851 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4852 (unspec:<sseintvecmode>
4853 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
4854 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4855 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4856 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4857 [(set_attr "type" "ssecvt")
4858 (set_attr "prefix" "evex")
4859 (set_attr "mode" "<sseintvecmode2>")])
4860
4861 (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
4862 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
4863 (any_fix:<sselongvecmode>
4864 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4865 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
4866 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4867 [(set_attr "type" "ssecvt")
4868 (set_attr "prefix" "evex")
4869 (set_attr "mode" "<sseintvecmode3>")])
4870
4871 (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
4872 [(set (match_operand:V2DI 0 "register_operand" "=v")
4873 (any_fix:V2DI
4874 (vec_select:V2SF
4875 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4876 (parallel [(const_int 0) (const_int 1)]))))]
4877 "TARGET_AVX512DQ && TARGET_AVX512VL"
4878 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4879 [(set_attr "type" "ssecvt")
4880 (set_attr "prefix" "evex")
4881 (set_attr "mode" "TI")])
4882
4883 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
4884 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4885 (unsigned_fix:<sseintvecmode>
4886 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
4887 "TARGET_AVX512VL"
4888 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4889 [(set_attr "type" "ssecvt")
4890 (set_attr "prefix" "evex")
4891 (set_attr "mode" "<sseintvecmode2>")])
4892
4893 (define_expand "avx_cvttpd2dq256_2"
4894 [(set (match_operand:V8SI 0 "register_operand")
4895 (vec_concat:V8SI
4896 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
4897 (match_dup 2)))]
4898 "TARGET_AVX"
4899 "operands[2] = CONST0_RTX (V4SImode);")
4900
4901 (define_insn "sse2_cvttpd2dq<mask_name>"
4902 [(set (match_operand:V4SI 0 "register_operand" "=v")
4903 (vec_concat:V4SI
4904 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
4905 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4906 "TARGET_SSE2 && <mask_avx512vl_condition>"
4907 {
4908 if (TARGET_AVX)
4909 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4910 else
4911 return "cvttpd2dq\t{%1, %0|%0, %1}";
4912 }
4913 [(set_attr "type" "ssecvt")
4914 (set_attr "amdfam10_decode" "double")
4915 (set_attr "athlon_decode" "vector")
4916 (set_attr "bdver1_decode" "double")
4917 (set_attr "prefix" "maybe_vex")
4918 (set_attr "mode" "TI")])
4919
4920 (define_insn "sse2_cvtsd2ss<round_name>"
4921 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4922 (vec_merge:V4SF
4923 (vec_duplicate:V4SF
4924 (float_truncate:V2SF
4925 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
4926 (match_operand:V4SF 1 "register_operand" "0,0,v")
4927 (const_int 1)))]
4928 "TARGET_SSE2"
4929 "@
4930 cvtsd2ss\t{%2, %0|%0, %2}
4931 cvtsd2ss\t{%2, %0|%0, %q2}
4932 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
4933 [(set_attr "isa" "noavx,noavx,avx")
4934 (set_attr "type" "ssecvt")
4935 (set_attr "athlon_decode" "vector,double,*")
4936 (set_attr "amdfam10_decode" "vector,double,*")
4937 (set_attr "bdver1_decode" "direct,direct,*")
4938 (set_attr "btver2_decode" "double,double,double")
4939 (set_attr "prefix" "orig,orig,<round_prefix>")
4940 (set_attr "mode" "SF")])
4941
4942 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
4943 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4944 (vec_merge:V2DF
4945 (float_extend:V2DF
4946 (vec_select:V2SF
4947 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
4948 (parallel [(const_int 0) (const_int 1)])))
4949 (match_operand:V2DF 1 "register_operand" "0,0,v")
4950 (const_int 1)))]
4951 "TARGET_SSE2"
4952 "@
4953 cvtss2sd\t{%2, %0|%0, %2}
4954 cvtss2sd\t{%2, %0|%0, %k2}
4955 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
4956 [(set_attr "isa" "noavx,noavx,avx")
4957 (set_attr "type" "ssecvt")
4958 (set_attr "amdfam10_decode" "vector,double,*")
4959 (set_attr "athlon_decode" "direct,direct,*")
4960 (set_attr "bdver1_decode" "direct,direct,*")
4961 (set_attr "btver2_decode" "double,double,double")
4962 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
4963 (set_attr "mode" "DF")])
4964
4965 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
4966 [(set (match_operand:V8SF 0 "register_operand" "=v")
4967 (float_truncate:V8SF
4968 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
4969 "TARGET_AVX512F"
4970 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4971 [(set_attr "type" "ssecvt")
4972 (set_attr "prefix" "evex")
4973 (set_attr "mode" "V8SF")])
4974
4975 (define_insn "avx_cvtpd2ps256<mask_name>"
4976 [(set (match_operand:V4SF 0 "register_operand" "=v")
4977 (float_truncate:V4SF
4978 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4979 "TARGET_AVX && <mask_avx512vl_condition>"
4980 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4981 [(set_attr "type" "ssecvt")
4982 (set_attr "prefix" "maybe_evex")
4983 (set_attr "btver2_decode" "vector")
4984 (set_attr "mode" "V4SF")])
4985
4986 (define_expand "sse2_cvtpd2ps"
4987 [(set (match_operand:V4SF 0 "register_operand")
4988 (vec_concat:V4SF
4989 (float_truncate:V2SF
4990 (match_operand:V2DF 1 "vector_operand"))
4991 (match_dup 2)))]
4992 "TARGET_SSE2"
4993 "operands[2] = CONST0_RTX (V2SFmode);")
4994
4995 (define_expand "sse2_cvtpd2ps_mask"
4996 [(set (match_operand:V4SF 0 "register_operand")
4997 (vec_merge:V4SF
4998 (vec_concat:V4SF
4999 (float_truncate:V2SF
5000 (match_operand:V2DF 1 "vector_operand"))
5001 (match_dup 4))
5002 (match_operand:V4SF 2 "register_operand")
5003 (match_operand:QI 3 "register_operand")))]
5004 "TARGET_SSE2"
5005 "operands[4] = CONST0_RTX (V2SFmode);")
5006
5007 (define_insn "*sse2_cvtpd2ps<mask_name>"
5008 [(set (match_operand:V4SF 0 "register_operand" "=v")
5009 (vec_concat:V4SF
5010 (float_truncate:V2SF
5011 (match_operand:V2DF 1 "vector_operand" "vBm"))
5012 (match_operand:V2SF 2 "const0_operand")))]
5013 "TARGET_SSE2 && <mask_avx512vl_condition>"
5014 {
5015 if (TARGET_AVX)
5016 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
5017 else
5018 return "cvtpd2ps\t{%1, %0|%0, %1}";
5019 }
5020 [(set_attr "type" "ssecvt")
5021 (set_attr "amdfam10_decode" "double")
5022 (set_attr "athlon_decode" "vector")
5023 (set_attr "bdver1_decode" "double")
5024 (set_attr "prefix_data16" "1")
5025 (set_attr "prefix" "maybe_vex")
5026 (set_attr "mode" "V4SF")])
5027
5028 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
5029 (define_mode_attr sf2dfmode
5030 [(V8DF "V8SF") (V4DF "V4SF")])
5031
5032 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
5033 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5034 (float_extend:VF2_512_256
5035 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5036 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
5037 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5038 [(set_attr "type" "ssecvt")
5039 (set_attr "prefix" "maybe_vex")
5040 (set_attr "mode" "<MODE>")])
5041
5042 (define_insn "*avx_cvtps2pd256_2"
5043 [(set (match_operand:V4DF 0 "register_operand" "=x")
5044 (float_extend:V4DF
5045 (vec_select:V4SF
5046 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5047 (parallel [(const_int 0) (const_int 1)
5048 (const_int 2) (const_int 3)]))))]
5049 "TARGET_AVX"
5050 "vcvtps2pd\t{%x1, %0|%0, %x1}"
5051 [(set_attr "type" "ssecvt")
5052 (set_attr "prefix" "vex")
5053 (set_attr "mode" "V4DF")])
5054
5055 (define_insn "vec_unpacks_lo_v16sf"
5056 [(set (match_operand:V8DF 0 "register_operand" "=v")
5057 (float_extend:V8DF
5058 (vec_select:V8SF
5059 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5060 (parallel [(const_int 0) (const_int 1)
5061 (const_int 2) (const_int 3)
5062 (const_int 4) (const_int 5)
5063 (const_int 6) (const_int 7)]))))]
5064 "TARGET_AVX512F"
5065 "vcvtps2pd\t{%t1, %0|%0, %t1}"
5066 [(set_attr "type" "ssecvt")
5067 (set_attr "prefix" "evex")
5068 (set_attr "mode" "V8DF")])
5069
5070 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5071 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5072 (unspec:<avx512fmaskmode>
5073 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
5074 UNSPEC_CVTINT2MASK))]
5075 "TARGET_AVX512BW"
5076 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5077 [(set_attr "prefix" "evex")
5078 (set_attr "mode" "<sseinsnmode>")])
5079
5080 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5081 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5082 (unspec:<avx512fmaskmode>
5083 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
5084 UNSPEC_CVTINT2MASK))]
5085 "TARGET_AVX512DQ"
5086 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5087 [(set_attr "prefix" "evex")
5088 (set_attr "mode" "<sseinsnmode>")])
5089
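;; Mask-to-vector conversion: every set bit of the mask operand becomes an
;; all-ones destination element and every clear bit a zero element, i.e. the
;; vpmovm2{b,w} (and, below, vpmovm2{d,q}) semantics expressed as a vec_merge
;; of constant -1 and 0 vectors.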
5090 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5091 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5092 (vec_merge:VI12_AVX512VL
5093 (match_dup 2)
5094 (match_dup 3)
5095 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5096 "TARGET_AVX512BW"
5097 {
5098 operands[2] = CONSTM1_RTX (<MODE>mode);
5099 operands[3] = CONST0_RTX (<MODE>mode);
5100 })
5101
5102 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5103 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5104 (vec_merge:VI12_AVX512VL
5105 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
5106 (match_operand:VI12_AVX512VL 3 "const0_operand")
5107 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5108 "TARGET_AVX512BW"
5109 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5110 [(set_attr "prefix" "evex")
5111 (set_attr "mode" "<sseinsnmode>")])
5112
5113 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5114 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5115 (vec_merge:VI48_AVX512VL
5116 (match_dup 2)
5117 (match_dup 3)
5118 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5119 "TARGET_AVX512DQ"
5120 "{
5121 operands[2] = CONSTM1_RTX (<MODE>mode);
5122 operands[3] = CONST0_RTX (<MODE>mode);
5123 }")
5124
5125 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5126 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5127 (vec_merge:VI48_AVX512VL
5128 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
5129 (match_operand:VI48_AVX512VL 3 "const0_operand")
5130 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5131 "TARGET_AVX512DQ"
5132 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5133 [(set_attr "prefix" "evex")
5134 (set_attr "mode" "<sseinsnmode>")])
5135
5136 (define_insn "sse2_cvtps2pd<mask_name>"
5137 [(set (match_operand:V2DF 0 "register_operand" "=v")
5138 (float_extend:V2DF
5139 (vec_select:V2SF
5140 (match_operand:V4SF 1 "vector_operand" "vm")
5141 (parallel [(const_int 0) (const_int 1)]))))]
5142 "TARGET_SSE2 && <mask_avx512vl_condition>"
5143 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5144 [(set_attr "type" "ssecvt")
5145 (set_attr "amdfam10_decode" "direct")
5146 (set_attr "athlon_decode" "double")
5147 (set_attr "bdver1_decode" "double")
5148 (set_attr "prefix_data16" "0")
5149 (set_attr "prefix" "maybe_vex")
5150 (set_attr "mode" "V2DF")])
5151
5152 (define_expand "vec_unpacks_hi_v4sf"
5153 [(set (match_dup 2)
5154 (vec_select:V4SF
5155 (vec_concat:V8SF
5156 (match_dup 2)
5157 (match_operand:V4SF 1 "vector_operand"))
5158 (parallel [(const_int 6) (const_int 7)
5159 (const_int 2) (const_int 3)])))
5160 (set (match_operand:V2DF 0 "register_operand")
5161 (float_extend:V2DF
5162 (vec_select:V2SF
5163 (match_dup 2)
5164 (parallel [(const_int 0) (const_int 1)]))))]
5165 "TARGET_SSE2"
5166 "operands[2] = gen_reg_rtx (V4SFmode);")
5167
5168 (define_expand "vec_unpacks_hi_v8sf"
5169 [(set (match_dup 2)
5170 (vec_select:V4SF
5171 (match_operand:V8SF 1 "register_operand")
5172 (parallel [(const_int 4) (const_int 5)
5173 (const_int 6) (const_int 7)])))
5174 (set (match_operand:V4DF 0 "register_operand")
5175 (float_extend:V4DF
5176 (match_dup 2)))]
5177 "TARGET_AVX"
5178 "operands[2] = gen_reg_rtx (V4SFmode);")
5179
5180 (define_expand "vec_unpacks_hi_v16sf"
5181 [(set (match_dup 2)
5182 (vec_select:V8SF
5183 (match_operand:V16SF 1 "register_operand")
5184 (parallel [(const_int 8) (const_int 9)
5185 (const_int 10) (const_int 11)
5186 (const_int 12) (const_int 13)
5187 (const_int 14) (const_int 15)])))
5188 (set (match_operand:V8DF 0 "register_operand")
5189 (float_extend:V8DF
5190 (match_dup 2)))]
5191 "TARGET_AVX512F"
5192 "operands[2] = gen_reg_rtx (V8SFmode);")
5193
5194 (define_expand "vec_unpacks_lo_v4sf"
5195 [(set (match_operand:V2DF 0 "register_operand")
5196 (float_extend:V2DF
5197 (vec_select:V2SF
5198 (match_operand:V4SF 1 "vector_operand")
5199 (parallel [(const_int 0) (const_int 1)]))))]
5200 "TARGET_SSE2")
5201
5202 (define_expand "vec_unpacks_lo_v8sf"
5203 [(set (match_operand:V4DF 0 "register_operand")
5204 (float_extend:V4DF
5205 (vec_select:V4SF
5206 (match_operand:V8SF 1 "nonimmediate_operand")
5207 (parallel [(const_int 0) (const_int 1)
5208 (const_int 2) (const_int 3)]))))]
5209 "TARGET_AVX")
5210
5211 (define_mode_attr sseunpackfltmode
5212 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5213 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
5214
5215 (define_expand "vec_unpacks_float_hi_<mode>"
5216 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5217 (match_operand:VI2_AVX512F 1 "register_operand")]
5218 "TARGET_SSE2"
5219 {
5220 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5221
5222 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
5223 emit_insn (gen_rtx_SET (operands[0],
5224 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5225 DONE;
5226 })
5227
5228 (define_expand "vec_unpacks_float_lo_<mode>"
5229 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5230 (match_operand:VI2_AVX512F 1 "register_operand")]
5231 "TARGET_SSE2"
5232 {
5233 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5234
5235 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
5236 emit_insn (gen_rtx_SET (operands[0],
5237 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5238 DONE;
5239 })
5240
5241 (define_expand "vec_unpacku_float_hi_<mode>"
5242 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5243 (match_operand:VI2_AVX512F 1 "register_operand")]
5244 "TARGET_SSE2"
5245 {
5246 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5247
5248 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5249 emit_insn (gen_rtx_SET (operands[0],
5250 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5251 DONE;
5252 })
5253
5254 (define_expand "vec_unpacku_float_lo_<mode>"
5255 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5256 (match_operand:VI2_AVX512F 1 "register_operand")]
5257 "TARGET_SSE2"
5258 {
5259 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5260
5261 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5262 emit_insn (gen_rtx_SET (operands[0],
5263 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5264 DONE;
5265 })
5266
5267 (define_expand "vec_unpacks_float_hi_v4si"
5268 [(set (match_dup 2)
5269 (vec_select:V4SI
5270 (match_operand:V4SI 1 "vector_operand")
5271 (parallel [(const_int 2) (const_int 3)
5272 (const_int 2) (const_int 3)])))
5273 (set (match_operand:V2DF 0 "register_operand")
5274 (float:V2DF
5275 (vec_select:V2SI
5276 (match_dup 2)
5277 (parallel [(const_int 0) (const_int 1)]))))]
5278 "TARGET_SSE2"
5279 "operands[2] = gen_reg_rtx (V4SImode);")
5280
5281 (define_expand "vec_unpacks_float_lo_v4si"
5282 [(set (match_operand:V2DF 0 "register_operand")
5283 (float:V2DF
5284 (vec_select:V2SI
5285 (match_operand:V4SI 1 "vector_operand")
5286 (parallel [(const_int 0) (const_int 1)]))))]
5287 "TARGET_SSE2")
5288
5289 (define_expand "vec_unpacks_float_hi_v8si"
5290 [(set (match_dup 2)
5291 (vec_select:V4SI
5292 (match_operand:V8SI 1 "vector_operand")
5293 (parallel [(const_int 4) (const_int 5)
5294 (const_int 6) (const_int 7)])))
5295 (set (match_operand:V4DF 0 "register_operand")
5296 (float:V4DF
5297 (match_dup 2)))]
5298 "TARGET_AVX"
5299 "operands[2] = gen_reg_rtx (V4SImode);")
5300
5301 (define_expand "vec_unpacks_float_lo_v8si"
5302 [(set (match_operand:V4DF 0 "register_operand")
5303 (float:V4DF
5304 (vec_select:V4SI
5305 (match_operand:V8SI 1 "nonimmediate_operand")
5306 (parallel [(const_int 0) (const_int 1)
5307 (const_int 2) (const_int 3)]))))]
5308 "TARGET_AVX")
5309
5310 (define_expand "vec_unpacks_float_hi_v16si"
5311 [(set (match_dup 2)
5312 (vec_select:V8SI
5313 (match_operand:V16SI 1 "nonimmediate_operand")
5314 (parallel [(const_int 8) (const_int 9)
5315 (const_int 10) (const_int 11)
5316 (const_int 12) (const_int 13)
5317 (const_int 14) (const_int 15)])))
5318 (set (match_operand:V8DF 0 "register_operand")
5319 (float:V8DF
5320 (match_dup 2)))]
5321 "TARGET_AVX512F"
5322 "operands[2] = gen_reg_rtx (V8SImode);")
5323
5324 (define_expand "vec_unpacks_float_lo_v16si"
5325 [(set (match_operand:V8DF 0 "register_operand")
5326 (float:V8DF
5327 (vec_select:V8SI
5328 (match_operand:V16SI 1 "nonimmediate_operand")
5329 (parallel [(const_int 0) (const_int 1)
5330 (const_int 2) (const_int 3)
5331 (const_int 4) (const_int 5)
5332 (const_int 6) (const_int 7)]))))]
5333 "TARGET_AVX512F")
5334
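;; The vec_unpacku_float_* expanders below emulate unsigned SImode to DFmode
;; conversion with the signed converter: where the signed result is negative
;; (i.e. bit 31 of the source was set), 2^32 is added back.  For example,
;; 0x80000000 converts as signed to -2147483648.0 and the +2^32 fixup yields
;; the correct 2147483648.0.  The SSE/AVX variants do this with a compare,
;; AND and add; the AVX512F variants use a mask register and a masked add.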
5335 (define_expand "vec_unpacku_float_hi_v4si"
5336 [(set (match_dup 5)
5337 (vec_select:V4SI
5338 (match_operand:V4SI 1 "vector_operand")
5339 (parallel [(const_int 2) (const_int 3)
5340 (const_int 2) (const_int 3)])))
5341 (set (match_dup 6)
5342 (float:V2DF
5343 (vec_select:V2SI
5344 (match_dup 5)
5345 (parallel [(const_int 0) (const_int 1)]))))
5346 (set (match_dup 7)
5347 (lt:V2DF (match_dup 6) (match_dup 3)))
5348 (set (match_dup 8)
5349 (and:V2DF (match_dup 7) (match_dup 4)))
5350 (set (match_operand:V2DF 0 "register_operand")
5351 (plus:V2DF (match_dup 6) (match_dup 8)))]
5352 "TARGET_SSE2"
5353 {
5354 REAL_VALUE_TYPE TWO32r;
5355 rtx x;
5356 int i;
5357
5358 real_ldexp (&TWO32r, &dconst1, 32);
5359 x = const_double_from_real_value (TWO32r, DFmode);
5360
5361 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5362 operands[4] = force_reg (V2DFmode,
5363 ix86_build_const_vector (V2DFmode, 1, x));
5364
5365 operands[5] = gen_reg_rtx (V4SImode);
5366
5367 for (i = 6; i < 9; i++)
5368 operands[i] = gen_reg_rtx (V2DFmode);
5369 })
5370
5371 (define_expand "vec_unpacku_float_lo_v4si"
5372 [(set (match_dup 5)
5373 (float:V2DF
5374 (vec_select:V2SI
5375 (match_operand:V4SI 1 "vector_operand")
5376 (parallel [(const_int 0) (const_int 1)]))))
5377 (set (match_dup 6)
5378 (lt:V2DF (match_dup 5) (match_dup 3)))
5379 (set (match_dup 7)
5380 (and:V2DF (match_dup 6) (match_dup 4)))
5381 (set (match_operand:V2DF 0 "register_operand")
5382 (plus:V2DF (match_dup 5) (match_dup 7)))]
5383 "TARGET_SSE2"
5384 {
5385 REAL_VALUE_TYPE TWO32r;
5386 rtx x;
5387 int i;
5388
5389 real_ldexp (&TWO32r, &dconst1, 32);
5390 x = const_double_from_real_value (TWO32r, DFmode);
5391
5392 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5393 operands[4] = force_reg (V2DFmode,
5394 ix86_build_const_vector (V2DFmode, 1, x));
5395
5396 for (i = 5; i < 8; i++)
5397 operands[i] = gen_reg_rtx (V2DFmode);
5398 })
5399
5400 (define_expand "vec_unpacku_float_hi_v8si"
5401 [(match_operand:V4DF 0 "register_operand")
5402 (match_operand:V8SI 1 "register_operand")]
5403 "TARGET_AVX"
5404 {
5405 REAL_VALUE_TYPE TWO32r;
5406 rtx x, tmp[6];
5407 int i;
5408
5409 real_ldexp (&TWO32r, &dconst1, 32);
5410 x = const_double_from_real_value (TWO32r, DFmode);
5411
5412 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5413 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5414 tmp[5] = gen_reg_rtx (V4SImode);
5415
5416 for (i = 2; i < 5; i++)
5417 tmp[i] = gen_reg_rtx (V4DFmode);
5418 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5419 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5420 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5421 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5422 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5423 DONE;
5424 })
5425
5426 (define_expand "vec_unpacku_float_hi_v16si"
5427 [(match_operand:V8DF 0 "register_operand")
5428 (match_operand:V16SI 1 "register_operand")]
5429 "TARGET_AVX512F"
5430 {
5431 REAL_VALUE_TYPE TWO32r;
5432 rtx k, x, tmp[4];
5433
5434 real_ldexp (&TWO32r, &dconst1, 32);
5435 x = const_double_from_real_value (TWO32r, DFmode);
5436
5437 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5438 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5439 tmp[2] = gen_reg_rtx (V8DFmode);
5440 tmp[3] = gen_reg_rtx (V8SImode);
5441 k = gen_reg_rtx (QImode);
5442
5443 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5444 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5445 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5446 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5447 emit_move_insn (operands[0], tmp[2]);
5448 DONE;
5449 })
5450
5451 (define_expand "vec_unpacku_float_lo_v8si"
5452 [(match_operand:V4DF 0 "register_operand")
5453 (match_operand:V8SI 1 "nonimmediate_operand")]
5454 "TARGET_AVX"
5455 {
5456 REAL_VALUE_TYPE TWO32r;
5457 rtx x, tmp[5];
5458 int i;
5459
5460 real_ldexp (&TWO32r, &dconst1, 32);
5461 x = const_double_from_real_value (TWO32r, DFmode);
5462
5463 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5464 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5465
5466 for (i = 2; i < 5; i++)
5467 tmp[i] = gen_reg_rtx (V4DFmode);
5468 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5469 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5470 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5471 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5472 DONE;
5473 })
5474
5475 (define_expand "vec_unpacku_float_lo_v16si"
5476 [(match_operand:V8DF 0 "register_operand")
5477 (match_operand:V16SI 1 "nonimmediate_operand")]
5478 "TARGET_AVX512F"
5479 {
5480 REAL_VALUE_TYPE TWO32r;
5481 rtx k, x, tmp[3];
5482
5483 real_ldexp (&TWO32r, &dconst1, 32);
5484 x = const_double_from_real_value (TWO32r, DFmode);
5485
5486 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5487 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5488 tmp[2] = gen_reg_rtx (V8DFmode);
5489 k = gen_reg_rtx (QImode);
5490
5491 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5492 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5493 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5494 emit_move_insn (operands[0], tmp[2]);
5495 DONE;
5496 })
5497
5498 (define_expand "vec_pack_trunc_<mode>"
5499 [(set (match_dup 3)
5500 (float_truncate:<sf2dfmode>
5501 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5502 (set (match_dup 4)
5503 (float_truncate:<sf2dfmode>
5504 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5505 (set (match_operand:<ssePSmode> 0 "register_operand")
5506 (vec_concat:<ssePSmode>
5507 (match_dup 3)
5508 (match_dup 4)))]
5509 "TARGET_AVX"
5510 {
5511 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5512 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
5513 })
5514
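;; Two strategies for packing two V2DF vectors into one V4SF: with AVX (and
;; no preference for 128-bit vectors) the inputs are concatenated into a V4DF
;; and converted with a single vcvtpd2ps; otherwise each half is converted
;; separately and the halves are merged with movlhps.  The sfix_trunc and
;; sfix V2DF pack expanders below follow the same scheme.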
5515 (define_expand "vec_pack_trunc_v2df"
5516 [(match_operand:V4SF 0 "register_operand")
5517 (match_operand:V2DF 1 "vector_operand")
5518 (match_operand:V2DF 2 "vector_operand")]
5519 "TARGET_SSE2"
5520 {
5521 rtx tmp0, tmp1;
5522
5523 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5524 {
5525 tmp0 = gen_reg_rtx (V4DFmode);
5526 tmp1 = force_reg (V2DFmode, operands[1]);
5527
5528 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5529 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5530 }
5531 else
5532 {
5533 tmp0 = gen_reg_rtx (V4SFmode);
5534 tmp1 = gen_reg_rtx (V4SFmode);
5535
5536 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5537 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5538 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
5539 }
5540 DONE;
5541 })
5542
5543 (define_expand "vec_pack_sfix_trunc_v8df"
5544 [(match_operand:V16SI 0 "register_operand")
5545 (match_operand:V8DF 1 "nonimmediate_operand")
5546 (match_operand:V8DF 2 "nonimmediate_operand")]
5547 "TARGET_AVX512F"
5548 {
5549 rtx r1, r2;
5550
5551 r1 = gen_reg_rtx (V8SImode);
5552 r2 = gen_reg_rtx (V8SImode);
5553
5554 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5555 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5556 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5557 DONE;
5558 })
5559
5560 (define_expand "vec_pack_sfix_trunc_v4df"
5561 [(match_operand:V8SI 0 "register_operand")
5562 (match_operand:V4DF 1 "nonimmediate_operand")
5563 (match_operand:V4DF 2 "nonimmediate_operand")]
5564 "TARGET_AVX"
5565 {
5566 rtx r1, r2;
5567
5568 r1 = gen_reg_rtx (V4SImode);
5569 r2 = gen_reg_rtx (V4SImode);
5570
5571 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
5572 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
5573 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
5574 DONE;
5575 })
5576
5577 (define_expand "vec_pack_sfix_trunc_v2df"
5578 [(match_operand:V4SI 0 "register_operand")
5579 (match_operand:V2DF 1 "vector_operand")
5580 (match_operand:V2DF 2 "vector_operand")]
5581 "TARGET_SSE2"
5582 {
5583 rtx tmp0, tmp1, tmp2;
5584
5585 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5586 {
5587 tmp0 = gen_reg_rtx (V4DFmode);
5588 tmp1 = force_reg (V2DFmode, operands[1]);
5589
5590 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5591 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
5592 }
5593 else
5594 {
5595 tmp0 = gen_reg_rtx (V4SImode);
5596 tmp1 = gen_reg_rtx (V4SImode);
5597 tmp2 = gen_reg_rtx (V2DImode);
5598
5599 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
5600 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
5601 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5602 gen_lowpart (V2DImode, tmp0),
5603 gen_lowpart (V2DImode, tmp1)));
5604 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5605 }
5606 DONE;
5607 })
5608
5609 (define_mode_attr ssepackfltmode
5610 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
5611
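;; vec_pack_ufix_trunc: V8DF uses the AVX512F unsigned conversion directly.
;; The narrower modes have no packed unsigned conversion, so the expander
;; emulates it: ix86_expand_adjust_ufix_to_sfix_si in effect biases each
;; input so that the signed fix_trunc produces the right bit pattern and
;; hands back a per-element correction vector; the two corrections are
;; interleaved to match the packed layout and applied with a final XOR.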
5612 (define_expand "vec_pack_ufix_trunc_<mode>"
5613 [(match_operand:<ssepackfltmode> 0 "register_operand")
5614 (match_operand:VF2 1 "register_operand")
5615 (match_operand:VF2 2 "register_operand")]
5616 "TARGET_SSE2"
5617 {
5618 if (<MODE>mode == V8DFmode)
5619 {
5620 rtx r1, r2;
5621
5622 r1 = gen_reg_rtx (V8SImode);
5623 r2 = gen_reg_rtx (V8SImode);
5624
5625 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
5626 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
5627 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5628 }
5629 else
5630 {
5631 rtx tmp[7];
5632 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5633 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
5634 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
5635 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
5636 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
5637 {
5638 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
5639 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
5640 }
5641 else
5642 {
5643 tmp[5] = gen_reg_rtx (V8SFmode);
5644 ix86_expand_vec_extract_even_odd (tmp[5],
5645 gen_lowpart (V8SFmode, tmp[2]),
5646 gen_lowpart (V8SFmode, tmp[3]), 0);
5647 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
5648 }
5649 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
5650 operands[0], 0, OPTAB_DIRECT);
5651 if (tmp[6] != operands[0])
5652 emit_move_insn (operands[0], tmp[6]);
5653 }
5654
5655 DONE;
5656 })
5657
5658 (define_expand "vec_pack_sfix_v4df"
5659 [(match_operand:V8SI 0 "register_operand")
5660 (match_operand:V4DF 1 "nonimmediate_operand")
5661 (match_operand:V4DF 2 "nonimmediate_operand")]
5662 "TARGET_AVX"
5663 {
5664 rtx r1, r2;
5665
5666 r1 = gen_reg_rtx (V4SImode);
5667 r2 = gen_reg_rtx (V4SImode);
5668
5669 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
5670 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
5671 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
5672 DONE;
5673 })
5674
5675 (define_expand "vec_pack_sfix_v2df"
5676 [(match_operand:V4SI 0 "register_operand")
5677 (match_operand:V2DF 1 "vector_operand")
5678 (match_operand:V2DF 2 "vector_operand")]
5679 "TARGET_SSE2"
5680 {
5681 rtx tmp0, tmp1, tmp2;
5682
5683 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5684 {
5685 tmp0 = gen_reg_rtx (V4DFmode);
5686 tmp1 = force_reg (V2DFmode, operands[1]);
5687
5688 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5689 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
5690 }
5691 else
5692 {
5693 tmp0 = gen_reg_rtx (V4SImode);
5694 tmp1 = gen_reg_rtx (V4SImode);
5695 tmp2 = gen_reg_rtx (V2DImode);
5696
5697 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
5698 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
5699 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5700 gen_lowpart (V2DImode, tmp0),
5701 gen_lowpart (V2DImode, tmp1)));
5702 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5703 }
5704 DONE;
5705 })
5706
5707 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5708 ;;
5709 ;; Parallel single-precision floating point element swizzling
5710 ;;
5711 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5712
5713 (define_expand "sse_movhlps_exp"
5714 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5715 (vec_select:V4SF
5716 (vec_concat:V8SF
5717 (match_operand:V4SF 1 "nonimmediate_operand")
5718 (match_operand:V4SF 2 "nonimmediate_operand"))
5719 (parallel [(const_int 6)
5720 (const_int 7)
5721 (const_int 2)
5722 (const_int 3)])))]
5723 "TARGET_SSE"
5724 {
5725 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5726
5727 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
5728
5729 /* Fix up the destination if needed. */
5730 if (dst != operands[0])
5731 emit_move_insn (operands[0], dst);
5732
5733 DONE;
5734 })
5735
5736 (define_insn "sse_movhlps"
5737 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5738 (vec_select:V4SF
5739 (vec_concat:V8SF
5740 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5741 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
5742 (parallel [(const_int 6)
5743 (const_int 7)
5744 (const_int 2)
5745 (const_int 3)])))]
5746 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5747 "@
5748 movhlps\t{%2, %0|%0, %2}
5749 vmovhlps\t{%2, %1, %0|%0, %1, %2}
5750 movlps\t{%H2, %0|%0, %H2}
5751 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
5752 %vmovhps\t{%2, %0|%q0, %2}"
5753 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5754 (set_attr "type" "ssemov")
5755 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5756 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
5757
5758 (define_expand "sse_movlhps_exp"
5759 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5760 (vec_select:V4SF
5761 (vec_concat:V8SF
5762 (match_operand:V4SF 1 "nonimmediate_operand")
5763 (match_operand:V4SF 2 "nonimmediate_operand"))
5764 (parallel [(const_int 0)
5765 (const_int 1)
5766 (const_int 4)
5767 (const_int 5)])))]
5768 "TARGET_SSE"
5769 {
5770 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5771
5772 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
5773
5774 /* Fix up the destination if needed. */
5775 if (dst != operands[0])
5776 emit_move_insn (operands[0], dst);
5777
5778 DONE;
5779 })
5780
5781 (define_insn "sse_movlhps"
5782 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5783 (vec_select:V4SF
5784 (vec_concat:V8SF
5785 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5786 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
5787 (parallel [(const_int 0)
5788 (const_int 1)
5789 (const_int 4)
5790 (const_int 5)])))]
5791 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
5792 "@
5793 movlhps\t{%2, %0|%0, %2}
5794 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5795 movhps\t{%2, %0|%0, %q2}
5796 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5797 %vmovlps\t{%2, %H0|%H0, %2}"
5798 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5799 (set_attr "type" "ssemov")
5800 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5801 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
5802
5803 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
5804 [(set (match_operand:V16SF 0 "register_operand" "=v")
5805 (vec_select:V16SF
5806 (vec_concat:V32SF
5807 (match_operand:V16SF 1 "register_operand" "v")
5808 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5809 (parallel [(const_int 2) (const_int 18)
5810 (const_int 3) (const_int 19)
5811 (const_int 6) (const_int 22)
5812 (const_int 7) (const_int 23)
5813 (const_int 10) (const_int 26)
5814 (const_int 11) (const_int 27)
5815 (const_int 14) (const_int 30)
5816 (const_int 15) (const_int 31)])))]
5817 "TARGET_AVX512F"
5818 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5819 [(set_attr "type" "sselog")
5820 (set_attr "prefix" "evex")
5821 (set_attr "mode" "V16SF")])
5822
5823 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
5824 (define_insn "avx_unpckhps256<mask_name>"
5825 [(set (match_operand:V8SF 0 "register_operand" "=v")
5826 (vec_select:V8SF
5827 (vec_concat:V16SF
5828 (match_operand:V8SF 1 "register_operand" "v")
5829 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5830 (parallel [(const_int 2) (const_int 10)
5831 (const_int 3) (const_int 11)
5832 (const_int 6) (const_int 14)
5833 (const_int 7) (const_int 15)])))]
5834 "TARGET_AVX && <mask_avx512vl_condition>"
5835 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5836 [(set_attr "type" "sselog")
5837 (set_attr "prefix" "vex")
5838 (set_attr "mode" "V8SF")])
5839
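;; vec_interleave_highv8sf cannot be a single unpckhps because of the lane
;; restriction noted above.  The expander therefore forms both in-lane
;; interleaves and then takes the high 128-bit lane of each: with operands
;; {a0..a7} and {b0..b7} the temporaries are {a0,b0,a1,b1,a4,b4,a5,b5} and
;; {a2,b2,a3,b3,a6,b6,a7,b7}, and the final lane select yields
;; {a4,b4,a5,b5,a6,b6,a7,b7}.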
5840 (define_expand "vec_interleave_highv8sf"
5841 [(set (match_dup 3)
5842 (vec_select:V8SF
5843 (vec_concat:V16SF
5844 (match_operand:V8SF 1 "register_operand" "x")
5845 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5846 (parallel [(const_int 0) (const_int 8)
5847 (const_int 1) (const_int 9)
5848 (const_int 4) (const_int 12)
5849 (const_int 5) (const_int 13)])))
5850 (set (match_dup 4)
5851 (vec_select:V8SF
5852 (vec_concat:V16SF
5853 (match_dup 1)
5854 (match_dup 2))
5855 (parallel [(const_int 2) (const_int 10)
5856 (const_int 3) (const_int 11)
5857 (const_int 6) (const_int 14)
5858 (const_int 7) (const_int 15)])))
5859 (set (match_operand:V8SF 0 "register_operand")
5860 (vec_select:V8SF
5861 (vec_concat:V16SF
5862 (match_dup 3)
5863 (match_dup 4))
5864 (parallel [(const_int 4) (const_int 5)
5865 (const_int 6) (const_int 7)
5866 (const_int 12) (const_int 13)
5867 (const_int 14) (const_int 15)])))]
5868 "TARGET_AVX"
5869 {
5870 operands[3] = gen_reg_rtx (V8SFmode);
5871 operands[4] = gen_reg_rtx (V8SFmode);
5872 })
5873
5874 (define_insn "vec_interleave_highv4sf<mask_name>"
5875 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
5876 (vec_select:V4SF
5877 (vec_concat:V8SF
5878 (match_operand:V4SF 1 "register_operand" "0,v")
5879 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
5880 (parallel [(const_int 2) (const_int 6)
5881 (const_int 3) (const_int 7)])))]
5882 "TARGET_SSE && <mask_avx512vl_condition>"
5883 "@
5884 unpckhps\t{%2, %0|%0, %2}
5885 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5886 [(set_attr "isa" "noavx,avx")
5887 (set_attr "type" "sselog")
5888 (set_attr "prefix" "orig,vex")
5889 (set_attr "mode" "V4SF")])
5890
5891 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
5892 [(set (match_operand:V16SF 0 "register_operand" "=v")
5893 (vec_select:V16SF
5894 (vec_concat:V32SF
5895 (match_operand:V16SF 1 "register_operand" "v")
5896 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5897 (parallel [(const_int 0) (const_int 16)
5898 (const_int 1) (const_int 17)
5899 (const_int 4) (const_int 20)
5900 (const_int 5) (const_int 21)
5901 (const_int 8) (const_int 24)
5902 (const_int 9) (const_int 25)
5903 (const_int 12) (const_int 28)
5904 (const_int 13) (const_int 29)])))]
5905 "TARGET_AVX512F"
5906 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5907 [(set_attr "type" "sselog")
5908 (set_attr "prefix" "evex")
5909 (set_attr "mode" "V16SF")])
5910
5911 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
5912 (define_insn "avx_unpcklps256<mask_name>"
5913 [(set (match_operand:V8SF 0 "register_operand" "=v")
5914 (vec_select:V8SF
5915 (vec_concat:V16SF
5916 (match_operand:V8SF 1 "register_operand" "v")
5917 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5918 (parallel [(const_int 0) (const_int 8)
5919 (const_int 1) (const_int 9)
5920 (const_int 4) (const_int 12)
5921 (const_int 5) (const_int 13)])))]
5922 "TARGET_AVX && <mask_avx512vl_condition>"
5923 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5924 [(set_attr "type" "sselog")
5925 (set_attr "prefix" "vex")
5926 (set_attr "mode" "V8SF")])
5927
5928 (define_insn "unpcklps128_mask"
5929 [(set (match_operand:V4SF 0 "register_operand" "=v")
5930 (vec_merge:V4SF
5931 (vec_select:V4SF
5932 (vec_concat:V8SF
5933 (match_operand:V4SF 1 "register_operand" "v")
5934 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
5935 (parallel [(const_int 0) (const_int 4)
5936 (const_int 1) (const_int 5)]))
5937 (match_operand:V4SF 3 "vector_move_operand" "0C")
5938 (match_operand:QI 4 "register_operand" "Yk")))]
5939 "TARGET_AVX512VL"
5940 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
5941 [(set_attr "type" "sselog")
5942 (set_attr "prefix" "evex")
5943 (set_attr "mode" "V4SF")])
5944
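;; vec_interleave_lowv8sf uses the same trick as the high variant above,
;; except that the final select takes the low 128-bit lane of each in-lane
;; interleave, giving {a0,b0,a1,b1,a2,b2,a3,b3}.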
5945 (define_expand "vec_interleave_lowv8sf"
5946 [(set (match_dup 3)
5947 (vec_select:V8SF
5948 (vec_concat:V16SF
5949 (match_operand:V8SF 1 "register_operand" "x")
5950 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5951 (parallel [(const_int 0) (const_int 8)
5952 (const_int 1) (const_int 9)
5953 (const_int 4) (const_int 12)
5954 (const_int 5) (const_int 13)])))
5955 (set (match_dup 4)
5956 (vec_select:V8SF
5957 (vec_concat:V16SF
5958 (match_dup 1)
5959 (match_dup 2))
5960 (parallel [(const_int 2) (const_int 10)
5961 (const_int 3) (const_int 11)
5962 (const_int 6) (const_int 14)
5963 (const_int 7) (const_int 15)])))
5964 (set (match_operand:V8SF 0 "register_operand")
5965 (vec_select:V8SF
5966 (vec_concat:V16SF
5967 (match_dup 3)
5968 (match_dup 4))
5969 (parallel [(const_int 0) (const_int 1)
5970 (const_int 2) (const_int 3)
5971 (const_int 8) (const_int 9)
5972 (const_int 10) (const_int 11)])))]
5973 "TARGET_AVX"
5974 {
5975 operands[3] = gen_reg_rtx (V8SFmode);
5976 operands[4] = gen_reg_rtx (V8SFmode);
5977 })
5978
5979 (define_insn "vec_interleave_lowv4sf"
5980 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5981 (vec_select:V4SF
5982 (vec_concat:V8SF
5983 (match_operand:V4SF 1 "register_operand" "0,x")
5984 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
5985 (parallel [(const_int 0) (const_int 4)
5986 (const_int 1) (const_int 5)])))]
5987 "TARGET_SSE"
5988 "@
5989 unpcklps\t{%2, %0|%0, %2}
5990 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5991 [(set_attr "isa" "noavx,avx")
5992 (set_attr "type" "sselog")
5993 (set_attr "prefix" "orig,vex")
5994 (set_attr "mode" "V4SF")])
5995
5996 ;; These are modeled with the same vec_concat as the others so that we
5997 ;; capture users of shufps that can use the new instructions.
5998 (define_insn "avx_movshdup256<mask_name>"
5999 [(set (match_operand:V8SF 0 "register_operand" "=v")
6000 (vec_select:V8SF
6001 (vec_concat:V16SF
6002 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6003 (match_dup 1))
6004 (parallel [(const_int 1) (const_int 1)
6005 (const_int 3) (const_int 3)
6006 (const_int 5) (const_int 5)
6007 (const_int 7) (const_int 7)])))]
6008 "TARGET_AVX && <mask_avx512vl_condition>"
6009 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6010 [(set_attr "type" "sse")
6011 (set_attr "prefix" "vex")
6012 (set_attr "mode" "V8SF")])
6013
6014 (define_insn "sse3_movshdup<mask_name>"
6015 [(set (match_operand:V4SF 0 "register_operand" "=v")
6016 (vec_select:V4SF
6017 (vec_concat:V8SF
6018 (match_operand:V4SF 1 "vector_operand" "vBm")
6019 (match_dup 1))
6020 (parallel [(const_int 1)
6021 (const_int 1)
6022 (const_int 7)
6023 (const_int 7)])))]
6024 "TARGET_SSE3 && <mask_avx512vl_condition>"
6025 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6026 [(set_attr "type" "sse")
6027 (set_attr "prefix_rep" "1")
6028 (set_attr "prefix" "maybe_vex")
6029 (set_attr "mode" "V4SF")])
6030
6031 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
6032 [(set (match_operand:V16SF 0 "register_operand" "=v")
6033 (vec_select:V16SF
6034 (vec_concat:V32SF
6035 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6036 (match_dup 1))
6037 (parallel [(const_int 1) (const_int 1)
6038 (const_int 3) (const_int 3)
6039 (const_int 5) (const_int 5)
6040 (const_int 7) (const_int 7)
6041 (const_int 9) (const_int 9)
6042 (const_int 11) (const_int 11)
6043 (const_int 13) (const_int 13)
6044 (const_int 15) (const_int 15)])))]
6045 "TARGET_AVX512F"
6046 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6047 [(set_attr "type" "sse")
6048 (set_attr "prefix" "evex")
6049 (set_attr "mode" "V16SF")])
6050
6051 (define_insn "avx_movsldup256<mask_name>"
6052 [(set (match_operand:V8SF 0 "register_operand" "=v")
6053 (vec_select:V8SF
6054 (vec_concat:V16SF
6055 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6056 (match_dup 1))
6057 (parallel [(const_int 0) (const_int 0)
6058 (const_int 2) (const_int 2)
6059 (const_int 4) (const_int 4)
6060 (const_int 6) (const_int 6)])))]
6061 "TARGET_AVX && <mask_avx512vl_condition>"
6062 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6063 [(set_attr "type" "sse")
6064 (set_attr "prefix" "vex")
6065 (set_attr "mode" "V8SF")])
6066
6067 (define_insn "sse3_movsldup<mask_name>"
6068 [(set (match_operand:V4SF 0 "register_operand" "=v")
6069 (vec_select:V4SF
6070 (vec_concat:V8SF
6071 (match_operand:V4SF 1 "vector_operand" "vBm")
6072 (match_dup 1))
6073 (parallel [(const_int 0)
6074 (const_int 0)
6075 (const_int 6)
6076 (const_int 6)])))]
6077 "TARGET_SSE3 && <mask_avx512vl_condition>"
6078 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6079 [(set_attr "type" "sse")
6080 (set_attr "prefix_rep" "1")
6081 (set_attr "prefix" "maybe_vex")
6082 (set_attr "mode" "V4SF")])
6083
6084 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
6085 [(set (match_operand:V16SF 0 "register_operand" "=v")
6086 (vec_select:V16SF
6087 (vec_concat:V32SF
6088 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6089 (match_dup 1))
6090 (parallel [(const_int 0) (const_int 0)
6091 (const_int 2) (const_int 2)
6092 (const_int 4) (const_int 4)
6093 (const_int 6) (const_int 6)
6094 (const_int 8) (const_int 8)
6095 (const_int 10) (const_int 10)
6096 (const_int 12) (const_int 12)
6097 (const_int 14) (const_int 14)])))]
6098 "TARGET_AVX512F"
6099 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6100 [(set_attr "type" "sse")
6101 (set_attr "prefix" "evex")
6102 (set_attr "mode" "V16SF")])
6103
6104 (define_expand "avx_shufps256<mask_expand4_name>"
6105 [(match_operand:V8SF 0 "register_operand")
6106 (match_operand:V8SF 1 "register_operand")
6107 (match_operand:V8SF 2 "nonimmediate_operand")
6108 (match_operand:SI 3 "const_int_operand")]
6109 "TARGET_AVX"
6110 {
6111 int mask = INTVAL (operands[3]);
6112 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6113 operands[1],
6114 operands[2],
6115 GEN_INT ((mask >> 0) & 3),
6116 GEN_INT ((mask >> 2) & 3),
6117 GEN_INT (((mask >> 4) & 3) + 8),
6118 GEN_INT (((mask >> 6) & 3) + 8),
6119 GEN_INT (((mask >> 0) & 3) + 4),
6120 GEN_INT (((mask >> 2) & 3) + 4),
6121 GEN_INT (((mask >> 4) & 3) + 12),
6122 GEN_INT (((mask >> 6) & 3) + 12)
6123 <mask_expand4_args>));
6124 DONE;
6125 })
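
;; As an illustration of the decomposition above: an immediate of 0x1b
;; becomes selectors 3, 2, 9, 8 for the low lane and 7, 6, 13, 12 for the
;; high lane, i.e. within each 128-bit lane the result is elements 3 and 2
;; of operand 1 followed by elements 1 and 0 of operand 2 (operand 2 starts
;; at index 8 in the 16-element concatenation; the high lane adds an
;; offset of 4).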
6126
6127 ;; Each 2-bit mask field selects the same element in both 128-bit lanes (2 result elements per field).
6128 (define_insn "avx_shufps256_1<mask_name>"
6129 [(set (match_operand:V8SF 0 "register_operand" "=v")
6130 (vec_select:V8SF
6131 (vec_concat:V16SF
6132 (match_operand:V8SF 1 "register_operand" "v")
6133 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6134 (parallel [(match_operand 3 "const_0_to_3_operand" )
6135 (match_operand 4 "const_0_to_3_operand" )
6136 (match_operand 5 "const_8_to_11_operand" )
6137 (match_operand 6 "const_8_to_11_operand" )
6138 (match_operand 7 "const_4_to_7_operand" )
6139 (match_operand 8 "const_4_to_7_operand" )
6140 (match_operand 9 "const_12_to_15_operand")
6141 (match_operand 10 "const_12_to_15_operand")])))]
6142 "TARGET_AVX
6143 && <mask_avx512vl_condition>
6144 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6145 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6146 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6147 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6148 {
6149 int mask;
6150 mask = INTVAL (operands[3]);
6151 mask |= INTVAL (operands[4]) << 2;
6152 mask |= (INTVAL (operands[5]) - 8) << 4;
6153 mask |= (INTVAL (operands[6]) - 8) << 6;
6154 operands[3] = GEN_INT (mask);
6155
6156 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6157 }
6158 [(set_attr "type" "sseshuf")
6159 (set_attr "length_immediate" "1")
6160 (set_attr "prefix" "<mask_prefix>")
6161 (set_attr "mode" "V8SF")])
6162
6163 (define_expand "sse_shufps<mask_expand4_name>"
6164 [(match_operand:V4SF 0 "register_operand")
6165 (match_operand:V4SF 1 "register_operand")
6166 (match_operand:V4SF 2 "vector_operand")
6167 (match_operand:SI 3 "const_int_operand")]
6168 "TARGET_SSE"
6169 {
6170 int mask = INTVAL (operands[3]);
6171 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6172 operands[1],
6173 operands[2],
6174 GEN_INT ((mask >> 0) & 3),
6175 GEN_INT ((mask >> 2) & 3),
6176 GEN_INT (((mask >> 4) & 3) + 4),
6177 GEN_INT (((mask >> 6) & 3) + 4)
6178 <mask_expand4_args>));
6179 DONE;
6180 })
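
;; The 128-bit decomposition works the same way, without the lane offset.
;; For example, the identity immediate 0xe4 (2-bit fields 0, 1, 2, 3)
;; becomes selectors 0, 1, 6, 7: elements 0 and 1 of operand 1 followed by
;; elements 2 and 3 of operand 2 in the 8-element concatenation.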
6181
6182 (define_insn "sse_shufps_v4sf_mask"
6183 [(set (match_operand:V4SF 0 "register_operand" "=v")
6184 (vec_merge:V4SF
6185 (vec_select:V4SF
6186 (vec_concat:V8SF
6187 (match_operand:V4SF 1 "register_operand" "v")
6188 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6189 (parallel [(match_operand 3 "const_0_to_3_operand")
6190 (match_operand 4 "const_0_to_3_operand")
6191 (match_operand 5 "const_4_to_7_operand")
6192 (match_operand 6 "const_4_to_7_operand")]))
6193 (match_operand:V4SF 7 "vector_move_operand" "0C")
6194 (match_operand:QI 8 "register_operand" "Yk")))]
6195 "TARGET_AVX512VL"
6196 {
6197 int mask = 0;
6198 mask |= INTVAL (operands[3]) << 0;
6199 mask |= INTVAL (operands[4]) << 2;
6200 mask |= (INTVAL (operands[5]) - 4) << 4;
6201 mask |= (INTVAL (operands[6]) - 4) << 6;
6202 operands[3] = GEN_INT (mask);
6203
6204 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6205 }
6206 [(set_attr "type" "sseshuf")
6207 (set_attr "length_immediate" "1")
6208 (set_attr "prefix" "evex")
6209 (set_attr "mode" "V4SF")])
6210
6211 (define_insn "sse_shufps_<mode>"
6212 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
6213 (vec_select:VI4F_128
6214 (vec_concat:<ssedoublevecmode>
6215 (match_operand:VI4F_128 1 "register_operand" "0,x")
6216 (match_operand:VI4F_128 2 "vector_operand" "xBm,xm"))
6217 (parallel [(match_operand 3 "const_0_to_3_operand")
6218 (match_operand 4 "const_0_to_3_operand")
6219 (match_operand 5 "const_4_to_7_operand")
6220 (match_operand 6 "const_4_to_7_operand")])))]
6221 "TARGET_SSE"
6222 {
6223 int mask = 0;
6224 mask |= INTVAL (operands[3]) << 0;
6225 mask |= INTVAL (operands[4]) << 2;
6226 mask |= (INTVAL (operands[5]) - 4) << 4;
6227 mask |= (INTVAL (operands[6]) - 4) << 6;
6228 operands[3] = GEN_INT (mask);
6229
6230 switch (which_alternative)
6231 {
6232 case 0:
6233 return "shufps\t{%3, %2, %0|%0, %2, %3}";
6234 case 1:
6235 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6236 default:
6237 gcc_unreachable ();
6238 }
6239 }
6240 [(set_attr "isa" "noavx,avx")
6241 (set_attr "type" "sseshuf")
6242 (set_attr "length_immediate" "1")
6243 (set_attr "prefix" "orig,vex")
6244 (set_attr "mode" "V4SF")])
6245
6246 (define_insn "sse_storehps"
6247 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6248 (vec_select:V2SF
6249 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
6250 (parallel [(const_int 2) (const_int 3)])))]
6251 "TARGET_SSE"
6252 "@
6253 %vmovhps\t{%1, %0|%q0, %1}
6254 %vmovhlps\t{%1, %d0|%d0, %1}
6255 %vmovlps\t{%H1, %d0|%d0, %H1}"
6256 [(set_attr "type" "ssemov")
6257 (set_attr "prefix" "maybe_vex")
6258 (set_attr "mode" "V2SF,V4SF,V2SF")])
6259
6260 (define_expand "sse_loadhps_exp"
6261 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6262 (vec_concat:V4SF
6263 (vec_select:V2SF
6264 (match_operand:V4SF 1 "nonimmediate_operand")
6265 (parallel [(const_int 0) (const_int 1)]))
6266 (match_operand:V2SF 2 "nonimmediate_operand")))]
6267 "TARGET_SSE"
6268 {
6269 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6270
6271 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6272
6273 /* Fix up the destination if needed. */
6274 if (dst != operands[0])
6275 emit_move_insn (operands[0], dst);
6276
6277 DONE;
6278 })
6279
6280 (define_insn "sse_loadhps"
6281 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
6282 (vec_concat:V4SF
6283 (vec_select:V2SF
6284 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
6285 (parallel [(const_int 0) (const_int 1)]))
6286 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
6287 "TARGET_SSE"
6288 "@
6289 movhps\t{%2, %0|%0, %q2}
6290 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6291 movlhps\t{%2, %0|%0, %2}
6292 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6293 %vmovlps\t{%2, %H0|%H0, %2}"
6294 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6295 (set_attr "type" "ssemov")
6296 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6297 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
6298
6299 (define_insn "sse_storelps"
6300 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6301 (vec_select:V2SF
6302 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
6303 (parallel [(const_int 0) (const_int 1)])))]
6304 "TARGET_SSE"
6305 "@
6306 %vmovlps\t{%1, %0|%q0, %1}
6307 %vmovaps\t{%1, %0|%0, %1}
6308 %vmovlps\t{%1, %d0|%d0, %q1}"
6309 [(set_attr "type" "ssemov")
6310 (set_attr "prefix" "maybe_vex")
6311 (set_attr "mode" "V2SF,V4SF,V2SF")])
6312
6313 (define_expand "sse_loadlps_exp"
6314 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6315 (vec_concat:V4SF
6316 (match_operand:V2SF 2 "nonimmediate_operand")
6317 (vec_select:V2SF
6318 (match_operand:V4SF 1 "nonimmediate_operand")
6319 (parallel [(const_int 2) (const_int 3)]))))]
6320 "TARGET_SSE"
6321 {
6322 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6323
6324 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6325
6326 /* Fix up the destination if needed. */
6327 if (dst != operands[0])
6328 emit_move_insn (operands[0], dst);
6329
6330 DONE;
6331 })
6332
6333 (define_insn "sse_loadlps"
6334 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
6335 (vec_concat:V4SF
6336 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
6337 (vec_select:V2SF
6338 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
6339 (parallel [(const_int 2) (const_int 3)]))))]
6340 "TARGET_SSE"
6341 "@
6342 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6343 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6344 movlps\t{%2, %0|%0, %q2}
6345 vmovlps\t{%2, %1, %0|%0, %1, %q2}
6346 %vmovlps\t{%2, %0|%q0, %2}"
6347 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6348 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6349 (set_attr "length_immediate" "1,1,*,*,*")
6350 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6351 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
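
;; In alternatives 0 and 1 above the destination already holds (or gets a
;; copy of) operand 2, and shufps with immediate 0xe4 keeps its two low
;; elements while taking elements 2 and 3 from operand 1, so the
;; concatenation needs no separate move.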
6352
6353 (define_insn "sse_movss"
6354 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6355 (vec_merge:V4SF
6356 (match_operand:V4SF 2 "register_operand" " x,x")
6357 (match_operand:V4SF 1 "register_operand" " 0,x")
6358 (const_int 1)))]
6359 "TARGET_SSE"
6360 "@
6361 movss\t{%2, %0|%0, %2}
6362 vmovss\t{%2, %1, %0|%0, %1, %2}"
6363 [(set_attr "isa" "noavx,avx")
6364 (set_attr "type" "ssemov")
6365 (set_attr "prefix" "orig,vex")
6366 (set_attr "mode" "SF")])
6367
6368 (define_insn "avx2_vec_dup<mode>"
6369 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
6370 (vec_duplicate:VF1_128_256
6371 (vec_select:SF
6372 (match_operand:V4SF 1 "register_operand" "x")
6373 (parallel [(const_int 0)]))))]
6374 "TARGET_AVX2"
6375 "vbroadcastss\t{%1, %0|%0, %1}"
6376 [(set_attr "type" "sselog1")
6377 (set_attr "prefix" "vex")
6378 (set_attr "mode" "<MODE>")])
6379
6380 (define_insn "avx2_vec_dupv8sf_1"
6381 [(set (match_operand:V8SF 0 "register_operand" "=x")
6382 (vec_duplicate:V8SF
6383 (vec_select:SF
6384 (match_operand:V8SF 1 "register_operand" "x")
6385 (parallel [(const_int 0)]))))]
6386 "TARGET_AVX2"
6387 "vbroadcastss\t{%x1, %0|%0, %x1}"
6388 [(set_attr "type" "sselog1")
6389 (set_attr "prefix" "vex")
6390 (set_attr "mode" "V8SF")])
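
;; In the broadcast patterns above and below, %x1 prints the 128-bit (xmm)
;; form of the wider source register; vbroadcastss and friends read only
;; the low scalar element of it.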
6391
6392 (define_insn "avx512f_vec_dup<mode>_1"
6393 [(set (match_operand:VF_512 0 "register_operand" "=v")
6394 (vec_duplicate:VF_512
6395 (vec_select:<ssescalarmode>
6396 (match_operand:VF_512 1 "register_operand" "v")
6397 (parallel [(const_int 0)]))))]
6398 "TARGET_AVX512F"
6399 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
6400 [(set_attr "type" "sselog1")
6401 (set_attr "prefix" "evex")
6402 (set_attr "mode" "<MODE>")])
6403
6404 ;; Although insertps takes register source, we prefer
6405 ;; unpcklps with register source since it is shorter.
6406 (define_insn "*vec_concatv2sf_sse4_1"
6407 [(set (match_operand:V2SF 0 "register_operand"
6408 "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
6409 (vec_concat:V2SF
6410 (match_operand:SF 1 "nonimmediate_operand"
6411 " 0, 0,x, 0,0, x,m, 0 , m")
6412 (match_operand:SF 2 "vector_move_operand"
6413 " Yr,*x,x, m,m, m,C,*ym, C")))]
6414 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6415 "@
6416 unpcklps\t{%2, %0|%0, %2}
6417 unpcklps\t{%2, %0|%0, %2}
6418 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6419 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6420 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6421 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6422 %vmovss\t{%1, %0|%0, %1}
6423 punpckldq\t{%2, %0|%0, %2}
6424 movd\t{%1, %0|%0, %1}"
6425 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
6426 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6427 (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*")
6428 (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*")
6429 (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*")
6430 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
6431 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
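
;; In the insertps alternatives above, immediate 0x10 sets the
;; destination-element field to 1, so the scalar from operand 2 is placed
;; in element 1 next to operand 1 in element 0 - exactly the V2SF
;; concatenation.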
6432
6433 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6434 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
6435 ;; alternatives pretty much forces the MMX alternative to be chosen.
6436 (define_insn "*vec_concatv2sf_sse"
6437 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6438 (vec_concat:V2SF
6439 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6440 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6441 "TARGET_SSE"
6442 "@
6443 unpcklps\t{%2, %0|%0, %2}
6444 movss\t{%1, %0|%0, %1}
6445 punpckldq\t{%2, %0|%0, %2}
6446 movd\t{%1, %0|%0, %1}"
6447 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6448 (set_attr "mode" "V4SF,SF,DI,DI")])
6449
6450 (define_insn "*vec_concatv4sf"
6451 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
6452 (vec_concat:V4SF
6453 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
6454 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
6455 "TARGET_SSE"
6456 "@
6457 movlhps\t{%2, %0|%0, %2}
6458 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6459 movhps\t{%2, %0|%0, %q2}
6460 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6461 [(set_attr "isa" "noavx,avx,noavx,avx")
6462 (set_attr "type" "ssemov")
6463 (set_attr "prefix" "orig,vex,orig,vex")
6464 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
6465
6466 (define_expand "vec_init<mode>"
6467 [(match_operand:V_128 0 "register_operand")
6468 (match_operand 1)]
6469 "TARGET_SSE"
6470 {
6471 ix86_expand_vector_init (false, operands[0], operands[1]);
6472 DONE;
6473 })
6474
6475 ;; Avoid combining registers from different units in a single alternative,
6476 ;; see comment above inline_secondary_memory_needed function in i386.c
6477 (define_insn "vec_set<mode>_0"
6478 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6479 "=Yr,*v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m")
6480 (vec_merge:VI4F_128
6481 (vec_duplicate:VI4F_128
6482 (match_operand:<ssescalarmode> 2 "general_operand"
6483 " Yr,*v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
6484 (match_operand:VI4F_128 1 "vector_move_operand"
6485 " C , C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
6486 (const_int 1)))]
6487 "TARGET_SSE"
6488 "@
6489 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6490 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6491 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6492 %vmovd\t{%2, %0|%0, %2}
6493 movss\t{%2, %0|%0, %2}
6494 movss\t{%2, %0|%0, %2}
6495 vmovss\t{%2, %1, %0|%0, %1, %2}
6496 pinsrd\t{$0, %2, %0|%0, %2, 0}
6497 pinsrd\t{$0, %2, %0|%0, %2, 0}
6498 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6499 #
6500 #
6501 #"
6502 [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*")
6503 (set (attr "type")
6504 (cond [(eq_attr "alternative" "0,1,7,8,9")
6505 (const_string "sselog")
6506 (eq_attr "alternative" "11")
6507 (const_string "imov")
6508 (eq_attr "alternative" "12")
6509 (const_string "fmov")
6510 ]
6511 (const_string "ssemov")))
6512 (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6513 (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6514 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*")
6515 (set_attr "mode" "SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
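
;; The insertps alternatives above use immediate 0xe: the source and
;; destination element fields are 0 and the low four (zero-mask) bits are
;; 1110, so the scalar is inserted into element 0 while elements 1-3 are
;; cleared, matching the merge with the zero vector in operand 1.  The %d
;; modifier duplicates the source operand to form the three-operand AVX
;; variant.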
6516
6517 ;; A subset is vec_setv4sf.
6518 (define_insn "*vec_setv4sf_sse4_1"
6519 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
6520 (vec_merge:V4SF
6521 (vec_duplicate:V4SF
6522 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm"))
6523 (match_operand:V4SF 1 "register_operand" "0,0,x")
6524 (match_operand:SI 3 "const_int_operand")))]
6525 "TARGET_SSE4_1
6526 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6527 < GET_MODE_NUNITS (V4SFmode))"
6528 {
6529 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
6530 switch (which_alternative)
6531 {
6532 case 0:
6533 case 1:
6534 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6535 case 2:
6536 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6537 default:
6538 gcc_unreachable ();
6539 }
6540 }
6541 [(set_attr "isa" "noavx,noavx,avx")
6542 (set_attr "type" "sselog")
6543 (set_attr "prefix_data16" "1,1,*")
6544 (set_attr "prefix_extra" "1")
6545 (set_attr "length_immediate" "1")
6546 (set_attr "prefix" "orig,orig,vex")
6547 (set_attr "mode" "V4SF")])
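
;; Operand 3 above is a one-bit merge mask, so exact_log2 recovers the
;; element index and the shift by 4 places it in the destination-element
;; field (bits 5:4) of the insertps immediate; e.g. a mask of 4 selects
;; element 2 and yields an immediate of 0x20.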
6548
6549 (define_insn "sse4_1_insertps"
6550 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
6551 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm")
6552 (match_operand:V4SF 1 "register_operand" "0,0,x")
6553 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
6554 UNSPEC_INSERTPS))]
6555 "TARGET_SSE4_1"
6556 {
6557 if (MEM_P (operands[2]))
6558 {
6559 unsigned count_s = INTVAL (operands[3]) >> 6;
6560 if (count_s)
6561 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
6562 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
6563 }
6564 switch (which_alternative)
6565 {
6566 case 0:
6567 case 1:
6568 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6569 case 2:
6570 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6571 default:
6572 gcc_unreachable ();
6573 }
6574 }
6575 [(set_attr "isa" "noavx,noavx,avx")
6576 (set_attr "type" "sselog")
6577 (set_attr "prefix_data16" "1,1,*")
6578 (set_attr "prefix_extra" "1")
6579 (set_attr "length_immediate" "1")
6580 (set_attr "prefix" "orig,orig,vex")
6581 (set_attr "mode" "V4SF")])
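
;; For a memory source insertps ignores the source-element field (bits 7:6
;; of the immediate), so the code above folds that field into the address
;; instead: the memory operand is offset by count_s * 4 bytes and bits 7:6
;; are cleared from the immediate.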
6582
6583 (define_split
6584 [(set (match_operand:VI4F_128 0 "memory_operand")
6585 (vec_merge:VI4F_128
6586 (vec_duplicate:VI4F_128
6587 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
6588 (match_dup 0)
6589 (const_int 1)))]
6590 "TARGET_SSE && reload_completed"
6591 [(set (match_dup 0) (match_dup 1))]
6592 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
6593
6594 (define_expand "vec_set<mode>"
6595 [(match_operand:V 0 "register_operand")
6596 (match_operand:<ssescalarmode> 1 "register_operand")
6597 (match_operand 2 "const_int_operand")]
6598 "TARGET_SSE"
6599 {
6600 ix86_expand_vector_set (false, operands[0], operands[1],
6601 INTVAL (operands[2]));
6602 DONE;
6603 })
6604
6605 (define_insn_and_split "*vec_extractv4sf_0"
6606 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
6607 (vec_select:SF
6608 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
6609 (parallel [(const_int 0)])))]
6610 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6611 "#"
6612 "&& reload_completed"
6613 [(set (match_dup 0) (match_dup 1))]
6614 "operands[1] = gen_lowpart (SFmode, operands[1]);")
6615
6616 (define_insn_and_split "*sse4_1_extractps"
6617 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x")
6618 (vec_select:SF
6619 (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x")
6620 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
6621 "TARGET_SSE4_1"
6622 "@
6623 %vextractps\t{%2, %1, %0|%0, %1, %2}
6624 %vextractps\t{%2, %1, %0|%0, %1, %2}
6625 #
6626 #"
6627 "&& reload_completed && SSE_REG_P (operands[0])"
6628 [(const_int 0)]
6629 {
6630 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
6631 switch (INTVAL (operands[2]))
6632 {
6633 case 1:
6634 case 3:
6635 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
6636 operands[2], operands[2],
6637 GEN_INT (INTVAL (operands[2]) + 4),
6638 GEN_INT (INTVAL (operands[2]) + 4)));
6639 break;
6640 case 2:
6641 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
6642 break;
6643 default:
6644 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
6645 gcc_unreachable ();
6646 }
6647 DONE;
6648 }
6649 [(set_attr "isa" "*,*,noavx,avx")
6650 (set_attr "type" "sselog,sselog,*,*")
6651 (set_attr "prefix_data16" "1,1,*,*")
6652 (set_attr "prefix_extra" "1,1,*,*")
6653 (set_attr "length_immediate" "1,1,*,*")
6654 (set_attr "prefix" "maybe_vex,maybe_vex,*,*")
6655 (set_attr "mode" "V4SF,V4SF,*,*")])
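
;; When the extractps destination ends up in an SSE register, the split
;; above instead moves the selected element into position 0 of a vector
;; temporary: elements 1 and 3 are broadcast with shufps (selector and
;; selector+4 pick the same element from the two copies of operand 1),
;; element 2 comes from unpckhps of operand 1 with itself, and element 0
;; is already handled by *vec_extractv4sf_0.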
6656
6657 (define_insn_and_split "*vec_extractv4sf_mem"
6658 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
6659 (vec_select:SF
6660 (match_operand:V4SF 1 "memory_operand" "o,o,o")
6661 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
6662 "TARGET_SSE"
6663 "#"
6664 "&& reload_completed"
6665 [(set (match_dup 0) (match_dup 1))]
6666 {
6667 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
6668 })
6669
6670 (define_mode_attr extract_type
6671 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
6672
6673 (define_mode_attr extract_suf
6674 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
6675
6676 (define_mode_iterator AVX512_VEC
6677 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
6678
6679 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
6680 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
6681 (match_operand:AVX512_VEC 1 "register_operand")
6682 (match_operand:SI 2 "const_0_to_3_operand")
6683 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
6684 (match_operand:QI 4 "register_operand")]
6685 "TARGET_AVX512F"
6686 {
6687 int mask;
6688 mask = INTVAL (operands[2]);
6689
6690 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6691 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
6692
6693 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
6694 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
6695 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
6696 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
6697 operands[4]));
6698 else
6699 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (operands[0],
6700 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
6701 operands[4]));
6702 DONE;
6703 })
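
;; For example, extracting quarter 2 of a V16SF passes element indices
;; 8, 9, 10 and 11 to the 32x4 pattern, and extracting quarter 2 of a
;; V8DF passes indices 4 and 5 to the 64x2 pattern; the insns below fold
;; those indices back into the immediate by dividing by 4 or 2.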
6704
6705 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
6706 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6707 (vec_merge:<ssequartermode>
6708 (vec_select:<ssequartermode>
6709 (match_operand:V8FI 1 "register_operand" "v")
6710 (parallel [(match_operand 2 "const_0_to_7_operand")
6711 (match_operand 3 "const_0_to_7_operand")]))
6712 (match_operand:<ssequartermode> 4 "memory_operand" "0")
6713 (match_operand:QI 5 "register_operand" "k")))]
6714 "TARGET_AVX512DQ
6715 && (INTVAL (operands[2]) % 2 == 0)
6716 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
6717 && rtx_equal_p (operands[4], operands[0])"
6718 {
6719 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6720 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
6721 }
6722 [(set_attr "type" "sselog")
6723 (set_attr "prefix_extra" "1")
6724 (set_attr "length_immediate" "1")
6725 (set_attr "memory" "store")
6726 (set_attr "prefix" "evex")
6727 (set_attr "mode" "<sseinsnmode>")])
6728
6729 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
6730 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6731 (vec_merge:<ssequartermode>
6732 (vec_select:<ssequartermode>
6733 (match_operand:V16FI 1 "register_operand" "v")
6734 (parallel [(match_operand 2 "const_0_to_15_operand")
6735 (match_operand 3 "const_0_to_15_operand")
6736 (match_operand 4 "const_0_to_15_operand")
6737 (match_operand 5 "const_0_to_15_operand")]))
6738 (match_operand:<ssequartermode> 6 "memory_operand" "0")
6739 (match_operand:QI 7 "register_operand" "Yk")))]
6740 "TARGET_AVX512F
6741 && ((INTVAL (operands[2]) % 4 == 0)
6742 && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6743 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6744 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))
6745 && rtx_equal_p (operands[6], operands[0])"
6746 {
6747 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6748 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
6749 }
6750 [(set_attr "type" "sselog")
6751 (set_attr "prefix_extra" "1")
6752 (set_attr "length_immediate" "1")
6753 (set_attr "memory" "store")
6754 (set_attr "prefix" "evex")
6755 (set_attr "mode" "<sseinsnmode>")])
6756
6757 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
6758 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6759 (vec_select:<ssequartermode>
6760 (match_operand:V8FI 1 "register_operand" "v")
6761 (parallel [(match_operand 2 "const_0_to_7_operand")
6762 (match_operand 3 "const_0_to_7_operand")])))]
6763 "TARGET_AVX512DQ && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)"
6764 {
6765 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6766 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
6767 }
6768 [(set_attr "type" "sselog1")
6769 (set_attr "prefix_extra" "1")
6770 (set_attr "length_immediate" "1")
6771 (set_attr "prefix" "evex")
6772 (set_attr "mode" "<sseinsnmode>")])
6773
6774 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
6775 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6776 (vec_select:<ssequartermode>
6777 (match_operand:V16FI 1 "register_operand" "v")
6778 (parallel [(match_operand 2 "const_0_to_15_operand")
6779 (match_operand 3 "const_0_to_15_operand")
6780 (match_operand 4 "const_0_to_15_operand")
6781 (match_operand 5 "const_0_to_15_operand")])))]
6782 "TARGET_AVX512F
6783 && (INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6784 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6785 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
6786 {
6787 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6788 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
6789 }
6790 [(set_attr "type" "sselog1")
6791 (set_attr "prefix_extra" "1")
6792 (set_attr "length_immediate" "1")
6793 (set_attr "prefix" "evex")
6794 (set_attr "mode" "<sseinsnmode>")])
6795
6796 (define_mode_attr extract_type_2
6797 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
6798
6799 (define_mode_attr extract_suf_2
6800 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
6801
6802 (define_mode_iterator AVX512_VEC_2
6803 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
6804
6805 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
6806 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6807 (match_operand:AVX512_VEC_2 1 "register_operand")
6808 (match_operand:SI 2 "const_0_to_1_operand")
6809 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
6810 (match_operand:QI 4 "register_operand")]
6811 "TARGET_AVX512F"
6812 {
6813 rtx (*insn)(rtx, rtx, rtx, rtx);
6814
6815 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6816 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6817
6818 switch (INTVAL (operands[2]))
6819 {
6820 case 0:
6821 insn = gen_vec_extract_lo_<mode>_mask;
6822 break;
6823 case 1:
6824 insn = gen_vec_extract_hi_<mode>_mask;
6825 break;
6826 default:
6827 gcc_unreachable ();
6828 }
6829
6830 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
6831 DONE;
6832 })
6833
6834 (define_split
6835 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6836 (vec_select:<ssehalfvecmode>
6837 (match_operand:V8FI 1 "nonimmediate_operand")
6838 (parallel [(const_int 0) (const_int 1)
6839 (const_int 2) (const_int 3)])))]
6840 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6841 && reload_completed
6842 && (TARGET_AVX512VL
6843 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
6844 [(set (match_dup 0) (match_dup 1))]
6845 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
6846
6847 (define_insn "vec_extract_lo_<mode>_maskm"
6848 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6849 (vec_merge:<ssehalfvecmode>
6850 (vec_select:<ssehalfvecmode>
6851 (match_operand:V8FI 1 "register_operand" "v")
6852 (parallel [(const_int 0) (const_int 1)
6853 (const_int 2) (const_int 3)]))
6854 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6855 (match_operand:QI 3 "register_operand" "Yk")))]
6856 "TARGET_AVX512F
6857 && rtx_equal_p (operands[2], operands[0])"
6858 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
6859 [(set_attr "type" "sselog1")
6860 (set_attr "prefix_extra" "1")
6861 (set_attr "length_immediate" "1")
6862 (set_attr "prefix" "evex")
6863 (set_attr "mode" "<sseinsnmode>")])
6864
6865 (define_insn "vec_extract_lo_<mode><mask_name>"
6866 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
6867 (vec_select:<ssehalfvecmode>
6868 (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
6869 (parallel [(const_int 0) (const_int 1)
6870 (const_int 2) (const_int 3)])))]
6871 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6872 {
6873 if (<mask_applied> || !TARGET_AVX512VL)
6874 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6875 else
6876 return "#";
6877 }
6878 [(set_attr "type" "sselog1")
6879 (set_attr "prefix_extra" "1")
6880 (set_attr "length_immediate" "1")
6881 (set_attr "prefix" "evex")
6882 (set_attr "mode" "<sseinsnmode>")])
6883
6884 (define_insn "vec_extract_hi_<mode>_maskm"
6885 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6886 (vec_merge:<ssehalfvecmode>
6887 (vec_select:<ssehalfvecmode>
6888 (match_operand:V8FI 1 "register_operand" "v")
6889 (parallel [(const_int 4) (const_int 5)
6890 (const_int 6) (const_int 7)]))
6891 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6892 (match_operand:QI 3 "register_operand" "Yk")))]
6893 "TARGET_AVX512F
6894 && rtx_equal_p (operands[2], operands[0])"
6895 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6896 [(set_attr "type" "sselog")
6897 (set_attr "prefix_extra" "1")
6898 (set_attr "length_immediate" "1")
6899 (set_attr "memory" "store")
6900 (set_attr "prefix" "evex")
6901 (set_attr "mode" "<sseinsnmode>")])
6902
6903 (define_insn "vec_extract_hi_<mode><mask_name>"
6904 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6905 (vec_select:<ssehalfvecmode>
6906 (match_operand:V8FI 1 "register_operand" "v")
6907 (parallel [(const_int 4) (const_int 5)
6908 (const_int 6) (const_int 7)])))]
6909 "TARGET_AVX512F"
6910 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
6911 [(set_attr "type" "sselog1")
6912 (set_attr "prefix_extra" "1")
6913 (set_attr "length_immediate" "1")
6914 (set_attr "prefix" "evex")
6915 (set_attr "mode" "<sseinsnmode>")])
6916
6917 (define_insn "vec_extract_hi_<mode>_maskm"
6918 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6919 (vec_merge:<ssehalfvecmode>
6920 (vec_select:<ssehalfvecmode>
6921 (match_operand:V16FI 1 "register_operand" "v")
6922 (parallel [(const_int 8) (const_int 9)
6923 (const_int 10) (const_int 11)
6924 (const_int 12) (const_int 13)
6925 (const_int 14) (const_int 15)]))
6926 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6927 (match_operand:QI 3 "register_operand" "k")))]
6928 "TARGET_AVX512DQ
6929 && rtx_equal_p (operands[2], operands[0])"
6930 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6931 [(set_attr "type" "sselog1")
6932 (set_attr "prefix_extra" "1")
6933 (set_attr "length_immediate" "1")
6934 (set_attr "prefix" "evex")
6935 (set_attr "mode" "<sseinsnmode>")])
6936
6937 (define_insn "vec_extract_hi_<mode><mask_name>"
6938 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
6939 (vec_select:<ssehalfvecmode>
6940 (match_operand:V16FI 1 "register_operand" "v,v")
6941 (parallel [(const_int 8) (const_int 9)
6942 (const_int 10) (const_int 11)
6943 (const_int 12) (const_int 13)
6944 (const_int 14) (const_int 15)])))]
6945 "TARGET_AVX512F && <mask_avx512dq_condition>"
6946 "@
6947 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
6948 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6949 [(set_attr "type" "sselog1")
6950 (set_attr "prefix_extra" "1")
6951 (set_attr "isa" "avx512dq,noavx512dq")
6952 (set_attr "length_immediate" "1")
6953 (set_attr "prefix" "evex")
6954 (set_attr "mode" "<sseinsnmode>")])
6955
6956 (define_expand "avx512vl_vextractf128<mode>"
6957 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6958 (match_operand:VI48F_256 1 "register_operand")
6959 (match_operand:SI 2 "const_0_to_1_operand")
6960 (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
6961 (match_operand:QI 4 "register_operand")]
6962 "TARGET_AVX512DQ && TARGET_AVX512VL"
6963 {
6964 rtx (*insn)(rtx, rtx, rtx, rtx);
6965
6966 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6967 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6968
6969 switch (INTVAL (operands[2]))
6970 {
6971 case 0:
6972 insn = gen_vec_extract_lo_<mode>_mask;
6973 break;
6974 case 1:
6975 insn = gen_vec_extract_hi_<mode>_mask;
6976 break;
6977 default:
6978 gcc_unreachable ();
6979 }
6980
6981 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
6982 DONE;
6983 })
6984
6985 (define_expand "avx_vextractf128<mode>"
6986 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6987 (match_operand:V_256 1 "register_operand")
6988 (match_operand:SI 2 "const_0_to_1_operand")]
6989 "TARGET_AVX"
6990 {
6991 rtx (*insn)(rtx, rtx);
6992
6993 switch (INTVAL (operands[2]))
6994 {
6995 case 0:
6996 insn = gen_vec_extract_lo_<mode>;
6997 break;
6998 case 1:
6999 insn = gen_vec_extract_hi_<mode>;
7000 break;
7001 default:
7002 gcc_unreachable ();
7003 }
7004
7005 emit_insn (insn (operands[0], operands[1]));
7006 DONE;
7007 })
7008
7009 (define_insn "vec_extract_lo_<mode><mask_name>"
7010 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
7011 (vec_select:<ssehalfvecmode>
7012 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
7013 (parallel [(const_int 0) (const_int 1)
7014 (const_int 2) (const_int 3)
7015 (const_int 4) (const_int 5)
7016 (const_int 6) (const_int 7)])))]
7017 "TARGET_AVX512F
7018 && <mask_mode512bit_condition>
7019 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7020 {
7021 if (<mask_applied>)
7022 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7023 else
7024 return "#";
7025 })
7026
7027 (define_split
7028 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7029 (vec_select:<ssehalfvecmode>
7030 (match_operand:V16FI 1 "nonimmediate_operand")
7031 (parallel [(const_int 0) (const_int 1)
7032 (const_int 2) (const_int 3)
7033 (const_int 4) (const_int 5)
7034 (const_int 6) (const_int 7)])))]
7035 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7036 && reload_completed"
7037 [(set (match_dup 0) (match_dup 1))]
7038 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7039
7040 (define_insn "vec_extract_lo_<mode><mask_name>"
7041 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
7042 (vec_select:<ssehalfvecmode>
7043 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
7044 (parallel [(const_int 0) (const_int 1)])))]
7045 "TARGET_AVX
7046 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7047 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7048 {
7049 if (<mask_applied>)
7050 return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
7051 else
7052 return "#";
7053 }
7054 [(set_attr "type" "sselog")
7055 (set_attr "prefix_extra" "1")
7056 (set_attr "length_immediate" "1")
7057 (set_attr "memory" "none,store")
7058 (set_attr "prefix" "evex")
7059 (set_attr "mode" "XI")])
7060
7061 (define_split
7062 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7063 (vec_select:<ssehalfvecmode>
7064 (match_operand:VI8F_256 1 "nonimmediate_operand")
7065 (parallel [(const_int 0) (const_int 1)])))]
7066 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7067 && reload_completed"
7068 [(set (match_dup 0) (match_dup 1))]
7069 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7070
7071 (define_insn "vec_extract_hi_<mode><mask_name>"
7072 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
7073 (vec_select:<ssehalfvecmode>
7074 (match_operand:VI8F_256 1 "register_operand" "v,v")
7075 (parallel [(const_int 2) (const_int 3)])))]
7076 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7077 {
7078 if (TARGET_AVX512VL)
7079 {
7080 if (TARGET_AVX512DQ)
7081 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7082 else
7083 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
7084 }
7085 else
7086 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7087 }
7088 [(set_attr "type" "sselog")
7089 (set_attr "prefix_extra" "1")
7090 (set_attr "length_immediate" "1")
7091 (set_attr "memory" "none,store")
7092 (set_attr "prefix" "vex")
7093 (set_attr "mode" "<sseinsnmode>")])
7094
7095 (define_split
7096 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7097 (vec_select:<ssehalfvecmode>
7098 (match_operand:VI4F_256 1 "nonimmediate_operand")
7099 (parallel [(const_int 0) (const_int 1)
7100 (const_int 2) (const_int 3)])))]
7101 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7102 && reload_completed"
7103 [(set (match_dup 0) (match_dup 1))]
7104 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7105
7106 (define_insn "vec_extract_lo_<mode><mask_name>"
7107 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7108 (vec_select:<ssehalfvecmode>
7109 (match_operand:VI4F_256 1 "register_operand" "v")
7110 (parallel [(const_int 0) (const_int 1)
7111 (const_int 2) (const_int 3)])))]
7112 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7113 {
7114 if (<mask_applied>)
7115 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7116 else
7117 return "#";
7118 }
7119 [(set_attr "type" "sselog1")
7120 (set_attr "prefix_extra" "1")
7121 (set_attr "length_immediate" "1")
7122 (set_attr "prefix" "evex")
7123 (set_attr "mode" "<sseinsnmode>")])
7124
7125 (define_insn "vec_extract_lo_<mode>_maskm"
7126 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7127 (vec_merge:<ssehalfvecmode>
7128 (vec_select:<ssehalfvecmode>
7129 (match_operand:VI4F_256 1 "register_operand" "v")
7130 (parallel [(const_int 0) (const_int 1)
7131 (const_int 2) (const_int 3)]))
7132 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7133 (match_operand:QI 3 "register_operand" "k")))]
7134 "TARGET_AVX512VL && TARGET_AVX512F
7135 && rtx_equal_p (operands[2], operands[0])"
7136 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7137 [(set_attr "type" "sselog1")
7138 (set_attr "prefix_extra" "1")
7139 (set_attr "length_immediate" "1")
7140 (set_attr "prefix" "evex")
7141 (set_attr "mode" "<sseinsnmode>")])
7142
7143 (define_insn "vec_extract_hi_<mode>_maskm"
7144 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7145 (vec_merge:<ssehalfvecmode>
7146 (vec_select:<ssehalfvecmode>
7147 (match_operand:VI4F_256 1 "register_operand" "v")
7148 (parallel [(const_int 4) (const_int 5)
7149 (const_int 6) (const_int 7)]))
7150 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7151 (match_operand:QI 3 "register_operand" "k")))]
7152 "TARGET_AVX512F && TARGET_AVX512VL
7153 && rtx_equal_p (operands[2], operands[0])"
7154 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7155 [(set_attr "type" "sselog1")
7156 (set_attr "length_immediate" "1")
7157 (set_attr "prefix" "evex")
7158 (set_attr "mode" "<sseinsnmode>")])
7159
7160 (define_insn "vec_extract_hi_<mode>_mask"
7161 [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
7162 (vec_merge:<ssehalfvecmode>
7163 (vec_select:<ssehalfvecmode>
7164 (match_operand:VI4F_256 1 "register_operand" "v")
7165 (parallel [(const_int 4) (const_int 5)
7166 (const_int 6) (const_int 7)]))
7167 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "0C")
7168 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
7169 "TARGET_AVX512VL"
7170 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
7171 [(set_attr "type" "sselog1")
7172 (set_attr "length_immediate" "1")
7173 (set_attr "prefix" "evex")
7174 (set_attr "mode" "<sseinsnmode>")])
7175
7176 (define_insn "vec_extract_hi_<mode>"
7177 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
7178 (vec_select:<ssehalfvecmode>
7179 (match_operand:VI4F_256 1 "register_operand" "x, v")
7180 (parallel [(const_int 4) (const_int 5)
7181 (const_int 6) (const_int 7)])))]
7182 "TARGET_AVX"
7183 "@
7184 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
7185 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7186 [(set_attr "isa" "*, avx512vl")
7187 (set_attr "prefix" "vex, evex")
7188 (set_attr "type" "sselog1")
7189 (set_attr "length_immediate" "1")
7190 (set_attr "mode" "<sseinsnmode>")])
7191
7192 (define_insn_and_split "vec_extract_lo_v32hi"
7193 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7194 (vec_select:V16HI
7195 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
7196 (parallel [(const_int 0) (const_int 1)
7197 (const_int 2) (const_int 3)
7198 (const_int 4) (const_int 5)
7199 (const_int 6) (const_int 7)
7200 (const_int 8) (const_int 9)
7201 (const_int 10) (const_int 11)
7202 (const_int 12) (const_int 13)
7203 (const_int 14) (const_int 15)])))]
7204 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7205 "#"
7206 "&& reload_completed"
7207 [(set (match_dup 0) (match_dup 1))]
7208 "operands[1] = gen_lowpart (V16HImode, operands[1]);")
7209
7210 (define_insn "vec_extract_hi_v32hi"
7211 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7212 (vec_select:V16HI
7213 (match_operand:V32HI 1 "register_operand" "v,v")
7214 (parallel [(const_int 16) (const_int 17)
7215 (const_int 18) (const_int 19)
7216 (const_int 20) (const_int 21)
7217 (const_int 22) (const_int 23)
7218 (const_int 24) (const_int 25)
7219 (const_int 26) (const_int 27)
7220 (const_int 28) (const_int 29)
7221 (const_int 30) (const_int 31)])))]
7222 "TARGET_AVX512F"
7223 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7224 [(set_attr "type" "sselog")
7225 (set_attr "prefix_extra" "1")
7226 (set_attr "length_immediate" "1")
7227 (set_attr "memory" "none,store")
7228 (set_attr "prefix" "evex")
7229 (set_attr "mode" "XI")])
7230
7231 (define_insn_and_split "vec_extract_lo_v16hi"
7232 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7233 (vec_select:V8HI
7234 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
7235 (parallel [(const_int 0) (const_int 1)
7236 (const_int 2) (const_int 3)
7237 (const_int 4) (const_int 5)
7238 (const_int 6) (const_int 7)])))]
7239 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7240 "#"
7241 "&& reload_completed"
7242 [(set (match_dup 0) (match_dup 1))]
7243 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
7244
7245 (define_insn "vec_extract_hi_v16hi"
7246 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7247 (vec_select:V8HI
7248 (match_operand:V16HI 1 "register_operand" "x,x")
7249 (parallel [(const_int 8) (const_int 9)
7250 (const_int 10) (const_int 11)
7251 (const_int 12) (const_int 13)
7252 (const_int 14) (const_int 15)])))]
7253 "TARGET_AVX"
7254 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7255 [(set_attr "type" "sselog")
7256 (set_attr "prefix_extra" "1")
7257 (set_attr "length_immediate" "1")
7258 (set_attr "memory" "none,store")
7259 (set_attr "prefix" "vex")
7260 (set_attr "mode" "OI")])
7261
7262 (define_insn_and_split "vec_extract_lo_v64qi"
7263 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7264 (vec_select:V32QI
7265 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
7266 (parallel [(const_int 0) (const_int 1)
7267 (const_int 2) (const_int 3)
7268 (const_int 4) (const_int 5)
7269 (const_int 6) (const_int 7)
7270 (const_int 8) (const_int 9)
7271 (const_int 10) (const_int 11)
7272 (const_int 12) (const_int 13)
7273 (const_int 14) (const_int 15)
7274 (const_int 16) (const_int 17)
7275 (const_int 18) (const_int 19)
7276 (const_int 20) (const_int 21)
7277 (const_int 22) (const_int 23)
7278 (const_int 24) (const_int 25)
7279 (const_int 26) (const_int 27)
7280 (const_int 28) (const_int 29)
7281 (const_int 30) (const_int 31)])))]
7282 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7283 "#"
7284 "&& reload_completed"
7285 [(set (match_dup 0) (match_dup 1))]
7286 "operands[1] = gen_lowpart (V32QImode, operands[1]);")
7287
7288 (define_insn "vec_extract_hi_v64qi"
7289 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7290 (vec_select:V32QI
7291 (match_operand:V64QI 1 "register_operand" "v,v")
7292 (parallel [(const_int 32) (const_int 33)
7293 (const_int 34) (const_int 35)
7294 (const_int 36) (const_int 37)
7295 (const_int 38) (const_int 39)
7296 (const_int 40) (const_int 41)
7297 (const_int 42) (const_int 43)
7298 (const_int 44) (const_int 45)
7299 (const_int 46) (const_int 47)
7300 (const_int 48) (const_int 49)
7301 (const_int 50) (const_int 51)
7302 (const_int 52) (const_int 53)
7303 (const_int 54) (const_int 55)
7304 (const_int 56) (const_int 57)
7305 (const_int 58) (const_int 59)
7306 (const_int 60) (const_int 61)
7307 (const_int 62) (const_int 63)])))]
7308 "TARGET_AVX512F"
7309 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7310 [(set_attr "type" "sselog")
7311 (set_attr "prefix_extra" "1")
7312 (set_attr "length_immediate" "1")
7313 (set_attr "memory" "none,store")
7314 (set_attr "prefix" "evex")
7315 (set_attr "mode" "XI")])
7316
7317 (define_insn_and_split "vec_extract_lo_v32qi"
7318 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7319 (vec_select:V16QI
7320 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
7321 (parallel [(const_int 0) (const_int 1)
7322 (const_int 2) (const_int 3)
7323 (const_int 4) (const_int 5)
7324 (const_int 6) (const_int 7)
7325 (const_int 8) (const_int 9)
7326 (const_int 10) (const_int 11)
7327 (const_int 12) (const_int 13)
7328 (const_int 14) (const_int 15)])))]
7329 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7330 "#"
7331 "&& reload_completed"
7332 [(set (match_dup 0) (match_dup 1))]
7333 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
7334
7335 (define_insn "vec_extract_hi_v32qi"
7336 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7337 (vec_select:V16QI
7338 (match_operand:V32QI 1 "register_operand" "x,x")
7339 (parallel [(const_int 16) (const_int 17)
7340 (const_int 18) (const_int 19)
7341 (const_int 20) (const_int 21)
7342 (const_int 22) (const_int 23)
7343 (const_int 24) (const_int 25)
7344 (const_int 26) (const_int 27)
7345 (const_int 28) (const_int 29)
7346 (const_int 30) (const_int 31)])))]
7347 "TARGET_AVX"
7348 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7349 [(set_attr "type" "sselog")
7350 (set_attr "prefix_extra" "1")
7351 (set_attr "length_immediate" "1")
7352 (set_attr "memory" "none,store")
7353 (set_attr "prefix" "vex")
7354 (set_attr "mode" "OI")])
7355
7356 ;; Modes handled by vec_extract patterns.
7357 (define_mode_iterator VEC_EXTRACT_MODE
7358 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
7359 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
7360 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
7361 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
7362 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
7363 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
7364
7365 (define_expand "vec_extract<mode>"
7366 [(match_operand:<ssescalarmode> 0 "register_operand")
7367 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
7368 (match_operand 2 "const_int_operand")]
7369 "TARGET_SSE"
7370 {
7371 ix86_expand_vector_extract (false, operands[0], operands[1],
7372 INTVAL (operands[2]));
7373 DONE;
7374 })
7375
7376 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7377 ;;
7378 ;; Parallel double-precision floating point element swizzling
7379 ;;
7380 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7381
7382 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
7383 [(set (match_operand:V8DF 0 "register_operand" "=v")
7384 (vec_select:V8DF
7385 (vec_concat:V16DF
7386 (match_operand:V8DF 1 "register_operand" "v")
7387 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7388 (parallel [(const_int 1) (const_int 9)
7389 (const_int 3) (const_int 11)
7390 (const_int 5) (const_int 13)
7391 (const_int 7) (const_int 15)])))]
7392 "TARGET_AVX512F"
7393 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7394 [(set_attr "type" "sselog")
7395 (set_attr "prefix" "evex")
7396 (set_attr "mode" "V8DF")])
7397
7398 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7399 (define_insn "avx_unpckhpd256<mask_name>"
7400 [(set (match_operand:V4DF 0 "register_operand" "=v")
7401 (vec_select:V4DF
7402 (vec_concat:V8DF
7403 (match_operand:V4DF 1 "register_operand" "v")
7404 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7405 (parallel [(const_int 1) (const_int 5)
7406 (const_int 3) (const_int 7)])))]
7407 "TARGET_AVX && <mask_avx512vl_condition>"
7408 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7409 [(set_attr "type" "sselog")
7410 (set_attr "prefix" "vex")
7411 (set_attr "mode" "V4DF")])
7412
7413 (define_expand "vec_interleave_highv4df"
7414 [(set (match_dup 3)
7415 (vec_select:V4DF
7416 (vec_concat:V8DF
7417 (match_operand:V4DF 1 "register_operand" "x")
7418 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7419 (parallel [(const_int 0) (const_int 4)
7420 (const_int 2) (const_int 6)])))
7421 (set (match_dup 4)
7422 (vec_select:V4DF
7423 (vec_concat:V8DF
7424 (match_dup 1)
7425 (match_dup 2))
7426 (parallel [(const_int 1) (const_int 5)
7427 (const_int 3) (const_int 7)])))
7428 (set (match_operand:V4DF 0 "register_operand")
7429 (vec_select:V4DF
7430 (vec_concat:V8DF
7431 (match_dup 3)
7432 (match_dup 4))
7433 (parallel [(const_int 2) (const_int 3)
7434 (const_int 6) (const_int 7)])))]
7435 "TARGET_AVX"
7436 {
7437 operands[3] = gen_reg_rtx (V4DFmode);
7438 operands[4] = gen_reg_rtx (V4DFmode);
7439 })
7440
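;; With operands {a0,a1,a2,a3} and {b0,b1,b2,b3}, the expander above first
;; forms the in-lane selects {a0,b0,a2,b2} and {a1,b1,a3,b3}; the final
;; select then takes the high lane of each temporary, giving the true
;; interleave-high result {a2,b2,a3,b3}.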
7441
7442 (define_insn "avx512vl_unpckhpd128_mask"
7443 [(set (match_operand:V2DF 0 "register_operand" "=v")
7444 (vec_merge:V2DF
7445 (vec_select:V2DF
7446 (vec_concat:V4DF
7447 (match_operand:V2DF 1 "register_operand" "v")
7448 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7449 (parallel [(const_int 1) (const_int 3)]))
7450 (match_operand:V2DF 3 "vector_move_operand" "0C")
7451 (match_operand:QI 4 "register_operand" "Yk")))]
7452 "TARGET_AVX512VL"
7453 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7454 [(set_attr "type" "sselog")
7455 (set_attr "prefix" "evex")
7456 (set_attr "mode" "V2DF")])
7457
7458 (define_expand "vec_interleave_highv2df"
7459 [(set (match_operand:V2DF 0 "register_operand")
7460 (vec_select:V2DF
7461 (vec_concat:V4DF
7462 (match_operand:V2DF 1 "nonimmediate_operand")
7463 (match_operand:V2DF 2 "nonimmediate_operand"))
7464 (parallel [(const_int 1)
7465 (const_int 3)])))]
7466 "TARGET_SSE2"
7467 {
7468 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
7469 operands[2] = force_reg (V2DFmode, operands[2]);
7470 })
7471
7472 (define_insn "*vec_interleave_highv2df"
7473 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
7474 (vec_select:V2DF
7475 (vec_concat:V4DF
7476 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
7477 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
7478 (parallel [(const_int 1)
7479 (const_int 3)])))]
7480 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
7481 "@
7482 unpckhpd\t{%2, %0|%0, %2}
7483 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
7484 %vmovddup\t{%H1, %0|%0, %H1}
7485 movlpd\t{%H1, %0|%0, %H1}
7486 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
7487 %vmovhpd\t{%1, %0|%q0, %1}"
7488 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7489 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7490 (set_attr "prefix_data16" "*,*,*,1,*,1")
7491 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7492 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
7493
7494 (define_expand "avx512f_movddup512<mask_name>"
7495 [(set (match_operand:V8DF 0 "register_operand")
7496 (vec_select:V8DF
7497 (vec_concat:V16DF
7498 (match_operand:V8DF 1 "nonimmediate_operand")
7499 (match_dup 1))
7500 (parallel [(const_int 0) (const_int 8)
7501 (const_int 2) (const_int 10)
7502 (const_int 4) (const_int 12)
7503 (const_int 6) (const_int 14)])))]
7504 "TARGET_AVX512F")
7505
7506 (define_expand "avx512f_unpcklpd512<mask_name>"
7507 [(set (match_operand:V8DF 0 "register_operand")
7508 (vec_select:V8DF
7509 (vec_concat:V16DF
7510 (match_operand:V8DF 1 "register_operand")
7511 (match_operand:V8DF 2 "nonimmediate_operand"))
7512 (parallel [(const_int 0) (const_int 8)
7513 (const_int 2) (const_int 10)
7514 (const_int 4) (const_int 12)
7515 (const_int 6) (const_int 14)])))]
7516 "TARGET_AVX512F")
7517
7518 (define_insn "*avx512f_unpcklpd512<mask_name>"
7519 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
7520 (vec_select:V8DF
7521 (vec_concat:V16DF
7522 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
7523 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
7524 (parallel [(const_int 0) (const_int 8)
7525 (const_int 2) (const_int 10)
7526 (const_int 4) (const_int 12)
7527 (const_int 6) (const_int 14)])))]
7528 "TARGET_AVX512F"
7529 "@
7530 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
7531 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7532 [(set_attr "type" "sselog")
7533 (set_attr "prefix" "evex")
7534 (set_attr "mode" "V8DF")])
7535
7536 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7537 (define_expand "avx_movddup256<mask_name>"
7538 [(set (match_operand:V4DF 0 "register_operand")
7539 (vec_select:V4DF
7540 (vec_concat:V8DF
7541 (match_operand:V4DF 1 "nonimmediate_operand")
7542 (match_dup 1))
7543 (parallel [(const_int 0) (const_int 4)
7544 (const_int 2) (const_int 6)])))]
7545 "TARGET_AVX && <mask_avx512vl_condition>")
7546
7547 (define_expand "avx_unpcklpd256<mask_name>"
7548 [(set (match_operand:V4DF 0 "register_operand")
7549 (vec_select:V4DF
7550 (vec_concat:V8DF
7551 (match_operand:V4DF 1 "register_operand")
7552 (match_operand:V4DF 2 "nonimmediate_operand"))
7553 (parallel [(const_int 0) (const_int 4)
7554 (const_int 2) (const_int 6)])))]
7555 "TARGET_AVX && <mask_avx512vl_condition>")
7556
7557 (define_insn "*avx_unpcklpd256<mask_name>"
7558 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
7559 (vec_select:V4DF
7560 (vec_concat:V8DF
7561 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
7562 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
7563 (parallel [(const_int 0) (const_int 4)
7564 (const_int 2) (const_int 6)])))]
7565 "TARGET_AVX && <mask_avx512vl_condition>"
7566 "@
7567 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
7568 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
7569 [(set_attr "type" "sselog")
7570 (set_attr "prefix" "vex")
7571 (set_attr "mode" "V4DF")])
7572
7573 (define_expand "vec_interleave_lowv4df"
7574 [(set (match_dup 3)
7575 (vec_select:V4DF
7576 (vec_concat:V8DF
7577 (match_operand:V4DF 1 "register_operand" "x")
7578 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7579 (parallel [(const_int 0) (const_int 4)
7580 (const_int 2) (const_int 6)])))
7581 (set (match_dup 4)
7582 (vec_select:V4DF
7583 (vec_concat:V8DF
7584 (match_dup 1)
7585 (match_dup 2))
7586 (parallel [(const_int 1) (const_int 5)
7587 (const_int 3) (const_int 7)])))
7588 (set (match_operand:V4DF 0 "register_operand")
7589 (vec_select:V4DF
7590 (vec_concat:V8DF
7591 (match_dup 3)
7592 (match_dup 4))
7593 (parallel [(const_int 0) (const_int 1)
7594 (const_int 4) (const_int 5)])))]
7595 "TARGET_AVX"
7596 {
7597 operands[3] = gen_reg_rtx (V4DFmode);
7598 operands[4] = gen_reg_rtx (V4DFmode);
7599 })
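
;; The expander above uses the same trick as vec_interleave_highv4df; here
;; the final select takes the low lane of each temporary, yielding
;; {a0,b0,a1,b1}.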
7600
7601 (define_insn "avx512vl_unpcklpd128_mask"
7602 [(set (match_operand:V2DF 0 "register_operand" "=v")
7603 (vec_merge:V2DF
7604 (vec_select:V2DF
7605 (vec_concat:V4DF
7606 (match_operand:V2DF 1 "register_operand" "v")
7607 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7608 (parallel [(const_int 0) (const_int 2)]))
7609 (match_operand:V2DF 3 "vector_move_operand" "0C")
7610 (match_operand:QI 4 "register_operand" "Yk")))]
7611 "TARGET_AVX512VL"
7612 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7613 [(set_attr "type" "sselog")
7614 (set_attr "prefix" "evex")
7615 (set_attr "mode" "V2DF")])
7616
7617 (define_expand "vec_interleave_lowv2df"
7618 [(set (match_operand:V2DF 0 "register_operand")
7619 (vec_select:V2DF
7620 (vec_concat:V4DF
7621 (match_operand:V2DF 1 "nonimmediate_operand")
7622 (match_operand:V2DF 2 "nonimmediate_operand"))
7623 (parallel [(const_int 0)
7624 (const_int 2)])))]
7625 "TARGET_SSE2"
7626 {
7627 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
7628 operands[1] = force_reg (V2DFmode, operands[1]);
7629 })
7630
7631 (define_insn "*vec_interleave_lowv2df"
7632 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
7633 (vec_select:V2DF
7634 (vec_concat:V4DF
7635 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
7636 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
7637 (parallel [(const_int 0)
7638 (const_int 2)])))]
7639 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
7640 "@
7641 unpcklpd\t{%2, %0|%0, %2}
7642 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7643 %vmovddup\t{%1, %0|%0, %q1}
7644 movhpd\t{%2, %0|%0, %q2}
7645 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
7646 %vmovlpd\t{%2, %H0|%H0, %2}"
7647 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7648 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7649 (set_attr "prefix_data16" "*,*,*,1,*,1")
7650 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7651 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
7652
7653 (define_split
7654 [(set (match_operand:V2DF 0 "memory_operand")
7655 (vec_select:V2DF
7656 (vec_concat:V4DF
7657 (match_operand:V2DF 1 "register_operand")
7658 (match_dup 1))
7659 (parallel [(const_int 0)
7660 (const_int 2)])))]
7661 "TARGET_SSE3 && reload_completed"
7662 [(const_int 0)]
7663 {
7664 rtx low = gen_lowpart (DFmode, operands[1]);
7665
7666 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
7667 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
7668 DONE;
7669 })
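;; The split above rewrites a store of the duplicated low double {x0 x0}
;; as two scalar DFmode stores of the low half at offsets 0 and 8,
;; avoiding the need to materialize the duplicated vector.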
7670
7671 (define_split
7672 [(set (match_operand:V2DF 0 "register_operand")
7673 (vec_select:V2DF
7674 (vec_concat:V4DF
7675 (match_operand:V2DF 1 "memory_operand")
7676 (match_dup 1))
7677 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
7678 (match_operand:SI 3 "const_int_operand")])))]
7679 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
7680 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
7681 {
7682 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
7683 })
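;; Likewise, duplicating element N of a memory operand into a register is
;; rewritten as a vec_duplicate (movddup) of the DFmode word at offset
;; N * 8 of that operand.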
7684
7685 (define_insn "avx512f_vmscalef<mode><round_name>"
7686 [(set (match_operand:VF_128 0 "register_operand" "=v")
7687 (vec_merge:VF_128
7688 (unspec:VF_128
7689 [(match_operand:VF_128 1 "register_operand" "v")
7690 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
7691 UNSPEC_SCALEF)
7692 (match_dup 1)
7693 (const_int 1)))]
7694 "TARGET_AVX512F"
7695 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
7696 [(set_attr "prefix" "evex")
7697 (set_attr "mode" "<ssescalarmode>")])
7698
7699 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
7700 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7701 (unspec:VF_AVX512VL
7702 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
7703 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
7704 UNSPEC_SCALEF))]
7705 "TARGET_AVX512F"
7706 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
7707 [(set_attr "prefix" "evex")
7708 (set_attr "mode" "<MODE>")])
7709
7710 (define_expand "<avx512>_vternlog<mode>_maskz"
7711 [(match_operand:VI48_AVX512VL 0 "register_operand")
7712 (match_operand:VI48_AVX512VL 1 "register_operand")
7713 (match_operand:VI48_AVX512VL 2 "register_operand")
7714 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
7715 (match_operand:SI 4 "const_0_to_255_operand")
7716 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7717 "TARGET_AVX512F"
7718 {
7719 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
7720 operands[0], operands[1], operands[2], operands[3],
7721 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
7722 DONE;
7723 })
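;; Note: the _maskz expanders here and below pass CONST0_RTX as the merge
;; source of the underlying masked pattern, so elements cleared in the
;; mask come out as zero rather than being copied from a destination.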
7724
7725 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
7726 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7727 (unspec:VI48_AVX512VL
7728 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7729 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7730 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7731 (match_operand:SI 4 "const_0_to_255_operand")]
7732 UNSPEC_VTERNLOG))]
7733 "TARGET_AVX512F"
7734 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
7735 [(set_attr "type" "sselog")
7736 (set_attr "prefix" "evex")
7737 (set_attr "mode" "<sseinsnmode>")])
7738
7739 (define_insn "<avx512>_vternlog<mode>_mask"
7740 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7741 (vec_merge:VI48_AVX512VL
7742 (unspec:VI48_AVX512VL
7743 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7744 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7745 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7746 (match_operand:SI 4 "const_0_to_255_operand")]
7747 UNSPEC_VTERNLOG)
7748 (match_dup 1)
7749 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7750 "TARGET_AVX512F"
7751 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
7752 [(set_attr "type" "sselog")
7753 (set_attr "prefix" "evex")
7754 (set_attr "mode" "<sseinsnmode>")])
7755
7756 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
7757 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7758 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7759 UNSPEC_GETEXP))]
7760 "TARGET_AVX512F"
7761 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
7762 [(set_attr "prefix" "evex")
7763 (set_attr "mode" "<MODE>")])
7764
7765 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
7766 [(set (match_operand:VF_128 0 "register_operand" "=v")
7767 (vec_merge:VF_128
7768 (unspec:VF_128
7769 [(match_operand:VF_128 1 "register_operand" "v")
7770 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7771 UNSPEC_GETEXP)
7772 (match_dup 1)
7773 (const_int 1)))]
7774 "TARGET_AVX512F"
7775 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
7776 [(set_attr "prefix" "evex")
7777 (set_attr "mode" "<ssescalarmode>")])
7778
7779 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
7780 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7781 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
7782 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
7783 (match_operand:SI 3 "const_0_to_255_operand")]
7784 UNSPEC_ALIGN))]
7785 "TARGET_AVX512F"
7786 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
7787 [(set_attr "prefix" "evex")
7788 (set_attr "mode" "<sseinsnmode>")])
7789
7790 (define_expand "avx512f_shufps512_mask"
7791 [(match_operand:V16SF 0 "register_operand")
7792 (match_operand:V16SF 1 "register_operand")
7793 (match_operand:V16SF 2 "nonimmediate_operand")
7794 (match_operand:SI 3 "const_0_to_255_operand")
7795 (match_operand:V16SF 4 "register_operand")
7796 (match_operand:HI 5 "register_operand")]
7797 "TARGET_AVX512F"
7798 {
7799 int mask = INTVAL (operands[3]);
7800 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
7801 GEN_INT ((mask >> 0) & 3),
7802 GEN_INT ((mask >> 2) & 3),
7803 GEN_INT (((mask >> 4) & 3) + 16),
7804 GEN_INT (((mask >> 6) & 3) + 16),
7805 GEN_INT (((mask >> 0) & 3) + 4),
7806 GEN_INT (((mask >> 2) & 3) + 4),
7807 GEN_INT (((mask >> 4) & 3) + 20),
7808 GEN_INT (((mask >> 6) & 3) + 20),
7809 GEN_INT (((mask >> 0) & 3) + 8),
7810 GEN_INT (((mask >> 2) & 3) + 8),
7811 GEN_INT (((mask >> 4) & 3) + 24),
7812 GEN_INT (((mask >> 6) & 3) + 24),
7813 GEN_INT (((mask >> 0) & 3) + 12),
7814 GEN_INT (((mask >> 2) & 3) + 12),
7815 GEN_INT (((mask >> 4) & 3) + 28),
7816 GEN_INT (((mask >> 6) & 3) + 28),
7817 operands[4], operands[5]));
7818 DONE;
7819 })
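;; Worked example (illustrative only): for imm8 0x1b the 2-bit fields are
;; 3, 2, 1 and 0, so the expander above emits the selectors
;;   lane 0:  3  2 17 16    lane 1:  7  6 21 20
;;   lane 2: 11 10 25 24    lane 3: 15 14 29 28
;; where indices 16..31 refer to operand 2 of the vec_concat.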
7820
7821
7822 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
7823 [(match_operand:VF_AVX512VL 0 "register_operand")
7824 (match_operand:VF_AVX512VL 1 "register_operand")
7825 (match_operand:VF_AVX512VL 2 "register_operand")
7826 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7827 (match_operand:SI 4 "const_0_to_255_operand")
7828 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7829 "TARGET_AVX512F"
7830 {
7831 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7832 operands[0], operands[1], operands[2], operands[3],
7833 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7834 <round_saeonly_expand_operand6>));
7835 DONE;
7836 })
7837
7838 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
7839 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7840 (unspec:VF_AVX512VL
7841 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7842 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7843 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7844 (match_operand:SI 4 "const_0_to_255_operand")]
7845 UNSPEC_FIXUPIMM))]
7846 "TARGET_AVX512F"
7847 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7848 [(set_attr "prefix" "evex")
7849 (set_attr "mode" "<MODE>")])
7850
7851 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
7852 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7853 (vec_merge:VF_AVX512VL
7854 (unspec:VF_AVX512VL
7855 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7856 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7857 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7858 (match_operand:SI 4 "const_0_to_255_operand")]
7859 UNSPEC_FIXUPIMM)
7860 (match_dup 1)
7861 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7862 "TARGET_AVX512F"
7863 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7864 [(set_attr "prefix" "evex")
7865 (set_attr "mode" "<MODE>")])
7866
7867 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
7868 [(match_operand:VF_128 0 "register_operand")
7869 (match_operand:VF_128 1 "register_operand")
7870 (match_operand:VF_128 2 "register_operand")
7871 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7872 (match_operand:SI 4 "const_0_to_255_operand")
7873 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7874 "TARGET_AVX512F"
7875 {
7876 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7877 operands[0], operands[1], operands[2], operands[3],
7878 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7879 <round_saeonly_expand_operand6>));
7880 DONE;
7881 })
7882
7883 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
7884 [(set (match_operand:VF_128 0 "register_operand" "=v")
7885 (vec_merge:VF_128
7886 (unspec:VF_128
7887 [(match_operand:VF_128 1 "register_operand" "0")
7888 (match_operand:VF_128 2 "register_operand" "v")
7889 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7890 (match_operand:SI 4 "const_0_to_255_operand")]
7891 UNSPEC_FIXUPIMM)
7892 (match_dup 1)
7893 (const_int 1)))]
7894 "TARGET_AVX512F"
7895 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7896 [(set_attr "prefix" "evex")
7897 (set_attr "mode" "<ssescalarmode>")])
7898
7899 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
7900 [(set (match_operand:VF_128 0 "register_operand" "=v")
7901 (vec_merge:VF_128
7902 (vec_merge:VF_128
7903 (unspec:VF_128
7904 [(match_operand:VF_128 1 "register_operand" "0")
7905 (match_operand:VF_128 2 "register_operand" "v")
7906 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7907 (match_operand:SI 4 "const_0_to_255_operand")]
7908 UNSPEC_FIXUPIMM)
7909 (match_dup 1)
7910 (const_int 1))
7911 (match_dup 1)
7912 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7913 "TARGET_AVX512F"
7914 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7915 [(set_attr "prefix" "evex")
7916 (set_attr "mode" "<ssescalarmode>")])
7917
7918 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
7919 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7920 (unspec:VF_AVX512VL
7921 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
7922 (match_operand:SI 2 "const_0_to_255_operand")]
7923 UNSPEC_ROUND))]
7924 "TARGET_AVX512F"
7925 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
7926 [(set_attr "length_immediate" "1")
7927 (set_attr "prefix" "evex")
7928 (set_attr "mode" "<MODE>")])
7929
7930 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
7931 [(set (match_operand:VF_128 0 "register_operand" "=v")
7932 (vec_merge:VF_128
7933 (unspec:VF_128
7934 [(match_operand:VF_128 1 "register_operand" "v")
7935 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7936 (match_operand:SI 3 "const_0_to_255_operand")]
7937 UNSPEC_ROUND)
7938 (match_dup 1)
7939 (const_int 1)))]
7940 "TARGET_AVX512F"
7941 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
7942 [(set_attr "length_immediate" "1")
7943 (set_attr "prefix" "evex")
7944 (set_attr "mode" "<MODE>")])
7945
7946 ;; One bit in mask selects 2 elements.
7947 (define_insn "avx512f_shufps512_1<mask_name>"
7948 [(set (match_operand:V16SF 0 "register_operand" "=v")
7949 (vec_select:V16SF
7950 (vec_concat:V32SF
7951 (match_operand:V16SF 1 "register_operand" "v")
7952 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7953 (parallel [(match_operand 3 "const_0_to_3_operand")
7954 (match_operand 4 "const_0_to_3_operand")
7955 (match_operand 5 "const_16_to_19_operand")
7956 (match_operand 6 "const_16_to_19_operand")
7957 (match_operand 7 "const_4_to_7_operand")
7958 (match_operand 8 "const_4_to_7_operand")
7959 (match_operand 9 "const_20_to_23_operand")
7960 (match_operand 10 "const_20_to_23_operand")
7961 (match_operand 11 "const_8_to_11_operand")
7962 (match_operand 12 "const_8_to_11_operand")
7963 (match_operand 13 "const_24_to_27_operand")
7964 (match_operand 14 "const_24_to_27_operand")
7965 (match_operand 15 "const_12_to_15_operand")
7966 (match_operand 16 "const_12_to_15_operand")
7967 (match_operand 17 "const_28_to_31_operand")
7968 (match_operand 18 "const_28_to_31_operand")])))]
7969 "TARGET_AVX512F
7970 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7971 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7972 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7973 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
7974 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
7975 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
7976 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
7977 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
7978 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
7979 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
7980 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
7981 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
7982 {
7983 int mask;
7984 mask = INTVAL (operands[3]);
7985 mask |= INTVAL (operands[4]) << 2;
7986 mask |= (INTVAL (operands[5]) - 16) << 4;
7987 mask |= (INTVAL (operands[6]) - 16) << 6;
7988 operands[3] = GEN_INT (mask);
7989
7990 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
7991 }
7992 [(set_attr "type" "sselog")
7993 (set_attr "length_immediate" "1")
7994 (set_attr "prefix" "evex")
7995 (set_attr "mode" "V16SF")])
7996
7997 (define_expand "avx512f_shufpd512_mask"
7998 [(match_operand:V8DF 0 "register_operand")
7999 (match_operand:V8DF 1 "register_operand")
8000 (match_operand:V8DF 2 "nonimmediate_operand")
8001 (match_operand:SI 3 "const_0_to_255_operand")
8002 (match_operand:V8DF 4 "register_operand")
8003 (match_operand:QI 5 "register_operand")]
8004 "TARGET_AVX512F"
8005 {
8006 int mask = INTVAL (operands[3]);
8007 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
8008 GEN_INT (mask & 1),
8009 GEN_INT (mask & 2 ? 9 : 8),
8010 GEN_INT (mask & 4 ? 3 : 2),
8011 GEN_INT (mask & 8 ? 11 : 10),
8012 GEN_INT (mask & 16 ? 5 : 4),
8013 GEN_INT (mask & 32 ? 13 : 12),
8014 GEN_INT (mask & 64 ? 7 : 6),
8015 GEN_INT (mask & 128 ? 15 : 14),
8016 operands[4], operands[5]));
8017 DONE;
8018 })
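;; Each imm8 bit picks the even (bit clear) or odd (bit set) double of the
;; corresponding source pair; e.g. imm8 0 produces the selectors
;; 0 8 2 10 4 12 6 14, alternating operand 1 (0..7) and operand 2 (8..15).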
8019
8020 (define_insn "avx512f_shufpd512_1<mask_name>"
8021 [(set (match_operand:V8DF 0 "register_operand" "=v")
8022 (vec_select:V8DF
8023 (vec_concat:V16DF
8024 (match_operand:V8DF 1 "register_operand" "v")
8025 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8026 (parallel [(match_operand 3 "const_0_to_1_operand")
8027 (match_operand 4 "const_8_to_9_operand")
8028 (match_operand 5 "const_2_to_3_operand")
8029 (match_operand 6 "const_10_to_11_operand")
8030 (match_operand 7 "const_4_to_5_operand")
8031 (match_operand 8 "const_12_to_13_operand")
8032 (match_operand 9 "const_6_to_7_operand")
8033 (match_operand 10 "const_14_to_15_operand")])))]
8034 "TARGET_AVX512F"
8035 {
8036 int mask;
8037 mask = INTVAL (operands[3]);
8038 mask |= (INTVAL (operands[4]) - 8) << 1;
8039 mask |= (INTVAL (operands[5]) - 2) << 2;
8040 mask |= (INTVAL (operands[6]) - 10) << 3;
8041 mask |= (INTVAL (operands[7]) - 4) << 4;
8042 mask |= (INTVAL (operands[8]) - 12) << 5;
8043 mask |= (INTVAL (operands[9]) - 6) << 6;
8044 mask |= (INTVAL (operands[10]) - 14) << 7;
8045 operands[3] = GEN_INT (mask);
8046
8047 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8048 }
8049 [(set_attr "type" "sselog")
8050 (set_attr "length_immediate" "1")
8051 (set_attr "prefix" "evex")
8052 (set_attr "mode" "V8DF")])
8053
8054 (define_expand "avx_shufpd256<mask_expand4_name>"
8055 [(match_operand:V4DF 0 "register_operand")
8056 (match_operand:V4DF 1 "register_operand")
8057 (match_operand:V4DF 2 "nonimmediate_operand")
8058 (match_operand:SI 3 "const_int_operand")]
8059 "TARGET_AVX"
8060 {
8061 int mask = INTVAL (operands[3]);
8062 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8063 operands[1],
8064 operands[2],
8065 GEN_INT (mask & 1),
8066 GEN_INT (mask & 2 ? 5 : 4),
8067 GEN_INT (mask & 4 ? 3 : 2),
8068 GEN_INT (mask & 8 ? 7 : 6)
8069 <mask_expand4_args>));
8070 DONE;
8071 })
8072
8073 (define_insn "avx_shufpd256_1<mask_name>"
8074 [(set (match_operand:V4DF 0 "register_operand" "=v")
8075 (vec_select:V4DF
8076 (vec_concat:V8DF
8077 (match_operand:V4DF 1 "register_operand" "v")
8078 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8079 (parallel [(match_operand 3 "const_0_to_1_operand")
8080 (match_operand 4 "const_4_to_5_operand")
8081 (match_operand 5 "const_2_to_3_operand")
8082 (match_operand 6 "const_6_to_7_operand")])))]
8083 "TARGET_AVX && <mask_avx512vl_condition>"
8084 {
8085 int mask;
8086 mask = INTVAL (operands[3]);
8087 mask |= (INTVAL (operands[4]) - 4) << 1;
8088 mask |= (INTVAL (operands[5]) - 2) << 2;
8089 mask |= (INTVAL (operands[6]) - 6) << 3;
8090 operands[3] = GEN_INT (mask);
8091
8092 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
8093 }
8094 [(set_attr "type" "sseshuf")
8095 (set_attr "length_immediate" "1")
8096 (set_attr "prefix" "vex")
8097 (set_attr "mode" "V4DF")])
8098
8099 (define_expand "sse2_shufpd<mask_expand4_name>"
8100 [(match_operand:V2DF 0 "register_operand")
8101 (match_operand:V2DF 1 "register_operand")
8102 (match_operand:V2DF 2 "vector_operand")
8103 (match_operand:SI 3 "const_int_operand")]
8104 "TARGET_SSE2"
8105 {
8106 int mask = INTVAL (operands[3]);
8107 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8108 operands[2], GEN_INT (mask & 1),
8109 GEN_INT (mask & 2 ? 3 : 2)
8110 <mask_expand4_args>));
8111 DONE;
8112 })
8113
8114 (define_insn "sse2_shufpd_v2df_mask"
8115 [(set (match_operand:V2DF 0 "register_operand" "=v")
8116 (vec_merge:V2DF
8117 (vec_select:V2DF
8118 (vec_concat:V4DF
8119 (match_operand:V2DF 1 "register_operand" "v")
8120 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8121 (parallel [(match_operand 3 "const_0_to_1_operand")
8122 (match_operand 4 "const_2_to_3_operand")]))
8123 (match_operand:V2DF 5 "vector_move_operand" "0C")
8124 (match_operand:QI 6 "register_operand" "Yk")))]
8125 "TARGET_AVX512VL"
8126 {
8127 int mask;
8128 mask = INTVAL (operands[3]);
8129 mask |= (INTVAL (operands[4]) - 2) << 1;
8130 operands[3] = GEN_INT (mask);
8131
8132 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{6%}%N5, %1, %2, %3}";
8133 }
8134 [(set_attr "type" "sseshuf")
8135 (set_attr "length_immediate" "1")
8136 (set_attr "prefix" "evex")
8137 (set_attr "mode" "V2DF")])
8138
8139 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
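;; (They take no immediate byte, so their encoding is one byte shorter
;; than shufpd with an explicit selector.)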
8140 (define_insn "avx2_interleave_highv4di<mask_name>"
8141 [(set (match_operand:V4DI 0 "register_operand" "=v")
8142 (vec_select:V4DI
8143 (vec_concat:V8DI
8144 (match_operand:V4DI 1 "register_operand" "v")
8145 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8146 (parallel [(const_int 1)
8147 (const_int 5)
8148 (const_int 3)
8149 (const_int 7)])))]
8150 "TARGET_AVX2 && <mask_avx512vl_condition>"
8151 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8152 [(set_attr "type" "sselog")
8153 (set_attr "prefix" "vex")
8154 (set_attr "mode" "OI")])
8155
8156 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
8157 [(set (match_operand:V8DI 0 "register_operand" "=v")
8158 (vec_select:V8DI
8159 (vec_concat:V16DI
8160 (match_operand:V8DI 1 "register_operand" "v")
8161 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8162 (parallel [(const_int 1) (const_int 9)
8163 (const_int 3) (const_int 11)
8164 (const_int 5) (const_int 13)
8165 (const_int 7) (const_int 15)])))]
8166 "TARGET_AVX512F"
8167 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8168 [(set_attr "type" "sselog")
8169 (set_attr "prefix" "evex")
8170 (set_attr "mode" "XI")])
8171
8172 (define_insn "vec_interleave_highv2di<mask_name>"
8173 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8174 (vec_select:V2DI
8175 (vec_concat:V4DI
8176 (match_operand:V2DI 1 "register_operand" "0,v")
8177 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
8178 (parallel [(const_int 1)
8179 (const_int 3)])))]
8180 "TARGET_SSE2 && <mask_avx512vl_condition>"
8181 "@
8182 punpckhqdq\t{%2, %0|%0, %2}
8183 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8184 [(set_attr "isa" "noavx,avx")
8185 (set_attr "type" "sselog")
8186 (set_attr "prefix_data16" "1,*")
8187 (set_attr "prefix" "orig,<mask_prefix>")
8188 (set_attr "mode" "TI")])
8189
8190 (define_insn "avx2_interleave_lowv4di<mask_name>"
8191 [(set (match_operand:V4DI 0 "register_operand" "=v")
8192 (vec_select:V4DI
8193 (vec_concat:V8DI
8194 (match_operand:V4DI 1 "register_operand" "v")
8195 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8196 (parallel [(const_int 0)
8197 (const_int 4)
8198 (const_int 2)
8199 (const_int 6)])))]
8200 "TARGET_AVX2 && <mask_avx512vl_condition>"
8201 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8202 [(set_attr "type" "sselog")
8203 (set_attr "prefix" "vex")
8204 (set_attr "mode" "OI")])
8205
8206 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
8207 [(set (match_operand:V8DI 0 "register_operand" "=v")
8208 (vec_select:V8DI
8209 (vec_concat:V16DI
8210 (match_operand:V8DI 1 "register_operand" "v")
8211 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8212 (parallel [(const_int 0) (const_int 8)
8213 (const_int 2) (const_int 10)
8214 (const_int 4) (const_int 12)
8215 (const_int 6) (const_int 14)])))]
8216 "TARGET_AVX512F"
8217 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8218 [(set_attr "type" "sselog")
8219 (set_attr "prefix" "evex")
8220 (set_attr "mode" "XI")])
8221
8222 (define_insn "vec_interleave_lowv2di<mask_name>"
8223 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8224 (vec_select:V2DI
8225 (vec_concat:V4DI
8226 (match_operand:V2DI 1 "register_operand" "0,v")
8227 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
8228 (parallel [(const_int 0)
8229 (const_int 2)])))]
8230 "TARGET_SSE2 && <mask_avx512vl_condition>"
8231 "@
8232 punpcklqdq\t{%2, %0|%0, %2}
8233 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8234 [(set_attr "isa" "noavx,avx")
8235 (set_attr "type" "sselog")
8236 (set_attr "prefix_data16" "1,*")
8237 (set_attr "prefix" "orig,vex")
8238 (set_attr "mode" "TI")])
8239
8240 (define_insn "sse2_shufpd_<mode>"
8241 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
8242 (vec_select:VI8F_128
8243 (vec_concat:<ssedoublevecmode>
8244 (match_operand:VI8F_128 1 "register_operand" "0,x")
8245 (match_operand:VI8F_128 2 "vector_operand" "xBm,xm"))
8246 (parallel [(match_operand 3 "const_0_to_1_operand")
8247 (match_operand 4 "const_2_to_3_operand")])))]
8248 "TARGET_SSE2"
8249 {
8250 int mask;
8251 mask = INTVAL (operands[3]);
8252 mask |= (INTVAL (operands[4]) - 2) << 1;
8253 operands[3] = GEN_INT (mask);
8254
8255 switch (which_alternative)
8256 {
8257 case 0:
8258 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
8259 case 1:
8260 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8261 default:
8262 gcc_unreachable ();
8263 }
8264 }
8265 [(set_attr "isa" "noavx,avx")
8266 (set_attr "type" "sseshuf")
8267 (set_attr "length_immediate" "1")
8268 (set_attr "prefix" "orig,vex")
8269 (set_attr "mode" "V2DF")])
8270
8271 ;; Avoid combining registers from different units in a single alternative,
8272 ;; see comment above inline_secondary_memory_needed function in i386.c
8273 (define_insn "sse2_storehpd"
8274 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
8275 (vec_select:DF
8276 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
8277 (parallel [(const_int 1)])))]
8278 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8279 "@
8280 %vmovhpd\t{%1, %0|%0, %1}
8281 unpckhpd\t%0, %0
8282 vunpckhpd\t{%d1, %0|%0, %d1}
8283 #
8284 #
8285 #"
8286 [(set_attr "isa" "*,noavx,avx,*,*,*")
8287 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
8288 (set (attr "prefix_data16")
8289 (if_then_else
8290 (and (eq_attr "alternative" "0")
8291 (not (match_test "TARGET_AVX")))
8292 (const_string "1")
8293 (const_string "*")))
8294 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
8295 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
8296
8297 (define_split
8298 [(set (match_operand:DF 0 "register_operand")
8299 (vec_select:DF
8300 (match_operand:V2DF 1 "memory_operand")
8301 (parallel [(const_int 1)])))]
8302 "TARGET_SSE2 && reload_completed"
8303 [(set (match_dup 0) (match_dup 1))]
8304 "operands[1] = adjust_address (operands[1], DFmode, 8);")
8305
8306 (define_insn "*vec_extractv2df_1_sse"
8307 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8308 (vec_select:DF
8309 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8310 (parallel [(const_int 1)])))]
8311 "!TARGET_SSE2 && TARGET_SSE
8312 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8313 "@
8314 movhps\t{%1, %0|%q0, %1}
8315 movhlps\t{%1, %0|%0, %1}
8316 movlps\t{%H1, %0|%0, %H1}"
8317 [(set_attr "type" "ssemov")
8318 (set_attr "mode" "V2SF,V4SF,V2SF")])
8319
8320 ;; Avoid combining registers from different units in a single alternative,
8321 ;; see comment above inline_secondary_memory_needed function in i386.c
8322 (define_insn "sse2_storelpd"
8323 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
8324 (vec_select:DF
8325 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
8326 (parallel [(const_int 0)])))]
8327 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8328 "@
8329 %vmovlpd\t{%1, %0|%0, %1}
8330 #
8331 #
8332 #
8333 #"
8334 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
8335 (set_attr "prefix_data16" "1,*,*,*,*")
8336 (set_attr "prefix" "maybe_vex")
8337 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
8338
8339 (define_split
8340 [(set (match_operand:DF 0 "register_operand")
8341 (vec_select:DF
8342 (match_operand:V2DF 1 "nonimmediate_operand")
8343 (parallel [(const_int 0)])))]
8344 "TARGET_SSE2 && reload_completed"
8345 [(set (match_dup 0) (match_dup 1))]
8346 "operands[1] = gen_lowpart (DFmode, operands[1]);")
8347
8348 (define_insn "*vec_extractv2df_0_sse"
8349 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8350 (vec_select:DF
8351 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8352 (parallel [(const_int 0)])))]
8353 "!TARGET_SSE2 && TARGET_SSE
8354 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8355 "@
8356 movlps\t{%1, %0|%0, %1}
8357 movaps\t{%1, %0|%0, %1}
8358 movlps\t{%1, %0|%0, %q1}"
8359 [(set_attr "type" "ssemov")
8360 (set_attr "mode" "V2SF,V4SF,V2SF")])
8361
8362 (define_expand "sse2_loadhpd_exp"
8363 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8364 (vec_concat:V2DF
8365 (vec_select:DF
8366 (match_operand:V2DF 1 "nonimmediate_operand")
8367 (parallel [(const_int 0)]))
8368 (match_operand:DF 2 "nonimmediate_operand")))]
8369 "TARGET_SSE2"
8370 {
8371 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8372
8373 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8374
8375 /* Fix up the destination if needed. */
8376 if (dst != operands[0])
8377 emit_move_insn (operands[0], dst);
8378
8379 DONE;
8380 })
8381
8382 ;; Avoid combining registers from different units in a single alternative,
8383 ;; see comment above inline_secondary_memory_needed function in i386.c
8384 (define_insn "sse2_loadhpd"
8385 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8386 "=x,x,x,x,o,o ,o")
8387 (vec_concat:V2DF
8388 (vec_select:DF
8389 (match_operand:V2DF 1 "nonimmediate_operand"
8390 " 0,x,0,x,0,0 ,0")
8391 (parallel [(const_int 0)]))
8392 (match_operand:DF 2 "nonimmediate_operand"
8393 " m,m,x,x,x,*f,r")))]
8394 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8395 "@
8396 movhpd\t{%2, %0|%0, %2}
8397 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8398 unpcklpd\t{%2, %0|%0, %2}
8399 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8400 #
8401 #
8402 #"
8403 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
8404 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8405 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
8406 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
8407 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
8408
8409 (define_split
8410 [(set (match_operand:V2DF 0 "memory_operand")
8411 (vec_concat:V2DF
8412 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
8413 (match_operand:DF 1 "register_operand")))]
8414 "TARGET_SSE2 && reload_completed"
8415 [(set (match_dup 0) (match_dup 1))]
8416 "operands[0] = adjust_address (operands[0], DFmode, 8);")
8417
8418 (define_expand "sse2_loadlpd_exp"
8419 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8420 (vec_concat:V2DF
8421 (match_operand:DF 2 "nonimmediate_operand")
8422 (vec_select:DF
8423 (match_operand:V2DF 1 "nonimmediate_operand")
8424 (parallel [(const_int 1)]))))]
8425 "TARGET_SSE2"
8426 {
8427 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8428
8429 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
8430
8431 /* Fix up the destination if needed. */
8432 if (dst != operands[0])
8433 emit_move_insn (operands[0], dst);
8434
8435 DONE;
8436 })
8437
8438 ;; Avoid combining registers from different units in a single alternative,
8439 ;; see comment above inline_secondary_memory_needed function in i386.c
8440 (define_insn "sse2_loadlpd"
8441 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8442 "=x,x,x,x,x,x,x,x,m,m ,m")
8443 (vec_concat:V2DF
8444 (match_operand:DF 2 "nonimmediate_operand"
8445 "xm,m,m,x,x,0,0,x,x,*f,r")
8446 (vec_select:DF
8447 (match_operand:V2DF 1 "vector_move_operand"
8448 " C,0,x,0,x,x,o,o,0,0 ,0")
8449 (parallel [(const_int 1)]))))]
8450 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8451 "@
8452 %vmovq\t{%2, %0|%0, %2}
8453 movlpd\t{%2, %0|%0, %2}
8454 vmovlpd\t{%2, %1, %0|%0, %1, %2}
8455 movsd\t{%2, %0|%0, %2}
8456 vmovsd\t{%2, %1, %0|%0, %1, %2}
8457 shufpd\t{$2, %1, %0|%0, %1, 2}
8458 movhpd\t{%H1, %0|%0, %H1}
8459 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
8460 #
8461 #
8462 #"
8463 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
8464 (set (attr "type")
8465 (cond [(eq_attr "alternative" "5")
8466 (const_string "sselog")
8467 (eq_attr "alternative" "9")
8468 (const_string "fmov")
8469 (eq_attr "alternative" "10")
8470 (const_string "imov")
8471 ]
8472 (const_string "ssemov")))
8473 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
8474 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
8475 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
8476 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
8477
8478 (define_split
8479 [(set (match_operand:V2DF 0 "memory_operand")
8480 (vec_concat:V2DF
8481 (match_operand:DF 1 "register_operand")
8482 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
8483 "TARGET_SSE2 && reload_completed"
8484 [(set (match_dup 0) (match_dup 1))]
8485 "operands[0] = adjust_address (operands[0], DFmode, 0);")
8486
8487 (define_insn "sse2_movsd"
8488 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
8489 (vec_merge:V2DF
8490 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
8491 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
8492 (const_int 1)))]
8493 "TARGET_SSE2"
8494 "@
8495 movsd\t{%2, %0|%0, %2}
8496 vmovsd\t{%2, %1, %0|%0, %1, %2}
8497 movlpd\t{%2, %0|%0, %q2}
8498 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
8499 %vmovlpd\t{%2, %0|%q0, %2}
8500 shufpd\t{$2, %1, %0|%0, %1, 2}
8501 movhps\t{%H1, %0|%0, %H1}
8502 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
8503 %vmovhps\t{%1, %H0|%H0, %1}"
8504 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
8505 (set (attr "type")
8506 (if_then_else
8507 (eq_attr "alternative" "5")
8508 (const_string "sselog")
8509 (const_string "ssemov")))
8510 (set (attr "prefix_data16")
8511 (if_then_else
8512 (and (eq_attr "alternative" "2,4")
8513 (not (match_test "TARGET_AVX")))
8514 (const_string "1")
8515 (const_string "*")))
8516 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
8517 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
8518 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
8519
8520 (define_insn "vec_dupv2df<mask_name>"
8521 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
8522 (vec_duplicate:V2DF
8523 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
8524 "TARGET_SSE2 && <mask_avx512vl_condition>"
8525 "@
8526 unpcklpd\t%0, %0
8527 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
8528 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8529 [(set_attr "isa" "noavx,sse3,avx512vl")
8530 (set_attr "type" "sselog1")
8531 (set_attr "prefix" "orig,maybe_vex,evex")
8532 (set_attr "mode" "V2DF,DF,DF")])
8533
8534 (define_insn "vec_concatv2df"
8535 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
8536 (vec_concat:V2DF
8537 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,xm,0,0")
8538 (match_operand:DF 2 "vector_move_operand" " x,x,v,1,1,m,m, C,x,m")))]
8539 "TARGET_SSE
8540 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
8541 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
8542 "@
8543 unpcklpd\t{%2, %0|%0, %2}
8544 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8545 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8546 %vmovddup\t{%1, %0|%0, %1}
8547 vmovddup\t{%1, %0|%0, %1}
8548 movhpd\t{%2, %0|%0, %2}
8549 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8550 %vmovq\t{%1, %0|%0, %1}
8551 movlhps\t{%2, %0|%0, %2}
8552 movhps\t{%2, %0|%0, %2}"
8553 [(set_attr "isa" "sse2_noavx,avx,avx512vl,sse3,avx512vl,sse2_noavx,avx,sse2,noavx,noavx")
8554 (set (attr "type")
8555 (if_then_else
8556 (eq_attr "alternative" "0,1,2,3,4")
8557 (const_string "sselog")
8558 (const_string "ssemov")))
8559 (set (attr "prefix_data16")
8560 (if_then_else (eq_attr "alternative" "5")
8561 (const_string "1")
8562 (const_string "*")))
8563 (set_attr "prefix" "orig,vex,evex,maybe_vex,evex,orig,vex,maybe_vex,orig,orig")
8564 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
8565
8566 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8567 ;;
8568 ;; Parallel integer down-conversion operations
8569 ;;
8570 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8571
8572 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
8573 (define_mode_attr pmov_src_mode
8574 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
8575 (define_mode_attr pmov_src_lower
8576 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
8577 (define_mode_attr pmov_suff_1
8578 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
8579
8580 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
8581 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8582 (any_truncate:PMOV_DST_MODE_1
8583 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
8584 "TARGET_AVX512F"
8585 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
8586 [(set_attr "type" "ssemov")
8587 (set_attr "memory" "none,store")
8588 (set_attr "prefix" "evex")
8589 (set_attr "mode" "<sseinsnmode>")])
8590
8591 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
8592 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8593 (vec_merge:PMOV_DST_MODE_1
8594 (any_truncate:PMOV_DST_MODE_1
8595 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
8596 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
8597 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8598 "TARGET_AVX512F"
8599 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8600 [(set_attr "type" "ssemov")
8601 (set_attr "memory" "none,store")
8602 (set_attr "prefix" "evex")
8603 (set_attr "mode" "<sseinsnmode>")])
8604
8605 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
8606 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
8607 (vec_merge:PMOV_DST_MODE_1
8608 (any_truncate:PMOV_DST_MODE_1
8609 (match_operand:<pmov_src_mode> 1 "register_operand"))
8610 (match_dup 0)
8611 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
8612 "TARGET_AVX512F")
8613
8614 (define_insn "avx512bw_<code>v32hiv32qi2"
8615 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8616 (any_truncate:V32QI
8617 (match_operand:V32HI 1 "register_operand" "v,v")))]
8618 "TARGET_AVX512BW"
8619 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
8620 [(set_attr "type" "ssemov")
8621 (set_attr "memory" "none,store")
8622 (set_attr "prefix" "evex")
8623 (set_attr "mode" "XI")])
8624
8625 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
8626 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8627 (vec_merge:V32QI
8628 (any_truncate:V32QI
8629 (match_operand:V32HI 1 "register_operand" "v,v"))
8630 (match_operand:V32QI 2 "vector_move_operand" "0C,0")
8631 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
8632 "TARGET_AVX512BW"
8633 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8634 [(set_attr "type" "ssemov")
8635 (set_attr "memory" "none,store")
8636 (set_attr "prefix" "evex")
8637 (set_attr "mode" "XI")])
8638
8639 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
8640 [(set (match_operand:V32QI 0 "nonimmediate_operand")
8641 (vec_merge:V32QI
8642 (any_truncate:V32QI
8643 (match_operand:V32HI 1 "register_operand"))
8644 (match_dup 0)
8645 (match_operand:SI 2 "register_operand")))]
8646 "TARGET_AVX512BW")
8647
8648 (define_mode_iterator PMOV_DST_MODE_2
8649 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
8650 (define_mode_attr pmov_suff_2
8651 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
8652
8653 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
8654 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8655 (any_truncate:PMOV_DST_MODE_2
8656 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
8657 "TARGET_AVX512VL"
8658 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
8659 [(set_attr "type" "ssemov")
8660 (set_attr "memory" "none,store")
8661 (set_attr "prefix" "evex")
8662 (set_attr "mode" "<sseinsnmode>")])
8663
8664 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
8665 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8666 (vec_merge:PMOV_DST_MODE_2
8667 (any_truncate:PMOV_DST_MODE_2
8668 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
8669 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
8670 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8671 "TARGET_AVX512VL"
8672 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8673 [(set_attr "type" "ssemov")
8674 (set_attr "memory" "none,store")
8675 (set_attr "prefix" "evex")
8676 (set_attr "mode" "<sseinsnmode>")])
8677
8678 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
8679 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
8680 (vec_merge:PMOV_DST_MODE_2
8681 (any_truncate:PMOV_DST_MODE_2
8682 (match_operand:<ssedoublemode> 1 "register_operand"))
8683 (match_dup 0)
8684 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
8685 "TARGET_AVX512VL")
8686
8687 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
8688 (define_mode_attr pmov_dst_3
8689 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
8690 (define_mode_attr pmov_dst_zeroed_3
8691 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
8692 (define_mode_attr pmov_suff_3
8693 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
8694
8695 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
8696 [(set (match_operand:V16QI 0 "register_operand" "=v")
8697 (vec_concat:V16QI
8698 (any_truncate:<pmov_dst_3>
8699 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
8700 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
8701 "TARGET_AVX512VL"
8702 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8703 [(set_attr "type" "ssemov")
8704 (set_attr "prefix" "evex")
8705 (set_attr "mode" "TI")])
8706
8707 (define_insn "*avx512vl_<code>v2div2qi2_store"
8708 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8709 (vec_concat:V16QI
8710 (any_truncate:V2QI
8711 (match_operand:V2DI 1 "register_operand" "v"))
8712 (vec_select:V14QI
8713 (match_dup 0)
8714 (parallel [(const_int 2) (const_int 3)
8715 (const_int 4) (const_int 5)
8716 (const_int 6) (const_int 7)
8717 (const_int 8) (const_int 9)
8718 (const_int 10) (const_int 11)
8719 (const_int 12) (const_int 13)
8720 (const_int 14) (const_int 15)]))))]
8721 "TARGET_AVX512VL"
8722 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8723 [(set_attr "type" "ssemov")
8724 (set_attr "memory" "store")
8725 (set_attr "prefix" "evex")
8726 (set_attr "mode" "TI")])
8727
8728 (define_insn "avx512vl_<code>v2div2qi2_mask"
8729 [(set (match_operand:V16QI 0 "register_operand" "=v")
8730 (vec_concat:V16QI
8731 (vec_merge:V2QI
8732 (any_truncate:V2QI
8733 (match_operand:V2DI 1 "register_operand" "v"))
8734 (vec_select:V2QI
8735 (match_operand:V16QI 2 "vector_move_operand" "0C")
8736 (parallel [(const_int 0) (const_int 1)]))
8737 (match_operand:QI 3 "register_operand" "Yk"))
8738 (const_vector:V14QI [(const_int 0) (const_int 0)
8739 (const_int 0) (const_int 0)
8740 (const_int 0) (const_int 0)
8741 (const_int 0) (const_int 0)
8742 (const_int 0) (const_int 0)
8743 (const_int 0) (const_int 0)
8744 (const_int 0) (const_int 0)])))]
8745 "TARGET_AVX512VL"
8746 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8747 [(set_attr "type" "ssemov")
8748 (set_attr "prefix" "evex")
8749 (set_attr "mode" "TI")])
8750
8751 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
8752 [(set (match_operand:V16QI 0 "register_operand" "=v")
8753 (vec_concat:V16QI
8754 (vec_merge:V2QI
8755 (any_truncate:V2QI
8756 (match_operand:V2DI 1 "register_operand" "v"))
8757 (const_vector:V2QI [(const_int 0) (const_int 0)])
8758 (match_operand:QI 2 "register_operand" "Yk"))
8759 (const_vector:V14QI [(const_int 0) (const_int 0)
8760 (const_int 0) (const_int 0)
8761 (const_int 0) (const_int 0)
8762 (const_int 0) (const_int 0)
8763 (const_int 0) (const_int 0)
8764 (const_int 0) (const_int 0)
8765 (const_int 0) (const_int 0)])))]
8766 "TARGET_AVX512VL"
8767 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
8768 [(set_attr "type" "ssemov")
8769 (set_attr "prefix" "evex")
8770 (set_attr "mode" "TI")])
8771
8772 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
8773 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8774 (vec_concat:V16QI
8775 (vec_merge:V2QI
8776 (any_truncate:V2QI
8777 (match_operand:V2DI 1 "register_operand" "v"))
8778 (vec_select:V2QI
8779 (match_dup 0)
8780 (parallel [(const_int 0) (const_int 1)]))
8781 (match_operand:QI 2 "register_operand" "Yk"))
8782 (vec_select:V14QI
8783 (match_dup 0)
8784 (parallel [(const_int 2) (const_int 3)
8785 (const_int 4) (const_int 5)
8786 (const_int 6) (const_int 7)
8787 (const_int 8) (const_int 9)
8788 (const_int 10) (const_int 11)
8789 (const_int 12) (const_int 13)
8790 (const_int 14) (const_int 15)]))))]
8791 "TARGET_AVX512VL"
8792 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8793 [(set_attr "type" "ssemov")
8794 (set_attr "memory" "store")
8795 (set_attr "prefix" "evex")
8796 (set_attr "mode" "TI")])
8797
8798 (define_insn "*avx512vl_<code><mode>v4qi2_store"
8799 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8800 (vec_concat:V16QI
8801 (any_truncate:V4QI
8802 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8803 (vec_select:V12QI
8804 (match_dup 0)
8805 (parallel [(const_int 4) (const_int 5)
8806 (const_int 6) (const_int 7)
8807 (const_int 8) (const_int 9)
8808 (const_int 10) (const_int 11)
8809 (const_int 12) (const_int 13)
8810 (const_int 14) (const_int 15)]))))]
8811 "TARGET_AVX512VL"
8812 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8813 [(set_attr "type" "ssemov")
8814 (set_attr "memory" "store")
8815 (set_attr "prefix" "evex")
8816 (set_attr "mode" "TI")])
8817
8818 (define_insn "avx512vl_<code><mode>v4qi2_mask"
8819 [(set (match_operand:V16QI 0 "register_operand" "=v")
8820 (vec_concat:V16QI
8821 (vec_merge:V4QI
8822 (any_truncate:V4QI
8823 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8824 (vec_select:V4QI
8825 (match_operand:V16QI 2 "vector_move_operand" "0C")
8826 (parallel [(const_int 0) (const_int 1)
8827 (const_int 2) (const_int 3)]))
8828 (match_operand:QI 3 "register_operand" "Yk"))
8829 (const_vector:V12QI [(const_int 0) (const_int 0)
8830 (const_int 0) (const_int 0)
8831 (const_int 0) (const_int 0)
8832 (const_int 0) (const_int 0)
8833 (const_int 0) (const_int 0)
8834 (const_int 0) (const_int 0)])))]
8835 "TARGET_AVX512VL"
8836 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8837 [(set_attr "type" "ssemov")
8838 (set_attr "prefix" "evex")
8839 (set_attr "mode" "TI")])
8840
8841 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
8842 [(set (match_operand:V16QI 0 "register_operand" "=v")
8843 (vec_concat:V16QI
8844 (vec_merge:V4QI
8845 (any_truncate:V4QI
8846 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8847 (const_vector:V4QI [(const_int 0) (const_int 0)
8848 (const_int 0) (const_int 0)])
8849 (match_operand:QI 2 "register_operand" "Yk"))
8850 (const_vector:V12QI [(const_int 0) (const_int 0)
8851 (const_int 0) (const_int 0)
8852 (const_int 0) (const_int 0)
8853 (const_int 0) (const_int 0)
8854 (const_int 0) (const_int 0)
8855 (const_int 0) (const_int 0)])))]
8856 "TARGET_AVX512VL"
8857 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
8858 [(set_attr "type" "ssemov")
8859 (set_attr "prefix" "evex")
8860 (set_attr "mode" "TI")])
8861
8862 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
8863 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8864 (vec_concat:V16QI
8865 (vec_merge:V4QI
8866 (any_truncate:V4QI
8867 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8868 (vec_select:V4QI
8869 (match_dup 0)
8870 (parallel [(const_int 0) (const_int 1)
8871 (const_int 2) (const_int 3)]))
8872 (match_operand:QI 2 "register_operand" "Yk"))
8873 (vec_select:V12QI
8874 (match_dup 0)
8875 (parallel [(const_int 4) (const_int 5)
8876 (const_int 6) (const_int 7)
8877 (const_int 8) (const_int 9)
8878 (const_int 10) (const_int 11)
8879 (const_int 12) (const_int 13)
8880 (const_int 14) (const_int 15)]))))]
8881 "TARGET_AVX512VL"
8882 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8883 [(set_attr "type" "ssemov")
8884 (set_attr "memory" "store")
8885 (set_attr "prefix" "evex")
8886 (set_attr "mode" "TI")])
8887
8888 (define_mode_iterator VI2_128_BW_4_256
8889 [(V8HI "TARGET_AVX512BW") V8SI])
8890
8891 (define_insn "*avx512vl_<code><mode>v8qi2_store"
8892 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8893 (vec_concat:V16QI
8894 (any_truncate:V8QI
8895 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8896 (vec_select:V8QI
8897 (match_dup 0)
8898 (parallel [(const_int 8) (const_int 9)
8899 (const_int 10) (const_int 11)
8900 (const_int 12) (const_int 13)
8901 (const_int 14) (const_int 15)]))))]
8902 "TARGET_AVX512VL"
8903 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8904 [(set_attr "type" "ssemov")
8905 (set_attr "memory" "store")
8906 (set_attr "prefix" "evex")
8907 (set_attr "mode" "TI")])
8908
8909 (define_insn "avx512vl_<code><mode>v8qi2_mask"
8910 [(set (match_operand:V16QI 0 "register_operand" "=v")
8911 (vec_concat:V16QI
8912 (vec_merge:V8QI
8913 (any_truncate:V8QI
8914 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8915 (vec_select:V8QI
8916 (match_operand:V16QI 2 "vector_move_operand" "0C")
8917 (parallel [(const_int 0) (const_int 1)
8918 (const_int 2) (const_int 3)
8919 (const_int 4) (const_int 5)
8920 (const_int 6) (const_int 7)]))
8921 (match_operand:QI 3 "register_operand" "Yk"))
8922 (const_vector:V8QI [(const_int 0) (const_int 0)
8923 (const_int 0) (const_int 0)
8924 (const_int 0) (const_int 0)
8925 (const_int 0) (const_int 0)])))]
8926 "TARGET_AVX512VL"
8927 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8928 [(set_attr "type" "ssemov")
8929 (set_attr "prefix" "evex")
8930 (set_attr "mode" "TI")])
8931
8932 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
8933 [(set (match_operand:V16QI 0 "register_operand" "=v")
8934 (vec_concat:V16QI
8935 (vec_merge:V8QI
8936 (any_truncate:V8QI
8937 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8938 (const_vector:V8QI [(const_int 0) (const_int 0)
8939 (const_int 0) (const_int 0)
8940 (const_int 0) (const_int 0)
8941 (const_int 0) (const_int 0)])
8942 (match_operand:QI 2 "register_operand" "Yk"))
8943 (const_vector:V8QI [(const_int 0) (const_int 0)
8944 (const_int 0) (const_int 0)
8945 (const_int 0) (const_int 0)
8946 (const_int 0) (const_int 0)])))]
8947 "TARGET_AVX512VL"
8948 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
8949 [(set_attr "type" "ssemov")
8950 (set_attr "prefix" "evex")
8951 (set_attr "mode" "TI")])
8952
8953 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
8954 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8955 (vec_concat:V16QI
8956 (vec_merge:V8QI
8957 (any_truncate:V8QI
8958 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8959 (vec_select:V8QI
8960 (match_dup 0)
8961 (parallel [(const_int 0) (const_int 1)
8962 (const_int 2) (const_int 3)
8963 (const_int 4) (const_int 5)
8964 (const_int 6) (const_int 7)]))
8965 (match_operand:QI 2 "register_operand" "Yk"))
8966 (vec_select:V8QI
8967 (match_dup 0)
8968 (parallel [(const_int 8) (const_int 9)
8969 (const_int 10) (const_int 11)
8970 (const_int 12) (const_int 13)
8971 (const_int 14) (const_int 15)]))))]
8972 "TARGET_AVX512VL"
8973 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8974 [(set_attr "type" "ssemov")
8975 (set_attr "memory" "store")
8976 (set_attr "prefix" "evex")
8977 (set_attr "mode" "TI")])
8978
8979 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
8980 (define_mode_attr pmov_dst_4
8981 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
8982 (define_mode_attr pmov_dst_zeroed_4
8983 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
8984 (define_mode_attr pmov_suff_4
8985 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
8986
8987 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
8988 [(set (match_operand:V8HI 0 "register_operand" "=v")
8989 (vec_concat:V8HI
8990 (any_truncate:<pmov_dst_4>
8991 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
8992 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
8993 "TARGET_AVX512VL"
8994 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8995 [(set_attr "type" "ssemov")
8996 (set_attr "prefix" "evex")
8997 (set_attr "mode" "TI")])
8998
8999 (define_insn "*avx512vl_<code><mode>v4hi2_store"
9000 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9001 (vec_concat:V8HI
9002 (any_truncate:V4HI
9003 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9004 (vec_select:V4HI
9005 (match_dup 0)
9006 (parallel [(const_int 4) (const_int 5)
9007 (const_int 6) (const_int 7)]))))]
9008 "TARGET_AVX512VL"
9009 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9010 [(set_attr "type" "ssemov")
9011 (set_attr "memory" "store")
9012 (set_attr "prefix" "evex")
9013 (set_attr "mode" "TI")])
9014
9015 (define_insn "avx512vl_<code><mode>v4hi2_mask"
9016 [(set (match_operand:V8HI 0 "register_operand" "=v")
9017 (vec_concat:V8HI
9018 (vec_merge:V4HI
9019 (any_truncate:V4HI
9020 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9021 (vec_select:V4HI
9022 (match_operand:V8HI 2 "vector_move_operand" "0C")
9023 (parallel [(const_int 0) (const_int 1)
9024 (const_int 2) (const_int 3)]))
9025 (match_operand:QI 3 "register_operand" "Yk"))
9026 (const_vector:V4HI [(const_int 0) (const_int 0)
9027 (const_int 0) (const_int 0)])))]
9028 "TARGET_AVX512VL"
9029 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9030 [(set_attr "type" "ssemov")
9031 (set_attr "prefix" "evex")
9032 (set_attr "mode" "TI")])
9033
9034 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
9035 [(set (match_operand:V8HI 0 "register_operand" "=v")
9036 (vec_concat:V8HI
9037 (vec_merge:V4HI
9038 (any_truncate:V4HI
9039 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9040 (const_vector:V4HI [(const_int 0) (const_int 0)
9041 (const_int 0) (const_int 0)])
9042 (match_operand:QI 2 "register_operand" "Yk"))
9043 (const_vector:V4HI [(const_int 0) (const_int 0)
9044 (const_int 0) (const_int 0)])))]
9045 "TARGET_AVX512VL"
9046 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9047 [(set_attr "type" "ssemov")
9048 (set_attr "prefix" "evex")
9049 (set_attr "mode" "TI")])
9050
9051 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
9052 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9053 (vec_concat:V8HI
9054 (vec_merge:V4HI
9055 (any_truncate:V4HI
9056 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9057 (vec_select:V4HI
9058 (match_dup 0)
9059 (parallel [(const_int 0) (const_int 1)
9060 (const_int 2) (const_int 3)]))
9061 (match_operand:QI 2 "register_operand" "Yk"))
9062 (vec_select:V4HI
9063 (match_dup 0)
9064 (parallel [(const_int 4) (const_int 5)
9065 (const_int 6) (const_int 7)]))))]
9066 "TARGET_AVX512VL"
9067 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9068 [(set_attr "type" "ssemov")
9069 (set_attr "memory" "store")
9070 (set_attr "prefix" "evex")
9071 (set_attr "mode" "TI")])
9072
9073 (define_insn "*avx512vl_<code>v2div2hi2_store"
9074 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9075 (vec_concat:V8HI
9076 (any_truncate:V2HI
9077 (match_operand:V2DI 1 "register_operand" "v"))
9078 (vec_select:V6HI
9079 (match_dup 0)
9080 (parallel [(const_int 2) (const_int 3)
9081 (const_int 4) (const_int 5)
9082 (const_int 6) (const_int 7)]))))]
9083 "TARGET_AVX512VL"
9084 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
9085 [(set_attr "type" "ssemov")
9086 (set_attr "memory" "store")
9087 (set_attr "prefix" "evex")
9088 (set_attr "mode" "TI")])
9089
9090 (define_insn "avx512vl_<code>v2div2hi2_mask"
9091 [(set (match_operand:V8HI 0 "register_operand" "=v")
9092 (vec_concat:V8HI
9093 (vec_merge:V2HI
9094 (any_truncate:V2HI
9095 (match_operand:V2DI 1 "register_operand" "v"))
9096 (vec_select:V2HI
9097 (match_operand:V8HI 2 "vector_move_operand" "0C")
9098 (parallel [(const_int 0) (const_int 1)]))
9099 (match_operand:QI 3 "register_operand" "Yk"))
9100 (const_vector:V6HI [(const_int 0) (const_int 0)
9101 (const_int 0) (const_int 0)
9102 (const_int 0) (const_int 0)])))]
9103 "TARGET_AVX512VL"
9104 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9105 [(set_attr "type" "ssemov")
9106 (set_attr "prefix" "evex")
9107 (set_attr "mode" "TI")])
9108
9109 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
9110 [(set (match_operand:V8HI 0 "register_operand" "=v")
9111 (vec_concat:V8HI
9112 (vec_merge:V2HI
9113 (any_truncate:V2HI
9114 (match_operand:V2DI 1 "register_operand" "v"))
9115 (const_vector:V2HI [(const_int 0) (const_int 0)])
9116 (match_operand:QI 2 "register_operand" "Yk"))
9117 (const_vector:V6HI [(const_int 0) (const_int 0)
9118 (const_int 0) (const_int 0)
9119 (const_int 0) (const_int 0)])))]
9120 "TARGET_AVX512VL"
9121 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9122 [(set_attr "type" "ssemov")
9123 (set_attr "prefix" "evex")
9124 (set_attr "mode" "TI")])
9125
9126 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
9127 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9128 (vec_concat:V8HI
9129 (vec_merge:V2HI
9130 (any_truncate:V2HI
9131 (match_operand:V2DI 1 "register_operand" "v"))
9132 (vec_select:V2HI
9133 (match_dup 0)
9134 (parallel [(const_int 0) (const_int 1)]))
9135 (match_operand:QI 2 "register_operand" "Yk"))
9136 (vec_select:V6HI
9137 (match_dup 0)
9138 (parallel [(const_int 2) (const_int 3)
9139 (const_int 4) (const_int 5)
9140 (const_int 6) (const_int 7)]))))]
9141 "TARGET_AVX512VL"
9142 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9143 [(set_attr "type" "ssemov")
9144 (set_attr "memory" "store")
9145 (set_attr "prefix" "evex")
9146 (set_attr "mode" "TI")])
9147
9148 (define_insn "*avx512vl_<code>v2div2si2"
9149 [(set (match_operand:V4SI 0 "register_operand" "=v")
9150 (vec_concat:V4SI
9151 (any_truncate:V2SI
9152 (match_operand:V2DI 1 "register_operand" "v"))
9153 (match_operand:V2SI 2 "const0_operand")))]
9154 "TARGET_AVX512VL"
9155 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9156 [(set_attr "type" "ssemov")
9157 (set_attr "prefix" "evex")
9158 (set_attr "mode" "TI")])
9159
9160 (define_insn "*avx512vl_<code>v2div2si2_store"
9161 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9162 (vec_concat:V4SI
9163 (any_truncate:V2SI
9164 (match_operand:V2DI 1 "register_operand" "v"))
9165 (vec_select:V2SI
9166 (match_dup 0)
9167 (parallel [(const_int 2) (const_int 3)]))))]
9168 "TARGET_AVX512VL"
9169 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9170 [(set_attr "type" "ssemov")
9171 (set_attr "memory" "store")
9172 (set_attr "prefix" "evex")
9173 (set_attr "mode" "TI")])
9174
9175 (define_insn "avx512vl_<code>v2div2si2_mask"
9176 [(set (match_operand:V4SI 0 "register_operand" "=v")
9177 (vec_concat:V4SI
9178 (vec_merge:V2SI
9179 (any_truncate:V2SI
9180 (match_operand:V2DI 1 "register_operand" "v"))
9181 (vec_select:V2SI
9182 (match_operand:V4SI 2 "vector_move_operand" "0C")
9183 (parallel [(const_int 0) (const_int 1)]))
9184 (match_operand:QI 3 "register_operand" "Yk"))
9185 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9186 "TARGET_AVX512VL"
9187 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9188 [(set_attr "type" "ssemov")
9189 (set_attr "prefix" "evex")
9190 (set_attr "mode" "TI")])
9191
9192 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
9193 [(set (match_operand:V4SI 0 "register_operand" "=v")
9194 (vec_concat:V4SI
9195 (vec_merge:V2SI
9196 (any_truncate:V2SI
9197 (match_operand:V2DI 1 "register_operand" "v"))
9198 (const_vector:V2SI [(const_int 0) (const_int 0)])
9199 (match_operand:QI 2 "register_operand" "Yk"))
9200 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9201 "TARGET_AVX512VL"
9202 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9203 [(set_attr "type" "ssemov")
9204 (set_attr "prefix" "evex")
9205 (set_attr "mode" "TI")])
9206
9207 (define_insn "avx512vl_<code>v2div2si2_mask_store"
9208 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9209 (vec_concat:V4SI
9210 (vec_merge:V2SI
9211 (any_truncate:V2SI
9212 (match_operand:V2DI 1 "register_operand" "v"))
9213 (vec_select:V2SI
9214 (match_dup 0)
9215 (parallel [(const_int 0) (const_int 1)]))
9216 (match_operand:QI 2 "register_operand" "Yk"))
9217 (vec_select:V2SI
9218 (match_dup 0)
9219 (parallel [(const_int 2) (const_int 3)]))))]
9220 "TARGET_AVX512VL"
9221 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9222 [(set_attr "type" "ssemov")
9223 (set_attr "memory" "store")
9224 (set_attr "prefix" "evex")
9225 (set_attr "mode" "TI")])
9226
9227 (define_insn "*avx512f_<code>v8div16qi2"
9228 [(set (match_operand:V16QI 0 "register_operand" "=v")
9229 (vec_concat:V16QI
9230 (any_truncate:V8QI
9231 (match_operand:V8DI 1 "register_operand" "v"))
9232 (const_vector:V8QI [(const_int 0) (const_int 0)
9233 (const_int 0) (const_int 0)
9234 (const_int 0) (const_int 0)
9235 (const_int 0) (const_int 0)])))]
9236 "TARGET_AVX512F"
9237 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9238 [(set_attr "type" "ssemov")
9239 (set_attr "prefix" "evex")
9240 (set_attr "mode" "TI")])
9241
9242 (define_insn "*avx512f_<code>v8div16qi2_store"
9243 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9244 (vec_concat:V16QI
9245 (any_truncate:V8QI
9246 (match_operand:V8DI 1 "register_operand" "v"))
9247 (vec_select:V8QI
9248 (match_dup 0)
9249 (parallel [(const_int 8) (const_int 9)
9250 (const_int 10) (const_int 11)
9251 (const_int 12) (const_int 13)
9252 (const_int 14) (const_int 15)]))))]
9253 "TARGET_AVX512F"
9254 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9255 [(set_attr "type" "ssemov")
9256 (set_attr "memory" "store")
9257 (set_attr "prefix" "evex")
9258 (set_attr "mode" "TI")])
9259
9260 (define_insn "avx512f_<code>v8div16qi2_mask"
9261 [(set (match_operand:V16QI 0 "register_operand" "=v")
9262 (vec_concat:V16QI
9263 (vec_merge:V8QI
9264 (any_truncate:V8QI
9265 (match_operand:V8DI 1 "register_operand" "v"))
9266 (vec_select:V8QI
9267 (match_operand:V16QI 2 "vector_move_operand" "0C")
9268 (parallel [(const_int 0) (const_int 1)
9269 (const_int 2) (const_int 3)
9270 (const_int 4) (const_int 5)
9271 (const_int 6) (const_int 7)]))
9272 (match_operand:QI 3 "register_operand" "Yk"))
9273 (const_vector:V8QI [(const_int 0) (const_int 0)
9274 (const_int 0) (const_int 0)
9275 (const_int 0) (const_int 0)
9276 (const_int 0) (const_int 0)])))]
9277 "TARGET_AVX512F"
9278 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9279 [(set_attr "type" "ssemov")
9280 (set_attr "prefix" "evex")
9281 (set_attr "mode" "TI")])
9282
9283 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
9284 [(set (match_operand:V16QI 0 "register_operand" "=v")
9285 (vec_concat:V16QI
9286 (vec_merge:V8QI
9287 (any_truncate:V8QI
9288 (match_operand:V8DI 1 "register_operand" "v"))
9289 (const_vector:V8QI [(const_int 0) (const_int 0)
9290 (const_int 0) (const_int 0)
9291 (const_int 0) (const_int 0)
9292 (const_int 0) (const_int 0)])
9293 (match_operand:QI 2 "register_operand" "Yk"))
9294 (const_vector:V8QI [(const_int 0) (const_int 0)
9295 (const_int 0) (const_int 0)
9296 (const_int 0) (const_int 0)
9297 (const_int 0) (const_int 0)])))]
9298 "TARGET_AVX512F"
9299 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9300 [(set_attr "type" "ssemov")
9301 (set_attr "prefix" "evex")
9302 (set_attr "mode" "TI")])
9303
9304 (define_insn "avx512f_<code>v8div16qi2_mask_store"
9305 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9306 (vec_concat:V16QI
9307 (vec_merge:V8QI
9308 (any_truncate:V8QI
9309 (match_operand:V8DI 1 "register_operand" "v"))
9310 (vec_select:V8QI
9311 (match_dup 0)
9312 (parallel [(const_int 0) (const_int 1)
9313 (const_int 2) (const_int 3)
9314 (const_int 4) (const_int 5)
9315 (const_int 6) (const_int 7)]))
9316 (match_operand:QI 2 "register_operand" "Yk"))
9317 (vec_select:V8QI
9318 (match_dup 0)
9319 (parallel [(const_int 8) (const_int 9)
9320 (const_int 10) (const_int 11)
9321 (const_int 12) (const_int 13)
9322 (const_int 14) (const_int 15)]))))]
9323 "TARGET_AVX512F"
9324 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9325 [(set_attr "type" "ssemov")
9326 (set_attr "memory" "store")
9327 (set_attr "prefix" "evex")
9328 (set_attr "mode" "TI")])
9329
9330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9331 ;;
9332 ;; Parallel integral arithmetic
9333 ;;
9334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9335
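;; Vector negation is expanded as subtraction from a zero vector.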
9336 (define_expand "neg<mode>2"
9337 [(set (match_operand:VI_AVX2 0 "register_operand")
9338 (minus:VI_AVX2
9339 (match_dup 2)
9340 (match_operand:VI_AVX2 1 "vector_operand")))]
9341 "TARGET_SSE2"
9342 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
9343
9344 (define_expand "<plusminus_insn><mode>3"
9345 [(set (match_operand:VI_AVX2 0 "register_operand")
9346 (plusminus:VI_AVX2
9347 (match_operand:VI_AVX2 1 "vector_operand")
9348 (match_operand:VI_AVX2 2 "vector_operand")))]
9349 "TARGET_SSE2"
9350 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9351
9352 (define_expand "<plusminus_insn><mode>3_mask"
9353 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9354 (vec_merge:VI48_AVX512VL
9355 (plusminus:VI48_AVX512VL
9356 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
9357 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
9358 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
9359 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9360 "TARGET_AVX512F"
9361 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9362
9363 (define_expand "<plusminus_insn><mode>3_mask"
9364 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
9365 (vec_merge:VI12_AVX512VL
9366 (plusminus:VI12_AVX512VL
9367 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
9368 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
9369 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
9370 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9371 "TARGET_AVX512BW"
9372 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9373
9374 (define_insn "*<plusminus_insn><mode>3"
9375 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
9376 (plusminus:VI_AVX2
9377 (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
9378 (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
9379 "TARGET_SSE2
9380 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9381 "@
9382 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9383 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9384 [(set_attr "isa" "noavx,avx")
9385 (set_attr "type" "sseiadd")
9386 (set_attr "prefix_data16" "1,*")
9387 (set_attr "prefix" "<mask_prefix3>")
9388 (set_attr "mode" "<sseinsnmode>")])
9389
9390 (define_insn "*<plusminus_insn><mode>3_mask"
9391 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9392 (vec_merge:VI48_AVX512VL
9393 (plusminus:VI48_AVX512VL
9394 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9395 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
9396 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
9397 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9398 "TARGET_AVX512F
9399 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9400 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9401 [(set_attr "type" "sseiadd")
9402 (set_attr "prefix" "evex")
9403 (set_attr "mode" "<sseinsnmode>")])
9404
9405 (define_insn "*<plusminus_insn><mode>3_mask"
9406 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
9407 (vec_merge:VI12_AVX512VL
9408 (plusminus:VI12_AVX512VL
9409 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9410 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
9411 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
9412 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9413 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9414 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9415 [(set_attr "type" "sseiadd")
9416 (set_attr "prefix" "evex")
9417 (set_attr "mode" "<sseinsnmode>")])
9418
9419 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9420 [(set (match_operand:VI12_AVX2 0 "register_operand")
9421 (sat_plusminus:VI12_AVX2
9422 (match_operand:VI12_AVX2 1 "vector_operand")
9423 (match_operand:VI12_AVX2 2 "vector_operand")))]
9424 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9425 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9426
9427 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9428 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
9429 (sat_plusminus:VI12_AVX2
9430 (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
9431 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
9432 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
9433 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9434 "@
9435 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9436 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9437 [(set_attr "isa" "noavx,avx")
9438 (set_attr "type" "sseiadd")
9439 (set_attr "prefix_data16" "1,*")
9440 (set_attr "prefix" "orig,maybe_evex")
9441 (set_attr "mode" "TI")])
9442
9443 (define_expand "mul<mode>3<mask_name>"
9444 [(set (match_operand:VI1_AVX512 0 "register_operand")
9445 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
9446 (match_operand:VI1_AVX512 2 "register_operand")))]
9447 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9448 {
9449 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
9450 DONE;
9451 })
9452
9453 (define_expand "mul<mode>3<mask_name>"
9454 [(set (match_operand:VI2_AVX2 0 "register_operand")
9455 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
9456 (match_operand:VI2_AVX2 2 "vector_operand")))]
9457 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9458 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
9459
9460 (define_insn "*mul<mode>3<mask_name>"
9461 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9462 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
9463 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
9464 "TARGET_SSE2
9465 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9466 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9467 "@
9468 pmullw\t{%2, %0|%0, %2}
9469 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9470 [(set_attr "isa" "noavx,avx")
9471 (set_attr "type" "sseimul")
9472 (set_attr "prefix_data16" "1,*")
9473 (set_attr "prefix" "orig,vex")
9474 (set_attr "mode" "<sseinsnmode>")])
9475
9476 (define_expand "<s>mul<mode>3_highpart<mask_name>"
9477 [(set (match_operand:VI2_AVX2 0 "register_operand")
9478 (truncate:VI2_AVX2
9479 (lshiftrt:<ssedoublemode>
9480 (mult:<ssedoublemode>
9481 (any_extend:<ssedoublemode>
9482 (match_operand:VI2_AVX2 1 "vector_operand"))
9483 (any_extend:<ssedoublemode>
9484 (match_operand:VI2_AVX2 2 "vector_operand")))
9485 (const_int 16))))]
9486 "TARGET_SSE2
9487 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9488 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
9489
9490 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
9491 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9492 (truncate:VI2_AVX2
9493 (lshiftrt:<ssedoublemode>
9494 (mult:<ssedoublemode>
9495 (any_extend:<ssedoublemode>
9496 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
9497 (any_extend:<ssedoublemode>
9498 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
9499 (const_int 16))))]
9500 "TARGET_SSE2
9501 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9502 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9503 "@
9504 pmulh<u>w\t{%2, %0|%0, %2}
9505 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9506 [(set_attr "isa" "noavx,avx")
9507 (set_attr "type" "sseimul")
9508 (set_attr "prefix_data16" "1,*")
9509 (set_attr "prefix" "orig,vex")
9510 (set_attr "mode" "<sseinsnmode>")])
9511
9512 (define_expand "vec_widen_umult_even_v16si<mask_name>"
9513 [(set (match_operand:V8DI 0 "register_operand")
9514 (mult:V8DI
9515 (zero_extend:V8DI
9516 (vec_select:V8SI
9517 (match_operand:V16SI 1 "nonimmediate_operand")
9518 (parallel [(const_int 0) (const_int 2)
9519 (const_int 4) (const_int 6)
9520 (const_int 8) (const_int 10)
9521 (const_int 12) (const_int 14)])))
9522 (zero_extend:V8DI
9523 (vec_select:V8SI
9524 (match_operand:V16SI 2 "nonimmediate_operand")
9525 (parallel [(const_int 0) (const_int 2)
9526 (const_int 4) (const_int 6)
9527 (const_int 8) (const_int 10)
9528 (const_int 12) (const_int 14)])))))]
9529 "TARGET_AVX512F"
9530 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
9531
9532 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
9533 [(set (match_operand:V8DI 0 "register_operand" "=v")
9534 (mult:V8DI
9535 (zero_extend:V8DI
9536 (vec_select:V8SI
9537 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9538 (parallel [(const_int 0) (const_int 2)
9539 (const_int 4) (const_int 6)
9540 (const_int 8) (const_int 10)
9541 (const_int 12) (const_int 14)])))
9542 (zero_extend:V8DI
9543 (vec_select:V8SI
9544 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9545 (parallel [(const_int 0) (const_int 2)
9546 (const_int 4) (const_int 6)
9547 (const_int 8) (const_int 10)
9548 (const_int 12) (const_int 14)])))))]
9549 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9550 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9551 [(set_attr "isa" "avx512f")
9552 (set_attr "type" "sseimul")
9553 (set_attr "prefix_extra" "1")
9554 (set_attr "prefix" "evex")
9555 (set_attr "mode" "XI")])
9556
9557 (define_expand "vec_widen_umult_even_v8si<mask_name>"
9558 [(set (match_operand:V4DI 0 "register_operand")
9559 (mult:V4DI
9560 (zero_extend:V4DI
9561 (vec_select:V4SI
9562 (match_operand:V8SI 1 "nonimmediate_operand")
9563 (parallel [(const_int 0) (const_int 2)
9564 (const_int 4) (const_int 6)])))
9565 (zero_extend:V4DI
9566 (vec_select:V4SI
9567 (match_operand:V8SI 2 "nonimmediate_operand")
9568 (parallel [(const_int 0) (const_int 2)
9569 (const_int 4) (const_int 6)])))))]
9570 "TARGET_AVX2 && <mask_avx512vl_condition>"
9571 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
9572
9573 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
9574 [(set (match_operand:V4DI 0 "register_operand" "=v")
9575 (mult:V4DI
9576 (zero_extend:V4DI
9577 (vec_select:V4SI
9578 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9579 (parallel [(const_int 0) (const_int 2)
9580 (const_int 4) (const_int 6)])))
9581 (zero_extend:V4DI
9582 (vec_select:V4SI
9583 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9584 (parallel [(const_int 0) (const_int 2)
9585 (const_int 4) (const_int 6)])))))]
9586 "TARGET_AVX2 && <mask_avx512vl_condition>
9587 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9588 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9589 [(set_attr "type" "sseimul")
9590 (set_attr "prefix" "maybe_evex")
9591 (set_attr "mode" "OI")])
9592
9593 (define_expand "vec_widen_umult_even_v4si<mask_name>"
9594 [(set (match_operand:V2DI 0 "register_operand")
9595 (mult:V2DI
9596 (zero_extend:V2DI
9597 (vec_select:V2SI
9598 (match_operand:V4SI 1 "vector_operand")
9599 (parallel [(const_int 0) (const_int 2)])))
9600 (zero_extend:V2DI
9601 (vec_select:V2SI
9602 (match_operand:V4SI 2 "vector_operand")
9603 (parallel [(const_int 0) (const_int 2)])))))]
9604 "TARGET_SSE2 && <mask_avx512vl_condition>"
9605 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
9606
9607 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
9608 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9609 (mult:V2DI
9610 (zero_extend:V2DI
9611 (vec_select:V2SI
9612 (match_operand:V4SI 1 "vector_operand" "%0,v")
9613 (parallel [(const_int 0) (const_int 2)])))
9614 (zero_extend:V2DI
9615 (vec_select:V2SI
9616 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
9617 (parallel [(const_int 0) (const_int 2)])))))]
9618 "TARGET_SSE2 && <mask_avx512vl_condition>
9619 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9620 "@
9621 pmuludq\t{%2, %0|%0, %2}
9622 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9623 [(set_attr "isa" "noavx,avx")
9624 (set_attr "type" "sseimul")
9625 (set_attr "prefix_data16" "1,*")
9626 (set_attr "prefix" "orig,maybe_evex")
9627 (set_attr "mode" "TI")])
9628
9629 (define_expand "vec_widen_smult_even_v16si<mask_name>"
9630 [(set (match_operand:V8DI 0 "register_operand")
9631 (mult:V8DI
9632 (sign_extend:V8DI
9633 (vec_select:V8SI
9634 (match_operand:V16SI 1 "nonimmediate_operand")
9635 (parallel [(const_int 0) (const_int 2)
9636 (const_int 4) (const_int 6)
9637 (const_int 8) (const_int 10)
9638 (const_int 12) (const_int 14)])))
9639 (sign_extend:V8DI
9640 (vec_select:V8SI
9641 (match_operand:V16SI 2 "nonimmediate_operand")
9642 (parallel [(const_int 0) (const_int 2)
9643 (const_int 4) (const_int 6)
9644 (const_int 8) (const_int 10)
9645 (const_int 12) (const_int 14)])))))]
9646 "TARGET_AVX512F"
9647 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
9648
9649 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
9650 [(set (match_operand:V8DI 0 "register_operand" "=v")
9651 (mult:V8DI
9652 (sign_extend:V8DI
9653 (vec_select:V8SI
9654 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9655 (parallel [(const_int 0) (const_int 2)
9656 (const_int 4) (const_int 6)
9657 (const_int 8) (const_int 10)
9658 (const_int 12) (const_int 14)])))
9659 (sign_extend:V8DI
9660 (vec_select:V8SI
9661 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9662 (parallel [(const_int 0) (const_int 2)
9663 (const_int 4) (const_int 6)
9664 (const_int 8) (const_int 10)
9665 (const_int 12) (const_int 14)])))))]
9666 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9667 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9668 [(set_attr "isa" "avx512f")
9669 (set_attr "type" "sseimul")
9670 (set_attr "prefix_extra" "1")
9671 (set_attr "prefix" "evex")
9672 (set_attr "mode" "XI")])
9673
9674 (define_expand "vec_widen_smult_even_v8si<mask_name>"
9675 [(set (match_operand:V4DI 0 "register_operand")
9676 (mult:V4DI
9677 (sign_extend:V4DI
9678 (vec_select:V4SI
9679 (match_operand:V8SI 1 "nonimmediate_operand")
9680 (parallel [(const_int 0) (const_int 2)
9681 (const_int 4) (const_int 6)])))
9682 (sign_extend:V4DI
9683 (vec_select:V4SI
9684 (match_operand:V8SI 2 "nonimmediate_operand")
9685 (parallel [(const_int 0) (const_int 2)
9686 (const_int 4) (const_int 6)])))))]
9687 "TARGET_AVX2 && <mask_avx512vl_condition>"
9688 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
9689
9690 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
9691 [(set (match_operand:V4DI 0 "register_operand" "=v")
9692 (mult:V4DI
9693 (sign_extend:V4DI
9694 (vec_select:V4SI
9695 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9696 (parallel [(const_int 0) (const_int 2)
9697 (const_int 4) (const_int 6)])))
9698 (sign_extend:V4DI
9699 (vec_select:V4SI
9700 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9701 (parallel [(const_int 0) (const_int 2)
9702 (const_int 4) (const_int 6)])))))]
9703 "TARGET_AVX2
9704 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9705 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9706 [(set_attr "type" "sseimul")
9707 (set_attr "prefix_extra" "1")
9708 (set_attr "prefix" "vex")
9709 (set_attr "mode" "OI")])
9710
9711 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
9712 [(set (match_operand:V2DI 0 "register_operand")
9713 (mult:V2DI
9714 (sign_extend:V2DI
9715 (vec_select:V2SI
9716 (match_operand:V4SI 1 "vector_operand")
9717 (parallel [(const_int 0) (const_int 2)])))
9718 (sign_extend:V2DI
9719 (vec_select:V2SI
9720 (match_operand:V4SI 2 "vector_operand")
9721 (parallel [(const_int 0) (const_int 2)])))))]
9722 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
9723 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
9724
9725 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
9726 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
9727 (mult:V2DI
9728 (sign_extend:V2DI
9729 (vec_select:V2SI
9730 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
9731 (parallel [(const_int 0) (const_int 2)])))
9732 (sign_extend:V2DI
9733 (vec_select:V2SI
9734 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
9735 (parallel [(const_int 0) (const_int 2)])))))]
9736 "TARGET_SSE4_1 && <mask_avx512vl_condition>
9737 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9738 "@
9739 pmuldq\t{%2, %0|%0, %2}
9740 pmuldq\t{%2, %0|%0, %2}
9741 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9742 [(set_attr "isa" "noavx,noavx,avx")
9743 (set_attr "type" "sseimul")
9744 (set_attr "prefix_data16" "1,1,*")
9745 (set_attr "prefix_extra" "1")
9746 (set_attr "prefix" "orig,orig,vex")
9747 (set_attr "mode" "TI")])
9748
9749 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
9750 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
9751 (unspec:<sseunpackmode>
9752 [(match_operand:VI2_AVX2 1 "register_operand" "v")
9753 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
9754 UNSPEC_PMADDWD512))]
9755 "TARGET_AVX512BW && <mask_mode512bit_condition>"
9756 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
9757 [(set_attr "type" "sseiadd")
9758 (set_attr "prefix" "evex")
9759 (set_attr "mode" "XI")])
9760
9761 (define_expand "avx2_pmaddwd"
9762 [(set (match_operand:V8SI 0 "register_operand")
9763 (plus:V8SI
9764 (mult:V8SI
9765 (sign_extend:V8SI
9766 (vec_select:V8HI
9767 (match_operand:V16HI 1 "nonimmediate_operand")
9768 (parallel [(const_int 0) (const_int 2)
9769 (const_int 4) (const_int 6)
9770 (const_int 8) (const_int 10)
9771 (const_int 12) (const_int 14)])))
9772 (sign_extend:V8SI
9773 (vec_select:V8HI
9774 (match_operand:V16HI 2 "nonimmediate_operand")
9775 (parallel [(const_int 0) (const_int 2)
9776 (const_int 4) (const_int 6)
9777 (const_int 8) (const_int 10)
9778 (const_int 12) (const_int 14)]))))
9779 (mult:V8SI
9780 (sign_extend:V8SI
9781 (vec_select:V8HI (match_dup 1)
9782 (parallel [(const_int 1) (const_int 3)
9783 (const_int 5) (const_int 7)
9784 (const_int 9) (const_int 11)
9785 (const_int 13) (const_int 15)])))
9786 (sign_extend:V8SI
9787 (vec_select:V8HI (match_dup 2)
9788 (parallel [(const_int 1) (const_int 3)
9789 (const_int 5) (const_int 7)
9790 (const_int 9) (const_int 11)
9791 (const_int 13) (const_int 15)]))))))]
9792 "TARGET_AVX2"
9793 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
9794
9795 (define_insn "*avx2_pmaddwd"
9796 [(set (match_operand:V8SI 0 "register_operand" "=x")
9797 (plus:V8SI
9798 (mult:V8SI
9799 (sign_extend:V8SI
9800 (vec_select:V8HI
9801 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
9802 (parallel [(const_int 0) (const_int 2)
9803 (const_int 4) (const_int 6)
9804 (const_int 8) (const_int 10)
9805 (const_int 12) (const_int 14)])))
9806 (sign_extend:V8SI
9807 (vec_select:V8HI
9808 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9809 (parallel [(const_int 0) (const_int 2)
9810 (const_int 4) (const_int 6)
9811 (const_int 8) (const_int 10)
9812 (const_int 12) (const_int 14)]))))
9813 (mult:V8SI
9814 (sign_extend:V8SI
9815 (vec_select:V8HI (match_dup 1)
9816 (parallel [(const_int 1) (const_int 3)
9817 (const_int 5) (const_int 7)
9818 (const_int 9) (const_int 11)
9819 (const_int 13) (const_int 15)])))
9820 (sign_extend:V8SI
9821 (vec_select:V8HI (match_dup 2)
9822 (parallel [(const_int 1) (const_int 3)
9823 (const_int 5) (const_int 7)
9824 (const_int 9) (const_int 11)
9825 (const_int 13) (const_int 15)]))))))]
9826 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9827 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9828 [(set_attr "type" "sseiadd")
9829 (set_attr "prefix" "vex")
9830 (set_attr "mode" "OI")])
9831
9832 (define_expand "sse2_pmaddwd"
9833 [(set (match_operand:V4SI 0 "register_operand")
9834 (plus:V4SI
9835 (mult:V4SI
9836 (sign_extend:V4SI
9837 (vec_select:V4HI
9838 (match_operand:V8HI 1 "vector_operand")
9839 (parallel [(const_int 0) (const_int 2)
9840 (const_int 4) (const_int 6)])))
9841 (sign_extend:V4SI
9842 (vec_select:V4HI
9843 (match_operand:V8HI 2 "vector_operand")
9844 (parallel [(const_int 0) (const_int 2)
9845 (const_int 4) (const_int 6)]))))
9846 (mult:V4SI
9847 (sign_extend:V4SI
9848 (vec_select:V4HI (match_dup 1)
9849 (parallel [(const_int 1) (const_int 3)
9850 (const_int 5) (const_int 7)])))
9851 (sign_extend:V4SI
9852 (vec_select:V4HI (match_dup 2)
9853 (parallel [(const_int 1) (const_int 3)
9854 (const_int 5) (const_int 7)]))))))]
9855 "TARGET_SSE2"
9856 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
9857
9858 (define_insn "*sse2_pmaddwd"
9859 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9860 (plus:V4SI
9861 (mult:V4SI
9862 (sign_extend:V4SI
9863 (vec_select:V4HI
9864 (match_operand:V8HI 1 "vector_operand" "%0,x")
9865 (parallel [(const_int 0) (const_int 2)
9866 (const_int 4) (const_int 6)])))
9867 (sign_extend:V4SI
9868 (vec_select:V4HI
9869 (match_operand:V8HI 2 "vector_operand" "xBm,xm")
9870 (parallel [(const_int 0) (const_int 2)
9871 (const_int 4) (const_int 6)]))))
9872 (mult:V4SI
9873 (sign_extend:V4SI
9874 (vec_select:V4HI (match_dup 1)
9875 (parallel [(const_int 1) (const_int 3)
9876 (const_int 5) (const_int 7)])))
9877 (sign_extend:V4SI
9878 (vec_select:V4HI (match_dup 2)
9879 (parallel [(const_int 1) (const_int 3)
9880 (const_int 5) (const_int 7)]))))))]
9881 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9882 "@
9883 pmaddwd\t{%2, %0|%0, %2}
9884 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9885 [(set_attr "isa" "noavx,avx")
9886 (set_attr "type" "sseiadd")
9887 (set_attr "atom_unit" "simul")
9888 (set_attr "prefix_data16" "1,*")
9889 (set_attr "prefix" "orig,vex")
9890 (set_attr "mode" "TI")])
9891
9892 (define_insn "avx512dq_mul<mode>3<mask_name>"
9893 [(set (match_operand:VI8 0 "register_operand" "=v")
9894 (mult:VI8
9895 (match_operand:VI8 1 "register_operand" "v")
9896 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
9897 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
9898 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9899 [(set_attr "type" "sseimul")
9900 (set_attr "prefix" "evex")
9901 (set_attr "mode" "<sseinsnmode>")])
9902
9903 (define_expand "mul<mode>3<mask_name>"
9904 [(set (match_operand:VI4_AVX512F 0 "register_operand")
9905 (mult:VI4_AVX512F
9906 (match_operand:VI4_AVX512F 1 "general_vector_operand")
9907 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
9908 "TARGET_SSE2 && <mask_mode512bit_condition>"
9909 {
9910 if (TARGET_SSE4_1)
9911 {
9912 if (!vector_operand (operands[1], <MODE>mode))
9913 operands[1] = force_reg (<MODE>mode, operands[1]);
9914 if (!vector_operand (operands[2], <MODE>mode))
9915 operands[2] = force_reg (<MODE>mode, operands[2]);
9916 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
9917 }
9918 else
9919 {
9920 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
9921 DONE;
9922 }
9923 })
9924
9925 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
9926 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
9927 (mult:VI4_AVX512F
9928 (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
9929 (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
9930 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
9931 "@
9932 pmulld\t{%2, %0|%0, %2}
9933 pmulld\t{%2, %0|%0, %2}
9934 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9935 [(set_attr "isa" "noavx,noavx,avx")
9936 (set_attr "type" "sseimul")
9937 (set_attr "prefix_extra" "1")
9938 (set_attr "prefix" "<mask_prefix4>")
9939 (set_attr "btver2_decode" "vector,vector,vector")
9940 (set_attr "mode" "<sseinsnmode>")])
9941
9942 (define_expand "mul<mode>3"
9943 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
9944 (mult:VI8_AVX2_AVX512F
9945 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
9946 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
9947 "TARGET_SSE2"
9948 {
9949 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
9950 DONE;
9951 })
9952
9953 (define_expand "vec_widen_<s>mult_hi_<mode>"
9954 [(match_operand:<sseunpackmode> 0 "register_operand")
9955 (any_extend:<sseunpackmode>
9956 (match_operand:VI124_AVX2 1 "register_operand"))
9957 (match_operand:VI124_AVX2 2 "register_operand")]
9958 "TARGET_SSE2"
9959 {
9960 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
9961 <u_bool>, true);
9962 DONE;
9963 })
9964
9965 (define_expand "vec_widen_<s>mult_lo_<mode>"
9966 [(match_operand:<sseunpackmode> 0 "register_operand")
9967 (any_extend:<sseunpackmode>
9968 (match_operand:VI124_AVX2 1 "register_operand"))
9969 (match_operand:VI124_AVX2 2 "register_operand")]
9970 "TARGET_SSE2"
9971 {
9972 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
9973 <u_bool>, false);
9974 DONE;
9975 })
9976
9977 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
9978 ;; named patterns, but signed V4SI needs special help for plain SSE2.
9979 (define_expand "vec_widen_smult_even_v4si"
9980 [(match_operand:V2DI 0 "register_operand")
9981 (match_operand:V4SI 1 "vector_operand")
9982 (match_operand:V4SI 2 "vector_operand")]
9983 "TARGET_SSE2"
9984 {
9985 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
9986 false, false);
9987 DONE;
9988 })
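;; Illustrative sketch only, not compiler code (variable names are
;; made up for the example): the even widening signed multiply
;; expanded above computes
;;
;;   /* a, b: four 32-bit lanes; dst: two 64-bit lanes.  */
;;   for (int i = 0; i < 2; i++)
;;     dst[i] = (long long) a[2 * i] * (long long) b[2 * i];
;;
;; SSE2 only has the unsigned form (pmuludq); the signed pmuldq needs
;; SSE4.1, so ix86_expand_mul_widen_evenodd synthesizes it here.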
9989
9990 (define_expand "vec_widen_<s>mult_odd_<mode>"
9991 [(match_operand:<sseunpackmode> 0 "register_operand")
9992 (any_extend:<sseunpackmode>
9993 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
9994 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
9995 "TARGET_SSE2"
9996 {
9997 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
9998 <u_bool>, true);
9999 DONE;
10000 })
10001
10002 (define_mode_attr SDOT_PMADD_SUF
10003 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
10004
10005 (define_expand "sdot_prod<mode>"
10006 [(match_operand:<sseunpackmode> 0 "register_operand")
10007 (match_operand:VI2_AVX2 1 "register_operand")
10008 (match_operand:VI2_AVX2 2 "register_operand")
10009 (match_operand:<sseunpackmode> 3 "register_operand")]
10010 "TARGET_SSE2"
10011 {
10012 rtx t = gen_reg_rtx (<sseunpackmode>mode);
10013 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
10014 emit_insn (gen_rtx_SET (operands[0],
10015 gen_rtx_PLUS (<sseunpackmode>mode,
10016 operands[3], t)));
10017 DONE;
10018 })
10019
10020 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
10021 ;; back together when madd is available.
10022 (define_expand "sdot_prodv4si"
10023 [(match_operand:V2DI 0 "register_operand")
10024 (match_operand:V4SI 1 "register_operand")
10025 (match_operand:V4SI 2 "register_operand")
10026 (match_operand:V2DI 3 "register_operand")]
10027 "TARGET_XOP"
10028 {
10029 rtx t = gen_reg_rtx (V2DImode);
10030 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
10031 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
10032 DONE;
10033 })
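;; Illustrative sketch only, not compiler code (variable names are
;; made up for the example): the two XOP multiply-accumulates above
;; compute, per 64-bit lane,
;;
;;   /* a, b: four 32-bit lanes; acc, dst: two 64-bit lanes.  */
;;   for (int i = 0; i < 2; i++)
;;     dst[i] = acc[i]
;;              + (long long) a[2 * i] * (long long) b[2 * i]
;;              + (long long) a[2 * i + 1] * (long long) b[2 * i + 1];
;;
;; pmacsdqh accumulates the products of the odd (high) dwords and
;; pmacsdql those of the even (low) dwords.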
10034
10035 (define_expand "usadv16qi"
10036 [(match_operand:V4SI 0 "register_operand")
10037 (match_operand:V16QI 1 "register_operand")
10038 (match_operand:V16QI 2 "vector_operand")
10039 (match_operand:V4SI 3 "vector_operand")]
10040 "TARGET_SSE2"
10041 {
10042 rtx t1 = gen_reg_rtx (V2DImode);
10043 rtx t2 = gen_reg_rtx (V4SImode);
10044 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
10045 convert_move (t2, t1, 0);
10046 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
10047 DONE;
10048 })
10049
10050 (define_expand "usadv32qi"
10051 [(match_operand:V8SI 0 "register_operand")
10052 (match_operand:V32QI 1 "register_operand")
10053 (match_operand:V32QI 2 "nonimmediate_operand")
10054 (match_operand:V8SI 3 "nonimmediate_operand")]
10055 "TARGET_AVX2"
10056 {
10057 rtx t1 = gen_reg_rtx (V4DImode);
10058 rtx t2 = gen_reg_rtx (V8SImode);
10059 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
10060 convert_move (t2, t1, 0);
10061 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
10062 DONE;
10063 })
10064
10065 (define_insn "ashr<mode>3"
10066 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
10067 (ashiftrt:VI24_AVX2
10068 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
10069 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
10070 "TARGET_SSE2"
10071 "@
10072 psra<ssemodesuffix>\t{%2, %0|%0, %2}
10073 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10074 [(set_attr "isa" "noavx,avx")
10075 (set_attr "type" "sseishft")
10076 (set (attr "length_immediate")
10077 (if_then_else (match_operand 2 "const_int_operand")
10078 (const_string "1")
10079 (const_string "0")))
10080 (set_attr "prefix_data16" "1,*")
10081 (set_attr "prefix" "orig,vex")
10082 (set_attr "mode" "<sseinsnmode>")])
10083
10084 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
10085 [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
10086 (ashiftrt:VI24_AVX512BW_1
10087 (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
10088 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
10089 "TARGET_AVX512VL"
10090 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10091 [(set_attr "type" "sseishft")
10092 (set (attr "length_immediate")
10093 (if_then_else (match_operand 2 "const_int_operand")
10094 (const_string "1")
10095 (const_string "0")))
10096 (set_attr "mode" "<sseinsnmode>")])
10097
10098 (define_insn "<mask_codefor>ashrv2di3<mask_name>"
10099 [(set (match_operand:V2DI 0 "register_operand" "=v,v")
10100 (ashiftrt:V2DI
10101 (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
10102 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10103 "TARGET_AVX512VL"
10104 "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10105 [(set_attr "type" "sseishft")
10106 (set (attr "length_immediate")
10107 (if_then_else (match_operand 2 "const_int_operand")
10108 (const_string "1")
10109 (const_string "0")))
10110 (set_attr "mode" "TI")])
10111
10112 (define_insn "ashr<mode>3<mask_name>"
10113 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
10114 (ashiftrt:VI248_AVX512BW_AVX512VL
10115 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
10116 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
10117 "TARGET_AVX512F"
10118 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10119 [(set_attr "type" "sseishft")
10120 (set (attr "length_immediate")
10121 (if_then_else (match_operand 2 "const_int_operand")
10122 (const_string "1")
10123 (const_string "0")))
10124 (set_attr "mode" "<sseinsnmode>")])
10125
10126 (define_insn "<shift_insn><mode>3<mask_name>"
10127 [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
10128 (any_lshift:VI2_AVX2_AVX512BW
10129 (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
10130 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
10131 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10132 "@
10133 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10134 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10135 [(set_attr "isa" "noavx,avx")
10136 (set_attr "type" "sseishft")
10137 (set (attr "length_immediate")
10138 (if_then_else (match_operand 2 "const_int_operand")
10139 (const_string "1")
10140 (const_string "0")))
10141 (set_attr "prefix_data16" "1,*")
10142 (set_attr "prefix" "orig,vex")
10143 (set_attr "mode" "<sseinsnmode>")])
10144
10145 (define_insn "<shift_insn><mode>3<mask_name>"
10146 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,x,v")
10147 (any_lshift:VI48_AVX2
10148 (match_operand:VI48_AVX2 1 "register_operand" "0,x,v")
10149 (match_operand:SI 2 "nonmemory_operand" "xN,xN,vN")))]
10150 "TARGET_SSE2 && <mask_mode512bit_condition>"
10151 "@
10152 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10153 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
10154 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10155 [(set_attr "isa" "noavx,avx,avx512bw")
10156 (set_attr "type" "sseishft")
10157 (set (attr "length_immediate")
10158 (if_then_else (match_operand 2 "const_int_operand")
10159 (const_string "1")
10160 (const_string "0")))
10161 (set_attr "prefix_data16" "1,*,*")
10162 (set_attr "prefix" "orig,vex,evex")
10163 (set_attr "mode" "<sseinsnmode>")])
10164
10165 (define_insn "<shift_insn><mode>3<mask_name>"
10166 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
10167 (any_lshift:VI48_512
10168 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
10169 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
10170 "TARGET_AVX512F && <mask_mode512bit_condition>"
10171 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10172 [(set_attr "isa" "avx512f")
10173 (set_attr "type" "sseishft")
10174 (set (attr "length_immediate")
10175 (if_then_else (match_operand 2 "const_int_operand")
10176 (const_string "1")
10177 (const_string "0")))
10178 (set_attr "prefix" "evex")
10179 (set_attr "mode" "<sseinsnmode>")])
10180
10181
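;; The vec_shl_<mode> and vec_shr_<mode> expanders below shift the
;; whole vector viewed as a V1TI value.  The shift count is given in
;; bits and must be a multiple of 8, since the underlying pslldq and
;; psrldq instructions shift by whole bytes.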
10182 (define_expand "vec_shl_<mode>"
10183 [(set (match_dup 3)
10184 (ashift:V1TI
10185 (match_operand:VI_128 1 "register_operand")
10186 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10187 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10188 "TARGET_SSE2"
10189 {
10190 operands[1] = gen_lowpart (V1TImode, operands[1]);
10191 operands[3] = gen_reg_rtx (V1TImode);
10192 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10193 })
10194
10195 (define_insn "<sse2_avx2>_ashl<mode>3"
10196 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10197 (ashift:VIMAX_AVX2
10198 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10199 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10200 "TARGET_SSE2"
10201 {
10202 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);  /* Bit count -> byte count for pslldq.  */
10203
10204 switch (which_alternative)
10205 {
10206 case 0:
10207 return "pslldq\t{%2, %0|%0, %2}";
10208 case 1:
10209 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
10210 default:
10211 gcc_unreachable ();
10212 }
10213 }
10214 [(set_attr "isa" "noavx,avx")
10215 (set_attr "type" "sseishft")
10216 (set_attr "length_immediate" "1")
10217 (set_attr "prefix_data16" "1,*")
10218 (set_attr "prefix" "orig,vex")
10219 (set_attr "mode" "<sseinsnmode>")])
10220
10221 (define_expand "vec_shr_<mode>"
10222 [(set (match_dup 3)
10223 (lshiftrt:V1TI
10224 (match_operand:VI_128 1 "register_operand")
10225 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10226 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10227 "TARGET_SSE2"
10228 {
10229 operands[1] = gen_lowpart (V1TImode, operands[1]);
10230 operands[3] = gen_reg_rtx (V1TImode);
10231 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10232 })
10233
10234 (define_insn "<sse2_avx2>_lshr<mode>3"
10235 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10236 (lshiftrt:VIMAX_AVX2
10237 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10238 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10239 "TARGET_SSE2"
10240 {
10241 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);  /* Bit count -> byte count for psrldq.  */
10242
10243 switch (which_alternative)
10244 {
10245 case 0:
10246 return "psrldq\t{%2, %0|%0, %2}";
10247 case 1:
10248 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10249 default:
10250 gcc_unreachable ();
10251 }
10252 }
10253 [(set_attr "isa" "noavx,avx")
10254 (set_attr "type" "sseishft")
10255 (set_attr "length_immediate" "1")
10256 (set_attr "atom_unit" "sishuf")
10257 (set_attr "prefix_data16" "1,*")
10258 (set_attr "prefix" "orig,vex")
10259 (set_attr "mode" "<sseinsnmode>")])
10260
10261 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
10262 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10263 (any_rotate:VI48_AVX512VL
10264 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10265 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10266 "TARGET_AVX512F"
10267 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10268 [(set_attr "prefix" "evex")
10269 (set_attr "mode" "<sseinsnmode>")])
10270
10271 (define_insn "<avx512>_<rotate><mode><mask_name>"
10272 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10273 (any_rotate:VI48_AVX512VL
10274 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
10275 (match_operand:SI 2 "const_0_to_255_operand")))]
10276 "TARGET_AVX512F"
10277 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10278 [(set_attr "prefix" "evex")
10279 (set_attr "mode" "<sseinsnmode>")])
10280
10281 (define_expand "<code><mode>3"
10282 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
10283 (maxmin:VI124_256_AVX512F_AVX512BW
10284 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
10285 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
10286 "TARGET_AVX2"
10287 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10288
10289 (define_insn "*avx2_<code><mode>3"
10290 [(set (match_operand:VI124_256 0 "register_operand" "=v")
10291 (maxmin:VI124_256
10292 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10293 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10294 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10295 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10296 [(set_attr "type" "sseiadd")
10297 (set_attr "prefix_extra" "1")
10298 (set_attr "prefix" "vex")
10299 (set_attr "mode" "OI")])
10300
10301 (define_expand "<code><mode>3_mask"
10302 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10303 (vec_merge:VI48_AVX512VL
10304 (maxmin:VI48_AVX512VL
10305 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10306 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10307 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10308 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10309 "TARGET_AVX512F"
10310 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10311
10312 (define_insn "*avx512bw_<code><mode>3<mask_name>"
10313 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10314 (maxmin:VI48_AVX512VL
10315 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10316 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10317 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10318 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10319 [(set_attr "type" "sseiadd")
10320 (set_attr "prefix_extra" "1")
10321 (set_attr "prefix" "maybe_evex")
10322 (set_attr "mode" "<sseinsnmode>")])
10323
10324 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10325 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10326 (maxmin:VI12_AVX512VL
10327 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10328 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10329 "TARGET_AVX512BW"
10330 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10331 [(set_attr "type" "sseiadd")
10332 (set_attr "prefix" "evex")
10333 (set_attr "mode" "<sseinsnmode>")])
10334
10335 (define_expand "<code><mode>3"
10336 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand")
10337 (maxmin:VI8_AVX2_AVX512BW
10338 (match_operand:VI8_AVX2_AVX512BW 1 "register_operand")
10339 (match_operand:VI8_AVX2_AVX512BW 2 "register_operand")))]
10340 "TARGET_SSE4_2"
10341 {
10342 if (TARGET_AVX512F
10343 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10344 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10345 else
10346 {
10347 enum rtx_code code;
10348 rtx xops[6];
10349 bool ok;
10350
10351
10352 xops[0] = operands[0];
10353
10354 if (<CODE> == SMAX || <CODE> == UMAX)
10355 {
10356 xops[1] = operands[1];
10357 xops[2] = operands[2];
10358 }
10359 else
10360 {
10361 xops[1] = operands[2];
10362 xops[2] = operands[1];
10363 }
10364
10365 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
10366
10367 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10368 xops[4] = operands[1];
10369 xops[5] = operands[2];
10370
10371 ok = ix86_expand_int_vcond (xops);
10372 gcc_assert (ok);
10373 DONE;
10374 }
10375 })
10376
10377 (define_expand "<code><mode>3"
10378 [(set (match_operand:VI124_128 0 "register_operand")
10379 (smaxmin:VI124_128
10380 (match_operand:VI124_128 1 "vector_operand")
10381 (match_operand:VI124_128 2 "vector_operand")))]
10382 "TARGET_SSE2"
10383 {
10384 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10385 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10386 else
10387 {
10388 rtx xops[6];
10389 bool ok;
10390
10391 xops[0] = operands[0];
10392 operands[1] = force_reg (<MODE>mode, operands[1]);
10393 operands[2] = force_reg (<MODE>mode, operands[2]);
10394
10395 if (<CODE> == SMAX)
10396 {
10397 xops[1] = operands[1];
10398 xops[2] = operands[2];
10399 }
10400 else
10401 {
10402 xops[1] = operands[2];
10403 xops[2] = operands[1];
10404 }
10405
10406 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
10407 xops[4] = operands[1];
10408 xops[5] = operands[2];
10409
10410 ok = ix86_expand_int_vcond (xops);
10411 gcc_assert (ok);
10412 DONE;
10413 }
10414 })
10415
10416 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10417 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
10418 (smaxmin:VI14_128
10419 (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
10420 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
10421 "TARGET_SSE4_1
10422 && <mask_mode512bit_condition>
10423 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10424 "@
10425 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10426 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10427 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10428 [(set_attr "isa" "noavx,noavx,avx")
10429 (set_attr "type" "sseiadd")
10430 (set_attr "prefix_extra" "1,1,*")
10431 (set_attr "prefix" "orig,orig,vex")
10432 (set_attr "mode" "TI")])
10433
10434 (define_insn "*<code>v8hi3"
10435 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10436 (smaxmin:V8HI
10437 (match_operand:V8HI 1 "vector_operand" "%0,x")
10438 (match_operand:V8HI 2 "vector_operand" "xBm,xm")))]
10439 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
10440 "@
10441 p<maxmin_int>w\t{%2, %0|%0, %2}
10442 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
10443 [(set_attr "isa" "noavx,avx")
10444 (set_attr "type" "sseiadd")
10445 (set_attr "prefix_data16" "1,*")
10446 (set_attr "prefix_extra" "*,1")
10447 (set_attr "prefix" "orig,vex")
10448 (set_attr "mode" "TI")])
10449
10450 (define_expand "<code><mode>3"
10451 [(set (match_operand:VI124_128 0 "register_operand")
10452 (umaxmin:VI124_128
10453 (match_operand:VI124_128 1 "vector_operand")
10454 (match_operand:VI124_128 2 "vector_operand")))]
10455 "TARGET_SSE2"
10456 {
10457 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
10458 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10459 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
10460 {
10461 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
10462 operands[1] = force_reg (<MODE>mode, operands[1]);
10463 if (rtx_equal_p (op3, op2))
10464 op3 = gen_reg_rtx (V8HImode);
10465 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
10466 emit_insn (gen_addv8hi3 (op0, op3, op2));
10467 DONE;
10468 }
10469 else
10470 {
10471 rtx xops[6];
10472 bool ok;
10473
10474 operands[1] = force_reg (<MODE>mode, operands[1]);
10475 operands[2] = force_reg (<MODE>mode, operands[2]);
10476
10477 xops[0] = operands[0];
10478
10479 if (<CODE> == UMAX)
10480 {
10481 xops[1] = operands[1];
10482 xops[2] = operands[2];
10483 }
10484 else
10485 {
10486 xops[1] = operands[2];
10487 xops[2] = operands[1];
10488 }
10489
10490 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
10491 xops[4] = operands[1];
10492 xops[5] = operands[2];
10493
10494 ok = ix86_expand_int_vcond (xops);
10495 gcc_assert (ok);
10496 DONE;
10497 }
10498 })
10499
10500 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10501 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
10502 (umaxmin:VI24_128
10503 (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
10504 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
10505 "TARGET_SSE4_1
10506 && <mask_mode512bit_condition>
10507 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10508 "@
10509 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10510 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10511 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10512 [(set_attr "isa" "noavx,noavx,avx")
10513 (set_attr "type" "sseiadd")
10514 (set_attr "prefix_extra" "1,1,*")
10515 (set_attr "prefix" "orig,orig,vex")
10516 (set_attr "mode" "TI")])
10517
10518 (define_insn "*<code>v16qi3"
10519 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10520 (umaxmin:V16QI
10521 (match_operand:V16QI 1 "vector_operand" "%0,x")
10522 (match_operand:V16QI 2 "vector_operand" "xBm,xm")))]
10523 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
10524 "@
10525 p<maxmin_int>b\t{%2, %0|%0, %2}
10526 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
10527 [(set_attr "isa" "noavx,avx")
10528 (set_attr "type" "sseiadd")
10529 (set_attr "prefix_data16" "1,*")
10530 (set_attr "prefix_extra" "*,1")
10531 (set_attr "prefix" "orig,vex")
10532 (set_attr "mode" "TI")])
10533
10534 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10535 ;;
10536 ;; Parallel integral comparisons
10537 ;;
10538 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10539
10540 (define_expand "avx2_eq<mode>3"
10541 [(set (match_operand:VI_256 0 "register_operand")
10542 (eq:VI_256
10543 (match_operand:VI_256 1 "nonimmediate_operand")
10544 (match_operand:VI_256 2 "nonimmediate_operand")))]
10545 "TARGET_AVX2"
10546 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10547
10548 (define_insn "*avx2_eq<mode>3"
10549 [(set (match_operand:VI_256 0 "register_operand" "=x")
10550 (eq:VI_256
10551 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
10552 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10553 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10554 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10555 [(set_attr "type" "ssecmp")
10556 (set_attr "prefix_extra" "1")
10557 (set_attr "prefix" "vex")
10558 (set_attr "mode" "OI")])
10559
10560 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10561 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10562 (unspec:<avx512fmaskmode>
10563 [(match_operand:VI12_AVX512VL 1 "register_operand")
10564 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
10565 UNSPEC_MASKED_EQ))]
10566 "TARGET_AVX512BW"
10567 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10568
10569 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10570 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10571 (unspec:<avx512fmaskmode>
10572 [(match_operand:VI48_AVX512VL 1 "register_operand")
10573 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
10574 UNSPEC_MASKED_EQ))]
10575 "TARGET_AVX512F"
10576 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10577
10578 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10579 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10580 (unspec:<avx512fmaskmode>
10581 [(match_operand:VI12_AVX512VL 1 "register_operand" "%v")
10582 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
10583 UNSPEC_MASKED_EQ))]
10584 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10585 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10586 [(set_attr "type" "ssecmp")
10587 (set_attr "prefix_extra" "1")
10588 (set_attr "prefix" "evex")
10589 (set_attr "mode" "<sseinsnmode>")])
10590
10591 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10592 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10593 (unspec:<avx512fmaskmode>
10594 [(match_operand:VI48_AVX512VL 1 "register_operand" "%v")
10595 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
10596 UNSPEC_MASKED_EQ))]
10597 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10598 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10599 [(set_attr "type" "ssecmp")
10600 (set_attr "prefix_extra" "1")
10601 (set_attr "prefix" "evex")
10602 (set_attr "mode" "<sseinsnmode>")])
10603
10604 (define_insn "*sse4_1_eqv2di3"
10605 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10606 (eq:V2DI
10607 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
10608 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
10609 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
10610 "@
10611 pcmpeqq\t{%2, %0|%0, %2}
10612 pcmpeqq\t{%2, %0|%0, %2}
10613 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
10614 [(set_attr "isa" "noavx,noavx,avx")
10615 (set_attr "type" "ssecmp")
10616 (set_attr "prefix_extra" "1")
10617 (set_attr "prefix" "orig,orig,vex")
10618 (set_attr "mode" "TI")])
10619
10620 (define_insn "*sse2_eq<mode>3"
10621 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10622 (eq:VI124_128
10623 (match_operand:VI124_128 1 "vector_operand" "%0,x")
10624 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
10625 "TARGET_SSE2 && !TARGET_XOP
10626 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10627 "@
10628 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
10629 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10630 [(set_attr "isa" "noavx,avx")
10631 (set_attr "type" "ssecmp")
10632 (set_attr "prefix_data16" "1,*")
10633 (set_attr "prefix" "orig,vex")
10634 (set_attr "mode" "TI")])
10635
10636 (define_expand "sse2_eq<mode>3"
10637 [(set (match_operand:VI124_128 0 "register_operand")
10638 (eq:VI124_128
10639 (match_operand:VI124_128 1 "vector_operand")
10640 (match_operand:VI124_128 2 "vector_operand")))]
10641 "TARGET_SSE2 && !TARGET_XOP "
10642 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10643
10644 (define_expand "sse4_1_eqv2di3"
10645 [(set (match_operand:V2DI 0 "register_operand")
10646 (eq:V2DI
10647 (match_operand:V2DI 1 "vector_operand")
10648 (match_operand:V2DI 2 "vector_operand")))]
10649 "TARGET_SSE4_1"
10650 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
10651
10652 (define_insn "sse4_2_gtv2di3"
10653 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10654 (gt:V2DI
10655 (match_operand:V2DI 1 "register_operand" "0,0,x")
10656 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
10657 "TARGET_SSE4_2"
10658 "@
10659 pcmpgtq\t{%2, %0|%0, %2}
10660 pcmpgtq\t{%2, %0|%0, %2}
10661 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
10662 [(set_attr "isa" "noavx,noavx,avx")
10663 (set_attr "type" "ssecmp")
10664 (set_attr "prefix_extra" "1")
10665 (set_attr "prefix" "orig,orig,vex")
10666 (set_attr "mode" "TI")])
10667
10668 (define_insn "avx2_gt<mode>3"
10669 [(set (match_operand:VI_256 0 "register_operand" "=x")
10670 (gt:VI_256
10671 (match_operand:VI_256 1 "register_operand" "x")
10672 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10673 "TARGET_AVX2"
10674 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10675 [(set_attr "type" "ssecmp")
10676 (set_attr "prefix_extra" "1")
10677 (set_attr "prefix" "vex")
10678 (set_attr "mode" "OI")])
10679
10680 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10681 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10682 (unspec:<avx512fmaskmode>
10683 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
10684 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
UNSPEC_MASKED_GT))]
10685 "TARGET_AVX512F"
10686 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10687 [(set_attr "type" "ssecmp")
10688 (set_attr "prefix_extra" "1")
10689 (set_attr "prefix" "evex")
10690 (set_attr "mode" "<sseinsnmode>")])
10691
10692 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10693 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10694 (unspec:<avx512fmaskmode>
10695 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
10696 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
UNSPEC_MASKED_GT))]
10697 "TARGET_AVX512BW"
10698 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10699 [(set_attr "type" "ssecmp")
10700 (set_attr "prefix_extra" "1")
10701 (set_attr "prefix" "evex")
10702 (set_attr "mode" "<sseinsnmode>")])
10703
10704 (define_insn "sse2_gt<mode>3"
10705 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10706 (gt:VI124_128
10707 (match_operand:VI124_128 1 "register_operand" "0,x")
10708 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
10709 "TARGET_SSE2 && !TARGET_XOP"
10710 "@
10711 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
10712 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10713 [(set_attr "isa" "noavx,avx")
10714 (set_attr "type" "ssecmp")
10715 (set_attr "prefix_data16" "1,*")
10716 (set_attr "prefix" "orig,vex")
10717 (set_attr "mode" "TI")])
10718
10719 (define_expand "vcond<V_512:mode><VI_512:mode>"
10720 [(set (match_operand:V_512 0 "register_operand")
10721 (if_then_else:V_512
10722 (match_operator 3 ""
10723 [(match_operand:VI_512 4 "nonimmediate_operand")
10724 (match_operand:VI_512 5 "general_operand")])
10725 (match_operand:V_512 1)
10726 (match_operand:V_512 2)))]
10727 "TARGET_AVX512F
10728 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10729 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10730 {
10731 bool ok = ix86_expand_int_vcond (operands);
10732 gcc_assert (ok);
10733 DONE;
10734 })
10735
10736 (define_expand "vcond<V_256:mode><VI_256:mode>"
10737 [(set (match_operand:V_256 0 "register_operand")
10738 (if_then_else:V_256
10739 (match_operator 3 ""
10740 [(match_operand:VI_256 4 "nonimmediate_operand")
10741 (match_operand:VI_256 5 "general_operand")])
10742 (match_operand:V_256 1)
10743 (match_operand:V_256 2)))]
10744 "TARGET_AVX2
10745 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10746 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10747 {
10748 bool ok = ix86_expand_int_vcond (operands);
10749 gcc_assert (ok);
10750 DONE;
10751 })
10752
10753 (define_expand "vcond<V_128:mode><VI124_128:mode>"
10754 [(set (match_operand:V_128 0 "register_operand")
10755 (if_then_else:V_128
10756 (match_operator 3 ""
10757 [(match_operand:VI124_128 4 "vector_operand")
10758 (match_operand:VI124_128 5 "general_operand")])
10759 (match_operand:V_128 1)
10760 (match_operand:V_128 2)))]
10761 "TARGET_SSE2
10762 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10763 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10764 {
10765 bool ok = ix86_expand_int_vcond (operands);
10766 gcc_assert (ok);
10767 DONE;
10768 })
10769
10770 (define_expand "vcond<VI8F_128:mode>v2di"
10771 [(set (match_operand:VI8F_128 0 "register_operand")
10772 (if_then_else:VI8F_128
10773 (match_operator 3 ""
10774 [(match_operand:V2DI 4 "vector_operand")
10775 (match_operand:V2DI 5 "general_operand")])
10776 (match_operand:VI8F_128 1)
10777 (match_operand:VI8F_128 2)))]
10778 "TARGET_SSE4_2"
10779 {
10780 bool ok = ix86_expand_int_vcond (operands);
10781 gcc_assert (ok);
10782 DONE;
10783 })
10784
10785 (define_expand "vcondu<V_512:mode><VI_512:mode>"
10786 [(set (match_operand:V_512 0 "register_operand")
10787 (if_then_else:V_512
10788 (match_operator 3 ""
10789 [(match_operand:VI_512 4 "nonimmediate_operand")
10790 (match_operand:VI_512 5 "nonimmediate_operand")])
10791 (match_operand:V_512 1 "general_operand")
10792 (match_operand:V_512 2 "general_operand")))]
10793 "TARGET_AVX512F
10794 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10795 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10796 {
10797 bool ok = ix86_expand_int_vcond (operands);
10798 gcc_assert (ok);
10799 DONE;
10800 })
10801
10802 (define_expand "vcondu<V_256:mode><VI_256:mode>"
10803 [(set (match_operand:V_256 0 "register_operand")
10804 (if_then_else:V_256
10805 (match_operator 3 ""
10806 [(match_operand:VI_256 4 "nonimmediate_operand")
10807 (match_operand:VI_256 5 "nonimmediate_operand")])
10808 (match_operand:V_256 1 "general_operand")
10809 (match_operand:V_256 2 "general_operand")))]
10810 "TARGET_AVX2
10811 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10812 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10813 {
10814 bool ok = ix86_expand_int_vcond (operands);
10815 gcc_assert (ok);
10816 DONE;
10817 })
10818
10819 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
10820 [(set (match_operand:V_128 0 "register_operand")
10821 (if_then_else:V_128
10822 (match_operator 3 ""
10823 [(match_operand:VI124_128 4 "vector_operand")
10824 (match_operand:VI124_128 5 "vector_operand")])
10825 (match_operand:V_128 1 "general_operand")
10826 (match_operand:V_128 2 "general_operand")))]
10827 "TARGET_SSE2
10828 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10829 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10830 {
10831 bool ok = ix86_expand_int_vcond (operands);
10832 gcc_assert (ok);
10833 DONE;
10834 })
10835
10836 (define_expand "vcondu<VI8F_128:mode>v2di"
10837 [(set (match_operand:VI8F_128 0 "register_operand")
10838 (if_then_else:VI8F_128
10839 (match_operator 3 ""
10840 [(match_operand:V2DI 4 "vector_operand")
10841 (match_operand:V2DI 5 "vector_operand")])
10842 (match_operand:VI8F_128 1 "general_operand")
10843 (match_operand:VI8F_128 2 "general_operand")))]
10844 "TARGET_SSE4_2"
10845 {
10846 bool ok = ix86_expand_int_vcond (operands);
10847 gcc_assert (ok);
10848 DONE;
10849 })
10850
10851 (define_mode_iterator VEC_PERM_AVX2
10852 [V16QI V8HI V4SI V2DI V4SF V2DF
10853 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10854 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
10855 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
10856 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10857 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10858 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
10859
10860 (define_expand "vec_perm<mode>"
10861 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
10862 (match_operand:VEC_PERM_AVX2 1 "register_operand")
10863 (match_operand:VEC_PERM_AVX2 2 "register_operand")
10864 (match_operand:<sseintvecmode> 3 "register_operand")]
10865 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
10866 {
10867 ix86_expand_vec_perm (operands);
10868 DONE;
10869 })
10870
10871 (define_mode_iterator VEC_PERM_CONST
10872 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
10873 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
10874 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
10875 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
10876 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
10877 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10878 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10879 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10880 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
10881
10882 (define_expand "vec_perm_const<mode>"
10883 [(match_operand:VEC_PERM_CONST 0 "register_operand")
10884 (match_operand:VEC_PERM_CONST 1 "register_operand")
10885 (match_operand:VEC_PERM_CONST 2 "register_operand")
10886 (match_operand:<sseintvecmode> 3)]
10887 ""
10888 {
10889 if (ix86_expand_vec_perm_const (operands))
10890 DONE;
10891 else
10892 FAIL;
10893 })
10894
10895 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10896 ;;
10897 ;; Parallel bitwise logical operations
10898 ;;
10899 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10900
10901 (define_expand "one_cmpl<mode>2"
10902 [(set (match_operand:VI 0 "register_operand")
10903 (xor:VI (match_operand:VI 1 "vector_operand")
10904 (match_dup 2)))]
10905 "TARGET_SSE"
10906 {
10907 int i, n = GET_MODE_NUNITS (<MODE>mode);
10908 rtvec v = rtvec_alloc (n);
10909
10910 for (i = 0; i < n; ++i)
10911 RTVEC_ELT (v, i) = constm1_rtx;
10912
10913 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
10914 })
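;; One's complement is synthesized as an exclusive-or with an all-ones vector;
;; e.g. for a V4SImode operand X the expander above produces, roughly,
;;   (xor:V4SI X (const_vector:V4SI [-1 -1 -1 -1]))
;; with the constant forced into a register by the preparation statement.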
10915
10916 (define_expand "<sse2_avx2>_andnot<mode>3"
10917 [(set (match_operand:VI_AVX2 0 "register_operand")
10918 (and:VI_AVX2
10919 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
10920 (match_operand:VI_AVX2 2 "vector_operand")))]
10921 "TARGET_SSE2")
10922
10923 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
10924 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10925 (vec_merge:VI48_AVX512VL
10926 (and:VI48_AVX512VL
10927 (not:VI48_AVX512VL
10928 (match_operand:VI48_AVX512VL 1 "register_operand"))
10929 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10930 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10931 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10932 "TARGET_AVX512F")
10933
10934 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
10935 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
10936 (vec_merge:VI12_AVX512VL
10937 (and:VI12_AVX512VL
10938 (not:VI12_AVX512VL
10939 (match_operand:VI12_AVX512VL 1 "register_operand"))
10940 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10941 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
10942 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10943 "TARGET_AVX512BW")
10944
10945 (define_insn "*andnot<mode>3"
10946 [(set (match_operand:VI 0 "register_operand" "=x,v")
10947 (and:VI
10948 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
10949 (match_operand:VI 2 "vector_operand" "xBm,vm")))]
10950 "TARGET_SSE"
10951 {
10952 static char buf[64];
10953 const char *ops;
10954 const char *tmp;
10955
10956 switch (get_attr_mode (insn))
10957 {
10958 case MODE_XI:
10959 gcc_assert (TARGET_AVX512F);
10960 case MODE_OI:
10961 gcc_assert (TARGET_AVX2);
10962 case MODE_TI:
10963 gcc_assert (TARGET_SSE2);
10964 switch (<MODE>mode)
10965 {
10966 case V64QImode:
10967 case V32HImode:
10968 /* There is no vpandnb or vpandnw instruction, nor vpandn for
10969 512-bit vectors. Use vpandnq instead. */
10970 tmp = "pandnq";
10971 break;
10972 case V16SImode:
10973 case V8DImode:
10974 tmp = "pandn<ssemodesuffix>";
10975 break;
10976 case V8SImode:
10977 case V4DImode:
10978 case V4SImode:
10979 case V2DImode:
10980 tmp = TARGET_AVX512VL ? "pandn<ssemodesuffix>" : "pandn";
10981 break;
10982 default:
10983 tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
10984 break;
10985 }
10986 break;
10987
10988 case MODE_V16SF:
10989 gcc_assert (TARGET_AVX512F);
10990 case MODE_V8SF:
10991 gcc_assert (TARGET_AVX);
10992 case MODE_V4SF:
10993 gcc_assert (TARGET_SSE);
10994
10995 tmp = "andnps";
10996 break;
10997
10998 default:
10999 gcc_unreachable ();
11000 }
11001
11002 switch (which_alternative)
11003 {
11004 case 0:
11005 ops = "%s\t{%%2, %%0|%%0, %%2}";
11006 break;
11007 case 1:
11008 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11009 break;
11010 default:
11011 gcc_unreachable ();
11012 }
11013
11014 snprintf (buf, sizeof (buf), ops, tmp);
11015 return buf;
11016 }
11017 [(set_attr "isa" "noavx,avx")
11018 (set_attr "type" "sselog")
11019 (set (attr "prefix_data16")
11020 (if_then_else
11021 (and (eq_attr "alternative" "0")
11022 (eq_attr "mode" "TI"))
11023 (const_string "1")
11024 (const_string "*")))
11025 (set_attr "prefix" "orig,vex")
11026 (set (attr "mode")
11027 (cond [(and (match_test "<MODE_SIZE> == 16")
11028 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11029 (const_string "<ssePSmode>")
11030 (match_test "TARGET_AVX2")
11031 (const_string "<sseinsnmode>")
11032 (match_test "TARGET_AVX")
11033 (if_then_else
11034 (match_test "<MODE_SIZE> > 16")
11035 (const_string "V8SF")
11036 (const_string "<sseinsnmode>"))
11037 (ior (not (match_test "TARGET_SSE2"))
11038 (match_test "optimize_function_for_size_p (cfun)"))
11039 (const_string "V4SF")
11040 ]
11041 (const_string "<sseinsnmode>")))])
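;; A reading of the mode attribute above: with AVX but not AVX2 there is no
;; 256-bit vpandn, so 256-bit integer modes are given mode V8SF and emitted
;; as vandnps; likewise, without SSE2 or when optimizing for size the
;; packed-single form andnps (mode V4SF) is used.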
11042
11043 (define_insn "*andnot<mode>3_mask"
11044 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11045 (vec_merge:VI48_AVX512VL
11046 (and:VI48_AVX512VL
11047 (not:VI48_AVX512VL
11048 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
11049 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
11050 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
11051 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11052 "TARGET_AVX512F"
11053 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
11054 [(set_attr "type" "sselog")
11055 (set_attr "prefix" "evex")
11056 (set_attr "mode" "<sseinsnmode>")])
11057
11058 (define_expand "<code><mode>3"
11059 [(set (match_operand:VI 0 "register_operand")
11060 (any_logic:VI
11061 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
11062 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
11063 "TARGET_SSE"
11064 {
11065 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
11066 DONE;
11067 })
11068
11069 (define_insn "<mask_codefor><code><mode>3<mask_name>"
11070 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,v")
11071 (any_logic:VI48_AVX_AVX512F
11072 (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,v")
11073 (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,vm")))]
11074 "TARGET_SSE && <mask_mode512bit_condition>
11075 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11076 {
11077 static char buf[64];
11078 const char *ops;
11079 const char *tmp;
11080
11081 switch (get_attr_mode (insn))
11082 {
11083 case MODE_XI:
11084 gcc_assert (TARGET_AVX512F);
11085 case MODE_OI:
11086 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
11087 case MODE_TI:
11088 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
11089 switch (<MODE>mode)
11090 {
11091 case V16SImode:
11092 case V8DImode:
11093 if (TARGET_AVX512F)
11094 {
11095 tmp = "p<logic><ssemodesuffix>";
11096 break;
11097 }
11098 case V8SImode:
11099 case V4DImode:
11100 case V4SImode:
11101 case V2DImode:
11102 tmp = TARGET_AVX512VL ? "p<logic><ssemodesuffix>" : "p<logic>";
11103 break;
11104 default:
11105 gcc_unreachable ();
11106 }
11107 break;
11108
11109 case MODE_V8SF:
11110 gcc_assert (TARGET_AVX);
11111 case MODE_V4SF:
11112 gcc_assert (TARGET_SSE);
11113 gcc_assert (!<mask_applied>);
11114 tmp = "<logic>ps";
11115 break;
11116
11117 default:
11118 gcc_unreachable ();
11119 }
11120
11121 switch (which_alternative)
11122 {
11123 case 0:
11124 if (<mask_applied>)
11125 ops = "v%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
11126 else
11127 ops = "%s\t{%%2, %%0|%%0, %%2}";
11128 break;
11129 case 1:
11130 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
11131 break;
11132 default:
11133 gcc_unreachable ();
11134 }
11135
11136 snprintf (buf, sizeof (buf), ops, tmp);
11137 return buf;
11138 }
11139 [(set_attr "isa" "noavx,avx")
11140 (set_attr "type" "sselog")
11141 (set (attr "prefix_data16")
11142 (if_then_else
11143 (and (eq_attr "alternative" "0")
11144 (eq_attr "mode" "TI"))
11145 (const_string "1")
11146 (const_string "*")))
11147 (set_attr "prefix" "<mask_prefix3>")
11148 (set (attr "mode")
11149 (cond [(and (match_test "<MODE_SIZE> == 16")
11150 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11151 (const_string "<ssePSmode>")
11152 (match_test "TARGET_AVX2")
11153 (const_string "<sseinsnmode>")
11154 (match_test "TARGET_AVX")
11155 (if_then_else
11156 (match_test "<MODE_SIZE> > 16")
11157 (const_string "V8SF")
11158 (const_string "<sseinsnmode>"))
11159 (ior (not (match_test "TARGET_SSE2"))
11160 (match_test "optimize_function_for_size_p (cfun)"))
11161 (const_string "V4SF")
11162 ]
11163 (const_string "<sseinsnmode>")))])
11164
11165 (define_insn "*<code><mode>3"
11166 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,v")
11167 (any_logic:VI12_AVX_AVX512F
11168 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,v")
11169 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,vm")))]
11170 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11171 {
11172 static char buf[64];
11173 const char *ops;
11174 const char *tmp;
11175 const char *ssesuffix;
11176
11177 switch (get_attr_mode (insn))
11178 {
11179 case MODE_XI:
11180 gcc_assert (TARGET_AVX512F);
11181 case MODE_OI:
11182 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
11183 case MODE_TI:
11184 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
11185 switch (<MODE>mode)
11186 {
11187 case V64QImode:
11188 case V32HImode:
11189 if (TARGET_AVX512F)
11190 {
11191 tmp = "p<logic>";
11192 ssesuffix = "q";
11193 break;
11194 }
11195 case V32QImode:
11196 case V16HImode:
11197 case V16QImode:
11198 case V8HImode:
11199 if (TARGET_AVX512VL || TARGET_AVX2 || TARGET_SSE2)
11200 {
11201 tmp = "p<logic>";
11202 ssesuffix = TARGET_AVX512VL ? "q" : "";
11203 break;
11204 }
11205 default:
11206 gcc_unreachable ();
11207 }
11208 break;
11209
11210 case MODE_V8SF:
11211 gcc_assert (TARGET_AVX);
11212 case MODE_V4SF:
11213 gcc_assert (TARGET_SSE);
11214 tmp = "<logic>ps";
11215 ssesuffix = "";
11216 break;
11217
11218 default:
11219 gcc_unreachable ();
11220 }
11221
11222 switch (which_alternative)
11223 {
11224 case 0:
11225 ops = "%s\t{%%2, %%0|%%0, %%2}";
11226 snprintf (buf, sizeof (buf), ops, tmp);
11227 break;
11228 case 1:
11229 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11230 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11231 break;
11232 default:
11233 gcc_unreachable ();
11234 }
11235
11236 return buf;
11237 }
11238 [(set_attr "isa" "noavx,avx")
11239 (set_attr "type" "sselog")
11240 (set (attr "prefix_data16")
11241 (if_then_else
11242 (and (eq_attr "alternative" "0")
11243 (eq_attr "mode" "TI"))
11244 (const_string "1")
11245 (const_string "*")))
11246 (set_attr "prefix" "<mask_prefix3>")
11247 (set (attr "mode")
11248 (cond [(and (match_test "<MODE_SIZE> == 16")
11249 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11250 (const_string "<ssePSmode>")
11251 (match_test "TARGET_AVX2")
11252 (const_string "<sseinsnmode>")
11253 (match_test "TARGET_AVX")
11254 (if_then_else
11255 (match_test "<MODE_SIZE> > 16")
11256 (const_string "V8SF")
11257 (const_string "<sseinsnmode>"))
11258 (ior (not (match_test "TARGET_SSE2"))
11259 (match_test "optimize_function_for_size_p (cfun)"))
11260 (const_string "V4SF")
11261 ]
11262 (const_string "<sseinsnmode>")))])
11263
11264 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11265 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11266 (unspec:<avx512fmaskmode>
11267 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11268 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11269 UNSPEC_TESTM))]
11270 "TARGET_AVX512BW"
11271 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11272 [(set_attr "prefix" "evex")
11273 (set_attr "mode" "<sseinsnmode>")])
11274
11275 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11276 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11277 (unspec:<avx512fmaskmode>
11278 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11279 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11280 UNSPEC_TESTM))]
11281 "TARGET_AVX512F"
11282 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11283 [(set_attr "prefix" "evex")
11284 (set_attr "mode" "<sseinsnmode>")])
11285
11286 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11287 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11288 (unspec:<avx512fmaskmode>
11289 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11290 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11291 UNSPEC_TESTNM))]
11292 "TARGET_AVX512BW"
11293 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11294 [(set_attr "prefix" "evex")
11295 (set_attr "mode" "<sseinsnmode>")])
11296
11297 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11298 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11299 (unspec:<avx512fmaskmode>
11300 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11301 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11302 UNSPEC_TESTNM))]
11303 "TARGET_AVX512F"
11304 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11305 [(set_attr "prefix" "evex")
11306 (set_attr "mode" "<sseinsnmode>")])
11307
11308 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11309 ;;
11310 ;; Parallel integral element swizzling
11311 ;;
11312 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11313
11314 (define_expand "vec_pack_trunc_<mode>"
11315 [(match_operand:<ssepackmode> 0 "register_operand")
11316 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
11317 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
11318 "TARGET_SSE2"
11319 {
11320 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
11321 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
11322 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
11323 DONE;
11324 })
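;; Worked example of the expansion above: packing two V4SImode inputs into
;; V8HImode reinterprets each input as V8HI and keeps the even-numbered
;; halfwords, i.e. the low 16 bits of every 32-bit element, which is exactly
;; modular truncation.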
11325
11326 (define_expand "vec_pack_trunc_qi"
11327 [(set (match_operand:HI 0 "register_operand")
11328 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
11329 (const_int 8))
11330 (zero_extend:HI (match_operand:QI 1 "register_operand"))))]
11331 "TARGET_AVX512F")
11332
11333 (define_expand "vec_pack_trunc_<mode>"
11334 [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
11335 (ior:<DOUBLEMASKMODE> (ashift:<DOUBLEMASKMODE> (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 2 "register_operand"))
11336 (match_dup 3))
11337 (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 1 "register_operand"))))]
11338 "TARGET_AVX512BW"
11339 {
11340 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
11341 })
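;; For the mask-register variants above the "pack" is just a shift and an IOR;
;; e.g. for SWI24 == HImode, operands[3] is 16, so the result is
;;   (op2 << 16) | zero_extend (op1)
;; i.e. operand 1 ends up in the low half of the double-width mask.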
11342
11343 (define_insn "<sse2_avx2>_packsswb<mask_name>"
11344 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11345 (vec_concat:VI1_AVX512
11346 (ss_truncate:<ssehalfvecmode>
11347 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11348 (ss_truncate:<ssehalfvecmode>
11349 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,vm"))))]
11350 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11351 "@
11352 packsswb\t{%2, %0|%0, %2}
11353 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11354 [(set_attr "isa" "noavx,avx")
11355 (set_attr "type" "sselog")
11356 (set_attr "prefix_data16" "1,*")
11357 (set_attr "prefix" "orig,maybe_evex")
11358 (set_attr "mode" "<sseinsnmode>")])
11359
11360 (define_insn "<sse2_avx2>_packssdw<mask_name>"
11361 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11362 (vec_concat:VI2_AVX2
11363 (ss_truncate:<ssehalfvecmode>
11364 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11365 (ss_truncate:<ssehalfvecmode>
11366 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,vm"))))]
11367 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11368 "@
11369 packssdw\t{%2, %0|%0, %2}
11370 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11371 [(set_attr "isa" "noavx,avx")
11372 (set_attr "type" "sselog")
11373 (set_attr "prefix_data16" "1,*")
11374 (set_attr "prefix" "orig,vex")
11375 (set_attr "mode" "<sseinsnmode>")])
11376
11377 (define_insn "<sse2_avx2>_packuswb<mask_name>"
11378 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11379 (vec_concat:VI1_AVX512
11380 (us_truncate:<ssehalfvecmode>
11381 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11382 (us_truncate:<ssehalfvecmode>
11383 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,vm"))))]
11384 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11385 "@
11386 packuswb\t{%2, %0|%0, %2}
11387 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11388 [(set_attr "isa" "noavx,avx")
11389 (set_attr "type" "sselog")
11390 (set_attr "prefix_data16" "1,*")
11391 (set_attr "prefix" "orig,vex")
11392 (set_attr "mode" "<sseinsnmode>")])
11393
11394 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
11395 [(set (match_operand:V64QI 0 "register_operand" "=v")
11396 (vec_select:V64QI
11397 (vec_concat:V128QI
11398 (match_operand:V64QI 1 "register_operand" "v")
11399 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11400 (parallel [(const_int 8) (const_int 72)
11401 (const_int 9) (const_int 73)
11402 (const_int 10) (const_int 74)
11403 (const_int 11) (const_int 75)
11404 (const_int 12) (const_int 76)
11405 (const_int 13) (const_int 77)
11406 (const_int 14) (const_int 78)
11407 (const_int 15) (const_int 79)
11408 (const_int 24) (const_int 88)
11409 (const_int 25) (const_int 89)
11410 (const_int 26) (const_int 90)
11411 (const_int 27) (const_int 91)
11412 (const_int 28) (const_int 92)
11413 (const_int 29) (const_int 93)
11414 (const_int 30) (const_int 94)
11415 (const_int 31) (const_int 95)
11416 (const_int 40) (const_int 104)
11417 (const_int 41) (const_int 105)
11418 (const_int 42) (const_int 106)
11419 (const_int 43) (const_int 107)
11420 (const_int 44) (const_int 108)
11421 (const_int 45) (const_int 109)
11422 (const_int 46) (const_int 110)
11423 (const_int 47) (const_int 111)
11424 (const_int 56) (const_int 120)
11425 (const_int 57) (const_int 121)
11426 (const_int 58) (const_int 122)
11427 (const_int 59) (const_int 123)
11428 (const_int 60) (const_int 124)
11429 (const_int 61) (const_int 125)
11430 (const_int 62) (const_int 126)
11431 (const_int 63) (const_int 127)])))]
11432 "TARGET_AVX512BW"
11433 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11434 [(set_attr "type" "sselog")
11435 (set_attr "prefix" "evex")
11436 (set_attr "mode" "XI")])
11437
11438 (define_insn "avx2_interleave_highv32qi<mask_name>"
11439 [(set (match_operand:V32QI 0 "register_operand" "=v")
11440 (vec_select:V32QI
11441 (vec_concat:V64QI
11442 (match_operand:V32QI 1 "register_operand" "v")
11443 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
11444 (parallel [(const_int 8) (const_int 40)
11445 (const_int 9) (const_int 41)
11446 (const_int 10) (const_int 42)
11447 (const_int 11) (const_int 43)
11448 (const_int 12) (const_int 44)
11449 (const_int 13) (const_int 45)
11450 (const_int 14) (const_int 46)
11451 (const_int 15) (const_int 47)
11452 (const_int 24) (const_int 56)
11453 (const_int 25) (const_int 57)
11454 (const_int 26) (const_int 58)
11455 (const_int 27) (const_int 59)
11456 (const_int 28) (const_int 60)
11457 (const_int 29) (const_int 61)
11458 (const_int 30) (const_int 62)
11459 (const_int 31) (const_int 63)])))]
11460 "TARGET_AVX2 && <mask_avx512vl_condition>"
11461 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11462 [(set_attr "type" "sselog")
11463 (set_attr "prefix" "<mask_prefix>")
11464 (set_attr "mode" "OI")])
11465
11466 (define_insn "vec_interleave_highv16qi<mask_name>"
11467 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
11468 (vec_select:V16QI
11469 (vec_concat:V32QI
11470 (match_operand:V16QI 1 "register_operand" "0,v")
11471 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
11472 (parallel [(const_int 8) (const_int 24)
11473 (const_int 9) (const_int 25)
11474 (const_int 10) (const_int 26)
11475 (const_int 11) (const_int 27)
11476 (const_int 12) (const_int 28)
11477 (const_int 13) (const_int 29)
11478 (const_int 14) (const_int 30)
11479 (const_int 15) (const_int 31)])))]
11480 "TARGET_SSE2 && <mask_avx512vl_condition>"
11481 "@
11482 punpckhbw\t{%2, %0|%0, %2}
11483 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11484 [(set_attr "isa" "noavx,avx")
11485 (set_attr "type" "sselog")
11486 (set_attr "prefix_data16" "1,*")
11487 (set_attr "prefix" "orig,<mask_prefix>")
11488 (set_attr "mode" "TI")])
11489
11490 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
11491 [(set (match_operand:V64QI 0 "register_operand" "=v")
11492 (vec_select:V64QI
11493 (vec_concat:V128QI
11494 (match_operand:V64QI 1 "register_operand" "v")
11495 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11496 (parallel [(const_int 0) (const_int 64)
11497 (const_int 1) (const_int 65)
11498 (const_int 2) (const_int 66)
11499 (const_int 3) (const_int 67)
11500 (const_int 4) (const_int 68)
11501 (const_int 5) (const_int 69)
11502 (const_int 6) (const_int 70)
11503 (const_int 7) (const_int 71)
11504 (const_int 16) (const_int 80)
11505 (const_int 17) (const_int 81)
11506 (const_int 18) (const_int 82)
11507 (const_int 19) (const_int 83)
11508 (const_int 20) (const_int 84)
11509 (const_int 21) (const_int 85)
11510 (const_int 22) (const_int 86)
11511 (const_int 23) (const_int 87)
11512 (const_int 32) (const_int 96)
11513 (const_int 33) (const_int 97)
11514 (const_int 34) (const_int 98)
11515 (const_int 35) (const_int 99)
11516 (const_int 36) (const_int 100)
11517 (const_int 37) (const_int 101)
11518 (const_int 38) (const_int 102)
11519 (const_int 39) (const_int 103)
11520 (const_int 48) (const_int 112)
11521 (const_int 49) (const_int 113)
11522 (const_int 50) (const_int 114)
11523 (const_int 51) (const_int 115)
11524 (const_int 52) (const_int 116)
11525 (const_int 53) (const_int 117)
11526 (const_int 54) (const_int 118)
11527 (const_int 55) (const_int 119)])))]
11528 "TARGET_AVX512BW"
11529 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11530 [(set_attr "type" "sselog")
11531 (set_attr "prefix" "evex")
11532 (set_attr "mode" "XI")])
11533
11534 (define_insn "avx2_interleave_lowv32qi<mask_name>"
11535 [(set (match_operand:V32QI 0 "register_operand" "=v")
11536 (vec_select:V32QI
11537 (vec_concat:V64QI
11538 (match_operand:V32QI 1 "register_operand" "v")
11539 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
11540 (parallel [(const_int 0) (const_int 32)
11541 (const_int 1) (const_int 33)
11542 (const_int 2) (const_int 34)
11543 (const_int 3) (const_int 35)
11544 (const_int 4) (const_int 36)
11545 (const_int 5) (const_int 37)
11546 (const_int 6) (const_int 38)
11547 (const_int 7) (const_int 39)
11548 (const_int 16) (const_int 48)
11549 (const_int 17) (const_int 49)
11550 (const_int 18) (const_int 50)
11551 (const_int 19) (const_int 51)
11552 (const_int 20) (const_int 52)
11553 (const_int 21) (const_int 53)
11554 (const_int 22) (const_int 54)
11555 (const_int 23) (const_int 55)])))]
11556 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11557 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11558 [(set_attr "type" "sselog")
11559 (set_attr "prefix" "maybe_vex")
11560 (set_attr "mode" "OI")])
11561
11562 (define_insn "vec_interleave_lowv16qi<mask_name>"
11563 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
11564 (vec_select:V16QI
11565 (vec_concat:V32QI
11566 (match_operand:V16QI 1 "register_operand" "0,v")
11567 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
11568 (parallel [(const_int 0) (const_int 16)
11569 (const_int 1) (const_int 17)
11570 (const_int 2) (const_int 18)
11571 (const_int 3) (const_int 19)
11572 (const_int 4) (const_int 20)
11573 (const_int 5) (const_int 21)
11574 (const_int 6) (const_int 22)
11575 (const_int 7) (const_int 23)])))]
11576 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11577 "@
11578 punpcklbw\t{%2, %0|%0, %2}
11579 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11580 [(set_attr "isa" "noavx,avx")
11581 (set_attr "type" "sselog")
11582 (set_attr "prefix_data16" "1,*")
11583 (set_attr "prefix" "orig,vex")
11584 (set_attr "mode" "TI")])
11585
11586 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
11587 [(set (match_operand:V32HI 0 "register_operand" "=v")
11588 (vec_select:V32HI
11589 (vec_concat:V64HI
11590 (match_operand:V32HI 1 "register_operand" "v")
11591 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11592 (parallel [(const_int 4) (const_int 36)
11593 (const_int 5) (const_int 37)
11594 (const_int 6) (const_int 38)
11595 (const_int 7) (const_int 39)
11596 (const_int 12) (const_int 44)
11597 (const_int 13) (const_int 45)
11598 (const_int 14) (const_int 46)
11599 (const_int 15) (const_int 47)
11600 (const_int 20) (const_int 52)
11601 (const_int 21) (const_int 53)
11602 (const_int 22) (const_int 54)
11603 (const_int 23) (const_int 55)
11604 (const_int 28) (const_int 60)
11605 (const_int 29) (const_int 61)
11606 (const_int 30) (const_int 62)
11607 (const_int 31) (const_int 63)])))]
11608 "TARGET_AVX512BW"
11609 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11610 [(set_attr "type" "sselog")
11611 (set_attr "prefix" "evex")
11612 (set_attr "mode" "XI")])
11613
11614 (define_insn "avx2_interleave_highv16hi<mask_name>"
11615 [(set (match_operand:V16HI 0 "register_operand" "=v")
11616 (vec_select:V16HI
11617 (vec_concat:V32HI
11618 (match_operand:V16HI 1 "register_operand" "v")
11619 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11620 (parallel [(const_int 4) (const_int 20)
11621 (const_int 5) (const_int 21)
11622 (const_int 6) (const_int 22)
11623 (const_int 7) (const_int 23)
11624 (const_int 12) (const_int 28)
11625 (const_int 13) (const_int 29)
11626 (const_int 14) (const_int 30)
11627 (const_int 15) (const_int 31)])))]
11628 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11629 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11630 [(set_attr "type" "sselog")
11631 (set_attr "prefix" "maybe_evex")
11632 (set_attr "mode" "OI")])
11633
11634 (define_insn "vec_interleave_highv8hi<mask_name>"
11635 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11636 (vec_select:V8HI
11637 (vec_concat:V16HI
11638 (match_operand:V8HI 1 "register_operand" "0,v")
11639 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
11640 (parallel [(const_int 4) (const_int 12)
11641 (const_int 5) (const_int 13)
11642 (const_int 6) (const_int 14)
11643 (const_int 7) (const_int 15)])))]
11644 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11645 "@
11646 punpckhwd\t{%2, %0|%0, %2}
11647 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11648 [(set_attr "isa" "noavx,avx")
11649 (set_attr "type" "sselog")
11650 (set_attr "prefix_data16" "1,*")
11651 (set_attr "prefix" "orig,maybe_vex")
11652 (set_attr "mode" "TI")])
11653
11654 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
11655 [(set (match_operand:V32HI 0 "register_operand" "=v")
11656 (vec_select:V32HI
11657 (vec_concat:V64HI
11658 (match_operand:V32HI 1 "register_operand" "v")
11659 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11660 (parallel [(const_int 0) (const_int 32)
11661 (const_int 1) (const_int 33)
11662 (const_int 2) (const_int 34)
11663 (const_int 3) (const_int 35)
11664 (const_int 8) (const_int 40)
11665 (const_int 9) (const_int 41)
11666 (const_int 10) (const_int 42)
11667 (const_int 11) (const_int 43)
11668 (const_int 16) (const_int 48)
11669 (const_int 17) (const_int 49)
11670 (const_int 18) (const_int 50)
11671 (const_int 19) (const_int 51)
11672 (const_int 24) (const_int 56)
11673 (const_int 25) (const_int 57)
11674 (const_int 26) (const_int 58)
11675 (const_int 27) (const_int 59)])))]
11676 "TARGET_AVX512BW"
11677 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11678 [(set_attr "type" "sselog")
11679 (set_attr "prefix" "evex")
11680 (set_attr "mode" "XI")])
11681
11682 (define_insn "avx2_interleave_lowv16hi<mask_name>"
11683 [(set (match_operand:V16HI 0 "register_operand" "=v")
11684 (vec_select:V16HI
11685 (vec_concat:V32HI
11686 (match_operand:V16HI 1 "register_operand" "v")
11687 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11688 (parallel [(const_int 0) (const_int 16)
11689 (const_int 1) (const_int 17)
11690 (const_int 2) (const_int 18)
11691 (const_int 3) (const_int 19)
11692 (const_int 8) (const_int 24)
11693 (const_int 9) (const_int 25)
11694 (const_int 10) (const_int 26)
11695 (const_int 11) (const_int 27)])))]
11696 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11697 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11698 [(set_attr "type" "sselog")
11699 (set_attr "prefix" "maybe_evex")
11700 (set_attr "mode" "OI")])
11701
11702 (define_insn "vec_interleave_lowv8hi<mask_name>"
11703 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11704 (vec_select:V8HI
11705 (vec_concat:V16HI
11706 (match_operand:V8HI 1 "register_operand" "0,v")
11707 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
11708 (parallel [(const_int 0) (const_int 8)
11709 (const_int 1) (const_int 9)
11710 (const_int 2) (const_int 10)
11711 (const_int 3) (const_int 11)])))]
11712 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11713 "@
11714 punpcklwd\t{%2, %0|%0, %2}
11715 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11716 [(set_attr "isa" "noavx,avx")
11717 (set_attr "type" "sselog")
11718 (set_attr "prefix_data16" "1,*")
11719 (set_attr "prefix" "orig,maybe_evex")
11720 (set_attr "mode" "TI")])
11721
11722 (define_insn "avx2_interleave_highv8si<mask_name>"
11723 [(set (match_operand:V8SI 0 "register_operand" "=v")
11724 (vec_select:V8SI
11725 (vec_concat:V16SI
11726 (match_operand:V8SI 1 "register_operand" "v")
11727 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11728 (parallel [(const_int 2) (const_int 10)
11729 (const_int 3) (const_int 11)
11730 (const_int 6) (const_int 14)
11731 (const_int 7) (const_int 15)])))]
11732 "TARGET_AVX2 && <mask_avx512vl_condition>"
11733 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11734 [(set_attr "type" "sselog")
11735 (set_attr "prefix" "maybe_evex")
11736 (set_attr "mode" "OI")])
11737
11738 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
11739 [(set (match_operand:V16SI 0 "register_operand" "=v")
11740 (vec_select:V16SI
11741 (vec_concat:V32SI
11742 (match_operand:V16SI 1 "register_operand" "v")
11743 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11744 (parallel [(const_int 2) (const_int 18)
11745 (const_int 3) (const_int 19)
11746 (const_int 6) (const_int 22)
11747 (const_int 7) (const_int 23)
11748 (const_int 10) (const_int 26)
11749 (const_int 11) (const_int 27)
11750 (const_int 14) (const_int 30)
11751 (const_int 15) (const_int 31)])))]
11752 "TARGET_AVX512F"
11753 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11754 [(set_attr "type" "sselog")
11755 (set_attr "prefix" "evex")
11756 (set_attr "mode" "XI")])
11757
11758
11759 (define_insn "vec_interleave_highv4si<mask_name>"
11760 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11761 (vec_select:V4SI
11762 (vec_concat:V8SI
11763 (match_operand:V4SI 1 "register_operand" "0,v")
11764 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
11765 (parallel [(const_int 2) (const_int 6)
11766 (const_int 3) (const_int 7)])))]
11767 "TARGET_SSE2 && <mask_avx512vl_condition>"
11768 "@
11769 punpckhdq\t{%2, %0|%0, %2}
11770 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11771 [(set_attr "isa" "noavx,avx")
11772 (set_attr "type" "sselog")
11773 (set_attr "prefix_data16" "1,*")
11774 (set_attr "prefix" "orig,maybe_vex")
11775 (set_attr "mode" "TI")])
11776
11777 (define_insn "avx2_interleave_lowv8si<mask_name>"
11778 [(set (match_operand:V8SI 0 "register_operand" "=v")
11779 (vec_select:V8SI
11780 (vec_concat:V16SI
11781 (match_operand:V8SI 1 "register_operand" "v")
11782 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11783 (parallel [(const_int 0) (const_int 8)
11784 (const_int 1) (const_int 9)
11785 (const_int 4) (const_int 12)
11786 (const_int 5) (const_int 13)])))]
11787 "TARGET_AVX2 && <mask_avx512vl_condition>"
11788 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11789 [(set_attr "type" "sselog")
11790 (set_attr "prefix" "maybe_evex")
11791 (set_attr "mode" "OI")])
11792
11793 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
11794 [(set (match_operand:V16SI 0 "register_operand" "=v")
11795 (vec_select:V16SI
11796 (vec_concat:V32SI
11797 (match_operand:V16SI 1 "register_operand" "v")
11798 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11799 (parallel [(const_int 0) (const_int 16)
11800 (const_int 1) (const_int 17)
11801 (const_int 4) (const_int 20)
11802 (const_int 5) (const_int 21)
11803 (const_int 8) (const_int 24)
11804 (const_int 9) (const_int 25)
11805 (const_int 12) (const_int 28)
11806 (const_int 13) (const_int 29)])))]
11807 "TARGET_AVX512F"
11808 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11809 [(set_attr "type" "sselog")
11810 (set_attr "prefix" "evex")
11811 (set_attr "mode" "XI")])
11812
11813 (define_insn "vec_interleave_lowv4si<mask_name>"
11814 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11815 (vec_select:V4SI
11816 (vec_concat:V8SI
11817 (match_operand:V4SI 1 "register_operand" "0,v")
11818 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
11819 (parallel [(const_int 0) (const_int 4)
11820 (const_int 1) (const_int 5)])))]
11821 "TARGET_SSE2 && <mask_avx512vl_condition>"
11822 "@
11823 punpckldq\t{%2, %0|%0, %2}
11824 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11825 [(set_attr "isa" "noavx,avx")
11826 (set_attr "type" "sselog")
11827 (set_attr "prefix_data16" "1,*")
11828 (set_attr "prefix" "orig,vex")
11829 (set_attr "mode" "TI")])
11830
11831 (define_expand "vec_interleave_high<mode>"
11832 [(match_operand:VI_256 0 "register_operand" "=x")
11833 (match_operand:VI_256 1 "register_operand" "x")
11834 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11835 "TARGET_AVX2"
11836 {
11837 rtx t1 = gen_reg_rtx (<MODE>mode);
11838 rtx t2 = gen_reg_rtx (<MODE>mode);
11839 rtx t3 = gen_reg_rtx (V4DImode);
11840 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11841 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
11842 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11843 gen_lowpart (V4DImode, t2),
11844 GEN_INT (1 + (3 << 4))));
11845 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
11846 DONE;
11847 })
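;; Note on the expander above: the 256-bit vpunpckh/vpunpckl instructions only
;; interleave within 128-bit lanes, so a full-width interleave is built from
;; both in-lane interleaves plus a cross-lane vperm2i128 (selector 0x31 here).
;; For V8SImode inputs {a0..a7} and {b0..b7} this yields
;;   {a4 b4 a5 b5 a6 b6 a7 b7}.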
11848
11849 (define_expand "vec_interleave_low<mode>"
11850 [(match_operand:VI_256 0 "register_operand" "=x")
11851 (match_operand:VI_256 1 "register_operand" "x")
11852 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11853 "TARGET_AVX2"
11854 {
11855 rtx t1 = gen_reg_rtx (<MODE>mode);
11856 rtx t2 = gen_reg_rtx (<MODE>mode);
11857 rtx t3 = gen_reg_rtx (V4DImode);
11858 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11859 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
11860 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11861 gen_lowpart (V4DImode, t2),
11862 GEN_INT (0 + (2 << 4))));
11863 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
11864 DONE;
11865 })
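;; The low-half variant above is analogous: the same two in-lane interleaves
;; are recombined with selector 0x20, giving {a0 b0 a1 b1 a2 b2 a3 b3} for
;; the V8SImode example.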
11866
11867 ;; Modes handled by pinsr patterns.
11868 (define_mode_iterator PINSR_MODE
11869 [(V16QI "TARGET_SSE4_1") V8HI
11870 (V4SI "TARGET_SSE4_1")
11871 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
11872
11873 (define_mode_attr sse2p4_1
11874 [(V16QI "sse4_1") (V8HI "sse2")
11875 (V4SI "sse4_1") (V2DI "sse4_1")])
11876
11877 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
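;; In the pattern below operand 3 is a one-hot vec_merge mask, not an element
;; index; the insn condition requires it to be a power of two and the output
;; code converts it with exact_log2.  For instance, inserting into element 2
;; of a V8HImode destination uses (const_int 4), printed as the immediate 2.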
11878 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
11879 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
11880 (vec_merge:PINSR_MODE
11881 (vec_duplicate:PINSR_MODE
11882 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
11883 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
11884 (match_operand:SI 3 "const_int_operand")))]
11885 "TARGET_SSE2
11886 && ((unsigned) exact_log2 (INTVAL (operands[3]))
11887 < GET_MODE_NUNITS (<MODE>mode))"
11888 {
11889 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
11890
11891 switch (which_alternative)
11892 {
11893 case 0:
11894 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11895 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
11896 /* FALLTHRU */
11897 case 1:
11898 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
11899 case 2:
11900 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11901 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
11902 /* FALLTHRU */
11903 case 3:
11904 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11905 default:
11906 gcc_unreachable ();
11907 }
11908 }
11909 [(set_attr "isa" "noavx,noavx,avx,avx")
11910 (set_attr "type" "sselog")
11911 (set (attr "prefix_rex")
11912 (if_then_else
11913 (and (not (match_test "TARGET_AVX"))
11914 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
11915 (const_string "1")
11916 (const_string "*")))
11917 (set (attr "prefix_data16")
11918 (if_then_else
11919 (and (not (match_test "TARGET_AVX"))
11920 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11921 (const_string "1")
11922 (const_string "*")))
11923 (set (attr "prefix_extra")
11924 (if_then_else
11925 (and (not (match_test "TARGET_AVX"))
11926 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11927 (const_string "*")
11928 (const_string "1")))
11929 (set_attr "length_immediate" "1")
11930 (set_attr "prefix" "orig,orig,vex,vex")
11931 (set_attr "mode" "TI")])
11932
11933 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
11934 [(match_operand:AVX512_VEC 0 "register_operand")
11935 (match_operand:AVX512_VEC 1 "register_operand")
11936 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
11937 (match_operand:SI 3 "const_0_to_3_operand")
11938 (match_operand:AVX512_VEC 4 "register_operand")
11939 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11940 "TARGET_AVX512F"
11941 {
11942 int mask, selector;
11943 mask = INTVAL (operands[3]);
11944 selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
11945 ? 0xFFFF ^ (0xF000 >> mask * 4)
11946 : 0xFF ^ (0xC0 >> mask * 2));
11947 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
11948 (operands[0], operands[1], operands[2], GEN_INT (selector),
11949 operands[4], operands[5]));
11950 DONE;
11951 })
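;; Worked examples of the selector computation above: with 32-bit elements
;; (16 of them), mask 1 gives 0xFFFF ^ (0xF000 >> 4) = 0xF0FF; with 64-bit
;; elements (8 of them), mask 2 gives 0xFF ^ (0xC0 >> 4) = 0xF3.  These are
;; exactly the values decoded back to 1 and 2 by the insn below.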
11952
11953 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
11954 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
11955 (vec_merge:AVX512_VEC
11956 (match_operand:AVX512_VEC 1 "register_operand" "v")
11957 (vec_duplicate:AVX512_VEC
11958 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
11959 (match_operand:SI 3 "const_int_operand" "n")))]
11960 "TARGET_AVX512F"
11961 {
11962 int mask;
11963 int selector = INTVAL (operands[3]);
11964
11965 if (selector == 0xFFF || selector == 0x3F)
11966 mask = 0;
11967 else if (selector == 0xF0FF || selector == 0xCF)
11968 mask = 1;
11969 else if (selector == 0xFF0F || selector == 0xF3)
11970 mask = 2;
11971 else if (selector == 0xFFF0 || selector == 0xFC)
11972 mask = 3;
11973 else
11974 gcc_unreachable ();
11975
11976 operands[3] = GEN_INT (mask);
11977
11978 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
11979 }
11980 [(set_attr "type" "sselog")
11981 (set_attr "length_immediate" "1")
11982 (set_attr "prefix" "evex")
11983 (set_attr "mode" "<sseinsnmode>")])
11984
11985 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
11986 [(match_operand:AVX512_VEC_2 0 "register_operand")
11987 (match_operand:AVX512_VEC_2 1 "register_operand")
11988 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
11989 (match_operand:SI 3 "const_0_to_1_operand")
11990 (match_operand:AVX512_VEC_2 4 "register_operand")
11991 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11992 "TARGET_AVX512F"
11993 {
11994 int mask = INTVAL (operands[3]);
11995 if (mask == 0)
11996 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
11997 operands[2], operands[4],
11998 operands[5]));
11999 else
12000 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
12001 operands[2], operands[4],
12002 operands[5]));
12003 DONE;
12004 })
12005
12006 (define_insn "vec_set_lo_<mode><mask_name>"
12007 [(set (match_operand:V16FI 0 "register_operand" "=v")
12008 (vec_concat:V16FI
12009 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12010 (vec_select:<ssehalfvecmode>
12011 (match_operand:V16FI 1 "register_operand" "v")
12012 (parallel [(const_int 8) (const_int 9)
12013 (const_int 10) (const_int 11)
12014 (const_int 12) (const_int 13)
12015 (const_int 14) (const_int 15)]))))]
12016 "TARGET_AVX512DQ"
12017 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
12018 [(set_attr "type" "sselog")
12019 (set_attr "length_immediate" "1")
12020 (set_attr "prefix" "evex")
12021 (set_attr "mode" "<sseinsnmode>")])
12022
12023 (define_insn "vec_set_hi_<mode><mask_name>"
12024 [(set (match_operand:V16FI 0 "register_operand" "=v")
12025 (vec_concat:V16FI
12026 (vec_select:<ssehalfvecmode>
12027 (match_operand:V16FI 1 "register_operand" "v")
12028 (parallel [(const_int 0) (const_int 1)
12029 (const_int 2) (const_int 3)
12030 (const_int 4) (const_int 5)
12031 (const_int 6) (const_int 7)]))
12032 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12033 "TARGET_AVX512DQ"
12034 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
12035 [(set_attr "type" "sselog")
12036 (set_attr "length_immediate" "1")
12037 (set_attr "prefix" "evex")
12038 (set_attr "mode" "<sseinsnmode>")])
12039
12040 (define_insn "vec_set_lo_<mode><mask_name>"
12041 [(set (match_operand:V8FI 0 "register_operand" "=v")
12042 (vec_concat:V8FI
12043 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12044 (vec_select:<ssehalfvecmode>
12045 (match_operand:V8FI 1 "register_operand" "v")
12046 (parallel [(const_int 4) (const_int 5)
12047 (const_int 6) (const_int 7)]))))]
12048 "TARGET_AVX512F"
12049 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
12050 [(set_attr "type" "sselog")
12051 (set_attr "length_immediate" "1")
12052 (set_attr "prefix" "evex")
12053 (set_attr "mode" "XI")])
12054
12055 (define_insn "vec_set_hi_<mode><mask_name>"
12056 [(set (match_operand:V8FI 0 "register_operand" "=v")
12057 (vec_concat:V8FI
12058 (vec_select:<ssehalfvecmode>
12059 (match_operand:V8FI 1 "register_operand" "v")
12060 (parallel [(const_int 0) (const_int 1)
12061 (const_int 2) (const_int 3)]))
12062 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12063 "TARGET_AVX512F"
12064 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
12065 [(set_attr "type" "sselog")
12066 (set_attr "length_immediate" "1")
12067 (set_attr "prefix" "evex")
12068 (set_attr "mode" "XI")])
12069
12070 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
12071 [(match_operand:VI8F_256 0 "register_operand")
12072 (match_operand:VI8F_256 1 "register_operand")
12073 (match_operand:VI8F_256 2 "nonimmediate_operand")
12074 (match_operand:SI 3 "const_0_to_3_operand")
12075 (match_operand:VI8F_256 4 "register_operand")
12076 (match_operand:QI 5 "register_operand")]
12077 "TARGET_AVX512DQ"
12078 {
12079 int mask = INTVAL (operands[3]);
12080 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
12081 (operands[0], operands[1], operands[2],
12082 GEN_INT (((mask >> 0) & 1) * 2 + 0),
12083 GEN_INT (((mask >> 0) & 1) * 2 + 1),
12084 GEN_INT (((mask >> 1) & 1) * 2 + 4),
12085 GEN_INT (((mask >> 1) & 1) * 2 + 5),
12086 operands[4], operands[5]));
12087 DONE;
12088 })
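;; Worked example of the expansion above: each bit of the two-bit immediate
;; selects one 128-bit (two-element) chunk.  For mask 2 (binary 10) the
;; generated parallel is {0, 1, 6, 7}: elements 0-1 of operand 1 followed by
;; elements 2-3 of operand 2 (indices 6-7 of the concatenation).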
12089
12090 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
12091 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
12092 (vec_select:VI8F_256
12093 (vec_concat:<ssedoublemode>
12094 (match_operand:VI8F_256 1 "register_operand" "v")
12095 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
12096 (parallel [(match_operand 3 "const_0_to_3_operand")
12097 (match_operand 4 "const_0_to_3_operand")
12098 (match_operand 5 "const_4_to_7_operand")
12099 (match_operand 6 "const_4_to_7_operand")])))]
12100 "TARGET_AVX512VL
12101 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12102 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
12103 {
12104 int mask;
12105 mask = INTVAL (operands[3]) / 2;
12106 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
12107 operands[3] = GEN_INT (mask);
12108 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
12109 }
12110 [(set_attr "type" "sselog")
12111 (set_attr "length_immediate" "1")
12112 (set_attr "prefix" "evex")
12113 (set_attr "mode" "XI")])
12114
12115 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
12116 [(match_operand:V8FI 0 "register_operand")
12117 (match_operand:V8FI 1 "register_operand")
12118 (match_operand:V8FI 2 "nonimmediate_operand")
12119 (match_operand:SI 3 "const_0_to_255_operand")
12120 (match_operand:V8FI 4 "register_operand")
12121 (match_operand:QI 5 "register_operand")]
12122 "TARGET_AVX512F"
12123 {
12124 int mask = INTVAL (operands[3]);
12125 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
12126 (operands[0], operands[1], operands[2],
12127 GEN_INT (((mask >> 0) & 3) * 2),
12128 GEN_INT (((mask >> 0) & 3) * 2 + 1),
12129 GEN_INT (((mask >> 2) & 3) * 2),
12130 GEN_INT (((mask >> 2) & 3) * 2 + 1),
12131 GEN_INT (((mask >> 4) & 3) * 2 + 8),
12132 GEN_INT (((mask >> 4) & 3) * 2 + 9),
12133 GEN_INT (((mask >> 6) & 3) * 2 + 8),
12134 GEN_INT (((mask >> 6) & 3) * 2 + 9),
12135 operands[4], operands[5]));
12136 DONE;
12137 })
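;; Worked example for the 512-bit form above: the immediate splits into four
;; two-bit fields, each picking a 128-bit chunk.  For mask 0x1B (fields 3, 2,
;; 1, 0 from low to high) the generated parallel is
;;   {6, 7, 4, 5, 10, 11, 8, 9}
;; i.e. chunks 3 and 2 of operand 1 followed by chunks 1 and 0 of operand 2.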
12138
12139 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
12140 [(set (match_operand:V8FI 0 "register_operand" "=v")
12141 (vec_select:V8FI
12142 (vec_concat:<ssedoublemode>
12143 (match_operand:V8FI 1 "register_operand" "v")
12144 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
12145 (parallel [(match_operand 3 "const_0_to_7_operand")
12146 (match_operand 4 "const_0_to_7_operand")
12147 (match_operand 5 "const_0_to_7_operand")
12148 (match_operand 6 "const_0_to_7_operand")
12149 (match_operand 7 "const_8_to_15_operand")
12150 (match_operand 8 "const_8_to_15_operand")
12151 (match_operand 9 "const_8_to_15_operand")
12152 (match_operand 10 "const_8_to_15_operand")])))]
12153 "TARGET_AVX512F
12154 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12155 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
12156 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12157 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
12158 {
12159 int mask;
12160 mask = INTVAL (operands[3]) / 2;
12161 mask |= INTVAL (operands[5]) / 2 << 2;
12162 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
12163 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
12164 operands[3] = GEN_INT (mask);
12165
12166 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12167 }
12168 [(set_attr "type" "sselog")
12169 (set_attr "length_immediate" "1")
12170 (set_attr "prefix" "evex")
12171 (set_attr "mode" "<sseinsnmode>")])
12172
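;; vshuf{f,i}32x4 on 256-bit operands: bit 0 of the immediate picks one
;; 128-bit (four-element) lane of operand 1 and bit 1 picks a lane of
;; operand 2, decoded here into explicit vec_select indices.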
12173 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
12174 [(match_operand:VI4F_256 0 "register_operand")
12175 (match_operand:VI4F_256 1 "register_operand")
12176 (match_operand:VI4F_256 2 "nonimmediate_operand")
12177 (match_operand:SI 3 "const_0_to_3_operand")
12178 (match_operand:VI4F_256 4 "register_operand")
12179 (match_operand:QI 5 "register_operand")]
12180 "TARGET_AVX512VL"
12181 {
12182 int mask = INTVAL (operands[3]);
12183 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
12184 (operands[0], operands[1], operands[2],
12185 GEN_INT (((mask >> 0) & 1) * 4 + 0),
12186 GEN_INT (((mask >> 0) & 1) * 4 + 1),
12187 GEN_INT (((mask >> 0) & 1) * 4 + 2),
12188 GEN_INT (((mask >> 0) & 1) * 4 + 3),
12189 GEN_INT (((mask >> 1) & 1) * 4 + 8),
12190 GEN_INT (((mask >> 1) & 1) * 4 + 9),
12191 GEN_INT (((mask >> 1) & 1) * 4 + 10),
12192 GEN_INT (((mask >> 1) & 1) * 4 + 11),
12193 operands[4], operands[5]));
12194 DONE;
12195 })
12196
12197 (define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
12198 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
12199 (vec_select:VI4F_256
12200 (vec_concat:<ssedoublemode>
12201 (match_operand:VI4F_256 1 "register_operand" "v")
12202 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
12203 (parallel [(match_operand 3 "const_0_to_7_operand")
12204 (match_operand 4 "const_0_to_7_operand")
12205 (match_operand 5 "const_0_to_7_operand")
12206 (match_operand 6 "const_0_to_7_operand")
12207 (match_operand 7 "const_8_to_15_operand")
12208 (match_operand 8 "const_8_to_15_operand")
12209 (match_operand 9 "const_8_to_15_operand")
12210 (match_operand 10 "const_8_to_15_operand")])))]
12211 "TARGET_AVX512VL
12212 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12213 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12214 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12215 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12216 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12217 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
12218 {
12219 int mask;
12220 mask = INTVAL (operands[3]) / 4;
12221 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
12222 operands[3] = GEN_INT (mask);
12223
12224 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12225 }
12226 [(set_attr "type" "sselog")
12227 (set_attr "length_immediate" "1")
12228 (set_attr "prefix" "evex")
12229 (set_attr "mode" "<sseinsnmode>")])
12230
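;; vshuf{f,i}32x4 on 512-bit operands: each 2-bit immediate field selects one
;; of the four 128-bit (four-element) lanes, fields 0-1 from operand 1 and
;; fields 2-3 from operand 2.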
12231 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
12232 [(match_operand:V16FI 0 "register_operand")
12233 (match_operand:V16FI 1 "register_operand")
12234 (match_operand:V16FI 2 "nonimmediate_operand")
12235 (match_operand:SI 3 "const_0_to_255_operand")
12236 (match_operand:V16FI 4 "register_operand")
12237 (match_operand:HI 5 "register_operand")]
12238 "TARGET_AVX512F"
12239 {
12240 int mask = INTVAL (operands[3]);
12241 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
12242 (operands[0], operands[1], operands[2],
12243 GEN_INT (((mask >> 0) & 3) * 4),
12244 GEN_INT (((mask >> 0) & 3) * 4 + 1),
12245 GEN_INT (((mask >> 0) & 3) * 4 + 2),
12246 GEN_INT (((mask >> 0) & 3) * 4 + 3),
12247 GEN_INT (((mask >> 2) & 3) * 4),
12248 GEN_INT (((mask >> 2) & 3) * 4 + 1),
12249 GEN_INT (((mask >> 2) & 3) * 4 + 2),
12250 GEN_INT (((mask >> 2) & 3) * 4 + 3),
12251 GEN_INT (((mask >> 4) & 3) * 4 + 16),
12252 GEN_INT (((mask >> 4) & 3) * 4 + 17),
12253 GEN_INT (((mask >> 4) & 3) * 4 + 18),
12254 GEN_INT (((mask >> 4) & 3) * 4 + 19),
12255 GEN_INT (((mask >> 6) & 3) * 4 + 16),
12256 GEN_INT (((mask >> 6) & 3) * 4 + 17),
12257 GEN_INT (((mask >> 6) & 3) * 4 + 18),
12258 GEN_INT (((mask >> 6) & 3) * 4 + 19),
12259 operands[4], operands[5]));
12260 DONE;
12261 })
12262
12263 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
12264 [(set (match_operand:V16FI 0 "register_operand" "=v")
12265 (vec_select:V16FI
12266 (vec_concat:<ssedoublemode>
12267 (match_operand:V16FI 1 "register_operand" "v")
12268 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
12269 (parallel [(match_operand 3 "const_0_to_15_operand")
12270 (match_operand 4 "const_0_to_15_operand")
12271 (match_operand 5 "const_0_to_15_operand")
12272 (match_operand 6 "const_0_to_15_operand")
12273 (match_operand 7 "const_0_to_15_operand")
12274 (match_operand 8 "const_0_to_15_operand")
12275 (match_operand 9 "const_0_to_15_operand")
12276 (match_operand 10 "const_0_to_15_operand")
12277 (match_operand 11 "const_16_to_31_operand")
12278 (match_operand 12 "const_16_to_31_operand")
12279 (match_operand 13 "const_16_to_31_operand")
12280 (match_operand 14 "const_16_to_31_operand")
12281 (match_operand 15 "const_16_to_31_operand")
12282 (match_operand 16 "const_16_to_31_operand")
12283 (match_operand 17 "const_16_to_31_operand")
12284 (match_operand 18 "const_16_to_31_operand")])))]
12285 "TARGET_AVX512F
12286 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12287 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12288 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12289 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12290 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12291 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
12292 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
12293 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
12294 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
12295 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
12296 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
12297 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
12298 {
12299 int mask;
12300 mask = INTVAL (operands[3]) / 4;
12301 mask |= INTVAL (operands[7]) / 4 << 2;
12302 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
12303 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
12304 operands[3] = GEN_INT (mask);
12305
12306 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
12307 }
12308 [(set_attr "type" "sselog")
12309 (set_attr "length_immediate" "1")
12310 (set_attr "prefix" "evex")
12311 (set_attr "mode" "<sseinsnmode>")])
12312
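;; vpshufd applies the same four 2-bit element indices within every 128-bit
;; lane; the expanders below replicate the decoded immediate across all lanes
;; of the vec_select matched by the corresponding *_1 pattern.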
12313 (define_expand "avx512f_pshufdv3_mask"
12314 [(match_operand:V16SI 0 "register_operand")
12315 (match_operand:V16SI 1 "nonimmediate_operand")
12316 (match_operand:SI 2 "const_0_to_255_operand")
12317 (match_operand:V16SI 3 "register_operand")
12318 (match_operand:HI 4 "register_operand")]
12319 "TARGET_AVX512F"
12320 {
12321 int mask = INTVAL (operands[2]);
12322 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
12323 GEN_INT ((mask >> 0) & 3),
12324 GEN_INT ((mask >> 2) & 3),
12325 GEN_INT ((mask >> 4) & 3),
12326 GEN_INT ((mask >> 6) & 3),
12327 GEN_INT (((mask >> 0) & 3) + 4),
12328 GEN_INT (((mask >> 2) & 3) + 4),
12329 GEN_INT (((mask >> 4) & 3) + 4),
12330 GEN_INT (((mask >> 6) & 3) + 4),
12331 GEN_INT (((mask >> 0) & 3) + 8),
12332 GEN_INT (((mask >> 2) & 3) + 8),
12333 GEN_INT (((mask >> 4) & 3) + 8),
12334 GEN_INT (((mask >> 6) & 3) + 8),
12335 GEN_INT (((mask >> 0) & 3) + 12),
12336 GEN_INT (((mask >> 2) & 3) + 12),
12337 GEN_INT (((mask >> 4) & 3) + 12),
12338 GEN_INT (((mask >> 6) & 3) + 12),
12339 operands[3], operands[4]));
12340 DONE;
12341 })
12342
12343 (define_insn "avx512f_pshufd_1<mask_name>"
12344 [(set (match_operand:V16SI 0 "register_operand" "=v")
12345 (vec_select:V16SI
12346 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
12347 (parallel [(match_operand 2 "const_0_to_3_operand")
12348 (match_operand 3 "const_0_to_3_operand")
12349 (match_operand 4 "const_0_to_3_operand")
12350 (match_operand 5 "const_0_to_3_operand")
12351 (match_operand 6 "const_4_to_7_operand")
12352 (match_operand 7 "const_4_to_7_operand")
12353 (match_operand 8 "const_4_to_7_operand")
12354 (match_operand 9 "const_4_to_7_operand")
12355 (match_operand 10 "const_8_to_11_operand")
12356 (match_operand 11 "const_8_to_11_operand")
12357 (match_operand 12 "const_8_to_11_operand")
12358 (match_operand 13 "const_8_to_11_operand")
12359 (match_operand 14 "const_12_to_15_operand")
12360 (match_operand 15 "const_12_to_15_operand")
12361 (match_operand 16 "const_12_to_15_operand")
12362 (match_operand 17 "const_12_to_15_operand")])))]
12363 "TARGET_AVX512F
12364 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12365 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12366 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12367 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
12368 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
12369 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
12370 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
12371 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
12372 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
12373 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
12374 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
12375 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
12376 {
12377 int mask = 0;
12378 mask |= INTVAL (operands[2]) << 0;
12379 mask |= INTVAL (operands[3]) << 2;
12380 mask |= INTVAL (operands[4]) << 4;
12381 mask |= INTVAL (operands[5]) << 6;
12382 operands[2] = GEN_INT (mask);
12383
12384 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
12385 }
12386 [(set_attr "type" "sselog1")
12387 (set_attr "prefix" "evex")
12388 (set_attr "length_immediate" "1")
12389 (set_attr "mode" "XI")])
12390
12391 (define_expand "avx512vl_pshufdv3_mask"
12392 [(match_operand:V8SI 0 "register_operand")
12393 (match_operand:V8SI 1 "nonimmediate_operand")
12394 (match_operand:SI 2 "const_0_to_255_operand")
12395 (match_operand:V8SI 3 "register_operand")
12396 (match_operand:QI 4 "register_operand")]
12397 "TARGET_AVX512VL"
12398 {
12399 int mask = INTVAL (operands[2]);
12400 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
12401 GEN_INT ((mask >> 0) & 3),
12402 GEN_INT ((mask >> 2) & 3),
12403 GEN_INT ((mask >> 4) & 3),
12404 GEN_INT ((mask >> 6) & 3),
12405 GEN_INT (((mask >> 0) & 3) + 4),
12406 GEN_INT (((mask >> 2) & 3) + 4),
12407 GEN_INT (((mask >> 4) & 3) + 4),
12408 GEN_INT (((mask >> 6) & 3) + 4),
12409 operands[3], operands[4]));
12410 DONE;
12411 })
12412
12413 (define_expand "avx2_pshufdv3"
12414 [(match_operand:V8SI 0 "register_operand")
12415 (match_operand:V8SI 1 "nonimmediate_operand")
12416 (match_operand:SI 2 "const_0_to_255_operand")]
12417 "TARGET_AVX2"
12418 {
12419 int mask = INTVAL (operands[2]);
12420 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
12421 GEN_INT ((mask >> 0) & 3),
12422 GEN_INT ((mask >> 2) & 3),
12423 GEN_INT ((mask >> 4) & 3),
12424 GEN_INT ((mask >> 6) & 3),
12425 GEN_INT (((mask >> 0) & 3) + 4),
12426 GEN_INT (((mask >> 2) & 3) + 4),
12427 GEN_INT (((mask >> 4) & 3) + 4),
12428 GEN_INT (((mask >> 6) & 3) + 4)));
12429 DONE;
12430 })
12431
12432 (define_insn "avx2_pshufd_1<mask_name>"
12433 [(set (match_operand:V8SI 0 "register_operand" "=v")
12434 (vec_select:V8SI
12435 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
12436 (parallel [(match_operand 2 "const_0_to_3_operand")
12437 (match_operand 3 "const_0_to_3_operand")
12438 (match_operand 4 "const_0_to_3_operand")
12439 (match_operand 5 "const_0_to_3_operand")
12440 (match_operand 6 "const_4_to_7_operand")
12441 (match_operand 7 "const_4_to_7_operand")
12442 (match_operand 8 "const_4_to_7_operand")
12443 (match_operand 9 "const_4_to_7_operand")])))]
12444 "TARGET_AVX2
12445 && <mask_avx512vl_condition>
12446 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12447 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12448 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12449 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
12450 {
12451 int mask = 0;
12452 mask |= INTVAL (operands[2]) << 0;
12453 mask |= INTVAL (operands[3]) << 2;
12454 mask |= INTVAL (operands[4]) << 4;
12455 mask |= INTVAL (operands[5]) << 6;
12456 operands[2] = GEN_INT (mask);
12457
12458 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12459 }
12460 [(set_attr "type" "sselog1")
12461 (set_attr "prefix" "maybe_evex")
12462 (set_attr "length_immediate" "1")
12463 (set_attr "mode" "OI")])
12464
12465 (define_expand "avx512vl_pshufd_mask"
12466 [(match_operand:V4SI 0 "register_operand")
12467 (match_operand:V4SI 1 "nonimmediate_operand")
12468 (match_operand:SI 2 "const_0_to_255_operand")
12469 (match_operand:V4SI 3 "register_operand")
12470 (match_operand:QI 4 "register_operand")]
12471 "TARGET_AVX512VL"
12472 {
12473 int mask = INTVAL (operands[2]);
12474 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
12475 GEN_INT ((mask >> 0) & 3),
12476 GEN_INT ((mask >> 2) & 3),
12477 GEN_INT ((mask >> 4) & 3),
12478 GEN_INT ((mask >> 6) & 3),
12479 operands[3], operands[4]));
12480 DONE;
12481 })
12482
12483 (define_expand "sse2_pshufd"
12484 [(match_operand:V4SI 0 "register_operand")
12485 (match_operand:V4SI 1 "vector_operand")
12486 (match_operand:SI 2 "const_int_operand")]
12487 "TARGET_SSE2"
12488 {
12489 int mask = INTVAL (operands[2]);
12490 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
12491 GEN_INT ((mask >> 0) & 3),
12492 GEN_INT ((mask >> 2) & 3),
12493 GEN_INT ((mask >> 4) & 3),
12494 GEN_INT ((mask >> 6) & 3)));
12495 DONE;
12496 })
12497
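;; sse2_pshufd_1 (like the wider *_1 patterns above) reassembles the PSHUFD
;; immediate from the vec_select indices; e.g. the selection (3 2 1 0), which
;; reverses the four doublewords, becomes the immediate 0x1b.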
12498 (define_insn "sse2_pshufd_1<mask_name>"
12499 [(set (match_operand:V4SI 0 "register_operand" "=v")
12500 (vec_select:V4SI
12501 (match_operand:V4SI 1 "vector_operand" "vBm")
12502 (parallel [(match_operand 2 "const_0_to_3_operand")
12503 (match_operand 3 "const_0_to_3_operand")
12504 (match_operand 4 "const_0_to_3_operand")
12505 (match_operand 5 "const_0_to_3_operand")])))]
12506 "TARGET_SSE2 && <mask_avx512vl_condition>"
12507 {
12508 int mask = 0;
12509 mask |= INTVAL (operands[2]) << 0;
12510 mask |= INTVAL (operands[3]) << 2;
12511 mask |= INTVAL (operands[4]) << 4;
12512 mask |= INTVAL (operands[5]) << 6;
12513 operands[2] = GEN_INT (mask);
12514
12515 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12516 }
12517 [(set_attr "type" "sselog1")
12518 (set_attr "prefix_data16" "1")
12519 (set_attr "prefix" "<mask_prefix2>")
12520 (set_attr "length_immediate" "1")
12521 (set_attr "mode" "TI")])
12522
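;; vpshuflw reorders the low four words of each 128-bit lane according to the
;; immediate and leaves the high four words of each lane unchanged.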
12523 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
12524 [(set (match_operand:V32HI 0 "register_operand" "=v")
12525 (unspec:V32HI
12526 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12527 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12528 UNSPEC_PSHUFLW))]
12529 "TARGET_AVX512BW"
12530 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12531 [(set_attr "type" "sselog")
12532 (set_attr "prefix" "evex")
12533 (set_attr "mode" "XI")])
12534
12535 (define_expand "avx512vl_pshuflwv3_mask"
12536 [(match_operand:V16HI 0 "register_operand")
12537 (match_operand:V16HI 1 "nonimmediate_operand")
12538 (match_operand:SI 2 "const_0_to_255_operand")
12539 (match_operand:V16HI 3 "register_operand")
12540 (match_operand:HI 4 "register_operand")]
12541 "TARGET_AVX512VL && TARGET_AVX512BW"
12542 {
12543 int mask = INTVAL (operands[2]);
12544 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
12545 GEN_INT ((mask >> 0) & 3),
12546 GEN_INT ((mask >> 2) & 3),
12547 GEN_INT ((mask >> 4) & 3),
12548 GEN_INT ((mask >> 6) & 3),
12549 GEN_INT (((mask >> 0) & 3) + 8),
12550 GEN_INT (((mask >> 2) & 3) + 8),
12551 GEN_INT (((mask >> 4) & 3) + 8),
12552 GEN_INT (((mask >> 6) & 3) + 8),
12553 operands[3], operands[4]));
12554 DONE;
12555 })
12556
12557 (define_expand "avx2_pshuflwv3"
12558 [(match_operand:V16HI 0 "register_operand")
12559 (match_operand:V16HI 1 "nonimmediate_operand")
12560 (match_operand:SI 2 "const_0_to_255_operand")]
12561 "TARGET_AVX2"
12562 {
12563 int mask = INTVAL (operands[2]);
12564 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
12565 GEN_INT ((mask >> 0) & 3),
12566 GEN_INT ((mask >> 2) & 3),
12567 GEN_INT ((mask >> 4) & 3),
12568 GEN_INT ((mask >> 6) & 3),
12569 GEN_INT (((mask >> 0) & 3) + 8),
12570 GEN_INT (((mask >> 2) & 3) + 8),
12571 GEN_INT (((mask >> 4) & 3) + 8),
12572 GEN_INT (((mask >> 6) & 3) + 8)));
12573 DONE;
12574 })
12575
12576 (define_insn "avx2_pshuflw_1<mask_name>"
12577 [(set (match_operand:V16HI 0 "register_operand" "=v")
12578 (vec_select:V16HI
12579 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12580 (parallel [(match_operand 2 "const_0_to_3_operand")
12581 (match_operand 3 "const_0_to_3_operand")
12582 (match_operand 4 "const_0_to_3_operand")
12583 (match_operand 5 "const_0_to_3_operand")
12584 (const_int 4)
12585 (const_int 5)
12586 (const_int 6)
12587 (const_int 7)
12588 (match_operand 6 "const_8_to_11_operand")
12589 (match_operand 7 "const_8_to_11_operand")
12590 (match_operand 8 "const_8_to_11_operand")
12591 (match_operand 9 "const_8_to_11_operand")
12592 (const_int 12)
12593 (const_int 13)
12594 (const_int 14)
12595 (const_int 15)])))]
12596 "TARGET_AVX2
12597 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12598 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12599 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12600 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12601 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12602 {
12603 int mask = 0;
12604 mask |= INTVAL (operands[2]) << 0;
12605 mask |= INTVAL (operands[3]) << 2;
12606 mask |= INTVAL (operands[4]) << 4;
12607 mask |= INTVAL (operands[5]) << 6;
12608 operands[2] = GEN_INT (mask);
12609
12610 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12611 }
12612 [(set_attr "type" "sselog")
12613 (set_attr "prefix" "maybe_evex")
12614 (set_attr "length_immediate" "1")
12615 (set_attr "mode" "OI")])
12616
12617 (define_expand "avx512vl_pshuflw_mask"
12618 [(match_operand:V8HI 0 "register_operand")
12619 (match_operand:V8HI 1 "nonimmediate_operand")
12620 (match_operand:SI 2 "const_0_to_255_operand")
12621 (match_operand:V8HI 3 "register_operand")
12622 (match_operand:QI 4 "register_operand")]
12623 "TARGET_AVX512VL && TARGET_AVX512BW"
12624 {
12625 int mask = INTVAL (operands[2]);
12626 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
12627 GEN_INT ((mask >> 0) & 3),
12628 GEN_INT ((mask >> 2) & 3),
12629 GEN_INT ((mask >> 4) & 3),
12630 GEN_INT ((mask >> 6) & 3),
12631 operands[3], operands[4]));
12632 DONE;
12633 })
12634
12635 (define_expand "sse2_pshuflw"
12636 [(match_operand:V8HI 0 "register_operand")
12637 (match_operand:V8HI 1 "vector_operand")
12638 (match_operand:SI 2 "const_int_operand")]
12639 "TARGET_SSE2"
12640 {
12641 int mask = INTVAL (operands[2]);
12642 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
12643 GEN_INT ((mask >> 0) & 3),
12644 GEN_INT ((mask >> 2) & 3),
12645 GEN_INT ((mask >> 4) & 3),
12646 GEN_INT ((mask >> 6) & 3)));
12647 DONE;
12648 })
12649
12650 (define_insn "sse2_pshuflw_1<mask_name>"
12651 [(set (match_operand:V8HI 0 "register_operand" "=v")
12652 (vec_select:V8HI
12653 (match_operand:V8HI 1 "vector_operand" "vBm")
12654 (parallel [(match_operand 2 "const_0_to_3_operand")
12655 (match_operand 3 "const_0_to_3_operand")
12656 (match_operand 4 "const_0_to_3_operand")
12657 (match_operand 5 "const_0_to_3_operand")
12658 (const_int 4)
12659 (const_int 5)
12660 (const_int 6)
12661 (const_int 7)])))]
12662 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12663 {
12664 int mask = 0;
12665 mask |= INTVAL (operands[2]) << 0;
12666 mask |= INTVAL (operands[3]) << 2;
12667 mask |= INTVAL (operands[4]) << 4;
12668 mask |= INTVAL (operands[5]) << 6;
12669 operands[2] = GEN_INT (mask);
12670
12671 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12672 }
12673 [(set_attr "type" "sselog")
12674 (set_attr "prefix_data16" "0")
12675 (set_attr "prefix_rep" "1")
12676 (set_attr "prefix" "maybe_vex")
12677 (set_attr "length_immediate" "1")
12678 (set_attr "mode" "TI")])
12679
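;; vpshufhw reorders the high four words of each 128-bit lane according to
;; the immediate and leaves the low four words of each lane unchanged.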
12680 (define_expand "avx2_pshufhwv3"
12681 [(match_operand:V16HI 0 "register_operand")
12682 (match_operand:V16HI 1 "nonimmediate_operand")
12683 (match_operand:SI 2 "const_0_to_255_operand")]
12684 "TARGET_AVX2"
12685 {
12686 int mask = INTVAL (operands[2]);
12687 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
12688 GEN_INT (((mask >> 0) & 3) + 4),
12689 GEN_INT (((mask >> 2) & 3) + 4),
12690 GEN_INT (((mask >> 4) & 3) + 4),
12691 GEN_INT (((mask >> 6) & 3) + 4),
12692 GEN_INT (((mask >> 0) & 3) + 12),
12693 GEN_INT (((mask >> 2) & 3) + 12),
12694 GEN_INT (((mask >> 4) & 3) + 12),
12695 GEN_INT (((mask >> 6) & 3) + 12)));
12696 DONE;
12697 })
12698
12699 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
12700 [(set (match_operand:V32HI 0 "register_operand" "=v")
12701 (unspec:V32HI
12702 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12703 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12704 UNSPEC_PSHUFHW))]
12705 "TARGET_AVX512BW"
12706 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12707 [(set_attr "type" "sselog")
12708 (set_attr "prefix" "evex")
12709 (set_attr "mode" "XI")])
12710
12711 (define_expand "avx512vl_pshufhwv3_mask"
12712 [(match_operand:V16HI 0 "register_operand")
12713 (match_operand:V16HI 1 "nonimmediate_operand")
12714 (match_operand:SI 2 "const_0_to_255_operand")
12715 (match_operand:V16HI 3 "register_operand")
12716 (match_operand:HI 4 "register_operand")]
12717 "TARGET_AVX512VL && TARGET_AVX512BW"
12718 {
12719 int mask = INTVAL (operands[2]);
12720 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
12721 GEN_INT (((mask >> 0) & 3) + 4),
12722 GEN_INT (((mask >> 2) & 3) + 4),
12723 GEN_INT (((mask >> 4) & 3) + 4),
12724 GEN_INT (((mask >> 6) & 3) + 4),
12725 GEN_INT (((mask >> 0) & 3) + 12),
12726 GEN_INT (((mask >> 2) & 3) + 12),
12727 GEN_INT (((mask >> 4) & 3) + 12),
12728 GEN_INT (((mask >> 6) & 3) + 12),
12729 operands[3], operands[4]));
12730 DONE;
12731 })
12732
12733 (define_insn "avx2_pshufhw_1<mask_name>"
12734 [(set (match_operand:V16HI 0 "register_operand" "=v")
12735 (vec_select:V16HI
12736 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12737 (parallel [(const_int 0)
12738 (const_int 1)
12739 (const_int 2)
12740 (const_int 3)
12741 (match_operand 2 "const_4_to_7_operand")
12742 (match_operand 3 "const_4_to_7_operand")
12743 (match_operand 4 "const_4_to_7_operand")
12744 (match_operand 5 "const_4_to_7_operand")
12745 (const_int 8)
12746 (const_int 9)
12747 (const_int 10)
12748 (const_int 11)
12749 (match_operand 6 "const_12_to_15_operand")
12750 (match_operand 7 "const_12_to_15_operand")
12751 (match_operand 8 "const_12_to_15_operand")
12752 (match_operand 9 "const_12_to_15_operand")])))]
12753 "TARGET_AVX2
12754 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12755 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12756 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12757 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12758 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12759 {
12760 int mask = 0;
12761 mask |= (INTVAL (operands[2]) - 4) << 0;
12762 mask |= (INTVAL (operands[3]) - 4) << 2;
12763 mask |= (INTVAL (operands[4]) - 4) << 4;
12764 mask |= (INTVAL (operands[5]) - 4) << 6;
12765 operands[2] = GEN_INT (mask);
12766
12767 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12768 }
12769 [(set_attr "type" "sselog")
12770 (set_attr "prefix" "maybe_evex")
12771 (set_attr "length_immediate" "1")
12772 (set_attr "mode" "OI")])
12773
12774 (define_expand "avx512vl_pshufhw_mask"
12775 [(match_operand:V8HI 0 "register_operand")
12776 (match_operand:V8HI 1 "nonimmediate_operand")
12777 (match_operand:SI 2 "const_0_to_255_operand")
12778 (match_operand:V8HI 3 "register_operand")
12779 (match_operand:QI 4 "register_operand")]
12780 "TARGET_AVX512VL && TARGET_AVX512BW"
12781 {
12782 int mask = INTVAL (operands[2]);
12783 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
12784 GEN_INT (((mask >> 0) & 3) + 4),
12785 GEN_INT (((mask >> 2) & 3) + 4),
12786 GEN_INT (((mask >> 4) & 3) + 4),
12787 GEN_INT (((mask >> 6) & 3) + 4),
12788 operands[3], operands[4]));
12789 DONE;
12790 })
12791
12792 (define_expand "sse2_pshufhw"
12793 [(match_operand:V8HI 0 "register_operand")
12794 (match_operand:V8HI 1 "vector_operand")
12795 (match_operand:SI 2 "const_int_operand")]
12796 "TARGET_SSE2"
12797 {
12798 int mask = INTVAL (operands[2]);
12799 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
12800 GEN_INT (((mask >> 0) & 3) + 4),
12801 GEN_INT (((mask >> 2) & 3) + 4),
12802 GEN_INT (((mask >> 4) & 3) + 4),
12803 GEN_INT (((mask >> 6) & 3) + 4)));
12804 DONE;
12805 })
12806
12807 (define_insn "sse2_pshufhw_1<mask_name>"
12808 [(set (match_operand:V8HI 0 "register_operand" "=v")
12809 (vec_select:V8HI
12810 (match_operand:V8HI 1 "vector_operand" "vBm")
12811 (parallel [(const_int 0)
12812 (const_int 1)
12813 (const_int 2)
12814 (const_int 3)
12815 (match_operand 2 "const_4_to_7_operand")
12816 (match_operand 3 "const_4_to_7_operand")
12817 (match_operand 4 "const_4_to_7_operand")
12818 (match_operand 5 "const_4_to_7_operand")])))]
12819 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12820 {
12821 int mask = 0;
12822 mask |= (INTVAL (operands[2]) - 4) << 0;
12823 mask |= (INTVAL (operands[3]) - 4) << 2;
12824 mask |= (INTVAL (operands[4]) - 4) << 4;
12825 mask |= (INTVAL (operands[5]) - 4) << 6;
12826 operands[2] = GEN_INT (mask);
12827
12828 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12829 }
12830 [(set_attr "type" "sselog")
12831 (set_attr "prefix_rep" "1")
12832 (set_attr "prefix_data16" "0")
12833 (set_attr "prefix" "maybe_vex")
12834 (set_attr "length_immediate" "1")
12835 (set_attr "mode" "TI")])
12836
12837 (define_expand "sse2_loadd"
12838 [(set (match_operand:V4SI 0 "register_operand")
12839 (vec_merge:V4SI
12840 (vec_duplicate:V4SI
12841 (match_operand:SI 1 "nonimmediate_operand"))
12842 (match_dup 2)
12843 (const_int 1)))]
12844 "TARGET_SSE"
12845 "operands[2] = CONST0_RTX (V4SImode);")
12846
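;; Load a 32-bit scalar into element 0 of a V4SI vector, taking the remaining
;; elements from operand 1 (zero in the form generated by sse2_loadd above).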
12847 (define_insn "sse2_loadld"
12848 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
12849 (vec_merge:V4SI
12850 (vec_duplicate:V4SI
12851 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
12852 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
12853 (const_int 1)))]
12854 "TARGET_SSE"
12855 "@
12856 %vmovd\t{%2, %0|%0, %2}
12857 %vmovd\t{%2, %0|%0, %2}
12858 movss\t{%2, %0|%0, %2}
12859 movss\t{%2, %0|%0, %2}
12860 vmovss\t{%2, %1, %0|%0, %1, %2}"
12861 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
12862 (set_attr "type" "ssemov")
12863 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
12864 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
12865
12866 ;; QI and HI element modes are handled by the pextr patterns below.
12867 (define_mode_iterator PEXTR_MODE12
12868 [(V16QI "TARGET_SSE4_1") V8HI])
12869
12870 (define_insn "*vec_extract<mode>"
12871 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m")
12872 (vec_select:<ssescalarmode>
12873 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x")
12874 (parallel
12875 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
12876 "TARGET_SSE2"
12877 "@
12878 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
12879 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12880 [(set_attr "isa" "*,sse4")
12881 (set_attr "type" "sselog1")
12882 (set_attr "prefix_data16" "1")
12883 (set (attr "prefix_extra")
12884 (if_then_else
12885 (and (eq_attr "alternative" "0")
12886 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12887 (const_string "*")
12888 (const_string "1")))
12889 (set_attr "length_immediate" "1")
12890 (set_attr "prefix" "maybe_vex")
12891 (set_attr "mode" "TI")])
12892
12893 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
12894 [(set (match_operand:SWI48 0 "register_operand" "=r")
12895 (zero_extend:SWI48
12896 (vec_select:<PEXTR_MODE12:ssescalarmode>
12897 (match_operand:PEXTR_MODE12 1 "register_operand" "x")
12898 (parallel
12899 [(match_operand:SI 2
12900 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
12901 "TARGET_SSE2"
12902 "%vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
12903 [(set_attr "type" "sselog1")
12904 (set_attr "prefix_data16" "1")
12905 (set (attr "prefix_extra")
12906 (if_then_else
12907 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
12908 (const_string "*")
12909 (const_string "1")))
12910 (set_attr "length_immediate" "1")
12911 (set_attr "prefix" "maybe_vex")
12912 (set_attr "mode" "TI")])
12913
12914 (define_insn "*vec_extract<mode>_mem"
12915 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
12916 (vec_select:<ssescalarmode>
12917 (match_operand:VI12_128 1 "memory_operand" "o")
12918 (parallel
12919 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
12920 "TARGET_SSE"
12921 "#")
12922
12923 (define_insn "*vec_extract<ssevecmodelower>_0"
12924 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
12925 (vec_select:SWI48
12926 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
12927 (parallel [(const_int 0)])))]
12928 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12929 "#"
12930 [(set_attr "isa" "*,sse4,*,*")])
12931
12932 (define_insn_and_split "*vec_extractv4si_0_zext"
12933 [(set (match_operand:DI 0 "register_operand" "=r")
12934 (zero_extend:DI
12935 (vec_select:SI
12936 (match_operand:V4SI 1 "register_operand" "x")
12937 (parallel [(const_int 0)]))))]
12938 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
12939 "#"
12940 "&& reload_completed"
12941 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12942 "operands[1] = gen_lowpart (SImode, operands[1]);")
12943
12944 (define_insn "*vec_extractv2di_0_sse"
12945 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
12946 (vec_select:DI
12947 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
12948 (parallel [(const_int 0)])))]
12949 "TARGET_SSE && !TARGET_64BIT
12950 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12951 "#")
12952
12953 (define_split
12954 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
12955 (vec_select:SWI48x
12956 (match_operand:<ssevecmode> 1 "register_operand")
12957 (parallel [(const_int 0)])))]
12958 "TARGET_SSE && reload_completed"
12959 [(set (match_dup 0) (match_dup 1))]
12960 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
12961
12962 (define_insn "*vec_extractv4si"
12963 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x")
12964 (vec_select:SI
12965 (match_operand:V4SI 1 "register_operand" "x,0,0,x")
12966 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
12967 "TARGET_SSE4_1"
12968 {
12969 switch (which_alternative)
12970 {
12971 case 0:
12972 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
12973
12974 case 1:
12975 case 2:
12976 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12977 return "psrldq\t{%2, %0|%0, %2}";
12978
12979 case 3:
12980 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12981 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
12982
12983 default:
12984 gcc_unreachable ();
12985 }
12986 }
12987 [(set_attr "isa" "*,noavx,noavx,avx")
12988 (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1")
12989 (set_attr "prefix_extra" "1,*,*,*")
12990 (set_attr "length_immediate" "1")
12991 (set_attr "prefix" "maybe_vex,orig,orig,vex")
12992 (set_attr "mode" "TI")])
12993
12994 (define_insn "*vec_extractv4si_zext"
12995 [(set (match_operand:DI 0 "register_operand" "=r")
12996 (zero_extend:DI
12997 (vec_select:SI
12998 (match_operand:V4SI 1 "register_operand" "x")
12999 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13000 "TARGET_64BIT && TARGET_SSE4_1"
13001 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
13002 [(set_attr "type" "sselog1")
13003 (set_attr "prefix_extra" "1")
13004 (set_attr "length_immediate" "1")
13005 (set_attr "prefix" "maybe_vex")
13006 (set_attr "mode" "TI")])
13007
13008 (define_insn "*vec_extractv4si_mem"
13009 [(set (match_operand:SI 0 "register_operand" "=x,r")
13010 (vec_select:SI
13011 (match_operand:V4SI 1 "memory_operand" "o,o")
13012 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
13013 "TARGET_SSE"
13014 "#")
13015
13016 (define_insn_and_split "*vec_extractv4si_zext_mem"
13017 [(set (match_operand:DI 0 "register_operand" "=x,r")
13018 (zero_extend:DI
13019 (vec_select:SI
13020 (match_operand:V4SI 1 "memory_operand" "o,o")
13021 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13022 "TARGET_64BIT && TARGET_SSE"
13023 "#"
13024 "&& reload_completed"
13025 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13026 {
13027 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
13028 })
13029
13030 (define_insn "*vec_extractv2di_1"
13031 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
13032 (vec_select:DI
13033 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
13034 (parallel [(const_int 1)])))]
13035 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13036 "@
13037 %vpextrq\t{$1, %1, %0|%0, %1, 1}
13038 %vmovhps\t{%1, %0|%0, %1}
13039 psrldq\t{$8, %0|%0, 8}
13040 vpsrldq\t{$8, %1, %0|%0, %1, 8}
13041 movhlps\t{%1, %0|%0, %1}
13042 #
13043 #"
13044 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
13045 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
13046 (set_attr "length_immediate" "1,*,1,1,*,*,*")
13047 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
13048 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
13049 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
13050 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
13051
13052 (define_split
13053 [(set (match_operand:<ssescalarmode> 0 "register_operand")
13054 (vec_select:<ssescalarmode>
13055 (match_operand:VI_128 1 "memory_operand")
13056 (parallel
13057 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13058 "TARGET_SSE && reload_completed"
13059 [(set (match_dup 0) (match_dup 1))]
13060 {
13061 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
13062
13063 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
13064 })
13065
13066 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
13067 ;; vector modes into vec_extract*.
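;; For example, an SImode subreg of a V16SI register is first narrowed to the
;; low V8SI and then to the low V4SI with vec_extract_lo patterns, and the
;; final SImode value becomes a vec_select of element 0.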
13068 (define_split
13069 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13070 (match_operand:SWI48x 1 "register_operand"))]
13071 "can_create_pseudo_p ()
13072 && SUBREG_P (operands[1])
13073 && REG_P (SUBREG_REG (operands[1]))
13074 && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) == MODE_VECTOR_INT
13075 || (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1])))
13076 == MODE_VECTOR_FLOAT))
13077 && SUBREG_BYTE (operands[1]) == 0
13078 && TARGET_SSE
13079 && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 16
13080 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 32
13081 && TARGET_AVX)
13082 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 64
13083 && TARGET_AVX512F))
13084 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
13085 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
13086 (parallel [(const_int 0)])))]
13087 {
13088 rtx tmp;
13089 operands[1] = SUBREG_REG (operands[1]);
13090 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
13091 {
13092 case 64:
13093 if (<MODE>mode == SImode)
13094 {
13095 tmp = gen_reg_rtx (V8SImode);
13096 emit_insn (gen_vec_extract_lo_v16si (tmp,
13097 gen_lowpart (V16SImode,
13098 operands[1])));
13099 }
13100 else
13101 {
13102 tmp = gen_reg_rtx (V4DImode);
13103 emit_insn (gen_vec_extract_lo_v8di (tmp,
13104 gen_lowpart (V8DImode,
13105 operands[1])));
13106 }
13107 operands[1] = tmp;
13108 /* FALLTHRU */
13109 case 32:
13110 tmp = gen_reg_rtx (<ssevecmode>mode);
13111 if (<MODE>mode == SImode)
13112 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
13113 operands[1])));
13114 else
13115 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
13116 operands[1])));
13117 operands[1] = tmp;
13118 break;
13119 case 16:
13120 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
13121 break;
13122 }
13123 })
13124
13125 (define_insn "*vec_concatv2si_sse4_1"
13126 [(set (match_operand:V2SI 0 "register_operand"
13127 "=Yr,*x,x, Yr,*x,x, x, *y,*y")
13128 (vec_concat:V2SI
13129 (match_operand:SI 1 "nonimmediate_operand"
13130 " 0, 0,x, 0,0, x,rm, 0,rm")
13131 (match_operand:SI 2 "vector_move_operand"
13132 " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
13133 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13134 "@
13135 pinsrd\t{$1, %2, %0|%0, %2, 1}
13136 pinsrd\t{$1, %2, %0|%0, %2, 1}
13137 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
13138 punpckldq\t{%2, %0|%0, %2}
13139 punpckldq\t{%2, %0|%0, %2}
13140 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
13141 %vmovd\t{%1, %0|%0, %1}
13142 punpckldq\t{%2, %0|%0, %2}
13143 movd\t{%1, %0|%0, %1}"
13144 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
13145 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
13146 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
13147 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
13148 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
13149 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
13150
13151 ;; ??? In theory we can match memory for the MMX alternative, but allowing
13152 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
13153 ;; alternatives pretty much forces the MMX alternative to be chosen.
13154 (define_insn "*vec_concatv2si"
13155 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
13156 (vec_concat:V2SI
13157 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
13158 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
13159 "TARGET_SSE && !TARGET_SSE4_1"
13160 "@
13161 punpckldq\t{%2, %0|%0, %2}
13162 movd\t{%1, %0|%0, %1}
13163 movd\t{%1, %0|%0, %1}
13164 unpcklps\t{%2, %0|%0, %2}
13165 movss\t{%1, %0|%0, %1}
13166 punpckldq\t{%2, %0|%0, %2}
13167 movd\t{%1, %0|%0, %1}"
13168 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
13169 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
13170 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
13171
13172 (define_insn "*vec_concatv4si"
13173 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
13174 (vec_concat:V4SI
13175 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
13176 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
13177 "TARGET_SSE"
13178 "@
13179 punpcklqdq\t{%2, %0|%0, %2}
13180 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
13181 movlhps\t{%2, %0|%0, %2}
13182 movhps\t{%2, %0|%0, %q2}
13183 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
13184 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
13185 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
13186 (set_attr "prefix" "orig,vex,orig,orig,vex")
13187 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
13188
13189 ;; movd instead of movq is required to handle broken assemblers.
13190 (define_insn "vec_concatv2di"
13191 [(set (match_operand:V2DI 0 "register_operand"
13192 "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x")
13193 (vec_concat:V2DI
13194 (match_operand:DI 1 "nonimmediate_operand"
13195 " 0, 0,x ,r ,xm,*y,0,x,0,0,x")
13196 (match_operand:DI 2 "vector_move_operand"
13197 "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))]
13198 "TARGET_SSE"
13199 "@
13200 pinsrq\t{$1, %2, %0|%0, %2, 1}
13201 pinsrq\t{$1, %2, %0|%0, %2, 1}
13202 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
13203 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
13204 %vmovq\t{%1, %0|%0, %1}
13205 movq2dq\t{%1, %0|%0, %1}
13206 punpcklqdq\t{%2, %0|%0, %2}
13207 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
13208 movlhps\t{%2, %0|%0, %2}
13209 movhps\t{%2, %0|%0, %2}
13210 vmovhps\t{%2, %1, %0|%0, %1, %2}"
13211 [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
13212 (set (attr "type")
13213 (if_then_else
13214 (eq_attr "alternative" "0,1,2,6,7")
13215 (const_string "sselog")
13216 (const_string "ssemov")))
13217 (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*")
13218 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*")
13219 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*")
13220 (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
13221 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
13222
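;; The vec_unpack expanders extend the low or high half of a vector via
;; ix86_expand_sse_unpack; the scalar QI/HI/SI/DI variants further down
;; (used for AVX-512 mask values) take the low half as a subreg and the
;; high half with a logical shift.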
13223 (define_expand "vec_unpacks_lo_<mode>"
13224 [(match_operand:<sseunpackmode> 0 "register_operand")
13225 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13226 "TARGET_SSE2"
13227 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
13228
13229 (define_expand "vec_unpacks_hi_<mode>"
13230 [(match_operand:<sseunpackmode> 0 "register_operand")
13231 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13232 "TARGET_SSE2"
13233 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
13234
13235 (define_expand "vec_unpacku_lo_<mode>"
13236 [(match_operand:<sseunpackmode> 0 "register_operand")
13237 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13238 "TARGET_SSE2"
13239 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
13240
13241 (define_expand "vec_unpacks_lo_hi"
13242 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
13243 (match_operand:HI 1 "register_operand"))]
13244 "TARGET_AVX512F")
13245
13246 (define_expand "vec_unpacks_lo_si"
13247 [(set (match_operand:HI 0 "register_operand")
13248 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
13249 "TARGET_AVX512F")
13250
13251 (define_expand "vec_unpacks_lo_di"
13252 [(set (match_operand:SI 0 "register_operand")
13253 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
13254 "TARGET_AVX512BW")
13255
13256 (define_expand "vec_unpacku_hi_<mode>"
13257 [(match_operand:<sseunpackmode> 0 "register_operand")
13258 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13259 "TARGET_SSE2"
13260 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
13261
13262 (define_expand "vec_unpacks_hi_hi"
13263 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
13264 (lshiftrt:HI (match_operand:HI 1 "register_operand")
13265 (const_int 8)))]
13266 "TARGET_AVX512F")
13267
13268 (define_expand "vec_unpacks_hi_<mode>"
13269 [(set (subreg:SWI48x (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
13270 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
13271 (match_dup 2)))]
13272 "TARGET_AVX512BW"
13273 {
13274 operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));
13275 })
13276
13277 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13278 ;;
13279 ;; Miscellaneous
13280 ;;
13281 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13282
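;; Unsigned average: (a + b + 1) >> 1 computed in the double-width element
;; mode and truncated back.  The expander supplies the vector-of-ones addend
;; and canonicalizes the commutative plus operands.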
13283 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
13284 [(set (match_operand:VI12_AVX2 0 "register_operand")
13285 (truncate:VI12_AVX2
13286 (lshiftrt:<ssedoublemode>
13287 (plus:<ssedoublemode>
13288 (plus:<ssedoublemode>
13289 (zero_extend:<ssedoublemode>
13290 (match_operand:VI12_AVX2 1 "vector_operand"))
13291 (zero_extend:<ssedoublemode>
13292 (match_operand:VI12_AVX2 2 "vector_operand")))
13293 (match_dup <mask_expand_op3>))
13294 (const_int 1))))]
13295 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13296 {
13297 rtx tmp;
13298 if (<mask_applied>)
13299 tmp = operands[3];
13300 operands[3] = CONST1_RTX(<MODE>mode);
13301 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
13302
13303 if (<mask_applied>)
13304 {
13305 operands[5] = operands[3];
13306 operands[3] = tmp;
13307 }
13308 })
13309
13310 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
13311 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
13312 (truncate:VI12_AVX2
13313 (lshiftrt:<ssedoublemode>
13314 (plus:<ssedoublemode>
13315 (plus:<ssedoublemode>
13316 (zero_extend:<ssedoublemode>
13317 (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
13318 (zero_extend:<ssedoublemode>
13319 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
13320 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
13321 (const_int 1))))]
13322 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13323 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
13324 "@
13325 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
13326 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13327 [(set_attr "isa" "noavx,avx")
13328 (set_attr "type" "sseiadd")
13329 (set_attr "prefix_data16" "1,*")
13330 (set_attr "prefix" "orig,<mask_prefix>")
13331 (set_attr "mode" "<sseinsnmode>")])
13332
13333 ;; The exact RTL for psadbw (per-quadword sums of absolute byte differences)
13334 ;; would be absolutely enormous and not generally useful; use an unspec instead.
13335 (define_insn "<sse2_avx2>_psadbw"
13336 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
13337 (unspec:VI8_AVX2_AVX512BW
13338 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
13339 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
13340 UNSPEC_PSADBW))]
13341 "TARGET_SSE2"
13342 "@
13343 psadbw\t{%2, %0|%0, %2}
13344 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
13345 [(set_attr "isa" "noavx,avx")
13346 (set_attr "type" "sseiadd")
13347 (set_attr "atom_unit" "simul")
13348 (set_attr "prefix_data16" "1,*")
13349 (set_attr "prefix" "orig,maybe_evex")
13350 (set_attr "mode" "<sseinsnmode>")])
13351
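;; movmskps/movmskpd collect the sign bit of each element into the low bits
;; of a general register.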
13352 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
13353 [(set (match_operand:SI 0 "register_operand" "=r")
13354 (unspec:SI
13355 [(match_operand:VF_128_256 1 "register_operand" "x")]
13356 UNSPEC_MOVMSK))]
13357 "TARGET_SSE"
13358 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
13359 [(set_attr "type" "ssemov")
13360 (set_attr "prefix" "maybe_vex")
13361 (set_attr "mode" "<MODE>")])
13362
13363 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
13364 [(set (match_operand:DI 0 "register_operand" "=r")
13365 (zero_extend:DI
13366 (unspec:SI
13367 [(match_operand:VF_128_256 1 "register_operand" "x")]
13368 UNSPEC_MOVMSK)))]
13369 "TARGET_64BIT && TARGET_SSE"
13370 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
13371 [(set_attr "type" "ssemov")
13372 (set_attr "prefix" "maybe_vex")
13373 (set_attr "mode" "<MODE>")])
13374
13375 (define_insn "<sse2_avx2>_pmovmskb"
13376 [(set (match_operand:SI 0 "register_operand" "=r")
13377 (unspec:SI
13378 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
13379 UNSPEC_MOVMSK))]
13380 "TARGET_SSE2"
13381 "%vpmovmskb\t{%1, %0|%0, %1}"
13382 [(set_attr "type" "ssemov")
13383 (set (attr "prefix_data16")
13384 (if_then_else
13385 (match_test "TARGET_AVX")
13386 (const_string "*")
13387 (const_string "1")))
13388 (set_attr "prefix" "maybe_vex")
13389 (set_attr "mode" "SI")])
13390
13391 (define_insn "*<sse2_avx2>_pmovmskb_zext"
13392 [(set (match_operand:DI 0 "register_operand" "=r")
13393 (zero_extend:DI
13394 (unspec:SI
13395 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
13396 UNSPEC_MOVMSK)))]
13397 "TARGET_64BIT && TARGET_SSE2"
13398 "%vpmovmskb\t{%1, %k0|%k0, %1}"
13399 [(set_attr "type" "ssemov")
13400 (set (attr "prefix_data16")
13401 (if_then_else
13402 (match_test "TARGET_AVX")
13403 (const_string "*")
13404 (const_string "1")))
13405 (set_attr "prefix" "maybe_vex")
13406 (set_attr "mode" "SI")])
13407
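;; maskmovdqu stores the bytes of operand 1 selected by the sign bits of
;; operand 2 to the destination memory (addressed through %rdi/%edi in the
;; insn below); the destination also appears as an input since unselected
;; bytes keep their previous contents.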
13408 (define_expand "sse2_maskmovdqu"
13409 [(set (match_operand:V16QI 0 "memory_operand")
13410 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
13411 (match_operand:V16QI 2 "register_operand")
13412 (match_dup 0)]
13413 UNSPEC_MASKMOV))]
13414 "TARGET_SSE2")
13415
13416 (define_insn "*sse2_maskmovdqu"
13417 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
13418 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
13419 (match_operand:V16QI 2 "register_operand" "x")
13420 (mem:V16QI (match_dup 0))]
13421 UNSPEC_MASKMOV))]
13422 "TARGET_SSE2"
13423 {
13424 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
13425 that requires %v to be at the beginning of the opcode name. */
13426 if (Pmode != word_mode)
13427 fputs ("\taddr32", asm_out_file);
13428 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
13429 }
13430 [(set_attr "type" "ssemov")
13431 (set_attr "prefix_data16" "1")
13432 (set (attr "length_address")
13433 (symbol_ref ("Pmode != word_mode")))
13434 ;; The implicit %rdi operand confuses default length_vex computation.
13435 (set (attr "length_vex")
13436 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
13437 (set_attr "prefix" "maybe_vex")
13438 (set_attr "znver1_decode" "vector")
13439 (set_attr "mode" "TI")])
13440
13441 (define_insn "sse_ldmxcsr"
13442 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
13443 UNSPECV_LDMXCSR)]
13444 "TARGET_SSE"
13445 "%vldmxcsr\t%0"
13446 [(set_attr "type" "sse")
13447 (set_attr "atom_sse_attr" "mxcsr")
13448 (set_attr "prefix" "maybe_vex")
13449 (set_attr "memory" "load")])
13450
13451 (define_insn "sse_stmxcsr"
13452 [(set (match_operand:SI 0 "memory_operand" "=m")
13453 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
13454 "TARGET_SSE"
13455 "%vstmxcsr\t%0"
13456 [(set_attr "type" "sse")
13457 (set_attr "atom_sse_attr" "mxcsr")
13458 (set_attr "prefix" "maybe_vex")
13459 (set_attr "memory" "store")])
13460
13461 (define_insn "sse2_clflush"
13462 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
13463 UNSPECV_CLFLUSH)]
13464 "TARGET_SSE2"
13465 "clflush\t%a0"
13466 [(set_attr "type" "sse")
13467 (set_attr "atom_sse_attr" "fence")
13468 (set_attr "memory" "unknown")])
13469
13470 ;; As per the AMD and Intel ISA manuals, the first operand holds the
13471 ;; extensions and goes in %ecx; the second operand holds the hints and
13472 ;; goes in %eax.
13473 (define_insn "sse3_mwait"
13474 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
13475 (match_operand:SI 1 "register_operand" "a")]
13476 UNSPECV_MWAIT)]
13477 "TARGET_SSE3"
13478 ;; The 64-bit version is "mwait %rax,%rcx", but only the lower 32 bits are
13479 ;; used.  Since 32-bit register operands are implicitly zero-extended to
13480 ;; 64 bits, we only need to set up the 32-bit registers.
13481 "mwait"
13482 [(set_attr "length" "3")])
13483
13484 (define_insn "sse3_monitor_<mode>"
13485 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
13486 (match_operand:SI 1 "register_operand" "c")
13487 (match_operand:SI 2 "register_operand" "d")]
13488 UNSPECV_MONITOR)]
13489 "TARGET_SSE3"
13490 ;; The 64-bit version is "monitor %rax,%rcx,%rdx", but only the lower 32 bits
13491 ;; of RCX and RDX are used.  Since 32-bit register operands are implicitly
13492 ;; zero-extended to 64 bits, we only need to set up the 32-bit registers.
13493 "%^monitor"
13494 [(set (attr "length")
13495 (symbol_ref ("(Pmode != word_mode) + 3")))])
13496
13497 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13498 ;;
13499 ;; SSSE3 instructions
13500 ;;
13501 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13502
13503 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
13504
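;; Horizontal add/sub: every output element is the sum or difference of an
;; adjacent pair of input elements, written out as nested vec_concats of the
;; individual two-element operations.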
13505 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
13506 [(set (match_operand:V16HI 0 "register_operand" "=x")
13507 (vec_concat:V16HI
13508 (vec_concat:V8HI
13509 (vec_concat:V4HI
13510 (vec_concat:V2HI
13511 (ssse3_plusminus:HI
13512 (vec_select:HI
13513 (match_operand:V16HI 1 "register_operand" "x")
13514 (parallel [(const_int 0)]))
13515 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13516 (ssse3_plusminus:HI
13517 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13518 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13519 (vec_concat:V2HI
13520 (ssse3_plusminus:HI
13521 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13522 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13523 (ssse3_plusminus:HI
13524 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13525 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13526 (vec_concat:V4HI
13527 (vec_concat:V2HI
13528 (ssse3_plusminus:HI
13529 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
13530 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
13531 (ssse3_plusminus:HI
13532 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
13533 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
13534 (vec_concat:V2HI
13535 (ssse3_plusminus:HI
13536 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
13537 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
13538 (ssse3_plusminus:HI
13539 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
13540 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
13541 (vec_concat:V8HI
13542 (vec_concat:V4HI
13543 (vec_concat:V2HI
13544 (ssse3_plusminus:HI
13545 (vec_select:HI
13546 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
13547 (parallel [(const_int 0)]))
13548 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13549 (ssse3_plusminus:HI
13550 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13551 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13552 (vec_concat:V2HI
13553 (ssse3_plusminus:HI
13554 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13555 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13556 (ssse3_plusminus:HI
13557 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13558 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
13559 (vec_concat:V4HI
13560 (vec_concat:V2HI
13561 (ssse3_plusminus:HI
13562 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
13563 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
13564 (ssse3_plusminus:HI
13565 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
13566 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
13567 (vec_concat:V2HI
13568 (ssse3_plusminus:HI
13569 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
13570 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
13571 (ssse3_plusminus:HI
13572 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
13573 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
13574 "TARGET_AVX2"
13575 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13576 [(set_attr "type" "sseiadd")
13577 (set_attr "prefix_extra" "1")
13578 (set_attr "prefix" "vex")
13579 (set_attr "mode" "OI")])
13580
13581 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
13582 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13583 (vec_concat:V8HI
13584 (vec_concat:V4HI
13585 (vec_concat:V2HI
13586 (ssse3_plusminus:HI
13587 (vec_select:HI
13588 (match_operand:V8HI 1 "register_operand" "0,x")
13589 (parallel [(const_int 0)]))
13590 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13591 (ssse3_plusminus:HI
13592 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13593 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13594 (vec_concat:V2HI
13595 (ssse3_plusminus:HI
13596 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13597 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13598 (ssse3_plusminus:HI
13599 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13600 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13601 (vec_concat:V4HI
13602 (vec_concat:V2HI
13603 (ssse3_plusminus:HI
13604 (vec_select:HI
13605 (match_operand:V8HI 2 "vector_operand" "xBm,xm")
13606 (parallel [(const_int 0)]))
13607 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13608 (ssse3_plusminus:HI
13609 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13610 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13611 (vec_concat:V2HI
13612 (ssse3_plusminus:HI
13613 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13614 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13615 (ssse3_plusminus:HI
13616 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13617 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
13618 "TARGET_SSSE3"
13619 "@
13620 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
13621 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13622 [(set_attr "isa" "noavx,avx")
13623 (set_attr "type" "sseiadd")
13624 (set_attr "atom_unit" "complex")
13625 (set_attr "prefix_data16" "1,*")
13626 (set_attr "prefix_extra" "1")
13627 (set_attr "prefix" "orig,vex")
13628 (set_attr "mode" "TI")])
13629
13630 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
13631 [(set (match_operand:V4HI 0 "register_operand" "=y")
13632 (vec_concat:V4HI
13633 (vec_concat:V2HI
13634 (ssse3_plusminus:HI
13635 (vec_select:HI
13636 (match_operand:V4HI 1 "register_operand" "0")
13637 (parallel [(const_int 0)]))
13638 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13639 (ssse3_plusminus:HI
13640 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13641 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13642 (vec_concat:V2HI
13643 (ssse3_plusminus:HI
13644 (vec_select:HI
13645 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
13646 (parallel [(const_int 0)]))
13647 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13648 (ssse3_plusminus:HI
13649 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13650 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
13651 "TARGET_SSSE3"
13652 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
13653 [(set_attr "type" "sseiadd")
13654 (set_attr "atom_unit" "complex")
13655 (set_attr "prefix_extra" "1")
13656 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13657 (set_attr "mode" "DI")])
13658
13659 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
13660 [(set (match_operand:V8SI 0 "register_operand" "=x")
13661 (vec_concat:V8SI
13662 (vec_concat:V4SI
13663 (vec_concat:V2SI
13664 (plusminus:SI
13665 (vec_select:SI
13666 (match_operand:V8SI 1 "register_operand" "x")
13667 (parallel [(const_int 0)]))
13668 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13669 (plusminus:SI
13670 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13671 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13672 (vec_concat:V2SI
13673 (plusminus:SI
13674 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
13675 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
13676 (plusminus:SI
13677 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
13678 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
13679 (vec_concat:V4SI
13680 (vec_concat:V2SI
13681 (plusminus:SI
13682 (vec_select:SI
13683 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
13684 (parallel [(const_int 0)]))
13685 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13686 (plusminus:SI
13687 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13688 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
13689 (vec_concat:V2SI
13690 (plusminus:SI
13691 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
13692 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
13693 (plusminus:SI
13694 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
13695 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
13696 "TARGET_AVX2"
13697 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13698 [(set_attr "type" "sseiadd")
13699 (set_attr "prefix_extra" "1")
13700 (set_attr "prefix" "vex")
13701 (set_attr "mode" "OI")])
13702
13703 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
13704 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
13705 (vec_concat:V4SI
13706 (vec_concat:V2SI
13707 (plusminus:SI
13708 (vec_select:SI
13709 (match_operand:V4SI 1 "register_operand" "0,x")
13710 (parallel [(const_int 0)]))
13711 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13712 (plusminus:SI
13713 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13714 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13715 (vec_concat:V2SI
13716 (plusminus:SI
13717 (vec_select:SI
13718 (match_operand:V4SI 2 "vector_operand" "xBm,xm")
13719 (parallel [(const_int 0)]))
13720 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13721 (plusminus:SI
13722 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13723 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
13724 "TARGET_SSSE3"
13725 "@
13726 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
13727 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13728 [(set_attr "isa" "noavx,avx")
13729 (set_attr "type" "sseiadd")
13730 (set_attr "atom_unit" "complex")
13731 (set_attr "prefix_data16" "1,*")
13732 (set_attr "prefix_extra" "1")
13733 (set_attr "prefix" "orig,vex")
13734 (set_attr "mode" "TI")])
13735
13736 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
13737 [(set (match_operand:V2SI 0 "register_operand" "=y")
13738 (vec_concat:V2SI
13739 (plusminus:SI
13740 (vec_select:SI
13741 (match_operand:V2SI 1 "register_operand" "0")
13742 (parallel [(const_int 0)]))
13743 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13744 (plusminus:SI
13745 (vec_select:SI
13746 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
13747 (parallel [(const_int 0)]))
13748 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
13749 "TARGET_SSSE3"
13750 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
13751 [(set_attr "type" "sseiadd")
13752 (set_attr "atom_unit" "complex")
13753 (set_attr "prefix_extra" "1")
13754 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13755 (set_attr "mode" "DI")])
13756
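;; pmaddubsw multiplies unsigned bytes of the first operand by the
;; corresponding signed bytes of the second and sums adjacent pairs of the
;; 16-bit products with signed saturation; per output word, roughly:
;;   dst[i] = ssat16 ((u8) a[2*i] * (s8) b[2*i]
;;                    + (u8) a[2*i+1] * (s8) b[2*i+1])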
13757 (define_insn "avx2_pmaddubsw256"
13758 [(set (match_operand:V16HI 0 "register_operand" "=x")
13759 (ss_plus:V16HI
13760 (mult:V16HI
13761 (zero_extend:V16HI
13762 (vec_select:V16QI
13763 (match_operand:V32QI 1 "register_operand" "x")
13764 (parallel [(const_int 0) (const_int 2)
13765 (const_int 4) (const_int 6)
13766 (const_int 8) (const_int 10)
13767 (const_int 12) (const_int 14)
13768 (const_int 16) (const_int 18)
13769 (const_int 20) (const_int 22)
13770 (const_int 24) (const_int 26)
13771 (const_int 28) (const_int 30)])))
13772 (sign_extend:V16HI
13773 (vec_select:V16QI
13774 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
13775 (parallel [(const_int 0) (const_int 2)
13776 (const_int 4) (const_int 6)
13777 (const_int 8) (const_int 10)
13778 (const_int 12) (const_int 14)
13779 (const_int 16) (const_int 18)
13780 (const_int 20) (const_int 22)
13781 (const_int 24) (const_int 26)
13782 (const_int 28) (const_int 30)]))))
13783 (mult:V16HI
13784 (zero_extend:V16HI
13785 (vec_select:V16QI (match_dup 1)
13786 (parallel [(const_int 1) (const_int 3)
13787 (const_int 5) (const_int 7)
13788 (const_int 9) (const_int 11)
13789 (const_int 13) (const_int 15)
13790 (const_int 17) (const_int 19)
13791 (const_int 21) (const_int 23)
13792 (const_int 25) (const_int 27)
13793 (const_int 29) (const_int 31)])))
13794 (sign_extend:V16HI
13795 (vec_select:V16QI (match_dup 2)
13796 (parallel [(const_int 1) (const_int 3)
13797 (const_int 5) (const_int 7)
13798 (const_int 9) (const_int 11)
13799 (const_int 13) (const_int 15)
13800 (const_int 17) (const_int 19)
13801 (const_int 21) (const_int 23)
13802 (const_int 25) (const_int 27)
13803 (const_int 29) (const_int 31)]))))))]
13804 "TARGET_AVX2"
13805 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13806 [(set_attr "type" "sseiadd")
13807 (set_attr "prefix_extra" "1")
13808 (set_attr "prefix" "vex")
13809 (set_attr "mode" "OI")])
13810
13811 ;; The correct representation for this is absolutely enormous, and
13812 ;; surely not generally useful.
13813 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
13814 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
13815 (unspec:VI2_AVX512VL
13816 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
13817 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
13818 UNSPEC_PMADDUBSW512))]
13819 "TARGET_AVX512BW"
13820 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13821 [(set_attr "type" "sseiadd")
13822 (set_attr "prefix" "evex")
13823 (set_attr "mode" "XI")])
13824
13825 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
13826 [(set (match_operand:V32HI 0 "register_operand" "=v")
13827 (truncate:V32HI
13828 (lshiftrt:V32SI
13829 (plus:V32SI
13830 (lshiftrt:V32SI
13831 (mult:V32SI
13832 (sign_extend:V32SI
13833 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
13834 (sign_extend:V32SI
13835 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
13836 (const_int 14))
13837 (const_vector:V32HI [(const_int 1) (const_int 1)
13838 (const_int 1) (const_int 1)
13839 (const_int 1) (const_int 1)
13840 (const_int 1) (const_int 1)
13841 (const_int 1) (const_int 1)
13842 (const_int 1) (const_int 1)
13843 (const_int 1) (const_int 1)
13844 (const_int 1) (const_int 1)
13845 (const_int 1) (const_int 1)
13846 (const_int 1) (const_int 1)
13847 (const_int 1) (const_int 1)
13848 (const_int 1) (const_int 1)
13849 (const_int 1) (const_int 1)
13850 (const_int 1) (const_int 1)
13851 (const_int 1) (const_int 1)
13852 (const_int 1) (const_int 1)]))
13853 (const_int 1))))]
13854 "TARGET_AVX512BW"
13855 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13856 [(set_attr "type" "sseimul")
13857 (set_attr "prefix" "evex")
13858 (set_attr "mode" "XI")])
13859
13860 (define_insn "ssse3_pmaddubsw128"
13861 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13862 (ss_plus:V8HI
13863 (mult:V8HI
13864 (zero_extend:V8HI
13865 (vec_select:V8QI
13866 (match_operand:V16QI 1 "register_operand" "0,x")
13867 (parallel [(const_int 0) (const_int 2)
13868 (const_int 4) (const_int 6)
13869 (const_int 8) (const_int 10)
13870 (const_int 12) (const_int 14)])))
13871 (sign_extend:V8HI
13872 (vec_select:V8QI
13873 (match_operand:V16QI 2 "vector_operand" "xBm,xm")
13874 (parallel [(const_int 0) (const_int 2)
13875 (const_int 4) (const_int 6)
13876 (const_int 8) (const_int 10)
13877 (const_int 12) (const_int 14)]))))
13878 (mult:V8HI
13879 (zero_extend:V8HI
13880 (vec_select:V8QI (match_dup 1)
13881 (parallel [(const_int 1) (const_int 3)
13882 (const_int 5) (const_int 7)
13883 (const_int 9) (const_int 11)
13884 (const_int 13) (const_int 15)])))
13885 (sign_extend:V8HI
13886 (vec_select:V8QI (match_dup 2)
13887 (parallel [(const_int 1) (const_int 3)
13888 (const_int 5) (const_int 7)
13889 (const_int 9) (const_int 11)
13890 (const_int 13) (const_int 15)]))))))]
13891 "TARGET_SSSE3"
13892 "@
13893 pmaddubsw\t{%2, %0|%0, %2}
13894 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13895 [(set_attr "isa" "noavx,avx")
13896 (set_attr "type" "sseiadd")
13897 (set_attr "atom_unit" "simul")
13898 (set_attr "prefix_data16" "1,*")
13899 (set_attr "prefix_extra" "1")
13900 (set_attr "prefix" "orig,vex")
13901 (set_attr "mode" "TI")])
13902
13903 (define_insn "ssse3_pmaddubsw"
13904 [(set (match_operand:V4HI 0 "register_operand" "=y")
13905 (ss_plus:V4HI
13906 (mult:V4HI
13907 (zero_extend:V4HI
13908 (vec_select:V4QI
13909 (match_operand:V8QI 1 "register_operand" "0")
13910 (parallel [(const_int 0) (const_int 2)
13911 (const_int 4) (const_int 6)])))
13912 (sign_extend:V4HI
13913 (vec_select:V4QI
13914 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
13915 (parallel [(const_int 0) (const_int 2)
13916 (const_int 4) (const_int 6)]))))
13917 (mult:V4HI
13918 (zero_extend:V4HI
13919 (vec_select:V4QI (match_dup 1)
13920 (parallel [(const_int 1) (const_int 3)
13921 (const_int 5) (const_int 7)])))
13922 (sign_extend:V4HI
13923 (vec_select:V4QI (match_dup 2)
13924 (parallel [(const_int 1) (const_int 3)
13925 (const_int 5) (const_int 7)]))))))]
13926 "TARGET_SSSE3"
13927 "pmaddubsw\t{%2, %0|%0, %2}"
13928 [(set_attr "type" "sseiadd")
13929 (set_attr "atom_unit" "simul")
13930 (set_attr "prefix_extra" "1")
13931 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13932 (set_attr "mode" "DI")])
13933
13934 (define_mode_iterator PMULHRSW
13935 [V4HI V8HI (V16HI "TARGET_AVX2")])
13936
13937 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
13938 [(set (match_operand:PMULHRSW 0 "register_operand")
13939 (vec_merge:PMULHRSW
13940 (truncate:PMULHRSW
13941 (lshiftrt:<ssedoublemode>
13942 (plus:<ssedoublemode>
13943 (lshiftrt:<ssedoublemode>
13944 (mult:<ssedoublemode>
13945 (sign_extend:<ssedoublemode>
13946 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13947 (sign_extend:<ssedoublemode>
13948 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13949 (const_int 14))
13950 (match_dup 5))
13951 (const_int 1)))
13952 (match_operand:PMULHRSW 3 "register_operand")
13953 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13954 "TARGET_AVX512BW && TARGET_AVX512VL"
13955 {
13956 operands[5] = CONST1_RTX(<MODE>mode);
13957 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
13958 })
13959
13960 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
13961 [(set (match_operand:PMULHRSW 0 "register_operand")
13962 (truncate:PMULHRSW
13963 (lshiftrt:<ssedoublemode>
13964 (plus:<ssedoublemode>
13965 (lshiftrt:<ssedoublemode>
13966 (mult:<ssedoublemode>
13967 (sign_extend:<ssedoublemode>
13968 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13969 (sign_extend:<ssedoublemode>
13970 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13971 (const_int 14))
13972 (match_dup 3))
13973 (const_int 1))))]
13974 "TARGET_AVX2"
13975 {
13976 operands[3] = CONST1_RTX(<MODE>mode);
13977 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
13978 })
13979
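;; pmulhrsw is a rounded high-half multiply.  The RTL below computes, per
;; element,
;;   dst[i] = ((((s16) a[i] * (s16) b[i]) >> 14) + 1) >> 1
;; which is equivalent to (a[i] * b[i] + 0x4000) >> 15.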
13980 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
13981 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
13982 (truncate:VI2_AVX2
13983 (lshiftrt:<ssedoublemode>
13984 (plus:<ssedoublemode>
13985 (lshiftrt:<ssedoublemode>
13986 (mult:<ssedoublemode>
13987 (sign_extend:<ssedoublemode>
13988 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
13989 (sign_extend:<ssedoublemode>
13990 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
13991 (const_int 14))
13992 (match_operand:VI2_AVX2 3 "const1_operand"))
13993 (const_int 1))))]
13994 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13995 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
13996 "@
13997 pmulhrsw\t{%2, %0|%0, %2}
13998 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
13999 [(set_attr "isa" "noavx,avx")
14000 (set_attr "type" "sseimul")
14001 (set_attr "prefix_data16" "1,*")
14002 (set_attr "prefix_extra" "1")
14003 (set_attr "prefix" "orig,maybe_evex")
14004 (set_attr "mode" "<sseinsnmode>")])
14005
14006 (define_insn "*ssse3_pmulhrswv4hi3"
14007 [(set (match_operand:V4HI 0 "register_operand" "=y")
14008 (truncate:V4HI
14009 (lshiftrt:V4SI
14010 (plus:V4SI
14011 (lshiftrt:V4SI
14012 (mult:V4SI
14013 (sign_extend:V4SI
14014 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
14015 (sign_extend:V4SI
14016 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
14017 (const_int 14))
14018 (match_operand:V4HI 3 "const1_operand"))
14019 (const_int 1))))]
14020 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
14021 "pmulhrsw\t{%2, %0|%0, %2}"
14022 [(set_attr "type" "sseimul")
14023 (set_attr "prefix_extra" "1")
14024 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14025 (set_attr "mode" "DI")])
14026
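;; pshufb permutes bytes under control of the second operand: a result byte
;; is zero when bit 7 of its control byte is set, otherwise it is the source
;; byte indexed by the low four bits of the control byte.  The 256- and
;; 512-bit forms apply this within each 128-bit lane.  Roughly:
;;   dst[i] = (ctl[i] & 0x80) ? 0 : src[ctl[i] & 0x0f]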
14027 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
14028 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v")
14029 (unspec:VI1_AVX512
14030 [(match_operand:VI1_AVX512 1 "register_operand" "0,v")
14031 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,vm")]
14032 UNSPEC_PSHUFB))]
14033 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14034 "@
14035 pshufb\t{%2, %0|%0, %2}
14036 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14037 [(set_attr "isa" "noavx,avx")
14038 (set_attr "type" "sselog1")
14039 (set_attr "prefix_data16" "1,*")
14040 (set_attr "prefix_extra" "1")
14041 (set_attr "prefix" "orig,maybe_evex")
14042 (set_attr "btver2_decode" "vector,vector")
14043 (set_attr "mode" "<sseinsnmode>")])
14044
14045 (define_insn "ssse3_pshufbv8qi3"
14046 [(set (match_operand:V8QI 0 "register_operand" "=y")
14047 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
14048 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
14049 UNSPEC_PSHUFB))]
14050 "TARGET_SSSE3"
14051 "pshufb\t{%2, %0|%0, %2}"
14052 [(set_attr "type" "sselog1")
14053 (set_attr "prefix_extra" "1")
14054 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14055 (set_attr "mode" "DI")])
14056
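;; psign{b,w,d} copy, negate or zero each element of the first operand
;; according to the sign of the corresponding element of the second:
;;   dst[i] = b[i] < 0 ? -a[i] : (b[i] == 0 ? 0 : a[i])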
14057 (define_insn "<ssse3_avx2>_psign<mode>3"
14058 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
14059 (unspec:VI124_AVX2
14060 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
14061 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
14062 UNSPEC_PSIGN))]
14063 "TARGET_SSSE3"
14064 "@
14065 psign<ssemodesuffix>\t{%2, %0|%0, %2}
14066 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14067 [(set_attr "isa" "noavx,avx")
14068 (set_attr "type" "sselog1")
14069 (set_attr "prefix_data16" "1,*")
14070 (set_attr "prefix_extra" "1")
14071 (set_attr "prefix" "orig,vex")
14072 (set_attr "mode" "<sseinsnmode>")])
14073
14074 (define_insn "ssse3_psign<mode>3"
14075 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
14076 (unspec:MMXMODEI
14077 [(match_operand:MMXMODEI 1 "register_operand" "0")
14078 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
14079 UNSPEC_PSIGN))]
14080 "TARGET_SSSE3"
14081 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
14082 [(set_attr "type" "sselog1")
14083 (set_attr "prefix_extra" "1")
14084 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14085 (set_attr "mode" "DI")])
14086
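;; In the palignr patterns the shift count (operand 3) is expressed in bits,
;; as checked by const_0_to_255_mul_8_operand, while the instruction itself
;; takes a byte count; hence the INTVAL (operands[3]) / 8 conversion in the
;; output code.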
14087 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
14088 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
14089 (vec_merge:VI1_AVX512
14090 (unspec:VI1_AVX512
14091 [(match_operand:VI1_AVX512 1 "register_operand" "v")
14092 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
14093 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14094 UNSPEC_PALIGNR)
14095 (match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
14096 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
14097 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
14098 {
14099 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14100 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
14101 }
14102 [(set_attr "type" "sseishft")
14103 (set_attr "atom_unit" "sishuf")
14104 (set_attr "prefix_extra" "1")
14105 (set_attr "length_immediate" "1")
14106 (set_attr "prefix" "evex")
14107 (set_attr "mode" "<sseinsnmode>")])
14108
14109 (define_insn "<ssse3_avx2>_palignr<mode>"
14110 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
14111 (unspec:SSESCALARMODE
14112 [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
14113 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,vm")
14114 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
14115 UNSPEC_PALIGNR))]
14116 "TARGET_SSSE3"
14117 {
14118 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14119
14120 switch (which_alternative)
14121 {
14122 case 0:
14123 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14124 case 1:
14125 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14126 default:
14127 gcc_unreachable ();
14128 }
14129 }
14130 [(set_attr "isa" "noavx,avx")
14131 (set_attr "type" "sseishft")
14132 (set_attr "atom_unit" "sishuf")
14133 (set_attr "prefix_data16" "1,*")
14134 (set_attr "prefix_extra" "1")
14135 (set_attr "length_immediate" "1")
14136 (set_attr "prefix" "orig,vex")
14137 (set_attr "mode" "<sseinsnmode>")])
14138
14139 (define_insn "ssse3_palignrdi"
14140 [(set (match_operand:DI 0 "register_operand" "=y")
14141 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
14142 (match_operand:DI 2 "nonimmediate_operand" "ym")
14143 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14144 UNSPEC_PALIGNR))]
14145 "TARGET_SSSE3"
14146 {
14147 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14148 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14149 }
14150 [(set_attr "type" "sseishft")
14151 (set_attr "atom_unit" "sishuf")
14152 (set_attr "prefix_extra" "1")
14153 (set_attr "length_immediate" "1")
14154 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14155 (set_attr "mode" "DI")])
14156
14157 ;; Mode iterator to handle the absence of V2DI and V4DI modes for the
14158 ;; abs instruction on pre-AVX512 targets.
14159 (define_mode_iterator VI1248_AVX512VL_AVX512BW
14160 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
14161 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
14162 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
14163 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
14164
14165 (define_insn "*abs<mode>2"
14166 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
14167 (abs:VI1248_AVX512VL_AVX512BW
14168 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
14169 "TARGET_SSSE3"
14170 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
14171 [(set_attr "type" "sselog1")
14172 (set_attr "prefix_data16" "1")
14173 (set_attr "prefix_extra" "1")
14174 (set_attr "prefix" "maybe_vex")
14175 (set_attr "mode" "<sseinsnmode>")])
14176
14177 (define_insn "abs<mode>2_mask"
14178 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
14179 (vec_merge:VI48_AVX512VL
14180 (abs:VI48_AVX512VL
14181 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
14182 (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
14183 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14184 "TARGET_AVX512F"
14185 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14186 [(set_attr "type" "sselog1")
14187 (set_attr "prefix" "evex")
14188 (set_attr "mode" "<sseinsnmode>")])
14189
14190 (define_insn "abs<mode>2_mask"
14191 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
14192 (vec_merge:VI12_AVX512VL
14193 (abs:VI12_AVX512VL
14194 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
14195 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
14196 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14197 "TARGET_AVX512BW"
14198 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14199 [(set_attr "type" "sselog1")
14200 (set_attr "prefix" "evex")
14201 (set_attr "mode" "<sseinsnmode>")])
14202
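;; Without SSSE3 there is no pabs instruction, so the expander below lets
;; ix86_expand_sse2_abs open-code the absolute value with plain SSE2
;; operations.  One standard branch-free formulation (a sketch only, not
;; necessarily the exact sequence emitted) is
;;   s = x >> (element_bits - 1);  /* arithmetic shift: 0 or all-ones */
;;   abs = (x ^ s) - s;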
14203 (define_expand "abs<mode>2"
14204 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
14205 (abs:VI1248_AVX512VL_AVX512BW
14206 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand")))]
14207 "TARGET_SSE2"
14208 {
14209 if (!TARGET_SSSE3)
14210 {
14211 ix86_expand_sse2_abs (operands[0], operands[1]);
14212 DONE;
14213 }
14214 })
14215
14216 (define_insn "abs<mode>2"
14217 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
14218 (abs:MMXMODEI
14219 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
14220 "TARGET_SSSE3"
14221 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
14222 [(set_attr "type" "sselog1")
14223 (set_attr "prefix_rep" "0")
14224 (set_attr "prefix_extra" "1")
14225 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14226 (set_attr "mode" "DI")])
14227
14228 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14229 ;;
14230 ;; AMD SSE4A instructions
14231 ;;
14232 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14233
14234 (define_insn "sse4a_movnt<mode>"
14235 [(set (match_operand:MODEF 0 "memory_operand" "=m")
14236 (unspec:MODEF
14237 [(match_operand:MODEF 1 "register_operand" "x")]
14238 UNSPEC_MOVNT))]
14239 "TARGET_SSE4A"
14240 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
14241 [(set_attr "type" "ssemov")
14242 (set_attr "mode" "<MODE>")])
14243
14244 (define_insn "sse4a_vmmovnt<mode>"
14245 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
14246 (unspec:<ssescalarmode>
14247 [(vec_select:<ssescalarmode>
14248 (match_operand:VF_128 1 "register_operand" "x")
14249 (parallel [(const_int 0)]))]
14250 UNSPEC_MOVNT))]
14251 "TARGET_SSE4A"
14252 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
14253 [(set_attr "type" "ssemov")
14254 (set_attr "mode" "<ssescalarmode>")])
14255
14256 (define_insn "sse4a_extrqi"
14257 [(set (match_operand:V2DI 0 "register_operand" "=x")
14258 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14259 (match_operand 2 "const_0_to_255_operand")
14260 (match_operand 3 "const_0_to_255_operand")]
14261 UNSPEC_EXTRQI))]
14262 "TARGET_SSE4A"
14263 "extrq\t{%3, %2, %0|%0, %2, %3}"
14264 [(set_attr "type" "sse")
14265 (set_attr "prefix_data16" "1")
14266 (set_attr "length_immediate" "2")
14267 (set_attr "mode" "TI")])
14268
14269 (define_insn "sse4a_extrq"
14270 [(set (match_operand:V2DI 0 "register_operand" "=x")
14271 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14272 (match_operand:V16QI 2 "register_operand" "x")]
14273 UNSPEC_EXTRQ))]
14274 "TARGET_SSE4A"
14275 "extrq\t{%2, %0|%0, %2}"
14276 [(set_attr "type" "sse")
14277 (set_attr "prefix_data16" "1")
14278 (set_attr "mode" "TI")])
14279
14280 (define_insn "sse4a_insertqi"
14281 [(set (match_operand:V2DI 0 "register_operand" "=x")
14282 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14283 (match_operand:V2DI 2 "register_operand" "x")
14284 (match_operand 3 "const_0_to_255_operand")
14285 (match_operand 4 "const_0_to_255_operand")]
14286 UNSPEC_INSERTQI))]
14287 "TARGET_SSE4A"
14288 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
14289 [(set_attr "type" "sseins")
14290 (set_attr "prefix_data16" "0")
14291 (set_attr "prefix_rep" "1")
14292 (set_attr "length_immediate" "2")
14293 (set_attr "mode" "TI")])
14294
14295 (define_insn "sse4a_insertq"
14296 [(set (match_operand:V2DI 0 "register_operand" "=x")
14297 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14298 (match_operand:V2DI 2 "register_operand" "x")]
14299 UNSPEC_INSERTQ))]
14300 "TARGET_SSE4A"
14301 "insertq\t{%2, %0|%0, %2}"
14302 [(set_attr "type" "sseins")
14303 (set_attr "prefix_data16" "0")
14304 (set_attr "prefix_rep" "1")
14305 (set_attr "mode" "TI")])
14306
14307 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14308 ;;
14309 ;; Intel SSE4.1 instructions
14310 ;;
14311 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14312
14313 ;; Maximum blend immediate for each mode: one selector bit per element.
14314 (define_mode_attr blendbits
14315 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
14316
14317 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
14318 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14319 (vec_merge:VF_128_256
14320 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
14321 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
14322 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
14323 "TARGET_SSE4_1"
14324 "@
14325 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14326 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14327 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14328 [(set_attr "isa" "noavx,noavx,avx")
14329 (set_attr "type" "ssemov")
14330 (set_attr "length_immediate" "1")
14331 (set_attr "prefix_data16" "1,1,*")
14332 (set_attr "prefix_extra" "1")
14333 (set_attr "prefix" "orig,orig,vex")
14334 (set_attr "mode" "<MODE>")])
14335
14336 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
14337 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14338 (unspec:VF_128_256
14339 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
14340 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
14341 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
14342 UNSPEC_BLENDV))]
14343 "TARGET_SSE4_1"
14344 "@
14345 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14346 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14347 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14348 [(set_attr "isa" "noavx,noavx,avx")
14349 (set_attr "type" "ssemov")
14350 (set_attr "length_immediate" "1")
14351 (set_attr "prefix_data16" "1,1,*")
14352 (set_attr "prefix_extra" "1")
14353 (set_attr "prefix" "orig,orig,vex")
14354 (set_attr "btver2_decode" "vector,vector,vector")
14355 (set_attr "mode" "<MODE>")])
14356
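;; For dpps/dppd the immediate is used as two masks: the high nibble selects
;; which element products contribute to the dot product and the low nibble
;; selects which destination elements receive the sum, the others being
;; zeroed.  The 256-bit vdpps form does this within each 128-bit lane.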
14357 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
14358 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14359 (unspec:VF_128_256
14360 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
14361 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
14362 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
14363 UNSPEC_DP))]
14364 "TARGET_SSE4_1"
14365 "@
14366 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14367 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14368 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14369 [(set_attr "isa" "noavx,noavx,avx")
14370 (set_attr "type" "ssemul")
14371 (set_attr "length_immediate" "1")
14372 (set_attr "prefix_data16" "1,1,*")
14373 (set_attr "prefix_extra" "1")
14374 (set_attr "prefix" "orig,orig,vex")
14375 (set_attr "btver2_decode" "vector,vector,vector")
14376 (set_attr "znver1_decode" "vector,vector,vector")
14377 (set_attr "mode" "<MODE>")])
14378
14379 ;; Mode attribute used by `vmovntdqa' pattern
14380 (define_mode_attr vi8_sse4_1_avx2_avx512
14381 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
14382
14383 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
14384 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v")
14385 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")]
14386 UNSPEC_MOVNTDQA))]
14387 "TARGET_SSE4_1"
14388 "%vmovntdqa\t{%1, %0|%0, %1}"
14389 [(set_attr "type" "ssemov")
14390 (set_attr "prefix_extra" "1,1,*")
14391 (set_attr "prefix" "maybe_vex,maybe_vex,evex")
14392 (set_attr "mode" "<sseinsnmode>")])
14393
14394 (define_insn "<sse4_1_avx2>_mpsadbw"
14395 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
14396 (unspec:VI1_AVX2
14397 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14398 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
14399 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
14400 UNSPEC_MPSADBW))]
14401 "TARGET_SSE4_1"
14402 "@
14403 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14404 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14405 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14406 [(set_attr "isa" "noavx,noavx,avx")
14407 (set_attr "type" "sselog1")
14408 (set_attr "length_immediate" "1")
14409 (set_attr "prefix_extra" "1")
14410 (set_attr "prefix" "orig,orig,vex")
14411 (set_attr "btver2_decode" "vector,vector,vector")
14412 (set_attr "znver1_decode" "vector,vector,vector")
14413 (set_attr "mode" "<sseinsnmode>")])
14414
14415 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
14416 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v")
14417 (vec_concat:VI2_AVX2
14418 (us_truncate:<ssehalfvecmode>
14419 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v"))
14420 (us_truncate:<ssehalfvecmode>
14421 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,vm"))))]
14422 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14423 "@
14424 packusdw\t{%2, %0|%0, %2}
14425 packusdw\t{%2, %0|%0, %2}
14426 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14427 [(set_attr "isa" "noavx,noavx,avx")
14428 (set_attr "type" "sselog")
14429 (set_attr "prefix_extra" "1")
14430 (set_attr "prefix" "orig,orig,maybe_evex")
14431 (set_attr "mode" "<sseinsnmode>")])
14432
14433 (define_insn "<sse4_1_avx2>_pblendvb"
14434 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
14435 (unspec:VI1_AVX2
14436 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14437 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
14438 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
14439 UNSPEC_BLENDV))]
14440 "TARGET_SSE4_1"
14441 "@
14442 pblendvb\t{%3, %2, %0|%0, %2, %3}
14443 pblendvb\t{%3, %2, %0|%0, %2, %3}
14444 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14445 [(set_attr "isa" "noavx,noavx,avx")
14446 (set_attr "type" "ssemov")
14447 (set_attr "prefix_extra" "1")
14448 (set_attr "length_immediate" "*,*,1")
14449 (set_attr "prefix" "orig,orig,vex")
14450 (set_attr "btver2_decode" "vector,vector,vector")
14451 (set_attr "mode" "<sseinsnmode>")])
14452
14453 (define_insn "sse4_1_pblendw"
14454 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
14455 (vec_merge:V8HI
14456 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
14457 (match_operand:V8HI 1 "register_operand" "0,0,x")
14458 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
14459 "TARGET_SSE4_1"
14460 "@
14461 pblendw\t{%3, %2, %0|%0, %2, %3}
14462 pblendw\t{%3, %2, %0|%0, %2, %3}
14463 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14464 [(set_attr "isa" "noavx,noavx,avx")
14465 (set_attr "type" "ssemov")
14466 (set_attr "prefix_extra" "1")
14467 (set_attr "length_immediate" "1")
14468 (set_attr "prefix" "orig,orig,vex")
14469 (set_attr "mode" "TI")])
14470
14471 ;; The builtin uses an 8-bit immediate. Expand that.
14472 (define_expand "avx2_pblendw"
14473 [(set (match_operand:V16HI 0 "register_operand")
14474 (vec_merge:V16HI
14475 (match_operand:V16HI 2 "nonimmediate_operand")
14476 (match_operand:V16HI 1 "register_operand")
14477 (match_operand:SI 3 "const_0_to_255_operand")))]
14478 "TARGET_AVX2"
14479 {
14480 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
14481 operands[3] = GEN_INT (val << 8 | val);
14482 })
14483
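;; The expander above duplicates the 8-bit immediate (val << 8 | val) to
;; obtain the 16 selector bits that vec_merge on V16HI requires; the insn
;; below masks it back to 8 bits when printing vpblendw, which applies the
;; same 8 selector bits to each 128-bit lane.  For example, 0xa5 becomes
;; 0xa5a5 internally and is emitted again as 0xa5.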
14484 (define_insn "*avx2_pblendw"
14485 [(set (match_operand:V16HI 0 "register_operand" "=x")
14486 (vec_merge:V16HI
14487 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14488 (match_operand:V16HI 1 "register_operand" "x")
14489 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
14490 "TARGET_AVX2"
14491 {
14492 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
14493 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14494 }
14495 [(set_attr "type" "ssemov")
14496 (set_attr "prefix_extra" "1")
14497 (set_attr "length_immediate" "1")
14498 (set_attr "prefix" "vex")
14499 (set_attr "mode" "OI")])
14500
14501 (define_insn "avx2_pblendd<mode>"
14502 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
14503 (vec_merge:VI4_AVX2
14504 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
14505 (match_operand:VI4_AVX2 1 "register_operand" "x")
14506 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
14507 "TARGET_AVX2"
14508 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14509 [(set_attr "type" "ssemov")
14510 (set_attr "prefix_extra" "1")
14511 (set_attr "length_immediate" "1")
14512 (set_attr "prefix" "vex")
14513 (set_attr "mode" "<sseinsnmode>")])
14514
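;; phminposuw finds the minimum of the eight unsigned words of the source,
;; placing the value in word 0 of the destination and its index in word 1;
;; the remaining words are cleared.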
14515 (define_insn "sse4_1_phminposuw"
14516 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x")
14517 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm")]
14518 UNSPEC_PHMINPOSUW))]
14519 "TARGET_SSE4_1"
14520 "%vphminposuw\t{%1, %0|%0, %1}"
14521 [(set_attr "type" "sselog1")
14522 (set_attr "prefix_extra" "1")
14523 (set_attr "prefix" "maybe_vex")
14524 (set_attr "mode" "TI")])
14525
14526 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
14527 [(set (match_operand:V16HI 0 "register_operand" "=v")
14528 (any_extend:V16HI
14529 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14530 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14531 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14532 [(set_attr "type" "ssemov")
14533 (set_attr "prefix_extra" "1")
14534 (set_attr "prefix" "maybe_evex")
14535 (set_attr "mode" "OI")])
14536
14537 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
14538 [(set (match_operand:V32HI 0 "register_operand" "=v")
14539 (any_extend:V32HI
14540 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
14541 "TARGET_AVX512BW"
14542 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14543 [(set_attr "type" "ssemov")
14544 (set_attr "prefix_extra" "1")
14545 (set_attr "prefix" "evex")
14546 (set_attr "mode" "XI")])
14547
14548 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
14549 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v")
14550 (any_extend:V8HI
14551 (vec_select:V8QI
14552 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14553 (parallel [(const_int 0) (const_int 1)
14554 (const_int 2) (const_int 3)
14555 (const_int 4) (const_int 5)
14556 (const_int 6) (const_int 7)]))))]
14557 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14558 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14559 [(set_attr "type" "ssemov")
14560 (set_attr "prefix_extra" "1")
14561 (set_attr "prefix" "maybe_vex")
14562 (set_attr "mode" "TI")])
14563
14564 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
14565 [(set (match_operand:V16SI 0 "register_operand" "=v")
14566 (any_extend:V16SI
14567 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14568 "TARGET_AVX512F"
14569 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14570 [(set_attr "type" "ssemov")
14571 (set_attr "prefix" "evex")
14572 (set_attr "mode" "XI")])
14573
14574 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
14575 [(set (match_operand:V8SI 0 "register_operand" "=v")
14576 (any_extend:V8SI
14577 (vec_select:V8QI
14578 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14579 (parallel [(const_int 0) (const_int 1)
14580 (const_int 2) (const_int 3)
14581 (const_int 4) (const_int 5)
14582 (const_int 6) (const_int 7)]))))]
14583 "TARGET_AVX2 && <mask_avx512vl_condition>"
14584 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14585 [(set_attr "type" "ssemov")
14586 (set_attr "prefix_extra" "1")
14587 (set_attr "prefix" "maybe_evex")
14588 (set_attr "mode" "OI")])
14589
14590 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
14591 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
14592 (any_extend:V4SI
14593 (vec_select:V4QI
14594 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14595 (parallel [(const_int 0) (const_int 1)
14596 (const_int 2) (const_int 3)]))))]
14597 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14598 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14599 [(set_attr "type" "ssemov")
14600 (set_attr "prefix_extra" "1")
14601 (set_attr "prefix" "maybe_vex")
14602 (set_attr "mode" "TI")])
14603
14604 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
14605 [(set (match_operand:V16SI 0 "register_operand" "=v")
14606 (any_extend:V16SI
14607 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
14608 "TARGET_AVX512F"
14609 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14610 [(set_attr "type" "ssemov")
14611 (set_attr "prefix" "evex")
14612 (set_attr "mode" "XI")])
14613
14614 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
14615 [(set (match_operand:V8SI 0 "register_operand" "=v")
14616 (any_extend:V8SI
14617 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14618 "TARGET_AVX2 && <mask_avx512vl_condition>"
14619 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14620 [(set_attr "type" "ssemov")
14621 (set_attr "prefix_extra" "1")
14622 (set_attr "prefix" "maybe_evex")
14623 (set_attr "mode" "OI")])
14624
14625 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
14626 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
14627 (any_extend:V4SI
14628 (vec_select:V4HI
14629 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
14630 (parallel [(const_int 0) (const_int 1)
14631 (const_int 2) (const_int 3)]))))]
14632 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14633 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14634 [(set_attr "type" "ssemov")
14635 (set_attr "prefix_extra" "1")
14636 (set_attr "prefix" "maybe_vex")
14637 (set_attr "mode" "TI")])
14638
14639 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
14640 [(set (match_operand:V8DI 0 "register_operand" "=v")
14641 (any_extend:V8DI
14642 (vec_select:V8QI
14643 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14644 (parallel [(const_int 0) (const_int 1)
14645 (const_int 2) (const_int 3)
14646 (const_int 4) (const_int 5)
14647 (const_int 6) (const_int 7)]))))]
14648 "TARGET_AVX512F"
14649 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14650 [(set_attr "type" "ssemov")
14651 (set_attr "prefix" "evex")
14652 (set_attr "mode" "XI")])
14653
14654 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
14655 [(set (match_operand:V4DI 0 "register_operand" "=v")
14656 (any_extend:V4DI
14657 (vec_select:V4QI
14658 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14659 (parallel [(const_int 0) (const_int 1)
14660 (const_int 2) (const_int 3)]))))]
14661 "TARGET_AVX2 && <mask_avx512vl_condition>"
14662 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14663 [(set_attr "type" "ssemov")
14664 (set_attr "prefix_extra" "1")
14665 (set_attr "prefix" "maybe_evex")
14666 (set_attr "mode" "OI")])
14667
14668 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
14669 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14670 (any_extend:V2DI
14671 (vec_select:V2QI
14672 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14673 (parallel [(const_int 0) (const_int 1)]))))]
14674 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14675 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
14676 [(set_attr "type" "ssemov")
14677 (set_attr "prefix_extra" "1")
14678 (set_attr "prefix" "maybe_vex")
14679 (set_attr "mode" "TI")])
14680
14681 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
14682 [(set (match_operand:V8DI 0 "register_operand" "=v")
14683 (any_extend:V8DI
14684 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14685 "TARGET_AVX512F"
14686 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14687 [(set_attr "type" "ssemov")
14688 (set_attr "prefix" "evex")
14689 (set_attr "mode" "XI")])
14690
14691 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
14692 [(set (match_operand:V4DI 0 "register_operand" "=v")
14693 (any_extend:V4DI
14694 (vec_select:V4HI
14695 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
14696 (parallel [(const_int 0) (const_int 1)
14697 (const_int 2) (const_int 3)]))))]
14698 "TARGET_AVX2 && <mask_avx512vl_condition>"
14699 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14700 [(set_attr "type" "ssemov")
14701 (set_attr "prefix_extra" "1")
14702 (set_attr "prefix" "maybe_evex")
14703 (set_attr "mode" "OI")])
14704
14705 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
14706 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14707 (any_extend:V2DI
14708 (vec_select:V2HI
14709 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
14710 (parallel [(const_int 0) (const_int 1)]))))]
14711 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14712 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14713 [(set_attr "type" "ssemov")
14714 (set_attr "prefix_extra" "1")
14715 (set_attr "prefix" "maybe_vex")
14716 (set_attr "mode" "TI")])
14717
14718 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
14719 [(set (match_operand:V8DI 0 "register_operand" "=v")
14720 (any_extend:V8DI
14721 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
14722 "TARGET_AVX512F"
14723 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14724 [(set_attr "type" "ssemov")
14725 (set_attr "prefix" "evex")
14726 (set_attr "mode" "XI")])
14727
14728 (define_insn "avx2_<code>v4siv4di2<mask_name>"
14729 [(set (match_operand:V4DI 0 "register_operand" "=v")
14730 (any_extend:V4DI
14731 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
14732 "TARGET_AVX2 && <mask_avx512vl_condition>"
14733 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14734 [(set_attr "type" "ssemov")
14735 (set_attr "prefix" "maybe_evex")
14736 (set_attr "prefix_extra" "1")
14737 (set_attr "mode" "OI")])
14738
14739 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
14740 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14741 (any_extend:V2DI
14742 (vec_select:V2SI
14743 (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm")
14744 (parallel [(const_int 0) (const_int 1)]))))]
14745 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14746 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14747 [(set_attr "type" "ssemov")
14748 (set_attr "prefix_extra" "1")
14749 (set_attr "prefix" "maybe_vex")
14750 (set_attr "mode" "TI")])
14751
14752 ;; vtestps/vtestpd are very similar to comiss and ucomiss when setting
14753 ;; FLAGS_REG, but they are not really compare instructions.
14754 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
14755 [(set (reg:CC FLAGS_REG)
14756 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
14757 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
14758 UNSPEC_VTESTP))]
14759 "TARGET_AVX"
14760 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
14761 [(set_attr "type" "ssecomi")
14762 (set_attr "prefix_extra" "1")
14763 (set_attr "prefix" "vex")
14764 (set_attr "mode" "<MODE>")])
14765
14766 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
14767 ;; but it is not really a compare instruction.
14768 (define_insn "<sse4_1>_ptest<mode>"
14769 [(set (reg:CC FLAGS_REG)
14770 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
14771 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
14772 UNSPEC_PTEST))]
14773 "TARGET_SSE4_1"
14774 "%vptest\t{%1, %0|%0, %1}"
14775 [(set_attr "isa" "*,*,avx")
14776 (set_attr "type" "ssecomi")
14777 (set_attr "prefix_extra" "1")
14778 (set_attr "prefix" "maybe_vex")
14779 (set (attr "btver2_decode")
14780 (if_then_else
14781 (match_test "<sseinsnmode>mode == OImode")
14782 (const_string "vector")
14783 (const_string "*")))
14784 (set_attr "mode" "<sseinsnmode>")])
14785
14786 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
14787 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x")
14788 (unspec:VF_128_256
14789 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm")
14790 (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
14791 UNSPEC_ROUND))]
14792 "TARGET_ROUND"
14793 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14794 [(set_attr "type" "ssecvt")
14795 (set (attr "prefix_data16")
14796 (if_then_else
14797 (match_test "TARGET_AVX")
14798 (const_string "*")
14799 (const_string "1")))
14800 (set_attr "prefix_extra" "1")
14801 (set_attr "length_immediate" "1")
14802 (set_attr "prefix" "maybe_vex")
14803 (set_attr "mode" "<MODE>")])
14804
14805 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
14806 [(match_operand:<sseintvecmode> 0 "register_operand")
14807 (match_operand:VF1_128_256 1 "vector_operand")
14808 (match_operand:SI 2 "const_0_to_15_operand")]
14809 "TARGET_ROUND"
14810 {
14811 rtx tmp = gen_reg_rtx (<MODE>mode);
14812
14813 emit_insn
14814 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
14815 operands[2]));
14816 emit_insn
14817 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
14818 DONE;
14819 })
14820
14821 (define_expand "avx512f_roundpd512"
14822 [(match_operand:V8DF 0 "register_operand")
14823 (match_operand:V8DF 1 "nonimmediate_operand")
14824 (match_operand:SI 2 "const_0_to_15_operand")]
14825 "TARGET_AVX512F"
14826 {
14827 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
14828 DONE;
14829 })
14830
14831 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
14832 [(match_operand:<ssepackfltmode> 0 "register_operand")
14833 (match_operand:VF2 1 "vector_operand")
14834 (match_operand:VF2 2 "vector_operand")
14835 (match_operand:SI 3 "const_0_to_15_operand")]
14836 "TARGET_ROUND"
14837 {
14838 rtx tmp0, tmp1;
14839
14840 if (<MODE>mode == V2DFmode
14841 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
14842 {
14843 rtx tmp2 = gen_reg_rtx (V4DFmode);
14844
14845 tmp0 = gen_reg_rtx (V4DFmode);
14846 tmp1 = force_reg (V2DFmode, operands[1]);
14847
14848 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14849 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
14850 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14851 }
14852 else
14853 {
14854 tmp0 = gen_reg_rtx (<MODE>mode);
14855 tmp1 = gen_reg_rtx (<MODE>mode);
14856
14857 emit_insn
14858 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
14859 operands[3]));
14860 emit_insn
14861 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
14862 operands[3]));
14863 emit_insn
14864 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14865 }
14866 DONE;
14867 })
14868
14869 (define_insn "sse4_1_round<ssescalarmodesuffix>"
14870 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
14871 (vec_merge:VF_128
14872 (unspec:VF_128
14873 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
14874 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
14875 UNSPEC_ROUND)
14876 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
14877 (const_int 1)))]
14878 "TARGET_ROUND"
14879 "@
14880 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14881 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14882 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
14883 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14884 [(set_attr "isa" "noavx,noavx,avx,avx512f")
14885 (set_attr "type" "ssecvt")
14886 (set_attr "length_immediate" "1")
14887 (set_attr "prefix_data16" "1,1,*,*")
14888 (set_attr "prefix_extra" "1")
14889 (set_attr "prefix" "orig,orig,vex,evex")
14890 (set_attr "mode" "<MODE>")])
14891
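;; round<mode>2 rounds to nearest with halfway cases away from zero as
;; trunc (x + copysign (nextafter (0.5, 0.0), x)).  Adding exactly 0.5 would
;; be wrong: for the largest representable value just below 0.5 the addition
;; rounds up to 1.0 and truncation then yields 1 instead of 0.  Using
;; nextafter (0.5, 0.0) avoids that double rounding while still carrying
;; every true halfway case past the next integer.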
14892 (define_expand "round<mode>2"
14893 [(set (match_dup 4)
14894 (plus:VF
14895 (match_operand:VF 1 "register_operand")
14896 (match_dup 3)))
14897 (set (match_operand:VF 0 "register_operand")
14898 (unspec:VF
14899 [(match_dup 4) (match_dup 5)]
14900 UNSPEC_ROUND))]
14901 "TARGET_ROUND && !flag_trapping_math"
14902 {
14903 machine_mode scalar_mode;
14904 const struct real_format *fmt;
14905 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
14906 rtx half, vec_half;
14907
14908 scalar_mode = GET_MODE_INNER (<MODE>mode);
14909
14910 /* Load nextafter (0.5, 0.0), the largest value less than 0.5.  */
14911 fmt = REAL_MODE_FORMAT (scalar_mode);
14912 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
14913 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
14914 half = const_double_from_real_value (pred_half, scalar_mode);
14915
14916 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
14917 vec_half = force_reg (<MODE>mode, vec_half);
14918
14919 operands[3] = gen_reg_rtx (<MODE>mode);
14920 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
14921
14922 operands[4] = gen_reg_rtx (<MODE>mode);
14923 operands[5] = GEN_INT (ROUND_TRUNC);
14924 })
14925
14926 (define_expand "round<mode>2_sfix"
14927 [(match_operand:<sseintvecmode> 0 "register_operand")
14928 (match_operand:VF1_128_256 1 "register_operand")]
14929 "TARGET_ROUND && !flag_trapping_math"
14930 {
14931 rtx tmp = gen_reg_rtx (<MODE>mode);
14932
14933 emit_insn (gen_round<mode>2 (tmp, operands[1]));
14934
14935 emit_insn
14936 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
14937 DONE;
14938 })
14939
14940 (define_expand "round<mode>2_vec_pack_sfix"
14941 [(match_operand:<ssepackfltmode> 0 "register_operand")
14942 (match_operand:VF2 1 "register_operand")
14943 (match_operand:VF2 2 "register_operand")]
14944 "TARGET_ROUND && !flag_trapping_math"
14945 {
14946 rtx tmp0, tmp1;
14947
14948 if (<MODE>mode == V2DFmode
14949 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
14950 {
14951 rtx tmp2 = gen_reg_rtx (V4DFmode);
14952
14953 tmp0 = gen_reg_rtx (V4DFmode);
14954 tmp1 = force_reg (V2DFmode, operands[1]);
14955
14956 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14957 emit_insn (gen_roundv4df2 (tmp2, tmp0));
14958 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14959 }
14960 else
14961 {
14962 tmp0 = gen_reg_rtx (<MODE>mode);
14963 tmp1 = gen_reg_rtx (<MODE>mode);
14964
14965 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
14966 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
14967
14968 emit_insn
14969 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14970 }
14971 DONE;
14972 })
14973
14974 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14975 ;;
14976 ;; Intel SSE4.2 string/text processing instructions
14977 ;;
14978 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14979
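;; The combined pcmpestr pattern below exposes all three results (the index
;; in %ecx, the mask in %xmm0 and the flags) to the register allocator.  At
;; split time the REG_UNUSED notes indicate which results are actually live
;; and only the matching single-output pcmpestri/pcmpestrm/flags-only
;; variant is emitted; if nothing is live the insn is simply deleted.  The
;; implicit-length pcmpistr pattern further down follows the same scheme.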
14980 (define_insn_and_split "sse4_2_pcmpestr"
14981 [(set (match_operand:SI 0 "register_operand" "=c,c")
14982 (unspec:SI
14983 [(match_operand:V16QI 2 "register_operand" "x,x")
14984 (match_operand:SI 3 "register_operand" "a,a")
14985 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
14986 (match_operand:SI 5 "register_operand" "d,d")
14987 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
14988 UNSPEC_PCMPESTR))
14989 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14990 (unspec:V16QI
14991 [(match_dup 2)
14992 (match_dup 3)
14993 (match_dup 4)
14994 (match_dup 5)
14995 (match_dup 6)]
14996 UNSPEC_PCMPESTR))
14997 (set (reg:CC FLAGS_REG)
14998 (unspec:CC
14999 [(match_dup 2)
15000 (match_dup 3)
15001 (match_dup 4)
15002 (match_dup 5)
15003 (match_dup 6)]
15004 UNSPEC_PCMPESTR))]
15005 "TARGET_SSE4_2
15006 && can_create_pseudo_p ()"
15007 "#"
15008 "&& 1"
15009 [(const_int 0)]
15010 {
15011 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15012 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15013 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15014
15015 if (ecx)
15016 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
15017 operands[3], operands[4],
15018 operands[5], operands[6]));
15019 if (xmm0)
15020 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
15021 operands[3], operands[4],
15022 operands[5], operands[6]));
15023 if (flags && !(ecx || xmm0))
15024 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
15025 operands[2], operands[3],
15026 operands[4], operands[5],
15027 operands[6]));
15028 if (!(flags || ecx || xmm0))
15029 emit_note (NOTE_INSN_DELETED);
15030
15031 DONE;
15032 }
15033 [(set_attr "type" "sselog")
15034 (set_attr "prefix_data16" "1")
15035 (set_attr "prefix_extra" "1")
15036 (set_attr "length_immediate" "1")
15037 (set_attr "memory" "none,load")
15038 (set_attr "mode" "TI")])
15039
15040 (define_insn "sse4_2_pcmpestri"
15041 [(set (match_operand:SI 0 "register_operand" "=c,c")
15042 (unspec:SI
15043 [(match_operand:V16QI 1 "register_operand" "x,x")
15044 (match_operand:SI 2 "register_operand" "a,a")
15045 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15046 (match_operand:SI 4 "register_operand" "d,d")
15047 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15048 UNSPEC_PCMPESTR))
15049 (set (reg:CC FLAGS_REG)
15050 (unspec:CC
15051 [(match_dup 1)
15052 (match_dup 2)
15053 (match_dup 3)
15054 (match_dup 4)
15055 (match_dup 5)]
15056 UNSPEC_PCMPESTR))]
15057 "TARGET_SSE4_2"
15058 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
15059 [(set_attr "type" "sselog")
15060 (set_attr "prefix_data16" "1")
15061 (set_attr "prefix_extra" "1")
15062 (set_attr "prefix" "maybe_vex")
15063 (set_attr "length_immediate" "1")
15064 (set_attr "btver2_decode" "vector")
15065 (set_attr "memory" "none,load")
15066 (set_attr "mode" "TI")])
15067
15068 (define_insn "sse4_2_pcmpestrm"
15069 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15070 (unspec:V16QI
15071 [(match_operand:V16QI 1 "register_operand" "x,x")
15072 (match_operand:SI 2 "register_operand" "a,a")
15073 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15074 (match_operand:SI 4 "register_operand" "d,d")
15075 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15076 UNSPEC_PCMPESTR))
15077 (set (reg:CC FLAGS_REG)
15078 (unspec:CC
15079 [(match_dup 1)
15080 (match_dup 2)
15081 (match_dup 3)
15082 (match_dup 4)
15083 (match_dup 5)]
15084 UNSPEC_PCMPESTR))]
15085 "TARGET_SSE4_2"
15086 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
15087 [(set_attr "type" "sselog")
15088 (set_attr "prefix_data16" "1")
15089 (set_attr "prefix_extra" "1")
15090 (set_attr "length_immediate" "1")
15091 (set_attr "prefix" "maybe_vex")
15092 (set_attr "btver2_decode" "vector")
15093 (set_attr "memory" "none,load")
15094 (set_attr "mode" "TI")])
15095
15096 (define_insn "sse4_2_pcmpestr_cconly"
15097 [(set (reg:CC FLAGS_REG)
15098 (unspec:CC
15099 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15100 (match_operand:SI 3 "register_operand" "a,a,a,a")
15101 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
15102 (match_operand:SI 5 "register_operand" "d,d,d,d")
15103 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
15104 UNSPEC_PCMPESTR))
15105 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15106 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
15107 "TARGET_SSE4_2"
15108 "@
15109 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15110 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15111 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
15112 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
15113 [(set_attr "type" "sselog")
15114 (set_attr "prefix_data16" "1")
15115 (set_attr "prefix_extra" "1")
15116 (set_attr "length_immediate" "1")
15117 (set_attr "memory" "none,load,none,load")
15118 (set_attr "btver2_decode" "vector,vector,vector,vector")
15119 (set_attr "prefix" "maybe_vex")
15120 (set_attr "mode" "TI")])
15121
15122 (define_insn_and_split "sse4_2_pcmpistr"
15123 [(set (match_operand:SI 0 "register_operand" "=c,c")
15124 (unspec:SI
15125 [(match_operand:V16QI 2 "register_operand" "x,x")
15126 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15127 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
15128 UNSPEC_PCMPISTR))
15129 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15130 (unspec:V16QI
15131 [(match_dup 2)
15132 (match_dup 3)
15133 (match_dup 4)]
15134 UNSPEC_PCMPISTR))
15135 (set (reg:CC FLAGS_REG)
15136 (unspec:CC
15137 [(match_dup 2)
15138 (match_dup 3)
15139 (match_dup 4)]
15140 UNSPEC_PCMPISTR))]
15141 "TARGET_SSE4_2
15142 && can_create_pseudo_p ()"
15143 "#"
15144 "&& 1"
15145 [(const_int 0)]
15146 {
15147 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15148 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15149 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15150
15151 if (ecx)
15152 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
15153 operands[3], operands[4]));
15154 if (xmm0)
15155 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
15156 operands[3], operands[4]));
15157 if (flags && !(ecx || xmm0))
15158 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
15159 operands[2], operands[3],
15160 operands[4]));
15161 if (!(flags || ecx || xmm0))
15162 emit_note (NOTE_INSN_DELETED);
15163
15164 DONE;
15165 }
15166 [(set_attr "type" "sselog")
15167 (set_attr "prefix_data16" "1")
15168 (set_attr "prefix_extra" "1")
15169 (set_attr "length_immediate" "1")
15170 (set_attr "memory" "none,load")
15171 (set_attr "mode" "TI")])
15172
15173 (define_insn "sse4_2_pcmpistri"
15174 [(set (match_operand:SI 0 "register_operand" "=c,c")
15175 (unspec:SI
15176 [(match_operand:V16QI 1 "register_operand" "x,x")
15177 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15178 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15179 UNSPEC_PCMPISTR))
15180 (set (reg:CC FLAGS_REG)
15181 (unspec:CC
15182 [(match_dup 1)
15183 (match_dup 2)
15184 (match_dup 3)]
15185 UNSPEC_PCMPISTR))]
15186 "TARGET_SSE4_2"
15187 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
15188 [(set_attr "type" "sselog")
15189 (set_attr "prefix_data16" "1")
15190 (set_attr "prefix_extra" "1")
15191 (set_attr "length_immediate" "1")
15192 (set_attr "prefix" "maybe_vex")
15193 (set_attr "memory" "none,load")
15194 (set_attr "btver2_decode" "vector")
15195 (set_attr "mode" "TI")])
15196
15197 (define_insn "sse4_2_pcmpistrm"
15198 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15199 (unspec:V16QI
15200 [(match_operand:V16QI 1 "register_operand" "x,x")
15201 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15202 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15203 UNSPEC_PCMPISTR))
15204 (set (reg:CC FLAGS_REG)
15205 (unspec:CC
15206 [(match_dup 1)
15207 (match_dup 2)
15208 (match_dup 3)]
15209 UNSPEC_PCMPISTR))]
15210 "TARGET_SSE4_2"
15211 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
15212 [(set_attr "type" "sselog")
15213 (set_attr "prefix_data16" "1")
15214 (set_attr "prefix_extra" "1")
15215 (set_attr "length_immediate" "1")
15216 (set_attr "prefix" "maybe_vex")
15217 (set_attr "memory" "none,load")
15218 (set_attr "btver2_decode" "vector")
15219 (set_attr "mode" "TI")])
15220
15221 (define_insn "sse4_2_pcmpistr_cconly"
15222 [(set (reg:CC FLAGS_REG)
15223 (unspec:CC
15224 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15225 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
15226 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
15227 UNSPEC_PCMPISTR))
15228 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15229 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
15230 "TARGET_SSE4_2"
15231 "@
15232 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15233 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15234 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
15235 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
15236 [(set_attr "type" "sselog")
15237 (set_attr "prefix_data16" "1")
15238 (set_attr "prefix_extra" "1")
15239 (set_attr "length_immediate" "1")
15240 (set_attr "memory" "none,load,none,load")
15241 (set_attr "prefix" "maybe_vex")
15242 (set_attr "btver2_decode" "vector,vector,vector,vector")
15243 (set_attr "mode" "TI")])
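
;; The implicit-length (pcmpistr) patterns above behave the same way,
;; without the explicit lengths in %eax/%edx.  A hedged user-level
;; sketch, assuming the usual <nmmintrin.h> intrinsics:
;;
;;   #include <nmmintrin.h>
;;   /* Byte mask of characters in 'a'..'z'; only the %xmm0 result is
;;      live, so the split keeps just pcmpistrm.  */
;;   __m128i lower_mask (__m128i chars)
;;   {
;;     const __m128i range = _mm_setr_epi8 ('a', 'z', 0, 0, 0, 0, 0, 0,
;;                                          0, 0, 0, 0, 0, 0, 0, 0);
;;     return _mm_cmpistrm (range, chars,
;;                          _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES
;;                          | _SIDD_UNIT_MASK);
;;   }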
15244
15245 ;; Packed float variants
15246 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
15247 [(V8DI "V8SF") (V16SI "V16SF")])
15248
15249 (define_expand "avx512pf_gatherpf<mode>sf"
15250 [(unspec
15251 [(match_operand:<avx512fmaskmode> 0 "register_operand")
15252 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15253 (match_par_dup 5
15254 [(match_operand 2 "vsib_address_operand")
15255 (match_operand:VI48_512 1 "register_operand")
15256 (match_operand:SI 3 "const1248_operand")]))
15257 (match_operand:SI 4 "const_2_to_3_operand")]
15258 UNSPEC_GATHER_PREFETCH)]
15259 "TARGET_AVX512PF"
15260 {
15261 operands[5]
15262 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15263 operands[3]), UNSPEC_VSIBADDR);
15264 })
15265
15266 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
15267 [(unspec
15268 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15269 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15270 [(unspec:P
15271 [(match_operand:P 2 "vsib_address_operand" "Tv")
15272 (match_operand:VI48_512 1 "register_operand" "v")
15273 (match_operand:SI 3 "const1248_operand" "n")]
15274 UNSPEC_VSIBADDR)])
15275 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15276 UNSPEC_GATHER_PREFETCH)]
15277 "TARGET_AVX512PF"
15278 {
15279 switch (INTVAL (operands[4]))
15280 {
15281 case 3:
15282 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15283 case 2:
15284 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15285 default:
15286 gcc_unreachable ();
15287 }
15288 }
15289 [(set_attr "type" "sse")
15290 (set_attr "prefix" "evex")
15291 (set_attr "mode" "XI")])
15292
15293 ;; Packed double variants
15294 (define_expand "avx512pf_gatherpf<mode>df"
15295 [(unspec
15296 [(match_operand:<avx512fmaskmode> 0 "register_operand")
15297 (mem:V8DF
15298 (match_par_dup 5
15299 [(match_operand 2 "vsib_address_operand")
15300 (match_operand:VI4_256_8_512 1 "register_operand")
15301 (match_operand:SI 3 "const1248_operand")]))
15302 (match_operand:SI 4 "const_2_to_3_operand")]
15303 UNSPEC_GATHER_PREFETCH)]
15304 "TARGET_AVX512PF"
15305 {
15306 operands[5]
15307 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15308 operands[3]), UNSPEC_VSIBADDR);
15309 })
15310
15311 (define_insn "*avx512pf_gatherpf<mode>df_mask"
15312 [(unspec
15313 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15314 (match_operator:V8DF 5 "vsib_mem_operator"
15315 [(unspec:P
15316 [(match_operand:P 2 "vsib_address_operand" "Tv")
15317 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15318 (match_operand:SI 3 "const1248_operand" "n")]
15319 UNSPEC_VSIBADDR)])
15320 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15321 UNSPEC_GATHER_PREFETCH)]
15322 "TARGET_AVX512PF"
15323 {
15324 switch (INTVAL (operands[4]))
15325 {
15326 case 3:
15327 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15328 case 2:
15329 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15330 default:
15331 gcc_unreachable ();
15332 }
15333 }
15334 [(set_attr "type" "sse")
15335 (set_attr "prefix" "evex")
15336 (set_attr "mode" "XI")])
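
;; In the gather-prefetch patterns above, operand 4 selects the cache
;; level using the same encoding as GCC's _MM_HINT_* constants:
;; 3 (_MM_HINT_T0) selects the vgatherpf0* form, 2 (_MM_HINT_T1) the
;; vgatherpf1* form.  A hedged source-level sketch, assuming the
;; AVX-512PF _mm512_prefetch_i32gather_ps intrinsic from <immintrin.h>:
;;
;;   #include <immintrin.h>
;;   void prefetch_gather (const float *base, __m512i idx)
;;   {
;;     /* Prefetch the 16 floats addressed by base + idx*4 into L1.  */
;;     _mm512_prefetch_i32gather_ps (idx, base, 4, _MM_HINT_T0);
;;   }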
15337
15338 ;; Packed float variants
15339 (define_expand "avx512pf_scatterpf<mode>sf"
15340 [(unspec
15341 [(match_operand:<avx512fmaskmode> 0 "register_operand")
15342 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15343 (match_par_dup 5
15344 [(match_operand 2 "vsib_address_operand")
15345 (match_operand:VI48_512 1 "register_operand")
15346 (match_operand:SI 3 "const1248_operand")]))
15347 (match_operand:SI 4 "const2367_operand")]
15348 UNSPEC_SCATTER_PREFETCH)]
15349 "TARGET_AVX512PF"
15350 {
15351 operands[5]
15352 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15353 operands[3]), UNSPEC_VSIBADDR);
15354 })
15355
15356 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
15357 [(unspec
15358 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15359 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15360 [(unspec:P
15361 [(match_operand:P 2 "vsib_address_operand" "Tv")
15362 (match_operand:VI48_512 1 "register_operand" "v")
15363 (match_operand:SI 3 "const1248_operand" "n")]
15364 UNSPEC_VSIBADDR)])
15365 (match_operand:SI 4 "const2367_operand" "n")]
15366 UNSPEC_SCATTER_PREFETCH)]
15367 "TARGET_AVX512PF"
15368 {
15369 switch (INTVAL (operands[4]))
15370 {
15371 case 3:
15372 case 7:
15373 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15374 case 2:
15375 case 6:
15376 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15377 default:
15378 gcc_unreachable ();
15379 }
15380 }
15381 [(set_attr "type" "sse")
15382 (set_attr "prefix" "evex")
15383 (set_attr "mode" "XI")])
15384
15385 ;; Packed double variants
15386 (define_expand "avx512pf_scatterpf<mode>df"
15387 [(unspec
15388 [(match_operand:<avx512fmaskmode> 0 "register_operand")
15389 (mem:V8DF
15390 (match_par_dup 5
15391 [(match_operand 2 "vsib_address_operand")
15392 (match_operand:VI4_256_8_512 1 "register_operand")
15393 (match_operand:SI 3 "const1248_operand")]))
15394 (match_operand:SI 4 "const2367_operand")]
15395 UNSPEC_SCATTER_PREFETCH)]
15396 "TARGET_AVX512PF"
15397 {
15398 operands[5]
15399 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15400 operands[3]), UNSPEC_VSIBADDR);
15401 })
15402
15403 (define_insn "*avx512pf_scatterpf<mode>df_mask"
15404 [(unspec
15405 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15406 (match_operator:V8DF 5 "vsib_mem_operator"
15407 [(unspec:P
15408 [(match_operand:P 2 "vsib_address_operand" "Tv")
15409 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15410 (match_operand:SI 3 "const1248_operand" "n")]
15411 UNSPEC_VSIBADDR)])
15412 (match_operand:SI 4 "const2367_operand" "n")]
15413 UNSPEC_SCATTER_PREFETCH)]
15414 "TARGET_AVX512PF"
15415 {
15416 switch (INTVAL (operands[4]))
15417 {
15418 case 3:
15419 case 7:
15420 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15421 case 2:
15422 case 6:
15423 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15424 default:
15425 gcc_unreachable ();
15426 }
15427 }
15428 [(set_attr "type" "sse")
15429 (set_attr "prefix" "evex")
15430 (set_attr "mode" "XI")])
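
;; The scatter-prefetch patterns additionally accept the write-intent
;; hints: 3/7 (_MM_HINT_T0/_MM_HINT_ET0) select vscatterpf0*, while
;; 2/6 (_MM_HINT_T1/_MM_HINT_ET1) select vscatterpf1*.  A hedged sketch,
;; assuming the _mm512_prefetch_i32scatter_ps intrinsic and its
;; (base, index, scale, hint) argument order:
;;
;;   #include <immintrin.h>
;;   void prefetch_scatter (float *base, __m512i idx)
;;   {
;;     /* Prefetch the destination lines with intent to write.  */
;;     _mm512_prefetch_i32scatter_ps (base, idx, 4, _MM_HINT_ET0);
;;   }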
15431
15432 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
15433 [(set (match_operand:VF_512 0 "register_operand" "=v")
15434 (unspec:VF_512
15435 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15436 UNSPEC_EXP2))]
15437 "TARGET_AVX512ER"
15438 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15439 [(set_attr "prefix" "evex")
15440 (set_attr "type" "sse")
15441 (set_attr "mode" "<MODE>")])
15442
15443 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
15444 [(set (match_operand:VF_512 0 "register_operand" "=v")
15445 (unspec:VF_512
15446 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15447 UNSPEC_RCP28))]
15448 "TARGET_AVX512ER"
15449 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15450 [(set_attr "prefix" "evex")
15451 (set_attr "type" "sse")
15452 (set_attr "mode" "<MODE>")])
15453
15454 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
15455 [(set (match_operand:VF_128 0 "register_operand" "=v")
15456 (vec_merge:VF_128
15457 (unspec:VF_128
15458 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15459 UNSPEC_RCP28)
15460 (match_operand:VF_128 2 "register_operand" "v")
15461 (const_int 1)))]
15462 "TARGET_AVX512ER"
15463 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15464 [(set_attr "length_immediate" "1")
15465 (set_attr "prefix" "evex")
15466 (set_attr "type" "sse")
15467 (set_attr "mode" "<MODE>")])
15468
15469 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
15470 [(set (match_operand:VF_512 0 "register_operand" "=v")
15471 (unspec:VF_512
15472 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15473 UNSPEC_RSQRT28))]
15474 "TARGET_AVX512ER"
15475 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15476 [(set_attr "prefix" "evex")
15477 (set_attr "type" "sse")
15478 (set_attr "mode" "<MODE>")])
15479
15480 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
15481 [(set (match_operand:VF_128 0 "register_operand" "=v")
15482 (vec_merge:VF_128
15483 (unspec:VF_128
15484 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15485 UNSPEC_RSQRT28)
15486 (match_operand:VF_128 2 "register_operand" "v")
15487 (const_int 1)))]
15488 "TARGET_AVX512ER"
15489 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15490 [(set_attr "length_immediate" "1")
15491 (set_attr "type" "sse")
15492 (set_attr "prefix" "evex")
15493 (set_attr "mode" "<MODE>")])
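
;; The AVX-512ER patterns above provide the high-precision (2^-28
;; relative error) reciprocal and reciprocal-square-root estimates and
;; the 2^x estimate.  A minimal sketch of the packed forms, assuming the
;; usual _mm512_rcp28_ps / _mm512_rsqrt28_ps intrinsics from
;; <immintrin.h> (-mavx512er):
;;
;;   #include <immintrin.h>
;;   __m512 approx_inverse (__m512 x) { return _mm512_rcp28_ps (x); }
;;   __m512 approx_rsqrt   (__m512 x) { return _mm512_rsqrt28_ps (x); }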
15494
15495 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15496 ;;
15497 ;; XOP instructions
15498 ;;
15499 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15500
15501 (define_code_iterator xop_plus [plus ss_plus])
15502
15503 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
15504 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
15505
15506 ;; XOP parallel integer multiply/add instructions.
15507
15508 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
15509 [(set (match_operand:VI24_128 0 "register_operand" "=x")
15510 (xop_plus:VI24_128
15511 (mult:VI24_128
15512 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
15513 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
15514 (match_operand:VI24_128 3 "register_operand" "x")))]
15515 "TARGET_XOP"
15516 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15517 [(set_attr "type" "ssemuladd")
15518 (set_attr "mode" "TI")])
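
;; Element-wise, the non-saturating forms above compute a multiply
;; followed by a wrapping add, while the "ss" variants saturate the
;; final addition.  A plain C sketch of one 16-bit lane of vpmacsww:
;;
;;   /* Wrapping 16-bit result of a[i] * b[i] + c[i].  */
;;   void macsww (short dst[8], const short a[8], const short b[8],
;;                const short c[8])
;;   {
;;     for (int i = 0; i < 8; i++)
;;       dst[i] = (short) (a[i] * b[i] + c[i]);
;;   }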
15519
15520 (define_insn "xop_p<macs>dql"
15521 [(set (match_operand:V2DI 0 "register_operand" "=x")
15522 (xop_plus:V2DI
15523 (mult:V2DI
15524 (sign_extend:V2DI
15525 (vec_select:V2SI
15526 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15527 (parallel [(const_int 0) (const_int 2)])))
15528 (sign_extend:V2DI
15529 (vec_select:V2SI
15530 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15531 (parallel [(const_int 0) (const_int 2)]))))
15532 (match_operand:V2DI 3 "register_operand" "x")))]
15533 "TARGET_XOP"
15534 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15535 [(set_attr "type" "ssemuladd")
15536 (set_attr "mode" "TI")])
15537
15538 (define_insn "xop_p<macs>dqh"
15539 [(set (match_operand:V2DI 0 "register_operand" "=x")
15540 (xop_plus:V2DI
15541 (mult:V2DI
15542 (sign_extend:V2DI
15543 (vec_select:V2SI
15544 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15545 (parallel [(const_int 1) (const_int 3)])))
15546 (sign_extend:V2DI
15547 (vec_select:V2SI
15548 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15549 (parallel [(const_int 1) (const_int 3)]))))
15550 (match_operand:V2DI 3 "register_operand" "x")))]
15551 "TARGET_XOP"
15552 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15553 [(set_attr "type" "ssemuladd")
15554 (set_attr "mode" "TI")])
15555
15556 ;; XOP parallel integer multiply/add instructions for the intrinsics.
15557 (define_insn "xop_p<macs>wd"
15558 [(set (match_operand:V4SI 0 "register_operand" "=x")
15559 (xop_plus:V4SI
15560 (mult:V4SI
15561 (sign_extend:V4SI
15562 (vec_select:V4HI
15563 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15564 (parallel [(const_int 1) (const_int 3)
15565 (const_int 5) (const_int 7)])))
15566 (sign_extend:V4SI
15567 (vec_select:V4HI
15568 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15569 (parallel [(const_int 1) (const_int 3)
15570 (const_int 5) (const_int 7)]))))
15571 (match_operand:V4SI 3 "register_operand" "x")))]
15572 "TARGET_XOP"
15573 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15574 [(set_attr "type" "ssemuladd")
15575 (set_attr "mode" "TI")])
15576
15577 (define_insn "xop_p<madcs>wd"
15578 [(set (match_operand:V4SI 0 "register_operand" "=x")
15579 (xop_plus:V4SI
15580 (plus:V4SI
15581 (mult:V4SI
15582 (sign_extend:V4SI
15583 (vec_select:V4HI
15584 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15585 (parallel [(const_int 0) (const_int 2)
15586 (const_int 4) (const_int 6)])))
15587 (sign_extend:V4SI
15588 (vec_select:V4HI
15589 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15590 (parallel [(const_int 0) (const_int 2)
15591 (const_int 4) (const_int 6)]))))
15592 (mult:V4SI
15593 (sign_extend:V4SI
15594 (vec_select:V4HI
15595 (match_dup 1)
15596 (parallel [(const_int 1) (const_int 3)
15597 (const_int 5) (const_int 7)])))
15598 (sign_extend:V4SI
15599 (vec_select:V4HI
15600 (match_dup 2)
15601 (parallel [(const_int 1) (const_int 3)
15602 (const_int 5) (const_int 7)])))))
15603 (match_operand:V4SI 3 "register_operand" "x")))]
15604 "TARGET_XOP"
15605 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15606 [(set_attr "type" "ssemuladd")
15607 (set_attr "mode" "TI")])
15608
15609 ;; XOP parallel XMM conditional moves
15610 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
15611 [(set (match_operand:V 0 "register_operand" "=x,x")
15612 (if_then_else:V
15613 (match_operand:V 3 "nonimmediate_operand" "x,m")
15614 (match_operand:V 1 "register_operand" "x,x")
15615 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
15616 "TARGET_XOP"
15617 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15618 [(set_attr "type" "sse4arg")])
15619
15620 ;; XOP horizontal add/subtract instructions
15621 (define_insn "xop_phadd<u>bw"
15622 [(set (match_operand:V8HI 0 "register_operand" "=x")
15623 (plus:V8HI
15624 (any_extend:V8HI
15625 (vec_select:V8QI
15626 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15627 (parallel [(const_int 0) (const_int 2)
15628 (const_int 4) (const_int 6)
15629 (const_int 8) (const_int 10)
15630 (const_int 12) (const_int 14)])))
15631 (any_extend:V8HI
15632 (vec_select:V8QI
15633 (match_dup 1)
15634 (parallel [(const_int 1) (const_int 3)
15635 (const_int 5) (const_int 7)
15636 (const_int 9) (const_int 11)
15637 (const_int 13) (const_int 15)])))))]
15638 "TARGET_XOP"
15639 "vphadd<u>bw\t{%1, %0|%0, %1}"
15640 [(set_attr "type" "sseiadd1")])
15641
15642 (define_insn "xop_phadd<u>bd"
15643 [(set (match_operand:V4SI 0 "register_operand" "=x")
15644 (plus:V4SI
15645 (plus:V4SI
15646 (any_extend:V4SI
15647 (vec_select:V4QI
15648 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15649 (parallel [(const_int 0) (const_int 4)
15650 (const_int 8) (const_int 12)])))
15651 (any_extend:V4SI
15652 (vec_select:V4QI
15653 (match_dup 1)
15654 (parallel [(const_int 1) (const_int 5)
15655 (const_int 9) (const_int 13)]))))
15656 (plus:V4SI
15657 (any_extend:V4SI
15658 (vec_select:V4QI
15659 (match_dup 1)
15660 (parallel [(const_int 2) (const_int 6)
15661 (const_int 10) (const_int 14)])))
15662 (any_extend:V4SI
15663 (vec_select:V4QI
15664 (match_dup 1)
15665 (parallel [(const_int 3) (const_int 7)
15666 (const_int 11) (const_int 15)]))))))]
15667 "TARGET_XOP"
15668 "vphadd<u>bd\t{%1, %0|%0, %1}"
15669 [(set_attr "type" "sseiadd1")])
15670
15671 (define_insn "xop_phadd<u>bq"
15672 [(set (match_operand:V2DI 0 "register_operand" "=x")
15673 (plus:V2DI
15674 (plus:V2DI
15675 (plus:V2DI
15676 (any_extend:V2DI
15677 (vec_select:V2QI
15678 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15679 (parallel [(const_int 0) (const_int 8)])))
15680 (any_extend:V2DI
15681 (vec_select:V2QI
15682 (match_dup 1)
15683 (parallel [(const_int 1) (const_int 9)]))))
15684 (plus:V2DI
15685 (any_extend:V2DI
15686 (vec_select:V2QI
15687 (match_dup 1)
15688 (parallel [(const_int 2) (const_int 10)])))
15689 (any_extend:V2DI
15690 (vec_select:V2QI
15691 (match_dup 1)
15692 (parallel [(const_int 3) (const_int 11)])))))
15693 (plus:V2DI
15694 (plus:V2DI
15695 (any_extend:V2DI
15696 (vec_select:V2QI
15697 (match_dup 1)
15698 (parallel [(const_int 4) (const_int 12)])))
15699 (any_extend:V2DI
15700 (vec_select:V2QI
15701 (match_dup 1)
15702 (parallel [(const_int 5) (const_int 13)]))))
15703 (plus:V2DI
15704 (any_extend:V2DI
15705 (vec_select:V2QI
15706 (match_dup 1)
15707 (parallel [(const_int 6) (const_int 14)])))
15708 (any_extend:V2DI
15709 (vec_select:V2QI
15710 (match_dup 1)
15711 (parallel [(const_int 7) (const_int 15)])))))))]
15712 "TARGET_XOP"
15713 "vphadd<u>bq\t{%1, %0|%0, %1}"
15714 [(set_attr "type" "sseiadd1")])
15715
15716 (define_insn "xop_phadd<u>wd"
15717 [(set (match_operand:V4SI 0 "register_operand" "=x")
15718 (plus:V4SI
15719 (any_extend:V4SI
15720 (vec_select:V4HI
15721 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15722 (parallel [(const_int 0) (const_int 2)
15723 (const_int 4) (const_int 6)])))
15724 (any_extend:V4SI
15725 (vec_select:V4HI
15726 (match_dup 1)
15727 (parallel [(const_int 1) (const_int 3)
15728 (const_int 5) (const_int 7)])))))]
15729 "TARGET_XOP"
15730 "vphadd<u>wd\t{%1, %0|%0, %1}"
15731 [(set_attr "type" "sseiadd1")])
15732
15733 (define_insn "xop_phadd<u>wq"
15734 [(set (match_operand:V2DI 0 "register_operand" "=x")
15735 (plus:V2DI
15736 (plus:V2DI
15737 (any_extend:V2DI
15738 (vec_select:V2HI
15739 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15740 (parallel [(const_int 0) (const_int 4)])))
15741 (any_extend:V2DI
15742 (vec_select:V2HI
15743 (match_dup 1)
15744 (parallel [(const_int 1) (const_int 5)]))))
15745 (plus:V2DI
15746 (any_extend:V2DI
15747 (vec_select:V2HI
15748 (match_dup 1)
15749 (parallel [(const_int 2) (const_int 6)])))
15750 (any_extend:V2DI
15751 (vec_select:V2HI
15752 (match_dup 1)
15753 (parallel [(const_int 3) (const_int 7)]))))))]
15754 "TARGET_XOP"
15755 "vphadd<u>wq\t{%1, %0|%0, %1}"
15756 [(set_attr "type" "sseiadd1")])
15757
15758 (define_insn "xop_phadd<u>dq"
15759 [(set (match_operand:V2DI 0 "register_operand" "=x")
15760 (plus:V2DI
15761 (any_extend:V2DI
15762 (vec_select:V2SI
15763 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15764 (parallel [(const_int 0) (const_int 2)])))
15765 (any_extend:V2DI
15766 (vec_select:V2SI
15767 (match_dup 1)
15768 (parallel [(const_int 1) (const_int 3)])))))]
15769 "TARGET_XOP"
15770 "vphadd<u>dq\t{%1, %0|%0, %1}"
15771 [(set_attr "type" "sseiadd1")])
15772
15773 (define_insn "xop_phsubbw"
15774 [(set (match_operand:V8HI 0 "register_operand" "=x")
15775 (minus:V8HI
15776 (sign_extend:V8HI
15777 (vec_select:V8QI
15778 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15779 (parallel [(const_int 0) (const_int 2)
15780 (const_int 4) (const_int 6)
15781 (const_int 8) (const_int 10)
15782 (const_int 12) (const_int 14)])))
15783 (sign_extend:V8HI
15784 (vec_select:V8QI
15785 (match_dup 1)
15786 (parallel [(const_int 1) (const_int 3)
15787 (const_int 5) (const_int 7)
15788 (const_int 9) (const_int 11)
15789 (const_int 13) (const_int 15)])))))]
15790 "TARGET_XOP"
15791 "vphsubbw\t{%1, %0|%0, %1}"
15792 [(set_attr "type" "sseiadd1")])
15793
15794 (define_insn "xop_phsubwd"
15795 [(set (match_operand:V4SI 0 "register_operand" "=x")
15796 (minus:V4SI
15797 (sign_extend:V4SI
15798 (vec_select:V4HI
15799 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15800 (parallel [(const_int 0) (const_int 2)
15801 (const_int 4) (const_int 6)])))
15802 (sign_extend:V4SI
15803 (vec_select:V4HI
15804 (match_dup 1)
15805 (parallel [(const_int 1) (const_int 3)
15806 (const_int 5) (const_int 7)])))))]
15807 "TARGET_XOP"
15808 "vphsubwd\t{%1, %0|%0, %1}"
15809 [(set_attr "type" "sseiadd1")])
15810
15811 (define_insn "xop_phsubdq"
15812 [(set (match_operand:V2DI 0 "register_operand" "=x")
15813 (minus:V2DI
15814 (sign_extend:V2DI
15815 (vec_select:V2SI
15816 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15817 (parallel [(const_int 0) (const_int 2)])))
15818 (sign_extend:V2DI
15819 (vec_select:V2SI
15820 (match_dup 1)
15821 (parallel [(const_int 1) (const_int 3)])))))]
15822 "TARGET_XOP"
15823 "vphsubdq\t{%1, %0|%0, %1}"
15824 [(set_attr "type" "sseiadd1")])
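
;; The horizontal widening add/subtract patterns above merge adjacent
;; narrow elements into one wider element: vphadd(u)wd adds each
;; even/odd word pair into a dword, vphsubwd subtracts the odd word
;; from the even one, and so on.  One lane of vphaddwd in plain C:
;;
;;   /* dst[i] = (int) a[2*i] + (int) a[2*i + 1]  (signed widening).  */
;;   void phaddwd (int dst[4], const short a[8])
;;   {
;;     for (int i = 0; i < 4; i++)
;;       dst[i] = (int) a[2 * i] + (int) a[2 * i + 1];
;;   }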
15825
15826 ;; XOP permute instructions
15827 (define_insn "xop_pperm"
15828 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15829 (unspec:V16QI
15830 [(match_operand:V16QI 1 "register_operand" "x,x")
15831 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15832 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
15833 UNSPEC_XOP_PERMUTE))]
15834 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15835 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15836 [(set_attr "type" "sse4arg")
15837 (set_attr "mode" "TI")])
15838
15839 ;; XOP pack instructions that narrow two vectors into a single vector of half-width elements
15840 (define_insn "xop_pperm_pack_v2di_v4si"
15841 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
15842 (vec_concat:V4SI
15843 (truncate:V2SI
15844 (match_operand:V2DI 1 "register_operand" "x,x"))
15845 (truncate:V2SI
15846 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
15847 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15848 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15849 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15850 [(set_attr "type" "sse4arg")
15851 (set_attr "mode" "TI")])
15852
15853 (define_insn "xop_pperm_pack_v4si_v8hi"
15854 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
15855 (vec_concat:V8HI
15856 (truncate:V4HI
15857 (match_operand:V4SI 1 "register_operand" "x,x"))
15858 (truncate:V4HI
15859 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
15860 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15861 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15862 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15863 [(set_attr "type" "sse4arg")
15864 (set_attr "mode" "TI")])
15865
15866 (define_insn "xop_pperm_pack_v8hi_v16qi"
15867 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15868 (vec_concat:V16QI
15869 (truncate:V8QI
15870 (match_operand:V8HI 1 "register_operand" "x,x"))
15871 (truncate:V8QI
15872 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
15873 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15874 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15875 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15876 [(set_attr "type" "sse4arg")
15877 (set_attr "mode" "TI")])
15878
15879 ;; XOP packed rotate instructions
15880 (define_expand "rotl<mode>3"
15881 [(set (match_operand:VI_128 0 "register_operand")
15882 (rotate:VI_128
15883 (match_operand:VI_128 1 "nonimmediate_operand")
15884 (match_operand:SI 2 "general_operand")))]
15885 "TARGET_XOP"
15886 {
15887 /* If we were given a scalar, convert it to a parallel.  */
15888 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15889 {
15890 rtvec vs = rtvec_alloc (<ssescalarnum>);
15891 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15892 rtx reg = gen_reg_rtx (<MODE>mode);
15893 rtx op2 = operands[2];
15894 int i;
15895
15896 if (GET_MODE (op2) != <ssescalarmode>mode)
15897 {
15898 op2 = gen_reg_rtx (<ssescalarmode>mode);
15899 convert_move (op2, operands[2], false);
15900 }
15901
15902 for (i = 0; i < <ssescalarnum>; i++)
15903 RTVEC_ELT (vs, i) = op2;
15904
15905 emit_insn (gen_vec_init<mode> (reg, par));
15906 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
15907 DONE;
15908 }
15909 })
15910
15911 (define_expand "rotr<mode>3"
15912 [(set (match_operand:VI_128 0 "register_operand")
15913 (rotatert:VI_128
15914 (match_operand:VI_128 1 "nonimmediate_operand")
15915 (match_operand:SI 2 "general_operand")))]
15916 "TARGET_XOP"
15917 {
15918 /* If we were given a scalar, convert it to a parallel.  */
15919 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15920 {
15921 rtvec vs = rtvec_alloc (<ssescalarnum>);
15922 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15923 rtx neg = gen_reg_rtx (<MODE>mode);
15924 rtx reg = gen_reg_rtx (<MODE>mode);
15925 rtx op2 = operands[2];
15926 int i;
15927
15928 if (GET_MODE (op2) != <ssescalarmode>mode)
15929 {
15930 op2 = gen_reg_rtx (<ssescalarmode>mode);
15931 convert_move (op2, operands[2], false);
15932 }
15933
15934 for (i = 0; i < <ssescalarnum>; i++)
15935 RTVEC_ELT (vs, i) = op2;
15936
15937 emit_insn (gen_vec_init<mode> (reg, par));
15938 emit_insn (gen_neg<mode>2 (neg, reg));
15939 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
15940 DONE;
15941 }
15942 })
15943
15944 (define_insn "xop_rotl<mode>3"
15945 [(set (match_operand:VI_128 0 "register_operand" "=x")
15946 (rotate:VI_128
15947 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15948 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15949 "TARGET_XOP"
15950 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15951 [(set_attr "type" "sseishft")
15952 (set_attr "length_immediate" "1")
15953 (set_attr "mode" "TI")])
15954
15955 (define_insn "xop_rotr<mode>3"
15956 [(set (match_operand:VI_128 0 "register_operand" "=x")
15957 (rotatert:VI_128
15958 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15959 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15960 "TARGET_XOP"
15961 {
15962 operands[3]
15963 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
15964 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
15965 }
15966 [(set_attr "type" "sseishft")
15967 (set_attr "length_immediate" "1")
15968 (set_attr "mode" "TI")])
15969
15970 (define_expand "vrotr<mode>3"
15971 [(match_operand:VI_128 0 "register_operand")
15972 (match_operand:VI_128 1 "register_operand")
15973 (match_operand:VI_128 2 "register_operand")]
15974 "TARGET_XOP"
15975 {
15976 rtx reg = gen_reg_rtx (<MODE>mode);
15977 emit_insn (gen_neg<mode>2 (reg, operands[2]));
15978 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
15979 DONE;
15980 })
15981
15982 (define_expand "vrotl<mode>3"
15983 [(match_operand:VI_128 0 "register_operand")
15984 (match_operand:VI_128 1 "register_operand")
15985 (match_operand:VI_128 2 "register_operand")]
15986 "TARGET_XOP"
15987 {
15988 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
15989 DONE;
15990 })
15991
15992 (define_insn "xop_vrotl<mode>3"
15993 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
15994 (if_then_else:VI_128
15995 (ge:VI_128
15996 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
15997 (const_int 0))
15998 (rotate:VI_128
15999 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16000 (match_dup 2))
16001 (rotatert:VI_128
16002 (match_dup 1)
16003 (neg:VI_128 (match_dup 2)))))]
16004 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16005 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16006 [(set_attr "type" "sseishft")
16007 (set_attr "prefix_data16" "0")
16008 (set_attr "prefix_extra" "2")
16009 (set_attr "mode" "TI")])
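
;; The if_then_else above describes the variable-count rotate: each
;; lane is rotated left when its count element is non-negative and
;; right by the negated count otherwise.  One 32-bit lane in plain C
;; (a sketch that assumes the count stays within -31..31):
;;
;;   unsigned int protd_lane (unsigned int x, int count)
;;   {
;;     unsigned int n = (unsigned int) (count >= 0 ? count : -count);
;;     unsigned int left  = (x << n) | (n ? x >> (32 - n) : 0);
;;     unsigned int right = (x >> n) | (n ? x << (32 - n) : 0);
;;     return count >= 0 ? left : right;
;;   }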
16010
16011 ;; XOP packed shift instructions.
16012 (define_expand "vlshr<mode>3"
16013 [(set (match_operand:VI12_128 0 "register_operand")
16014 (lshiftrt:VI12_128
16015 (match_operand:VI12_128 1 "register_operand")
16016 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16017 "TARGET_XOP"
16018 {
16019 rtx neg = gen_reg_rtx (<MODE>mode);
16020 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16021 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
16022 DONE;
16023 })
16024
16025 (define_expand "vlshr<mode>3"
16026 [(set (match_operand:VI48_128 0 "register_operand")
16027 (lshiftrt:VI48_128
16028 (match_operand:VI48_128 1 "register_operand")
16029 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16030 "TARGET_AVX2 || TARGET_XOP"
16031 {
16032 if (!TARGET_AVX2)
16033 {
16034 rtx neg = gen_reg_rtx (<MODE>mode);
16035 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16036 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
16037 DONE;
16038 }
16039 })
16040
16041 (define_expand "vlshr<mode>3"
16042 [(set (match_operand:VI48_512 0 "register_operand")
16043 (lshiftrt:VI48_512
16044 (match_operand:VI48_512 1 "register_operand")
16045 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16046 "TARGET_AVX512F")
16047
16048 (define_expand "vlshr<mode>3"
16049 [(set (match_operand:VI48_256 0 "register_operand")
16050 (lshiftrt:VI48_256
16051 (match_operand:VI48_256 1 "register_operand")
16052 (match_operand:VI48_256 2 "nonimmediate_operand")))]
16053 "TARGET_AVX2")
16054
16055 (define_expand "vashrv8hi3<mask_name>"
16056 [(set (match_operand:V8HI 0 "register_operand")
16057 (ashiftrt:V8HI
16058 (match_operand:V8HI 1 "register_operand")
16059 (match_operand:V8HI 2 "nonimmediate_operand")))]
16060 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
16061 {
16062 if (TARGET_XOP)
16063 {
16064 rtx neg = gen_reg_rtx (V8HImode);
16065 emit_insn (gen_negv8hi2 (neg, operands[2]));
16066 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
16067 DONE;
16068 }
16069 })
16070
16071 (define_expand "vashrv16qi3"
16072 [(set (match_operand:V16QI 0 "register_operand")
16073 (ashiftrt:V16QI
16074 (match_operand:V16QI 1 "register_operand")
16075 (match_operand:V16QI 2 "nonimmediate_operand")))]
16076 "TARGET_XOP"
16077 {
16078 rtx neg = gen_reg_rtx (V16QImode);
16079 emit_insn (gen_negv16qi2 (neg, operands[2]));
16080 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
16081 DONE;
16082 })
16083
16084 (define_expand "vashrv2di3<mask_name>"
16085 [(set (match_operand:V2DI 0 "register_operand")
16086 (ashiftrt:V2DI
16087 (match_operand:V2DI 1 "register_operand")
16088 (match_operand:V2DI 2 "nonimmediate_operand")))]
16089 "TARGET_XOP || TARGET_AVX512VL"
16090 {
16091 if (TARGET_XOP)
16092 {
16093 rtx neg = gen_reg_rtx (V2DImode);
16094 emit_insn (gen_negv2di2 (neg, operands[2]));
16095 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
16096 DONE;
16097 }
16098 })
16099
16100 (define_expand "vashrv4si3"
16101 [(set (match_operand:V4SI 0 "register_operand")
16102 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
16103 (match_operand:V4SI 2 "nonimmediate_operand")))]
16104 "TARGET_AVX2 || TARGET_XOP"
16105 {
16106 if (!TARGET_AVX2)
16107 {
16108 rtx neg = gen_reg_rtx (V4SImode);
16109 emit_insn (gen_negv4si2 (neg, operands[2]));
16110 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
16111 DONE;
16112 }
16113 })
16114
16115 (define_expand "vashrv16si3"
16116 [(set (match_operand:V16SI 0 "register_operand")
16117 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
16118 (match_operand:V16SI 2 "nonimmediate_operand")))]
16119 "TARGET_AVX512F")
16120
16121 (define_expand "vashrv8si3"
16122 [(set (match_operand:V8SI 0 "register_operand")
16123 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
16124 (match_operand:V8SI 2 "nonimmediate_operand")))]
16125 "TARGET_AVX2")
16126
16127 (define_expand "vashl<mode>3"
16128 [(set (match_operand:VI12_128 0 "register_operand")
16129 (ashift:VI12_128
16130 (match_operand:VI12_128 1 "register_operand")
16131 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16132 "TARGET_XOP"
16133 {
16134 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16135 DONE;
16136 })
16137
16138 (define_expand "vashl<mode>3"
16139 [(set (match_operand:VI48_128 0 "register_operand")
16140 (ashift:VI48_128
16141 (match_operand:VI48_128 1 "register_operand")
16142 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16143 "TARGET_AVX2 || TARGET_XOP"
16144 {
16145 if (!TARGET_AVX2)
16146 {
16147 operands[2] = force_reg (<MODE>mode, operands[2]);
16148 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16149 DONE;
16150 }
16151 })
16152
16153 (define_expand "vashl<mode>3"
16154 [(set (match_operand:VI48_512 0 "register_operand")
16155 (ashift:VI48_512
16156 (match_operand:VI48_512 1 "register_operand")
16157 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16158 "TARGET_AVX512F")
16159
16160 (define_expand "vashl<mode>3"
16161 [(set (match_operand:VI48_256 0 "register_operand")
16162 (ashift:VI48_256
16163 (match_operand:VI48_256 1 "register_operand")
16164 (match_operand:VI48_256 2 "nonimmediate_operand")))]
16165 "TARGET_AVX2")
16166
16167 (define_insn "xop_sha<mode>3"
16168 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16169 (if_then_else:VI_128
16170 (ge:VI_128
16171 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16172 (const_int 0))
16173 (ashift:VI_128
16174 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16175 (match_dup 2))
16176 (ashiftrt:VI_128
16177 (match_dup 1)
16178 (neg:VI_128 (match_dup 2)))))]
16179 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16180 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16181 [(set_attr "type" "sseishft")
16182 (set_attr "prefix_data16" "0")
16183 (set_attr "prefix_extra" "2")
16184 (set_attr "mode" "TI")])
16185
16186 (define_insn "xop_shl<mode>3"
16187 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16188 (if_then_else:VI_128
16189 (ge:VI_128
16190 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16191 (const_int 0))
16192 (ashift:VI_128
16193 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16194 (match_dup 2))
16195 (lshiftrt:VI_128
16196 (match_dup 1)
16197 (neg:VI_128 (match_dup 2)))))]
16198 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16199 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16200 [(set_attr "type" "sseishft")
16201 (set_attr "prefix_data16" "0")
16202 (set_attr "prefix_extra" "2")
16203 (set_attr "mode" "TI")])
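
;; vpsha* and vpshl* take per-lane signed shift counts: a non-negative
;; count shifts left, a negative count shifts right (arithmetically for
;; vpsha*, logically for vpshl*), as the two if_then_else patterns above
;; spell out.  One dword lane in plain C (counts assumed in range; >> on
;; a negative int is taken to be an arithmetic shift here):
;;
;;   int shad_lane (int x, int count)
;;   {
;;     return count >= 0 ? x << count : x >> -count;
;;   }
;;   unsigned int shld_lane (unsigned int x, int count)
;;   {
;;     return count >= 0 ? x << count : x >> -count;
;;   }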
16204
16205 (define_expand "<shift_insn><mode>3"
16206 [(set (match_operand:VI1_AVX512 0 "register_operand")
16207 (any_shift:VI1_AVX512
16208 (match_operand:VI1_AVX512 1 "register_operand")
16209 (match_operand:SI 2 "nonmemory_operand")))]
16210 "TARGET_SSE2"
16211 {
16212 if (TARGET_XOP && <MODE>mode == V16QImode)
16213 {
16214 bool negate = false;
16215 rtx (*gen) (rtx, rtx, rtx);
16216 rtx tmp, par;
16217 int i;
16218
16219 if (<CODE> != ASHIFT)
16220 {
16221 if (CONST_INT_P (operands[2]))
16222 operands[2] = GEN_INT (-INTVAL (operands[2]));
16223 else
16224 negate = true;
16225 }
16226 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
16227 for (i = 0; i < 16; i++)
16228 XVECEXP (par, 0, i) = operands[2];
16229
16230 tmp = gen_reg_rtx (V16QImode);
16231 emit_insn (gen_vec_initv16qi (tmp, par));
16232
16233 if (negate)
16234 emit_insn (gen_negv16qi2 (tmp, tmp));
16235
16236 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
16237 emit_insn (gen (operands[0], operands[1], tmp));
16238 }
16239 else
16240 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
16241 DONE;
16242 })
16243
16244 (define_expand "ashrv2di3"
16245 [(set (match_operand:V2DI 0 "register_operand")
16246 (ashiftrt:V2DI
16247 (match_operand:V2DI 1 "register_operand")
16248 (match_operand:DI 2 "nonmemory_operand")))]
16249 "TARGET_XOP || TARGET_AVX512VL"
16250 {
16251 if (!TARGET_AVX512VL)
16252 {
16253 rtx reg = gen_reg_rtx (V2DImode);
16254 rtx par;
16255 bool negate = false;
16256 int i;
16257
16258 if (CONST_INT_P (operands[2]))
16259 operands[2] = GEN_INT (-INTVAL (operands[2]));
16260 else
16261 negate = true;
16262
16263 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
16264 for (i = 0; i < 2; i++)
16265 XVECEXP (par, 0, i) = operands[2];
16266
16267 emit_insn (gen_vec_initv2di (reg, par));
16268
16269 if (negate)
16270 emit_insn (gen_negv2di2 (reg, reg));
16271
16272 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
16273 DONE;
16274 }
16275 })
16276
16277 ;; XOP FRCZ support
16278 (define_insn "xop_frcz<mode>2"
16279 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
16280 (unspec:FMAMODE
16281 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
16282 UNSPEC_FRCZ))]
16283 "TARGET_XOP"
16284 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
16285 [(set_attr "type" "ssecvt1")
16286 (set_attr "mode" "<MODE>")])
16287
16288 (define_expand "xop_vmfrcz<mode>2"
16289 [(set (match_operand:VF_128 0 "register_operand")
16290 (vec_merge:VF_128
16291 (unspec:VF_128
16292 [(match_operand:VF_128 1 "nonimmediate_operand")]
16293 UNSPEC_FRCZ)
16294 (match_dup 2)
16295 (const_int 1)))]
16296 "TARGET_XOP"
16297 "operands[2] = CONST0_RTX (<MODE>mode);")
16298
16299 (define_insn "*xop_vmfrcz<mode>2"
16300 [(set (match_operand:VF_128 0 "register_operand" "=x")
16301 (vec_merge:VF_128
16302 (unspec:VF_128
16303 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
16304 UNSPEC_FRCZ)
16305 (match_operand:VF_128 2 "const0_operand")
16306 (const_int 1)))]
16307 "TARGET_XOP"
16308 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
16309 [(set_attr "type" "ssecvt1")
16310 (set_attr "mode" "<MODE>")])
16311
16312 (define_insn "xop_maskcmp<mode>3"
16313 [(set (match_operand:VI_128 0 "register_operand" "=x")
16314 (match_operator:VI_128 1 "ix86_comparison_int_operator"
16315 [(match_operand:VI_128 2 "register_operand" "x")
16316 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16317 "TARGET_XOP"
16318 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16319 [(set_attr "type" "sse4arg")
16320 (set_attr "prefix_data16" "0")
16321 (set_attr "prefix_rep" "0")
16322 (set_attr "prefix_extra" "2")
16323 (set_attr "length_immediate" "1")
16324 (set_attr "mode" "TI")])
16325
16326 (define_insn "xop_maskcmp_uns<mode>3"
16327 [(set (match_operand:VI_128 0 "register_operand" "=x")
16328 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
16329 [(match_operand:VI_128 2 "register_operand" "x")
16330 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16331 "TARGET_XOP"
16332 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16333 [(set_attr "type" "ssecmp")
16334 (set_attr "prefix_data16" "0")
16335 (set_attr "prefix_rep" "0")
16336 (set_attr "prefix_extra" "2")
16337 (set_attr "length_immediate" "1")
16338 (set_attr "mode" "TI")])
16339
16340 ;; Version of pcom*u* used by the intrinsics, which keeps pcomequ* and
16341 ;; pcomneu* from being converted to the signed forms, in case somebody needs
16342 ;; the exact instruction generated for the intrinsic.
16343 (define_insn "xop_maskcmp_uns2<mode>3"
16344 [(set (match_operand:VI_128 0 "register_operand" "=x")
16345 (unspec:VI_128
16346 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
16347 [(match_operand:VI_128 2 "register_operand" "x")
16348 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
16349 UNSPEC_XOP_UNSIGNED_CMP))]
16350 "TARGET_XOP"
16351 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16352 [(set_attr "type" "ssecmp")
16353 (set_attr "prefix_data16" "0")
16354 (set_attr "prefix_extra" "2")
16355 (set_attr "length_immediate" "1")
16356 (set_attr "mode" "TI")])
16357
16358 ;; Pcomtrue and pcomfalse support.  These instructions have little practical
16359 ;; use, but are included here for completeness.
16360 (define_insn "xop_pcom_tf<mode>3"
16361 [(set (match_operand:VI_128 0 "register_operand" "=x")
16362 (unspec:VI_128
16363 [(match_operand:VI_128 1 "register_operand" "x")
16364 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
16365 (match_operand:SI 3 "const_int_operand" "n")]
16366 UNSPEC_XOP_TRUEFALSE))]
16367 "TARGET_XOP"
16368 {
16369 return ((INTVAL (operands[3]) != 0)
16370 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16371 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
16372 }
16373 [(set_attr "type" "ssecmp")
16374 (set_attr "prefix_data16" "0")
16375 (set_attr "prefix_extra" "2")
16376 (set_attr "length_immediate" "1")
16377 (set_attr "mode" "TI")])
16378
16379 (define_insn "xop_vpermil2<mode>3"
16380 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
16381 (unspec:VF_128_256
16382 [(match_operand:VF_128_256 1 "register_operand" "x")
16383 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
16384 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
16385 (match_operand:SI 4 "const_0_to_3_operand" "n")]
16386 UNSPEC_VPERMIL2))]
16387 "TARGET_XOP"
16388 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
16389 [(set_attr "type" "sse4arg")
16390 (set_attr "length_immediate" "1")
16391 (set_attr "mode" "<MODE>")])
16392
16393 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16394
16395 (define_insn "aesenc"
16396 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16397 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16398 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
16399 UNSPEC_AESENC))]
16400 "TARGET_AES"
16401 "@
16402 aesenc\t{%2, %0|%0, %2}
16403 vaesenc\t{%2, %1, %0|%0, %1, %2}"
16404 [(set_attr "isa" "noavx,avx")
16405 (set_attr "type" "sselog1")
16406 (set_attr "prefix_extra" "1")
16407 (set_attr "prefix" "orig,vex")
16408 (set_attr "btver2_decode" "double,double")
16409 (set_attr "mode" "TI")])
16410
16411 (define_insn "aesenclast"
16412 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16413 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16414 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
16415 UNSPEC_AESENCLAST))]
16416 "TARGET_AES"
16417 "@
16418 aesenclast\t{%2, %0|%0, %2}
16419 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
16420 [(set_attr "isa" "noavx,avx")
16421 (set_attr "type" "sselog1")
16422 (set_attr "prefix_extra" "1")
16423 (set_attr "prefix" "orig,vex")
16424 (set_attr "btver2_decode" "double,double")
16425 (set_attr "mode" "TI")])
16426
16427 (define_insn "aesdec"
16428 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16429 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16430 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
16431 UNSPEC_AESDEC))]
16432 "TARGET_AES"
16433 "@
16434 aesdec\t{%2, %0|%0, %2}
16435 vaesdec\t{%2, %1, %0|%0, %1, %2}"
16436 [(set_attr "isa" "noavx,avx")
16437 (set_attr "type" "sselog1")
16438 (set_attr "prefix_extra" "1")
16439 (set_attr "prefix" "orig,vex")
16440 (set_attr "btver2_decode" "double,double")
16441 (set_attr "mode" "TI")])
16442
16443 (define_insn "aesdeclast"
16444 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16445 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16446 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
16447 UNSPEC_AESDECLAST))]
16448 "TARGET_AES"
16449 "@
16450 aesdeclast\t{%2, %0|%0, %2}
16451 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
16452 [(set_attr "isa" "noavx,avx")
16453 (set_attr "type" "sselog1")
16454 (set_attr "prefix_extra" "1")
16455 (set_attr "prefix" "orig,vex")
16456 (set_attr "btver2_decode" "double,double")
16457 (set_attr "mode" "TI")])
16458
16459 (define_insn "aesimc"
16460 [(set (match_operand:V2DI 0 "register_operand" "=x")
16461 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
16462 UNSPEC_AESIMC))]
16463 "TARGET_AES"
16464 "%vaesimc\t{%1, %0|%0, %1}"
16465 [(set_attr "type" "sselog1")
16466 (set_attr "prefix_extra" "1")
16467 (set_attr "prefix" "maybe_vex")
16468 (set_attr "mode" "TI")])
16469
16470 (define_insn "aeskeygenassist"
16471 [(set (match_operand:V2DI 0 "register_operand" "=x")
16472 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
16473 (match_operand:SI 2 "const_0_to_255_operand" "n")]
16474 UNSPEC_AESKEYGENASSIST))]
16475 "TARGET_AES"
16476 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
16477 [(set_attr "type" "sselog1")
16478 (set_attr "prefix_extra" "1")
16479 (set_attr "length_immediate" "1")
16480 (set_attr "prefix" "maybe_vex")
16481 (set_attr "mode" "TI")])
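
;; These patterns back the AES-NI round primitives.  A minimal sketch of
;; two encryption rounds at the intrinsic level, assuming the usual
;; <wmmintrin.h> interface (-maes):
;;
;;   #include <wmmintrin.h>
;;   __m128i aes_two_rounds (__m128i state, __m128i rk1, __m128i rk2)
;;   {
;;     state = _mm_aesenc_si128 (state, rk1);     /* middle round */
;;     return _mm_aesenclast_si128 (state, rk2);  /* final round  */
;;   }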
16482
16483 (define_insn "pclmulqdq"
16484 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16485 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16486 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
16487 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16488 UNSPEC_PCLMUL))]
16489 "TARGET_PCLMUL"
16490 "@
16491 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
16492 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16493 [(set_attr "isa" "noavx,avx")
16494 (set_attr "type" "sselog1")
16495 (set_attr "prefix_extra" "1")
16496 (set_attr "length_immediate" "1")
16497 (set_attr "prefix" "orig,vex")
16498 (set_attr "mode" "TI")])
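
;; Operand 3 selects which 64-bit half of each source enters the
;; carry-less multiplication (0x00 = low halves of both, 0x11 = high
;; halves of both).  A minimal sketch, assuming the usual <wmmintrin.h>
;; intrinsic (-mpclmul):
;;
;;   #include <wmmintrin.h>
;;   __m128i clmul_low (__m128i a, __m128i b)
;;   {
;;     /* Carry-less multiply of the low qwords of a and b.  */
;;     return _mm_clmulepi64_si128 (a, b, 0x00);
;;   }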
16499
16500 (define_expand "avx_vzeroall"
16501 [(match_par_dup 0 [(const_int 0)])]
16502 "TARGET_AVX"
16503 {
16504 int nregs = TARGET_64BIT ? 16 : 8;
16505 int regno;
16506
16507 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
16508
16509 XVECEXP (operands[0], 0, 0)
16510 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
16511 UNSPECV_VZEROALL);
16512
16513 for (regno = 0; regno < nregs; regno++)
16514 XVECEXP (operands[0], 0, regno + 1)
16515 = gen_rtx_SET (gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
16516 CONST0_RTX (V8SImode));
16517 })
16518
16519 (define_insn "*avx_vzeroall"
16520 [(match_parallel 0 "vzeroall_operation"
16521 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
16522 "TARGET_AVX"
16523 "vzeroall"
16524 [(set_attr "type" "sse")
16525 (set_attr "modrm" "0")
16526 (set_attr "memory" "none")
16527 (set_attr "prefix" "vex")
16528 (set_attr "btver2_decode" "vector")
16529 (set_attr "mode" "OI")])
16530
16531 ;; Clear the upper 128 bits of the AVX registers; equivalent to a NOP
16532 ;; if the upper 128 bits are unused.
16533 (define_insn "avx_vzeroupper"
16534 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
16535 "TARGET_AVX"
16536 "vzeroupper"
16537 [(set_attr "type" "sse")
16538 (set_attr "modrm" "0")
16539 (set_attr "memory" "none")
16540 (set_attr "prefix" "vex")
16541 (set_attr "btver2_decode" "vector")
16542 (set_attr "mode" "OI")])
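
;; A typical use is just before a call into legacy-SSE code, where
;; clearing the upper halves avoids the AVX/SSE transition penalty.
;; A sketch at the intrinsic level, assuming <immintrin.h>:
;;
;;   #include <immintrin.h>
;;   void before_legacy_sse (void)
;;   {
;;     _mm256_zeroupper ();   /* vzeroupper */
;;   }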
16543
16544 (define_insn "avx2_pbroadcast<mode>"
16545 [(set (match_operand:VI 0 "register_operand" "=x")
16546 (vec_duplicate:VI
16547 (vec_select:<ssescalarmode>
16548 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
16549 (parallel [(const_int 0)]))))]
16550 "TARGET_AVX2"
16551 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
16552 [(set_attr "type" "ssemov")
16553 (set_attr "prefix_extra" "1")
16554 (set_attr "prefix" "vex")
16555 (set_attr "mode" "<sseinsnmode>")])
16556
16557 (define_insn "avx2_pbroadcast<mode>_1"
16558 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
16559 (vec_duplicate:VI_256
16560 (vec_select:<ssescalarmode>
16561 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
16562 (parallel [(const_int 0)]))))]
16563 "TARGET_AVX2"
16564 "@
16565 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
16566 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
16567 [(set_attr "type" "ssemov")
16568 (set_attr "prefix_extra" "1")
16569 (set_attr "prefix" "vex")
16570 (set_attr "mode" "<sseinsnmode>")])
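
;; Both broadcast patterns splat element 0 of the source across the
;; destination.  At the source level this corresponds to the AVX2
;; vpbroadcast* intrinsics (a hedged sketch, assuming the usual
;; <immintrin.h> names):
;;
;;   #include <immintrin.h>
;;   __m256i splat_dword (__m128i x)
;;   {
;;     /* vpbroadcastd: copy dword 0 of x into all eight lanes.  */
;;     return _mm256_broadcastd_epi32 (x);
;;   }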
16571
16572 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
16573 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
16574 (unspec:VI48F_256_512
16575 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
16576 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16577 UNSPEC_VPERMVAR))]
16578 "TARGET_AVX2 && <mask_mode512bit_condition>"
16579 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16580 [(set_attr "type" "sselog")
16581 (set_attr "prefix" "<mask_prefix2>")
16582 (set_attr "mode" "<sseinsnmode>")])
16583
16584 (define_insn "<avx512>_permvar<mode><mask_name>"
16585 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
16586 (unspec:VI1_AVX512VL
16587 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
16588 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16589 UNSPEC_VPERMVAR))]
16590 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
16591 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16592 [(set_attr "type" "sselog")
16593 (set_attr "prefix" "<mask_prefix2>")
16594 (set_attr "mode" "<sseinsnmode>")])
16595
16596 (define_insn "<avx512>_permvar<mode><mask_name>"
16597 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16598 (unspec:VI2_AVX512VL
16599 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
16600 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16601 UNSPEC_VPERMVAR))]
16602 "TARGET_AVX512BW && <mask_mode512bit_condition>"
16603 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16604 [(set_attr "type" "sselog")
16605 (set_attr "prefix" "<mask_prefix2>")
16606 (set_attr "mode" "<sseinsnmode>")])
16607
16608 (define_expand "<avx2_avx512>_perm<mode>"
16609 [(match_operand:VI8F_256_512 0 "register_operand")
16610 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
16611 (match_operand:SI 2 "const_0_to_255_operand")]
16612 "TARGET_AVX2"
16613 {
16614 int mask = INTVAL (operands[2]);
16615 emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
16616 GEN_INT ((mask >> 0) & 3),
16617 GEN_INT ((mask >> 2) & 3),
16618 GEN_INT ((mask >> 4) & 3),
16619 GEN_INT ((mask >> 6) & 3)));
16620 DONE;
16621 })
16622
16623 (define_expand "<avx512>_perm<mode>_mask"
16624 [(match_operand:VI8F_256_512 0 "register_operand")
16625 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
16626 (match_operand:SI 2 "const_0_to_255_operand")
16627 (match_operand:VI8F_256_512 3 "vector_move_operand")
16628 (match_operand:<avx512fmaskmode> 4 "register_operand")]
16629 "TARGET_AVX512F"
16630 {
16631 int mask = INTVAL (operands[2]);
16632 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
16633 GEN_INT ((mask >> 0) & 3),
16634 GEN_INT ((mask >> 2) & 3),
16635 GEN_INT ((mask >> 4) & 3),
16636 GEN_INT ((mask >> 6) & 3),
16637 operands[3], operands[4]));
16638 DONE;
16639 })
16640
16641 (define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
16642 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
16643 (vec_select:VI8F_256_512
16644 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
16645 (parallel [(match_operand 2 "const_0_to_3_operand")
16646 (match_operand 3 "const_0_to_3_operand")
16647 (match_operand 4 "const_0_to_3_operand")
16648 (match_operand 5 "const_0_to_3_operand")])))]
16649 "TARGET_AVX2 && <mask_mode512bit_condition>"
16650 {
16651 int mask = 0;
16652 mask |= INTVAL (operands[2]) << 0;
16653 mask |= INTVAL (operands[3]) << 2;
16654 mask |= INTVAL (operands[4]) << 4;
16655 mask |= INTVAL (operands[5]) << 6;
16656 operands[2] = GEN_INT (mask);
16657 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
16658 }
16659 [(set_attr "type" "sselog")
16660 (set_attr "prefix" "<mask_prefix2>")
16661 (set_attr "mode" "<sseinsnmode>")])
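
;; The expanders above split the 8-bit selector into four 2-bit lane
;; indices and this insn re-packs them into the vpermq/vpermpd
;; immediate.  At the intrinsic level (a hedged sketch, assuming the
;; usual _mm256_permute4x64_* names from <immintrin.h>):
;;
;;   #include <immintrin.h>
;;   __m256d reverse_lanes (__m256d x)
;;   {
;;     /* Selector 0x1b = (0 << 6) | (1 << 4) | (2 << 2) | 3,
;;        i.e. take source lanes 3, 2, 1, 0 in that order.  */
;;     return _mm256_permute4x64_pd (x, 0x1b);
;;   }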
16662
16663 (define_insn "avx2_permv2ti"
16664 [(set (match_operand:V4DI 0 "register_operand" "=x")
16665 (unspec:V4DI
16666 [(match_operand:V4DI 1 "register_operand" "x")
16667 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
16668 (match_operand:SI 3 "const_0_to_255_operand" "n")]
16669 UNSPEC_VPERMTI))]
16670 "TARGET_AVX2"
16671 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16672 [(set_attr "type" "sselog")
16673 (set_attr "prefix" "vex")
16674 (set_attr "mode" "OI")])
16675
16676 (define_insn "avx2_vec_dupv4df"
16677 [(set (match_operand:V4DF 0 "register_operand" "=x")
16678 (vec_duplicate:V4DF
16679 (vec_select:DF
16680 (match_operand:V2DF 1 "register_operand" "x")
16681 (parallel [(const_int 0)]))))]
16682 "TARGET_AVX2"
16683 "vbroadcastsd\t{%1, %0|%0, %1}"
16684 [(set_attr "type" "sselog1")
16685 (set_attr "prefix" "vex")
16686 (set_attr "mode" "V4DF")])
16687
16688 (define_insn "<avx512>_vec_dup<mode>_1"
16689 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
16690 (vec_duplicate:VI_AVX512BW
16691 (vec_select:VI_AVX512BW
16692 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
16693 (parallel [(const_int 0)]))))]
16694 "TARGET_AVX512F"
16695 "@
16696 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
16697 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
16698 [(set_attr "type" "ssemov")
16699 (set_attr "prefix" "evex")
16700 (set_attr "mode" "<sseinsnmode>")])
16701
16702 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16703 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
16704 (vec_duplicate:V48_AVX512VL
16705 (vec_select:<ssescalarmode>
16706 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16707 (parallel [(const_int 0)]))))]
16708 "TARGET_AVX512F"
16709 {
16710 /* There is no DF broadcast (in AVX-512*) to a 128-bit register.
16711 Mimic it with the integer variant.  */
16712 if (<MODE>mode == V2DFmode)
16713 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
16714
16715 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
16716 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}";
16717 else
16718 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
16719 }
16720 [(set_attr "type" "ssemov")
16721 (set_attr "prefix" "evex")
16722 (set_attr "mode" "<sseinsnmode>")])
16723
16724 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16725 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16726 (vec_duplicate:VI12_AVX512VL
16727 (vec_select:<ssescalarmode>
16728 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16729 (parallel [(const_int 0)]))))]
16730 "TARGET_AVX512BW"
16731 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16732 [(set_attr "type" "ssemov")
16733 (set_attr "prefix" "evex")
16734 (set_attr "mode" "<sseinsnmode>")])
16735
16736 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16737 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16738 (vec_duplicate:V16FI
16739 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16740 "TARGET_AVX512F"
16741 "@
16742 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
16743 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16744 [(set_attr "type" "ssemov")
16745 (set_attr "prefix" "evex")
16746 (set_attr "mode" "<sseinsnmode>")])
16747
16748 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16749 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
16750 (vec_duplicate:V8FI
16751 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16752 "TARGET_AVX512F"
16753 "@
16754 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16755 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16756 [(set_attr "type" "ssemov")
16757 (set_attr "prefix" "evex")
16758 (set_attr "mode" "<sseinsnmode>")])
16759
16760 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16761 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
16762 (vec_duplicate:VI12_AVX512VL
16763 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
16764 "TARGET_AVX512BW"
16765 "@
16766 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
16767 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
16768 [(set_attr "type" "ssemov")
16769 (set_attr "prefix" "evex")
16770 (set_attr "mode" "<sseinsnmode>")])
16771
16772 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16773 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
16774 (vec_duplicate:V48_AVX512VL
16775 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
16776 "TARGET_AVX512F"
16777 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16778 [(set_attr "type" "ssemov")
16779 (set_attr "prefix" "evex")
16780 (set_attr "mode" "<sseinsnmode>")
16781 (set (attr "enabled")
16782 (if_then_else (eq_attr "alternative" "1")
16783 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
16784 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
16785 (const_int 1)))])
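;; Alternative 1 (broadcast straight from a general register) is enabled
;; only for integer element modes, and for DImode elements only when
;; TARGET_64BIT, since vpbroadcastq from a GPR needs a 64-bit register.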
16786
16787 (define_insn "vec_dupv4sf"
16788 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
16789 (vec_duplicate:V4SF
16790 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
16791 "TARGET_SSE"
16792 "@
16793 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
16794 vbroadcastss\t{%1, %0|%0, %1}
16795 shufps\t{$0, %0, %0|%0, %0, 0}"
16796 [(set_attr "isa" "avx,avx,noavx")
16797 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
16798 (set_attr "length_immediate" "1,0,1")
16799 (set_attr "prefix_extra" "0,1,*")
16800 (set_attr "prefix" "vex,vex,orig")
16801 (set_attr "mode" "V4SF")])
16802
16803 (define_insn "*vec_dupv4si"
16804 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
16805 (vec_duplicate:V4SI
16806 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
16807 "TARGET_SSE"
16808 "@
16809 %vpshufd\t{$0, %1, %0|%0, %1, 0}
16810 vbroadcastss\t{%1, %0|%0, %1}
16811 shufps\t{$0, %0, %0|%0, %0, 0}"
16812 [(set_attr "isa" "sse2,avx,noavx")
16813 (set_attr "type" "sselog1,ssemov,sselog1")
16814 (set_attr "length_immediate" "1,0,1")
16815 (set_attr "prefix_extra" "0,1,*")
16816 (set_attr "prefix" "maybe_vex,vex,orig")
16817 (set_attr "mode" "TI,V4SF,V4SF")])
16818
16819 (define_insn "*vec_dupv2di"
16820 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
16821 (vec_duplicate:V2DI
16822 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
16823 "TARGET_SSE"
16824 "@
16825 punpcklqdq\t%0, %0
16826 vpunpcklqdq\t{%d1, %0|%0, %d1}
16827 %vmovddup\t{%1, %0|%0, %1}
16828 movlhps\t%0, %0"
16829 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
16830 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
16831 (set_attr "prefix" "orig,vex,maybe_vex,orig")
16832 (set_attr "mode" "TI,TI,DF,V4SF")])
16833
16834 (define_insn "avx2_vbroadcasti128_<mode>"
16835 [(set (match_operand:VI_256 0 "register_operand" "=x")
16836 (vec_concat:VI_256
16837 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
16838 (match_dup 1)))]
16839 "TARGET_AVX2"
16840 "vbroadcasti128\t{%1, %0|%0, %1}"
16841 [(set_attr "type" "ssemov")
16842 (set_attr "prefix_extra" "1")
16843 (set_attr "prefix" "vex")
16844 (set_attr "mode" "OI")])
16845
16846 ;; Modes handled by AVX vec_dup patterns.
16847 (define_mode_iterator AVX_VEC_DUP_MODE
16848 [V8SI V8SF V4DI V4DF])
16849 ;; Modes handled by AVX2 vec_dup patterns.
16850 (define_mode_iterator AVX2_VEC_DUP_MODE
16851 [V32QI V16QI V16HI V8HI V8SI V4SI])
16852
16853 (define_insn "*vec_dup<mode>"
16854 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
16855 (vec_duplicate:AVX2_VEC_DUP_MODE
16856 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
16857 "TARGET_AVX2"
16858 "@
16859 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
16860 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
16861 #"
16862 [(set_attr "isa" "*,*,noavx512vl")
16863 (set_attr "type" "ssemov")
16864 (set_attr "prefix_extra" "1")
16865 (set_attr "prefix" "maybe_evex")
16866 (set_attr "mode" "<sseinsnmode>")])
16867
16868 (define_insn "vec_dup<mode>"
16869 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
16870 (vec_duplicate:AVX_VEC_DUP_MODE
16871 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
16872 "TARGET_AVX"
16873 "@
16874 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
16875 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
16876 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
16877 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
16878 #"
16879 [(set_attr "type" "ssemov")
16880 (set_attr "prefix_extra" "1")
16881 (set_attr "prefix" "maybe_evex")
16882 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
16883 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
16884
16885 (define_split
16886 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
16887 (vec_duplicate:AVX2_VEC_DUP_MODE
16888 (match_operand:<ssescalarmode> 1 "register_operand")))]
16889 "TARGET_AVX2
16890 /* Disable this splitter if the avx512vl_vec_dup_gprv*[qhs]i insn is
16891 available, because then we can broadcast from GPRs directly.
16892 For V*[QH]I modes that insn requires both -mavx512vl and -mavx512bw;
16893 for V*SI mode it requires just -mavx512vl. */
16894 && !(TARGET_AVX512VL
16895 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
16896 && reload_completed && GENERAL_REG_P (operands[1])"
16897 [(const_int 0)]
16898 {
16899 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
16900 CONST0_RTX (V4SImode),
16901 gen_lowpart (SImode, operands[1])));
16902 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
16903 gen_lowpart (<ssexmmmode>mode,
16904 operands[0])));
16905 DONE;
16906 })
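;; The split above synthesizes a GPR broadcast without AVX512VL in two
;; steps: insert the scalar into element 0 of a zeroed V4SI overlaying the
;; destination's low 128 bits, then vpbroadcast that xmm value across the
;; whole vector.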
16907
16908 (define_split
16909 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
16910 (vec_duplicate:AVX_VEC_DUP_MODE
16911 (match_operand:<ssescalarmode> 1 "register_operand")))]
16912 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
16913 [(set (match_dup 2)
16914 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
16915 (set (match_dup 0)
16916 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
16917 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
16918
16919 (define_insn "avx_vbroadcastf128_<mode>"
16920 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
16921 (vec_concat:V_256
16922 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
16923 (match_dup 1)))]
16924 "TARGET_AVX"
16925 "@
16926 vbroadcast<i128>\t{%1, %0|%0, %1}
16927 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
16928 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
16929 [(set_attr "type" "ssemov,sselog1,sselog1")
16930 (set_attr "prefix_extra" "1")
16931 (set_attr "length_immediate" "0,1,1")
16932 (set_attr "prefix" "vex")
16933 (set_attr "mode" "<sseinsnmode>")])
16934
16935 ;; For broadcast[i|f]32x2. Note that there is no v4sf variant, only v4si.
16936 (define_mode_iterator VI4F_BRCST32x2
16937 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
16938 V16SF (V8SF "TARGET_AVX512VL")])
16939
16940 (define_mode_attr 64x2mode
16941 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
16942
16943 (define_mode_attr 32x2mode
16944 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
16945 (V8SF "V2SF") (V4SI "V2SI")])
16946
16947 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
16948 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
16949 (vec_duplicate:VI4F_BRCST32x2
16950 (vec_select:<32x2mode>
16951 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16952 (parallel [(const_int 0) (const_int 1)]))))]
16953 "TARGET_AVX512DQ"
16954 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16955 [(set_attr "type" "ssemov")
16956 (set_attr "prefix_extra" "1")
16957 (set_attr "prefix" "evex")
16958 (set_attr "mode" "<sseinsnmode>")])
16959
16960 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
16961 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
16962 (vec_duplicate:VI4F_256
16963 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16964 "TARGET_AVX512VL"
16965 "@
16966 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
16967 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16968 [(set_attr "type" "ssemov")
16969 (set_attr "prefix_extra" "1")
16970 (set_attr "prefix" "evex")
16971 (set_attr "mode" "<sseinsnmode>")])
16972
16973 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16974 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16975 (vec_duplicate:V16FI
16976 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16977 "TARGET_AVX512DQ"
16978 "@
16979 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16980 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16981 [(set_attr "type" "ssemov")
16982 (set_attr "prefix_extra" "1")
16983 (set_attr "prefix" "evex")
16984 (set_attr "mode" "<sseinsnmode>")])
16985
16986 ;; For broadcast[i|f]64x2
16987 (define_mode_iterator VI8F_BRCST64x2
16988 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
16989
16990 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16991 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
16992 (vec_duplicate:VI8F_BRCST64x2
16993 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
16994 "TARGET_AVX512DQ"
16995 "@
16996 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
16997 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16998 [(set_attr "type" "ssemov")
16999 (set_attr "prefix_extra" "1")
17000 (set_attr "prefix" "evex")
17001 (set_attr "mode" "<sseinsnmode>")])
17002
17003 (define_insn "avx512cd_maskb_vec_dup<mode>"
17004 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
17005 (vec_duplicate:VI8_AVX512VL
17006 (zero_extend:DI
17007 (match_operand:QI 1 "register_operand" "Yk"))))]
17008 "TARGET_AVX512CD"
17009 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
17010 [(set_attr "type" "mskmov")
17011 (set_attr "prefix" "evex")
17012 (set_attr "mode" "XI")])
17013
17014 (define_insn "avx512cd_maskw_vec_dup<mode>"
17015 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
17016 (vec_duplicate:VI4_AVX512VL
17017 (zero_extend:SI
17018 (match_operand:HI 1 "register_operand" "Yk"))))]
17019 "TARGET_AVX512CD"
17020 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
17021 [(set_attr "type" "mskmov")
17022 (set_attr "prefix" "evex")
17023 (set_attr "mode" "XI")])
17024
17025 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
17026 ;; If it so happens that the input is in memory, use vbroadcast.
17027 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
17028 (define_insn "*avx_vperm_broadcast_v4sf"
17029 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
17030 (vec_select:V4SF
17031 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
17032 (match_parallel 2 "avx_vbroadcast_operand"
17033 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17034 "TARGET_AVX"
17035 {
17036 int elt = INTVAL (operands[3]);
17037 switch (which_alternative)
17038 {
17039 case 0:
17040 case 1:
17041 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
17042 return "vbroadcastss\t{%1, %0|%0, %k1}";
17043 case 2:
17044 operands[2] = GEN_INT (elt * 0x55);
17045 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
17046 default:
17047 gcc_unreachable ();
17048 }
17049 }
17050 [(set_attr "type" "ssemov,ssemov,sselog1")
17051 (set_attr "prefix_extra" "1")
17052 (set_attr "length_immediate" "0,0,1")
17053 (set_attr "prefix" "vex")
17054 (set_attr "mode" "SF,SF,V4SF")])
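;; Worked example for the register alternative above: broadcasting element
;; 2 uses imm8 2 * 0x55 = 0xaa, i.e. vpermilps $0xaa replicates element 2
;; into all four positions.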
17055
17056 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
17057 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
17058 (vec_select:VF_256
17059 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
17060 (match_parallel 2 "avx_vbroadcast_operand"
17061 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17062 "TARGET_AVX"
17063 "#"
17064 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
17065 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
17066 {
17067 rtx op0 = operands[0], op1 = operands[1];
17068 int elt = INTVAL (operands[3]);
17069
17070 if (REG_P (op1))
17071 {
17072 int mask;
17073
17074 if (TARGET_AVX2 && elt == 0)
17075 {
17076 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
17077 op1)));
17078 DONE;
17079 }
17080
17081 /* Shuffle the element we care about into all elements of its 128-bit
17082 lane. The other lane gets shuffled too, but we don't care. */
17083 if (<MODE>mode == V4DFmode)
17084 mask = (elt & 1 ? 15 : 0);
17085 else
17086 mask = (elt & 3) * 0x55;
17087 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
17088
17089 /* Shuffle the lane we care about into both lanes of the dest. */
17090 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
17091 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
17092 DONE;
17093 }
17094
17095 operands[1] = adjust_address (op1, <ssescalarmode>mode,
17096 elt * GET_MODE_SIZE (<ssescalarmode>mode));
17097 })
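;; Worked example of the register path above for V8SF, elt == 6: vpermilps
;; gets imm8 (6 & 3) * 0x55 = 0xaa (replicate lane position 2 within each
;; 128-bit lane), then vperm2f128 gets imm8 (6 / 4) * 0x11 = 0x11 (copy the
;; high lane into both halves), leaving element 6 everywhere.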
17098
17099 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
17100 [(set (match_operand:VF2 0 "register_operand")
17101 (vec_select:VF2
17102 (match_operand:VF2 1 "nonimmediate_operand")
17103 (match_operand:SI 2 "const_0_to_255_operand")))]
17104 "TARGET_AVX && <mask_mode512bit_condition>"
17105 {
17106 int mask = INTVAL (operands[2]);
17107 rtx perm[<ssescalarnum>];
17108
17109 int i;
17110 for (i = 0; i < <ssescalarnum>; i = i + 2)
17111 {
17112 perm[i] = GEN_INT (((mask >> i) & 1) + i);
17113 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
17114 }
17115
17116 operands[2]
17117 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
17118 })
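;; Worked example: for V4DF and imm8 0x5 (binary 0101) the loop above
;; builds the selector {1, 0, 3, 2}, i.e. the two elements of each 128-bit
;; lane are swapped.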
17119
17120 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
17121 [(set (match_operand:VF1 0 "register_operand")
17122 (vec_select:VF1
17123 (match_operand:VF1 1 "nonimmediate_operand")
17124 (match_operand:SI 2 "const_0_to_255_operand")))]
17125 "TARGET_AVX && <mask_mode512bit_condition>"
17126 {
17127 int mask = INTVAL (operands[2]);
17128 rtx perm[<ssescalarnum>];
17129
17130 int i;
17131 for (i = 0; i < <ssescalarnum>; i = i + 4)
17132 {
17133 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
17134 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
17135 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
17136 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
17137 }
17138
17139 operands[2]
17140 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
17141 })
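;; Worked example: for V8SF and imm8 0x1b the loop above builds the
;; selector {3, 2, 1, 0, 7, 6, 5, 4}, reversing the elements within each
;; 128-bit lane.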
17142
17143 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
17144 [(set (match_operand:VF 0 "register_operand" "=v")
17145 (vec_select:VF
17146 (match_operand:VF 1 "nonimmediate_operand" "vm")
17147 (match_parallel 2 ""
17148 [(match_operand 3 "const_int_operand")])))]
17149 "TARGET_AVX && <mask_mode512bit_condition>
17150 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
17151 {
17152 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
17153 operands[2] = GEN_INT (mask);
17154 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
17155 }
17156 [(set_attr "type" "sselog")
17157 (set_attr "prefix_extra" "1")
17158 (set_attr "length_immediate" "1")
17159 (set_attr "prefix" "<mask_prefix>")
17160 (set_attr "mode" "<sseinsnmode>")])
17161
17162 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
17163 [(set (match_operand:VF 0 "register_operand" "=v")
17164 (unspec:VF
17165 [(match_operand:VF 1 "register_operand" "v")
17166 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
17167 UNSPEC_VPERMIL))]
17168 "TARGET_AVX && <mask_mode512bit_condition>"
17169 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17170 [(set_attr "type" "sselog")
17171 (set_attr "prefix_extra" "1")
17172 (set_attr "btver2_decode" "vector")
17173 (set_attr "prefix" "<mask_prefix>")
17174 (set_attr "mode" "<sseinsnmode>")])
17175
17176 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17177 [(match_operand:VI48F 0 "register_operand" "=v")
17178 (match_operand:VI48F 1 "register_operand" "v")
17179 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17180 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
17181 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17182 "TARGET_AVX512F"
17183 {
17184 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17185 operands[0], operands[1], operands[2], operands[3],
17186 CONST0_RTX (<MODE>mode), operands[4]));
17187 DONE;
17188 })
17189
17190 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17191 [(match_operand:VI1_AVX512VL 0 "register_operand")
17192 (match_operand:VI1_AVX512VL 1 "register_operand")
17193 (match_operand:<sseintvecmode> 2 "register_operand")
17194 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
17195 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17196 "TARGET_AVX512VBMI"
17197 {
17198 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17199 operands[0], operands[1], operands[2], operands[3],
17200 CONST0_RTX (<MODE>mode), operands[4]));
17201 DONE;
17202 })
17203
17204 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17205 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17206 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17207 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17208 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
17209 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17210 "TARGET_AVX512BW"
17211 {
17212 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17213 operands[0], operands[1], operands[2], operands[3],
17214 CONST0_RTX (<MODE>mode), operands[4]));
17215 DONE;
17216 })
17217
17218 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17219 [(set (match_operand:VI48F 0 "register_operand" "=v")
17220 (unspec:VI48F
17221 [(match_operand:VI48F 1 "register_operand" "v")
17222 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17223 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17224 UNSPEC_VPERMI2))]
17225 "TARGET_AVX512F"
17226 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17227 [(set_attr "type" "sselog")
17228 (set_attr "prefix" "evex")
17229 (set_attr "mode" "<sseinsnmode>")])
17230
17231 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17232 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17233 (unspec:VI1_AVX512VL
17234 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17235 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17236 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17237 UNSPEC_VPERMI2))]
17238 "TARGET_AVX512VBMI"
17239 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17240 [(set_attr "type" "sselog")
17241 (set_attr "prefix" "evex")
17242 (set_attr "mode" "<sseinsnmode>")])
17243
17244 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17245 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17246 (unspec:VI2_AVX512VL
17247 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17248 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17249 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17250 UNSPEC_VPERMI2))]
17251 "TARGET_AVX512BW"
17252 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17253 [(set_attr "type" "sselog")
17254 (set_attr "prefix" "evex")
17255 (set_attr "mode" "<sseinsnmode>")])
17256
17257 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17258 [(set (match_operand:VI48F 0 "register_operand" "=v")
17259 (vec_merge:VI48F
17260 (unspec:VI48F
17261 [(match_operand:VI48F 1 "register_operand" "v")
17262 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17263 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17264 UNSPEC_VPERMI2_MASK)
17265 (match_dup 0)
17266 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17267 "TARGET_AVX512F"
17268 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17269 [(set_attr "type" "sselog")
17270 (set_attr "prefix" "evex")
17271 (set_attr "mode" "<sseinsnmode>")])
17272
17273 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17274 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17275 (vec_merge:VI1_AVX512VL
17276 (unspec:VI1_AVX512VL
17277 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17278 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17279 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17280 UNSPEC_VPERMI2_MASK)
17281 (match_dup 0)
17282 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17283 "TARGET_AVX512VBMI"
17284 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17285 [(set_attr "type" "sselog")
17286 (set_attr "prefix" "evex")
17287 (set_attr "mode" "<sseinsnmode>")])
17288
17289 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17290 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17291 (vec_merge:VI2_AVX512VL
17292 (unspec:VI2_AVX512VL
17293 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17294 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17295 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17296 UNSPEC_VPERMI2_MASK)
17297 (match_dup 0)
17298 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17299 "TARGET_AVX512BW"
17300 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17301 [(set_attr "type" "sselog")
17302 (set_attr "prefix" "evex")
17303 (set_attr "mode" "<sseinsnmode>")])
17304
17305 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17306 [(match_operand:VI48F 0 "register_operand" "=v")
17307 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17308 (match_operand:VI48F 2 "register_operand" "0")
17309 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
17310 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17311 "TARGET_AVX512F"
17312 {
17313 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17314 operands[0], operands[1], operands[2], operands[3],
17315 CONST0_RTX (<MODE>mode), operands[4]));
17316 DONE;
17317 })
17318
17319 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17320 [(match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17321 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17322 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17323 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")
17324 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17325 "TARGET_AVX512VBMI"
17326 {
17327 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17328 operands[0], operands[1], operands[2], operands[3],
17329 CONST0_RTX (<MODE>mode), operands[4]));
17330 DONE;
17331 })
17332
17333 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17334 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17335 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17336 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17337 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
17338 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17339 "TARGET_AVX512BW"
17340 {
17341 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17342 operands[0], operands[1], operands[2], operands[3],
17343 CONST0_RTX (<MODE>mode), operands[4]));
17344 DONE;
17345 })
17346
17347 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17348 [(set (match_operand:VI48F 0 "register_operand" "=v")
17349 (unspec:VI48F
17350 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17351 (match_operand:VI48F 2 "register_operand" "0")
17352 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17353 UNSPEC_VPERMT2))]
17354 "TARGET_AVX512F"
17355 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17356 [(set_attr "type" "sselog")
17357 (set_attr "prefix" "evex")
17358 (set_attr "mode" "<sseinsnmode>")])
17359
17360 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17361 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17362 (unspec:VI1_AVX512VL
17363 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17364 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17365 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17366 UNSPEC_VPERMT2))]
17367 "TARGET_AVX512VBMI"
17368 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17369 [(set_attr "type" "sselog")
17370 (set_attr "prefix" "evex")
17371 (set_attr "mode" "<sseinsnmode>")])
17372
17373 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17374 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17375 (unspec:VI2_AVX512VL
17376 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17377 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17378 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17379 UNSPEC_VPERMT2))]
17380 "TARGET_AVX512BW"
17381 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17382 [(set_attr "type" "sselog")
17383 (set_attr "prefix" "evex")
17384 (set_attr "mode" "<sseinsnmode>")])
17385
17386 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17387 [(set (match_operand:VI48F 0 "register_operand" "=v")
17388 (vec_merge:VI48F
17389 (unspec:VI48F
17390 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17391 (match_operand:VI48F 2 "register_operand" "0")
17392 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17393 UNSPEC_VPERMT2)
17394 (match_dup 2)
17395 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17396 "TARGET_AVX512F"
17397 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17398 [(set_attr "type" "sselog")
17399 (set_attr "prefix" "evex")
17400 (set_attr "mode" "<sseinsnmode>")])
17401
17402 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17403 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17404 (vec_merge:VI1_AVX512VL
17405 (unspec:VI1_AVX512VL
17406 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17407 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17408 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17409 UNSPEC_VPERMT2)
17410 (match_dup 2)
17411 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17412 "TARGET_AVX512VBMI"
17413 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17414 [(set_attr "type" "sselog")
17415 (set_attr "prefix" "evex")
17416 (set_attr "mode" "<sseinsnmode>")])
17417
17418 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17419 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17420 (vec_merge:VI2_AVX512VL
17421 (unspec:VI2_AVX512VL
17422 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17423 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17424 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17425 UNSPEC_VPERMT2)
17426 (match_dup 2)
17427 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17428 "TARGET_AVX512BW"
17429 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17430 [(set_attr "type" "sselog")
17431 (set_attr "prefix" "evex")
17432 (set_attr "mode" "<sseinsnmode>")])
17433
17434 (define_expand "avx_vperm2f128<mode>3"
17435 [(set (match_operand:AVX256MODE2P 0 "register_operand")
17436 (unspec:AVX256MODE2P
17437 [(match_operand:AVX256MODE2P 1 "register_operand")
17438 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
17439 (match_operand:SI 3 "const_0_to_255_operand")]
17440 UNSPEC_VPERMIL2F128))]
17441 "TARGET_AVX"
17442 {
17443 int mask = INTVAL (operands[3]);
17444 if ((mask & 0x88) == 0)
17445 {
17446 rtx perm[<ssescalarnum>], t1, t2;
17447 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
17448
17449 base = (mask & 3) * nelt2;
17450 for (i = 0; i < nelt2; ++i)
17451 perm[i] = GEN_INT (base + i);
17452
17453 base = ((mask >> 4) & 3) * nelt2;
17454 for (i = 0; i < nelt2; ++i)
17455 perm[i + nelt2] = GEN_INT (base + i);
17456
17457 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
17458 operands[1], operands[2]);
17459 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
17460 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
17461 t2 = gen_rtx_SET (operands[0], t2);
17462 emit_insn (t2);
17463 DONE;
17464 }
17465 })
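;; Worked example: for V4DF and imm8 0x31 (no zeroing bits set), the low
;; field (1) picks operand 1's high lane and the high field (3) picks
;; operand 2's high lane, so the vec_select over the (op1, op2)
;; concatenation uses the selector {2, 3, 6, 7}.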
17466
17467 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
17468 ;; means that in order to represent this properly in rtl we'd have to
17469 ;; nest *another* vec_concat with a zero operand and do the select from
17470 ;; a 4x wide vector. That doesn't seem very nice.
17471 (define_insn "*avx_vperm2f128<mode>_full"
17472 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17473 (unspec:AVX256MODE2P
17474 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
17475 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
17476 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17477 UNSPEC_VPERMIL2F128))]
17478 "TARGET_AVX"
17479 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17480 [(set_attr "type" "sselog")
17481 (set_attr "prefix_extra" "1")
17482 (set_attr "length_immediate" "1")
17483 (set_attr "prefix" "vex")
17484 (set_attr "mode" "<sseinsnmode>")])
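;; For example, imm8 0x08 sets bit 3: the low 128 bits of the result are
;; zeroed while bits [5:4] (here 0) still select operand 1's low lane for
;; the upper half.  Such zeroing masks are only handled by this unspec
;; pattern, not by the vec_select form below.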
17485
17486 (define_insn "*avx_vperm2f128<mode>_nozero"
17487 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17488 (vec_select:AVX256MODE2P
17489 (vec_concat:<ssedoublevecmode>
17490 (match_operand:AVX256MODE2P 1 "register_operand" "x")
17491 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
17492 (match_parallel 3 ""
17493 [(match_operand 4 "const_int_operand")])))]
17494 "TARGET_AVX
17495 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
17496 {
17497 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
17498 if (mask == 0x12)
17499 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
17500 if (mask == 0x20)
17501 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
17502 operands[3] = GEN_INT (mask);
17503 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17504 }
17505 [(set_attr "type" "sselog")
17506 (set_attr "prefix_extra" "1")
17507 (set_attr "length_immediate" "1")
17508 (set_attr "prefix" "vex")
17509 (set_attr "mode" "<sseinsnmode>")])
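;; Recovered masks 0x12 and 0x20 are the two cases that degenerate into
;; vinsert[if]128: 0x12 keeps operand 1's high lane and inserts operand
;; 2's low lane at position 0; 0x20 keeps operand 1's low lane and inserts
;; operand 2's low lane at position 1.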
17510
17511 (define_insn "*ssse3_palignr<mode>_perm"
17512 [(set (match_operand:V_128 0 "register_operand" "=x,x")
17513 (vec_select:V_128
17514 (match_operand:V_128 1 "register_operand" "0,x")
17515 (match_parallel 2 "palignr_operand"
17516 [(match_operand 3 "const_int_operand" "n, n")])))]
17517 "TARGET_SSSE3"
17518 {
17519 operands[2] =
17520 GEN_INT (INTVAL (operands[3]) * GET_MODE_UNIT_SIZE (GET_MODE (operands[0])));
17521
17522 switch (which_alternative)
17523 {
17524 case 0:
17525 return "palignr\t{%2, %1, %0|%0, %1, %2}";
17526 case 1:
17527 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
17528 default:
17529 gcc_unreachable ();
17530 }
17531 }
17532 [(set_attr "isa" "noavx,avx")
17533 (set_attr "type" "sseishft")
17534 (set_attr "atom_unit" "sishuf")
17535 (set_attr "prefix_data16" "1,*")
17536 (set_attr "prefix_extra" "1")
17537 (set_attr "length_immediate" "1")
17538 (set_attr "prefix" "orig,vex")])
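;; The starting element index taken from the parallel (operand 3) is
;; scaled by the element size to form palignr's byte immediate; e.g. an
;; element offset of 3 on V8HI becomes an immediate of 3 * 2 = 6 bytes.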
17539
17540 (define_expand "avx512vl_vinsert<mode>"
17541 [(match_operand:VI48F_256 0 "register_operand")
17542 (match_operand:VI48F_256 1 "register_operand")
17543 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17544 (match_operand:SI 3 "const_0_to_1_operand")
17545 (match_operand:VI48F_256 4 "register_operand")
17546 (match_operand:<avx512fmaskmode> 5 "register_operand")]
17547 "TARGET_AVX512VL"
17548 {
17549 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17550
17551 switch (INTVAL (operands[3]))
17552 {
17553 case 0:
17554 insn = gen_vec_set_lo_<mode>_mask;
17555 break;
17556 case 1:
17557 insn = gen_vec_set_hi_<mode>_mask;
17558 break;
17559 default:
17560 gcc_unreachable ();
17561 }
17562
17563 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
17564 operands[5]));
17565 DONE;
17566 })
17567
17568 (define_expand "avx_vinsertf128<mode>"
17569 [(match_operand:V_256 0 "register_operand")
17570 (match_operand:V_256 1 "register_operand")
17571 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17572 (match_operand:SI 3 "const_0_to_1_operand")]
17573 "TARGET_AVX"
17574 {
17575 rtx (*insn)(rtx, rtx, rtx);
17576
17577 switch (INTVAL (operands[3]))
17578 {
17579 case 0:
17580 insn = gen_vec_set_lo_<mode>;
17581 break;
17582 case 1:
17583 insn = gen_vec_set_hi_<mode>;
17584 break;
17585 default:
17586 gcc_unreachable ();
17587 }
17588
17589 emit_insn (insn (operands[0], operands[1], operands[2]));
17590 DONE;
17591 })
17592
17593 (define_insn "vec_set_lo_<mode><mask_name>"
17594 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17595 (vec_concat:VI8F_256
17596 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
17597 (vec_select:<ssehalfvecmode>
17598 (match_operand:VI8F_256 1 "register_operand" "v")
17599 (parallel [(const_int 2) (const_int 3)]))))]
17600 "TARGET_AVX"
17601 {
17602 if (TARGET_AVX512VL)
17603 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17604 else
17605 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17606 }
17607 [(set_attr "type" "sselog")
17608 (set_attr "prefix_extra" "1")
17609 (set_attr "length_immediate" "1")
17610 (set_attr "prefix" "vex")
17611 (set_attr "mode" "<sseinsnmode>")])
17612
17613 (define_insn "vec_set_hi_<mode><mask_name>"
17614 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17615 (vec_concat:VI8F_256
17616 (vec_select:<ssehalfvecmode>
17617 (match_operand:VI8F_256 1 "register_operand" "v")
17618 (parallel [(const_int 0) (const_int 1)]))
17619 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
17620 "TARGET_AVX"
17621 {
17622 if (TARGET_AVX512VL)
17623 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17624 else
17625 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17626 }
17627 [(set_attr "type" "sselog")
17628 (set_attr "prefix_extra" "1")
17629 (set_attr "length_immediate" "1")
17630 (set_attr "prefix" "vex")
17631 (set_attr "mode" "<sseinsnmode>")])
17632
17633 (define_insn "vec_set_lo_<mode><mask_name>"
17634 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
17635 (vec_concat:VI4F_256
17636 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
17637 (vec_select:<ssehalfvecmode>
17638 (match_operand:VI4F_256 1 "register_operand" "v")
17639 (parallel [(const_int 4) (const_int 5)
17640 (const_int 6) (const_int 7)]))))]
17641 "TARGET_AVX"
17642 {
17643 if (TARGET_AVX512VL)
17644 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17645 else
17646 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17647 }
17648 [(set_attr "type" "sselog")
17649 (set_attr "prefix_extra" "1")
17650 (set_attr "length_immediate" "1")
17651 (set_attr "prefix" "vex")
17652 (set_attr "mode" "<sseinsnmode>")])
17653
17654 (define_insn "vec_set_hi_<mode><mask_name>"
17655 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
17656 (vec_concat:VI4F_256
17657 (vec_select:<ssehalfvecmode>
17658 (match_operand:VI4F_256 1 "register_operand" "v")
17659 (parallel [(const_int 0) (const_int 1)
17660 (const_int 2) (const_int 3)]))
17661 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
17662 "TARGET_AVX"
17663 {
17664 if (TARGET_AVX512VL)
17665 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17666 else
17667 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17668 }
17669 [(set_attr "type" "sselog")
17670 (set_attr "prefix_extra" "1")
17671 (set_attr "length_immediate" "1")
17672 (set_attr "prefix" "vex")
17673 (set_attr "mode" "<sseinsnmode>")])
17674
17675 (define_insn "vec_set_lo_v16hi"
17676 [(set (match_operand:V16HI 0 "register_operand" "=x")
17677 (vec_concat:V16HI
17678 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
17679 (vec_select:V8HI
17680 (match_operand:V16HI 1 "register_operand" "x")
17681 (parallel [(const_int 8) (const_int 9)
17682 (const_int 10) (const_int 11)
17683 (const_int 12) (const_int 13)
17684 (const_int 14) (const_int 15)]))))]
17685 "TARGET_AVX"
17686 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
17687 [(set_attr "type" "sselog")
17688 (set_attr "prefix_extra" "1")
17689 (set_attr "length_immediate" "1")
17690 (set_attr "prefix" "vex")
17691 (set_attr "mode" "OI")])
17692
17693 (define_insn "vec_set_hi_v16hi"
17694 [(set (match_operand:V16HI 0 "register_operand" "=x")
17695 (vec_concat:V16HI
17696 (vec_select:V8HI
17697 (match_operand:V16HI 1 "register_operand" "x")
17698 (parallel [(const_int 0) (const_int 1)
17699 (const_int 2) (const_int 3)
17700 (const_int 4) (const_int 5)
17701 (const_int 6) (const_int 7)]))
17702 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
17703 "TARGET_AVX"
17704 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
17705 [(set_attr "type" "sselog")
17706 (set_attr "prefix_extra" "1")
17707 (set_attr "length_immediate" "1")
17708 (set_attr "prefix" "vex")
17709 (set_attr "mode" "OI")])
17710
17711 (define_insn "vec_set_lo_v32qi"
17712 [(set (match_operand:V32QI 0 "register_operand" "=x")
17713 (vec_concat:V32QI
17714 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
17715 (vec_select:V16QI
17716 (match_operand:V32QI 1 "register_operand" "x")
17717 (parallel [(const_int 16) (const_int 17)
17718 (const_int 18) (const_int 19)
17719 (const_int 20) (const_int 21)
17720 (const_int 22) (const_int 23)
17721 (const_int 24) (const_int 25)
17722 (const_int 26) (const_int 27)
17723 (const_int 28) (const_int 29)
17724 (const_int 30) (const_int 31)]))))]
17725 "TARGET_AVX"
17726 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
17727 [(set_attr "type" "sselog")
17728 (set_attr "prefix_extra" "1")
17729 (set_attr "length_immediate" "1")
17730 (set_attr "prefix" "vex")
17731 (set_attr "mode" "OI")])
17732
17733 (define_insn "vec_set_hi_v32qi"
17734 [(set (match_operand:V32QI 0 "register_operand" "=x")
17735 (vec_concat:V32QI
17736 (vec_select:V16QI
17737 (match_operand:V32QI 1 "register_operand" "x")
17738 (parallel [(const_int 0) (const_int 1)
17739 (const_int 2) (const_int 3)
17740 (const_int 4) (const_int 5)
17741 (const_int 6) (const_int 7)
17742 (const_int 8) (const_int 9)
17743 (const_int 10) (const_int 11)
17744 (const_int 12) (const_int 13)
17745 (const_int 14) (const_int 15)]))
17746 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
17747 "TARGET_AVX"
17748 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
17749 [(set_attr "type" "sselog")
17750 (set_attr "prefix_extra" "1")
17751 (set_attr "length_immediate" "1")
17752 (set_attr "prefix" "vex")
17753 (set_attr "mode" "OI")])
17754
17755 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
17756 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
17757 (unspec:V48_AVX2
17758 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
17759 (match_operand:V48_AVX2 1 "memory_operand" "m")]
17760 UNSPEC_MASKMOV))]
17761 "TARGET_AVX"
17762 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
17763 [(set_attr "type" "sselog1")
17764 (set_attr "prefix_extra" "1")
17765 (set_attr "prefix" "vex")
17766 (set_attr "btver2_decode" "vector")
17767 (set_attr "mode" "<sseinsnmode>")])
17768
17769 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
17770 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
17771 (unspec:V48_AVX2
17772 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
17773 (match_operand:V48_AVX2 2 "register_operand" "x")
17774 (match_dup 0)]
17775 UNSPEC_MASKMOV))]
17776 "TARGET_AVX"
17777 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17778 [(set_attr "type" "sselog1")
17779 (set_attr "prefix_extra" "1")
17780 (set_attr "prefix" "vex")
17781 (set_attr "btver2_decode" "vector")
17782 (set_attr "mode" "<sseinsnmode>")])
17783
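;; Named maskload/maskstore expanders for the vectorizer's masked
;; load/store optabs.  The AVX/AVX2 variants go through the v(p)maskmov
;; unspec patterns above; the AVX-512 variants are plain vec_merge with a
;; mask register and are matched by the masked move patterns elsewhere in
;; this file.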
17784 (define_expand "maskload<mode><sseintvecmodelower>"
17785 [(set (match_operand:V48_AVX2 0 "register_operand")
17786 (unspec:V48_AVX2
17787 [(match_operand:<sseintvecmode> 2 "register_operand")
17788 (match_operand:V48_AVX2 1 "memory_operand")]
17789 UNSPEC_MASKMOV))]
17790 "TARGET_AVX")
17791
17792 (define_expand "maskload<mode><avx512fmaskmodelower>"
17793 [(set (match_operand:V48_AVX512VL 0 "register_operand")
17794 (vec_merge:V48_AVX512VL
17795 (match_operand:V48_AVX512VL 1 "memory_operand")
17796 (match_dup 0)
17797 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
17798 "TARGET_AVX512F")
17799
17800 (define_expand "maskload<mode><avx512fmaskmodelower>"
17801 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
17802 (vec_merge:VI12_AVX512VL
17803 (match_operand:VI12_AVX512VL 1 "memory_operand")
17804 (match_dup 0)
17805 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
17806 "TARGET_AVX512BW")
17807
17808 (define_expand "maskstore<mode><sseintvecmodelower>"
17809 [(set (match_operand:V48_AVX2 0 "memory_operand")
17810 (unspec:V48_AVX2
17811 [(match_operand:<sseintvecmode> 2 "register_operand")
17812 (match_operand:V48_AVX2 1 "register_operand")
17813 (match_dup 0)]
17814 UNSPEC_MASKMOV))]
17815 "TARGET_AVX")
17816
17817 (define_expand "maskstore<mode><avx512fmaskmodelower>"
17818 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
17819 (vec_merge:V48_AVX512VL
17820 (match_operand:V48_AVX512VL 1 "register_operand")
17821 (match_dup 0)
17822 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
17823 "TARGET_AVX512F")
17824
17825 (define_expand "maskstore<mode><avx512fmaskmodelower>"
17826 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
17827 (vec_merge:VI12_AVX512VL
17828 (match_operand:VI12_AVX512VL 1 "register_operand")
17829 (match_dup 0)
17830 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
17831 "TARGET_AVX512BW")
17832
17833 (define_expand "cbranch<mode>4"
17834 [(set (reg:CC FLAGS_REG)
17835 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
17836 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
17837 (set (pc) (if_then_else
17838 (match_operator 0 "bt_comparison_operator"
17839 [(reg:CC FLAGS_REG) (const_int 0)])
17840 (label_ref (match_operand 3))
17841 (pc)))]
17842 "TARGET_SSE4_1"
17843 {
17844 ix86_expand_branch (GET_CODE (operands[0]),
17845 operands[1], operands[2], operands[3]);
17846 DONE;
17847 })
17848
17849
17850 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
17851 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
17852 (unspec:AVX256MODE2P
17853 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
17854 UNSPEC_CAST))]
17855 "TARGET_AVX"
17856 "#"
17857 "&& reload_completed"
17858 [(set (match_dup 0) (match_dup 1))]
17859 {
17860 if (REG_P (operands[0]))
17861 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
17862 else
17863 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
17864 <ssehalfvecmode>mode);
17865 })
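;; The cast patterns above are resolved after reload into a plain move of
;; the low half: with a register destination the move is narrowed to the
;; half-width mode, while with a memory destination the half-width source
;; is viewed as the full mode (the upper half is unspecified).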
17866
17867 (define_expand "vec_init<mode>"
17868 [(match_operand:V_256 0 "register_operand")
17869 (match_operand 1)]
17870 "TARGET_AVX"
17871 {
17872 ix86_expand_vector_init (false, operands[0], operands[1]);
17873 DONE;
17874 })
17875
17876 (define_expand "vec_init<mode>"
17877 [(match_operand:VF48_I1248 0 "register_operand")
17878 (match_operand 1)]
17879 "TARGET_AVX512F"
17880 {
17881 ix86_expand_vector_init (false, operands[0], operands[1]);
17882 DONE;
17883 })
17884
17885 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
17886 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
17887 (ashiftrt:VI48_AVX512F_AVX512VL
17888 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
17889 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
17890 "TARGET_AVX2 && <mask_mode512bit_condition>"
17891 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17892 [(set_attr "type" "sseishft")
17893 (set_attr "prefix" "maybe_evex")
17894 (set_attr "mode" "<sseinsnmode>")])
17895
17896 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
17897 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17898 (ashiftrt:VI2_AVX512VL
17899 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17900 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17901 "TARGET_AVX512BW"
17902 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17903 [(set_attr "type" "sseishft")
17904 (set_attr "prefix" "maybe_evex")
17905 (set_attr "mode" "<sseinsnmode>")])
17906
17907 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
17908 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
17909 (any_lshift:VI48_AVX512F
17910 (match_operand:VI48_AVX512F 1 "register_operand" "v")
17911 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
17912 "TARGET_AVX2 && <mask_mode512bit_condition>"
17913 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17914 [(set_attr "type" "sseishft")
17915 (set_attr "prefix" "maybe_evex")
17916 (set_attr "mode" "<sseinsnmode>")])
17917
17918 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
17919 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17920 (any_lshift:VI2_AVX512VL
17921 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17922 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17923 "TARGET_AVX512BW"
17924 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17925 [(set_attr "type" "sseishft")
17926 (set_attr "prefix" "maybe_evex")
17927 (set_attr "mode" "<sseinsnmode>")])
17928
17929 (define_insn "avx_vec_concat<mode>"
17930 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
17931 (vec_concat:V_256_512
17932 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
17933 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
17934 "TARGET_AVX"
17935 {
17936 switch (which_alternative)
17937 {
17938 case 0:
17939 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
17940 case 1:
17941 switch (get_attr_mode (insn))
17942 {
17943 case MODE_V16SF:
17944 return "vmovaps\t{%1, %t0|%t0, %1}";
17945 case MODE_V8DF:
17946 return "vmovapd\t{%1, %t0|%t0, %1}";
17947 case MODE_V8SF:
17948 return "vmovaps\t{%1, %x0|%x0, %1}";
17949 case MODE_V4DF:
17950 return "vmovapd\t{%1, %x0|%x0, %1}";
17951 case MODE_XI:
17952 return "vmovdqa\t{%1, %t0|%t0, %1}";
17953 case MODE_OI:
17954 return "vmovdqa\t{%1, %x0|%x0, %1}";
17955 default:
17956 gcc_unreachable ();
17957 }
17958 default:
17959 gcc_unreachable ();
17960 }
17961 }
17962 [(set_attr "type" "sselog,ssemov")
17963 (set_attr "prefix_extra" "1,*")
17964 (set_attr "length_immediate" "1,*")
17965 (set_attr "prefix" "maybe_evex")
17966 (set_attr "mode" "<sseinsnmode>")])
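;; When the second (high) half is zero, alternative 1 emits a plain move
;; of the low half; VEX/EVEX-encoded moves implicitly clear the
;; destination's upper bits, so no explicit zeroing of the high part is
;; needed.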
17967
17968 (define_insn "vcvtph2ps<mask_name>"
17969 [(set (match_operand:V4SF 0 "register_operand" "=v")
17970 (vec_select:V4SF
17971 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
17972 UNSPEC_VCVTPH2PS)
17973 (parallel [(const_int 0) (const_int 1)
17974 (const_int 2) (const_int 3)])))]
17975 "TARGET_F16C || TARGET_AVX512VL"
17976 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17977 [(set_attr "type" "ssecvt")
17978 (set_attr "prefix" "maybe_evex")
17979 (set_attr "mode" "V4SF")])
17980
17981 (define_insn "*vcvtph2ps_load<mask_name>"
17982 [(set (match_operand:V4SF 0 "register_operand" "=v")
17983 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
17984 UNSPEC_VCVTPH2PS))]
17985 "TARGET_F16C || TARGET_AVX512VL"
17986 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17987 [(set_attr "type" "ssecvt")
17988 (set_attr "prefix" "vex")
17989 (set_attr "mode" "V8SF")])
17990
17991 (define_insn "vcvtph2ps256<mask_name>"
17992 [(set (match_operand:V8SF 0 "register_operand" "=v")
17993 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
17994 UNSPEC_VCVTPH2PS))]
17995 "TARGET_F16C || TARGET_AVX512VL"
17996 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17997 [(set_attr "type" "ssecvt")
17998 (set_attr "prefix" "vex")
17999 (set_attr "btver2_decode" "double")
18000 (set_attr "mode" "V8SF")])
18001
18002 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
18003 [(set (match_operand:V16SF 0 "register_operand" "=v")
18004 (unspec:V16SF
18005 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18006 UNSPEC_VCVTPH2PS))]
18007 "TARGET_AVX512F"
18008 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18009 [(set_attr "type" "ssecvt")
18010 (set_attr "prefix" "evex")
18011 (set_attr "mode" "V16SF")])
18012
18013 (define_expand "vcvtps2ph_mask"
18014 [(set (match_operand:V8HI 0 "register_operand")
18015 (vec_merge:V8HI
18016 (vec_concat:V8HI
18017 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
18018 (match_operand:SI 2 "const_0_to_255_operand")]
18019 UNSPEC_VCVTPS2PH)
18020 (match_dup 5))
18021 (match_operand:V8HI 3 "vector_move_operand")
18022 (match_operand:QI 4 "register_operand")))]
18023 "TARGET_AVX512VL"
18024 "operands[5] = CONST0_RTX (V4HImode);")
18025
18026 (define_expand "vcvtps2ph"
18027 [(set (match_operand:V8HI 0 "register_operand")
18028 (vec_concat:V8HI
18029 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
18030 (match_operand:SI 2 "const_0_to_255_operand")]
18031 UNSPEC_VCVTPS2PH)
18032 (match_dup 3)))]
18033 "TARGET_F16C"
18034 "operands[3] = CONST0_RTX (V4HImode);")
18035
18036 (define_insn "*vcvtps2ph<mask_name>"
18037 [(set (match_operand:V8HI 0 "register_operand" "=v")
18038 (vec_concat:V8HI
18039 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
18040 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18041 UNSPEC_VCVTPS2PH)
18042 (match_operand:V4HI 3 "const0_operand")))]
18043 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
18044 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
18045 [(set_attr "type" "ssecvt")
18046 (set_attr "prefix" "maybe_evex")
18047 (set_attr "mode" "V4SF")])
18048
18049 (define_insn "*vcvtps2ph_store<mask_name>"
18050 [(set (match_operand:V4HI 0 "memory_operand" "=m")
18051 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
18052 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18053 UNSPEC_VCVTPS2PH))]
18054 "TARGET_F16C || TARGET_AVX512VL"
18055 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18056 [(set_attr "type" "ssecvt")
18057 (set_attr "prefix" "maybe_evex")
18058 (set_attr "mode" "V4SF")])
18059
18060 (define_insn "vcvtps2ph256<mask_name>"
18061 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
18062 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
18063 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18064 UNSPEC_VCVTPS2PH))]
18065 "TARGET_F16C || TARGET_AVX512VL"
18066 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18067 [(set_attr "type" "ssecvt")
18068 (set_attr "prefix" "maybe_evex")
18069 (set_attr "btver2_decode" "vector")
18070 (set_attr "mode" "V8SF")])
18071
18072 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
18073 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
18074 (unspec:V16HI
18075 [(match_operand:V16SF 1 "register_operand" "v")
18076 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18077 UNSPEC_VCVTPS2PH))]
18078 "TARGET_AVX512F"
18079 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18080 [(set_attr "type" "ssecvt")
18081 (set_attr "prefix" "evex")
18082 (set_attr "mode" "V16SF")])
18083
18084 ;; For gather* insn patterns
18085 (define_mode_iterator VEC_GATHER_MODE
18086 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
18087 (define_mode_attr VEC_GATHER_IDXSI
18088 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
18089 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
18090 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
18091 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
18092
18093 (define_mode_attr VEC_GATHER_IDXDI
18094 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18095 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
18096 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
18097 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
18098
18099 (define_mode_attr VEC_GATHER_SRCDI
18100 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18101 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
18102 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
18103 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
18104
18105 (define_expand "avx2_gathersi<mode>"
18106 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
18107 (unspec:VEC_GATHER_MODE
18108 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
18109 (mem:<ssescalarmode>
18110 (match_par_dup 7
18111 [(match_operand 2 "vsib_address_operand")
18112 (match_operand:<VEC_GATHER_IDXSI>
18113 3 "register_operand")
18114 (match_operand:SI 5 "const1248_operand")]))
18115 (mem:BLK (scratch))
18116 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
18117 UNSPEC_GATHER))
18118 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
18119 "TARGET_AVX2"
18120 {
18121 operands[7]
18122 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18123 operands[5]), UNSPEC_VSIBADDR);
18124 })
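;; operands[7] wraps the base address, the vector index and the scale in
;; UNSPEC_VSIBADDR, so that the insn patterns below can match the combined
;; VSIB memory operand through vsib_mem_operator.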
18125
18126 (define_insn "*avx2_gathersi<mode>"
18127 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18128 (unspec:VEC_GATHER_MODE
18129 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
18130 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18131 [(unspec:P
18132 [(match_operand:P 3 "vsib_address_operand" "Tv")
18133 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
18134 (match_operand:SI 6 "const1248_operand" "n")]
18135 UNSPEC_VSIBADDR)])
18136 (mem:BLK (scratch))
18137 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
18138 UNSPEC_GATHER))
18139 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18140 "TARGET_AVX2"
18141 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
18142 [(set_attr "type" "ssemov")
18143 (set_attr "prefix" "vex")
18144 (set_attr "mode" "<sseinsnmode>")])
18145
18146 (define_insn "*avx2_gathersi<mode>_2"
18147 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18148 (unspec:VEC_GATHER_MODE
18149 [(pc)
18150 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18151 [(unspec:P
18152 [(match_operand:P 2 "vsib_address_operand" "Tv")
18153 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
18154 (match_operand:SI 5 "const1248_operand" "n")]
18155 UNSPEC_VSIBADDR)])
18156 (mem:BLK (scratch))
18157 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
18158 UNSPEC_GATHER))
18159 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18160 "TARGET_AVX2"
18161 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
18162 [(set_attr "type" "ssemov")
18163 (set_attr "prefix" "vex")
18164 (set_attr "mode" "<sseinsnmode>")])
18165
18166 (define_expand "avx2_gatherdi<mode>"
18167 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
18168 (unspec:VEC_GATHER_MODE
18169 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18170 (mem:<ssescalarmode>
18171 (match_par_dup 7
18172 [(match_operand 2 "vsib_address_operand")
18173 (match_operand:<VEC_GATHER_IDXDI>
18174 3 "register_operand")
18175 (match_operand:SI 5 "const1248_operand")]))
18176 (mem:BLK (scratch))
18177 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
18178 UNSPEC_GATHER))
18179 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
18180 "TARGET_AVX2"
18181 {
18182 operands[7]
18183 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18184 operands[5]), UNSPEC_VSIBADDR);
18185 })
18186
18187 (define_insn "*avx2_gatherdi<mode>"
18188 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18189 (unspec:VEC_GATHER_MODE
18190 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18191 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18192 [(unspec:P
18193 [(match_operand:P 3 "vsib_address_operand" "Tv")
18194 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18195 (match_operand:SI 6 "const1248_operand" "n")]
18196 UNSPEC_VSIBADDR)])
18197 (mem:BLK (scratch))
18198 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18199 UNSPEC_GATHER))
18200 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18201 "TARGET_AVX2"
18202 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
18203 [(set_attr "type" "ssemov")
18204 (set_attr "prefix" "vex")
18205 (set_attr "mode" "<sseinsnmode>")])
18206
18207 (define_insn "*avx2_gatherdi<mode>_2"
18208 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18209 (unspec:VEC_GATHER_MODE
18210 [(pc)
18211 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18212 [(unspec:P
18213 [(match_operand:P 2 "vsib_address_operand" "Tv")
18214 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18215 (match_operand:SI 5 "const1248_operand" "n")]
18216 UNSPEC_VSIBADDR)])
18217 (mem:BLK (scratch))
18218 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18219 UNSPEC_GATHER))
18220 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18221 "TARGET_AVX2"
18222 {
18223 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18224 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
18225 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
18226 }
18227 [(set_attr "type" "ssemov")
18228 (set_attr "prefix" "vex")
18229 (set_attr "mode" "<sseinsnmode>")])
18230
18231 (define_insn "*avx2_gatherdi<mode>_3"
18232 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18233 (vec_select:<VEC_GATHER_SRCDI>
18234 (unspec:VI4F_256
18235 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18236 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18237 [(unspec:P
18238 [(match_operand:P 3 "vsib_address_operand" "Tv")
18239 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18240 (match_operand:SI 6 "const1248_operand" "n")]
18241 UNSPEC_VSIBADDR)])
18242 (mem:BLK (scratch))
18243 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18244 UNSPEC_GATHER)
18245 (parallel [(const_int 0) (const_int 1)
18246 (const_int 2) (const_int 3)])))
18247 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18248 "TARGET_AVX2"
18249 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
18250 [(set_attr "type" "ssemov")
18251 (set_attr "prefix" "vex")
18252 (set_attr "mode" "<sseinsnmode>")])
18253
18254 (define_insn "*avx2_gatherdi<mode>_4"
18255 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18256 (vec_select:<VEC_GATHER_SRCDI>
18257 (unspec:VI4F_256
18258 [(pc)
18259 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18260 [(unspec:P
18261 [(match_operand:P 2 "vsib_address_operand" "Tv")
18262 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18263 (match_operand:SI 5 "const1248_operand" "n")]
18264 UNSPEC_VSIBADDR)])
18265 (mem:BLK (scratch))
18266 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18267 UNSPEC_GATHER)
18268 (parallel [(const_int 0) (const_int 1)
18269 (const_int 2) (const_int 3)])))
18270 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18271 "TARGET_AVX2"
18272 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
18273 [(set_attr "type" "ssemov")
18274 (set_attr "prefix" "vex")
18275 (set_attr "mode" "<sseinsnmode>")])
18276
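;; The AVX-512 gathers take a mask register (k) instead of a vector mask;
;; it too is cleared as elements are loaded, so it is likewise tied to a
;; clobbered scratch.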
18277 (define_expand "<avx512>_gathersi<mode>"
18278 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18279 (unspec:VI48F
18280 [(match_operand:VI48F 1 "register_operand")
18281 (match_operand:<avx512fmaskmode> 4 "register_operand")
18282 (mem:<ssescalarmode>
18283 (match_par_dup 6
18284 [(match_operand 2 "vsib_address_operand")
18285 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
18286 (match_operand:SI 5 "const1248_operand")]))]
18287 UNSPEC_GATHER))
18288 (clobber (match_scratch:<avx512fmaskmode> 7))])]
18289 "TARGET_AVX512F"
18290 {
18291 operands[6]
18292 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18293 operands[5]), UNSPEC_VSIBADDR);
18294 })
18295
18296 (define_insn "*avx512f_gathersi<mode>"
18297 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18298 (unspec:VI48F
18299 [(match_operand:VI48F 1 "register_operand" "0")
18300 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
18301 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18302 [(unspec:P
18303 [(match_operand:P 4 "vsib_address_operand" "Tv")
18304 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
18305 (match_operand:SI 5 "const1248_operand" "n")]
18306 UNSPEC_VSIBADDR)])]
18307 UNSPEC_GATHER))
18308 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
18309 "TARGET_AVX512F"
18310 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
18311 [(set_attr "type" "ssemov")
18312 (set_attr "prefix" "evex")
18313 (set_attr "mode" "<sseinsnmode>")])
18314
18315 (define_insn "*avx512f_gathersi<mode>_2"
18316 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18317 (unspec:VI48F
18318 [(pc)
18319 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18320 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18321 [(unspec:P
18322 [(match_operand:P 3 "vsib_address_operand" "Tv")
18323 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18324 (match_operand:SI 4 "const1248_operand" "n")]
18325 UNSPEC_VSIBADDR)])]
18326 UNSPEC_GATHER))
18327 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18328 "TARGET_AVX512F"
18329 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
18330 [(set_attr "type" "ssemov")
18331 (set_attr "prefix" "evex")
18332 (set_attr "mode" "<sseinsnmode>")])
18333
18334
18335 (define_expand "<avx512>_gatherdi<mode>"
18336 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18337 (unspec:VI48F
18338 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18339 (match_operand:QI 4 "register_operand")
18340 (mem:<ssescalarmode>
18341 (match_par_dup 6
18342 [(match_operand 2 "vsib_address_operand")
18343 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
18344 (match_operand:SI 5 "const1248_operand")]))]
18345 UNSPEC_GATHER))
18346 (clobber (match_scratch:QI 7))])]
18347 "TARGET_AVX512F"
18348 {
18349 operands[6]
18350 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18351 operands[5]), UNSPEC_VSIBADDR);
18352 })
18353
18354 (define_insn "*avx512f_gatherdi<mode>"
18355 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18356 (unspec:VI48F
18357 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
18358 (match_operand:QI 7 "register_operand" "2")
18359 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18360 [(unspec:P
18361 [(match_operand:P 4 "vsib_address_operand" "Tv")
18362 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
18363 (match_operand:SI 5 "const1248_operand" "n")]
18364 UNSPEC_VSIBADDR)])]
18365 UNSPEC_GATHER))
18366 (clobber (match_scratch:QI 2 "=&Yk"))]
18367 "TARGET_AVX512F"
18368 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
18369 [(set_attr "type" "ssemov")
18370 (set_attr "prefix" "evex")
18371 (set_attr "mode" "<sseinsnmode>")])
18372
18373 (define_insn "*avx512f_gatherdi<mode>_2"
18374 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18375 (unspec:VI48F
18376 [(pc)
18377 (match_operand:QI 6 "register_operand" "1")
18378 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18379 [(unspec:P
18380 [(match_operand:P 3 "vsib_address_operand" "Tv")
18381 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18382 (match_operand:SI 4 "const1248_operand" "n")]
18383 UNSPEC_VSIBADDR)])]
18384 UNSPEC_GATHER))
18385 (clobber (match_scratch:QI 1 "=&Yk"))]
18386 "TARGET_AVX512F"
18387 {
18388 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18389 {
18390 if (<MODE_SIZE> != 64)
18391 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}";
18392 else
18393 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
18394 }
18395 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
18396 }
18397 [(set_attr "type" "ssemov")
18398 (set_attr "prefix" "evex")
18399 (set_attr "mode" "<sseinsnmode>")])
18400
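;; Scatters store the elements of operand 3 selected by the mask register
;; to base + index * scale; as with the gathers the mask is consumed
;; (cleared) by the instruction.  Illustrative pseudocode only:
;;
;;   for (i = 0; i < N; i++)
;;     if (k & (1 << i))
;;       *(elem *) (base + (intptr_t) idx[i] * scale) = src[i];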
18401 (define_expand "<avx512>_scattersi<mode>"
18402 [(parallel [(set (mem:VI48F
18403 (match_par_dup 5
18404 [(match_operand 0 "vsib_address_operand")
18405 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
18406 (match_operand:SI 4 "const1248_operand")]))
18407 (unspec:VI48F
18408 [(match_operand:<avx512fmaskmode> 1 "register_operand")
18409 (match_operand:VI48F 3 "register_operand")]
18410 UNSPEC_SCATTER))
18411 (clobber (match_scratch:<avx512fmaskmode> 6))])]
18412 "TARGET_AVX512F"
18413 {
18414 operands[5]
18415 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18416 operands[4]), UNSPEC_VSIBADDR);
18417 })
18418
18419 (define_insn "*avx512f_scattersi<mode>"
18420 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18421 [(unspec:P
18422 [(match_operand:P 0 "vsib_address_operand" "Tv")
18423 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18424 (match_operand:SI 4 "const1248_operand" "n")]
18425 UNSPEC_VSIBADDR)])
18426 (unspec:VI48F
18427 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18428 (match_operand:VI48F 3 "register_operand" "v")]
18429 UNSPEC_SCATTER))
18430 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18431 "TARGET_AVX512F"
18432 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18433 [(set_attr "type" "ssemov")
18434 (set_attr "prefix" "evex")
18435 (set_attr "mode" "<sseinsnmode>")])
18436
18437 (define_expand "<avx512>_scatterdi<mode>"
18438 [(parallel [(set (mem:VI48F
18439 (match_par_dup 5
18440 [(match_operand 0 "vsib_address_operand")
18441 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
18442 (match_operand:SI 4 "const1248_operand")]))
18443 (unspec:VI48F
18444 [(match_operand:QI 1 "register_operand")
18445 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
18446 UNSPEC_SCATTER))
18447 (clobber (match_scratch:QI 6))])]
18448 "TARGET_AVX512F"
18449 {
18450 operands[5]
18451 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18452 operands[4]), UNSPEC_VSIBADDR);
18453 })
18454
18455 (define_insn "*avx512f_scatterdi<mode>"
18456 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18457 [(unspec:P
18458 [(match_operand:P 0 "vsib_address_operand" "Tv")
18459 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18460 (match_operand:SI 4 "const1248_operand" "n")]
18461 UNSPEC_VSIBADDR)])
18462 (unspec:VI48F
18463 [(match_operand:QI 6 "register_operand" "1")
18464 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
18465 UNSPEC_SCATTER))
18466 (clobber (match_scratch:QI 1 "=&Yk"))]
18467 "TARGET_AVX512F"
18468 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18469 [(set_attr "type" "ssemov")
18470 (set_attr "prefix" "evex")
18471 (set_attr "mode" "<sseinsnmode>")])
18472
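;; vpcompress packs the elements selected by the mask into consecutive
;; positions at the low end of the destination (or into consecutive memory
;; for the store form); vpexpand is the inverse.  Illustrative pseudocode:
;;
;;   j = 0;
;;   for (i = 0; i < N; i++)
;;     if (k & (1 << i))
;;       dst[j++] = src[i];     /* remaining elements: merge value or zero */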
18473 (define_insn "<avx512>_compress<mode>_mask"
18474 [(set (match_operand:VI48F 0 "register_operand" "=v")
18475 (unspec:VI48F
18476 [(match_operand:VI48F 1 "register_operand" "v")
18477 (match_operand:VI48F 2 "vector_move_operand" "0C")
18478 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
18479 UNSPEC_COMPRESS))]
18480 "TARGET_AVX512F"
18481 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18482 [(set_attr "type" "ssemov")
18483 (set_attr "prefix" "evex")
18484 (set_attr "mode" "<sseinsnmode>")])
18485
18486 (define_insn "<avx512>_compressstore<mode>_mask"
18487 [(set (match_operand:VI48F 0 "memory_operand" "=m")
18488 (unspec:VI48F
18489 [(match_operand:VI48F 1 "register_operand" "x")
18490 (match_dup 0)
18491 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
18492 UNSPEC_COMPRESS_STORE))]
18493 "TARGET_AVX512F"
18494 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
18495 [(set_attr "type" "ssemov")
18496 (set_attr "prefix" "evex")
18497 (set_attr "memory" "store")
18498 (set_attr "mode" "<sseinsnmode>")])
18499
18500 (define_expand "<avx512>_expand<mode>_maskz"
18501 [(set (match_operand:VI48F 0 "register_operand")
18502 (unspec:VI48F
18503 [(match_operand:VI48F 1 "nonimmediate_operand")
18504 (match_operand:VI48F 2 "vector_move_operand")
18505 (match_operand:<avx512fmaskmode> 3 "register_operand")]
18506 UNSPEC_EXPAND))]
18507 "TARGET_AVX512F"
18508 "operands[2] = CONST0_RTX (<MODE>mode);")
18509
18510 (define_insn "<avx512>_expand<mode>_mask"
18511 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
18512 (unspec:VI48F
18513 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
18514 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
18515 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
18516 UNSPEC_EXPAND))]
18517 "TARGET_AVX512F"
18518 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18519 [(set_attr "type" "ssemov")
18520 (set_attr "prefix" "evex")
18521 (set_attr "memory" "none,load")
18522 (set_attr "mode" "<sseinsnmode>")])
18523
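;; vrange<ps|pd|ss|sd>: the low immediate bits select which of min, max,
;; min-of-magnitudes or max-of-magnitudes is returned per element, and the
;; remaining bits control the sign of the result (see the SDM for the
;; exact imm8 encoding).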
18524 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
18525 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18526 (unspec:VF_AVX512VL
18527 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18528 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18529 (match_operand:SI 3 "const_0_to_15_operand")]
18530 UNSPEC_RANGE))]
18531 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
18532 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
18533 [(set_attr "type" "sse")
18534 (set_attr "prefix" "evex")
18535 (set_attr "mode" "<MODE>")])
18536
18537 (define_insn "avx512dq_ranges<mode><round_saeonly_name>"
18538 [(set (match_operand:VF_128 0 "register_operand" "=v")
18539 (vec_merge:VF_128
18540 (unspec:VF_128
18541 [(match_operand:VF_128 1 "register_operand" "v")
18542 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18543 (match_operand:SI 3 "const_0_to_15_operand")]
18544 UNSPEC_RANGE)
18545 (match_dup 1)
18546 (const_int 1)))]
18547 "TARGET_AVX512DQ"
18548 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
18549 [(set_attr "type" "sse")
18550 (set_attr "prefix" "evex")
18551 (set_attr "mode" "<MODE>")])
18552
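;; vfpclass tests each element against the set of FP classes (QNaN, SNaN,
;; +/-0, +/-Inf, denormal, negative) selected by the immediate and sets the
;; corresponding mask bit if any selected class matches.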
18553 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
18554 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
18555 (unspec:<avx512fmaskmode>
18556 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18557 (match_operand:QI 2 "const_0_to_255_operand" "n")]
18558 UNSPEC_FPCLASS))]
18559 "TARGET_AVX512DQ"
18560 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
18561 [(set_attr "type" "sse")
18562 (set_attr "length_immediate" "1")
18563 (set_attr "prefix" "evex")
18564 (set_attr "mode" "<MODE>")])
18565
18566 (define_insn "avx512dq_vmfpclass<mode>"
18567 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
18568 (and:<avx512fmaskmode>
18569 (unspec:<avx512fmaskmode>
18570 [(match_operand:VF_128 1 "register_operand" "v")
18571 (match_operand:QI 2 "const_0_to_255_operand" "n")]
18572 UNSPEC_FPCLASS)
18573 (const_int 1)))]
18574 "TARGET_AVX512DQ"
18575 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
18576 [(set_attr "type" "sse")
18577 (set_attr "length_immediate" "1")
18578 (set_attr "prefix" "evex")
18579 (set_attr "mode" "<MODE>")])
18580
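;; vgetmant extracts the mantissa of each element normalized to an interval
;; chosen by the low two immediate bits (e.g. [1,2)), with the next bits
;; controlling the sign of the result.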
18581 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
18582 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18583 (unspec:VF_AVX512VL
18584 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
18585 (match_operand:SI 2 "const_0_to_15_operand")]
18586 UNSPEC_GETMANT))]
18587 "TARGET_AVX512F"
18588 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
18589 [(set_attr "prefix" "evex")
18590 (set_attr "mode" "<MODE>")])
18591
18592 (define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
18593 [(set (match_operand:VF_128 0 "register_operand" "=v")
18594 (vec_merge:VF_128
18595 (unspec:VF_128
18596 [(match_operand:VF_128 1 "register_operand" "v")
18597 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18598 (match_operand:SI 3 "const_0_to_15_operand")]
18599 UNSPEC_GETMANT)
18600 (match_dup 1)
18601 (const_int 1)))]
18602 "TARGET_AVX512F"
18603 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}";
18604 [(set_attr "prefix" "evex")
18605 (set_attr "mode" "<ssescalarmode>")])
18606
18607 ;; The correct representation for this is absolutely enormous, and
18608 ;; surely not generally useful.
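;; Roughly: vdbpsadbw shuffles the second source in 32-bit granularity
;; under control of the immediate and then accumulates 4-byte sums of
;; absolute differences against the first source into the word elements
;; of the destination.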
18609 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
18610 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18611 (unspec:VI2_AVX512VL
18612 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
18613 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
18614 (match_operand:SI 3 "const_0_to_255_operand")]
18615 UNSPEC_DBPSADBW))]
18616 "TARGET_AVX512BW"
18617 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
18618 [(set_attr "isa" "avx")
18619 (set_attr "type" "sselog1")
18620 (set_attr "length_immediate" "1")
18621 (set_attr "prefix" "evex")
18622 (set_attr "mode" "<sseinsnmode>")])
18623
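;; AVX-512CD: vplzcnt<d,q> gives the leading-zero count of each element;
;; vpconflict<d,q> (below) compares each element with all preceding
;; elements of the source and records the matches as a bit mask in the
;; corresponding destination element.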
18624 (define_insn "clz<mode>2<mask_name>"
18625 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18626 (clz:VI48_AVX512VL
18627 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
18628 "TARGET_AVX512CD"
18629 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18630 [(set_attr "type" "sse")
18631 (set_attr "prefix" "evex")
18632 (set_attr "mode" "<sseinsnmode>")])
18633
18634 (define_insn "<mask_codefor>conflict<mode><mask_name>"
18635 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18636 (unspec:VI48_AVX512VL
18637 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
18638 UNSPEC_CONFLICT))]
18639 "TARGET_AVX512CD"
18640 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18641 [(set_attr "type" "sse")
18642 (set_attr "prefix" "evex")
18643 (set_attr "mode" "<sseinsnmode>")])
18644
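;; SHA-NI patterns: sha1msg1/sha1msg2 and sha256msg1/sha256msg2 implement
;; the SHA-1/SHA-256 message schedule, sha1nexte/sha1rnds4/sha256rnds2 the
;; round computations.  sha256rnds2 takes its message-plus-constant input
;; implicitly in %xmm0, hence the "Yz" constraint on operand 3.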
18645 (define_insn "sha1msg1"
18646 [(set (match_operand:V4SI 0 "register_operand" "=x")
18647 (unspec:V4SI
18648 [(match_operand:V4SI 1 "register_operand" "0")
18649 (match_operand:V4SI 2 "vector_operand" "xBm")]
18650 UNSPEC_SHA1MSG1))]
18651 "TARGET_SHA"
18652 "sha1msg1\t{%2, %0|%0, %2}"
18653 [(set_attr "type" "sselog1")
18654 (set_attr "mode" "TI")])
18655
18656 (define_insn "sha1msg2"
18657 [(set (match_operand:V4SI 0 "register_operand" "=x")
18658 (unspec:V4SI
18659 [(match_operand:V4SI 1 "register_operand" "0")
18660 (match_operand:V4SI 2 "vector_operand" "xBm")]
18661 UNSPEC_SHA1MSG2))]
18662 "TARGET_SHA"
18663 "sha1msg2\t{%2, %0|%0, %2}"
18664 [(set_attr "type" "sselog1")
18665 (set_attr "mode" "TI")])
18666
18667 (define_insn "sha1nexte"
18668 [(set (match_operand:V4SI 0 "register_operand" "=x")
18669 (unspec:V4SI
18670 [(match_operand:V4SI 1 "register_operand" "0")
18671 (match_operand:V4SI 2 "vector_operand" "xBm")]
18672 UNSPEC_SHA1NEXTE))]
18673 "TARGET_SHA"
18674 "sha1nexte\t{%2, %0|%0, %2}"
18675 [(set_attr "type" "sselog1")
18676 (set_attr "mode" "TI")])
18677
18678 (define_insn "sha1rnds4"
18679 [(set (match_operand:V4SI 0 "register_operand" "=x")
18680 (unspec:V4SI
18681 [(match_operand:V4SI 1 "register_operand" "0")
18682 (match_operand:V4SI 2 "vector_operand" "xBm")
18683 (match_operand:SI 3 "const_0_to_3_operand" "n")]
18684 UNSPEC_SHA1RNDS4))]
18685 "TARGET_SHA"
18686 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
18687 [(set_attr "type" "sselog1")
18688 (set_attr "length_immediate" "1")
18689 (set_attr "mode" "TI")])
18690
18691 (define_insn "sha256msg1"
18692 [(set (match_operand:V4SI 0 "register_operand" "=x")
18693 (unspec:V4SI
18694 [(match_operand:V4SI 1 "register_operand" "0")
18695 (match_operand:V4SI 2 "vector_operand" "xBm")]
18696 UNSPEC_SHA256MSG1))]
18697 "TARGET_SHA"
18698 "sha256msg1\t{%2, %0|%0, %2}"
18699 [(set_attr "type" "sselog1")
18700 (set_attr "mode" "TI")])
18701
18702 (define_insn "sha256msg2"
18703 [(set (match_operand:V4SI 0 "register_operand" "=x")
18704 (unspec:V4SI
18705 [(match_operand:V4SI 1 "register_operand" "0")
18706 (match_operand:V4SI 2 "vector_operand" "xBm")]
18707 UNSPEC_SHA256MSG2))]
18708 "TARGET_SHA"
18709 "sha256msg2\t{%2, %0|%0, %2}"
18710 [(set_attr "type" "sselog1")
18711 (set_attr "mode" "TI")])
18712
18713 (define_insn "sha256rnds2"
18714 [(set (match_operand:V4SI 0 "register_operand" "=x")
18715 (unspec:V4SI
18716 [(match_operand:V4SI 1 "register_operand" "0")
18717 (match_operand:V4SI 2 "vector_operand" "xBm")
18718 (match_operand:V4SI 3 "register_operand" "Yz")]
18719 UNSPEC_SHA256RNDS2))]
18720 "TARGET_SHA"
18721 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
18722 [(set_attr "type" "sselog1")
18723 (set_attr "length_immediate" "1")
18724 (set_attr "mode" "TI")])
18725
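;; The cast patterns below reinterpret a 128- or 256-bit vector as a
;; 512-bit one; after reload they degenerate to a move of the low part,
;; leaving the upper bits undefined.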
18726 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
18727 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18728 (unspec:AVX512MODE2P
18729 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
18730 UNSPEC_CAST))]
18731 "TARGET_AVX512F"
18732 "#"
18733 "&& reload_completed"
18734 [(set (match_dup 0) (match_dup 1))]
18735 {
18736 if (REG_P (operands[0]))
18737 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
18738 else
18739 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
18740 <ssequartermode>mode);
18741 })
18742
18743 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
18744 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18745 (unspec:AVX512MODE2P
18746 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
18747 UNSPEC_CAST))]
18748 "TARGET_AVX512F"
18749 "#"
18750 "&& reload_completed"
18751 [(set (match_dup 0) (match_dup 1))]
18752 {
18753 if (REG_P (operands[0]))
18754 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
18755 else
18756 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
18757 <ssehalfvecmode>mode);
18758 })
18759
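;; AVX-512IFMA: vpmadd52luq/vpmadd52huq multiply the low 52 bits of each
;; pair of qword elements and add the low (resp. high) 52 bits of the
;; 104-bit product to the 64-bit accumulator.  Illustrative pseudocode:
;;
;;   p = (unsigned __int128) (a[i] & LO52) * (b[i] & LO52);
;;   dst[i] += luq ? (uint64_t) (p & LO52) : (uint64_t) (p >> 52);
;;   /* LO52 == (1ULL << 52) - 1 */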
18760 (define_int_iterator VPMADD52
18761 [UNSPEC_VPMADD52LUQ
18762 UNSPEC_VPMADD52HUQ])
18763
18764 (define_int_attr vpmadd52type
18765 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
18766
18767 (define_expand "vpamdd52huq<mode>_maskz"
18768 [(match_operand:VI8_AVX512VL 0 "register_operand")
18769 (match_operand:VI8_AVX512VL 1 "register_operand")
18770 (match_operand:VI8_AVX512VL 2 "register_operand")
18771 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
18772 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18773 "TARGET_AVX512IFMA"
18774 {
18775 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
18776 operands[0], operands[1], operands[2], operands[3],
18777 CONST0_RTX (<MODE>mode), operands[4]));
18778 DONE;
18779 })
18780
18781 (define_expand "vpamdd52luq<mode>_maskz"
18782 [(match_operand:VI8_AVX512VL 0 "register_operand")
18783 (match_operand:VI8_AVX512VL 1 "register_operand")
18784 (match_operand:VI8_AVX512VL 2 "register_operand")
18785 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
18786 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18787 "TARGET_AVX512IFMA"
18788 {
18789 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
18790 operands[0], operands[1], operands[2], operands[3],
18791 CONST0_RTX (<MODE>mode), operands[4]));
18792 DONE;
18793 })
18794
18795 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
18796 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18797 (unspec:VI8_AVX512VL
18798 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
18799 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
18800 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
18801 VPMADD52))]
18802 "TARGET_AVX512IFMA"
18803 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
18804 [(set_attr "type" "ssemuladd")
18805 (set_attr "prefix" "evex")
18806 (set_attr "mode" "<sseinsnmode>")])
18807
18808 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
18809 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18810 (vec_merge:VI8_AVX512VL
18811 (unspec:VI8_AVX512VL
18812 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
18813 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
18814 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
18815 VPMADD52)
18816 (match_dup 1)
18817 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18818 "TARGET_AVX512IFMA"
18819 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
18820 [(set_attr "type" "ssemuladd")
18821 (set_attr "prefix" "evex")
18822 (set_attr "mode" "<sseinsnmode>")])
18823
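;; vpmultishiftqb forms each destination byte by extracting an unaligned
;; 8-bit field from the corresponding source qword, starting at the bit
;; offset (mod 64, wrapping) given by the matching control byte of
;; operand 1.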
18824 (define_insn "vpmultishiftqb<mode><mask_name>"
18825 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18826 (unspec:VI1_AVX512VL
18827 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
18828 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
18829 UNSPEC_VPMULTISHIFT))]
18830 "TARGET_AVX512VBMI"
18831 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18832 [(set_attr "type" "sselog")
18833 (set_attr "prefix" "evex")
18834 (set_attr "mode" "<sseinsnmode>")])