1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
20 (define_c_enum "unspec" [
21 ;; SSE
22 UNSPEC_MOVNT
23
24 ;; SSE3
25 UNSPEC_LDDQU
26
27 ;; SSSE3
28 UNSPEC_PSHUFB
29 UNSPEC_PSIGN
30 UNSPEC_PALIGNR
31
32 ;; For SSE4A support
33 UNSPEC_EXTRQI
34 UNSPEC_EXTRQ
35 UNSPEC_INSERTQI
36 UNSPEC_INSERTQ
37
38 ;; For SSE4.1 support
39 UNSPEC_BLENDV
40 UNSPEC_INSERTPS
41 UNSPEC_DP
42 UNSPEC_MOVNTDQA
43 UNSPEC_MPSADBW
44 UNSPEC_PHMINPOSUW
45 UNSPEC_PTEST
46
47 ;; For SSE4.2 support
48 UNSPEC_PCMPESTR
49 UNSPEC_PCMPISTR
50
51 ;; For FMA4 support
52 UNSPEC_FMADDSUB
53 UNSPEC_XOP_UNSIGNED_CMP
54 UNSPEC_XOP_TRUEFALSE
55 UNSPEC_XOP_PERMUTE
56 UNSPEC_FRCZ
57
58 ;; For AES support
59 UNSPEC_AESENC
60 UNSPEC_AESENCLAST
61 UNSPEC_AESDEC
62 UNSPEC_AESDECLAST
63 UNSPEC_AESIMC
64 UNSPEC_AESKEYGENASSIST
65
66 ;; For PCLMUL support
67 UNSPEC_PCLMUL
68
69 ;; For AVX support
70 UNSPEC_PCMP
71 UNSPEC_VPERMIL
72 UNSPEC_VPERMIL2
73 UNSPEC_VPERMIL2F128
74 UNSPEC_CAST
75 UNSPEC_VTESTP
76 UNSPEC_VCVTPH2PS
77 UNSPEC_VCVTPS2PH
78
79 ;; For AVX2 support
80 UNSPEC_VPERMVAR
81 UNSPEC_VPERMTI
82 UNSPEC_GATHER
83 UNSPEC_VSIBADDR
84
85 ;; For AVX512F support
86 UNSPEC_VPERMI2
87 UNSPEC_VPERMT2
88 UNSPEC_VPERMI2_MASK
89 UNSPEC_UNSIGNED_FIX_NOTRUNC
90 UNSPEC_UNSIGNED_PCMP
91 UNSPEC_TESTM
92 UNSPEC_TESTNM
93 UNSPEC_SCATTER
94 UNSPEC_RCP14
95 UNSPEC_RSQRT14
96 UNSPEC_FIXUPIMM
97 UNSPEC_SCALEF
98 UNSPEC_VTERNLOG
99 UNSPEC_GETEXP
100 UNSPEC_GETMANT
101 UNSPEC_ALIGN
102 UNSPEC_CONFLICT
103 UNSPEC_COMPRESS
104 UNSPEC_COMPRESS_STORE
105 UNSPEC_EXPAND
106 UNSPEC_MASKED_EQ
107 UNSPEC_MASKED_GT
108
109 ;; For embedded rounding support
110 UNSPEC_EMBEDDED_ROUNDING
111
112 ;; For AVX512PF support
113 UNSPEC_GATHER_PREFETCH
114 UNSPEC_SCATTER_PREFETCH
115
116 ;; For AVX512ER support
117 UNSPEC_EXP2
118 UNSPEC_RCP28
119 UNSPEC_RSQRT28
120
121 ;; For SHA support
122 UNSPEC_SHA1MSG1
123 UNSPEC_SHA1MSG2
124 UNSPEC_SHA1NEXTE
125 UNSPEC_SHA1RNDS4
126 UNSPEC_SHA256MSG1
127 UNSPEC_SHA256MSG2
128 UNSPEC_SHA256RNDS2
129
130 ;; For AVX512BW support
131 UNSPEC_DBPSADBW
132 UNSPEC_PMADDUBSW512
133 UNSPEC_PMADDWD512
134 UNSPEC_PSHUFHW
135 UNSPEC_PSHUFLW
136 UNSPEC_CVTINT2MASK
137
138 ;; For AVX512DQ support
139 UNSPEC_REDUCE
140 UNSPEC_FPCLASS
141 UNSPEC_RANGE
142
143 ;; For AVX512IFMA support
144 UNSPEC_VPMADD52LUQ
145 UNSPEC_VPMADD52HUQ
146
147 ;; For AVX512VBMI support
148 UNSPEC_VPMULTISHIFT
149 ])
150
151 (define_c_enum "unspecv" [
152 UNSPECV_LDMXCSR
153 UNSPECV_STMXCSR
154 UNSPECV_CLFLUSH
155 UNSPECV_MONITOR
156 UNSPECV_MWAIT
157 UNSPECV_VZEROALL
158 UNSPECV_VZEROUPPER
159 ])
160
161 ;; All vector modes including V?TImode, used in move patterns.
162 (define_mode_iterator VMOVE
163 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
164 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
165 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
166 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
167 (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
168 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
169 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
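;; As a brief illustration of how these iterators expand (a sketch, not an
;; exhaustive list): an entry such as (V64QI "TARGET_AVX512F") makes every
;; pattern written with VMOVE, e.g. the "mov<mode>" expander below, emit a
;; movv64qi variant whose condition is the pattern's own condition ANDed
;; with TARGET_AVX512F, while an unconditional entry like V16QI is enabled
;; whenever the pattern condition (here TARGET_SSE) holds.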
170
171 ;; All AVX-512{F,VL} vector modes.  Assumes TARGET_AVX512F as the baseline.
172 (define_mode_iterator V48_AVX512VL
173 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
174 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
175 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
176 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
177
178 ;; 1- and 2-byte element AVX-512{BW,VL} vector modes.  Assumes TARGET_AVX512BW as the baseline.
179 (define_mode_iterator VI12_AVX512VL
180 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
181 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
182
183 (define_mode_iterator VI1_AVX512VL
184 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
185
186 ;; All vector modes
187 (define_mode_iterator V
188 [(V32QI "TARGET_AVX") V16QI
189 (V16HI "TARGET_AVX") V8HI
190 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
191 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
192 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
193 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
194
195 ;; All 128bit vector modes
196 (define_mode_iterator V_128
197 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
198
199 ;; All 256bit vector modes
200 (define_mode_iterator V_256
201 [V32QI V16HI V8SI V4DI V8SF V4DF])
202
203 ;; All 512bit vector modes
204 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
205
206 ;; All 256bit and 512bit vector modes
207 (define_mode_iterator V_256_512
208 [V32QI V16HI V8SI V4DI V8SF V4DF
209 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
210 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
211
212 ;; All vector float modes
213 (define_mode_iterator VF
214 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
215 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
216
217 ;; 128- and 256-bit float vector modes
218 (define_mode_iterator VF_128_256
219 [(V8SF "TARGET_AVX") V4SF
220 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
221
222 ;; All SFmode vector float modes
223 (define_mode_iterator VF1
224 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
225
226 ;; 128- and 256-bit SF vector modes
227 (define_mode_iterator VF1_128_256
228 [(V8SF "TARGET_AVX") V4SF])
229
230 (define_mode_iterator VF1_128_256VL
231 [V8SF (V4SF "TARGET_AVX512VL")])
232
233 ;; All DFmode vector float modes
234 (define_mode_iterator VF2
235 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
236
237 ;; 128- and 256-bit DF vector modes
238 (define_mode_iterator VF2_128_256
239 [(V4DF "TARGET_AVX") V2DF])
240
241 (define_mode_iterator VF2_512_256
242 [(V8DF "TARGET_AVX512F") V4DF])
243
244 (define_mode_iterator VF2_512_256VL
245 [V8DF (V4DF "TARGET_AVX512VL")])
246
247 ;; All 128bit vector float modes
248 (define_mode_iterator VF_128
249 [V4SF (V2DF "TARGET_SSE2")])
250
251 ;; All 256bit vector float modes
252 (define_mode_iterator VF_256
253 [V8SF V4DF])
254
255 ;; All 512bit vector float modes
256 (define_mode_iterator VF_512
257 [V16SF V8DF])
258
259 (define_mode_iterator VI48_AVX512VL
260 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
261 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
262
263 (define_mode_iterator VF_AVX512VL
264 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
265 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
266
267 (define_mode_iterator VF2_AVX512VL
268 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
269
270 (define_mode_iterator VF1_AVX512VL
271 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
272
273 ;; All vector integer modes
274 (define_mode_iterator VI
275 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
276 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
277 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
278 (V8SI "TARGET_AVX") V4SI
279 (V4DI "TARGET_AVX") V2DI])
280
281 (define_mode_iterator VI_AVX2
282 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
283 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
284 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
285 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
286
287 ;; All QImode vector integer modes
288 (define_mode_iterator VI1
289 [(V32QI "TARGET_AVX") V16QI])
290
291 ;; All 128bit vector modes, plus their 256bit AVX counterparts
292 (define_mode_iterator V_AVX
293 [V16QI V8HI V4SI V2DI V4SF V2DF
294 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
295 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
296 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")])
297
298 (define_mode_iterator VI48_AVX
299 [V4SI V2DI
300 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
301
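;; All DImode vector integer modes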
302 (define_mode_iterator VI8
303 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
304
305 (define_mode_iterator VI8_AVX512VL
306 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
307
308 (define_mode_iterator VI8_256_512
309 [V8DI (V4DI "TARGET_AVX512VL")])
310
311 (define_mode_iterator VI1_AVX2
312 [(V32QI "TARGET_AVX2") V16QI])
313
314 (define_mode_iterator VI1_AVX512
315 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
316
317 (define_mode_iterator VI2_AVX2
318 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
319
320 (define_mode_iterator VI2_AVX512F
321 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
322
323 (define_mode_iterator VI4_AVX
324 [(V8SI "TARGET_AVX") V4SI])
325
326 (define_mode_iterator VI4_AVX2
327 [(V8SI "TARGET_AVX2") V4SI])
328
329 (define_mode_iterator VI4_AVX512F
330 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
331
332 (define_mode_iterator VI4_AVX512VL
333 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
334
335 (define_mode_iterator VI48_AVX512F_AVX512VL
336 [V4SI V8SI (V16SI "TARGET_AVX512F")
337 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
338
339 (define_mode_iterator VI2_AVX512VL
340 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
341
342 (define_mode_iterator VI8_AVX2_AVX512BW
343 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
344
345 (define_mode_iterator VI8_AVX2
346 [(V4DI "TARGET_AVX2") V2DI])
347
348 (define_mode_iterator VI8_AVX2_AVX512F
349 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
350
351 (define_mode_iterator VI4_128_8_256
352 [V4SI V4DI])
353
354 ;; All V8D* modes
355 (define_mode_iterator V8FI
356 [V8DF V8DI])
357
358 ;; All V16S* modes
359 (define_mode_iterator V16FI
360 [V16SF V16SI])
361
362 ;; ??? We should probably use TImode instead.
363 (define_mode_iterator VIMAX_AVX2
364 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
365
366 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
367 (define_mode_iterator SSESCALARMODE
368 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
369
370 (define_mode_iterator VI12_AVX2
371 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
372 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
373
374 (define_mode_iterator VI24_AVX2
375 [(V16HI "TARGET_AVX2") V8HI
376 (V8SI "TARGET_AVX2") V4SI])
377
378 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
379 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
380 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
381 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
382
383 (define_mode_iterator VI124_AVX2
384 [(V32QI "TARGET_AVX2") V16QI
385 (V16HI "TARGET_AVX2") V8HI
386 (V8SI "TARGET_AVX2") V4SI])
387
388 (define_mode_iterator VI2_AVX2_AVX512BW
389 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
390
391 (define_mode_iterator VI48_AVX2
392 [(V8SI "TARGET_AVX2") V4SI
393 (V4DI "TARGET_AVX2") V2DI])
394
395 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
396 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
397 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
398 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
399
400 (define_mode_iterator VI248_AVX512BW_AVX512VL
401 [(V32HI "TARGET_AVX512BW")
402 (V4DI "TARGET_AVX512VL") V16SI V8DI])
403
404 ;; Assumes TARGET_AVX512VL as the baseline
405 (define_mode_iterator VI24_AVX512BW_1
406 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
407 V8SI V4SI])
408
409 (define_mode_iterator VI48_AVX512F
410 [(V16SI "TARGET_AVX512F") V8SI V4SI
411 (V8DI "TARGET_AVX512F") V4DI V2DI])
412
413 (define_mode_iterator VI48_AVX_AVX512F
414 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
415 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
416
417 (define_mode_iterator VI12_AVX_AVX512F
418 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
419 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
420
421 (define_mode_iterator V48_AVX2
422 [V4SF V2DF
423 V8SF V4DF
424 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
425 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
426
427 (define_mode_attr avx512
428 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
429 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
430 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
431 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
432 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
433 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
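;; For instance, the masked load pattern "<avx512>_load<mode>_mask" defined
;; below instantiates as avx512f_loadv16sf_mask for V16SF but as
;; avx512vl_loadv8sf_mask for V8SF, so callers refer to the 128/256-bit
;; variants through gen_avx512vl_* names.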
434
435 (define_mode_attr sse2_avx_avx512f
436 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
437 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
438 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
439 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
440 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
441 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
442
443 (define_mode_attr sse2_avx2
444 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
445 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
446 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
447 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
448 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
449
450 (define_mode_attr ssse3_avx2
451 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
452 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
453 (V4SI "ssse3") (V8SI "avx2")
454 (V2DI "ssse3") (V4DI "avx2")
455 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
456
457 (define_mode_attr sse4_1_avx2
458 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
459 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
460 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
461 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
462
463 (define_mode_attr avx_avx2
464 [(V4SF "avx") (V2DF "avx")
465 (V8SF "avx") (V4DF "avx")
466 (V4SI "avx2") (V2DI "avx2")
467 (V8SI "avx2") (V4DI "avx2")])
468
469 (define_mode_attr vec_avx2
470 [(V16QI "vec") (V32QI "avx2")
471 (V8HI "vec") (V16HI "avx2")
472 (V4SI "vec") (V8SI "avx2")
473 (V2DI "vec") (V4DI "avx2")])
474
475 (define_mode_attr avx2_avx512
476 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
477 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
478 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
479 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
480 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
481
482 (define_mode_attr shuffletype
483 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
484 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
485 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
486 (V32HI "i") (V16HI "i") (V8HI "i")
487 (V64QI "i") (V32QI "i") (V16QI "i")
488 (V4TI "i") (V2TI "i") (V1TI "i")])
489
490 (define_mode_attr ssequartermode
491 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
492
493 (define_mode_attr ssedoublemodelower
494 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
495 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
496 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
497
498 (define_mode_attr ssedoublemode
499 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
500 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
501 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
502 (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
503 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
504 (V4DI "V8DI") (V8DI "V16DI")])
505
506 (define_mode_attr ssebytemode
507 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
508
509 ;; All 128bit vector integer modes
510 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
511
512 ;; All 256bit vector integer modes
513 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
514
515 ;; All 512bit vector integer modes
516 (define_mode_iterator VI_512
517 [(V64QI "TARGET_AVX512BW")
518 (V32HI "TARGET_AVX512BW")
519 V16SI V8DI])
520
521 ;; Various 128bit vector integer mode combinations
522 (define_mode_iterator VI12_128 [V16QI V8HI])
523 (define_mode_iterator VI14_128 [V16QI V4SI])
524 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
525 (define_mode_iterator VI24_128 [V8HI V4SI])
526 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
527 (define_mode_iterator VI48_128 [V4SI V2DI])
528
529 ;; Various 256bit and 512bit vector integer mode combinations
530 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
531 (define_mode_iterator VI124_256_AVX512F_AVX512BW
532 [V32QI V16HI V8SI
533 (V64QI "TARGET_AVX512BW")
534 (V32HI "TARGET_AVX512BW")
535 (V16SI "TARGET_AVX512F")])
536 (define_mode_iterator VI48_256 [V8SI V4DI])
537 (define_mode_iterator VI48_512 [V16SI V8DI])
538 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
539 (define_mode_iterator VI_AVX512BW
540 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
541
542 ;; Int-float size matches
543 (define_mode_iterator VI4F_128 [V4SI V4SF])
544 (define_mode_iterator VI8F_128 [V2DI V2DF])
545 (define_mode_iterator VI4F_256 [V8SI V8SF])
546 (define_mode_iterator VI8F_256 [V4DI V4DF])
547 (define_mode_iterator VI8F_256_512
548 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
549 (define_mode_iterator VI48F_256_512
550 [V8SI V8SF
551 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
552 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
553 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
554 (define_mode_iterator VF48_I1248
555 [V16SI V16SF V8DI V8DF V32HI V64QI])
556 (define_mode_iterator VI48F
557 [V16SI V16SF V8DI V8DF
558 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
559 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
560 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
561 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
562 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
563
564 ;; Mapping from float mode to required SSE level
565 (define_mode_attr sse
566 [(SF "sse") (DF "sse2")
567 (V4SF "sse") (V2DF "sse2")
568 (V16SF "avx512f") (V8SF "avx")
569 (V8DF "avx512f") (V4DF "avx")])
570
571 (define_mode_attr sse2
572 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
573 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
574
575 (define_mode_attr sse3
576 [(V16QI "sse3") (V32QI "avx")])
577
578 (define_mode_attr sse4_1
579 [(V4SF "sse4_1") (V2DF "sse4_1")
580 (V8SF "avx") (V4DF "avx")
581 (V8DF "avx512f")
582 (V4DI "avx") (V2DI "sse4_1")
583 (V8SI "avx") (V4SI "sse4_1")
584 (V16QI "sse4_1") (V32QI "avx")
585 (V8HI "sse4_1") (V16HI "avx")])
586
587 (define_mode_attr avxsizesuffix
588 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
589 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
590 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
591 (V16SF "512") (V8DF "512")
592 (V8SF "256") (V4DF "256")
593 (V4SF "") (V2DF "")])
594
595 ;; SSE instruction mode
596 (define_mode_attr sseinsnmode
597 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
598 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
599 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
600 (V16SF "V16SF") (V8DF "V8DF")
601 (V8SF "V8SF") (V4DF "V4DF")
602 (V4SF "V4SF") (V2DF "V2DF")
603 (TI "TI")])
604
605 ;; Mapping of vector modes to the corresponding mask mode
606 (define_mode_attr avx512fmaskmode
607 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
608 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
609 (V16SI "HI") (V8SI "QI") (V4SI "QI")
610 (V8DI "QI") (V4DI "QI") (V2DI "QI")
611 (V16SF "HI") (V8SF "QI") (V4SF "QI")
612 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
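;; E.g. a V16SF operand pairs with an HImode mask (16 bits, one per
;; element), so (match_operand:<avx512fmaskmode> ...) in the masked
;; patterns below becomes an HImode mask-register operand for V16SF and a
;; QImode one for V8DF.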
613
614 ;; Same mapping as above, with the mask mode name in lower case
615 (define_mode_attr avx512fmaskmodelower
616 [(V64QI "di") (V32QI "si") (V16QI "hi")
617 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
618 (V16SI "hi") (V8SI "qi") (V4SI "qi")
619 (V8DI "qi") (V4DI "qi") (V2DI "qi")
620 (V16SF "hi") (V8SF "qi") (V4SF "qi")
621 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
622
623 ;; Mapping of vector modes to an integer vector mode of the same size
624 (define_mode_attr sseintvecmode
625 [(V16SF "V16SI") (V8DF "V8DI")
626 (V8SF "V8SI") (V4DF "V4DI")
627 (V4SF "V4SI") (V2DF "V2DI")
628 (V16SI "V16SI") (V8DI "V8DI")
629 (V8SI "V8SI") (V4DI "V4DI")
630 (V4SI "V4SI") (V2DI "V2DI")
631 (V16HI "V16HI") (V8HI "V8HI")
632 (V32HI "V32HI") (V64QI "V64QI")
633 (V32QI "V32QI") (V16QI "V16QI")])
634
635 (define_mode_attr sseintvecmode2
636 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
637 (V8SF "OI") (V4SF "TI")])
638
639 (define_mode_attr sseintvecmodelower
640 [(V16SF "v16si") (V8DF "v8di")
641 (V8SF "v8si") (V4DF "v4di")
642 (V4SF "v4si") (V2DF "v2di")
643 (V8SI "v8si") (V4DI "v4di")
644 (V4SI "v4si") (V2DI "v2di")
645 (V16HI "v16hi") (V8HI "v8hi")
646 (V32QI "v32qi") (V16QI "v16qi")])
647
648 ;; Mapping of vector modes to a vector mode of double size
649 (define_mode_attr ssedoublevecmode
650 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
651 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
652 (V8SF "V16SF") (V4DF "V8DF")
653 (V4SF "V8SF") (V2DF "V4DF")])
654
655 ;; Mapping of vector modes to a vector mode of half size
656 (define_mode_attr ssehalfvecmode
657 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
658 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
659 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
660 (V16SF "V8SF") (V8DF "V4DF")
661 (V8SF "V4SF") (V4DF "V2DF")
662 (V4SF "V2SF")])
663
664 ;; Mapping of vector modes to the packed single mode of the same size
665 (define_mode_attr ssePSmode
666 [(V16SI "V16SF") (V8DF "V16SF")
667 (V16SF "V16SF") (V8DI "V16SF")
668 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
669 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
670 (V8SI "V8SF") (V4SI "V4SF")
671 (V4DI "V8SF") (V2DI "V4SF")
672 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
673 (V8SF "V8SF") (V4SF "V4SF")
674 (V4DF "V8SF") (V2DF "V4SF")])
675
676 (define_mode_attr ssePSmode2
677 [(V8DI "V8SF") (V4DI "V4SF")])
678
679 ;; Mapping of vector modes back to the scalar modes
680 (define_mode_attr ssescalarmode
681 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
682 (V32HI "HI") (V16HI "HI") (V8HI "HI")
683 (V16SI "SI") (V8SI "SI") (V4SI "SI")
684 (V8DI "DI") (V4DI "DI") (V2DI "DI")
685 (V16SF "SF") (V8SF "SF") (V4SF "SF")
686 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
687
688 ;; Mapping of vector modes to the 128bit modes
689 (define_mode_attr ssexmmmode
690 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
691 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
692 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
693 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
694 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
695 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
696
697 ;; Pointer size override for scalar modes (Intel asm dialect)
698 (define_mode_attr iptr
699 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
700 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
701 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
702 (V8SF "k") (V4DF "q")
703 (V4SF "k") (V2DF "q")
704 (SF "k") (DF "q")])
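;; E.g. on the Intel-syntax side of the scalar templates below, %<iptr>2
;; becomes %q2 for V2DF, printing the memory operand with a QWORD PTR size
;; override (an illustration of the intent; the modifier itself is handled
;; in ix86_print_operand).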
705
706 ;; Number of scalar elements in each vector type
707 (define_mode_attr ssescalarnum
708 [(V64QI "64") (V16SI "16") (V8DI "8")
709 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
710 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
711 (V16SF "16") (V8DF "8")
712 (V8SF "8") (V4DF "4")
713 (V4SF "4") (V2DF "2")])
714
715 ;; Mask of scalar element indices (element count minus one) in each vector type
716 (define_mode_attr ssescalarnummask
717 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
718 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
719 (V8SF "7") (V4DF "3")
720 (V4SF "3") (V2DF "1")])
721
722 (define_mode_attr ssescalarsize
723 [(V4TI "64") (V2TI "64") (V1TI "64")
724 (V8DI "64") (V4DI "64") (V2DI "64")
725 (V64QI "8") (V32QI "8") (V16QI "8")
726 (V32HI "16") (V16HI "16") (V8HI "16")
727 (V16SI "32") (V8SI "32") (V4SI "32")
728 (V16SF "32") (V8SF "32") (V4SF "32")
729 (V8DF "64") (V4DF "64") (V2DF "64")])
730
731 ;; SSE prefix for integer vector modes
732 (define_mode_attr sseintprefix
733 [(V2DI "p") (V2DF "")
734 (V4DI "p") (V4DF "")
735 (V8DI "p") (V8DF "")
736 (V4SI "p") (V4SF "")
737 (V8SI "p") (V8SF "")
738 (V16SI "p") (V16SF "")
739 (V16QI "p") (V8HI "p")
740 (V32QI "p") (V16HI "p")
741 (V64QI "p") (V32HI "p")])
742
743 ;; SSE scalar suffix for vector modes
744 (define_mode_attr ssescalarmodesuffix
745 [(SF "ss") (DF "sd")
746 (V8SF "ss") (V4DF "sd")
747 (V4SF "ss") (V2DF "sd")
748 (V8SI "ss") (V4DI "sd")
749 (V4SI "d")])
750
751 ;; Pack/unpack vector modes
752 (define_mode_attr sseunpackmode
753 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
754 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
755 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
756
757 (define_mode_attr ssepackmode
758 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
759 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
760 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
761
762 ;; Maximum rotate count (element width minus one) for the XOP rotate immediate constraint
763 (define_mode_attr sserotatemax
764 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
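;; E.g. the XOP rotate-by-immediate patterns accept counts 0..7 for V16QI
;; (vprotb) and 0..63 for V2DI (vprotq).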
765
766 ;; Mapping of mode to cast intrinsic name
767 (define_mode_attr castmode
768 [(V8SI "si") (V8SF "ps") (V4DF "pd")
769 (V16SI "si") (V16SF "ps") (V8DF "pd")])
770
771 ;; Instruction suffix for sign and zero extensions.
772 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
773
774 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
775 ;; i64x4 or f64x4 for 512bit modes.
776 (define_mode_attr i128
777 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
778 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
779 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
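;; E.g. for V8SI <i128> is "%~128"; the %~ output modifier prints "i" when
;; AVX2 is available and "f" otherwise, so vextract<i128> assembles as
;; vextracti128 or vextractf128, while the 512-bit modes use the 64x4
;; extract/insert forms directly.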
780
781 ;; Mix-n-match
782 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
783 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
784
785 ;; Mapping for dbpsadbw modes
786 (define_mode_attr dbpsadbwmode
787 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
788
789 ;; Mapping suffixes for broadcast
790 (define_mode_attr bcstscalarsuff
791 [(V64QI "b") (V32QI "b") (V16QI "b")
792 (V32HI "w") (V16HI "w") (V8HI "w")
793 (V16SI "d") (V8SI "d") (V4SI "d")
794 (V8DI "q") (V4DI "q") (V2DI "q")
795 (V16SF "ss") (V8SF "ss") (V4SF "ss")
796 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
797
798 ;; Tie mode of assembler operand to mode iterator
799 (define_mode_attr concat_tg_mode
800 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
801 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
802
803 ;; Half mask mode for unpacks
804 (define_mode_attr HALFMASKMODE
805 [(DI "SI") (SI "HI")])
806
807 ;; Double mask mode for packs
808 (define_mode_attr DOUBLEMASKMODE
809 [(HI "SI") (SI "DI")])
810
811
812 ;; Include define_subst patterns for instructions with mask
813 (include "subst.md")
814
815 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
816
817 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
818 ;;
819 ;; Move patterns
820 ;;
821 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
822
823 ;; All of these patterns are enabled for SSE1 as well as SSE2.
824 ;; This is essential for maintaining stable calling conventions.
825
826 (define_expand "mov<mode>"
827 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
828 (match_operand:VMOVE 1 "nonimmediate_operand"))]
829 "TARGET_SSE"
830 {
831 ix86_expand_vector_move (<MODE>mode, operands);
832 DONE;
833 })
834
835 (define_insn "mov<mode>_internal"
836 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
837 "=v,v ,v ,m")
838 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
839 " C,BC,vm,v"))]
840 "TARGET_SSE
841 && (register_operand (operands[0], <MODE>mode)
842 || register_operand (operands[1], <MODE>mode))"
843 {
844 switch (get_attr_type (insn))
845 {
846 case TYPE_SSELOG1:
847 return standard_sse_constant_opcode (insn, operands[1]);
848
849 case TYPE_SSEMOV:
850 /* There is no EVEX-encoded vmov* for sizes smaller than 64 bytes
851 in AVX512F, so we need workarounds to access the EVEX-only SSE
852 registers 16-31.  With AVX512VL no workaround is needed.  */
853 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
854 && (EXT_REX_SSE_REG_P (operands[0])
855 || EXT_REX_SSE_REG_P (operands[1])))
856 {
857 if (memory_operand (operands[0], <MODE>mode))
858 {
859 if (<MODE_SIZE> == 32)
860 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
861 else if (<MODE_SIZE> == 16)
862 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
863 else
864 gcc_unreachable ();
865 }
866 else if (memory_operand (operands[1], <MODE>mode))
867 {
868 if (<MODE_SIZE> == 32)
869 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
870 else if (<MODE_SIZE> == 16)
871 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
872 else
873 gcc_unreachable ();
874 }
875 else
876 /* Reg -> reg move is always aligned. Just use wider move. */
877 switch (get_attr_mode (insn))
878 {
879 case MODE_V8SF:
880 case MODE_V4SF:
881 return "vmovaps\t{%g1, %g0|%g0, %g1}";
882 case MODE_V4DF:
883 case MODE_V2DF:
884 return "vmovapd\t{%g1, %g0|%g0, %g1}";
885 case MODE_OI:
886 case MODE_TI:
887 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
888 default:
889 gcc_unreachable ();
890 }
891 }
892
893 switch (get_attr_mode (insn))
894 {
895 case MODE_V16SF:
896 case MODE_V8SF:
897 case MODE_V4SF:
898 if (misaligned_operand (operands[0], <MODE>mode)
899 || misaligned_operand (operands[1], <MODE>mode))
900 return "%vmovups\t{%1, %0|%0, %1}";
901 else
902 return "%vmovaps\t{%1, %0|%0, %1}";
903
904 case MODE_V8DF:
905 case MODE_V4DF:
906 case MODE_V2DF:
907 if (misaligned_operand (operands[0], <MODE>mode)
908 || misaligned_operand (operands[1], <MODE>mode))
909 return "%vmovupd\t{%1, %0|%0, %1}";
910 else
911 return "%vmovapd\t{%1, %0|%0, %1}";
912
913 case MODE_OI:
914 case MODE_TI:
915 if (misaligned_operand (operands[0], <MODE>mode)
916 || misaligned_operand (operands[1], <MODE>mode))
917 return TARGET_AVX512VL ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
918 : "%vmovdqu\t{%1, %0|%0, %1}";
919 else
920 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
921 : "%vmovdqa\t{%1, %0|%0, %1}";
922 case MODE_XI:
923 if (misaligned_operand (operands[0], <MODE>mode)
924 || misaligned_operand (operands[1], <MODE>mode))
925 return (<MODE>mode == V16SImode
926 || <MODE>mode == V8DImode
927 || TARGET_AVX512BW)
928 ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
929 : "vmovdqu64\t{%1, %0|%0, %1}";
930 else
931 return "vmovdqa64\t{%1, %0|%0, %1}";
932
933 default:
934 gcc_unreachable ();
935 }
936
937 default:
938 gcc_unreachable ();
939 }
940 }
941 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
942 (set_attr "prefix" "maybe_vex")
943 (set (attr "mode")
944 (cond [(and (eq_attr "alternative" "1")
945 (match_test "TARGET_AVX512VL"))
946 (const_string "XI")
947 (and (match_test "<MODE_SIZE> == 16")
948 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
949 (and (eq_attr "alternative" "3")
950 (match_test "TARGET_SSE_TYPELESS_STORES"))))
951 (const_string "<ssePSmode>")
952 (match_test "TARGET_AVX")
953 (const_string "<sseinsnmode>")
954 (ior (not (match_test "TARGET_SSE2"))
955 (match_test "optimize_function_for_size_p (cfun)"))
956 (const_string "V4SF")
957 (and (eq_attr "alternative" "0")
958 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
959 (const_string "TI")
960 ]
961 (const_string "<sseinsnmode>")))
962 (set (attr "enabled")
963 (cond [(and (match_test "<MODE_SIZE> == 16")
964 (eq_attr "alternative" "1"))
965 (symbol_ref "TARGET_SSE2")
966 (and (match_test "<MODE_SIZE> == 32")
967 (eq_attr "alternative" "1"))
968 (symbol_ref "TARGET_AVX2")
969 ]
970 (symbol_ref "true")))])
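;; As a concrete illustration of the alternatives above: for V4SF,
;; alternative 0 materializes zero with (v)xorps, alternatives 2/3 emit
;; movaps or movups depending on operand alignment, and the
;; AVX512F-without-VL case falls back to the vextract/vbroadcast 32x4
;; workaround when an EVEX-only register (%xmm16-%xmm31) is involved.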
971
972 (define_insn "<avx512>_load<mode>_mask"
973 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
974 (vec_merge:V48_AVX512VL
975 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
976 (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
977 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
978 "TARGET_AVX512F"
979 {
980 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
981 {
982 if (misaligned_operand (operands[1], <MODE>mode))
983 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
984 else
985 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
986 }
987 else
988 {
989 if (misaligned_operand (operands[1], <MODE>mode))
990 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
991 else
992 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
993 }
994 }
995 [(set_attr "type" "ssemov")
996 (set_attr "prefix" "evex")
997 (set_attr "memory" "none,load")
998 (set_attr "mode" "<sseinsnmode>")])
999
1000 (define_insn "<avx512>_load<mode>_mask"
1001 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1002 (vec_merge:VI12_AVX512VL
1003 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
1004 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
1005 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1006 "TARGET_AVX512BW"
1007 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1008 [(set_attr "type" "ssemov")
1009 (set_attr "prefix" "evex")
1010 (set_attr "memory" "none,load")
1011 (set_attr "mode" "<sseinsnmode>")])
1012
1013 (define_insn "<avx512>_blendm<mode>"
1014 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1015 (vec_merge:V48_AVX512VL
1016 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1017 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1018 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1019 "TARGET_AVX512F"
1020 "vblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1021 [(set_attr "type" "ssemov")
1022 (set_attr "prefix" "evex")
1023 (set_attr "mode" "<sseinsnmode>")])
1024
1025 (define_insn "<avx512>_blendm<mode>"
1026 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1027 (vec_merge:VI12_AVX512VL
1028 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1029 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1030 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1031 "TARGET_AVX512BW"
1032 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1033 [(set_attr "type" "ssemov")
1034 (set_attr "prefix" "evex")
1035 (set_attr "mode" "<sseinsnmode>")])
1036
1037 (define_insn "<avx512>_store<mode>_mask"
1038 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1039 (vec_merge:V48_AVX512VL
1040 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1041 (match_dup 0)
1042 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1043 "TARGET_AVX512F"
1044 {
1045 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1046 {
1047 if (misaligned_operand (operands[0], <MODE>mode))
1048 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1049 else
1050 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1051 }
1052 else
1053 {
1054 if (misaligned_operand (operands[0], <MODE>mode))
1055 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1056 else
1057 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1058 }
1059 }
1060 [(set_attr "type" "ssemov")
1061 (set_attr "prefix" "evex")
1062 (set_attr "memory" "store")
1063 (set_attr "mode" "<sseinsnmode>")])
1064
1065 (define_insn "<avx512>_store<mode>_mask"
1066 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1067 (vec_merge:VI12_AVX512VL
1068 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1069 (match_dup 0)
1070 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1071 "TARGET_AVX512BW"
1072 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1073 [(set_attr "type" "ssemov")
1074 (set_attr "prefix" "evex")
1075 (set_attr "memory" "store")
1076 (set_attr "mode" "<sseinsnmode>")])
1077
1078 (define_insn "sse2_movq128"
1079 [(set (match_operand:V2DI 0 "register_operand" "=v")
1080 (vec_concat:V2DI
1081 (vec_select:DI
1082 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1083 (parallel [(const_int 0)]))
1084 (const_int 0)))]
1085 "TARGET_SSE2"
1086 "%vmovq\t{%1, %0|%0, %q1}"
1087 [(set_attr "type" "ssemov")
1088 (set_attr "prefix" "maybe_vex")
1089 (set_attr "mode" "TI")])
1090
1091 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1092 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1093 ;; from memory, we'd prefer to load the memory directly into the %xmm
1094 ;; register. To facilitate this happy circumstance, this pattern won't
1095 ;; split until after register allocation. If the 64-bit value didn't
1096 ;; come from memory, this is the best we can do. This is much better
1097 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1098 ;; from there.
1099
1100 (define_insn_and_split "movdi_to_sse"
1101 [(parallel
1102 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1103 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1104 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1105 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1106 "#"
1107 "&& reload_completed"
1108 [(const_int 0)]
1109 {
1110 if (register_operand (operands[1], DImode))
1111 {
1112 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1113 Assemble the 64-bit DImode value in an xmm register. */
1114 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1115 gen_lowpart (SImode, operands[1])));
1116 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1117 gen_highpart (SImode, operands[1])));
1118 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1119 operands[2]));
1120 }
1121 else if (memory_operand (operands[1], DImode))
1122 {
1123 rtx tmp = gen_reg_rtx (V2DImode);
1124 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
1125 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
1126 }
1127 else
1128 gcc_unreachable ();
1129 })
1130
1131 (define_split
1132 [(set (match_operand:V4SF 0 "register_operand")
1133 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1134 "TARGET_SSE && reload_completed"
1135 [(set (match_dup 0)
1136 (vec_merge:V4SF
1137 (vec_duplicate:V4SF (match_dup 1))
1138 (match_dup 2)
1139 (const_int 1)))]
1140 {
1141 operands[1] = gen_lowpart (SFmode, operands[1]);
1142 operands[2] = CONST0_RTX (V4SFmode);
1143 })
1144
1145 (define_split
1146 [(set (match_operand:V2DF 0 "register_operand")
1147 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1148 "TARGET_SSE2 && reload_completed"
1149 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1150 {
1151 operands[1] = gen_lowpart (DFmode, operands[1]);
1152 operands[2] = CONST0_RTX (DFmode);
1153 })
1154
1155 (define_expand "movmisalign<mode>"
1156 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1157 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1158 "TARGET_SSE"
1159 {
1160 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1161 DONE;
1162 })
1163
1164 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
1165 (define_peephole2
1166 [(set (match_operand:V2DF 0 "register_operand")
1167 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1168 (match_operand:DF 4 "const0_operand")))
1169 (set (match_operand:V2DF 2 "register_operand")
1170 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1171 (parallel [(const_int 0)]))
1172 (match_operand:DF 3 "memory_operand")))]
1173 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1174 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1175 [(set (match_dup 2) (match_dup 4))]
1176 "operands[4] = adjust_address (operands[1], V2DFmode, 0);")
1177
1178 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
1179 (define_peephole2
1180 [(set (match_operand:DF 0 "memory_operand")
1181 (vec_select:DF (match_operand:V2DF 1 "register_operand")
1182 (parallel [(const_int 0)])))
1183 (set (match_operand:DF 2 "memory_operand")
1184 (vec_select:DF (match_operand:V2DF 3 "register_operand")
1185 (parallel [(const_int 1)])))]
1186 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1187 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1188 [(set (match_dup 4) (match_dup 1))]
1189 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1190
1191 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1192 [(set (match_operand:VI1 0 "register_operand" "=x")
1193 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1194 UNSPEC_LDDQU))]
1195 "TARGET_SSE3"
1196 "%vlddqu\t{%1, %0|%0, %1}"
1197 [(set_attr "type" "ssemov")
1198 (set_attr "movu" "1")
1199 (set (attr "prefix_data16")
1200 (if_then_else
1201 (match_test "TARGET_AVX")
1202 (const_string "*")
1203 (const_string "0")))
1204 (set (attr "prefix_rep")
1205 (if_then_else
1206 (match_test "TARGET_AVX")
1207 (const_string "*")
1208 (const_string "1")))
1209 (set_attr "prefix" "maybe_vex")
1210 (set_attr "mode" "<sseinsnmode>")])
1211
1212 (define_insn "sse2_movnti<mode>"
1213 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1214 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1215 UNSPEC_MOVNT))]
1216 "TARGET_SSE2"
1217 "movnti\t{%1, %0|%0, %1}"
1218 [(set_attr "type" "ssemov")
1219 (set_attr "prefix_data16" "0")
1220 (set_attr "mode" "<MODE>")])
1221
1222 (define_insn "<sse>_movnt<mode>"
1223 [(set (match_operand:VF 0 "memory_operand" "=m")
1224 (unspec:VF
1225 [(match_operand:VF 1 "register_operand" "v")]
1226 UNSPEC_MOVNT))]
1227 "TARGET_SSE"
1228 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1229 [(set_attr "type" "ssemov")
1230 (set_attr "prefix" "maybe_vex")
1231 (set_attr "mode" "<MODE>")])
1232
1233 (define_insn "<sse2>_movnt<mode>"
1234 [(set (match_operand:VI8 0 "memory_operand" "=m")
1235 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1236 UNSPEC_MOVNT))]
1237 "TARGET_SSE2"
1238 "%vmovntdq\t{%1, %0|%0, %1}"
1239 [(set_attr "type" "ssecvt")
1240 (set (attr "prefix_data16")
1241 (if_then_else
1242 (match_test "TARGET_AVX")
1243 (const_string "*")
1244 (const_string "1")))
1245 (set_attr "prefix" "maybe_vex")
1246 (set_attr "mode" "<sseinsnmode>")])
1247
1248 ; Expand patterns for non-temporal stores. At the moment, only those
1249 ; that directly map to insns are defined; it would be possible to
1250 ; define patterns for other modes that would expand to several insns.
1251
1252 ;; Modes handled by storent patterns.
1253 (define_mode_iterator STORENT_MODE
1254 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1255 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1256 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1257 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1258 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1259
1260 (define_expand "storent<mode>"
1261 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1262 (unspec:STORENT_MODE
1263 [(match_operand:STORENT_MODE 1 "register_operand")]
1264 UNSPEC_MOVNT))]
1265 "TARGET_SSE")
1266
1267 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1268 ;;
1269 ;; Parallel floating point arithmetic
1270 ;;
1271 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1272
1273 (define_expand "<code><mode>2"
1274 [(set (match_operand:VF 0 "register_operand")
1275 (absneg:VF
1276 (match_operand:VF 1 "register_operand")))]
1277 "TARGET_SSE"
1278 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1279
1280 (define_insn_and_split "*absneg<mode>2"
1281 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1282 (match_operator:VF 3 "absneg_operator"
1283 [(match_operand:VF 1 "vector_operand" "0, xBm,v, m")]))
1284 (use (match_operand:VF 2 "vector_operand" "xBm,0, vm,v"))]
1285 "TARGET_SSE"
1286 "#"
1287 "&& reload_completed"
1288 [(const_int 0)]
1289 {
1290 enum rtx_code absneg_op;
1291 rtx op1, op2;
1292 rtx t;
1293
1294 if (TARGET_AVX)
1295 {
1296 if (MEM_P (operands[1]))
1297 op1 = operands[2], op2 = operands[1];
1298 else
1299 op1 = operands[1], op2 = operands[2];
1300 }
1301 else
1302 {
1303 op1 = operands[0];
1304 if (rtx_equal_p (operands[0], operands[1]))
1305 op2 = operands[2];
1306 else
1307 op2 = operands[1];
1308 }
1309
1310 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1311 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1312 t = gen_rtx_SET (operands[0], t);
1313 emit_insn (t);
1314 DONE;
1315 }
1316 [(set_attr "isa" "noavx,noavx,avx,avx")])
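;; After reload the pattern above splits to a single logic insn: NEG
;; becomes an XOR with the sign-bit mask and ABS an AND with its
;; complement, e.g. a negv4sf2 ends up as (v)xorps with the 0x80000000
;; mask that ix86_expand_fp_absneg_operator placed in operand 2.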
1317
1318 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1319 [(set (match_operand:VF 0 "register_operand")
1320 (plusminus:VF
1321 (match_operand:VF 1 "<round_nimm_predicate>")
1322 (match_operand:VF 2 "<round_nimm_predicate>")))]
1323 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1324 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1325
1326 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1327 [(set (match_operand:VF 0 "register_operand" "=x,v")
1328 (plusminus:VF
1329 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1330 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1331 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1332 "@
1333 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1334 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1335 [(set_attr "isa" "noavx,avx")
1336 (set_attr "type" "sseadd")
1337 (set_attr "prefix" "<mask_prefix3>")
1338 (set_attr "mode" "<MODE>")])
1339
1340 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1341 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1342 (vec_merge:VF_128
1343 (plusminus:VF_128
1344 (match_operand:VF_128 1 "register_operand" "0,v")
1345 (match_operand:VF_128 2 "vector_operand" "xBm,<round_constraint>"))
1346 (match_dup 1)
1347 (const_int 1)))]
1348 "TARGET_SSE"
1349 "@
1350 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1351 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1352 [(set_attr "isa" "noavx,avx")
1353 (set_attr "type" "sseadd")
1354 (set_attr "prefix" "<round_prefix>")
1355 (set_attr "mode" "<ssescalarmode>")])
1356
1357 (define_expand "mul<mode>3<mask_name><round_name>"
1358 [(set (match_operand:VF 0 "register_operand")
1359 (mult:VF
1360 (match_operand:VF 1 "<round_nimm_predicate>")
1361 (match_operand:VF 2 "<round_nimm_predicate>")))]
1362 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1363 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1364
1365 (define_insn "*mul<mode>3<mask_name><round_name>"
1366 [(set (match_operand:VF 0 "register_operand" "=x,v")
1367 (mult:VF
1368 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1369 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1370 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1371 "@
1372 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1373 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1374 [(set_attr "isa" "noavx,avx")
1375 (set_attr "type" "ssemul")
1376 (set_attr "prefix" "<mask_prefix3>")
1377 (set_attr "btver2_decode" "direct,double")
1378 (set_attr "mode" "<MODE>")])
1379
1380 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1381 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1382 (vec_merge:VF_128
1383 (multdiv:VF_128
1384 (match_operand:VF_128 1 "register_operand" "0,v")
1385 (match_operand:VF_128 2 "vector_operand" "xBm,<round_constraint>"))
1386 (match_dup 1)
1387 (const_int 1)))]
1388 "TARGET_SSE"
1389 "@
1390 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1391 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1392 [(set_attr "isa" "noavx,avx")
1393 (set_attr "type" "sse<multdiv_mnemonic>")
1394 (set_attr "prefix" "<round_prefix>")
1395 (set_attr "btver2_decode" "direct,double")
1396 (set_attr "mode" "<ssescalarmode>")])
1397
1398 (define_expand "div<mode>3"
1399 [(set (match_operand:VF2 0 "register_operand")
1400 (div:VF2 (match_operand:VF2 1 "register_operand")
1401 (match_operand:VF2 2 "vector_operand")))]
1402 "TARGET_SSE2"
1403 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1404
1405 (define_expand "div<mode>3"
1406 [(set (match_operand:VF1 0 "register_operand")
1407 (div:VF1 (match_operand:VF1 1 "register_operand")
1408 (match_operand:VF1 2 "vector_operand")))]
1409 "TARGET_SSE"
1410 {
1411 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1412
1413 if (TARGET_SSE_MATH
1414 && TARGET_RECIP_VEC_DIV
1415 && !optimize_insn_for_size_p ()
1416 && flag_finite_math_only && !flag_trapping_math
1417 && flag_unsafe_math_optimizations)
1418 {
1419 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1420 DONE;
1421 }
1422 })
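;; When the reciprocal path above is taken, ix86_emit_swdivsf expands a/b
;; into (roughly) the classic Newton-Raphson refinement of the ~12-bit
;; RCPPS estimate:
;;   e0  = rcpps (b)
;;   e1  = e0 * (2 - b * e0)    ; one refinement step
;;   a/b ~= a * e1
;; hence the gating on the unsafe-math flags checked above.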
1423
1424 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1425 [(set (match_operand:VF 0 "register_operand" "=x,v")
1426 (div:VF
1427 (match_operand:VF 1 "register_operand" "0,v")
1428 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1429 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1430 "@
1431 div<ssemodesuffix>\t{%2, %0|%0, %2}
1432 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1433 [(set_attr "isa" "noavx,avx")
1434 (set_attr "type" "ssediv")
1435 (set_attr "prefix" "<mask_prefix3>")
1436 (set_attr "mode" "<MODE>")])
1437
1438 (define_insn "<sse>_rcp<mode>2"
1439 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1440 (unspec:VF1_128_256
1441 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
1442 "TARGET_SSE"
1443 "%vrcpps\t{%1, %0|%0, %1}"
1444 [(set_attr "type" "sse")
1445 (set_attr "atom_sse_attr" "rcp")
1446 (set_attr "btver2_sse_attr" "rcp")
1447 (set_attr "prefix" "maybe_vex")
1448 (set_attr "mode" "<MODE>")])
1449
1450 (define_insn "sse_vmrcpv4sf2"
1451 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1452 (vec_merge:V4SF
1453 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1454 UNSPEC_RCP)
1455 (match_operand:V4SF 2 "register_operand" "0,x")
1456 (const_int 1)))]
1457 "TARGET_SSE"
1458 "@
1459 rcpss\t{%1, %0|%0, %k1}
1460 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1461 [(set_attr "isa" "noavx,avx")
1462 (set_attr "type" "sse")
1463 (set_attr "atom_sse_attr" "rcp")
1464 (set_attr "btver2_sse_attr" "rcp")
1465 (set_attr "prefix" "orig,vex")
1466 (set_attr "mode" "SF")])
1467
1468 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1469 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1470 (unspec:VF_AVX512VL
1471 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1472 UNSPEC_RCP14))]
1473 "TARGET_AVX512F"
1474 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1475 [(set_attr "type" "sse")
1476 (set_attr "prefix" "evex")
1477 (set_attr "mode" "<MODE>")])
1478
1479 (define_insn "srcp14<mode>"
1480 [(set (match_operand:VF_128 0 "register_operand" "=v")
1481 (vec_merge:VF_128
1482 (unspec:VF_128
1483 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1484 UNSPEC_RCP14)
1485 (match_operand:VF_128 2 "register_operand" "v")
1486 (const_int 1)))]
1487 "TARGET_AVX512F"
1488 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1489 [(set_attr "type" "sse")
1490 (set_attr "prefix" "evex")
1491 (set_attr "mode" "<MODE>")])
1492
1493 (define_expand "sqrt<mode>2"
1494 [(set (match_operand:VF2 0 "register_operand")
1495 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
1496 "TARGET_SSE2")
1497
1498 (define_expand "sqrt<mode>2"
1499 [(set (match_operand:VF1 0 "register_operand")
1500 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
1501 "TARGET_SSE"
1502 {
1503 if (TARGET_SSE_MATH
1504 && TARGET_RECIP_VEC_SQRT
1505 && !optimize_insn_for_size_p ()
1506 && flag_finite_math_only && !flag_trapping_math
1507 && flag_unsafe_math_optimizations)
1508 {
1509 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
1510 DONE;
1511 }
1512 })
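;; Similarly, when the rsqrt path above is taken, ix86_emit_swsqrtsf
;; approximates (roughly)
;;   r0      = rsqrtps (x)
;;   sqrt(x) ~= x * r0 * 0.5 * (3 - x * r0 * r0)
;; which also needs care for x == 0.0 (where 0 * inf would give a NaN),
;; hence the same unsafe-math gating.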
1513
1514 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1515 [(set (match_operand:VF 0 "register_operand" "=x,v")
1516 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1517 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1518 "@
1519 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
1520 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1521 [(set_attr "isa" "noavx,avx")
1522 (set_attr "type" "sse")
1523 (set_attr "atom_sse_attr" "sqrt")
1524 (set_attr "btver2_sse_attr" "sqrt")
1525 (set_attr "prefix" "maybe_vex")
1526 (set_attr "mode" "<MODE>")])
1527
1528 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1529 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1530 (vec_merge:VF_128
1531 (sqrt:VF_128
1532 (match_operand:VF_128 1 "vector_operand" "xBm,<round_constraint>"))
1533 (match_operand:VF_128 2 "register_operand" "0,v")
1534 (const_int 1)))]
1535 "TARGET_SSE"
1536 "@
1537 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1538 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1539 [(set_attr "isa" "noavx,avx")
1540 (set_attr "type" "sse")
1541 (set_attr "atom_sse_attr" "sqrt")
1542 (set_attr "prefix" "<round_prefix>")
1543 (set_attr "btver2_sse_attr" "sqrt")
1544 (set_attr "mode" "<ssescalarmode>")])
1545
1546 (define_expand "rsqrt<mode>2"
1547 [(set (match_operand:VF1_128_256 0 "register_operand")
1548 (unspec:VF1_128_256
1549 [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
1550 "TARGET_SSE_MATH"
1551 {
1552 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1553 DONE;
1554 })
1555
1556 (define_insn "<sse>_rsqrt<mode>2"
1557 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1558 (unspec:VF1_128_256
1559 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
1560 "TARGET_SSE"
1561 "%vrsqrtps\t{%1, %0|%0, %1}"
1562 [(set_attr "type" "sse")
1563 (set_attr "prefix" "maybe_vex")
1564 (set_attr "mode" "<MODE>")])
1565
1566 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1567 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1568 (unspec:VF_AVX512VL
1569 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1570 UNSPEC_RSQRT14))]
1571 "TARGET_AVX512F"
1572 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1573 [(set_attr "type" "sse")
1574 (set_attr "prefix" "evex")
1575 (set_attr "mode" "<MODE>")])
1576
1577 (define_insn "rsqrt14<mode>"
1578 [(set (match_operand:VF_128 0 "register_operand" "=v")
1579 (vec_merge:VF_128
1580 (unspec:VF_128
1581 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1582 UNSPEC_RSQRT14)
1583 (match_operand:VF_128 2 "register_operand" "v")
1584 (const_int 1)))]
1585 "TARGET_AVX512F"
1586 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1587 [(set_attr "type" "sse")
1588 (set_attr "prefix" "evex")
1589 (set_attr "mode" "<MODE>")])
1590
1591 (define_insn "sse_vmrsqrtv4sf2"
1592 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1593 (vec_merge:V4SF
1594 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1595 UNSPEC_RSQRT)
1596 (match_operand:V4SF 2 "register_operand" "0,x")
1597 (const_int 1)))]
1598 "TARGET_SSE"
1599 "@
1600 rsqrtss\t{%1, %0|%0, %k1}
1601 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1602 [(set_attr "isa" "noavx,avx")
1603 (set_attr "type" "sse")
1604 (set_attr "prefix" "orig,vex")
1605 (set_attr "mode" "SF")])
1606
1607 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1608 ;; isn't really correct, as those rtl operators aren't defined when
1609 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1610
1611 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1612 [(set (match_operand:VF 0 "register_operand")
1613 (smaxmin:VF
1614 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1615 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1616 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1617 {
1618 if (!flag_finite_math_only)
1619 operands[1] = force_reg (<MODE>mode, operands[1]);
1620 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1621 })
1622
1623 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1624 [(set (match_operand:VF 0 "register_operand" "=x,v")
1625 (smaxmin:VF
1626 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1627 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
1628 "TARGET_SSE && flag_finite_math_only
1629 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1630 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1631 "@
1632 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1633 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1634 [(set_attr "isa" "noavx,avx")
1635 (set_attr "type" "sseadd")
1636 (set_attr "btver2_sse_attr" "maxmin")
1637 (set_attr "prefix" "<mask_prefix3>")
1638 (set_attr "mode" "<MODE>")])
1639
1640 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1641 [(set (match_operand:VF 0 "register_operand" "=x,v")
1642 (smaxmin:VF
1643 (match_operand:VF 1 "register_operand" "0,v")
1644 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
1645 "TARGET_SSE && !flag_finite_math_only
1646 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1647 "@
1648 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1649 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1650 [(set_attr "isa" "noavx,avx")
1651 (set_attr "type" "sseadd")
1652 (set_attr "btver2_sse_attr" "maxmin")
1653 (set_attr "prefix" "<mask_prefix3>")
1654 (set_attr "mode" "<MODE>")])
1655
1656 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1657 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1658 (vec_merge:VF_128
1659 (smaxmin:VF_128
1660 (match_operand:VF_128 1 "register_operand" "0,v")
1661 (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_constraint>"))
1662 (match_dup 1)
1663 (const_int 1)))]
1664 "TARGET_SSE"
1665 "@
1666 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1667 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1668 [(set_attr "isa" "noavx,avx")
1669 (set_attr "type" "sse")
1670 (set_attr "btver2_sse_attr" "maxmin")
1671 (set_attr "prefix" "<round_saeonly_prefix>")
1672 (set_attr "mode" "<ssescalarmode>")])
1673
1674 ;; These versions of the min/max patterns implement exactly the operations
1675 ;; min = (op1 < op2 ? op1 : op2)
1676 ;; max = (!(op1 < op2) ? op1 : op2)
1677 ;; Their operands are not commutative, and thus they may be used in the
1678 ;; presence of -0.0 and NaN.
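;;
;; A scalar C model of the two definitions (illustrative only); note
;; that a NaN or -0.0 in op2 is passed through unchanged, which is why
;; the operands must not be swapped:
;;
;;   float ieee_min (float op1, float op2) { return op1 < op2 ? op1 : op2; }
;;   float ieee_max (float op1, float op2) { return !(op1 < op2) ? op1 : op2; }
;;
;;   /* ieee_min (NAN, 1.0f) is 1.0f while ieee_min (1.0f, NAN) is NaN;
;;      ieee_min (-0.0f, 0.0f) and ieee_min (0.0f, -0.0f) both return
;;      their second operand, matching minps/maxps behaviour.  */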
1679
1680 (define_insn "*ieee_smin<mode>3"
1681 [(set (match_operand:VF 0 "register_operand" "=x,v")
1682 (unspec:VF
1683 [(match_operand:VF 1 "register_operand" "0,v")
1684 (match_operand:VF 2 "vector_operand" "xBm,vm")]
1685 UNSPEC_IEEE_MIN))]
1686 "TARGET_SSE"
1687 "@
1688 min<ssemodesuffix>\t{%2, %0|%0, %2}
1689 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1690 [(set_attr "isa" "noavx,avx")
1691 (set_attr "type" "sseadd")
1692 (set_attr "prefix" "orig,vex")
1693 (set_attr "mode" "<MODE>")])
1694
1695 (define_insn "*ieee_smax<mode>3"
1696 [(set (match_operand:VF 0 "register_operand" "=x,v")
1697 (unspec:VF
1698 [(match_operand:VF 1 "register_operand" "0,v")
1699 (match_operand:VF 2 "vector_operand" "xBm,vm")]
1700 UNSPEC_IEEE_MAX))]
1701 "TARGET_SSE"
1702 "@
1703 max<ssemodesuffix>\t{%2, %0|%0, %2}
1704 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1705 [(set_attr "isa" "noavx,avx")
1706 (set_attr "type" "sseadd")
1707 (set_attr "prefix" "orig,vex")
1708 (set_attr "mode" "<MODE>")])
1709
1710 (define_insn "avx_addsubv4df3"
1711 [(set (match_operand:V4DF 0 "register_operand" "=x")
1712 (vec_merge:V4DF
1713 (minus:V4DF
1714 (match_operand:V4DF 1 "register_operand" "x")
1715 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1716 (plus:V4DF (match_dup 1) (match_dup 2))
1717 (const_int 5)))]
1718 "TARGET_AVX"
1719 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1720 [(set_attr "type" "sseadd")
1721 (set_attr "prefix" "vex")
1722 (set_attr "mode" "V4DF")])
1723
1724 (define_insn "sse3_addsubv2df3"
1725 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1726 (vec_merge:V2DF
1727 (minus:V2DF
1728 (match_operand:V2DF 1 "register_operand" "0,x")
1729 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
1730 (plus:V2DF (match_dup 1) (match_dup 2))
1731 (const_int 1)))]
1732 "TARGET_SSE3"
1733 "@
1734 addsubpd\t{%2, %0|%0, %2}
1735 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1736 [(set_attr "isa" "noavx,avx")
1737 (set_attr "type" "sseadd")
1738 (set_attr "atom_unit" "complex")
1739 (set_attr "prefix" "orig,vex")
1740 (set_attr "mode" "V2DF")])
1741
1742 (define_insn "avx_addsubv8sf3"
1743 [(set (match_operand:V8SF 0 "register_operand" "=x")
1744 (vec_merge:V8SF
1745 (minus:V8SF
1746 (match_operand:V8SF 1 "register_operand" "x")
1747 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1748 (plus:V8SF (match_dup 1) (match_dup 2))
1749 (const_int 85)))]
1750 "TARGET_AVX"
1751 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1752 [(set_attr "type" "sseadd")
1753 (set_attr "prefix" "vex")
1754 (set_attr "mode" "V8SF")])
1755
1756 (define_insn "sse3_addsubv4sf3"
1757 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1758 (vec_merge:V4SF
1759 (minus:V4SF
1760 (match_operand:V4SF 1 "register_operand" "0,x")
1761 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
1762 (plus:V4SF (match_dup 1) (match_dup 2))
1763 (const_int 5)))]
1764 "TARGET_SSE3"
1765 "@
1766 addsubps\t{%2, %0|%0, %2}
1767 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1768 [(set_attr "isa" "noavx,avx")
1769 (set_attr "type" "sseadd")
1770 (set_attr "prefix" "orig,vex")
1771 (set_attr "prefix_rep" "1,*")
1772 (set_attr "mode" "V4SF")])
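
;; In the addsub vec_merge masks above, a set bit selects the MINUS
;; result for that element, so 1, 5 and 85 (0b01, 0b0101, 0b01010101)
;; subtract in the even lanes and add in the odd ones.  A hypothetical
;; scalar C model of addsubps on a 4-element vector:
;;
;;   void addsubps_model (float *dst, const float *a, const float *b)
;;   {
;;     for (int i = 0; i < 4; i++)
;;       dst[i] = (i & 1) ? a[i] + b[i] : a[i] - b[i];
;;   }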
1773
1774 (define_split
1775 [(set (match_operand:VF_128_256 0 "register_operand")
1776 (match_operator:VF_128_256 6 "addsub_vm_operator"
1777 [(minus:VF_128_256
1778 (match_operand:VF_128_256 1 "register_operand")
1779 (match_operand:VF_128_256 2 "vector_operand"))
1780 (plus:VF_128_256
1781 (match_operand:VF_128_256 3 "vector_operand")
1782 (match_operand:VF_128_256 4 "vector_operand"))
1783 (match_operand 5 "const_int_operand")]))]
1784 "TARGET_SSE3
1785 && can_create_pseudo_p ()
1786 && ((rtx_equal_p (operands[1], operands[3])
1787 && rtx_equal_p (operands[2], operands[4]))
1788 || (rtx_equal_p (operands[1], operands[4])
1789 && rtx_equal_p (operands[2], operands[3])))"
1790 [(set (match_dup 0)
1791 (vec_merge:VF_128_256
1792 (minus:VF_128_256 (match_dup 1) (match_dup 2))
1793 (plus:VF_128_256 (match_dup 1) (match_dup 2))
1794 (match_dup 5)))])
1795
1796 (define_split
1797 [(set (match_operand:VF_128_256 0 "register_operand")
1798 (match_operator:VF_128_256 6 "addsub_vm_operator"
1799 [(plus:VF_128_256
1800 (match_operand:VF_128_256 1 "vector_operand")
1801 (match_operand:VF_128_256 2 "vector_operand"))
1802 (minus:VF_128_256
1803 (match_operand:VF_128_256 3 "register_operand")
1804 (match_operand:VF_128_256 4 "vector_operand"))
1805 (match_operand 5 "const_int_operand")]))]
1806 "TARGET_SSE3
1807 && can_create_pseudo_p ()
1808 && ((rtx_equal_p (operands[1], operands[3])
1809 && rtx_equal_p (operands[2], operands[4]))
1810 || (rtx_equal_p (operands[1], operands[4])
1811 && rtx_equal_p (operands[2], operands[3])))"
1812 [(set (match_dup 0)
1813 (vec_merge:VF_128_256
1814 (minus:VF_128_256 (match_dup 3) (match_dup 4))
1815 (plus:VF_128_256 (match_dup 3) (match_dup 4))
1816 (match_dup 5)))]
1817 {
1818 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
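  /* E.g. for V4SF, a matched mask of 10 (0b1010, PLUS selected in lanes
     1 and 3) becomes 5 (0b0101), selecting MINUS in lanes 0 and 2 of
     the swapped template -- the addsubps lane pattern.  */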
1819 operands[5]
1820 = GEN_INT (~INTVAL (operands[5])
1821 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
1822 })
1823
1824 (define_split
1825 [(set (match_operand:VF_128_256 0 "register_operand")
1826 (match_operator:VF_128_256 7 "addsub_vs_operator"
1827 [(vec_concat:<ssedoublemode>
1828 (minus:VF_128_256
1829 (match_operand:VF_128_256 1 "register_operand")
1830 (match_operand:VF_128_256 2 "vector_operand"))
1831 (plus:VF_128_256
1832 (match_operand:VF_128_256 3 "vector_operand")
1833 (match_operand:VF_128_256 4 "vector_operand")))
1834 (match_parallel 5 "addsub_vs_parallel"
1835 [(match_operand 6 "const_int_operand")])]))]
1836 "TARGET_SSE3
1837 && can_create_pseudo_p ()
1838 && ((rtx_equal_p (operands[1], operands[3])
1839 && rtx_equal_p (operands[2], operands[4]))
1840 || (rtx_equal_p (operands[1], operands[4])
1841 && rtx_equal_p (operands[2], operands[3])))"
1842 [(set (match_dup 0)
1843 (vec_merge:VF_128_256
1844 (minus:VF_128_256 (match_dup 1) (match_dup 2))
1845 (plus:VF_128_256 (match_dup 1) (match_dup 2))
1846 (match_dup 5)))]
1847 {
1848 int i, nelt = XVECLEN (operands[5], 0);
1849 HOST_WIDE_INT ival = 0;
1850
1851 for (i = 0; i < nelt; i++)
1852 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
1853 ival |= HOST_WIDE_INT_1 << i;
1854
1855 operands[5] = GEN_INT (ival);
1856 })
1857
1858 (define_split
1859 [(set (match_operand:VF_128_256 0 "register_operand")
1860 (match_operator:VF_128_256 7 "addsub_vs_operator"
1861 [(vec_concat:<ssedoublemode>
1862 (plus:VF_128_256
1863 (match_operand:VF_128_256 1 "vector_operand")
1864 (match_operand:VF_128_256 2 "vector_operand"))
1865 (minus:VF_128_256
1866 (match_operand:VF_128_256 3 "register_operand")
1867 (match_operand:VF_128_256 4 "vector_operand")))
1868 (match_parallel 5 "addsub_vs_parallel"
1869 [(match_operand 6 "const_int_operand")])]))]
1870 "TARGET_SSE3
1871 && can_create_pseudo_p ()
1872 && ((rtx_equal_p (operands[1], operands[3])
1873 && rtx_equal_p (operands[2], operands[4]))
1874 || (rtx_equal_p (operands[1], operands[4])
1875 && rtx_equal_p (operands[2], operands[3])))"
1876 [(set (match_dup 0)
1877 (vec_merge:VF_128_256
1878 (minus:VF_128_256 (match_dup 3) (match_dup 4))
1879 (plus:VF_128_256 (match_dup 3) (match_dup 4))
1880 (match_dup 5)))]
1881 {
1882 int i, nelt = XVECLEN (operands[5], 0);
1883 HOST_WIDE_INT ival = 0;
1884
1885 for (i = 0; i < nelt; i++)
1886 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
1887 ival |= HOST_WIDE_INT_1 << i;
1888
1889 operands[5] = GEN_INT (ival);
1890 })
1891
1892 (define_insn "avx_h<plusminus_insn>v4df3"
1893 [(set (match_operand:V4DF 0 "register_operand" "=x")
1894 (vec_concat:V4DF
1895 (vec_concat:V2DF
1896 (plusminus:DF
1897 (vec_select:DF
1898 (match_operand:V4DF 1 "register_operand" "x")
1899 (parallel [(const_int 0)]))
1900 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1901 (plusminus:DF
1902 (vec_select:DF
1903 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1904 (parallel [(const_int 0)]))
1905 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1906 (vec_concat:V2DF
1907 (plusminus:DF
1908 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1909 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1910 (plusminus:DF
1911 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1912 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1913 "TARGET_AVX"
1914 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1915 [(set_attr "type" "sseadd")
1916 (set_attr "prefix" "vex")
1917 (set_attr "mode" "V4DF")])
1918
1919 (define_expand "sse3_haddv2df3"
1920 [(set (match_operand:V2DF 0 "register_operand")
1921 (vec_concat:V2DF
1922 (plus:DF
1923 (vec_select:DF
1924 (match_operand:V2DF 1 "register_operand")
1925 (parallel [(const_int 0)]))
1926 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1927 (plus:DF
1928 (vec_select:DF
1929 (match_operand:V2DF 2 "vector_operand")
1930 (parallel [(const_int 0)]))
1931 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1932 "TARGET_SSE3")
1933
1934 (define_insn "*sse3_haddv2df3"
1935 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1936 (vec_concat:V2DF
1937 (plus:DF
1938 (vec_select:DF
1939 (match_operand:V2DF 1 "register_operand" "0,x")
1940 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1941 (vec_select:DF
1942 (match_dup 1)
1943 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1944 (plus:DF
1945 (vec_select:DF
1946 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
1947 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1948 (vec_select:DF
1949 (match_dup 2)
1950 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1951 "TARGET_SSE3
1952 && INTVAL (operands[3]) != INTVAL (operands[4])
1953 && INTVAL (operands[5]) != INTVAL (operands[6])"
1954 "@
1955 haddpd\t{%2, %0|%0, %2}
1956 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1957 [(set_attr "isa" "noavx,avx")
1958 (set_attr "type" "sseadd")
1959 (set_attr "prefix" "orig,vex")
1960 (set_attr "mode" "V2DF")])
1961
1962 (define_insn "sse3_hsubv2df3"
1963 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1964 (vec_concat:V2DF
1965 (minus:DF
1966 (vec_select:DF
1967 (match_operand:V2DF 1 "register_operand" "0,x")
1968 (parallel [(const_int 0)]))
1969 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1970 (minus:DF
1971 (vec_select:DF
1972 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
1973 (parallel [(const_int 0)]))
1974 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1975 "TARGET_SSE3"
1976 "@
1977 hsubpd\t{%2, %0|%0, %2}
1978 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
1979 [(set_attr "isa" "noavx,avx")
1980 (set_attr "type" "sseadd")
1981 (set_attr "prefix" "orig,vex")
1982 (set_attr "mode" "V2DF")])
1983
1984 (define_insn "*sse3_haddv2df3_low"
1985 [(set (match_operand:DF 0 "register_operand" "=x,x")
1986 (plus:DF
1987 (vec_select:DF
1988 (match_operand:V2DF 1 "register_operand" "0,x")
1989 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
1990 (vec_select:DF
1991 (match_dup 1)
1992 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
1993 "TARGET_SSE3
1994 && INTVAL (operands[2]) != INTVAL (operands[3])"
1995 "@
1996 haddpd\t{%0, %0|%0, %0}
1997 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
1998 [(set_attr "isa" "noavx,avx")
1999 (set_attr "type" "sseadd1")
2000 (set_attr "prefix" "orig,vex")
2001 (set_attr "mode" "V2DF")])
2002
2003 (define_insn "*sse3_hsubv2df3_low"
2004 [(set (match_operand:DF 0 "register_operand" "=x,x")
2005 (minus:DF
2006 (vec_select:DF
2007 (match_operand:V2DF 1 "register_operand" "0,x")
2008 (parallel [(const_int 0)]))
2009 (vec_select:DF
2010 (match_dup 1)
2011 (parallel [(const_int 1)]))))]
2012 "TARGET_SSE3"
2013 "@
2014 hsubpd\t{%0, %0|%0, %0}
2015 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2016 [(set_attr "isa" "noavx,avx")
2017 (set_attr "type" "sseadd1")
2018 (set_attr "prefix" "orig,vex")
2019 (set_attr "mode" "V2DF")])
2020
2021 (define_insn "avx_h<plusminus_insn>v8sf3"
2022 [(set (match_operand:V8SF 0 "register_operand" "=x")
2023 (vec_concat:V8SF
2024 (vec_concat:V4SF
2025 (vec_concat:V2SF
2026 (plusminus:SF
2027 (vec_select:SF
2028 (match_operand:V8SF 1 "register_operand" "x")
2029 (parallel [(const_int 0)]))
2030 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2031 (plusminus:SF
2032 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2033 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2034 (vec_concat:V2SF
2035 (plusminus:SF
2036 (vec_select:SF
2037 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2038 (parallel [(const_int 0)]))
2039 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2040 (plusminus:SF
2041 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2042 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2043 (vec_concat:V4SF
2044 (vec_concat:V2SF
2045 (plusminus:SF
2046 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2047 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2048 (plusminus:SF
2049 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2050 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2051 (vec_concat:V2SF
2052 (plusminus:SF
2053 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2054 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2055 (plusminus:SF
2056 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2057 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2058 "TARGET_AVX"
2059 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2060 [(set_attr "type" "sseadd")
2061 (set_attr "prefix" "vex")
2062 (set_attr "mode" "V8SF")])
2063
2064 (define_insn "sse3_h<plusminus_insn>v4sf3"
2065 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2066 (vec_concat:V4SF
2067 (vec_concat:V2SF
2068 (plusminus:SF
2069 (vec_select:SF
2070 (match_operand:V4SF 1 "register_operand" "0,x")
2071 (parallel [(const_int 0)]))
2072 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2073 (plusminus:SF
2074 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2075 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2076 (vec_concat:V2SF
2077 (plusminus:SF
2078 (vec_select:SF
2079 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2080 (parallel [(const_int 0)]))
2081 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2082 (plusminus:SF
2083 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2084 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2085 "TARGET_SSE3"
2086 "@
2087 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2088 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2089 [(set_attr "isa" "noavx,avx")
2090 (set_attr "type" "sseadd")
2091 (set_attr "atom_unit" "complex")
2092 (set_attr "prefix" "orig,vex")
2093 (set_attr "prefix_rep" "1,*")
2094 (set_attr "mode" "V4SF")])
2095
2096 (define_expand "reduc_plus_scal_v8df"
2097 [(match_operand:DF 0 "register_operand")
2098 (match_operand:V8DF 1 "register_operand")]
2099 "TARGET_AVX512F"
2100 {
2101 rtx tmp = gen_reg_rtx (V8DFmode);
2102 ix86_expand_reduc (gen_addv8df3, tmp, operands[1]);
2103 emit_insn (gen_vec_extractv8df (operands[0], tmp, const0_rtx));
2104 DONE;
2105 })
2106
2107 (define_expand "reduc_plus_scal_v4df"
2108 [(match_operand:DF 0 "register_operand")
2109 (match_operand:V4DF 1 "register_operand")]
2110 "TARGET_AVX"
2111 {
2112 rtx tmp = gen_reg_rtx (V4DFmode);
2113 rtx tmp2 = gen_reg_rtx (V4DFmode);
2114 rtx vec_res = gen_reg_rtx (V4DFmode);
2115 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2116 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2117 emit_insn (gen_addv4df3 (vec_res, tmp, tmp2));
2118 emit_insn (gen_vec_extractv4df (operands[0], vec_res, const0_rtx));
2119 DONE;
2120 })
2121
2122 (define_expand "reduc_plus_scal_v2df"
2123 [(match_operand:DF 0 "register_operand")
2124 (match_operand:V2DF 1 "register_operand")]
2125 "TARGET_SSE3"
2126 {
2127 rtx tmp = gen_reg_rtx (V2DFmode);
2128 emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1]));
2129 emit_insn (gen_vec_extractv2df (operands[0], tmp, const0_rtx));
2130 DONE;
2131 })
2132
2133 (define_expand "reduc_plus_scal_v16sf"
2134 [(match_operand:SF 0 "register_operand")
2135 (match_operand:V16SF 1 "register_operand")]
2136 "TARGET_AVX512F"
2137 {
2138 rtx tmp = gen_reg_rtx (V16SFmode);
2139 ix86_expand_reduc (gen_addv16sf3, tmp, operands[1]);
2140 emit_insn (gen_vec_extractv16sf (operands[0], tmp, const0_rtx));
2141 DONE;
2142 })
2143
2144 (define_expand "reduc_plus_scal_v8sf"
2145 [(match_operand:SF 0 "register_operand")
2146 (match_operand:V8SF 1 "register_operand")]
2147 "TARGET_AVX"
2148 {
2149 rtx tmp = gen_reg_rtx (V8SFmode);
2150 rtx tmp2 = gen_reg_rtx (V8SFmode);
2151 rtx vec_res = gen_reg_rtx (V8SFmode);
2152 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2153 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2154 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2155 emit_insn (gen_addv8sf3 (vec_res, tmp, tmp2));
2156 emit_insn (gen_vec_extractv8sf (operands[0], vec_res, const0_rtx));
2157 DONE;
2158 })
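
;; For V8SF the two haddps steps above work within each 128-bit lane,
;; leaving every element of a lane equal to that lane's partial sum;
;; vperm2f128 with immediate 1 swaps the two lanes and the final add
;; combines them, so element 0 holds the full sum before extraction.
;; A scalar C model of the computed value (the association order of
;; the actual additions differs):
;;
;;   float reduc_plus_v8sf_model (const float *a)
;;   {
;;     float sum = 0.0f;
;;     for (int i = 0; i < 8; i++)
;;       sum += a[i];
;;     return sum;
;;   }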
2159
2160 (define_expand "reduc_plus_scal_v4sf"
2161 [(match_operand:SF 0 "register_operand")
2162 (match_operand:V4SF 1 "register_operand")]
2163 "TARGET_SSE"
2164 {
2165 rtx vec_res = gen_reg_rtx (V4SFmode);
2166 if (TARGET_SSE3)
2167 {
2168 rtx tmp = gen_reg_rtx (V4SFmode);
2169 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2170 emit_insn (gen_sse3_haddv4sf3 (vec_res, tmp, tmp));
2171 }
2172 else
2173 ix86_expand_reduc (gen_addv4sf3, vec_res, operands[1]);
2174 emit_insn (gen_vec_extractv4sf (operands[0], vec_res, const0_rtx));
2175 DONE;
2176 })
2177
2178 ;; Modes handled by reduc_sm{in,ax}* patterns.
2179 (define_mode_iterator REDUC_SMINMAX_MODE
2180 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2181 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2182 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2183 (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2184 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2185 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2186 (V8DF "TARGET_AVX512F")])
2187
2188 (define_expand "reduc_<code>_scal_<mode>"
2189 [(smaxmin:REDUC_SMINMAX_MODE
2190 (match_operand:<ssescalarmode> 0 "register_operand")
2191 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2192 ""
2193 {
2194 rtx tmp = gen_reg_rtx (<MODE>mode);
2195 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2196 emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
2197 DONE;
2198 })
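
;; ix86_expand_reduc performs the reduction with a short sequence of
;; shuffle/op steps; the scalar value it is meant to produce is simply
;; (illustrative C sketch for a smax reduction over four floats, NaN
;; ordering aside):
;;
;;   float reduc_smax_v4sf_model (const float *a)
;;   {
;;     float m = a[0];
;;     for (int i = 1; i < 4; i++)
;;       m = a[i] > m ? a[i] : m;
;;     return m;
;;   }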
2199
2200 (define_expand "reduc_<code>_scal_<mode>"
2201 [(umaxmin:VI_AVX512BW
2202 (match_operand:<ssescalarmode> 0 "register_operand")
2203 (match_operand:VI_AVX512BW 1 "register_operand"))]
2204 "TARGET_AVX512F"
2205 {
2206 rtx tmp = gen_reg_rtx (<MODE>mode);
2207 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2208 emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
2209 DONE;
2210 })
2211
2212 (define_expand "reduc_<code>_scal_<mode>"
2213 [(umaxmin:VI_256
2214 (match_operand:<ssescalarmode> 0 "register_operand")
2215 (match_operand:VI_256 1 "register_operand"))]
2216 "TARGET_AVX2"
2217 {
2218 rtx tmp = gen_reg_rtx (<MODE>mode);
2219 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2220 emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
2221 DONE;
2222 })
2223
2224 (define_expand "reduc_umin_scal_v8hi"
2225 [(umin:V8HI
2226 (match_operand:HI 0 "register_operand")
2227 (match_operand:V8HI 1 "register_operand"))]
2228 "TARGET_SSE4_1"
2229 {
2230 rtx tmp = gen_reg_rtx (V8HImode);
2231 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2232 emit_insn (gen_vec_extractv8hi (operands[0], tmp, const0_rtx));
2233 DONE;
2234 })
2235
2236 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2237 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2238 (unspec:VF_AVX512VL
2239 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2240 (match_operand:SI 2 "const_0_to_255_operand")]
2241 UNSPEC_REDUCE))]
2242 "TARGET_AVX512DQ"
2243 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2244 [(set_attr "type" "sse")
2245 (set_attr "prefix" "evex")
2246 (set_attr "mode" "<MODE>")])
2247
2248 (define_insn "reduces<mode>"
2249 [(set (match_operand:VF_128 0 "register_operand" "=v")
2250 (vec_merge:VF_128
2251 (unspec:VF_128
2252 [(match_operand:VF_128 1 "register_operand" "v")
2253 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2254 (match_operand:SI 3 "const_0_to_255_operand")]
2255 UNSPEC_REDUCE)
2256 (match_dup 1)
2257 (const_int 1)))]
2258 "TARGET_AVX512DQ"
2259 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2260 [(set_attr "type" "sse")
2261 (set_attr "prefix" "evex")
2262 (set_attr "mode" "<MODE>")])
2263
2264 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2265 ;;
2266 ;; Parallel floating point comparisons
2267 ;;
2268 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2269
2270 (define_insn "avx_cmp<mode>3"
2271 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2272 (unspec:VF_128_256
2273 [(match_operand:VF_128_256 1 "register_operand" "x")
2274 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2275 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2276 UNSPEC_PCMP))]
2277 "TARGET_AVX"
2278 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2279 [(set_attr "type" "ssecmp")
2280 (set_attr "length_immediate" "1")
2281 (set_attr "prefix" "vex")
2282 (set_attr "mode" "<MODE>")])
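
;; Operand 3 is the 5-bit AVX comparison predicate; the low values
;; match the classic SSE encodings (0 = EQ_OQ, 1 = LT_OS, 2 = LE_OS,
;; 3 = UNORD_Q, 4 = NEQ_UQ, ...) and the remaining values add the
;; ordered/unordered and signalling/quiet variants.  A hedged
;; intrinsics-level sketch of what this pattern provides:
;;
;;   #include <immintrin.h>
;;   __m256 less_than_v8sf (__m256 a, __m256 b)
;;   {
;;     /* Each lane is all-ones where a < b, all-zeros otherwise.  */
;;     return _mm256_cmp_ps (a, b, _CMP_LT_OS);
;;   }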
2283
2284 (define_insn "avx_vmcmp<mode>3"
2285 [(set (match_operand:VF_128 0 "register_operand" "=x")
2286 (vec_merge:VF_128
2287 (unspec:VF_128
2288 [(match_operand:VF_128 1 "register_operand" "x")
2289 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2290 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2291 UNSPEC_PCMP)
2292 (match_dup 1)
2293 (const_int 1)))]
2294 "TARGET_AVX"
2295 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2296 [(set_attr "type" "ssecmp")
2297 (set_attr "length_immediate" "1")
2298 (set_attr "prefix" "vex")
2299 (set_attr "mode" "<ssescalarmode>")])
2300
2301 (define_insn "*<sse>_maskcmp<mode>3_comm"
2302 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2303 (match_operator:VF_128_256 3 "sse_comparison_operator"
2304 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2305 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2306 "TARGET_SSE
2307 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2308 "@
2309 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2310 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2311 [(set_attr "isa" "noavx,avx")
2312 (set_attr "type" "ssecmp")
2313 (set_attr "length_immediate" "1")
2314 (set_attr "prefix" "orig,vex")
2315 (set_attr "mode" "<MODE>")])
2316
2317 (define_insn "<sse>_maskcmp<mode>3"
2318 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2319 (match_operator:VF_128_256 3 "sse_comparison_operator"
2320 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2321 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2322 "TARGET_SSE"
2323 "@
2324 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2325 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2326 [(set_attr "isa" "noavx,avx")
2327 (set_attr "type" "ssecmp")
2328 (set_attr "length_immediate" "1")
2329 (set_attr "prefix" "orig,vex")
2330 (set_attr "mode" "<MODE>")])
2331
2332 (define_insn "<sse>_vmmaskcmp<mode>3"
2333 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2334 (vec_merge:VF_128
2335 (match_operator:VF_128 3 "sse_comparison_operator"
2336 [(match_operand:VF_128 1 "register_operand" "0,x")
2337 (match_operand:VF_128 2 "vector_operand" "xBm,xm")])
2338 (match_dup 1)
2339 (const_int 1)))]
2340 "TARGET_SSE"
2341 "@
2342 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2343 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2344 [(set_attr "isa" "noavx,avx")
2345 (set_attr "type" "ssecmp")
2346 (set_attr "length_immediate" "1,*")
2347 (set_attr "prefix" "orig,vex")
2348 (set_attr "mode" "<ssescalarmode>")])
2349
2350 (define_mode_attr cmp_imm_predicate
2351 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2352 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2353 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2354 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2355 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2356 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2357 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2358 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2359 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
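
;; Floating-point compares take the full 5-bit AVX predicate (0-31),
;; while the AVX-512 integer vpcmp[u] forms encode only a 3-bit
;; predicate (0-7), hence the two groups in the attribute above.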
2360
2361 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2362 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2363 (unspec:<avx512fmaskmode>
2364 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2365 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2366 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2367 UNSPEC_PCMP))]
2368 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2369 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2370 [(set_attr "type" "ssecmp")
2371 (set_attr "length_immediate" "1")
2372 (set_attr "prefix" "evex")
2373 (set_attr "mode" "<sseinsnmode>")])
2374
2375 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2376 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2377 (unspec:<avx512fmaskmode>
2378 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2379 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2380 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2381 UNSPEC_PCMP))]
2382 "TARGET_AVX512BW"
2383 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2384 [(set_attr "type" "ssecmp")
2385 (set_attr "length_immediate" "1")
2386 (set_attr "prefix" "evex")
2387 (set_attr "mode" "<sseinsnmode>")])
2388
2389 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2390 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2391 (unspec:<avx512fmaskmode>
2392 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2393 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2394 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2395 UNSPEC_UNSIGNED_PCMP))]
2396 "TARGET_AVX512BW"
2397 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2398 [(set_attr "type" "ssecmp")
2399 (set_attr "length_immediate" "1")
2400 (set_attr "prefix" "evex")
2401 (set_attr "mode" "<sseinsnmode>")])
2402
2403 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2404 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2405 (unspec:<avx512fmaskmode>
2406 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2407 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2408 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2409 UNSPEC_UNSIGNED_PCMP))]
2410 "TARGET_AVX512F"
2411 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2412 [(set_attr "type" "ssecmp")
2413 (set_attr "length_immediate" "1")
2414 (set_attr "prefix" "evex")
2415 (set_attr "mode" "<sseinsnmode>")])
2416
2417 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2418 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2419 (and:<avx512fmaskmode>
2420 (unspec:<avx512fmaskmode>
2421 [(match_operand:VF_128 1 "register_operand" "v")
2422 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2423 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2424 UNSPEC_PCMP)
2425 (const_int 1)))]
2426 "TARGET_AVX512F"
2427 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2428 [(set_attr "type" "ssecmp")
2429 (set_attr "length_immediate" "1")
2430 (set_attr "prefix" "evex")
2431 (set_attr "mode" "<ssescalarmode>")])
2432
2433 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2434 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2435 (and:<avx512fmaskmode>
2436 (unspec:<avx512fmaskmode>
2437 [(match_operand:VF_128 1 "register_operand" "v")
2438 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2439 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2440 UNSPEC_PCMP)
2441 (and:<avx512fmaskmode>
2442 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2443 (const_int 1))))]
2444 "TARGET_AVX512F"
2445 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2446 [(set_attr "type" "ssecmp")
2447 (set_attr "length_immediate" "1")
2448 (set_attr "prefix" "evex")
2449 (set_attr "mode" "<ssescalarmode>")])
2450
2451 (define_insn "avx512f_maskcmp<mode>3"
2452 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2453 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2454 [(match_operand:VF 1 "register_operand" "v")
2455 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2456 "TARGET_AVX512F"
2457 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2458 [(set_attr "type" "ssecmp")
2459 (set_attr "length_immediate" "1")
2460 (set_attr "prefix" "evex")
2461 (set_attr "mode" "<sseinsnmode>")])
2462
2463 (define_insn "<sse>_comi<round_saeonly_name>"
2464 [(set (reg:CCFP FLAGS_REG)
2465 (compare:CCFP
2466 (vec_select:MODEF
2467 (match_operand:<ssevecmode> 0 "register_operand" "v")
2468 (parallel [(const_int 0)]))
2469 (vec_select:MODEF
2470 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2471 (parallel [(const_int 0)]))))]
2472 "SSE_FLOAT_MODE_P (<MODE>mode)"
2473 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2474 [(set_attr "type" "ssecomi")
2475 (set_attr "prefix" "maybe_vex")
2476 (set_attr "prefix_rep" "0")
2477 (set (attr "prefix_data16")
2478 (if_then_else (eq_attr "mode" "DF")
2479 (const_string "1")
2480 (const_string "0")))
2481 (set_attr "mode" "<MODE>")])
2482
2483 (define_insn "<sse>_ucomi<round_saeonly_name>"
2484 [(set (reg:CCFPU FLAGS_REG)
2485 (compare:CCFPU
2486 (vec_select:MODEF
2487 (match_operand:<ssevecmode> 0 "register_operand" "v")
2488 (parallel [(const_int 0)]))
2489 (vec_select:MODEF
2490 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2491 (parallel [(const_int 0)]))))]
2492 "SSE_FLOAT_MODE_P (<MODE>mode)"
2493 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2494 [(set_attr "type" "ssecomi")
2495 (set_attr "prefix" "maybe_vex")
2496 (set_attr "prefix_rep" "0")
2497 (set (attr "prefix_data16")
2498 (if_then_else (eq_attr "mode" "DF")
2499 (const_string "1")
2500 (const_string "0")))
2501 (set_attr "mode" "<MODE>")])
2502
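;; The vec_cmp expanders below produce two result shapes: the AVX-512
;; forms yield a k-mask register with one bit per element, while the
;; older forms build a full-width vector of all-ones/all-zeros
;; elements.  A hedged intrinsics-level sketch of the two shapes:
;;
;;   #include <immintrin.h>
;;   __mmask16 cmp_mask_v16sf (__m512 a, __m512 b)
;;   { return _mm512_cmp_ps_mask (a, b, _CMP_LT_OS); }
;;   __m128 cmp_wide_v4sf (__m128 a, __m128 b)
;;   { return _mm_cmplt_ps (a, b); }
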
2503 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2504 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2505 (match_operator:<avx512fmaskmode> 1 ""
2506 [(match_operand:V48_AVX512VL 2 "register_operand")
2507 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
2508 "TARGET_AVX512F"
2509 {
2510 bool ok = ix86_expand_mask_vec_cmp (operands);
2511 gcc_assert (ok);
2512 DONE;
2513 })
2514
2515 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2516 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2517 (match_operator:<avx512fmaskmode> 1 ""
2518 [(match_operand:VI12_AVX512VL 2 "register_operand")
2519 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2520 "TARGET_AVX512BW"
2521 {
2522 bool ok = ix86_expand_mask_vec_cmp (operands);
2523 gcc_assert (ok);
2524 DONE;
2525 })
2526
2527 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2528 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2529 (match_operator:<sseintvecmode> 1 ""
2530 [(match_operand:VI_256 2 "register_operand")
2531 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2532 "TARGET_AVX2"
2533 {
2534 bool ok = ix86_expand_int_vec_cmp (operands);
2535 gcc_assert (ok);
2536 DONE;
2537 })
2538
2539 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2540 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2541 (match_operator:<sseintvecmode> 1 ""
2542 [(match_operand:VI124_128 2 "register_operand")
2543 (match_operand:VI124_128 3 "vector_operand")]))]
2544 "TARGET_SSE2"
2545 {
2546 bool ok = ix86_expand_int_vec_cmp (operands);
2547 gcc_assert (ok);
2548 DONE;
2549 })
2550
2551 (define_expand "vec_cmpv2div2di"
2552 [(set (match_operand:V2DI 0 "register_operand")
2553 (match_operator:V2DI 1 ""
2554 [(match_operand:V2DI 2 "register_operand")
2555 (match_operand:V2DI 3 "vector_operand")]))]
2556 "TARGET_SSE4_2"
2557 {
2558 bool ok = ix86_expand_int_vec_cmp (operands);
2559 gcc_assert (ok);
2560 DONE;
2561 })
2562
2563 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2564 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2565 (match_operator:<sseintvecmode> 1 ""
2566 [(match_operand:VF_256 2 "register_operand")
2567 (match_operand:VF_256 3 "nonimmediate_operand")]))]
2568 "TARGET_AVX"
2569 {
2570 bool ok = ix86_expand_fp_vec_cmp (operands);
2571 gcc_assert (ok);
2572 DONE;
2573 })
2574
2575 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2576 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2577 (match_operator:<sseintvecmode> 1 ""
2578 [(match_operand:VF_128 2 "register_operand")
2579 (match_operand:VF_128 3 "vector_operand")]))]
2580 "TARGET_SSE"
2581 {
2582 bool ok = ix86_expand_fp_vec_cmp (operands);
2583 gcc_assert (ok);
2584 DONE;
2585 })
2586
2587 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2588 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2589 (match_operator:<avx512fmaskmode> 1 ""
2590 [(match_operand:VI48_AVX512VL 2 "register_operand")
2591 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
2592 "TARGET_AVX512F"
2593 {
2594 bool ok = ix86_expand_mask_vec_cmp (operands);
2595 gcc_assert (ok);
2596 DONE;
2597 })
2598
2599 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2600 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2601 (match_operator:<avx512fmaskmode> 1 ""
2602 [(match_operand:VI12_AVX512VL 2 "register_operand")
2603 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2604 "TARGET_AVX512BW"
2605 {
2606 bool ok = ix86_expand_mask_vec_cmp (operands);
2607 gcc_assert (ok);
2608 DONE;
2609 })
2610
2611 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
2612 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2613 (match_operator:<sseintvecmode> 1 ""
2614 [(match_operand:VI_256 2 "register_operand")
2615 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2616 "TARGET_AVX2"
2617 {
2618 bool ok = ix86_expand_int_vec_cmp (operands);
2619 gcc_assert (ok);
2620 DONE;
2621 })
2622
2623 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
2624 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2625 (match_operator:<sseintvecmode> 1 ""
2626 [(match_operand:VI124_128 2 "register_operand")
2627 (match_operand:VI124_128 3 "vector_operand")]))]
2628 "TARGET_SSE2"
2629 {
2630 bool ok = ix86_expand_int_vec_cmp (operands);
2631 gcc_assert (ok);
2632 DONE;
2633 })
2634
2635 (define_expand "vec_cmpuv2div2di"
2636 [(set (match_operand:V2DI 0 "register_operand")
2637 (match_operator:V2DI 1 ""
2638 [(match_operand:V2DI 2 "register_operand")
2639 (match_operand:V2DI 3 "vector_operand")]))]
2640 "TARGET_SSE4_2"
2641 {
2642 bool ok = ix86_expand_int_vec_cmp (operands);
2643 gcc_assert (ok);
2644 DONE;
2645 })
2646
2647 (define_expand "vcond<V_512:mode><VF_512:mode>"
2648 [(set (match_operand:V_512 0 "register_operand")
2649 (if_then_else:V_512
2650 (match_operator 3 ""
2651 [(match_operand:VF_512 4 "nonimmediate_operand")
2652 (match_operand:VF_512 5 "nonimmediate_operand")])
2653 (match_operand:V_512 1 "general_operand")
2654 (match_operand:V_512 2 "general_operand")))]
2655 "TARGET_AVX512F
2656 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2657 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2658 {
2659 bool ok = ix86_expand_fp_vcond (operands);
2660 gcc_assert (ok);
2661 DONE;
2662 })
2663
2664 (define_expand "vcond<V_256:mode><VF_256:mode>"
2665 [(set (match_operand:V_256 0 "register_operand")
2666 (if_then_else:V_256
2667 (match_operator 3 ""
2668 [(match_operand:VF_256 4 "nonimmediate_operand")
2669 (match_operand:VF_256 5 "nonimmediate_operand")])
2670 (match_operand:V_256 1 "general_operand")
2671 (match_operand:V_256 2 "general_operand")))]
2672 "TARGET_AVX
2673 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2674 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2675 {
2676 bool ok = ix86_expand_fp_vcond (operands);
2677 gcc_assert (ok);
2678 DONE;
2679 })
2680
2681 (define_expand "vcond<V_128:mode><VF_128:mode>"
2682 [(set (match_operand:V_128 0 "register_operand")
2683 (if_then_else:V_128
2684 (match_operator 3 ""
2685 [(match_operand:VF_128 4 "vector_operand")
2686 (match_operand:VF_128 5 "vector_operand")])
2687 (match_operand:V_128 1 "general_operand")
2688 (match_operand:V_128 2 "general_operand")))]
2689 "TARGET_SSE
2690 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2691 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2692 {
2693 bool ok = ix86_expand_fp_vcond (operands);
2694 gcc_assert (ok);
2695 DONE;
2696 })
2697
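;; The vcond_mask patterns implement the per-element select that
;; consumes those comparison results: dst[i] = mask[i] ? op1[i] : op2[i].
;; A minimal C sketch of the semantics (illustrative only):
;;
;;   void vcond_mask_model (int *dst, const int *mask,
;;                          const int *op1, const int *op2)
;;   {
;;     for (int i = 0; i < 4; i++)
;;       dst[i] = mask[i] ? op1[i] : op2[i];
;;   }
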
2698 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
2699 [(set (match_operand:V48_AVX512VL 0 "register_operand")
2700 (vec_merge:V48_AVX512VL
2701 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
2702 (match_operand:V48_AVX512VL 2 "vector_move_operand")
2703 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
2704 "TARGET_AVX512F")
2705
2706 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
2707 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
2708 (vec_merge:VI12_AVX512VL
2709 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
2710 (match_operand:VI12_AVX512VL 2 "vector_move_operand")
2711 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
2712 "TARGET_AVX512BW")
2713
2714 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
2715 [(set (match_operand:VI_256 0 "register_operand")
2716 (vec_merge:VI_256
2717 (match_operand:VI_256 1 "nonimmediate_operand")
2718 (match_operand:VI_256 2 "vector_move_operand")
2719 (match_operand:<sseintvecmode> 3 "register_operand")))]
2720 "TARGET_AVX2"
2721 {
2722 ix86_expand_sse_movcc (operands[0], operands[3],
2723 operands[1], operands[2]);
2724 DONE;
2725 })
2726
2727 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
2728 [(set (match_operand:VI124_128 0 "register_operand")
2729 (vec_merge:VI124_128
2730 (match_operand:VI124_128 1 "vector_operand")
2731 (match_operand:VI124_128 2 "vector_move_operand")
2732 (match_operand:<sseintvecmode> 3 "register_operand")))]
2733 "TARGET_SSE2"
2734 {
2735 ix86_expand_sse_movcc (operands[0], operands[3],
2736 operands[1], operands[2]);
2737 DONE;
2738 })
2739
2740 (define_expand "vcond_mask_v2div2di"
2741 [(set (match_operand:V2DI 0 "register_operand")
2742 (vec_merge:V2DI
2743 (match_operand:V2DI 1 "vector_operand")
2744 (match_operand:V2DI 2 "vector_move_operand")
2745 (match_operand:V2DI 3 "register_operand")))]
2746 "TARGET_SSE4_2"
2747 {
2748 ix86_expand_sse_movcc (operands[0], operands[3],
2749 operands[1], operands[2]);
2750 DONE;
2751 })
2752
2753 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
2754 [(set (match_operand:VF_256 0 "register_operand")
2755 (vec_merge:VF_256
2756 (match_operand:VF_256 1 "nonimmediate_operand")
2757 (match_operand:VF_256 2 "vector_move_operand")
2758 (match_operand:<sseintvecmode> 3 "register_operand")))]
2759 "TARGET_AVX"
2760 {
2761 ix86_expand_sse_movcc (operands[0], operands[3],
2762 operands[1], operands[2]);
2763 DONE;
2764 })
2765
2766 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
2767 [(set (match_operand:VF_128 0 "register_operand")
2768 (vec_merge:VF_128
2769 (match_operand:VF_128 1 "vector_operand")
2770 (match_operand:VF_128 2 "vector_move_operand")
2771 (match_operand:<sseintvecmode> 3 "register_operand")))]
2772 "TARGET_SSE"
2773 {
2774 ix86_expand_sse_movcc (operands[0], operands[3],
2775 operands[1], operands[2]);
2776 DONE;
2777 })
2778
2779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2780 ;;
2781 ;; Parallel floating point logical operations
2782 ;;
2783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2784
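;; These patterns operate on the raw bit images of the float elements;
;; ANDN computes ~op1 & op2 and is the building block for abs/copysign
;; style sign-bit manipulation.  A scalar C sketch of andnps on one
;; element (illustrative only):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   float andnps_model (float a, float b)
;;   {
;;     uint32_t ua, ub, ur;
;;     memcpy (&ua, &a, 4);
;;     memcpy (&ub, &b, 4);
;;     ur = ~ua & ub;   /* NOT of the first operand, AND of the second.  */
;;     float r;
;;     memcpy (&r, &ur, 4);
;;     return r;
;;   }
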
2785 (define_insn "<sse>_andnot<mode>3<mask_name>"
2786 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2787 (and:VF_128_256
2788 (not:VF_128_256
2789 (match_operand:VF_128_256 1 "register_operand" "0,v"))
2790 (match_operand:VF_128_256 2 "vector_operand" "xBm,vm")))]
2791 "TARGET_SSE && <mask_avx512vl_condition>"
2792 {
2793 static char buf[128];
2794 const char *ops;
2795 const char *suffix;
2796
2797 switch (get_attr_mode (insn))
2798 {
2799 case MODE_V8SF:
2800 case MODE_V4SF:
2801 suffix = "ps";
2802 break;
2803 default:
2804 suffix = "<ssemodesuffix>";
2805 }
2806
2807 switch (which_alternative)
2808 {
2809 case 0:
2810 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2811 break;
2812 case 1:
2813 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2814 break;
2815 default:
2816 gcc_unreachable ();
2817 }
2818
2819 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
2820 if (<mask_applied> && !TARGET_AVX512DQ)
2821 {
2822 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2823 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2824 }
2825
2826 snprintf (buf, sizeof (buf), ops, suffix);
2827 return buf;
2828 }
2829 [(set_attr "isa" "noavx,avx")
2830 (set_attr "type" "sselog")
2831 (set_attr "prefix" "orig,maybe_evex")
2832 (set (attr "mode")
2833 (cond [(and (match_test "<MODE_SIZE> == 16")
2834 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2835 (const_string "<ssePSmode>")
2836 (match_test "TARGET_AVX")
2837 (const_string "<MODE>")
2838 (match_test "optimize_function_for_size_p (cfun)")
2839 (const_string "V4SF")
2840 ]
2841 (const_string "<MODE>")))])
2842
2843
2844 (define_insn "<sse>_andnot<mode>3<mask_name>"
2845 [(set (match_operand:VF_512 0 "register_operand" "=v")
2846 (and:VF_512
2847 (not:VF_512
2848 (match_operand:VF_512 1 "register_operand" "v"))
2849 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2850 "TARGET_AVX512F"
2851 {
2852 static char buf[128];
2853 const char *ops;
2854 const char *suffix;
2855
2856 suffix = "<ssemodesuffix>";
2857 ops = "";
2858
2859 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
2860 if (!TARGET_AVX512DQ)
2861 {
2862 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2863 ops = "p";
2864 }
2865
2866 snprintf (buf, sizeof (buf),
2867 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
2868 ops, suffix);
2869 return buf;
2870 }
2871 [(set_attr "type" "sselog")
2872 (set_attr "prefix" "evex")
2873 (set_attr "mode" "<sseinsnmode>")])
2874
2875 (define_expand "<code><mode>3<mask_name>"
2876 [(set (match_operand:VF_128_256 0 "register_operand")
2877 (any_logic:VF_128_256
2878 (match_operand:VF_128_256 1 "vector_operand")
2879 (match_operand:VF_128_256 2 "vector_operand")))]
2880 "TARGET_SSE && <mask_avx512vl_condition>"
2881 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
2882
2883 (define_expand "<code><mode>3<mask_name>"
2884 [(set (match_operand:VF_512 0 "register_operand")
2885 (any_logic:VF_512
2886 (match_operand:VF_512 1 "nonimmediate_operand")
2887 (match_operand:VF_512 2 "nonimmediate_operand")))]
2888 "TARGET_AVX512F"
2889 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
2890
2891 (define_insn "*<code><mode>3<mask_name>"
2892 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2893 (any_logic:VF_128_256
2894 (match_operand:VF_128_256 1 "vector_operand" "%0,v")
2895 (match_operand:VF_128_256 2 "vector_operand" "xBm,vm")))]
2896 "TARGET_SSE && <mask_avx512vl_condition>
2897 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2898 {
2899 static char buf[128];
2900 const char *ops;
2901 const char *suffix;
2902
2903 switch (get_attr_mode (insn))
2904 {
2905 case MODE_V8SF:
2906 case MODE_V4SF:
2907 suffix = "ps";
2908 break;
2909 default:
2910 suffix = "<ssemodesuffix>";
2911 }
2912
2913 switch (which_alternative)
2914 {
2915 case 0:
2916 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2917 break;
2918 case 1:
2919 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2920 break;
2921 default:
2922 gcc_unreachable ();
2923 }
2924
2925 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
2926 if (<mask_applied> && !TARGET_AVX512DQ)
2927 {
2928 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2929 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2930 }
2931
2932 snprintf (buf, sizeof (buf), ops, suffix);
2933 return buf;
2934 }
2935 [(set_attr "isa" "noavx,avx")
2936 (set_attr "type" "sselog")
2937 (set_attr "prefix" "orig,maybe_evex")
2938 (set (attr "mode")
2939 (cond [(and (match_test "<MODE_SIZE> == 16")
2940 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2941 (const_string "<ssePSmode>")
2942 (match_test "TARGET_AVX")
2943 (const_string "<MODE>")
2944 (match_test "optimize_function_for_size_p (cfun)")
2945 (const_string "V4SF")
2946 ]
2947 (const_string "<MODE>")))])
2948
2949 (define_insn "*<code><mode>3<mask_name>"
2950 [(set (match_operand:VF_512 0 "register_operand" "=v")
2951 (any_logic:VF_512
2952 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
2953 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2954 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2955 {
2956 static char buf[128];
2957 const char *ops;
2958 const char *suffix;
2959
2960 suffix = "<ssemodesuffix>";
2961 ops = "";
2962
2963 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
2964 if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ)
2965 {
2966 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2967 ops = "p";
2968 }
2969
2970 snprintf (buf, sizeof (buf),
2971 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
2972 ops, suffix);
2973 return buf;
2974 }
2975 [(set_attr "type" "sselog")
2976 (set_attr "prefix" "evex")
2977 (set_attr "mode" "<sseinsnmode>")])
2978
2979 (define_expand "copysign<mode>3"
2980 [(set (match_dup 4)
2981 (and:VF
2982 (not:VF (match_dup 3))
2983 (match_operand:VF 1 "vector_operand")))
2984 (set (match_dup 5)
2985 (and:VF (match_dup 3)
2986 (match_operand:VF 2 "vector_operand")))
2987 (set (match_operand:VF 0 "register_operand")
2988 (ior:VF (match_dup 4) (match_dup 5)))]
2989 "TARGET_SSE"
2990 {
2991 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2992
2993 operands[4] = gen_reg_rtx (<MODE>mode);
2994 operands[5] = gen_reg_rtx (<MODE>mode);
2995 })
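
;; I.e. copysign (op1, op2) is assembled from three bitwise steps:
;; clear op1's sign bit, isolate op2's sign bit, then OR the results.
;; A scalar C sketch of the same idea (illustrative only):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   float copysignf_model (float x, float y)
;;   {
;;     uint32_t ux, uy, ur;
;;     memcpy (&ux, &x, 4);
;;     memcpy (&uy, &y, 4);
;;     ur = (ux & 0x7fffffffu) | (uy & 0x80000000u);
;;     float r;
;;     memcpy (&r, &ur, 4);
;;     return r;
;;   }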
2996
2997 ;; Also define scalar versions. These are used for abs, neg, and
2998 ;; conditional move. Using subregs into vector modes causes register
2999 ;; allocation lossage. These patterns do not allow memory operands
3000 ;; because the native instructions read the full 128 bits.
3001
3002 (define_insn "*andnot<mode>3"
3003 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
3004 (and:MODEF
3005 (not:MODEF
3006 (match_operand:MODEF 1 "register_operand" "0,x"))
3007 (match_operand:MODEF 2 "register_operand" "x,x")))]
3008 "SSE_FLOAT_MODE_P (<MODE>mode)"
3009 {
3010 static char buf[32];
3011 const char *ops;
3012 const char *suffix
3013 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3014
3015 switch (which_alternative)
3016 {
3017 case 0:
3018 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3019 break;
3020 case 1:
3021 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3022 break;
3023 default:
3024 gcc_unreachable ();
3025 }
3026
3027 snprintf (buf, sizeof (buf), ops, suffix);
3028 return buf;
3029 }
3030 [(set_attr "isa" "noavx,avx")
3031 (set_attr "type" "sselog")
3032 (set_attr "prefix" "orig,vex")
3033 (set (attr "mode")
3034 (cond [(and (match_test "<MODE_SIZE> == 16")
3035 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3036 (const_string "V4SF")
3037 (match_test "TARGET_AVX")
3038 (const_string "<ssevecmode>")
3039 (match_test "optimize_function_for_size_p (cfun)")
3040 (const_string "V4SF")
3041 ]
3042 (const_string "<ssevecmode>")))])
3043
3044 (define_insn "*andnottf3"
3045 [(set (match_operand:TF 0 "register_operand" "=x,x")
3046 (and:TF
3047 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
3048 (match_operand:TF 2 "vector_operand" "xBm,xm")))]
3049 "TARGET_SSE"
3050 {
3051 static char buf[32];
3052 const char *ops;
3053 const char *tmp
3054 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
3055
3056 switch (which_alternative)
3057 {
3058 case 0:
3059 ops = "%s\t{%%2, %%0|%%0, %%2}";
3060 break;
3061 case 1:
3062 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3063 break;
3064 default:
3065 gcc_unreachable ();
3066 }
3067
3068 snprintf (buf, sizeof (buf), ops, tmp);
3069 return buf;
3070 }
3071 [(set_attr "isa" "noavx,avx")
3072 (set_attr "type" "sselog")
3073 (set (attr "prefix_data16")
3074 (if_then_else
3075 (and (eq_attr "alternative" "0")
3076 (eq_attr "mode" "TI"))
3077 (const_string "1")
3078 (const_string "*")))
3079 (set_attr "prefix" "orig,vex")
3080 (set (attr "mode")
3081 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3082 (const_string "V4SF")
3083 (match_test "TARGET_AVX")
3084 (const_string "TI")
3085 (ior (not (match_test "TARGET_SSE2"))
3086 (match_test "optimize_function_for_size_p (cfun)"))
3087 (const_string "V4SF")
3088 ]
3089 (const_string "TI")))])
3090
3091 (define_insn "*<code><mode>3"
3092 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
3093 (any_logic:MODEF
3094 (match_operand:MODEF 1 "register_operand" "%0,x")
3095 (match_operand:MODEF 2 "register_operand" "x,x")))]
3096 "SSE_FLOAT_MODE_P (<MODE>mode)"
3097 {
3098 static char buf[32];
3099 const char *ops;
3100 const char *suffix
3101 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3102
3103 switch (which_alternative)
3104 {
3105 case 0:
3106 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3107 break;
3108 case 1:
3109 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3110 break;
3111 default:
3112 gcc_unreachable ();
3113 }
3114
3115 snprintf (buf, sizeof (buf), ops, suffix);
3116 return buf;
3117 }
3118 [(set_attr "isa" "noavx,avx")
3119 (set_attr "type" "sselog")
3120 (set_attr "prefix" "orig,vex")
3121 (set (attr "mode")
3122 (cond [(and (match_test "<MODE_SIZE> == 16")
3123 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3124 (const_string "V4SF")
3125 (match_test "TARGET_AVX")
3126 (const_string "<ssevecmode>")
3127 (match_test "optimize_function_for_size_p (cfun)")
3128 (const_string "V4SF")
3129 ]
3130 (const_string "<ssevecmode>")))])
3131
3132 (define_expand "<code>tf3"
3133 [(set (match_operand:TF 0 "register_operand")
3134 (any_logic:TF
3135 (match_operand:TF 1 "vector_operand")
3136 (match_operand:TF 2 "vector_operand")))]
3137 "TARGET_SSE"
3138 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3139
3140 (define_insn "*<code>tf3"
3141 [(set (match_operand:TF 0 "register_operand" "=x,x")
3142 (any_logic:TF
3143 (match_operand:TF 1 "vector_operand" "%0,x")
3144 (match_operand:TF 2 "vector_operand" "xBm,xm")))]
3145 "TARGET_SSE
3146 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3147 {
3148 static char buf[32];
3149 const char *ops;
3150 const char *tmp
3151 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
3152
3153 switch (which_alternative)
3154 {
3155 case 0:
3156 ops = "%s\t{%%2, %%0|%%0, %%2}";
3157 break;
3158 case 1:
3159 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3160 break;
3161 default:
3162 gcc_unreachable ();
3163 }
3164
3165 snprintf (buf, sizeof (buf), ops, tmp);
3166 return buf;
3167 }
3168 [(set_attr "isa" "noavx,avx")
3169 (set_attr "type" "sselog")
3170 (set (attr "prefix_data16")
3171 (if_then_else
3172 (and (eq_attr "alternative" "0")
3173 (eq_attr "mode" "TI"))
3174 (const_string "1")
3175 (const_string "*")))
3176 (set_attr "prefix" "orig,vex")
3177 (set (attr "mode")
3178 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3179 (const_string "V4SF")
3180 (match_test "TARGET_AVX")
3181 (const_string "TI")
3182 (ior (not (match_test "TARGET_SSE2"))
3183 (match_test "optimize_function_for_size_p (cfun)"))
3184 (const_string "V4SF")
3185 ]
3186 (const_string "TI")))])
3187
3188 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3189 ;;
3190 ;; FMA floating point multiply/accumulate instructions. These include
3191 ;; scalar versions of the instructions as well as vector versions.
3192 ;;
3193 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3194
3195 ;; The standard names for scalar FMA are only available with SSE math enabled.
3196 ;; CPUID bit AVX512F enables EVEX-encoded scalar and 512-bit fma. It doesn't
3197 ;; care about the FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3198 ;; and TARGET_FMA4 are both false.
3199 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3200 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3201 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3202 ;; that supports AVX512F also supports FMA so we can ignore this for now.
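;;
;; These patterns implement a true fused multiply-add: op1 * op2 + op3
;; with a single rounding, which is what distinguishes them from a
;; separate multiply and add.  A C illustration of the difference,
;; using the C99 fma() library function purely as a model:
;;
;;   #include <math.h>
;;   double fused (double a, double b, double c)   { return fma (a, b, c); }
;;   double unfused (double a, double b, double c) { return a * b + c; }
;;   /* fused rounds once; unfused may round the intermediate product.  */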
3203 (define_mode_iterator FMAMODEM
3204 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3205 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3206 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3207 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3208 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3209 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3210 (V16SF "TARGET_AVX512F")
3211 (V8DF "TARGET_AVX512F")])
3212
3213 (define_expand "fma<mode>4"
3214 [(set (match_operand:FMAMODEM 0 "register_operand")
3215 (fma:FMAMODEM
3216 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3217 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3218 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3219
3220 (define_expand "fms<mode>4"
3221 [(set (match_operand:FMAMODEM 0 "register_operand")
3222 (fma:FMAMODEM
3223 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3224 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3225 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3226
3227 (define_expand "fnma<mode>4"
3228 [(set (match_operand:FMAMODEM 0 "register_operand")
3229 (fma:FMAMODEM
3230 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3231 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3232 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3233
3234 (define_expand "fnms<mode>4"
3235 [(set (match_operand:FMAMODEM 0 "register_operand")
3236 (fma:FMAMODEM
3237 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3238 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3239 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3240
3241 ;; The builtins for intrinsics are not constrained by SSE math being enabled.
3242 (define_mode_iterator FMAMODE_AVX512
3243 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3244 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3245 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3246 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3247 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3248 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3249 (V16SF "TARGET_AVX512F")
3250 (V8DF "TARGET_AVX512F")])
3251
3252 (define_mode_iterator FMAMODE
3253 [SF DF V4SF V2DF V8SF V4DF])
3254
3255 (define_expand "fma4i_fmadd_<mode>"
3256 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3257 (fma:FMAMODE_AVX512
3258 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3259 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3260 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3261
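;; The *_maskz expanders below implement zero-masking: the FMA result is
;; merged against CONST0_RTX under the mask in operand 4, so lanes whose
;; mask bit is clear end up as 0.0 instead of keeping their old contents.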
3262 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3263 [(match_operand:VF_AVX512VL 0 "register_operand")
3264 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3265 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3266 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3267 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3268 "TARGET_AVX512F && <round_mode512bit_condition>"
3269 {
3270 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3271 operands[0], operands[1], operands[2], operands[3],
3272 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3273 DONE;
3274 })
3275
3276 (define_insn "*fma_fmadd_<mode>"
3277 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3278 (fma:FMAMODE
3279 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3280 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3281 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3282 "TARGET_FMA || TARGET_FMA4"
3283 "@
3284 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3285 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3286 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3287 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3288 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3289 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3290 (set_attr "type" "ssemuladd")
3291 (set_attr "mode" "<MODE>")])
3292
3293 ;; Assume AVX-512F as the baseline.
3294 (define_mode_iterator VF_SF_AVX512VL
3295 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3296 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3297
3298 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3299 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3300 (fma:VF_SF_AVX512VL
3301 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3302 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3303 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3304 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3305 "@
3306 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3307 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3308 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3309 [(set_attr "type" "ssemuladd")
3310 (set_attr "mode" "<MODE>")])
3311
3312 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3313 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3314 (vec_merge:VF_AVX512VL
3315 (fma:VF_AVX512VL
3316 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3317 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3318 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3319 (match_dup 1)
3320 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3321 "TARGET_AVX512F && <round_mode512bit_condition>"
3322 "@
3323 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3324 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3325 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3326 (set_attr "type" "ssemuladd")
3327 (set_attr "mode" "<MODE>")])
3328
3329 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3330 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3331 (vec_merge:VF_AVX512VL
3332 (fma:VF_AVX512VL
3333 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3334 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3335 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3336 (match_dup 3)
3337 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3338 "TARGET_AVX512F"
3339 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3340 [(set_attr "isa" "fma_avx512f")
3341 (set_attr "type" "ssemuladd")
3342 (set_attr "mode" "<MODE>")])
3343
3344 (define_insn "*fma_fmsub_<mode>"
3345 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3346 (fma:FMAMODE
3347 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3348 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3349 (neg:FMAMODE
3350 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3351 "TARGET_FMA || TARGET_FMA4"
3352 "@
3353 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3354 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3355 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3356 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3357 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3358 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3359 (set_attr "type" "ssemuladd")
3360 (set_attr "mode" "<MODE>")])
3361
3362 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3363 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3364 (fma:VF_SF_AVX512VL
3365 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3366 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3367 (neg:VF_SF_AVX512VL
3368 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3369 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3370 "@
3371 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3372 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3373 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3374 [(set_attr "type" "ssemuladd")
3375 (set_attr "mode" "<MODE>")])
3376
3377 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3378 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3379 (vec_merge:VF_AVX512VL
3380 (fma:VF_AVX512VL
3381 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3382 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3383 (neg:VF_AVX512VL
3384 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3385 (match_dup 1)
3386 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3387 "TARGET_AVX512F"
3388 "@
3389 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3390 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3391 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3392 (set_attr "type" "ssemuladd")
3393 (set_attr "mode" "<MODE>")])
3394
3395 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3396 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3397 (vec_merge:VF_AVX512VL
3398 (fma:VF_AVX512VL
3399 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3400 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3401 (neg:VF_AVX512VL
3402 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3403 (match_dup 3)
3404 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3405 "TARGET_AVX512F && <round_mode512bit_condition>"
3406 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3407 [(set_attr "isa" "fma_avx512f")
3408 (set_attr "type" "ssemuladd")
3409 (set_attr "mode" "<MODE>")])
3410
3411 (define_insn "*fma_fnmadd_<mode>"
3412 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3413 (fma:FMAMODE
3414 (neg:FMAMODE
3415 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3416 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3417 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3418 "TARGET_FMA || TARGET_FMA4"
3419 "@
3420 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3421 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3422 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3423 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3424 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3425 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3426 (set_attr "type" "ssemuladd")
3427 (set_attr "mode" "<MODE>")])
3428
3429 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3430 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3431 (fma:VF_SF_AVX512VL
3432 (neg:VF_SF_AVX512VL
3433 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3434 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3435 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3436 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3437 "@
3438 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3439 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3440 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3441 [(set_attr "type" "ssemuladd")
3442 (set_attr "mode" "<MODE>")])
3443
3444 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3445 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3446 (vec_merge:VF_AVX512VL
3447 (fma:VF_AVX512VL
3448 (neg:VF_AVX512VL
3449 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3450 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3451 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3452 (match_dup 1)
3453 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3454 "TARGET_AVX512F && <round_mode512bit_condition>"
3455 "@
3456 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3457 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3458 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3459 (set_attr "type" "ssemuladd")
3460 (set_attr "mode" "<MODE>")])
3461
3462 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3463 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3464 (vec_merge:VF_AVX512VL
3465 (fma:VF_AVX512VL
3466 (neg:VF_AVX512VL
3467 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3468 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3469 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3470 (match_dup 3)
3471 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3472 "TARGET_AVX512F && <round_mode512bit_condition>"
3473 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3474 [(set_attr "isa" "fma_avx512f")
3475 (set_attr "type" "ssemuladd")
3476 (set_attr "mode" "<MODE>")])
3477
3478 (define_insn "*fma_fnmsub_<mode>"
3479 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3480 (fma:FMAMODE
3481 (neg:FMAMODE
3482 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3483 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3484 (neg:FMAMODE
3485 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3486 "TARGET_FMA || TARGET_FMA4"
3487 "@
3488 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3489 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3490 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3491 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3492 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3493 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3494 (set_attr "type" "ssemuladd")
3495 (set_attr "mode" "<MODE>")])
3496
3497 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3498 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3499 (fma:VF_SF_AVX512VL
3500 (neg:VF_SF_AVX512VL
3501 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3502 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3503 (neg:VF_SF_AVX512VL
3504 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3505 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3506 "@
3507 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3508 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3509 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3510 [(set_attr "type" "ssemuladd")
3511 (set_attr "mode" "<MODE>")])
3512
3513 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3514 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3515 (vec_merge:VF_AVX512VL
3516 (fma:VF_AVX512VL
3517 (neg:VF_AVX512VL
3518 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3519 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3520 (neg:VF_AVX512VL
3521 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3522 (match_dup 1)
3523 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3524 "TARGET_AVX512F && <round_mode512bit_condition>"
3525 "@
3526 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3527 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3528 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3529 (set_attr "type" "ssemuladd")
3530 (set_attr "mode" "<MODE>")])
3531
3532 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3533 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3534 (vec_merge:VF_AVX512VL
3535 (fma:VF_AVX512VL
3536 (neg:VF_AVX512VL
3537 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3538 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3539 (neg:VF_AVX512VL
3540 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3541 (match_dup 3)
3542 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3543 "TARGET_AVX512F"
3544 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3545 [(set_attr "isa" "fma_avx512f")
3546 (set_attr "type" "ssemuladd")
3547 (set_attr "mode" "<MODE>")])
3548
3549 ;; FMA parallel floating point multiply addsub and subadd operations.
3550
3551 ;; It would be possible to represent these without the UNSPEC as
3552 ;;
3553 ;; (vec_merge
3554 ;; (fma op1 op2 op3)
3555 ;; (fma op1 op2 (neg op3))
3556 ;; (merge-const))
3557 ;;
3558 ;; But this doesn't seem useful in practice.
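;; For reference, UNSPEC_FMADDSUB (the vfmaddsub* instructions) alternates
;; the sign of the third operand per lane: even lanes subtract it, odd lanes
;; add it.  A scalar sketch of the V4SF case, with a/b/c standing for
;; operands 1/2/3:
;;
;;   for (int i = 0; i < 4; i++)
;;     r[i] = a[i] * b[i] + ((i & 1) ? c[i] : -c[i]);
;;
;; The fmsubadd patterns further down express the opposite parity simply by
;; wrapping operand 3 in (neg ...) inside the same unspec.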
3559
3560 (define_expand "fmaddsub_<mode>"
3561 [(set (match_operand:VF 0 "register_operand")
3562 (unspec:VF
3563 [(match_operand:VF 1 "nonimmediate_operand")
3564 (match_operand:VF 2 "nonimmediate_operand")
3565 (match_operand:VF 3 "nonimmediate_operand")]
3566 UNSPEC_FMADDSUB))]
3567 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3568
3569 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3570 [(match_operand:VF_AVX512VL 0 "register_operand")
3571 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3572 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3573 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3574 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3575 "TARGET_AVX512F"
3576 {
3577 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3578 operands[0], operands[1], operands[2], operands[3],
3579 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3580 DONE;
3581 })
3582
3583 (define_insn "*fma_fmaddsub_<mode>"
3584 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3585 (unspec:VF_128_256
3586 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3587 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3588 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
3589 UNSPEC_FMADDSUB))]
3590 "TARGET_FMA || TARGET_FMA4"
3591 "@
3592 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3593 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3594 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3595 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3596 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3597 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3598 (set_attr "type" "ssemuladd")
3599 (set_attr "mode" "<MODE>")])
3600
3601 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3602 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3603 (unspec:VF_SF_AVX512VL
3604 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3605 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3606 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3607 UNSPEC_FMADDSUB))]
3608 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3609 "@
3610 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3611 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3612 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3613 [(set_attr "type" "ssemuladd")
3614 (set_attr "mode" "<MODE>")])
3615
3616 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
3617 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3618 (vec_merge:VF_AVX512VL
3619 (unspec:VF_AVX512VL
3620 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3621 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3622 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
3623 UNSPEC_FMADDSUB)
3624 (match_dup 1)
3625 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3626 "TARGET_AVX512F"
3627 "@
3628 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3629 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3630 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3631 (set_attr "type" "ssemuladd")
3632 (set_attr "mode" "<MODE>")])
3633
3634 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
3635 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3636 (vec_merge:VF_AVX512VL
3637 (unspec:VF_AVX512VL
3638 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3639 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3640 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
3641 UNSPEC_FMADDSUB)
3642 (match_dup 3)
3643 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3644 "TARGET_AVX512F"
3645 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3646 [(set_attr "isa" "fma_avx512f")
3647 (set_attr "type" "ssemuladd")
3648 (set_attr "mode" "<MODE>")])
3649
3650 (define_insn "*fma_fmsubadd_<mode>"
3651 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3652 (unspec:VF_128_256
3653 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3654 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3655 (neg:VF_128_256
3656 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
3657 UNSPEC_FMADDSUB))]
3658 "TARGET_FMA || TARGET_FMA4"
3659 "@
3660 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3661 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3662 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3663 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3664 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3665 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3666 (set_attr "type" "ssemuladd")
3667 (set_attr "mode" "<MODE>")])
3668
3669 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3670 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3671 (unspec:VF_SF_AVX512VL
3672 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3673 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3674 (neg:VF_SF_AVX512VL
3675 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
3676 UNSPEC_FMADDSUB))]
3677 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3678 "@
3679 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3680 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3681 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3682 [(set_attr "type" "ssemuladd")
3683 (set_attr "mode" "<MODE>")])
3684
3685 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
3686 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3687 (vec_merge:VF_AVX512VL
3688 (unspec:VF_AVX512VL
3689 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3690 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3691 (neg:VF_AVX512VL
3692 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
3693 UNSPEC_FMADDSUB)
3694 (match_dup 1)
3695 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3696 "TARGET_AVX512F"
3697 "@
3698 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3699 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3700 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3701 (set_attr "type" "ssemuladd")
3702 (set_attr "mode" "<MODE>")])
3703
3704 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
3705 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3706 (vec_merge:VF_AVX512VL
3707 (unspec:VF_AVX512VL
3708 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3709 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3710 (neg:VF_AVX512VL
3711 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
3712 UNSPEC_FMADDSUB)
3713 (match_dup 3)
3714 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3715 "TARGET_AVX512F"
3716 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3717 [(set_attr "isa" "fma_avx512f")
3718 (set_attr "type" "ssemuladd")
3719 (set_attr "mode" "<MODE>")])
3720
3721 ;; FMA3 floating point scalar intrinsics.  These merge the result with the
3722 ;; high-order elements of the destination register.
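;; For example, the FMA3 intrinsic _mm_fmadd_ss (illustrative; defined in the
;; intrinsic headers rather than here) computes roughly
;;
;;   r[0] = a[0] * b[0] + c[0];
;;   r[i] = a[i];                        /* i = 1..3, copied from operand 1 */
;;
;; which is why the patterns below merge the fma result with (match_dup 1)
;; under a (const_int 1) mask.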
3723
3724 (define_expand "fmai_vmfmadd_<mode><round_name>"
3725 [(set (match_operand:VF_128 0 "register_operand")
3726 (vec_merge:VF_128
3727 (fma:VF_128
3728 (match_operand:VF_128 1 "<round_nimm_predicate>")
3729 (match_operand:VF_128 2 "<round_nimm_predicate>")
3730 (match_operand:VF_128 3 "<round_nimm_predicate>"))
3731 (match_dup 1)
3732 (const_int 1)))]
3733 "TARGET_FMA")
3734
3735 (define_insn "*fmai_fmadd_<mode>"
3736 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3737 (vec_merge:VF_128
3738 (fma:VF_128
3739 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3740 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3741 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3742 (match_dup 1)
3743 (const_int 1)))]
3744 "TARGET_FMA || TARGET_AVX512F"
3745 "@
3746 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3747 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3748 [(set_attr "type" "ssemuladd")
3749 (set_attr "mode" "<MODE>")])
3750
3751 (define_insn "*fmai_fmsub_<mode>"
3752 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3753 (vec_merge:VF_128
3754 (fma:VF_128
3755 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3756 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
3757 (neg:VF_128
3758 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3759 (match_dup 1)
3760 (const_int 1)))]
3761 "TARGET_FMA || TARGET_AVX512F"
3762 "@
3763 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3764 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3765 [(set_attr "type" "ssemuladd")
3766 (set_attr "mode" "<MODE>")])
3767
3768 (define_insn "*fmai_fnmadd_<mode><round_name>"
3769 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3770 (vec_merge:VF_128
3771 (fma:VF_128
3772 (neg:VF_128
3773 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3774 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3775 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3776 (match_dup 1)
3777 (const_int 1)))]
3778 "TARGET_FMA || TARGET_AVX512F"
3779 "@
3780 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3781 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3782 [(set_attr "type" "ssemuladd")
3783 (set_attr "mode" "<MODE>")])
3784
3785 (define_insn "*fmai_fnmsub_<mode><round_name>"
3786 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3787 (vec_merge:VF_128
3788 (fma:VF_128
3789 (neg:VF_128
3790 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3791 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3792 (neg:VF_128
3793 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3794 (match_dup 1)
3795 (const_int 1)))]
3796 "TARGET_FMA || TARGET_AVX512F"
3797 "@
3798 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3799 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3800 [(set_attr "type" "ssemuladd")
3801 (set_attr "mode" "<MODE>")])
3802
3803 ;; FMA4 floating point scalar intrinsics. These write the
3804 ;; entire destination register, with the high-order elements zeroed.
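;; For example, the FMA4 intrinsic _mm_macc_ss (illustrative) computes roughly
;;
;;   r[0] = a[0] * b[0] + c[0];
;;   r[i] = 0.0f;                        /* i = 1..3, upper elements zeroed */
;;
;; hence the merge with a const0_operand below instead of with one of the
;; inputs.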
3805
3806 (define_expand "fma4i_vmfmadd_<mode>"
3807 [(set (match_operand:VF_128 0 "register_operand")
3808 (vec_merge:VF_128
3809 (fma:VF_128
3810 (match_operand:VF_128 1 "nonimmediate_operand")
3811 (match_operand:VF_128 2 "nonimmediate_operand")
3812 (match_operand:VF_128 3 "nonimmediate_operand"))
3813 (match_dup 4)
3814 (const_int 1)))]
3815 "TARGET_FMA4"
3816 "operands[4] = CONST0_RTX (<MODE>mode);")
3817
3818 (define_insn "*fma4i_vmfmadd_<mode>"
3819 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3820 (vec_merge:VF_128
3821 (fma:VF_128
3822 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3823 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3824 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3825 (match_operand:VF_128 4 "const0_operand")
3826 (const_int 1)))]
3827 "TARGET_FMA4"
3828 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3829 [(set_attr "type" "ssemuladd")
3830 (set_attr "mode" "<MODE>")])
3831
3832 (define_insn "*fma4i_vmfmsub_<mode>"
3833 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3834 (vec_merge:VF_128
3835 (fma:VF_128
3836 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3837 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3838 (neg:VF_128
3839 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3840 (match_operand:VF_128 4 "const0_operand")
3841 (const_int 1)))]
3842 "TARGET_FMA4"
3843 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3844 [(set_attr "type" "ssemuladd")
3845 (set_attr "mode" "<MODE>")])
3846
3847 (define_insn "*fma4i_vmfnmadd_<mode>"
3848 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3849 (vec_merge:VF_128
3850 (fma:VF_128
3851 (neg:VF_128
3852 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3853 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3854 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3855 (match_operand:VF_128 4 "const0_operand")
3856 (const_int 1)))]
3857 "TARGET_FMA4"
3858 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3859 [(set_attr "type" "ssemuladd")
3860 (set_attr "mode" "<MODE>")])
3861
3862 (define_insn "*fma4i_vmfnmsub_<mode>"
3863 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3864 (vec_merge:VF_128
3865 (fma:VF_128
3866 (neg:VF_128
3867 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3868 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3869 (neg:VF_128
3870 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3871 (match_operand:VF_128 4 "const0_operand")
3872 (const_int 1)))]
3873 "TARGET_FMA4"
3874 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3875 [(set_attr "type" "ssemuladd")
3876 (set_attr "mode" "<MODE>")])
3877
3878 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3879 ;;
3880 ;; Parallel single-precision floating point conversion operations
3881 ;;
3882 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3883
3884 (define_insn "sse_cvtpi2ps"
3885 [(set (match_operand:V4SF 0 "register_operand" "=x")
3886 (vec_merge:V4SF
3887 (vec_duplicate:V4SF
3888 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3889 (match_operand:V4SF 1 "register_operand" "0")
3890 (const_int 3)))]
3891 "TARGET_SSE"
3892 "cvtpi2ps\t{%2, %0|%0, %2}"
3893 [(set_attr "type" "ssecvt")
3894 (set_attr "mode" "V4SF")])
3895
3896 (define_insn "sse_cvtps2pi"
3897 [(set (match_operand:V2SI 0 "register_operand" "=y")
3898 (vec_select:V2SI
3899 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3900 UNSPEC_FIX_NOTRUNC)
3901 (parallel [(const_int 0) (const_int 1)])))]
3902 "TARGET_SSE"
3903 "cvtps2pi\t{%1, %0|%0, %q1}"
3904 [(set_attr "type" "ssecvt")
3905 (set_attr "unit" "mmx")
3906 (set_attr "mode" "DI")])
3907
3908 (define_insn "sse_cvttps2pi"
3909 [(set (match_operand:V2SI 0 "register_operand" "=y")
3910 (vec_select:V2SI
3911 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3912 (parallel [(const_int 0) (const_int 1)])))]
3913 "TARGET_SSE"
3914 "cvttps2pi\t{%1, %0|%0, %q1}"
3915 [(set_attr "type" "ssecvt")
3916 (set_attr "unit" "mmx")
3917 (set_attr "prefix_rep" "0")
3918 (set_attr "mode" "SF")])
3919
3920 (define_insn "sse_cvtsi2ss<round_name>"
3921 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3922 (vec_merge:V4SF
3923 (vec_duplicate:V4SF
3924 (float:SF (match_operand:SI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
3925 (match_operand:V4SF 1 "register_operand" "0,0,v")
3926 (const_int 1)))]
3927 "TARGET_SSE"
3928 "@
3929 cvtsi2ss\t{%2, %0|%0, %2}
3930 cvtsi2ss\t{%2, %0|%0, %2}
3931 vcvtsi2ss\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
3932 [(set_attr "isa" "noavx,noavx,avx")
3933 (set_attr "type" "sseicvt")
3934 (set_attr "athlon_decode" "vector,double,*")
3935 (set_attr "amdfam10_decode" "vector,double,*")
3936 (set_attr "bdver1_decode" "double,direct,*")
3937 (set_attr "btver2_decode" "double,double,double")
3938 (set_attr "znver1_decode" "double,double,double")
3939 (set_attr "prefix" "orig,orig,maybe_evex")
3940 (set_attr "mode" "SF")])
3941
3942 (define_insn "sse_cvtsi2ssq<round_name>"
3943 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3944 (vec_merge:V4SF
3945 (vec_duplicate:V4SF
3946 (float:SF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
3947 (match_operand:V4SF 1 "register_operand" "0,0,v")
3948 (const_int 1)))]
3949 "TARGET_SSE && TARGET_64BIT"
3950 "@
3951 cvtsi2ssq\t{%2, %0|%0, %2}
3952 cvtsi2ssq\t{%2, %0|%0, %2}
3953 vcvtsi2ssq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
3954 [(set_attr "isa" "noavx,noavx,avx")
3955 (set_attr "type" "sseicvt")
3956 (set_attr "athlon_decode" "vector,double,*")
3957 (set_attr "amdfam10_decode" "vector,double,*")
3958 (set_attr "bdver1_decode" "double,direct,*")
3959 (set_attr "btver2_decode" "double,double,double")
3960 (set_attr "length_vex" "*,*,4")
3961 (set_attr "prefix_rex" "1,1,*")
3962 (set_attr "prefix" "orig,orig,maybe_evex")
3963 (set_attr "mode" "SF")])
3964
3965 (define_insn "sse_cvtss2si<round_name>"
3966 [(set (match_operand:SI 0 "register_operand" "=r,r")
3967 (unspec:SI
3968 [(vec_select:SF
3969 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
3970 (parallel [(const_int 0)]))]
3971 UNSPEC_FIX_NOTRUNC))]
3972 "TARGET_SSE"
3973 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3974 [(set_attr "type" "sseicvt")
3975 (set_attr "athlon_decode" "double,vector")
3976 (set_attr "bdver1_decode" "double,double")
3977 (set_attr "prefix_rep" "1")
3978 (set_attr "prefix" "maybe_vex")
3979 (set_attr "mode" "SI")])
3980
3981 (define_insn "sse_cvtss2si_2"
3982 [(set (match_operand:SI 0 "register_operand" "=r,r")
3983 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3984 UNSPEC_FIX_NOTRUNC))]
3985 "TARGET_SSE"
3986 "%vcvtss2si\t{%1, %0|%0, %k1}"
3987 [(set_attr "type" "sseicvt")
3988 (set_attr "athlon_decode" "double,vector")
3989 (set_attr "amdfam10_decode" "double,double")
3990 (set_attr "bdver1_decode" "double,double")
3991 (set_attr "prefix_rep" "1")
3992 (set_attr "prefix" "maybe_vex")
3993 (set_attr "mode" "SI")])
3994
3995 (define_insn "sse_cvtss2siq<round_name>"
3996 [(set (match_operand:DI 0 "register_operand" "=r,r")
3997 (unspec:DI
3998 [(vec_select:SF
3999 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4000 (parallel [(const_int 0)]))]
4001 UNSPEC_FIX_NOTRUNC))]
4002 "TARGET_SSE && TARGET_64BIT"
4003 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4004 [(set_attr "type" "sseicvt")
4005 (set_attr "athlon_decode" "double,vector")
4006 (set_attr "bdver1_decode" "double,double")
4007 (set_attr "prefix_rep" "1")
4008 (set_attr "prefix" "maybe_vex")
4009 (set_attr "mode" "DI")])
4010
4011 (define_insn "sse_cvtss2siq_2"
4012 [(set (match_operand:DI 0 "register_operand" "=r,r")
4013 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4014 UNSPEC_FIX_NOTRUNC))]
4015 "TARGET_SSE && TARGET_64BIT"
4016 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
4017 [(set_attr "type" "sseicvt")
4018 (set_attr "athlon_decode" "double,vector")
4019 (set_attr "amdfam10_decode" "double,double")
4020 (set_attr "bdver1_decode" "double,double")
4021 (set_attr "prefix_rep" "1")
4022 (set_attr "prefix" "maybe_vex")
4023 (set_attr "mode" "DI")])
4024
4025 (define_insn "sse_cvttss2si<round_saeonly_name>"
4026 [(set (match_operand:SI 0 "register_operand" "=r,r")
4027 (fix:SI
4028 (vec_select:SF
4029 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4030 (parallel [(const_int 0)]))))]
4031 "TARGET_SSE"
4032 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4033 [(set_attr "type" "sseicvt")
4034 (set_attr "athlon_decode" "double,vector")
4035 (set_attr "amdfam10_decode" "double,double")
4036 (set_attr "bdver1_decode" "double,double")
4037 (set_attr "prefix_rep" "1")
4038 (set_attr "prefix" "maybe_vex")
4039 (set_attr "mode" "SI")])
4040
4041 (define_insn "sse_cvttss2siq<round_saeonly_name>"
4042 [(set (match_operand:DI 0 "register_operand" "=r,r")
4043 (fix:DI
4044 (vec_select:SF
4045 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
4046 (parallel [(const_int 0)]))))]
4047 "TARGET_SSE && TARGET_64BIT"
4048 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4049 [(set_attr "type" "sseicvt")
4050 (set_attr "athlon_decode" "double,vector")
4051 (set_attr "amdfam10_decode" "double,double")
4052 (set_attr "bdver1_decode" "double,double")
4053 (set_attr "prefix_rep" "1")
4054 (set_attr "prefix" "maybe_vex")
4055 (set_attr "mode" "DI")])
4056
4057 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
4058 [(set (match_operand:VF_128 0 "register_operand" "=v")
4059 (vec_merge:VF_128
4060 (vec_duplicate:VF_128
4061 (unsigned_float:<ssescalarmode>
4062 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4063 (match_operand:VF_128 1 "register_operand" "v")
4064 (const_int 1)))]
4065 "TARGET_AVX512F && <round_modev4sf_condition>"
4066 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4067 [(set_attr "type" "sseicvt")
4068 (set_attr "prefix" "evex")
4069 (set_attr "mode" "<ssescalarmode>")])
4070
4071 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
4072 [(set (match_operand:VF_128 0 "register_operand" "=v")
4073 (vec_merge:VF_128
4074 (vec_duplicate:VF_128
4075 (unsigned_float:<ssescalarmode>
4076 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4077 (match_operand:VF_128 1 "register_operand" "v")
4078 (const_int 1)))]
4079 "TARGET_AVX512F && TARGET_64BIT"
4080 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4081 [(set_attr "type" "sseicvt")
4082 (set_attr "prefix" "evex")
4083 (set_attr "mode" "<ssescalarmode>")])
4084
4085 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4086 [(set (match_operand:VF1 0 "register_operand" "=x,v")
4087 (float:VF1
4088 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
4089 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4090 "@
4091 cvtdq2ps\t{%1, %0|%0, %1}
4092 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4093 [(set_attr "isa" "noavx,avx")
4094 (set_attr "type" "ssecvt")
4095 (set_attr "prefix" "maybe_vex")
4096 (set_attr "mode" "<sseinsnmode>")])
4097
4098 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4099 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4100 (unsigned_float:VF1_AVX512VL
4101 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4102 "TARGET_AVX512F"
4103 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4104 [(set_attr "type" "ssecvt")
4105 (set_attr "prefix" "evex")
4106 (set_attr "mode" "<MODE>")])
4107
4108 (define_expand "floatuns<sseintvecmodelower><mode>2"
4109 [(match_operand:VF1 0 "register_operand")
4110 (match_operand:<sseintvecmode> 1 "register_operand")]
4111 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4112 {
4113 if (<MODE>mode == V16SFmode)
4114 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4115 else
4116 if (TARGET_AVX512VL)
4117 {
4118 if (<MODE>mode == V4SFmode)
4119 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4120 else
4121 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4122 }
4123 else
4124 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4125
4126 DONE;
4127 })
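;; A scalar sketch of the non-AVX512VL fallback above, assuming the usual
;; split-and-recombine technique behind ix86_expand_vector_convert_uns_vsivsf:
;;
;;   float ufloat (unsigned int v)
;;   {
;;     float hi = (float) (v >> 16);     /* upper 16 bits, converts exactly */
;;     float lo = (float) (v & 0xffff);  /* lower 16 bits, converts exactly */
;;     return hi * 65536.0f + lo;        /* single rounding at the end */
;;   }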
4128
4129
4130 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
4131 (define_mode_attr sf2simodelower
4132 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
4133
4134 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4135 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4136 (unspec:VI4_AVX
4137 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
4138 UNSPEC_FIX_NOTRUNC))]
4139 "TARGET_SSE2 && <mask_mode512bit_condition>"
4140 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4141 [(set_attr "type" "ssecvt")
4142 (set (attr "prefix_data16")
4143 (if_then_else
4144 (match_test "TARGET_AVX")
4145 (const_string "*")
4146 (const_string "1")))
4147 (set_attr "prefix" "maybe_vex")
4148 (set_attr "mode" "<sseinsnmode>")])
4149
4150 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4151 [(set (match_operand:V16SI 0 "register_operand" "=v")
4152 (unspec:V16SI
4153 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4154 UNSPEC_FIX_NOTRUNC))]
4155 "TARGET_AVX512F"
4156 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4157 [(set_attr "type" "ssecvt")
4158 (set_attr "prefix" "evex")
4159 (set_attr "mode" "XI")])
4160
4161 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4162 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4163 (unspec:VI4_AVX512VL
4164 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4165 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4166 "TARGET_AVX512F"
4167 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4168 [(set_attr "type" "ssecvt")
4169 (set_attr "prefix" "evex")
4170 (set_attr "mode" "<sseinsnmode>")])
4171
4172 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4173 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4174 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4175 UNSPEC_FIX_NOTRUNC))]
4176 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4177 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4178 [(set_attr "type" "ssecvt")
4179 (set_attr "prefix" "evex")
4180 (set_attr "mode" "<sseinsnmode>")])
4181
4182 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4183 [(set (match_operand:V2DI 0 "register_operand" "=v")
4184 (unspec:V2DI
4185 [(vec_select:V2SF
4186 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4187 (parallel [(const_int 0) (const_int 1)]))]
4188 UNSPEC_FIX_NOTRUNC))]
4189 "TARGET_AVX512DQ && TARGET_AVX512VL"
4190 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4191 [(set_attr "type" "ssecvt")
4192 (set_attr "prefix" "evex")
4193 (set_attr "mode" "TI")])
4194
4195 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4196 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4197 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4198 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4199 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4200 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4201 [(set_attr "type" "ssecvt")
4202 (set_attr "prefix" "evex")
4203 (set_attr "mode" "<sseinsnmode>")])
4204
4205 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4206 [(set (match_operand:V2DI 0 "register_operand" "=v")
4207 (unspec:V2DI
4208 [(vec_select:V2SF
4209 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4210 (parallel [(const_int 0) (const_int 1)]))]
4211 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4212 "TARGET_AVX512DQ && TARGET_AVX512VL"
4213 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4214 [(set_attr "type" "ssecvt")
4215 (set_attr "prefix" "evex")
4216 (set_attr "mode" "TI")])
4217
4218 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4219 [(set (match_operand:V16SI 0 "register_operand" "=v")
4220 (any_fix:V16SI
4221 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4222 "TARGET_AVX512F"
4223 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4224 [(set_attr "type" "ssecvt")
4225 (set_attr "prefix" "evex")
4226 (set_attr "mode" "XI")])
4227
4228 (define_insn "fix_truncv8sfv8si2<mask_name>"
4229 [(set (match_operand:V8SI 0 "register_operand" "=v")
4230 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4231 "TARGET_AVX && <mask_avx512vl_condition>"
4232 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4233 [(set_attr "type" "ssecvt")
4234 (set_attr "prefix" "<mask_prefix>")
4235 (set_attr "mode" "OI")])
4236
4237 (define_insn "fix_truncv4sfv4si2<mask_name>"
4238 [(set (match_operand:V4SI 0 "register_operand" "=v")
4239 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
4240 "TARGET_SSE2 && <mask_avx512vl_condition>"
4241 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4242 [(set_attr "type" "ssecvt")
4243 (set (attr "prefix_rep")
4244 (if_then_else
4245 (match_test "TARGET_AVX")
4246 (const_string "*")
4247 (const_string "1")))
4253 (set_attr "prefix_data16" "0")
4254 (set_attr "prefix" "<mask_prefix2>")
4255 (set_attr "mode" "TI")])
4256
4257 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4258 [(match_operand:<sseintvecmode> 0 "register_operand")
4259 (match_operand:VF1 1 "register_operand")]
4260 "TARGET_SSE2"
4261 {
4262 if (<MODE>mode == V16SFmode)
4263 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4264 operands[1]));
4265 else
4266 {
4267 rtx tmp[3];
4268 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4269 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4270 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4271 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4272 }
4273 DONE;
4274 })
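;; A scalar sketch of the non-V16SF path above, assuming the usual
;; adjust-then-xor technique behind ix86_expand_adjust_ufix_to_sfix_si:
;;
;;   unsigned int ufix (float x)
;;   {
;;     if (x < 2147483648.0f)            /* value fits the signed range */
;;       return (int) x;
;;     return (int) (x - 2147483648.0f) ^ 0x80000000u;
;;   }
;;
;; tmp[0] holds the adjusted input, tmp[2] the per-lane XOR mask, and the
;; final gen_xor<sseintvecmodelower>3 restores the sign bit for large values.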
4275
4276 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4277 ;;
4278 ;; Parallel double-precision floating point conversion operations
4279 ;;
4280 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4281
4282 (define_insn "sse2_cvtpi2pd"
4283 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4284 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4285 "TARGET_SSE2"
4286 "cvtpi2pd\t{%1, %0|%0, %1}"
4287 [(set_attr "type" "ssecvt")
4288 (set_attr "unit" "mmx,*")
4289 (set_attr "prefix_data16" "1,*")
4290 (set_attr "mode" "V2DF")])
4291
4292 (define_insn "sse2_cvtpd2pi"
4293 [(set (match_operand:V2SI 0 "register_operand" "=y")
4294 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4295 UNSPEC_FIX_NOTRUNC))]
4296 "TARGET_SSE2"
4297 "cvtpd2pi\t{%1, %0|%0, %1}"
4298 [(set_attr "type" "ssecvt")
4299 (set_attr "unit" "mmx")
4300 (set_attr "bdver1_decode" "double")
4301 (set_attr "btver2_decode" "direct")
4302 (set_attr "prefix_data16" "1")
4303 (set_attr "mode" "DI")])
4304
4305 (define_insn "sse2_cvttpd2pi"
4306 [(set (match_operand:V2SI 0 "register_operand" "=y")
4307 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4308 "TARGET_SSE2"
4309 "cvttpd2pi\t{%1, %0|%0, %1}"
4310 [(set_attr "type" "ssecvt")
4311 (set_attr "unit" "mmx")
4312 (set_attr "bdver1_decode" "double")
4313 (set_attr "prefix_data16" "1")
4314 (set_attr "mode" "TI")])
4315
4316 (define_insn "sse2_cvtsi2sd"
4317 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4318 (vec_merge:V2DF
4319 (vec_duplicate:V2DF
4320 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4321 (match_operand:V2DF 1 "register_operand" "0,0,v")
4322 (const_int 1)))]
4323 "TARGET_SSE2"
4324 "@
4325 cvtsi2sd\t{%2, %0|%0, %2}
4326 cvtsi2sd\t{%2, %0|%0, %2}
4327 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4328 [(set_attr "isa" "noavx,noavx,avx")
4329 (set_attr "type" "sseicvt")
4330 (set_attr "athlon_decode" "double,direct,*")
4331 (set_attr "amdfam10_decode" "vector,double,*")
4332 (set_attr "bdver1_decode" "double,direct,*")
4333 (set_attr "btver2_decode" "double,double,double")
4334 (set_attr "znver1_decode" "double,double,double")
4335 (set_attr "prefix" "orig,orig,maybe_evex")
4336 (set_attr "mode" "DF")])
4337
4338 (define_insn "sse2_cvtsi2sdq<round_name>"
4339 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4340 (vec_merge:V2DF
4341 (vec_duplicate:V2DF
4342 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4343 (match_operand:V2DF 1 "register_operand" "0,0,v")
4344 (const_int 1)))]
4345 "TARGET_SSE2 && TARGET_64BIT"
4346 "@
4347 cvtsi2sdq\t{%2, %0|%0, %2}
4348 cvtsi2sdq\t{%2, %0|%0, %2}
4349 vcvtsi2sdq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4350 [(set_attr "isa" "noavx,noavx,avx")
4351 (set_attr "type" "sseicvt")
4352 (set_attr "athlon_decode" "double,direct,*")
4353 (set_attr "amdfam10_decode" "vector,double,*")
4354 (set_attr "bdver1_decode" "double,direct,*")
4355 (set_attr "length_vex" "*,*,4")
4356 (set_attr "prefix_rex" "1,1,*")
4357 (set_attr "prefix" "orig,orig,maybe_evex")
4358 (set_attr "mode" "DF")])
4359
4360 (define_insn "avx512f_vcvtss2usi<round_name>"
4361 [(set (match_operand:SI 0 "register_operand" "=r")
4362 (unspec:SI
4363 [(vec_select:SF
4364 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4365 (parallel [(const_int 0)]))]
4366 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4367 "TARGET_AVX512F"
4368 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4369 [(set_attr "type" "sseicvt")
4370 (set_attr "prefix" "evex")
4371 (set_attr "mode" "SI")])
4372
4373 (define_insn "avx512f_vcvtss2usiq<round_name>"
4374 [(set (match_operand:DI 0 "register_operand" "=r")
4375 (unspec:DI
4376 [(vec_select:SF
4377 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4378 (parallel [(const_int 0)]))]
4379 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4380 "TARGET_AVX512F && TARGET_64BIT"
4381 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4382 [(set_attr "type" "sseicvt")
4383 (set_attr "prefix" "evex")
4384 (set_attr "mode" "DI")])
4385
4386 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4387 [(set (match_operand:SI 0 "register_operand" "=r")
4388 (unsigned_fix:SI
4389 (vec_select:SF
4390 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4391 (parallel [(const_int 0)]))))]
4392 "TARGET_AVX512F"
4393 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4394 [(set_attr "type" "sseicvt")
4395 (set_attr "prefix" "evex")
4396 (set_attr "mode" "SI")])
4397
4398 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4399 [(set (match_operand:DI 0 "register_operand" "=r")
4400 (unsigned_fix:DI
4401 (vec_select:SF
4402 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4403 (parallel [(const_int 0)]))))]
4404 "TARGET_AVX512F && TARGET_64BIT"
4405 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4406 [(set_attr "type" "sseicvt")
4407 (set_attr "prefix" "evex")
4408 (set_attr "mode" "DI")])
4409
4410 (define_insn "avx512f_vcvtsd2usi<round_name>"
4411 [(set (match_operand:SI 0 "register_operand" "=r")
4412 (unspec:SI
4413 [(vec_select:DF
4414 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4415 (parallel [(const_int 0)]))]
4416 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4417 "TARGET_AVX512F"
4418 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4419 [(set_attr "type" "sseicvt")
4420 (set_attr "prefix" "evex")
4421 (set_attr "mode" "SI")])
4422
4423 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4424 [(set (match_operand:DI 0 "register_operand" "=r")
4425 (unspec:DI
4426 [(vec_select:DF
4427 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4428 (parallel [(const_int 0)]))]
4429 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4430 "TARGET_AVX512F && TARGET_64BIT"
4431 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4432 [(set_attr "type" "sseicvt")
4433 (set_attr "prefix" "evex")
4434 (set_attr "mode" "DI")])
4435
4436 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4437 [(set (match_operand:SI 0 "register_operand" "=r")
4438 (unsigned_fix:SI
4439 (vec_select:DF
4440 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4441 (parallel [(const_int 0)]))))]
4442 "TARGET_AVX512F"
4443 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4444 [(set_attr "type" "sseicvt")
4445 (set_attr "prefix" "evex")
4446 (set_attr "mode" "SI")])
4447
4448 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4449 [(set (match_operand:DI 0 "register_operand" "=r")
4450 (unsigned_fix:DI
4451 (vec_select:DF
4452 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4453 (parallel [(const_int 0)]))))]
4454 "TARGET_AVX512F && TARGET_64BIT"
4455 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4456 [(set_attr "type" "sseicvt")
4457 (set_attr "prefix" "evex")
4458 (set_attr "mode" "DI")])
4459
4460 (define_insn "sse2_cvtsd2si<round_name>"
4461 [(set (match_operand:SI 0 "register_operand" "=r,r")
4462 (unspec:SI
4463 [(vec_select:DF
4464 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4465 (parallel [(const_int 0)]))]
4466 UNSPEC_FIX_NOTRUNC))]
4467 "TARGET_SSE2"
4468 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4469 [(set_attr "type" "sseicvt")
4470 (set_attr "athlon_decode" "double,vector")
4471 (set_attr "bdver1_decode" "double,double")
4472 (set_attr "btver2_decode" "double,double")
4473 (set_attr "prefix_rep" "1")
4474 (set_attr "prefix" "maybe_vex")
4475 (set_attr "mode" "SI")])
4476
4477 (define_insn "sse2_cvtsd2si_2"
4478 [(set (match_operand:SI 0 "register_operand" "=r,r")
4479 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4480 UNSPEC_FIX_NOTRUNC))]
4481 "TARGET_SSE2"
4482 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4483 [(set_attr "type" "sseicvt")
4484 (set_attr "athlon_decode" "double,vector")
4485 (set_attr "amdfam10_decode" "double,double")
4486 (set_attr "bdver1_decode" "double,double")
4487 (set_attr "prefix_rep" "1")
4488 (set_attr "prefix" "maybe_vex")
4489 (set_attr "mode" "SI")])
4490
4491 (define_insn "sse2_cvtsd2siq<round_name>"
4492 [(set (match_operand:DI 0 "register_operand" "=r,r")
4493 (unspec:DI
4494 [(vec_select:DF
4495 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4496 (parallel [(const_int 0)]))]
4497 UNSPEC_FIX_NOTRUNC))]
4498 "TARGET_SSE2 && TARGET_64BIT"
4499 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4500 [(set_attr "type" "sseicvt")
4501 (set_attr "athlon_decode" "double,vector")
4502 (set_attr "bdver1_decode" "double,double")
4503 (set_attr "prefix_rep" "1")
4504 (set_attr "prefix" "maybe_vex")
4505 (set_attr "mode" "DI")])
4506
4507 (define_insn "sse2_cvtsd2siq_2"
4508 [(set (match_operand:DI 0 "register_operand" "=r,r")
4509 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4510 UNSPEC_FIX_NOTRUNC))]
4511 "TARGET_SSE2 && TARGET_64BIT"
4512 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4513 [(set_attr "type" "sseicvt")
4514 (set_attr "athlon_decode" "double,vector")
4515 (set_attr "amdfam10_decode" "double,double")
4516 (set_attr "bdver1_decode" "double,double")
4517 (set_attr "prefix_rep" "1")
4518 (set_attr "prefix" "maybe_vex")
4519 (set_attr "mode" "DI")])
4520
4521 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4522 [(set (match_operand:SI 0 "register_operand" "=r,r")
4523 (fix:SI
4524 (vec_select:DF
4525 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4526 (parallel [(const_int 0)]))))]
4527 "TARGET_SSE2"
4528 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4529 [(set_attr "type" "sseicvt")
4530 (set_attr "athlon_decode" "double,vector")
4531 (set_attr "amdfam10_decode" "double,double")
4532 (set_attr "bdver1_decode" "double,double")
4533 (set_attr "btver2_decode" "double,double")
4534 (set_attr "prefix_rep" "1")
4535 (set_attr "prefix" "maybe_vex")
4536 (set_attr "mode" "SI")])
4537
4538 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4539 [(set (match_operand:DI 0 "register_operand" "=r,r")
4540 (fix:DI
4541 (vec_select:DF
4542 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4543 (parallel [(const_int 0)]))))]
4544 "TARGET_SSE2 && TARGET_64BIT"
4545 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4546 [(set_attr "type" "sseicvt")
4547 (set_attr "athlon_decode" "double,vector")
4548 (set_attr "amdfam10_decode" "double,double")
4549 (set_attr "bdver1_decode" "double,double")
4550 (set_attr "prefix_rep" "1")
4551 (set_attr "prefix" "maybe_vex")
4552 (set_attr "mode" "DI")])
4553
4554 ;; For float<si2dfmodelower><mode>2 insn pattern
4555 (define_mode_attr si2dfmode
4556 [(V8DF "V8SI") (V4DF "V4SI")])
4557 (define_mode_attr si2dfmodelower
4558 [(V8DF "v8si") (V4DF "v4si")])
4559
4560 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4561 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4562 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4563 "TARGET_AVX && <mask_mode512bit_condition>"
4564 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4565 [(set_attr "type" "ssecvt")
4566 (set_attr "prefix" "maybe_vex")
4567 (set_attr "mode" "<MODE>")])
4568
4569 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4570 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4571 (any_float:VF2_AVX512VL
4572 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))]
4573 "TARGET_AVX512DQ"
4574 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4575 [(set_attr "type" "ssecvt")
4576 (set_attr "prefix" "evex")
4577 (set_attr "mode" "<MODE>")])
4578
4579 ;; For <floatsuffix>float<sselongvecmodelower><mode>2 insn patterns
4580 (define_mode_attr qq2pssuff
4581 [(V8SF "") (V4SF "{y}")])
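;; vcvt[u]qq2ps narrows, so a 128-bit destination can come from a 256-bit
;; V4DI source; the {y} suffix selected by qq2pssuff spells that source
;; size out in the AT&T mnemonic where it cannot be inferred.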
4582
4583 (define_mode_attr sselongvecmode
4584 [(V8SF "V8DI") (V4SF "V4DI")])
4585
4586 (define_mode_attr sselongvecmodelower
4587 [(V8SF "v8di") (V4SF "v4di")])
4588
4589 (define_mode_attr sseintvecmode3
4590 [(V8SF "XI") (V4SF "OI")
4591 (V8DF "OI") (V4DF "TI")])
4592
4593 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4594 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4595 (any_float:VF1_128_256VL
4596 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4597 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4598 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4599 [(set_attr "type" "ssecvt")
4600 (set_attr "prefix" "evex")
4601 (set_attr "mode" "<MODE>")])
4602
4603 (define_insn "*<floatsuffix>floatv2div2sf2"
4604 [(set (match_operand:V4SF 0 "register_operand" "=v")
4605 (vec_concat:V4SF
4606 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4607 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4608 "TARGET_AVX512DQ && TARGET_AVX512VL"
4609 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4610 [(set_attr "type" "ssecvt")
4611 (set_attr "prefix" "evex")
4612 (set_attr "mode" "V4SF")])
4613
4614 (define_insn "<floatsuffix>floatv2div2sf2_mask"
4615 [(set (match_operand:V4SF 0 "register_operand" "=v")
4616 (vec_concat:V4SF
4617 (vec_merge:V2SF
4618 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4619 (vec_select:V2SF
4620 (match_operand:V4SF 2 "vector_move_operand" "0C")
4621 (parallel [(const_int 0) (const_int 1)]))
4622 (match_operand:QI 3 "register_operand" "Yk"))
4623 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4624 "TARGET_AVX512DQ && TARGET_AVX512VL"
4625 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
4626 [(set_attr "type" "ssecvt")
4627 (set_attr "prefix" "evex")
4628 (set_attr "mode" "V4SF")])
4629
4630 (define_insn "*<floatsuffix>floatv2div2sf2_mask_1"
4631 [(set (match_operand:V4SF 0 "register_operand" "=v")
4632 (vec_concat:V4SF
4633 (vec_merge:V2SF
4634 (any_float:V2SF (match_operand:V2DI 1
4635 "nonimmediate_operand" "vm"))
4636 (const_vector:V2SF [(const_int 0) (const_int 0)])
4637 (match_operand:QI 2 "register_operand" "Yk"))
4638 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4639 "TARGET_AVX512DQ && TARGET_AVX512VL"
4640 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
4641 [(set_attr "type" "ssecvt")
4642 (set_attr "prefix" "evex")
4643 (set_attr "mode" "V4SF")])
4644
4645 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
4646 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
4647 (unsigned_float:VF2_512_256VL
4648 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4649 "TARGET_AVX512F"
4650 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4651 [(set_attr "type" "ssecvt")
4652 (set_attr "prefix" "evex")
4653 (set_attr "mode" "<MODE>")])
4654
4655 (define_insn "ufloatv2siv2df2<mask_name>"
4656 [(set (match_operand:V2DF 0 "register_operand" "=v")
4657 (unsigned_float:V2DF
4658 (vec_select:V2SI
4659 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4660 (parallel [(const_int 0) (const_int 1)]))))]
4661 "TARGET_AVX512VL"
4662 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4663 [(set_attr "type" "ssecvt")
4664 (set_attr "prefix" "evex")
4665 (set_attr "mode" "V2DF")])
4666
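;; The *_cvtdq2pd*_2 patterns describe the narrower source as the low half
;; of a wider vector; the %t1 and %x1 modifiers print the ymm and xmm names
;; of that register so only the low half is consumed.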
4667 (define_insn "avx512f_cvtdq2pd512_2"
4668 [(set (match_operand:V8DF 0 "register_operand" "=v")
4669 (float:V8DF
4670 (vec_select:V8SI
4671 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
4672 (parallel [(const_int 0) (const_int 1)
4673 (const_int 2) (const_int 3)
4674 (const_int 4) (const_int 5)
4675 (const_int 6) (const_int 7)]))))]
4676 "TARGET_AVX512F"
4677 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
4678 [(set_attr "type" "ssecvt")
4679 (set_attr "prefix" "evex")
4680 (set_attr "mode" "V8DF")])
4681
4682 (define_insn "avx_cvtdq2pd256_2"
4683 [(set (match_operand:V4DF 0 "register_operand" "=v")
4684 (float:V4DF
4685 (vec_select:V4SI
4686 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
4687 (parallel [(const_int 0) (const_int 1)
4688 (const_int 2) (const_int 3)]))))]
4689 "TARGET_AVX"
4690 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
4691 [(set_attr "type" "ssecvt")
4692 (set_attr "prefix" "maybe_evex")
4693 (set_attr "mode" "V4DF")])
4694
4695 (define_insn "sse2_cvtdq2pd<mask_name>"
4696 [(set (match_operand:V2DF 0 "register_operand" "=v")
4697 (float:V2DF
4698 (vec_select:V2SI
4699 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4700 (parallel [(const_int 0) (const_int 1)]))))]
4701 "TARGET_SSE2 && <mask_avx512vl_condition>"
4702 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4703 [(set_attr "type" "ssecvt")
4704 (set_attr "prefix" "maybe_vex")
4705 (set_attr "mode" "V2DF")])
4706
4707 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
4708 [(set (match_operand:V8SI 0 "register_operand" "=v")
4709 (unspec:V8SI
4710 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4711 UNSPEC_FIX_NOTRUNC))]
4712 "TARGET_AVX512F"
4713 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4714 [(set_attr "type" "ssecvt")
4715 (set_attr "prefix" "evex")
4716 (set_attr "mode" "OI")])
4717
4718 (define_insn "avx_cvtpd2dq256<mask_name>"
4719 [(set (match_operand:V4SI 0 "register_operand" "=v")
4720 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
4721 UNSPEC_FIX_NOTRUNC))]
4722 "TARGET_AVX && <mask_avx512vl_condition>"
4723 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4724 [(set_attr "type" "ssecvt")
4725 (set_attr "prefix" "<mask_prefix>")
4726 (set_attr "mode" "OI")])
4727
4728 (define_expand "avx_cvtpd2dq256_2"
4729 [(set (match_operand:V8SI 0 "register_operand")
4730 (vec_concat:V8SI
4731 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
4732 UNSPEC_FIX_NOTRUNC)
4733 (match_dup 2)))]
4734 "TARGET_AVX"
4735 "operands[2] = CONST0_RTX (V4SImode);")
4736
4737 (define_insn "*avx_cvtpd2dq256_2"
4738 [(set (match_operand:V8SI 0 "register_operand" "=v")
4739 (vec_concat:V8SI
4740 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
4741 UNSPEC_FIX_NOTRUNC)
4742 (match_operand:V4SI 2 "const0_operand")))]
4743 "TARGET_AVX"
4744 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
4745 [(set_attr "type" "ssecvt")
4746 (set_attr "prefix" "vex")
4747 (set_attr "btver2_decode" "vector")
4748 (set_attr "mode" "OI")])
4749
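;; Converting two doubles yields only two SImode results, so the pattern
;; makes the zeroing of the upper half of the destination explicit; with
;; AVX the {x} suffix spells out the 128-bit source size.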
4750 (define_insn "sse2_cvtpd2dq<mask_name>"
4751 [(set (match_operand:V4SI 0 "register_operand" "=v")
4752 (vec_concat:V4SI
4753 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
4754 UNSPEC_FIX_NOTRUNC)
4755 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4756 "TARGET_SSE2 && <mask_avx512vl_condition>"
4757 {
4758 if (TARGET_AVX)
4759 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4760 else
4761 return "cvtpd2dq\t{%1, %0|%0, %1}";
4762 }
4763 [(set_attr "type" "ssecvt")
4764 (set_attr "prefix_rep" "1")
4765 (set_attr "prefix_data16" "0")
4766 (set_attr "prefix" "maybe_vex")
4767 (set_attr "mode" "TI")
4768 (set_attr "amdfam10_decode" "double")
4769 (set_attr "athlon_decode" "vector")
4770 (set_attr "bdver1_decode" "double")])
4771
4772 ;; For ufix_notrunc* insn patterns
4773 (define_mode_attr pd2udqsuff
4774 [(V8DF "") (V4DF "{y}")])
4775
4776 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
4777 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
4778 (unspec:<si2dfmode>
4779 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
4780 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4781 "TARGET_AVX512F"
4782 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4783 [(set_attr "type" "ssecvt")
4784 (set_attr "prefix" "evex")
4785 (set_attr "mode" "<sseinsnmode>")])
4786
4787 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
4788 [(set (match_operand:V4SI 0 "register_operand" "=v")
4789 (vec_concat:V4SI
4790 (unspec:V2SI
4791 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4792 UNSPEC_UNSIGNED_FIX_NOTRUNC)
4793 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4794 "TARGET_AVX512VL"
4795 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4796 [(set_attr "type" "ssecvt")
4797 (set_attr "prefix" "evex")
4798 (set_attr "mode" "TI")])
4799
4800 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
4801 [(set (match_operand:V8SI 0 "register_operand" "=v")
4802 (any_fix:V8SI
4803 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4804 "TARGET_AVX512F"
4805 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4806 [(set_attr "type" "ssecvt")
4807 (set_attr "prefix" "evex")
4808 (set_attr "mode" "OI")])
4809
4810 (define_insn "ufix_truncv2dfv2si2<mask_name>"
4811 [(set (match_operand:V4SI 0 "register_operand" "=v")
4812 (vec_concat:V4SI
4813 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4814 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4815 "TARGET_AVX512VL"
4816 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4817 [(set_attr "type" "ssecvt")
4818 (set_attr "prefix" "evex")
4819 (set_attr "mode" "TI")])
4820
4821 (define_insn "fix_truncv4dfv4si2<mask_name>"
4822 [(set (match_operand:V4SI 0 "register_operand" "=v")
4823 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4824 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
4825 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4826 [(set_attr "type" "ssecvt")
4827 (set_attr "prefix" "maybe_evex")
4828 (set_attr "mode" "OI")])
4829
4830 (define_insn "ufix_truncv4dfv4si2<mask_name>"
4831 [(set (match_operand:V4SI 0 "register_operand" "=v")
4832 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4833 "TARGET_AVX512VL && TARGET_AVX512F"
4834 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4835 [(set_attr "type" "ssecvt")
4836 (set_attr "prefix" "maybe_evex")
4837 (set_attr "mode" "OI")])
4838
4839 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
4840 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4841 (any_fix:<sseintvecmode>
4842 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4843 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
4844 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4845 [(set_attr "type" "ssecvt")
4846 (set_attr "prefix" "evex")
4847 (set_attr "mode" "<sseintvecmode2>")])
4848
4849 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4850 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4851 (unspec:<sseintvecmode>
4852 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
4853 UNSPEC_FIX_NOTRUNC))]
4854 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4855 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4856 [(set_attr "type" "ssecvt")
4857 (set_attr "prefix" "evex")
4858 (set_attr "mode" "<sseintvecmode2>")])
4859
4860 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4861 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4862 (unspec:<sseintvecmode>
4863 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
4864 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4865 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4866 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4867 [(set_attr "type" "ssecvt")
4868 (set_attr "prefix" "evex")
4869 (set_attr "mode" "<sseintvecmode2>")])
4870
4871 (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
4872 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
4873 (any_fix:<sselongvecmode>
4874 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4875 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
4876 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4877 [(set_attr "type" "ssecvt")
4878 (set_attr "prefix" "evex")
4879 (set_attr "mode" "<sseintvecmode3>")])
4880
4881 (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
4882 [(set (match_operand:V2DI 0 "register_operand" "=v")
4883 (any_fix:V2DI
4884 (vec_select:V2SF
4885 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4886 (parallel [(const_int 0) (const_int 1)]))))]
4887 "TARGET_AVX512DQ && TARGET_AVX512VL"
4888 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4889 [(set_attr "type" "ssecvt")
4890 (set_attr "prefix" "evex")
4891 (set_attr "mode" "TI")])
4892
4893 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
4894 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4895 (unsigned_fix:<sseintvecmode>
4896 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
4897 "TARGET_AVX512VL"
4898 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4899 [(set_attr "type" "ssecvt")
4900 (set_attr "prefix" "evex")
4901 (set_attr "mode" "<sseintvecmode2>")])
4902
4903 (define_expand "avx_cvttpd2dq256_2"
4904 [(set (match_operand:V8SI 0 "register_operand")
4905 (vec_concat:V8SI
4906 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
4907 (match_dup 2)))]
4908 "TARGET_AVX"
4909 "operands[2] = CONST0_RTX (V4SImode);")
4910
4911 (define_insn "sse2_cvttpd2dq<mask_name>"
4912 [(set (match_operand:V4SI 0 "register_operand" "=v")
4913 (vec_concat:V4SI
4914 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
4915 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4916 "TARGET_SSE2 && <mask_avx512vl_condition>"
4917 {
4918 if (TARGET_AVX)
4919 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4920 else
4921 return "cvttpd2dq\t{%1, %0|%0, %1}";
4922 }
4923 [(set_attr "type" "ssecvt")
4924 (set_attr "amdfam10_decode" "double")
4925 (set_attr "athlon_decode" "vector")
4926 (set_attr "bdver1_decode" "double")
4927 (set_attr "prefix" "maybe_vex")
4928 (set_attr "mode" "TI")])
4929
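;; In the scalar conversions below the (vec_merge ... (const_int 1))
;; replaces only element 0 of operand 1 with the converted value and
;; leaves the remaining elements untouched.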
4930 (define_insn "sse2_cvtsd2ss<round_name>"
4931 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4932 (vec_merge:V4SF
4933 (vec_duplicate:V4SF
4934 (float_truncate:V2SF
4935 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
4936 (match_operand:V4SF 1 "register_operand" "0,0,v")
4937 (const_int 1)))]
4938 "TARGET_SSE2"
4939 "@
4940 cvtsd2ss\t{%2, %0|%0, %2}
4941 cvtsd2ss\t{%2, %0|%0, %q2}
4942 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
4943 [(set_attr "isa" "noavx,noavx,avx")
4944 (set_attr "type" "ssecvt")
4945 (set_attr "athlon_decode" "vector,double,*")
4946 (set_attr "amdfam10_decode" "vector,double,*")
4947 (set_attr "bdver1_decode" "direct,direct,*")
4948 (set_attr "btver2_decode" "double,double,double")
4949 (set_attr "prefix" "orig,orig,<round_prefix>")
4950 (set_attr "mode" "SF")])
4951
4952 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
4953 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4954 (vec_merge:V2DF
4955 (float_extend:V2DF
4956 (vec_select:V2SF
4957 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
4958 (parallel [(const_int 0) (const_int 1)])))
4959 (match_operand:V2DF 1 "register_operand" "0,0,v")
4960 (const_int 1)))]
4961 "TARGET_SSE2"
4962 "@
4963 cvtss2sd\t{%2, %0|%0, %2}
4964 cvtss2sd\t{%2, %0|%0, %k2}
4965 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
4966 [(set_attr "isa" "noavx,noavx,avx")
4967 (set_attr "type" "ssecvt")
4968 (set_attr "amdfam10_decode" "vector,double,*")
4969 (set_attr "athlon_decode" "direct,direct,*")
4970 (set_attr "bdver1_decode" "direct,direct,*")
4971 (set_attr "btver2_decode" "double,double,double")
4972 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
4973 (set_attr "mode" "DF")])
4974
4975 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
4976 [(set (match_operand:V8SF 0 "register_operand" "=v")
4977 (float_truncate:V8SF
4978 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
4979 "TARGET_AVX512F"
4980 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4981 [(set_attr "type" "ssecvt")
4982 (set_attr "prefix" "evex")
4983 (set_attr "mode" "V8SF")])
4984
4985 (define_insn "avx_cvtpd2ps256<mask_name>"
4986 [(set (match_operand:V4SF 0 "register_operand" "=v")
4987 (float_truncate:V4SF
4988 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4989 "TARGET_AVX && <mask_avx512vl_condition>"
4990 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4991 [(set_attr "type" "ssecvt")
4992 (set_attr "prefix" "maybe_evex")
4993 (set_attr "btver2_decode" "vector")
4994 (set_attr "mode" "V4SF")])
4995
4996 (define_expand "sse2_cvtpd2ps"
4997 [(set (match_operand:V4SF 0 "register_operand")
4998 (vec_concat:V4SF
4999 (float_truncate:V2SF
5000 (match_operand:V2DF 1 "vector_operand"))
5001 (match_dup 2)))]
5002 "TARGET_SSE2"
5003 "operands[2] = CONST0_RTX (V2SFmode);")
5004
5005 (define_expand "sse2_cvtpd2ps_mask"
5006 [(set (match_operand:V4SF 0 "register_operand")
5007 (vec_merge:V4SF
5008 (vec_concat:V4SF
5009 (float_truncate:V2SF
5010 (match_operand:V2DF 1 "vector_operand"))
5011 (match_dup 4))
5012 (match_operand:V4SF 2 "register_operand")
5013 (match_operand:QI 3 "register_operand")))]
5014 "TARGET_SSE2"
5015 "operands[4] = CONST0_RTX (V2SFmode);")
5016
5017 (define_insn "*sse2_cvtpd2ps<mask_name>"
5018 [(set (match_operand:V4SF 0 "register_operand" "=v")
5019 (vec_concat:V4SF
5020 (float_truncate:V2SF
5021 (match_operand:V2DF 1 "vector_operand" "vBm"))
5022 (match_operand:V2SF 2 "const0_operand")))]
5023 "TARGET_SSE2 && <mask_avx512vl_condition>"
5024 {
5025 if (TARGET_AVX)
5026 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
5027 else
5028 return "cvtpd2ps\t{%1, %0|%0, %1}";
5029 }
5030 [(set_attr "type" "ssecvt")
5031 (set_attr "amdfam10_decode" "double")
5032 (set_attr "athlon_decode" "vector")
5033 (set_attr "bdver1_decode" "double")
5034 (set_attr "prefix_data16" "1")
5035 (set_attr "prefix" "maybe_vex")
5036 (set_attr "mode" "V4SF")])
5037
5038 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
5039 (define_mode_attr sf2dfmode
5040 [(V8DF "V8SF") (V4DF "V4SF")])
5041
5042 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
5043 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5044 (float_extend:VF2_512_256
5045 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5046 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
5047 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5048 [(set_attr "type" "ssecvt")
5049 (set_attr "prefix" "maybe_vex")
5050 (set_attr "mode" "<MODE>")])
5051
5052 (define_insn "*avx_cvtps2pd256_2"
5053 [(set (match_operand:V4DF 0 "register_operand" "=v")
5054 (float_extend:V4DF
5055 (vec_select:V4SF
5056 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5057 (parallel [(const_int 0) (const_int 1)
5058 (const_int 2) (const_int 3)]))))]
5059 "TARGET_AVX"
5060 "vcvtps2pd\t{%x1, %0|%0, %x1}"
5061 [(set_attr "type" "ssecvt")
5062 (set_attr "prefix" "vex")
5063 (set_attr "mode" "V4DF")])
5064
5065 (define_insn "vec_unpacks_lo_v16sf"
5066 [(set (match_operand:V8DF 0 "register_operand" "=v")
5067 (float_extend:V8DF
5068 (vec_select:V8SF
5069 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5070 (parallel [(const_int 0) (const_int 1)
5071 (const_int 2) (const_int 3)
5072 (const_int 4) (const_int 5)
5073 (const_int 6) (const_int 7)]))))]
5074 "TARGET_AVX512F"
5075 "vcvtps2pd\t{%t1, %0|%0, %t1}"
5076 [(set_attr "type" "ssecvt")
5077 (set_attr "prefix" "evex")
5078 (set_attr "mode" "V8DF")])
5079
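;; vpmov<b,w,d,q>2m copies the sign bit of every element into the
;; corresponding mask bit; the vpmovm2* patterns below do the reverse,
;; expanding each mask bit into an all-ones or all-zeros element.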
5080 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5081 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5082 (unspec:<avx512fmaskmode>
5083 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
5084 UNSPEC_CVTINT2MASK))]
5085 "TARGET_AVX512BW"
5086 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5087 [(set_attr "prefix" "evex")
5088 (set_attr "mode" "<sseinsnmode>")])
5089
5090 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5091 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5092 (unspec:<avx512fmaskmode>
5093 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
5094 UNSPEC_CVTINT2MASK))]
5095 "TARGET_AVX512DQ"
5096 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5097 [(set_attr "prefix" "evex")
5098 (set_attr "mode" "<sseinsnmode>")])
5099
5100 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5101 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5102 (vec_merge:VI12_AVX512VL
5103 (match_dup 2)
5104 (match_dup 3)
5105 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5106 "TARGET_AVX512BW"
5107 {
5108 operands[2] = CONSTM1_RTX (<MODE>mode);
5109 operands[3] = CONST0_RTX (<MODE>mode);
5110 })
5111
5112 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5113 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5114 (vec_merge:VI12_AVX512VL
5115 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
5116 (match_operand:VI12_AVX512VL 3 "const0_operand")
5117 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5118 "TARGET_AVX512BW"
5119 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5120 [(set_attr "prefix" "evex")
5121 (set_attr "mode" "<sseinsnmode>")])
5122
5123 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5124 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5125 (vec_merge:VI48_AVX512VL
5126 (match_dup 2)
5127 (match_dup 3)
5128 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5129 "TARGET_AVX512DQ"
5130 "{
5131 operands[2] = CONSTM1_RTX (<MODE>mode);
5132 operands[3] = CONST0_RTX (<MODE>mode);
5133 }")
5134
5135 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5136 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5137 (vec_merge:VI48_AVX512VL
5138 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
5139 (match_operand:VI48_AVX512VL 3 "const0_operand")
5140 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5141 "TARGET_AVX512DQ"
5142 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5143 [(set_attr "prefix" "evex")
5144 (set_attr "mode" "<sseinsnmode>")])
5145
5146 (define_insn "sse2_cvtps2pd<mask_name>"
5147 [(set (match_operand:V2DF 0 "register_operand" "=v")
5148 (float_extend:V2DF
5149 (vec_select:V2SF
5150 (match_operand:V4SF 1 "vector_operand" "vm")
5151 (parallel [(const_int 0) (const_int 1)]))))]
5152 "TARGET_SSE2 && <mask_avx512vl_condition>"
5153 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5154 [(set_attr "type" "ssecvt")
5155 (set_attr "amdfam10_decode" "direct")
5156 (set_attr "athlon_decode" "double")
5157 (set_attr "bdver1_decode" "double")
5158 (set_attr "prefix_data16" "0")
5159 (set_attr "prefix" "maybe_vex")
5160 (set_attr "mode" "V2DF")])
5161
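;; vec_unpacks_hi_v4sf first shuffles the two high elements of operand 1
;; into the low half of a scratch register and then widens them with
;; cvtps2pd.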
5162 (define_expand "vec_unpacks_hi_v4sf"
5163 [(set (match_dup 2)
5164 (vec_select:V4SF
5165 (vec_concat:V8SF
5166 (match_dup 2)
5167 (match_operand:V4SF 1 "vector_operand"))
5168 (parallel [(const_int 6) (const_int 7)
5169 (const_int 2) (const_int 3)])))
5170 (set (match_operand:V2DF 0 "register_operand")
5171 (float_extend:V2DF
5172 (vec_select:V2SF
5173 (match_dup 2)
5174 (parallel [(const_int 0) (const_int 1)]))))]
5175 "TARGET_SSE2"
5176 "operands[2] = gen_reg_rtx (V4SFmode);")
5177
5178 (define_expand "vec_unpacks_hi_v8sf"
5179 [(set (match_dup 2)
5180 (vec_select:V4SF
5181 (match_operand:V8SF 1 "register_operand")
5182 (parallel [(const_int 4) (const_int 5)
5183 (const_int 6) (const_int 7)])))
5184 (set (match_operand:V4DF 0 "register_operand")
5185 (float_extend:V4DF
5186 (match_dup 2)))]
5187 "TARGET_AVX"
5188 "operands[2] = gen_reg_rtx (V4SFmode);")
5189
5190 (define_expand "vec_unpacks_hi_v16sf"
5191 [(set (match_dup 2)
5192 (vec_select:V8SF
5193 (match_operand:V16SF 1 "register_operand")
5194 (parallel [(const_int 8) (const_int 9)
5195 (const_int 10) (const_int 11)
5196 (const_int 12) (const_int 13)
5197 (const_int 14) (const_int 15)])))
5198 (set (match_operand:V8DF 0 "register_operand")
5199 (float_extend:V8DF
5200 (match_dup 2)))]
5201 "TARGET_AVX512F"
5202 "operands[2] = gen_reg_rtx (V8SFmode);")
5203
5204 (define_expand "vec_unpacks_lo_v4sf"
5205 [(set (match_operand:V2DF 0 "register_operand")
5206 (float_extend:V2DF
5207 (vec_select:V2SF
5208 (match_operand:V4SF 1 "vector_operand")
5209 (parallel [(const_int 0) (const_int 1)]))))]
5210 "TARGET_SSE2")
5211
5212 (define_expand "vec_unpacks_lo_v8sf"
5213 [(set (match_operand:V4DF 0 "register_operand")
5214 (float_extend:V4DF
5215 (vec_select:V4SF
5216 (match_operand:V8SF 1 "nonimmediate_operand")
5217 (parallel [(const_int 0) (const_int 1)
5218 (const_int 2) (const_int 3)]))))]
5219 "TARGET_AVX")
5220
5221 (define_mode_attr sseunpackfltmode
5222 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5223 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
5224
5225 (define_expand "vec_unpacks_float_hi_<mode>"
5226 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5227 (match_operand:VI2_AVX512F 1 "register_operand")]
5228 "TARGET_SSE2"
5229 {
5230 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5231
5232 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
5233 emit_insn (gen_rtx_SET (operands[0],
5234 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5235 DONE;
5236 })
5237
5238 (define_expand "vec_unpacks_float_lo_<mode>"
5239 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5240 (match_operand:VI2_AVX512F 1 "register_operand")]
5241 "TARGET_SSE2"
5242 {
5243 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5244
5245 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
5246 emit_insn (gen_rtx_SET (operands[0],
5247 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5248 DONE;
5249 })
5250
5251 (define_expand "vec_unpacku_float_hi_<mode>"
5252 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5253 (match_operand:VI2_AVX512F 1 "register_operand")]
5254 "TARGET_SSE2"
5255 {
5256 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5257
5258 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5259 emit_insn (gen_rtx_SET (operands[0],
5260 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5261 DONE;
5262 })
5263
5264 (define_expand "vec_unpacku_float_lo_<mode>"
5265 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5266 (match_operand:VI2_AVX512F 1 "register_operand")]
5267 "TARGET_SSE2"
5268 {
5269 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5270
5271 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5272 emit_insn (gen_rtx_SET (operands[0],
5273 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5274 DONE;
5275 })
5276
5277 (define_expand "vec_unpacks_float_hi_v4si"
5278 [(set (match_dup 2)
5279 (vec_select:V4SI
5280 (match_operand:V4SI 1 "vector_operand")
5281 (parallel [(const_int 2) (const_int 3)
5282 (const_int 2) (const_int 3)])))
5283 (set (match_operand:V2DF 0 "register_operand")
5284 (float:V2DF
5285 (vec_select:V2SI
5286 (match_dup 2)
5287 (parallel [(const_int 0) (const_int 1)]))))]
5288 "TARGET_SSE2"
5289 "operands[2] = gen_reg_rtx (V4SImode);")
5290
5291 (define_expand "vec_unpacks_float_lo_v4si"
5292 [(set (match_operand:V2DF 0 "register_operand")
5293 (float:V2DF
5294 (vec_select:V2SI
5295 (match_operand:V4SI 1 "vector_operand")
5296 (parallel [(const_int 0) (const_int 1)]))))]
5297 "TARGET_SSE2")
5298
5299 (define_expand "vec_unpacks_float_hi_v8si"
5300 [(set (match_dup 2)
5301 (vec_select:V4SI
5302 (match_operand:V8SI 1 "vector_operand")
5303 (parallel [(const_int 4) (const_int 5)
5304 (const_int 6) (const_int 7)])))
5305 (set (match_operand:V4DF 0 "register_operand")
5306 (float:V4DF
5307 (match_dup 2)))]
5308 "TARGET_AVX"
5309 "operands[2] = gen_reg_rtx (V4SImode);")
5310
5311 (define_expand "vec_unpacks_float_lo_v8si"
5312 [(set (match_operand:V4DF 0 "register_operand")
5313 (float:V4DF
5314 (vec_select:V4SI
5315 (match_operand:V8SI 1 "nonimmediate_operand")
5316 (parallel [(const_int 0) (const_int 1)
5317 (const_int 2) (const_int 3)]))))]
5318 "TARGET_AVX")
5319
5320 (define_expand "vec_unpacks_float_hi_v16si"
5321 [(set (match_dup 2)
5322 (vec_select:V8SI
5323 (match_operand:V16SI 1 "nonimmediate_operand")
5324 (parallel [(const_int 8) (const_int 9)
5325 (const_int 10) (const_int 11)
5326 (const_int 12) (const_int 13)
5327 (const_int 14) (const_int 15)])))
5328 (set (match_operand:V8DF 0 "register_operand")
5329 (float:V8DF
5330 (match_dup 2)))]
5331 "TARGET_AVX512F"
5332 "operands[2] = gen_reg_rtx (V8SImode);")
5333
5334 (define_expand "vec_unpacks_float_lo_v16si"
5335 [(set (match_operand:V8DF 0 "register_operand")
5336 (float:V8DF
5337 (vec_select:V8SI
5338 (match_operand:V16SI 1 "nonimmediate_operand")
5339 (parallel [(const_int 0) (const_int 1)
5340 (const_int 2) (const_int 3)
5341 (const_int 4) (const_int 5)
5342 (const_int 6) (const_int 7)]))))]
5343 "TARGET_AVX512F")
5344
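;; The vec_unpacku_float_* expanders below convert as signed and then add
;; 2^32 to every lane the signed conversion turned negative, i.e. to every
;; input element with its high bit set.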
5345 (define_expand "vec_unpacku_float_hi_v4si"
5346 [(set (match_dup 5)
5347 (vec_select:V4SI
5348 (match_operand:V4SI 1 "vector_operand")
5349 (parallel [(const_int 2) (const_int 3)
5350 (const_int 2) (const_int 3)])))
5351 (set (match_dup 6)
5352 (float:V2DF
5353 (vec_select:V2SI
5354 (match_dup 5)
5355 (parallel [(const_int 0) (const_int 1)]))))
5356 (set (match_dup 7)
5357 (lt:V2DF (match_dup 6) (match_dup 3)))
5358 (set (match_dup 8)
5359 (and:V2DF (match_dup 7) (match_dup 4)))
5360 (set (match_operand:V2DF 0 "register_operand")
5361 (plus:V2DF (match_dup 6) (match_dup 8)))]
5362 "TARGET_SSE2"
5363 {
5364 REAL_VALUE_TYPE TWO32r;
5365 rtx x;
5366 int i;
5367
5368 real_ldexp (&TWO32r, &dconst1, 32);
5369 x = const_double_from_real_value (TWO32r, DFmode);
5370
5371 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5372 operands[4] = force_reg (V2DFmode,
5373 ix86_build_const_vector (V2DFmode, 1, x));
5374
5375 operands[5] = gen_reg_rtx (V4SImode);
5376
5377 for (i = 6; i < 9; i++)
5378 operands[i] = gen_reg_rtx (V2DFmode);
5379 })
5380
5381 (define_expand "vec_unpacku_float_lo_v4si"
5382 [(set (match_dup 5)
5383 (float:V2DF
5384 (vec_select:V2SI
5385 (match_operand:V4SI 1 "vector_operand")
5386 (parallel [(const_int 0) (const_int 1)]))))
5387 (set (match_dup 6)
5388 (lt:V2DF (match_dup 5) (match_dup 3)))
5389 (set (match_dup 7)
5390 (and:V2DF (match_dup 6) (match_dup 4)))
5391 (set (match_operand:V2DF 0 "register_operand")
5392 (plus:V2DF (match_dup 5) (match_dup 7)))]
5393 "TARGET_SSE2"
5394 {
5395 REAL_VALUE_TYPE TWO32r;
5396 rtx x;
5397 int i;
5398
5399 real_ldexp (&TWO32r, &dconst1, 32);
5400 x = const_double_from_real_value (TWO32r, DFmode);
5401
5402 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5403 operands[4] = force_reg (V2DFmode,
5404 ix86_build_const_vector (V2DFmode, 1, x));
5405
5406 for (i = 5; i < 8; i++)
5407 operands[i] = gen_reg_rtx (V2DFmode);
5408 })
5409
5410 (define_expand "vec_unpacku_float_hi_v8si"
5411 [(match_operand:V4DF 0 "register_operand")
5412 (match_operand:V8SI 1 "register_operand")]
5413 "TARGET_AVX"
5414 {
5415 REAL_VALUE_TYPE TWO32r;
5416 rtx x, tmp[6];
5417 int i;
5418
5419 real_ldexp (&TWO32r, &dconst1, 32);
5420 x = const_double_from_real_value (TWO32r, DFmode);
5421
5422 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5423 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5424 tmp[5] = gen_reg_rtx (V4SImode);
5425
5426 for (i = 2; i < 5; i++)
5427 tmp[i] = gen_reg_rtx (V4DFmode);
5428 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5429 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5430 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5431 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5432 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5433 DONE;
5434 })
5435
5436 (define_expand "vec_unpacku_float_hi_v16si"
5437 [(match_operand:V8DF 0 "register_operand")
5438 (match_operand:V16SI 1 "register_operand")]
5439 "TARGET_AVX512F"
5440 {
5441 REAL_VALUE_TYPE TWO32r;
5442 rtx k, x, tmp[4];
5443
5444 real_ldexp (&TWO32r, &dconst1, 32);
5445 x = const_double_from_real_value (TWO32r, DFmode);
5446
5447 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5448 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5449 tmp[2] = gen_reg_rtx (V8DFmode);
5450 tmp[3] = gen_reg_rtx (V8SImode);
5451 k = gen_reg_rtx (QImode);
5452
5453 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5454 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5455 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5456 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5457 emit_move_insn (operands[0], tmp[2]);
5458 DONE;
5459 })
5460
5461 (define_expand "vec_unpacku_float_lo_v8si"
5462 [(match_operand:V4DF 0 "register_operand")
5463 (match_operand:V8SI 1 "nonimmediate_operand")]
5464 "TARGET_AVX"
5465 {
5466 REAL_VALUE_TYPE TWO32r;
5467 rtx x, tmp[5];
5468 int i;
5469
5470 real_ldexp (&TWO32r, &dconst1, 32);
5471 x = const_double_from_real_value (TWO32r, DFmode);
5472
5473 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5474 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5475
5476 for (i = 2; i < 5; i++)
5477 tmp[i] = gen_reg_rtx (V4DFmode);
5478 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5479 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5480 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5481 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5482 DONE;
5483 })
5484
5485 (define_expand "vec_unpacku_float_lo_v16si"
5486 [(match_operand:V8DF 0 "register_operand")
5487 (match_operand:V16SI 1 "nonimmediate_operand")]
5488 "TARGET_AVX512F"
5489 {
5490 REAL_VALUE_TYPE TWO32r;
5491 rtx k, x, tmp[3];
5492
5493 real_ldexp (&TWO32r, &dconst1, 32);
5494 x = const_double_from_real_value (TWO32r, DFmode);
5495
5496 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5497 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5498 tmp[2] = gen_reg_rtx (V8DFmode);
5499 k = gen_reg_rtx (QImode);
5500
5501 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5502 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5503 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5504 emit_move_insn (operands[0], tmp[2]);
5505 DONE;
5506 })
5507
5508 (define_expand "vec_pack_trunc_<mode>"
5509 [(set (match_dup 3)
5510 (float_truncate:<sf2dfmode>
5511 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5512 (set (match_dup 4)
5513 (float_truncate:<sf2dfmode>
5514 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5515 (set (match_operand:<ssePSmode> 0 "register_operand")
5516 (vec_concat:<ssePSmode>
5517 (match_dup 3)
5518 (match_dup 4)))]
5519 "TARGET_AVX"
5520 {
5521 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5522 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
5523 })
5524
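;; When 256-bit vectors are not discouraged and we optimize for speed, the
;; two V2DF inputs are concatenated into one V4DF register so that a single
;; 256-bit conversion replaces two 128-bit ones plus a merge.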
5525 (define_expand "vec_pack_trunc_v2df"
5526 [(match_operand:V4SF 0 "register_operand")
5527 (match_operand:V2DF 1 "vector_operand")
5528 (match_operand:V2DF 2 "vector_operand")]
5529 "TARGET_SSE2"
5530 {
5531 rtx tmp0, tmp1;
5532
5533 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5534 {
5535 tmp0 = gen_reg_rtx (V4DFmode);
5536 tmp1 = force_reg (V2DFmode, operands[1]);
5537
5538 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5539 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5540 }
5541 else
5542 {
5543 tmp0 = gen_reg_rtx (V4SFmode);
5544 tmp1 = gen_reg_rtx (V4SFmode);
5545
5546 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5547 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5548 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
5549 }
5550 DONE;
5551 })
5552
5553 (define_expand "vec_pack_sfix_trunc_v8df"
5554 [(match_operand:V16SI 0 "register_operand")
5555 (match_operand:V8DF 1 "nonimmediate_operand")
5556 (match_operand:V8DF 2 "nonimmediate_operand")]
5557 "TARGET_AVX512F"
5558 {
5559 rtx r1, r2;
5560
5561 r1 = gen_reg_rtx (V8SImode);
5562 r2 = gen_reg_rtx (V8SImode);
5563
5564 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5565 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5566 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5567 DONE;
5568 })
5569
5570 (define_expand "vec_pack_sfix_trunc_v4df"
5571 [(match_operand:V8SI 0 "register_operand")
5572 (match_operand:V4DF 1 "nonimmediate_operand")
5573 (match_operand:V4DF 2 "nonimmediate_operand")]
5574 "TARGET_AVX"
5575 {
5576 rtx r1, r2;
5577
5578 r1 = gen_reg_rtx (V4SImode);
5579 r2 = gen_reg_rtx (V4SImode);
5580
5581 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
5582 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
5583 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
5584 DONE;
5585 })
5586
5587 (define_expand "vec_pack_sfix_trunc_v2df"
5588 [(match_operand:V4SI 0 "register_operand")
5589 (match_operand:V2DF 1 "vector_operand")
5590 (match_operand:V2DF 2 "vector_operand")]
5591 "TARGET_SSE2"
5592 {
5593 rtx tmp0, tmp1, tmp2;
5594
5595 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5596 {
5597 tmp0 = gen_reg_rtx (V4DFmode);
5598 tmp1 = force_reg (V2DFmode, operands[1]);
5599
5600 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5601 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
5602 }
5603 else
5604 {
5605 tmp0 = gen_reg_rtx (V4SImode);
5606 tmp1 = gen_reg_rtx (V4SImode);
5607 tmp2 = gen_reg_rtx (V2DImode);
5608
5609 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
5610 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
5611 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5612 gen_lowpart (V2DImode, tmp0),
5613 gen_lowpart (V2DImode, tmp1)));
5614 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5615 }
5616 DONE;
5617 })
5618
5619 (define_mode_attr ssepackfltmode
5620 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
5621
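;; Except for V8DF there is no packed unsigned truncation here; the
;; fallback first adjusts the inputs with ix86_expand_adjust_ufix_to_sfix_si,
;; does a signed vec_pack_sfix_trunc, and then applies the correction
;; computed by that helper with the final XOR.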
5622 (define_expand "vec_pack_ufix_trunc_<mode>"
5623 [(match_operand:<ssepackfltmode> 0 "register_operand")
5624 (match_operand:VF2 1 "register_operand")
5625 (match_operand:VF2 2 "register_operand")]
5626 "TARGET_SSE2"
5627 {
5628 if (<MODE>mode == V8DFmode)
5629 {
5630 rtx r1, r2;
5631
5632 r1 = gen_reg_rtx (V8SImode);
5633 r2 = gen_reg_rtx (V8SImode);
5634
5635 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
5636 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
5637 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5638 }
5639 else
5640 {
5641 rtx tmp[7];
5642 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5643 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
5644 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
5645 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
5646 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
5647 {
5648 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
5649 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
5650 }
5651 else
5652 {
5653 tmp[5] = gen_reg_rtx (V8SFmode);
5654 ix86_expand_vec_extract_even_odd (tmp[5],
5655 gen_lowpart (V8SFmode, tmp[2]),
5656 gen_lowpart (V8SFmode, tmp[3]), 0);
5657 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
5658 }
5659 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
5660 operands[0], 0, OPTAB_DIRECT);
5661 if (tmp[6] != operands[0])
5662 emit_move_insn (operands[0], tmp[6]);
5663 }
5664
5665 DONE;
5666 })
5667
5668 (define_expand "vec_pack_sfix_v4df"
5669 [(match_operand:V8SI 0 "register_operand")
5670 (match_operand:V4DF 1 "nonimmediate_operand")
5671 (match_operand:V4DF 2 "nonimmediate_operand")]
5672 "TARGET_AVX"
5673 {
5674 rtx r1, r2;
5675
5676 r1 = gen_reg_rtx (V4SImode);
5677 r2 = gen_reg_rtx (V4SImode);
5678
5679 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
5680 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
5681 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
5682 DONE;
5683 })
5684
5685 (define_expand "vec_pack_sfix_v2df"
5686 [(match_operand:V4SI 0 "register_operand")
5687 (match_operand:V2DF 1 "vector_operand")
5688 (match_operand:V2DF 2 "vector_operand")]
5689 "TARGET_SSE2"
5690 {
5691 rtx tmp0, tmp1, tmp2;
5692
5693 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5694 {
5695 tmp0 = gen_reg_rtx (V4DFmode);
5696 tmp1 = force_reg (V2DFmode, operands[1]);
5697
5698 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5699 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
5700 }
5701 else
5702 {
5703 tmp0 = gen_reg_rtx (V4SImode);
5704 tmp1 = gen_reg_rtx (V4SImode);
5705 tmp2 = gen_reg_rtx (V2DImode);
5706
5707 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
5708 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
5709 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5710 gen_lowpart (V2DImode, tmp0),
5711 gen_lowpart (V2DImode, tmp1)));
5712 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5713 }
5714 DONE;
5715 })
5716
5717 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5718 ;;
5719 ;; Parallel single-precision floating point element swizzling
5720 ;;
5721 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5722
5723 (define_expand "sse_movhlps_exp"
5724 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5725 (vec_select:V4SF
5726 (vec_concat:V8SF
5727 (match_operand:V4SF 1 "nonimmediate_operand")
5728 (match_operand:V4SF 2 "nonimmediate_operand"))
5729 (parallel [(const_int 6)
5730 (const_int 7)
5731 (const_int 2)
5732 (const_int 3)])))]
5733 "TARGET_SSE"
5734 {
5735 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5736
5737 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
5738
5739 /* Fix up the destination if needed. */
5740 if (dst != operands[0])
5741 emit_move_insn (operands[0], dst);
5742
5743 DONE;
5744 })
5745
5746 (define_insn "sse_movhlps"
5747 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
5748 (vec_select:V4SF
5749 (vec_concat:V8SF
5750 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
5751 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
5752 (parallel [(const_int 6)
5753 (const_int 7)
5754 (const_int 2)
5755 (const_int 3)])))]
5756 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5757 "@
5758 movhlps\t{%2, %0|%0, %2}
5759 vmovhlps\t{%2, %1, %0|%0, %1, %2}
5760 movlps\t{%H2, %0|%0, %H2}
5761 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
5762 %vmovhps\t{%2, %0|%q0, %2}"
5763 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5764 (set_attr "type" "ssemov")
5765 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
5766 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
5767
5768 (define_expand "sse_movlhps_exp"
5769 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5770 (vec_select:V4SF
5771 (vec_concat:V8SF
5772 (match_operand:V4SF 1 "nonimmediate_operand")
5773 (match_operand:V4SF 2 "nonimmediate_operand"))
5774 (parallel [(const_int 0)
5775 (const_int 1)
5776 (const_int 4)
5777 (const_int 5)])))]
5778 "TARGET_SSE"
5779 {
5780 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5781
5782 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
5783
5784 /* Fix up the destination if needed. */
5785 if (dst != operands[0])
5786 emit_move_insn (operands[0], dst);
5787
5788 DONE;
5789 })
5790
5791 (define_insn "sse_movlhps"
5792 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
5793 (vec_select:V4SF
5794 (vec_concat:V8SF
5795 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
5796 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
5797 (parallel [(const_int 0)
5798 (const_int 1)
5799 (const_int 4)
5800 (const_int 5)])))]
5801 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
5802 "@
5803 movlhps\t{%2, %0|%0, %2}
5804 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5805 movhps\t{%2, %0|%0, %q2}
5806 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5807 %vmovlps\t{%2, %H0|%H0, %2}"
5808 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5809 (set_attr "type" "ssemov")
5810 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
5811 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
5812
5813 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
5814 [(set (match_operand:V16SF 0 "register_operand" "=v")
5815 (vec_select:V16SF
5816 (vec_concat:V32SF
5817 (match_operand:V16SF 1 "register_operand" "v")
5818 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5819 (parallel [(const_int 2) (const_int 18)
5820 (const_int 3) (const_int 19)
5821 (const_int 6) (const_int 22)
5822 (const_int 7) (const_int 23)
5823 (const_int 10) (const_int 26)
5824 (const_int 11) (const_int 27)
5825 (const_int 14) (const_int 30)
5826 (const_int 15) (const_int 31)])))]
5827 "TARGET_AVX512F"
5828 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5829 [(set_attr "type" "sselog")
5830 (set_attr "prefix" "evex")
5831 (set_attr "mode" "V16SF")])
5832
5833 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
5834 (define_insn "avx_unpckhps256<mask_name>"
5835 [(set (match_operand:V8SF 0 "register_operand" "=v")
5836 (vec_select:V8SF
5837 (vec_concat:V16SF
5838 (match_operand:V8SF 1 "register_operand" "v")
5839 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5840 (parallel [(const_int 2) (const_int 10)
5841 (const_int 3) (const_int 11)
5842 (const_int 6) (const_int 14)
5843 (const_int 7) (const_int 15)])))]
5844 "TARGET_AVX && <mask_avx512vl_condition>"
5845 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5846 [(set_attr "type" "sselog")
5847 (set_attr "prefix" "vex")
5848 (set_attr "mode" "V8SF")])
5849
5850 (define_expand "vec_interleave_highv8sf"
5851 [(set (match_dup 3)
5852 (vec_select:V8SF
5853 (vec_concat:V16SF
5854 (match_operand:V8SF 1 "register_operand")
5855 (match_operand:V8SF 2 "nonimmediate_operand"))
5856 (parallel [(const_int 0) (const_int 8)
5857 (const_int 1) (const_int 9)
5858 (const_int 4) (const_int 12)
5859 (const_int 5) (const_int 13)])))
5860 (set (match_dup 4)
5861 (vec_select:V8SF
5862 (vec_concat:V16SF
5863 (match_dup 1)
5864 (match_dup 2))
5865 (parallel [(const_int 2) (const_int 10)
5866 (const_int 3) (const_int 11)
5867 (const_int 6) (const_int 14)
5868 (const_int 7) (const_int 15)])))
5869 (set (match_operand:V8SF 0 "register_operand")
5870 (vec_select:V8SF
5871 (vec_concat:V16SF
5872 (match_dup 3)
5873 (match_dup 4))
5874 (parallel [(const_int 4) (const_int 5)
5875 (const_int 6) (const_int 7)
5876 (const_int 12) (const_int 13)
5877 (const_int 14) (const_int 15)])))]
5878 "TARGET_AVX"
5879 {
5880 operands[3] = gen_reg_rtx (V8SFmode);
5881 operands[4] = gen_reg_rtx (V8SFmode);
5882 })
5883
5884 (define_insn "vec_interleave_highv4sf<mask_name>"
5885 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
5886 (vec_select:V4SF
5887 (vec_concat:V8SF
5888 (match_operand:V4SF 1 "register_operand" "0,v")
5889 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
5890 (parallel [(const_int 2) (const_int 6)
5891 (const_int 3) (const_int 7)])))]
5892 "TARGET_SSE && <mask_avx512vl_condition>"
5893 "@
5894 unpckhps\t{%2, %0|%0, %2}
5895 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5896 [(set_attr "isa" "noavx,avx")
5897 (set_attr "type" "sselog")
5898 (set_attr "prefix" "orig,vex")
5899 (set_attr "mode" "V4SF")])
5900
5901 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
5902 [(set (match_operand:V16SF 0 "register_operand" "=v")
5903 (vec_select:V16SF
5904 (vec_concat:V32SF
5905 (match_operand:V16SF 1 "register_operand" "v")
5906 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5907 (parallel [(const_int 0) (const_int 16)
5908 (const_int 1) (const_int 17)
5909 (const_int 4) (const_int 20)
5910 (const_int 5) (const_int 21)
5911 (const_int 8) (const_int 24)
5912 (const_int 9) (const_int 25)
5913 (const_int 12) (const_int 28)
5914 (const_int 13) (const_int 29)])))]
5915 "TARGET_AVX512F"
5916 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5917 [(set_attr "type" "sselog")
5918 (set_attr "prefix" "evex")
5919 (set_attr "mode" "V16SF")])
5920
5921 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
5922 (define_insn "avx_unpcklps256<mask_name>"
5923 [(set (match_operand:V8SF 0 "register_operand" "=v")
5924 (vec_select:V8SF
5925 (vec_concat:V16SF
5926 (match_operand:V8SF 1 "register_operand" "v")
5927 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5928 (parallel [(const_int 0) (const_int 8)
5929 (const_int 1) (const_int 9)
5930 (const_int 4) (const_int 12)
5931 (const_int 5) (const_int 13)])))]
5932 "TARGET_AVX && <mask_avx512vl_condition>"
5933 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5934 [(set_attr "type" "sselog")
5935 (set_attr "prefix" "vex")
5936 (set_attr "mode" "V8SF")])
5937
5938 (define_insn "unpcklps128_mask"
5939 [(set (match_operand:V4SF 0 "register_operand" "=v")
5940 (vec_merge:V4SF
5941 (vec_select:V4SF
5942 (vec_concat:V8SF
5943 (match_operand:V4SF 1 "register_operand" "v")
5944 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
5945 (parallel [(const_int 0) (const_int 4)
5946 (const_int 1) (const_int 5)]))
5947 (match_operand:V4SF 3 "vector_move_operand" "0C")
5948 (match_operand:QI 4 "register_operand" "Yk")))]
5949 "TARGET_AVX512VL"
5950 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
5951 [(set_attr "type" "sselog")
5952 (set_attr "prefix" "evex")
5953 (set_attr "mode" "V4SF")])
5954
5955 (define_expand "vec_interleave_lowv8sf"
5956 [(set (match_dup 3)
5957 (vec_select:V8SF
5958 (vec_concat:V16SF
5959 (match_operand:V8SF 1 "register_operand")
5960 (match_operand:V8SF 2 "nonimmediate_operand"))
5961 (parallel [(const_int 0) (const_int 8)
5962 (const_int 1) (const_int 9)
5963 (const_int 4) (const_int 12)
5964 (const_int 5) (const_int 13)])))
5965 (set (match_dup 4)
5966 (vec_select:V8SF
5967 (vec_concat:V16SF
5968 (match_dup 1)
5969 (match_dup 2))
5970 (parallel [(const_int 2) (const_int 10)
5971 (const_int 3) (const_int 11)
5972 (const_int 6) (const_int 14)
5973 (const_int 7) (const_int 15)])))
5974 (set (match_operand:V8SF 0 "register_operand")
5975 (vec_select:V8SF
5976 (vec_concat:V16SF
5977 (match_dup 3)
5978 (match_dup 4))
5979 (parallel [(const_int 0) (const_int 1)
5980 (const_int 2) (const_int 3)
5981 (const_int 8) (const_int 9)
5982 (const_int 10) (const_int 11)])))]
5983 "TARGET_AVX"
5984 {
5985 operands[3] = gen_reg_rtx (V8SFmode);
5986 operands[4] = gen_reg_rtx (V8SFmode);
5987 })
5988
5989 (define_insn "vec_interleave_lowv4sf"
5990 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
5991 (vec_select:V4SF
5992 (vec_concat:V8SF
5993 (match_operand:V4SF 1 "register_operand" "0,v")
5994 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
5995 (parallel [(const_int 0) (const_int 4)
5996 (const_int 1) (const_int 5)])))]
5997 "TARGET_SSE"
5998 "@
5999 unpcklps\t{%2, %0|%0, %2}
6000 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
6001 [(set_attr "isa" "noavx,avx")
6002 (set_attr "type" "sselog")
6003 (set_attr "prefix" "orig,maybe_evex")
6004 (set_attr "mode" "V4SF")])
6005
6006 ;; These are modeled with the same vec_concat as the others so that we
6007 ;; capture users of shufps that can use the new instructions.
6008 (define_insn "avx_movshdup256<mask_name>"
6009 [(set (match_operand:V8SF 0 "register_operand" "=v")
6010 (vec_select:V8SF
6011 (vec_concat:V16SF
6012 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6013 (match_dup 1))
6014 (parallel [(const_int 1) (const_int 1)
6015 (const_int 3) (const_int 3)
6016 (const_int 5) (const_int 5)
6017 (const_int 7) (const_int 7)])))]
6018 "TARGET_AVX && <mask_avx512vl_condition>"
6019 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6020 [(set_attr "type" "sse")
6021 (set_attr "prefix" "vex")
6022 (set_attr "mode" "V8SF")])
6023
6024 (define_insn "sse3_movshdup<mask_name>"
6025 [(set (match_operand:V4SF 0 "register_operand" "=v")
6026 (vec_select:V4SF
6027 (vec_concat:V8SF
6028 (match_operand:V4SF 1 "vector_operand" "vBm")
6029 (match_dup 1))
6030 (parallel [(const_int 1)
6031 (const_int 1)
6032 (const_int 7)
6033 (const_int 7)])))]
6034 "TARGET_SSE3 && <mask_avx512vl_condition>"
6035 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6036 [(set_attr "type" "sse")
6037 (set_attr "prefix_rep" "1")
6038 (set_attr "prefix" "maybe_vex")
6039 (set_attr "mode" "V4SF")])
6040
6041 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
6042 [(set (match_operand:V16SF 0 "register_operand" "=v")
6043 (vec_select:V16SF
6044 (vec_concat:V32SF
6045 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6046 (match_dup 1))
6047 (parallel [(const_int 1) (const_int 1)
6048 (const_int 3) (const_int 3)
6049 (const_int 5) (const_int 5)
6050 (const_int 7) (const_int 7)
6051 (const_int 9) (const_int 9)
6052 (const_int 11) (const_int 11)
6053 (const_int 13) (const_int 13)
6054 (const_int 15) (const_int 15)])))]
6055 "TARGET_AVX512F"
6056 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6057 [(set_attr "type" "sse")
6058 (set_attr "prefix" "evex")
6059 (set_attr "mode" "V16SF")])
6060
6061 (define_insn "avx_movsldup256<mask_name>"
6062 [(set (match_operand:V8SF 0 "register_operand" "=v")
6063 (vec_select:V8SF
6064 (vec_concat:V16SF
6065 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6066 (match_dup 1))
6067 (parallel [(const_int 0) (const_int 0)
6068 (const_int 2) (const_int 2)
6069 (const_int 4) (const_int 4)
6070 (const_int 6) (const_int 6)])))]
6071 "TARGET_AVX && <mask_avx512vl_condition>"
6072 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6073 [(set_attr "type" "sse")
6074 (set_attr "prefix" "vex")
6075 (set_attr "mode" "V8SF")])
6076
6077 (define_insn "sse3_movsldup<mask_name>"
6078 [(set (match_operand:V4SF 0 "register_operand" "=v")
6079 (vec_select:V4SF
6080 (vec_concat:V8SF
6081 (match_operand:V4SF 1 "vector_operand" "vBm")
6082 (match_dup 1))
6083 (parallel [(const_int 0)
6084 (const_int 0)
6085 (const_int 6)
6086 (const_int 6)])))]
6087 "TARGET_SSE3 && <mask_avx512vl_condition>"
6088 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6089 [(set_attr "type" "sse")
6090 (set_attr "prefix_rep" "1")
6091 (set_attr "prefix" "maybe_vex")
6092 (set_attr "mode" "V4SF")])
6093
6094 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
6095 [(set (match_operand:V16SF 0 "register_operand" "=v")
6096 (vec_select:V16SF
6097 (vec_concat:V32SF
6098 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6099 (match_dup 1))
6100 (parallel [(const_int 0) (const_int 0)
6101 (const_int 2) (const_int 2)
6102 (const_int 4) (const_int 4)
6103 (const_int 6) (const_int 6)
6104 (const_int 8) (const_int 8)
6105 (const_int 10) (const_int 10)
6106 (const_int 12) (const_int 12)
6107 (const_int 14) (const_int 14)])))]
6108 "TARGET_AVX512F"
6109 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6110 [(set_attr "type" "sse")
6111 (set_attr "prefix" "evex")
6112 (set_attr "mode" "V16SF")])
6113
6114 (define_expand "avx_shufps256<mask_expand4_name>"
6115 [(match_operand:V8SF 0 "register_operand")
6116 (match_operand:V8SF 1 "register_operand")
6117 (match_operand:V8SF 2 "nonimmediate_operand")
6118 (match_operand:SI 3 "const_int_operand")]
6119 "TARGET_AVX"
6120 {
6121 int mask = INTVAL (operands[3]);
6122 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6123 operands[1],
6124 operands[2],
6125 GEN_INT ((mask >> 0) & 3),
6126 GEN_INT ((mask >> 2) & 3),
6127 GEN_INT (((mask >> 4) & 3) + 8),
6128 GEN_INT (((mask >> 6) & 3) + 8),
6129 GEN_INT (((mask >> 0) & 3) + 4),
6130 GEN_INT (((mask >> 2) & 3) + 4),
6131 GEN_INT (((mask >> 4) & 3) + 12),
6132 GEN_INT (((mask >> 6) & 3) + 12)
6133 <mask_expand4_args>));
6134 DONE;
6135 })
6136
6137 ;; Each 2-bit field of the mask selects 2 elements, one at the same position in each 128-bit lane.
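;; As an illustration (values not taken from the source), imm8 0x1b is
;; decomposed by the expander above into the selectors 3, 2, 9, 8, 7, 6,
;; 13, 12: indices 0-7 name elements of operand 1, indices 8-15 elements
;; of operand 2, the first four selectors form the low 128-bit lane of the
;; result and the last four the high lane.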
6138 (define_insn "avx_shufps256_1<mask_name>"
6139 [(set (match_operand:V8SF 0 "register_operand" "=v")
6140 (vec_select:V8SF
6141 (vec_concat:V16SF
6142 (match_operand:V8SF 1 "register_operand" "v")
6143 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6144 (parallel [(match_operand 3 "const_0_to_3_operand" )
6145 (match_operand 4 "const_0_to_3_operand" )
6146 (match_operand 5 "const_8_to_11_operand" )
6147 (match_operand 6 "const_8_to_11_operand" )
6148 (match_operand 7 "const_4_to_7_operand" )
6149 (match_operand 8 "const_4_to_7_operand" )
6150 (match_operand 9 "const_12_to_15_operand")
6151 (match_operand 10 "const_12_to_15_operand")])))]
6152 "TARGET_AVX
6153 && <mask_avx512vl_condition>
6154 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6155 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6156 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6157 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6158 {
6159 int mask;
6160 mask = INTVAL (operands[3]);
6161 mask |= INTVAL (operands[4]) << 2;
6162 mask |= (INTVAL (operands[5]) - 8) << 4;
6163 mask |= (INTVAL (operands[6]) - 8) << 6;
6164 operands[3] = GEN_INT (mask);
6165
6166 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6167 }
6168 [(set_attr "type" "sseshuf")
6169 (set_attr "length_immediate" "1")
6170 (set_attr "prefix" "<mask_prefix>")
6171 (set_attr "mode" "V8SF")])
6172
6173 (define_expand "sse_shufps<mask_expand4_name>"
6174 [(match_operand:V4SF 0 "register_operand")
6175 (match_operand:V4SF 1 "register_operand")
6176 (match_operand:V4SF 2 "vector_operand")
6177 (match_operand:SI 3 "const_int_operand")]
6178 "TARGET_SSE"
6179 {
6180 int mask = INTVAL (operands[3]);
6181 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6182 operands[1],
6183 operands[2],
6184 GEN_INT ((mask >> 0) & 3),
6185 GEN_INT ((mask >> 2) & 3),
6186 GEN_INT (((mask >> 4) & 3) + 4),
6187 GEN_INT (((mask >> 6) & 3) + 4)
6188 <mask_expand4_args>));
6189 DONE;
6190 })
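;; A rough sketch of what the expander above produces: selectors 0-3 name
;; elements of operand 1 and selectors 4-7 name elements of operand 2, so
;; for example imm8 0x4e (fields 2, 3, 0, 1) becomes the index set
;; (2 3 4 5).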
6191
6192 (define_insn "sse_shufps_v4sf_mask"
6193 [(set (match_operand:V4SF 0 "register_operand" "=v")
6194 (vec_merge:V4SF
6195 (vec_select:V4SF
6196 (vec_concat:V8SF
6197 (match_operand:V4SF 1 "register_operand" "v")
6198 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6199 (parallel [(match_operand 3 "const_0_to_3_operand")
6200 (match_operand 4 "const_0_to_3_operand")
6201 (match_operand 5 "const_4_to_7_operand")
6202 (match_operand 6 "const_4_to_7_operand")]))
6203 (match_operand:V4SF 7 "vector_move_operand" "0C")
6204 (match_operand:QI 8 "register_operand" "Yk")))]
6205 "TARGET_AVX512VL"
6206 {
6207 int mask = 0;
6208 mask |= INTVAL (operands[3]) << 0;
6209 mask |= INTVAL (operands[4]) << 2;
6210 mask |= (INTVAL (operands[5]) - 4) << 4;
6211 mask |= (INTVAL (operands[6]) - 4) << 6;
6212 operands[3] = GEN_INT (mask);
6213
6214 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6215 }
6216 [(set_attr "type" "sseshuf")
6217 (set_attr "length_immediate" "1")
6218 (set_attr "prefix" "evex")
6219 (set_attr "mode" "V4SF")])
6220
6221 (define_insn "sse_shufps_<mode>"
6222 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
6223 (vec_select:VI4F_128
6224 (vec_concat:<ssedoublevecmode>
6225 (match_operand:VI4F_128 1 "register_operand" "0,v")
6226 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
6227 (parallel [(match_operand 3 "const_0_to_3_operand")
6228 (match_operand 4 "const_0_to_3_operand")
6229 (match_operand 5 "const_4_to_7_operand")
6230 (match_operand 6 "const_4_to_7_operand")])))]
6231 "TARGET_SSE"
6232 {
6233 int mask = 0;
6234 mask |= INTVAL (operands[3]) << 0;
6235 mask |= INTVAL (operands[4]) << 2;
6236 mask |= (INTVAL (operands[5]) - 4) << 4;
6237 mask |= (INTVAL (operands[6]) - 4) << 6;
6238 operands[3] = GEN_INT (mask);
6239
6240 switch (which_alternative)
6241 {
6242 case 0:
6243 return "shufps\t{%3, %2, %0|%0, %2, %3}";
6244 case 1:
6245 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6246 default:
6247 gcc_unreachable ();
6248 }
6249 }
6250 [(set_attr "isa" "noavx,avx")
6251 (set_attr "type" "sseshuf")
6252 (set_attr "length_immediate" "1")
6253 (set_attr "prefix" "orig,maybe_evex")
6254 (set_attr "mode" "V4SF")])
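;; The shufps insns above rebuild the immediate from the selector operands,
;; reversing the expanders' offsets (4 is subtracted from the operand-2
;; selectors, or 8 in the 256-bit form, before the four 2-bit fields are
;; packed back together).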
6255
6256 (define_insn "sse_storehps"
6257 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
6258 (vec_select:V2SF
6259 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
6260 (parallel [(const_int 2) (const_int 3)])))]
6261 "TARGET_SSE"
6262 "@
6263 %vmovhps\t{%1, %0|%q0, %1}
6264 %vmovhlps\t{%1, %d0|%d0, %1}
6265 %vmovlps\t{%H1, %d0|%d0, %H1}"
6266 [(set_attr "type" "ssemov")
6267 (set_attr "prefix" "maybe_vex")
6268 (set_attr "mode" "V2SF,V4SF,V2SF")])
6269
6270 (define_expand "sse_loadhps_exp"
6271 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6272 (vec_concat:V4SF
6273 (vec_select:V2SF
6274 (match_operand:V4SF 1 "nonimmediate_operand")
6275 (parallel [(const_int 0) (const_int 1)]))
6276 (match_operand:V2SF 2 "nonimmediate_operand")))]
6277 "TARGET_SSE"
6278 {
6279 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6280
6281 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6282
6283 /* Fix up the destination if needed. */
6284 if (dst != operands[0])
6285 emit_move_insn (operands[0], dst);
6286
6287 DONE;
6288 })
6289
6290 (define_insn "sse_loadhps"
6291 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
6292 (vec_concat:V4SF
6293 (vec_select:V2SF
6294 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6295 (parallel [(const_int 0) (const_int 1)]))
6296 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
6297 "TARGET_SSE"
6298 "@
6299 movhps\t{%2, %0|%0, %q2}
6300 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6301 movlhps\t{%2, %0|%0, %2}
6302 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6303 %vmovlps\t{%2, %H0|%H0, %2}"
6304 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6305 (set_attr "type" "ssemov")
6306 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6307 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
6308
6309 (define_insn "sse_storelps"
6310 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
6311 (vec_select:V2SF
6312 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
6313 (parallel [(const_int 0) (const_int 1)])))]
6314 "TARGET_SSE"
6315 "@
6316 %vmovlps\t{%1, %0|%q0, %1}
6317 %vmovaps\t{%1, %0|%0, %1}
6318 %vmovlps\t{%1, %d0|%d0, %q1}"
6319 [(set_attr "type" "ssemov")
6320 (set_attr "prefix" "maybe_vex")
6321 (set_attr "mode" "V2SF,V4SF,V2SF")])
6322
6323 (define_expand "sse_loadlps_exp"
6324 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6325 (vec_concat:V4SF
6326 (match_operand:V2SF 2 "nonimmediate_operand")
6327 (vec_select:V2SF
6328 (match_operand:V4SF 1 "nonimmediate_operand")
6329 (parallel [(const_int 2) (const_int 3)]))))]
6330 "TARGET_SSE"
6331 {
6332 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6333
6334 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6335
6336 /* Fix up the destination if needed. */
6337 if (dst != operands[0])
6338 emit_move_insn (operands[0], dst);
6339
6340 DONE;
6341 })
6342
6343 (define_insn "sse_loadlps"
6344 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
6345 (vec_concat:V4SF
6346 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
6347 (vec_select:V2SF
6348 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
6349 (parallel [(const_int 2) (const_int 3)]))))]
6350 "TARGET_SSE"
6351 "@
6352 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6353 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6354 movlps\t{%2, %0|%0, %q2}
6355 vmovlps\t{%2, %1, %0|%0, %1, %q2}
6356 %vmovlps\t{%2, %0|%q0, %2}"
6357 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6358 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6359 (set_attr "length_immediate" "1,1,*,*,*")
6360 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6361 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6362
6363 (define_insn "sse_movss"
6364 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6365 (vec_merge:V4SF
6366 (match_operand:V4SF 2 "register_operand" " x,v")
6367 (match_operand:V4SF 1 "register_operand" " 0,v")
6368 (const_int 1)))]
6369 "TARGET_SSE"
6370 "@
6371 movss\t{%2, %0|%0, %2}
6372 vmovss\t{%2, %1, %0|%0, %1, %2}"
6373 [(set_attr "isa" "noavx,avx")
6374 (set_attr "type" "ssemov")
6375 (set_attr "prefix" "orig,maybe_evex")
6376 (set_attr "mode" "SF")])
6377
6378 (define_insn "avx2_vec_dup<mode>"
6379 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
6380 (vec_duplicate:VF1_128_256
6381 (vec_select:SF
6382 (match_operand:V4SF 1 "register_operand" "v")
6383 (parallel [(const_int 0)]))))]
6384 "TARGET_AVX2"
6385 "vbroadcastss\t{%1, %0|%0, %1}"
6386 [(set_attr "type" "sselog1")
6387 (set_attr "prefix" "maybe_evex")
6388 (set_attr "mode" "<MODE>")])
6389
6390 (define_insn "avx2_vec_dupv8sf_1"
6391 [(set (match_operand:V8SF 0 "register_operand" "=v")
6392 (vec_duplicate:V8SF
6393 (vec_select:SF
6394 (match_operand:V8SF 1 "register_operand" "v")
6395 (parallel [(const_int 0)]))))]
6396 "TARGET_AVX2"
6397 "vbroadcastss\t{%x1, %0|%0, %x1}"
6398 [(set_attr "type" "sselog1")
6399 (set_attr "prefix" "maybe_evex")
6400 (set_attr "mode" "V8SF")])
6401
6402 (define_insn "avx512f_vec_dup<mode>_1"
6403 [(set (match_operand:VF_512 0 "register_operand" "=v")
6404 (vec_duplicate:VF_512
6405 (vec_select:<ssescalarmode>
6406 (match_operand:VF_512 1 "register_operand" "v")
6407 (parallel [(const_int 0)]))))]
6408 "TARGET_AVX512F"
6409 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
6410 [(set_attr "type" "sselog1")
6411 (set_attr "prefix" "evex")
6412 (set_attr "mode" "<MODE>")])
6413
6414 ;; Although insertps takes register source, we prefer
6415 ;; unpcklps with register source since it is shorter.
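;; The 0x10 immediate used below sets the insertps COUNT_D field to 1,
;; i.e. the scalar in operand 2 is inserted into element 1 of the result
;; and no elements are zeroed.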
6416 (define_insn "*vec_concatv2sf_sse4_1"
6417 [(set (match_operand:V2SF 0 "register_operand"
6418 "=Yr,*x,v,Yr,*x,v,v,*y ,*y")
6419 (vec_concat:V2SF
6420 (match_operand:SF 1 "nonimmediate_operand"
6421 " 0, 0,v, 0,0, v,m, 0 , m")
6422 (match_operand:SF 2 "vector_move_operand"
6423 " Yr,*x,v, m,m, m,C,*ym, C")))]
6424 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6425 "@
6426 unpcklps\t{%2, %0|%0, %2}
6427 unpcklps\t{%2, %0|%0, %2}
6428 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6429 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6430 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6431 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6432 %vmovss\t{%1, %0|%0, %1}
6433 punpckldq\t{%2, %0|%0, %2}
6434 movd\t{%1, %0|%0, %1}"
6435 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
6436 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6437 (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*")
6438 (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*")
6439 (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*")
6440 (set_attr "prefix" "orig,orig,maybe_evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
6441 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6442
6443 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6444 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
6445 ;; alternatives pretty much forces the MMX alternative to be chosen.
6446 (define_insn "*vec_concatv2sf_sse"
6447 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6448 (vec_concat:V2SF
6449 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6450 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6451 "TARGET_SSE"
6452 "@
6453 unpcklps\t{%2, %0|%0, %2}
6454 movss\t{%1, %0|%0, %1}
6455 punpckldq\t{%2, %0|%0, %2}
6456 movd\t{%1, %0|%0, %1}"
6457 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6458 (set_attr "mode" "V4SF,SF,DI,DI")])
6459
6460 (define_insn "*vec_concatv4sf"
6461 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
6462 (vec_concat:V4SF
6463 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
6464 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
6465 "TARGET_SSE"
6466 "@
6467 movlhps\t{%2, %0|%0, %2}
6468 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6469 movhps\t{%2, %0|%0, %q2}
6470 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6471 [(set_attr "isa" "noavx,avx,noavx,avx")
6472 (set_attr "type" "ssemov")
6473 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
6474 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
6475
6476 (define_expand "vec_init<mode>"
6477 [(match_operand:V_128 0 "register_operand")
6478 (match_operand 1)]
6479 "TARGET_SSE"
6480 {
6481 ix86_expand_vector_init (false, operands[0], operands[1]);
6482 DONE;
6483 })
6484
6485 ;; Avoid combining registers from different units in a single alternative,
6486 ;; see comment above inline_secondary_memory_needed function in i386.c
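;; The $0xe insertps immediate used below inserts source element 0 into
;; destination element 0 and, through the 0b1110 zero mask, clears
;; elements 1-3, matching the vec_merge of a duplicated scalar with a
;; zero vector.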
6487 (define_insn "vec_set<mode>_0"
6488 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6489 "=Yr,*v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m")
6490 (vec_merge:VI4F_128
6491 (vec_duplicate:VI4F_128
6492 (match_operand:<ssescalarmode> 2 "general_operand"
6493 " Yr,*v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
6494 (match_operand:VI4F_128 1 "vector_move_operand"
6495 " C , C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
6496 (const_int 1)))]
6497 "TARGET_SSE"
6498 "@
6499 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6500 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6501 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6502 %vmovd\t{%2, %0|%0, %2}
6503 movss\t{%2, %0|%0, %2}
6504 movss\t{%2, %0|%0, %2}
6505 vmovss\t{%2, %1, %0|%0, %1, %2}
6506 pinsrd\t{$0, %2, %0|%0, %2, 0}
6507 pinsrd\t{$0, %2, %0|%0, %2, 0}
6508 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6509 #
6510 #
6511 #"
6512 [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*")
6513 (set (attr "type")
6514 (cond [(eq_attr "alternative" "0,1,7,8,9")
6515 (const_string "sselog")
6516 (eq_attr "alternative" "11")
6517 (const_string "imov")
6518 (eq_attr "alternative" "12")
6519 (const_string "fmov")
6520 ]
6521 (const_string "ssemov")))
6522 (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6523 (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6524 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*")
6525 (set_attr "mode" "SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
6526
6527 ;; A subset is vec_setv4sf.
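;; In the insn below the merge mask (operand 3) is a single set bit whose
;; log2 is the element being replaced; exact_log2 (...) << 4 moves that
;; index into the COUNT_D field (bits 5:4) of the insertps immediate.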
6528 (define_insn "*vec_setv4sf_sse4_1"
6529 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
6530 (vec_merge:V4SF
6531 (vec_duplicate:V4SF
6532 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm"))
6533 (match_operand:V4SF 1 "register_operand" "0,0,x")
6534 (match_operand:SI 3 "const_int_operand")))]
6535 "TARGET_SSE4_1
6536 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6537 < GET_MODE_NUNITS (V4SFmode))"
6538 {
6539 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
6540 switch (which_alternative)
6541 {
6542 case 0:
6543 case 1:
6544 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6545 case 2:
6546 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6547 default:
6548 gcc_unreachable ();
6549 }
6550 }
6551 [(set_attr "isa" "noavx,noavx,avx")
6552 (set_attr "type" "sselog")
6553 (set_attr "prefix_data16" "1,1,*")
6554 (set_attr "prefix_extra" "1")
6555 (set_attr "length_immediate" "1")
6556 (set_attr "prefix" "orig,orig,vex")
6557 (set_attr "mode" "V4SF")])
6558
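;; When operand 2 is a memory reference, insertps always reads a single
;; SFmode value, so the code below folds the COUNT_S field into the
;; address (COUNT_S * 4 bytes) and then clears those bits from the
;; immediate.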
6559 (define_insn "sse4_1_insertps"
6560 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
6561 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm")
6562 (match_operand:V4SF 1 "register_operand" "0,0,x")
6563 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
6564 UNSPEC_INSERTPS))]
6565 "TARGET_SSE4_1"
6566 {
6567 if (MEM_P (operands[2]))
6568 {
6569 unsigned count_s = INTVAL (operands[3]) >> 6;
6570 if (count_s)
6571 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
6572 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
6573 }
6574 switch (which_alternative)
6575 {
6576 case 0:
6577 case 1:
6578 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6579 case 2:
6580 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6581 default:
6582 gcc_unreachable ();
6583 }
6584 }
6585 [(set_attr "isa" "noavx,noavx,avx")
6586 (set_attr "type" "sselog")
6587 (set_attr "prefix_data16" "1,1,*")
6588 (set_attr "prefix_extra" "1")
6589 (set_attr "length_immediate" "1")
6590 (set_attr "prefix" "orig,orig,vex")
6591 (set_attr "mode" "V4SF")])
6592
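;; Storing the scalar into element 0 of a vector in memory is just a
;; scalar store, so after reload the split below narrows the memory
;; reference to the scalar mode.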
6593 (define_split
6594 [(set (match_operand:VI4F_128 0 "memory_operand")
6595 (vec_merge:VI4F_128
6596 (vec_duplicate:VI4F_128
6597 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
6598 (match_dup 0)
6599 (const_int 1)))]
6600 "TARGET_SSE && reload_completed"
6601 [(set (match_dup 0) (match_dup 1))]
6602 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
6603
6604 (define_expand "vec_set<mode>"
6605 [(match_operand:V 0 "register_operand")
6606 (match_operand:<ssescalarmode> 1 "register_operand")
6607 (match_operand 2 "const_int_operand")]
6608 "TARGET_SSE"
6609 {
6610 ix86_expand_vector_set (false, operands[0], operands[1],
6611 INTVAL (operands[2]));
6612 DONE;
6613 })
6614
6615 (define_insn_and_split "*vec_extractv4sf_0"
6616 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
6617 (vec_select:SF
6618 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
6619 (parallel [(const_int 0)])))]
6620 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6621 "#"
6622 "&& reload_completed"
6623 [(set (match_dup 0) (match_dup 1))]
6624 "operands[1] = gen_lowpart (SFmode, operands[1]);")
6625
6626 (define_insn_and_split "*sse4_1_extractps"
6627 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,v,v")
6628 (vec_select:SF
6629 (match_operand:V4SF 1 "register_operand" "Yr,*v,0,v")
6630 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
6631 "TARGET_SSE4_1"
6632 "@
6633 %vextractps\t{%2, %1, %0|%0, %1, %2}
6634 %vextractps\t{%2, %1, %0|%0, %1, %2}
6635 #
6636 #"
6637 "&& reload_completed && SSE_REG_P (operands[0])"
6638 [(const_int 0)]
6639 {
6640 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
6641 switch (INTVAL (operands[2]))
6642 {
6643 case 1:
6644 case 3:
6645 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
6646 operands[2], operands[2],
6647 GEN_INT (INTVAL (operands[2]) + 4),
6648 GEN_INT (INTVAL (operands[2]) + 4)));
6649 break;
6650 case 2:
6651 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
6652 break;
6653 default:
6654 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
6655 gcc_unreachable ();
6656 }
6657 DONE;
6658 }
6659 [(set_attr "isa" "*,*,noavx,avx")
6660 (set_attr "type" "sselog,sselog,*,*")
6661 (set_attr "prefix_data16" "1,1,*,*")
6662 (set_attr "prefix_extra" "1,1,*,*")
6663 (set_attr "length_immediate" "1,1,*,*")
6664 (set_attr "prefix" "maybe_vex,maybe_vex,*,*")
6665 (set_attr "mode" "V4SF,V4SF,*,*")])
6666
6667 (define_insn_and_split "*vec_extractv4sf_mem"
6668 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
6669 (vec_select:SF
6670 (match_operand:V4SF 1 "memory_operand" "o,o,o")
6671 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
6672 "TARGET_SSE"
6673 "#"
6674 "&& reload_completed"
6675 [(set (match_dup 0) (match_dup 1))]
6676 {
6677 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
6678 })
6679
6680 (define_mode_attr extract_type
6681 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
6682
6683 (define_mode_attr extract_suf
6684 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
6685
6686 (define_mode_iterator AVX512_VEC
6687 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
6688
6689 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
6690 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
6691 (match_operand:AVX512_VEC 1 "register_operand")
6692 (match_operand:SI 2 "const_0_to_3_operand")
6693 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
6694 (match_operand:QI 4 "register_operand")]
6695 "TARGET_AVX512F"
6696 {
6697 int mask;
6698 mask = INTVAL (operands[2]);
6699
6700 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6701 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
6702
6703 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
6704 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
6705 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
6706 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
6707 operands[4]));
6708 else
6709 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (operands[0],
6710 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
6711 operands[4]));
6712 DONE;
6713 })
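;; The expander above passes the selected 128-bit chunk as explicit
;; element numbers (mask * 4 .. mask * 4 + 3, or mask * 2 and
;; mask * 2 + 1); the insns below recover the immediate by shifting the
;; first index back down (>> 2 or >> 1).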
6714
6715 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
6716 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6717 (vec_merge:<ssequartermode>
6718 (vec_select:<ssequartermode>
6719 (match_operand:V8FI 1 "register_operand" "v")
6720 (parallel [(match_operand 2 "const_0_to_7_operand")
6721 (match_operand 3 "const_0_to_7_operand")]))
6722 (match_operand:<ssequartermode> 4 "memory_operand" "0")
6723 (match_operand:QI 5 "register_operand" "k")))]
6724 "TARGET_AVX512DQ
6725 && (INTVAL (operands[2]) % 2 == 0)
6726 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
6727 && rtx_equal_p (operands[4], operands[0])"
6728 {
6729 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6730 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
6731 }
6732 [(set_attr "type" "sselog")
6733 (set_attr "prefix_extra" "1")
6734 (set_attr "length_immediate" "1")
6735 (set_attr "memory" "store")
6736 (set_attr "prefix" "evex")
6737 (set_attr "mode" "<sseinsnmode>")])
6738
6739 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
6740 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6741 (vec_merge:<ssequartermode>
6742 (vec_select:<ssequartermode>
6743 (match_operand:V16FI 1 "register_operand" "v")
6744 (parallel [(match_operand 2 "const_0_to_15_operand")
6745 (match_operand 3 "const_0_to_15_operand")
6746 (match_operand 4 "const_0_to_15_operand")
6747 (match_operand 5 "const_0_to_15_operand")]))
6748 (match_operand:<ssequartermode> 6 "memory_operand" "0")
6749 (match_operand:QI 7 "register_operand" "Yk")))]
6750 "TARGET_AVX512F
6751 && ((INTVAL (operands[2]) % 4 == 0)
6752 && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6753 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6754 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))
6755 && rtx_equal_p (operands[6], operands[0])"
6756 {
6757 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6758 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
6759 }
6760 [(set_attr "type" "sselog")
6761 (set_attr "prefix_extra" "1")
6762 (set_attr "length_immediate" "1")
6763 (set_attr "memory" "store")
6764 (set_attr "prefix" "evex")
6765 (set_attr "mode" "<sseinsnmode>")])
6766
6767 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
6768 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6769 (vec_select:<ssequartermode>
6770 (match_operand:V8FI 1 "register_operand" "v")
6771 (parallel [(match_operand 2 "const_0_to_7_operand")
6772 (match_operand 3 "const_0_to_7_operand")])))]
6773 "TARGET_AVX512DQ && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)"
6774 {
6775 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6776 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
6777 }
6778 [(set_attr "type" "sselog1")
6779 (set_attr "prefix_extra" "1")
6780 (set_attr "length_immediate" "1")
6781 (set_attr "prefix" "evex")
6782 (set_attr "mode" "<sseinsnmode>")])
6783
6784 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
6785 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6786 (vec_select:<ssequartermode>
6787 (match_operand:V16FI 1 "register_operand" "v")
6788 (parallel [(match_operand 2 "const_0_to_15_operand")
6789 (match_operand 3 "const_0_to_15_operand")
6790 (match_operand 4 "const_0_to_15_operand")
6791 (match_operand 5 "const_0_to_15_operand")])))]
6792 "TARGET_AVX512F
6793 && (INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6794 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6795 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
6796 {
6797 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6798 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
6799 }
6800 [(set_attr "type" "sselog1")
6801 (set_attr "prefix_extra" "1")
6802 (set_attr "length_immediate" "1")
6803 (set_attr "prefix" "evex")
6804 (set_attr "mode" "<sseinsnmode>")])
6805
6806 (define_mode_attr extract_type_2
6807 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
6808
6809 (define_mode_attr extract_suf_2
6810 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
6811
6812 (define_mode_iterator AVX512_VEC_2
6813 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
6814
6815 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
6816 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6817 (match_operand:AVX512_VEC_2 1 "register_operand")
6818 (match_operand:SI 2 "const_0_to_1_operand")
6819 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
6820 (match_operand:QI 4 "register_operand")]
6821 "TARGET_AVX512F"
6822 {
6823 rtx (*insn)(rtx, rtx, rtx, rtx);
6824
6825 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6826     operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6827
6828 switch (INTVAL (operands[2]))
6829 {
6830 case 0:
6831 insn = gen_vec_extract_lo_<mode>_mask;
6832 break;
6833 case 1:
6834 insn = gen_vec_extract_hi_<mode>_mask;
6835 break;
6836 default:
6837 gcc_unreachable ();
6838 }
6839
6840 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
6841 DONE;
6842 })
6843
6844 (define_split
6845 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6846 (vec_select:<ssehalfvecmode>
6847 (match_operand:V8FI 1 "nonimmediate_operand")
6848 (parallel [(const_int 0) (const_int 1)
6849 (const_int 2) (const_int 3)])))]
6850 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6851 && reload_completed
6852 && (TARGET_AVX512VL
6853 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
6854 [(set (match_dup 0) (match_dup 1))]
6855 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
6856
6857 (define_insn "vec_extract_lo_<mode>_maskm"
6858 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6859 (vec_merge:<ssehalfvecmode>
6860 (vec_select:<ssehalfvecmode>
6861 (match_operand:V8FI 1 "register_operand" "v")
6862 (parallel [(const_int 0) (const_int 1)
6863 (const_int 2) (const_int 3)]))
6864 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6865 (match_operand:QI 3 "register_operand" "Yk")))]
6866 "TARGET_AVX512F
6867 && rtx_equal_p (operands[2], operands[0])"
6868 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
6869 [(set_attr "type" "sselog1")
6870 (set_attr "prefix_extra" "1")
6871 (set_attr "length_immediate" "1")
6872 (set_attr "prefix" "evex")
6873 (set_attr "mode" "<sseinsnmode>")])
6874
6875 (define_insn "vec_extract_lo_<mode><mask_name>"
6876 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
6877 (vec_select:<ssehalfvecmode>
6878 (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
6879 (parallel [(const_int 0) (const_int 1)
6880 (const_int 2) (const_int 3)])))]
6881 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6882 {
6883 if (<mask_applied> || !TARGET_AVX512VL)
6884 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6885 else
6886 return "#";
6887 }
6888 [(set_attr "type" "sselog1")
6889 (set_attr "prefix_extra" "1")
6890 (set_attr "length_immediate" "1")
6891 (set_attr "prefix" "evex")
6892 (set_attr "mode" "<sseinsnmode>")])
6893
6894 (define_insn "vec_extract_hi_<mode>_maskm"
6895 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6896 (vec_merge:<ssehalfvecmode>
6897 (vec_select:<ssehalfvecmode>
6898 (match_operand:V8FI 1 "register_operand" "v")
6899 (parallel [(const_int 4) (const_int 5)
6900 (const_int 6) (const_int 7)]))
6901 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6902 (match_operand:QI 3 "register_operand" "Yk")))]
6903 "TARGET_AVX512F
6904 && rtx_equal_p (operands[2], operands[0])"
6905 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6906 [(set_attr "type" "sselog")
6907 (set_attr "prefix_extra" "1")
6908 (set_attr "length_immediate" "1")
6909 (set_attr "memory" "store")
6910 (set_attr "prefix" "evex")
6911 (set_attr "mode" "<sseinsnmode>")])
6912
6913 (define_insn "vec_extract_hi_<mode><mask_name>"
6914 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6915 (vec_select:<ssehalfvecmode>
6916 (match_operand:V8FI 1 "register_operand" "v")
6917 (parallel [(const_int 4) (const_int 5)
6918 (const_int 6) (const_int 7)])))]
6919 "TARGET_AVX512F"
6920 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
6921 [(set_attr "type" "sselog1")
6922 (set_attr "prefix_extra" "1")
6923 (set_attr "length_immediate" "1")
6924 (set_attr "prefix" "evex")
6925 (set_attr "mode" "<sseinsnmode>")])
6926
6927 (define_insn "vec_extract_hi_<mode>_maskm"
6928 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6929 (vec_merge:<ssehalfvecmode>
6930 (vec_select:<ssehalfvecmode>
6931 (match_operand:V16FI 1 "register_operand" "v")
6932 (parallel [(const_int 8) (const_int 9)
6933 (const_int 10) (const_int 11)
6934 (const_int 12) (const_int 13)
6935 (const_int 14) (const_int 15)]))
6936 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6937 (match_operand:QI 3 "register_operand" "k")))]
6938 "TARGET_AVX512DQ
6939 && rtx_equal_p (operands[2], operands[0])"
6940 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6941 [(set_attr "type" "sselog1")
6942 (set_attr "prefix_extra" "1")
6943 (set_attr "length_immediate" "1")
6944 (set_attr "prefix" "evex")
6945 (set_attr "mode" "<sseinsnmode>")])
6946
6947 (define_insn "vec_extract_hi_<mode><mask_name>"
6948 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
6949 (vec_select:<ssehalfvecmode>
6950 (match_operand:V16FI 1 "register_operand" "v,v")
6951 (parallel [(const_int 8) (const_int 9)
6952 (const_int 10) (const_int 11)
6953 (const_int 12) (const_int 13)
6954 (const_int 14) (const_int 15)])))]
6955 "TARGET_AVX512F && <mask_avx512dq_condition>"
6956 "@
6957 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
6958 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6959 [(set_attr "type" "sselog1")
6960 (set_attr "prefix_extra" "1")
6961 (set_attr "isa" "avx512dq,noavx512dq")
6962 (set_attr "length_immediate" "1")
6963 (set_attr "prefix" "evex")
6964 (set_attr "mode" "<sseinsnmode>")])
6965
6966 (define_expand "avx512vl_vextractf128<mode>"
6967 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6968 (match_operand:VI48F_256 1 "register_operand")
6969 (match_operand:SI 2 "const_0_to_1_operand")
6970 (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
6971 (match_operand:QI 4 "register_operand")]
6972 "TARGET_AVX512DQ && TARGET_AVX512VL"
6973 {
6974 rtx (*insn)(rtx, rtx, rtx, rtx);
6975
6976 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6977 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6978
6979 switch (INTVAL (operands[2]))
6980 {
6981 case 0:
6982 insn = gen_vec_extract_lo_<mode>_mask;
6983 break;
6984 case 1:
6985 insn = gen_vec_extract_hi_<mode>_mask;
6986 break;
6987 default:
6988 gcc_unreachable ();
6989 }
6990
6991 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
6992 DONE;
6993 })
6994
6995 (define_expand "avx_vextractf128<mode>"
6996 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6997 (match_operand:V_256 1 "register_operand")
6998 (match_operand:SI 2 "const_0_to_1_operand")]
6999 "TARGET_AVX"
7000 {
7001 rtx (*insn)(rtx, rtx);
7002
7003 switch (INTVAL (operands[2]))
7004 {
7005 case 0:
7006 insn = gen_vec_extract_lo_<mode>;
7007 break;
7008 case 1:
7009 insn = gen_vec_extract_hi_<mode>;
7010 break;
7011 default:
7012 gcc_unreachable ();
7013 }
7014
7015 emit_insn (insn (operands[0], operands[1]));
7016 DONE;
7017 })
7018
7019 (define_insn "vec_extract_lo_<mode><mask_name>"
7020 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
7021 (vec_select:<ssehalfvecmode>
7022 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
7023 (parallel [(const_int 0) (const_int 1)
7024 (const_int 2) (const_int 3)
7025 (const_int 4) (const_int 5)
7026 (const_int 6) (const_int 7)])))]
7027 "TARGET_AVX512F
7028 && <mask_mode512bit_condition>
7029 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7030 {
7031 if (<mask_applied>)
7032 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7033 else
7034 return "#";
7035 })
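;; Without masking, extracting the low half is just a lowpart move, so the
;; insn above emits "#" and relies on the following split after reload.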
7036
7037 (define_split
7038 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7039 (vec_select:<ssehalfvecmode>
7040 (match_operand:V16FI 1 "nonimmediate_operand")
7041 (parallel [(const_int 0) (const_int 1)
7042 (const_int 2) (const_int 3)
7043 (const_int 4) (const_int 5)
7044 (const_int 6) (const_int 7)])))]
7045 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7046 && reload_completed"
7047 [(set (match_dup 0) (match_dup 1))]
7048 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7049
7050 (define_insn "vec_extract_lo_<mode><mask_name>"
7051 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
7052 (vec_select:<ssehalfvecmode>
7053 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
7054 (parallel [(const_int 0) (const_int 1)])))]
7055 "TARGET_AVX
7056 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7057 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7058 {
7059 if (<mask_applied>)
7060 return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
7061 else
7062 return "#";
7063 }
7064 [(set_attr "type" "sselog")
7065 (set_attr "prefix_extra" "1")
7066 (set_attr "length_immediate" "1")
7067 (set_attr "memory" "none,store")
7068 (set_attr "prefix" "evex")
7069 (set_attr "mode" "XI")])
7070
7071 (define_split
7072 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7073 (vec_select:<ssehalfvecmode>
7074 (match_operand:VI8F_256 1 "nonimmediate_operand")
7075 (parallel [(const_int 0) (const_int 1)])))]
7076 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7077 && reload_completed"
7078 [(set (match_dup 0) (match_dup 1))]
7079 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7080
7081 (define_insn "vec_extract_hi_<mode><mask_name>"
7082 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
7083 (vec_select:<ssehalfvecmode>
7084 (match_operand:VI8F_256 1 "register_operand" "v,v")
7085 (parallel [(const_int 2) (const_int 3)])))]
7086 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7087 {
7088 if (TARGET_AVX512VL)
7089 {
7090 if (TARGET_AVX512DQ)
7091 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7092 else
7093 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
7094 }
7095 else
7096 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7097 }
7098 [(set_attr "type" "sselog")
7099 (set_attr "prefix_extra" "1")
7100 (set_attr "length_immediate" "1")
7101 (set_attr "memory" "none,store")
7102 (set_attr "prefix" "vex")
7103 (set_attr "mode" "<sseinsnmode>")])
7104
7105 (define_split
7106 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7107 (vec_select:<ssehalfvecmode>
7108 (match_operand:VI4F_256 1 "nonimmediate_operand")
7109 (parallel [(const_int 0) (const_int 1)
7110 (const_int 2) (const_int 3)])))]
7111 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7112 && reload_completed"
7113 [(set (match_dup 0) (match_dup 1))]
7114 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7115
7116 (define_insn "vec_extract_lo_<mode><mask_name>"
7117 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7118 (vec_select:<ssehalfvecmode>
7119 (match_operand:VI4F_256 1 "register_operand" "v")
7120 (parallel [(const_int 0) (const_int 1)
7121 (const_int 2) (const_int 3)])))]
7122 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7123 {
7124 if (<mask_applied>)
7125 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7126 else
7127 return "#";
7128 }
7129 [(set_attr "type" "sselog1")
7130 (set_attr "prefix_extra" "1")
7131 (set_attr "length_immediate" "1")
7132 (set_attr "prefix" "evex")
7133 (set_attr "mode" "<sseinsnmode>")])
7134
7135 (define_insn "vec_extract_lo_<mode>_maskm"
7136 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7137 (vec_merge:<ssehalfvecmode>
7138 (vec_select:<ssehalfvecmode>
7139 (match_operand:VI4F_256 1 "register_operand" "v")
7140 (parallel [(const_int 0) (const_int 1)
7141 (const_int 2) (const_int 3)]))
7142 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7143 (match_operand:QI 3 "register_operand" "k")))]
7144 "TARGET_AVX512VL && TARGET_AVX512F
7145 && rtx_equal_p (operands[2], operands[0])"
7146 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7147 [(set_attr "type" "sselog1")
7148 (set_attr "prefix_extra" "1")
7149 (set_attr "length_immediate" "1")
7150 (set_attr "prefix" "evex")
7151 (set_attr "mode" "<sseinsnmode>")])
7152
7153 (define_insn "vec_extract_hi_<mode>_maskm"
7154 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7155 (vec_merge:<ssehalfvecmode>
7156 (vec_select:<ssehalfvecmode>
7157 (match_operand:VI4F_256 1 "register_operand" "v")
7158 (parallel [(const_int 4) (const_int 5)
7159 (const_int 6) (const_int 7)]))
7160 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7161 (match_operand:QI 3 "register_operand" "k")))]
7162 "TARGET_AVX512F && TARGET_AVX512VL
7163 && rtx_equal_p (operands[2], operands[0])"
7164 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7165 [(set_attr "type" "sselog1")
7166 (set_attr "length_immediate" "1")
7167 (set_attr "prefix" "evex")
7168 (set_attr "mode" "<sseinsnmode>")])
7169
7170 (define_insn "vec_extract_hi_<mode>_mask"
7171 [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
7172 (vec_merge:<ssehalfvecmode>
7173 (vec_select:<ssehalfvecmode>
7174 (match_operand:VI4F_256 1 "register_operand" "v")
7175 (parallel [(const_int 4) (const_int 5)
7176 (const_int 6) (const_int 7)]))
7177 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "0C")
7178 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
7179 "TARGET_AVX512VL"
7180 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
7181 [(set_attr "type" "sselog1")
7182 (set_attr "length_immediate" "1")
7183 (set_attr "prefix" "evex")
7184 (set_attr "mode" "<sseinsnmode>")])
7185
7186 (define_insn "vec_extract_hi_<mode>"
7187 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
7188 (vec_select:<ssehalfvecmode>
7189 (match_operand:VI4F_256 1 "register_operand" "x, v")
7190 (parallel [(const_int 4) (const_int 5)
7191 (const_int 6) (const_int 7)])))]
7192 "TARGET_AVX"
7193 "@
7194 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
7195 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7196 [(set_attr "isa" "*, avx512vl")
7197 (set_attr "prefix" "vex, evex")
7198 (set_attr "type" "sselog1")
7199 (set_attr "length_immediate" "1")
7200 (set_attr "mode" "<sseinsnmode>")])
7201
7202 (define_insn_and_split "vec_extract_lo_v32hi"
7203 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7204 (vec_select:V16HI
7205 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
7206 (parallel [(const_int 0) (const_int 1)
7207 (const_int 2) (const_int 3)
7208 (const_int 4) (const_int 5)
7209 (const_int 6) (const_int 7)
7210 (const_int 8) (const_int 9)
7211 (const_int 10) (const_int 11)
7212 (const_int 12) (const_int 13)
7213 (const_int 14) (const_int 15)])))]
7214 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7215 "#"
7216 "&& reload_completed"
7217 [(set (match_dup 0) (match_dup 1))]
7218 "operands[1] = gen_lowpart (V16HImode, operands[1]);")
7219
7220 (define_insn "vec_extract_hi_v32hi"
7221 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7222 (vec_select:V16HI
7223 (match_operand:V32HI 1 "register_operand" "v,v")
7224 (parallel [(const_int 16) (const_int 17)
7225 (const_int 18) (const_int 19)
7226 (const_int 20) (const_int 21)
7227 (const_int 22) (const_int 23)
7228 (const_int 24) (const_int 25)
7229 (const_int 26) (const_int 27)
7230 (const_int 28) (const_int 29)
7231 (const_int 30) (const_int 31)])))]
7232 "TARGET_AVX512F"
7233 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7234 [(set_attr "type" "sselog")
7235 (set_attr "prefix_extra" "1")
7236 (set_attr "length_immediate" "1")
7237 (set_attr "memory" "none,store")
7238 (set_attr "prefix" "evex")
7239 (set_attr "mode" "XI")])
7240
7241 (define_insn_and_split "vec_extract_lo_v16hi"
7242 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
7243 (vec_select:V8HI
7244 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
7245 (parallel [(const_int 0) (const_int 1)
7246 (const_int 2) (const_int 3)
7247 (const_int 4) (const_int 5)
7248 (const_int 6) (const_int 7)])))]
7249 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7250 "#"
7251 "&& reload_completed"
7252 [(set (match_dup 0) (match_dup 1))]
7253 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
7254
7255 (define_insn "vec_extract_hi_v16hi"
7256 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
7257 (vec_select:V8HI
7258 (match_operand:V16HI 1 "register_operand" "x,x,v,v,v,v")
7259 (parallel [(const_int 8) (const_int 9)
7260 (const_int 10) (const_int 11)
7261 (const_int 12) (const_int 13)
7262 (const_int 14) (const_int 15)])))]
7263 "TARGET_AVX"
7264 "@
7265 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7266 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7267 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7268 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7269 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
7270 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
7271 [(set_attr "type" "sselog")
7272 (set_attr "prefix_extra" "1")
7273 (set_attr "length_immediate" "1")
7274 (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
7275 (set_attr "memory" "none,store,none,store,none,store")
7276 (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
7277 (set_attr "mode" "OI")])
7278
7279 (define_insn_and_split "vec_extract_lo_v64qi"
7280 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7281 (vec_select:V32QI
7282 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
7283 (parallel [(const_int 0) (const_int 1)
7284 (const_int 2) (const_int 3)
7285 (const_int 4) (const_int 5)
7286 (const_int 6) (const_int 7)
7287 (const_int 8) (const_int 9)
7288 (const_int 10) (const_int 11)
7289 (const_int 12) (const_int 13)
7290 (const_int 14) (const_int 15)
7291 (const_int 16) (const_int 17)
7292 (const_int 18) (const_int 19)
7293 (const_int 20) (const_int 21)
7294 (const_int 22) (const_int 23)
7295 (const_int 24) (const_int 25)
7296 (const_int 26) (const_int 27)
7297 (const_int 28) (const_int 29)
7298 (const_int 30) (const_int 31)])))]
7299 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7300 "#"
7301 "&& reload_completed"
7302 [(set (match_dup 0) (match_dup 1))]
7303 "operands[1] = gen_lowpart (V32QImode, operands[1]);")
7304
7305 (define_insn "vec_extract_hi_v64qi"
7306 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7307 (vec_select:V32QI
7308 (match_operand:V64QI 1 "register_operand" "v,v")
7309 (parallel [(const_int 32) (const_int 33)
7310 (const_int 34) (const_int 35)
7311 (const_int 36) (const_int 37)
7312 (const_int 38) (const_int 39)
7313 (const_int 40) (const_int 41)
7314 (const_int 42) (const_int 43)
7315 (const_int 44) (const_int 45)
7316 (const_int 46) (const_int 47)
7317 (const_int 48) (const_int 49)
7318 (const_int 50) (const_int 51)
7319 (const_int 52) (const_int 53)
7320 (const_int 54) (const_int 55)
7321 (const_int 56) (const_int 57)
7322 (const_int 58) (const_int 59)
7323 (const_int 60) (const_int 61)
7324 (const_int 62) (const_int 63)])))]
7325 "TARGET_AVX512F"
7326 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7327 [(set_attr "type" "sselog")
7328 (set_attr "prefix_extra" "1")
7329 (set_attr "length_immediate" "1")
7330 (set_attr "memory" "none,store")
7331 (set_attr "prefix" "evex")
7332 (set_attr "mode" "XI")])
7333
7334 (define_insn_and_split "vec_extract_lo_v32qi"
7335 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
7336 (vec_select:V16QI
7337 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
7338 (parallel [(const_int 0) (const_int 1)
7339 (const_int 2) (const_int 3)
7340 (const_int 4) (const_int 5)
7341 (const_int 6) (const_int 7)
7342 (const_int 8) (const_int 9)
7343 (const_int 10) (const_int 11)
7344 (const_int 12) (const_int 13)
7345 (const_int 14) (const_int 15)])))]
7346 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7347 "#"
7348 "&& reload_completed"
7349 [(set (match_dup 0) (match_dup 1))]
7350 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
7351
7352 (define_insn "vec_extract_hi_v32qi"
7353 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
7354 (vec_select:V16QI
7355 (match_operand:V32QI 1 "register_operand" "x,x,v,v,v,v")
7356 (parallel [(const_int 16) (const_int 17)
7357 (const_int 18) (const_int 19)
7358 (const_int 20) (const_int 21)
7359 (const_int 22) (const_int 23)
7360 (const_int 24) (const_int 25)
7361 (const_int 26) (const_int 27)
7362 (const_int 28) (const_int 29)
7363 (const_int 30) (const_int 31)])))]
7364 "TARGET_AVX"
7365 "@
7366 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7367 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7368 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7369 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7370 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
7371 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
7372 [(set_attr "type" "sselog")
7373 (set_attr "prefix_extra" "1")
7374 (set_attr "length_immediate" "1")
7375 (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
7376 (set_attr "memory" "none,store,none,store,none,store")
7377 (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
7378 (set_attr "mode" "OI")])
7379
7380 ;; Modes handled by vec_extract patterns.
7381 (define_mode_iterator VEC_EXTRACT_MODE
7382 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
7383 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
7384 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
7385 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
7386 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
7387 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
7388
7389 (define_expand "vec_extract<mode>"
7390 [(match_operand:<ssescalarmode> 0 "register_operand")
7391 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
7392 (match_operand 2 "const_int_operand")]
7393 "TARGET_SSE"
7394 {
7395 ix86_expand_vector_extract (false, operands[0], operands[1],
7396 INTVAL (operands[2]));
7397 DONE;
7398 })
7399
7400 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7401 ;;
7402 ;; Parallel double-precision floating point element swizzling
7403 ;;
7404 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7405
7406 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
7407 [(set (match_operand:V8DF 0 "register_operand" "=v")
7408 (vec_select:V8DF
7409 (vec_concat:V16DF
7410 (match_operand:V8DF 1 "register_operand" "v")
7411 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7412 (parallel [(const_int 1) (const_int 9)
7413 (const_int 3) (const_int 11)
7414 (const_int 5) (const_int 13)
7415 (const_int 7) (const_int 15)])))]
7416 "TARGET_AVX512F"
7417 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7418 [(set_attr "type" "sselog")
7419 (set_attr "prefix" "evex")
7420 (set_attr "mode" "V8DF")])
7421
7422 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7423 (define_insn "avx_unpckhpd256<mask_name>"
7424 [(set (match_operand:V4DF 0 "register_operand" "=v")
7425 (vec_select:V4DF
7426 (vec_concat:V8DF
7427 (match_operand:V4DF 1 "register_operand" "v")
7428 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7429 (parallel [(const_int 1) (const_int 5)
7430 (const_int 3) (const_int 7)])))]
7431 "TARGET_AVX && <mask_avx512vl_condition>"
7432 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7433 [(set_attr "type" "sselog")
7434 (set_attr "prefix" "vex")
7435 (set_attr "mode" "V4DF")])
7436
7437 (define_expand "vec_interleave_highv4df"
7438 [(set (match_dup 3)
7439 (vec_select:V4DF
7440 (vec_concat:V8DF
7441 (match_operand:V4DF 1 "register_operand")
7442 (match_operand:V4DF 2 "nonimmediate_operand"))
7443 (parallel [(const_int 0) (const_int 4)
7444 (const_int 2) (const_int 6)])))
7445 (set (match_dup 4)
7446 (vec_select:V4DF
7447 (vec_concat:V8DF
7448 (match_dup 1)
7449 (match_dup 2))
7450 (parallel [(const_int 1) (const_int 5)
7451 (const_int 3) (const_int 7)])))
7452 (set (match_operand:V4DF 0 "register_operand")
7453 (vec_select:V4DF
7454 (vec_concat:V8DF
7455 (match_dup 3)
7456 (match_dup 4))
7457 (parallel [(const_int 2) (const_int 3)
7458 (const_int 6) (const_int 7)])))]
7459 "TARGET_AVX"
7460 {
7461 operands[3] = gen_reg_rtx (V4DFmode);
7462 operands[4] = gen_reg_rtx (V4DFmode);
7463 })
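;; The expander above builds the cross-lane interleave from in-lane
;; operations: operands 3 and 4 are the per-lane unpcklpd and unpckhpd of
;; the inputs, and the final vec_select takes the high 128-bit lane of
;; each (a vperm2f128-style lane shuffle), giving
;; { op1[2], op2[2], op1[3], op2[3] }.  vec_interleave_lowv4df below is
;; the mirror image using the low lanes.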
7464
7465
7466 (define_insn "avx512vl_unpckhpd128_mask"
7467 [(set (match_operand:V2DF 0 "register_operand" "=v")
7468 (vec_merge:V2DF
7469 (vec_select:V2DF
7470 (vec_concat:V4DF
7471 (match_operand:V2DF 1 "register_operand" "v")
7472 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7473 (parallel [(const_int 1) (const_int 3)]))
7474 (match_operand:V2DF 3 "vector_move_operand" "0C")
7475 (match_operand:QI 4 "register_operand" "Yk")))]
7476 "TARGET_AVX512VL"
7477 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7478 [(set_attr "type" "sselog")
7479 (set_attr "prefix" "evex")
7480 (set_attr "mode" "V2DF")])
7481
7482 (define_expand "vec_interleave_highv2df"
7483 [(set (match_operand:V2DF 0 "register_operand")
7484 (vec_select:V2DF
7485 (vec_concat:V4DF
7486 (match_operand:V2DF 1 "nonimmediate_operand")
7487 (match_operand:V2DF 2 "nonimmediate_operand"))
7488 (parallel [(const_int 1)
7489 (const_int 3)])))]
7490 "TARGET_SSE2"
7491 {
7492 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
7493 operands[2] = force_reg (V2DFmode, operands[2]);
7494 })
7495
7496 (define_insn "*vec_interleave_highv2df"
7497 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
7498 (vec_select:V2DF
7499 (vec_concat:V4DF
7500 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
7501 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
7502 (parallel [(const_int 1)
7503 (const_int 3)])))]
7504 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
7505 "@
7506 unpckhpd\t{%2, %0|%0, %2}
7507 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
7508 %vmovddup\t{%H1, %0|%0, %H1}
7509 movlpd\t{%H1, %0|%0, %H1}
7510 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
7511 %vmovhpd\t{%1, %0|%q0, %1}"
7512 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7513 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7514 (set_attr "prefix_data16" "*,*,*,1,*,1")
7515 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
7516 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
7517
7518 (define_expand "avx512f_movddup512<mask_name>"
7519 [(set (match_operand:V8DF 0 "register_operand")
7520 (vec_select:V8DF
7521 (vec_concat:V16DF
7522 (match_operand:V8DF 1 "nonimmediate_operand")
7523 (match_dup 1))
7524 (parallel [(const_int 0) (const_int 8)
7525 (const_int 2) (const_int 10)
7526 (const_int 4) (const_int 12)
7527 (const_int 6) (const_int 14)])))]
7528 "TARGET_AVX512F")
7529
7530 (define_expand "avx512f_unpcklpd512<mask_name>"
7531 [(set (match_operand:V8DF 0 "register_operand")
7532 (vec_select:V8DF
7533 (vec_concat:V16DF
7534 (match_operand:V8DF 1 "register_operand")
7535 (match_operand:V8DF 2 "nonimmediate_operand"))
7536 (parallel [(const_int 0) (const_int 8)
7537 (const_int 2) (const_int 10)
7538 (const_int 4) (const_int 12)
7539 (const_int 6) (const_int 14)])))]
7540 "TARGET_AVX512F")
7541
7542 (define_insn "*avx512f_unpcklpd512<mask_name>"
7543 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
7544 (vec_select:V8DF
7545 (vec_concat:V16DF
7546 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
7547 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
7548 (parallel [(const_int 0) (const_int 8)
7549 (const_int 2) (const_int 10)
7550 (const_int 4) (const_int 12)
7551 (const_int 6) (const_int 14)])))]
7552 "TARGET_AVX512F"
7553 "@
7554 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
7555 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7556 [(set_attr "type" "sselog")
7557 (set_attr "prefix" "evex")
7558 (set_attr "mode" "V8DF")])
7559
7560 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7561 (define_expand "avx_movddup256<mask_name>"
7562 [(set (match_operand:V4DF 0 "register_operand")
7563 (vec_select:V4DF
7564 (vec_concat:V8DF
7565 (match_operand:V4DF 1 "nonimmediate_operand")
7566 (match_dup 1))
7567 (parallel [(const_int 0) (const_int 4)
7568 (const_int 2) (const_int 6)])))]
7569 "TARGET_AVX && <mask_avx512vl_condition>")
7570
7571 (define_expand "avx_unpcklpd256<mask_name>"
7572 [(set (match_operand:V4DF 0 "register_operand")
7573 (vec_select:V4DF
7574 (vec_concat:V8DF
7575 (match_operand:V4DF 1 "register_operand")
7576 (match_operand:V4DF 2 "nonimmediate_operand"))
7577 (parallel [(const_int 0) (const_int 4)
7578 (const_int 2) (const_int 6)])))]
7579 "TARGET_AVX && <mask_avx512vl_condition>")
7580
7581 (define_insn "*avx_unpcklpd256<mask_name>"
7582 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
7583 (vec_select:V4DF
7584 (vec_concat:V8DF
7585 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
7586 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
7587 (parallel [(const_int 0) (const_int 4)
7588 (const_int 2) (const_int 6)])))]
7589 "TARGET_AVX && <mask_avx512vl_condition>"
7590 "@
7591 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
7592 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
7593 [(set_attr "type" "sselog")
7594 (set_attr "prefix" "vex")
7595 (set_attr "mode" "V4DF")])
7596
7597 (define_expand "vec_interleave_lowv4df"
7598 [(set (match_dup 3)
7599 (vec_select:V4DF
7600 (vec_concat:V8DF
7601 (match_operand:V4DF 1 "register_operand")
7602 (match_operand:V4DF 2 "nonimmediate_operand"))
7603 (parallel [(const_int 0) (const_int 4)
7604 (const_int 2) (const_int 6)])))
7605 (set (match_dup 4)
7606 (vec_select:V4DF
7607 (vec_concat:V8DF
7608 (match_dup 1)
7609 (match_dup 2))
7610 (parallel [(const_int 1) (const_int 5)
7611 (const_int 3) (const_int 7)])))
7612 (set (match_operand:V4DF 0 "register_operand")
7613 (vec_select:V4DF
7614 (vec_concat:V8DF
7615 (match_dup 3)
7616 (match_dup 4))
7617 (parallel [(const_int 0) (const_int 1)
7618 (const_int 4) (const_int 5)])))]
7619 "TARGET_AVX"
7620 {
7621 operands[3] = gen_reg_rtx (V4DFmode);
7622 operands[4] = gen_reg_rtx (V4DFmode);
7623 })
7624
7625 (define_insn "avx512vl_unpcklpd128_mask"
7626 [(set (match_operand:V2DF 0 "register_operand" "=v")
7627 (vec_merge:V2DF
7628 (vec_select:V2DF
7629 (vec_concat:V4DF
7630 (match_operand:V2DF 1 "register_operand" "v")
7631 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7632 (parallel [(const_int 0) (const_int 2)]))
7633 (match_operand:V2DF 3 "vector_move_operand" "0C")
7634 (match_operand:QI 4 "register_operand" "Yk")))]
7635 "TARGET_AVX512VL"
7636 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7637 [(set_attr "type" "sselog")
7638 (set_attr "prefix" "evex")
7639 (set_attr "mode" "V2DF")])
7640
7641 (define_expand "vec_interleave_lowv2df"
7642 [(set (match_operand:V2DF 0 "register_operand")
7643 (vec_select:V2DF
7644 (vec_concat:V4DF
7645 (match_operand:V2DF 1 "nonimmediate_operand")
7646 (match_operand:V2DF 2 "nonimmediate_operand"))
7647 (parallel [(const_int 0)
7648 (const_int 2)])))]
7649 "TARGET_SSE2"
7650 {
7651 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
7652 operands[1] = force_reg (V2DFmode, operands[1]);
7653 })
7654
7655 (define_insn "*vec_interleave_lowv2df"
7656 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
7657 (vec_select:V2DF
7658 (vec_concat:V4DF
7659 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
7660 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
7661 (parallel [(const_int 0)
7662 (const_int 2)])))]
7663 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
7664 "@
7665 unpcklpd\t{%2, %0|%0, %2}
7666 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7667 %vmovddup\t{%1, %0|%0, %q1}
7668 movhpd\t{%2, %0|%0, %q2}
7669 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
7670 %vmovlpd\t{%2, %H0|%H0, %2}"
7671 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7672 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7673 (set_attr "prefix_data16" "*,*,*,1,*,1")
7674 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
7675 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
7676
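;; A movddup-style duplicate being stored to memory is split after reload
;; into two scalar DFmode stores of the same register.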
7677 (define_split
7678 [(set (match_operand:V2DF 0 "memory_operand")
7679 (vec_select:V2DF
7680 (vec_concat:V4DF
7681 (match_operand:V2DF 1 "register_operand")
7682 (match_dup 1))
7683 (parallel [(const_int 0)
7684 (const_int 2)])))]
7685 "TARGET_SSE3 && reload_completed"
7686 [(const_int 0)]
7687 {
7688 rtx low = gen_lowpart (DFmode, operands[1]);
7689
7690 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
7691 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
7692 DONE;
7693 })
7694
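;; Conversely, duplicating either element of a V2DF that lives in memory is
;; rewritten as a vec_duplicate (movddup) of that element's address.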
7695 (define_split
7696 [(set (match_operand:V2DF 0 "register_operand")
7697 (vec_select:V2DF
7698 (vec_concat:V4DF
7699 (match_operand:V2DF 1 "memory_operand")
7700 (match_dup 1))
7701 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
7702 (match_operand:SI 3 "const_int_operand")])))]
7703 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
7704 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
7705 {
7706 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
7707 })
7708
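;; The (vec_merge ... (match_dup 1) (const_int 1)) wrapper below is the usual
;; idiom for AVX-512 scalar insns: only element 0 receives the computed
;; result, the remaining elements are copied from operand 1.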
7709 (define_insn "avx512f_vmscalef<mode><round_name>"
7710 [(set (match_operand:VF_128 0 "register_operand" "=v")
7711 (vec_merge:VF_128
7712 (unspec:VF_128
7713 [(match_operand:VF_128 1 "register_operand" "v")
7714 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
7715 UNSPEC_SCALEF)
7716 (match_dup 1)
7717 (const_int 1)))]
7718 "TARGET_AVX512F"
7719 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
7720 [(set_attr "prefix" "evex")
7721 (set_attr "mode" "<ssescalarmode>")])
7722
7723 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
7724 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7725 (unspec:VF_AVX512VL
7726 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
7727 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
7728 UNSPEC_SCALEF))]
7729 "TARGET_AVX512F"
7730 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
7731 [(set_attr "prefix" "evex")
7732 (set_attr "mode" "<MODE>")])
7733
7734 (define_expand "<avx512>_vternlog<mode>_maskz"
7735 [(match_operand:VI48_AVX512VL 0 "register_operand")
7736 (match_operand:VI48_AVX512VL 1 "register_operand")
7737 (match_operand:VI48_AVX512VL 2 "register_operand")
7738 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
7739 (match_operand:SI 4 "const_0_to_255_operand")
7740 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7741 "TARGET_AVX512F"
7742 {
7743 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
7744 operands[0], operands[1], operands[2], operands[3],
7745 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
7746 DONE;
7747 })
7748
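;; vpternlogd/vpternlogq compute an arbitrary three-input boolean function of
;; operands 1, 2 and 3; the 8-bit immediate is the truth table of that
;; function, indexed by the corresponding bits of the three sources.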
7749 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
7750 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7751 (unspec:VI48_AVX512VL
7752 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7753 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7754 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7755 (match_operand:SI 4 "const_0_to_255_operand")]
7756 UNSPEC_VTERNLOG))]
7757 "TARGET_AVX512F"
7758 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
7759 [(set_attr "type" "sselog")
7760 (set_attr "prefix" "evex")
7761 (set_attr "mode" "<sseinsnmode>")])
7762
7763 (define_insn "<avx512>_vternlog<mode>_mask"
7764 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7765 (vec_merge:VI48_AVX512VL
7766 (unspec:VI48_AVX512VL
7767 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7768 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7769 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7770 (match_operand:SI 4 "const_0_to_255_operand")]
7771 UNSPEC_VTERNLOG)
7772 (match_dup 1)
7773 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7774 "TARGET_AVX512F"
7775 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
7776 [(set_attr "type" "sselog")
7777 (set_attr "prefix" "evex")
7778 (set_attr "mode" "<sseinsnmode>")])
7779
7780 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
7781 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7782 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7783 UNSPEC_GETEXP))]
7784 "TARGET_AVX512F"
7785 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
7786 [(set_attr "prefix" "evex")
7787 (set_attr "mode" "<MODE>")])
7788
7789 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
7790 [(set (match_operand:VF_128 0 "register_operand" "=v")
7791 (vec_merge:VF_128
7792 (unspec:VF_128
7793 [(match_operand:VF_128 1 "register_operand" "v")
7794 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7795 UNSPEC_GETEXP)
7796 (match_dup 1)
7797 (const_int 1)))]
7798 "TARGET_AVX512F"
7799 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
7800 [(set_attr "prefix" "evex")
7801 (set_attr "mode" "<ssescalarmode>")])
7802
7803 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
7804 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7805 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
7806 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
7807 (match_operand:SI 3 "const_0_to_255_operand")]
7808 UNSPEC_ALIGN))]
7809 "TARGET_AVX512F"
7810 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
7811 [(set_attr "prefix" "evex")
7812 (set_attr "mode" "<sseinsnmode>")])
7813
7814 (define_expand "avx512f_shufps512_mask"
7815 [(match_operand:V16SF 0 "register_operand")
7816 (match_operand:V16SF 1 "register_operand")
7817 (match_operand:V16SF 2 "nonimmediate_operand")
7818 (match_operand:SI 3 "const_0_to_255_operand")
7819 (match_operand:V16SF 4 "register_operand")
7820 (match_operand:HI 5 "register_operand")]
7821 "TARGET_AVX512F"
7822 {
7823 int mask = INTVAL (operands[3]);
7824 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
7825 GEN_INT ((mask >> 0) & 3),
7826 GEN_INT ((mask >> 2) & 3),
7827 GEN_INT (((mask >> 4) & 3) + 16),
7828 GEN_INT (((mask >> 6) & 3) + 16),
7829 GEN_INT (((mask >> 0) & 3) + 4),
7830 GEN_INT (((mask >> 2) & 3) + 4),
7831 GEN_INT (((mask >> 4) & 3) + 20),
7832 GEN_INT (((mask >> 6) & 3) + 20),
7833 GEN_INT (((mask >> 0) & 3) + 8),
7834 GEN_INT (((mask >> 2) & 3) + 8),
7835 GEN_INT (((mask >> 4) & 3) + 24),
7836 GEN_INT (((mask >> 6) & 3) + 24),
7837 GEN_INT (((mask >> 0) & 3) + 12),
7838 GEN_INT (((mask >> 2) & 3) + 12),
7839 GEN_INT (((mask >> 4) & 3) + 28),
7840 GEN_INT (((mask >> 6) & 3) + 28),
7841 operands[4], operands[5]));
7842 DONE;
7843 })
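;; E.g. in the expander above imm8 0x1b (2-bit fields 3,2,1,0) becomes the
;; selectors 3,2,17,16 / 7,6,21,20 / 11,10,25,24 / 15,14,29,28: the same four
;; fields are applied in each 128-bit lane, with +16 added for elements taken
;; from operand 2 and +4/+8/+12 for the successively higher lanes.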
7844
7845
7846 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
7847 [(match_operand:VF_AVX512VL 0 "register_operand")
7848 (match_operand:VF_AVX512VL 1 "register_operand")
7849 (match_operand:VF_AVX512VL 2 "register_operand")
7850 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7851 (match_operand:SI 4 "const_0_to_255_operand")
7852 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7853 "TARGET_AVX512F"
7854 {
7855 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7856 operands[0], operands[1], operands[2], operands[3],
7857 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7858 <round_saeonly_expand_operand6>));
7859 DONE;
7860 })
7861
7862 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
7863 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7864 (unspec:VF_AVX512VL
7865 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7866 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7867 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7868 (match_operand:SI 4 "const_0_to_255_operand")]
7869 UNSPEC_FIXUPIMM))]
7870 "TARGET_AVX512F"
7871 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7872 [(set_attr "prefix" "evex")
7873 (set_attr "mode" "<MODE>")])
7874
7875 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
7876 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7877 (vec_merge:VF_AVX512VL
7878 (unspec:VF_AVX512VL
7879 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7880 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7881 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7882 (match_operand:SI 4 "const_0_to_255_operand")]
7883 UNSPEC_FIXUPIMM)
7884 (match_dup 1)
7885 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7886 "TARGET_AVX512F"
7887 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7888 [(set_attr "prefix" "evex")
7889 (set_attr "mode" "<MODE>")])
7890
7891 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
7892 [(match_operand:VF_128 0 "register_operand")
7893 (match_operand:VF_128 1 "register_operand")
7894 (match_operand:VF_128 2 "register_operand")
7895 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7896 (match_operand:SI 4 "const_0_to_255_operand")
7897 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7898 "TARGET_AVX512F"
7899 {
7900 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7901 operands[0], operands[1], operands[2], operands[3],
7902 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7903 <round_saeonly_expand_operand6>));
7904 DONE;
7905 })
7906
7907 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
7908 [(set (match_operand:VF_128 0 "register_operand" "=v")
7909 (vec_merge:VF_128
7910 (unspec:VF_128
7911 [(match_operand:VF_128 1 "register_operand" "0")
7912 (match_operand:VF_128 2 "register_operand" "v")
7913 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7914 (match_operand:SI 4 "const_0_to_255_operand")]
7915 UNSPEC_FIXUPIMM)
7916 (match_dup 1)
7917 (const_int 1)))]
7918 "TARGET_AVX512F"
7919 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7920 [(set_attr "prefix" "evex")
7921 (set_attr "mode" "<ssescalarmode>")])
7922
7923 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
7924 [(set (match_operand:VF_128 0 "register_operand" "=v")
7925 (vec_merge:VF_128
7926 (vec_merge:VF_128
7927 (unspec:VF_128
7928 [(match_operand:VF_128 1 "register_operand" "0")
7929 (match_operand:VF_128 2 "register_operand" "v")
7930 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7931 (match_operand:SI 4 "const_0_to_255_operand")]
7932 UNSPEC_FIXUPIMM)
7933 (match_dup 1)
7934 (const_int 1))
7935 (match_dup 1)
7936 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7937 "TARGET_AVX512F"
7938 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7939 [(set_attr "prefix" "evex")
7940 (set_attr "mode" "<ssescalarmode>")])
7941
7942 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
7943 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7944 (unspec:VF_AVX512VL
7945 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
7946 (match_operand:SI 2 "const_0_to_255_operand")]
7947 UNSPEC_ROUND))]
7948 "TARGET_AVX512F"
7949 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
7950 [(set_attr "length_immediate" "1")
7951 (set_attr "prefix" "evex")
7952 (set_attr "mode" "<MODE>")])
7953
7954 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
7955 [(set (match_operand:VF_128 0 "register_operand" "=v")
7956 (vec_merge:VF_128
7957 (unspec:VF_128
7958 [(match_operand:VF_128 1 "register_operand" "v")
7959 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7960 (match_operand:SI 3 "const_0_to_255_operand")]
7961 UNSPEC_ROUND)
7962 (match_dup 1)
7963 (const_int 1)))]
7964 "TARGET_AVX512F"
7965 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
7966 [(set_attr "length_immediate" "1")
7967 (set_attr "prefix" "evex")
7968 (set_attr "mode" "<MODE>")])
7969
7970 ;; One bit in mask selects 2 elements: the imm8 supplies four 2-bit index
;; fields that are reused in every 128-bit lane, so its eight bits steer all
;; sixteen elements of the V16SF result.
7971 (define_insn "avx512f_shufps512_1<mask_name>"
7972 [(set (match_operand:V16SF 0 "register_operand" "=v")
7973 (vec_select:V16SF
7974 (vec_concat:V32SF
7975 (match_operand:V16SF 1 "register_operand" "v")
7976 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7977 (parallel [(match_operand 3 "const_0_to_3_operand")
7978 (match_operand 4 "const_0_to_3_operand")
7979 (match_operand 5 "const_16_to_19_operand")
7980 (match_operand 6 "const_16_to_19_operand")
7981 (match_operand 7 "const_4_to_7_operand")
7982 (match_operand 8 "const_4_to_7_operand")
7983 (match_operand 9 "const_20_to_23_operand")
7984 (match_operand 10 "const_20_to_23_operand")
7985 (match_operand 11 "const_8_to_11_operand")
7986 (match_operand 12 "const_8_to_11_operand")
7987 (match_operand 13 "const_24_to_27_operand")
7988 (match_operand 14 "const_24_to_27_operand")
7989 (match_operand 15 "const_12_to_15_operand")
7990 (match_operand 16 "const_12_to_15_operand")
7991 (match_operand 17 "const_28_to_31_operand")
7992 (match_operand 18 "const_28_to_31_operand")])))]
7993 "TARGET_AVX512F
7994 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7995 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7996 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7997 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
7998 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
7999 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
8000 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
8001 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
8002 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
8003 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
8004 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
8005 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
8006 {
8007 int mask;
8008 mask = INTVAL (operands[3]);
8009 mask |= INTVAL (operands[4]) << 2;
8010 mask |= (INTVAL (operands[5]) - 16) << 4;
8011 mask |= (INTVAL (operands[6]) - 16) << 6;
8012 operands[3] = GEN_INT (mask);
8013
8014 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
8015 }
8016 [(set_attr "type" "sselog")
8017 (set_attr "length_immediate" "1")
8018 (set_attr "prefix" "evex")
8019 (set_attr "mode" "V16SF")])
8020
8021 (define_expand "avx512f_shufpd512_mask"
8022 [(match_operand:V8DF 0 "register_operand")
8023 (match_operand:V8DF 1 "register_operand")
8024 (match_operand:V8DF 2 "nonimmediate_operand")
8025 (match_operand:SI 3 "const_0_to_255_operand")
8026 (match_operand:V8DF 4 "register_operand")
8027 (match_operand:QI 5 "register_operand")]
8028 "TARGET_AVX512F"
8029 {
8030 int mask = INTVAL (operands[3]);
8031 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
8032 GEN_INT (mask & 1),
8033 GEN_INT (mask & 2 ? 9 : 8),
8034 GEN_INT (mask & 4 ? 3 : 2),
8035 GEN_INT (mask & 8 ? 11 : 10),
8036 GEN_INT (mask & 16 ? 5 : 4),
8037 GEN_INT (mask & 32 ? 13 : 12),
8038 GEN_INT (mask & 64 ? 7 : 6),
8039 GEN_INT (mask & 128 ? 15 : 14),
8040 operands[4], operands[5]));
8041 DONE;
8042 })
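;; In the expansion above, result element 2k comes from operand 1 and element
;; 2k+1 from operand 2; imm8 bit i picks the high instead of the low double
;; of the pair feeding result element i.  E.g. imm8 0x05 yields the selectors
;; 1, 8, 3, 10, 4, 12, 6, 14.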
8043
8044 (define_insn "avx512f_shufpd512_1<mask_name>"
8045 [(set (match_operand:V8DF 0 "register_operand" "=v")
8046 (vec_select:V8DF
8047 (vec_concat:V16DF
8048 (match_operand:V8DF 1 "register_operand" "v")
8049 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8050 (parallel [(match_operand 3 "const_0_to_1_operand")
8051 (match_operand 4 "const_8_to_9_operand")
8052 (match_operand 5 "const_2_to_3_operand")
8053 (match_operand 6 "const_10_to_11_operand")
8054 (match_operand 7 "const_4_to_5_operand")
8055 (match_operand 8 "const_12_to_13_operand")
8056 (match_operand 9 "const_6_to_7_operand")
8057 (match_operand 10 "const_14_to_15_operand")])))]
8058 "TARGET_AVX512F"
8059 {
8060 int mask;
8061 mask = INTVAL (operands[3]);
8062 mask |= (INTVAL (operands[4]) - 8) << 1;
8063 mask |= (INTVAL (operands[5]) - 2) << 2;
8064 mask |= (INTVAL (operands[6]) - 10) << 3;
8065 mask |= (INTVAL (operands[7]) - 4) << 4;
8066 mask |= (INTVAL (operands[8]) - 12) << 5;
8067 mask |= (INTVAL (operands[9]) - 6) << 6;
8068 mask |= (INTVAL (operands[10]) - 14) << 7;
8069 operands[3] = GEN_INT (mask);
8070
8071 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8072 }
8073 [(set_attr "type" "sselog")
8074 (set_attr "length_immediate" "1")
8075 (set_attr "prefix" "evex")
8076 (set_attr "mode" "V8DF")])
8077
8078 (define_expand "avx_shufpd256<mask_expand4_name>"
8079 [(match_operand:V4DF 0 "register_operand")
8080 (match_operand:V4DF 1 "register_operand")
8081 (match_operand:V4DF 2 "nonimmediate_operand")
8082 (match_operand:SI 3 "const_int_operand")]
8083 "TARGET_AVX"
8084 {
8085 int mask = INTVAL (operands[3]);
8086 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8087 operands[1],
8088 operands[2],
8089 GEN_INT (mask & 1),
8090 GEN_INT (mask & 2 ? 5 : 4),
8091 GEN_INT (mask & 4 ? 3 : 2),
8092 GEN_INT (mask & 8 ? 7 : 6)
8093 <mask_expand4_args>));
8094 DONE;
8095 })
8096
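;; The 256-bit form uses only imm8 bits 0-3, one bit per result element, with
;; the same low/high choice made within each 128-bit lane; the insn below
;; packs operands 3-6 back into the immediate.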
8097 (define_insn "avx_shufpd256_1<mask_name>"
8098 [(set (match_operand:V4DF 0 "register_operand" "=v")
8099 (vec_select:V4DF
8100 (vec_concat:V8DF
8101 (match_operand:V4DF 1 "register_operand" "v")
8102 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8103 (parallel [(match_operand 3 "const_0_to_1_operand")
8104 (match_operand 4 "const_4_to_5_operand")
8105 (match_operand 5 "const_2_to_3_operand")
8106 (match_operand 6 "const_6_to_7_operand")])))]
8107 "TARGET_AVX && <mask_avx512vl_condition>"
8108 {
8109 int mask;
8110 mask = INTVAL (operands[3]);
8111 mask |= (INTVAL (operands[4]) - 4) << 1;
8112 mask |= (INTVAL (operands[5]) - 2) << 2;
8113 mask |= (INTVAL (operands[6]) - 6) << 3;
8114 operands[3] = GEN_INT (mask);
8115
8116 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
8117 }
8118 [(set_attr "type" "sseshuf")
8119 (set_attr "length_immediate" "1")
8120 (set_attr "prefix" "vex")
8121 (set_attr "mode" "V4DF")])
8122
8123 (define_expand "sse2_shufpd<mask_expand4_name>"
8124 [(match_operand:V2DF 0 "register_operand")
8125 (match_operand:V2DF 1 "register_operand")
8126 (match_operand:V2DF 2 "vector_operand")
8127 (match_operand:SI 3 "const_int_operand")]
8128 "TARGET_SSE2"
8129 {
8130 int mask = INTVAL (operands[3]);
8131 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8132 operands[2], GEN_INT (mask & 1),
8133 GEN_INT (mask & 2 ? 3 : 2)
8134 <mask_expand4_args>));
8135 DONE;
8136 })
8137
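;; In the V2DF patterns below only imm8 bits 0 and 1 matter: bit 0 selects
;; which half of operand 1 becomes the low result element, bit 1 selects
;; which half of operand 2 becomes the high result element.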
8138 (define_insn "sse2_shufpd_v2df_mask"
8139 [(set (match_operand:V2DF 0 "register_operand" "=v")
8140 (vec_merge:V2DF
8141 (vec_select:V2DF
8142 (vec_concat:V4DF
8143 (match_operand:V2DF 1 "register_operand" "v")
8144 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8145 (parallel [(match_operand 3 "const_0_to_1_operand")
8146 (match_operand 4 "const_2_to_3_operand")]))
8147 (match_operand:V2DF 5 "vector_move_operand" "0C")
8148 (match_operand:QI 6 "register_operand" "Yk")))]
8149 "TARGET_AVX512VL"
8150 {
8151 int mask;
8152 mask = INTVAL (operands[3]);
8153 mask |= (INTVAL (operands[4]) - 2) << 1;
8154 operands[3] = GEN_INT (mask);
8155
8156 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{6%}%N5, %1, %2, %3}";
8157 }
8158 [(set_attr "type" "sseshuf")
8159 (set_attr "length_immediate" "1")
8160 (set_attr "prefix" "evex")
8161 (set_attr "mode" "V2DF")])
8162
8163 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
8164 (define_insn "avx2_interleave_highv4di<mask_name>"
8165 [(set (match_operand:V4DI 0 "register_operand" "=v")
8166 (vec_select:V4DI
8167 (vec_concat:V8DI
8168 (match_operand:V4DI 1 "register_operand" "v")
8169 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8170 (parallel [(const_int 1)
8171 (const_int 5)
8172 (const_int 3)
8173 (const_int 7)])))]
8174 "TARGET_AVX2 && <mask_avx512vl_condition>"
8175 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8176 [(set_attr "type" "sselog")
8177 (set_attr "prefix" "vex")
8178 (set_attr "mode" "OI")])
8179
8180 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
8181 [(set (match_operand:V8DI 0 "register_operand" "=v")
8182 (vec_select:V8DI
8183 (vec_concat:V16DI
8184 (match_operand:V8DI 1 "register_operand" "v")
8185 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8186 (parallel [(const_int 1) (const_int 9)
8187 (const_int 3) (const_int 11)
8188 (const_int 5) (const_int 13)
8189 (const_int 7) (const_int 15)])))]
8190 "TARGET_AVX512F"
8191 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8192 [(set_attr "type" "sselog")
8193 (set_attr "prefix" "evex")
8194 (set_attr "mode" "XI")])
8195
8196 (define_insn "vec_interleave_highv2di<mask_name>"
8197 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8198 (vec_select:V2DI
8199 (vec_concat:V4DI
8200 (match_operand:V2DI 1 "register_operand" "0,v")
8201 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
8202 (parallel [(const_int 1)
8203 (const_int 3)])))]
8204 "TARGET_SSE2 && <mask_avx512vl_condition>"
8205 "@
8206 punpckhqdq\t{%2, %0|%0, %2}
8207 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8208 [(set_attr "isa" "noavx,avx")
8209 (set_attr "type" "sselog")
8210 (set_attr "prefix_data16" "1,*")
8211 (set_attr "prefix" "orig,<mask_prefix>")
8212 (set_attr "mode" "TI")])
8213
8214 (define_insn "avx2_interleave_lowv4di<mask_name>"
8215 [(set (match_operand:V4DI 0 "register_operand" "=v")
8216 (vec_select:V4DI
8217 (vec_concat:V8DI
8218 (match_operand:V4DI 1 "register_operand" "v")
8219 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8220 (parallel [(const_int 0)
8221 (const_int 4)
8222 (const_int 2)
8223 (const_int 6)])))]
8224 "TARGET_AVX2 && <mask_avx512vl_condition>"
8225 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8226 [(set_attr "type" "sselog")
8227 (set_attr "prefix" "vex")
8228 (set_attr "mode" "OI")])
8229
8230 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
8231 [(set (match_operand:V8DI 0 "register_operand" "=v")
8232 (vec_select:V8DI
8233 (vec_concat:V16DI
8234 (match_operand:V8DI 1 "register_operand" "v")
8235 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8236 (parallel [(const_int 0) (const_int 8)
8237 (const_int 2) (const_int 10)
8238 (const_int 4) (const_int 12)
8239 (const_int 6) (const_int 14)])))]
8240 "TARGET_AVX512F"
8241 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8242 [(set_attr "type" "sselog")
8243 (set_attr "prefix" "evex")
8244 (set_attr "mode" "XI")])
8245
8246 (define_insn "vec_interleave_lowv2di<mask_name>"
8247 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8248 (vec_select:V2DI
8249 (vec_concat:V4DI
8250 (match_operand:V2DI 1 "register_operand" "0,v")
8251 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
8252 (parallel [(const_int 0)
8253 (const_int 2)])))]
8254 "TARGET_SSE2 && <mask_avx512vl_condition>"
8255 "@
8256 punpcklqdq\t{%2, %0|%0, %2}
8257 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8258 [(set_attr "isa" "noavx,avx")
8259 (set_attr "type" "sselog")
8260 (set_attr "prefix_data16" "1,*")
8261 (set_attr "prefix" "orig,vex")
8262 (set_attr "mode" "TI")])
8263
8264 (define_insn "sse2_shufpd_<mode>"
8265 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
8266 (vec_select:VI8F_128
8267 (vec_concat:<ssedoublevecmode>
8268 (match_operand:VI8F_128 1 "register_operand" "0,v")
8269 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
8270 (parallel [(match_operand 3 "const_0_to_1_operand")
8271 (match_operand 4 "const_2_to_3_operand")])))]
8272 "TARGET_SSE2"
8273 {
8274 int mask;
8275 mask = INTVAL (operands[3]);
8276 mask |= (INTVAL (operands[4]) - 2) << 1;
8277 operands[3] = GEN_INT (mask);
8278
8279 switch (which_alternative)
8280 {
8281 case 0:
8282 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
8283 case 1:
8284 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8285 default:
8286 gcc_unreachable ();
8287 }
8288 }
8289 [(set_attr "isa" "noavx,avx")
8290 (set_attr "type" "sseshuf")
8291 (set_attr "length_immediate" "1")
8292 (set_attr "prefix" "orig,maybe_evex")
8293 (set_attr "mode" "V2DF")])
8294
8295 ;; Avoid combining registers from different units in a single alternative,
8296 ;; see comment above inline_secondary_memory_needed function in i386.c
8297 (define_insn "sse2_storehpd"
8298 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,v,x,*f,r")
8299 (vec_select:DF
8300 (match_operand:V2DF 1 "nonimmediate_operand" " v,0,v,o,o,o")
8301 (parallel [(const_int 1)])))]
8302 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8303 "@
8304 %vmovhpd\t{%1, %0|%0, %1}
8305 unpckhpd\t%0, %0
8306 vunpckhpd\t{%d1, %0|%0, %d1}
8307 #
8308 #
8309 #"
8310 [(set_attr "isa" "*,noavx,avx,*,*,*")
8311 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
8312 (set (attr "prefix_data16")
8313 (if_then_else
8314 (and (eq_attr "alternative" "0")
8315 (not (match_test "TARGET_AVX")))
8316 (const_string "1")
8317 (const_string "*")))
8318 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
8319 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
8320
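;; After reload, extracting the high half of a V2DF that is in memory is just
;; a scalar DFmode load from offset 8.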
8321 (define_split
8322 [(set (match_operand:DF 0 "register_operand")
8323 (vec_select:DF
8324 (match_operand:V2DF 1 "memory_operand")
8325 (parallel [(const_int 1)])))]
8326 "TARGET_SSE2 && reload_completed"
8327 [(set (match_dup 0) (match_dup 1))]
8328 "operands[1] = adjust_address (operands[1], DFmode, 8);")
8329
8330 (define_insn "*vec_extractv2df_1_sse"
8331 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8332 (vec_select:DF
8333 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8334 (parallel [(const_int 1)])))]
8335 "!TARGET_SSE2 && TARGET_SSE
8336 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8337 "@
8338 movhps\t{%1, %0|%q0, %1}
8339 movhlps\t{%1, %0|%0, %1}
8340 movlps\t{%H1, %0|%0, %H1}"
8341 [(set_attr "type" "ssemov")
8342 (set_attr "mode" "V2SF,V4SF,V2SF")])
8343
8344 ;; Avoid combining registers from different units in a single alternative,
8345 ;; see comment above inline_secondary_memory_needed function in i386.c
8346 (define_insn "sse2_storelpd"
8347 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
8348 (vec_select:DF
8349 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
8350 (parallel [(const_int 0)])))]
8351 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8352 "@
8353 %vmovlpd\t{%1, %0|%0, %1}
8354 #
8355 #
8356 #
8357 #"
8358 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
8359 (set_attr "prefix_data16" "1,*,*,*,*")
8360 (set_attr "prefix" "maybe_vex")
8361 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
8362
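;; Extracting the low half of a V2DF needs no shuffle at all; it is simply
;; the low DFmode part of the operand.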
8363 (define_split
8364 [(set (match_operand:DF 0 "register_operand")
8365 (vec_select:DF
8366 (match_operand:V2DF 1 "nonimmediate_operand")
8367 (parallel [(const_int 0)])))]
8368 "TARGET_SSE2 && reload_completed"
8369 [(set (match_dup 0) (match_dup 1))]
8370 "operands[1] = gen_lowpart (DFmode, operands[1]);")
8371
8372 (define_insn "*vec_extractv2df_0_sse"
8373 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8374 (vec_select:DF
8375 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8376 (parallel [(const_int 0)])))]
8377 "!TARGET_SSE2 && TARGET_SSE
8378 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8379 "@
8380 movlps\t{%1, %0|%0, %1}
8381 movaps\t{%1, %0|%0, %1}
8382 movlps\t{%1, %0|%0, %q1}"
8383 [(set_attr "type" "ssemov")
8384 (set_attr "mode" "V2SF,V4SF,V2SF")])
8385
8386 (define_expand "sse2_loadhpd_exp"
8387 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8388 (vec_concat:V2DF
8389 (vec_select:DF
8390 (match_operand:V2DF 1 "nonimmediate_operand")
8391 (parallel [(const_int 0)]))
8392 (match_operand:DF 2 "nonimmediate_operand")))]
8393 "TARGET_SSE2"
8394 {
8395 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8396
8397 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8398
8399 /* Fix up the destination if needed. */
8400 if (dst != operands[0])
8401 emit_move_insn (operands[0], dst);
8402
8403 DONE;
8404 })
8405
8406 ;; Avoid combining registers from different units in a single alternative,
8407 ;; see comment above inline_secondary_memory_needed function in i386.c
8408 (define_insn "sse2_loadhpd"
8409 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8410 "=x,v,x,v,o,o ,o")
8411 (vec_concat:V2DF
8412 (vec_select:DF
8413 (match_operand:V2DF 1 "nonimmediate_operand"
8414 " 0,v,0,v,0,0 ,0")
8415 (parallel [(const_int 0)]))
8416 (match_operand:DF 2 "nonimmediate_operand"
8417 " m,m,x,v,x,*f,r")))]
8418 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8419 "@
8420 movhpd\t{%2, %0|%0, %2}
8421 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8422 unpcklpd\t{%2, %0|%0, %2}
8423 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8424 #
8425 #
8426 #"
8427 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
8428 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8429 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
8430 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
8431 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
8432
8433 (define_split
8434 [(set (match_operand:V2DF 0 "memory_operand")
8435 (vec_concat:V2DF
8436 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
8437 (match_operand:DF 1 "register_operand")))]
8438 "TARGET_SSE2 && reload_completed"
8439 [(set (match_dup 0) (match_dup 1))]
8440 "operands[0] = adjust_address (operands[0], DFmode, 8);")
8441
8442 (define_expand "sse2_loadlpd_exp"
8443 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8444 (vec_concat:V2DF
8445 (match_operand:DF 2 "nonimmediate_operand")
8446 (vec_select:DF
8447 (match_operand:V2DF 1 "nonimmediate_operand")
8448 (parallel [(const_int 1)]))))]
8449 "TARGET_SSE2"
8450 {
8451 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8452
8453 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
8454
8455 /* Fix up the destination if needed. */
8456 if (dst != operands[0])
8457 emit_move_insn (operands[0], dst);
8458
8459 DONE;
8460 })
8461
8462 ;; Avoid combining registers from different units in a single alternative,
8463 ;; see comment above inline_secondary_memory_needed function in i386.c
8464 (define_insn "sse2_loadlpd"
8465 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8466 "=v,x,v,x,v,x,x,v,m,m ,m")
8467 (vec_concat:V2DF
8468 (match_operand:DF 2 "nonimmediate_operand"
8469 "vm,m,m,x,v,0,0,v,x,*f,r")
8470 (vec_select:DF
8471 (match_operand:V2DF 1 "vector_move_operand"
8472 " C,0,v,0,v,x,o,o,0,0 ,0")
8473 (parallel [(const_int 1)]))))]
8474 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8475 "@
8476 %vmovq\t{%2, %0|%0, %2}
8477 movlpd\t{%2, %0|%0, %2}
8478 vmovlpd\t{%2, %1, %0|%0, %1, %2}
8479 movsd\t{%2, %0|%0, %2}
8480 vmovsd\t{%2, %1, %0|%0, %1, %2}
8481 shufpd\t{$2, %1, %0|%0, %1, 2}
8482 movhpd\t{%H1, %0|%0, %H1}
8483 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
8484 #
8485 #
8486 #"
8487 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
8488 (set (attr "type")
8489 (cond [(eq_attr "alternative" "5")
8490 (const_string "sselog")
8491 (eq_attr "alternative" "9")
8492 (const_string "fmov")
8493 (eq_attr "alternative" "10")
8494 (const_string "imov")
8495 ]
8496 (const_string "ssemov")))
8497 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
8498 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
8499 (set_attr "prefix" "maybe_vex,orig,maybe_evex,orig,maybe_evex,orig,orig,maybe_evex,*,*,*")
8500 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
8501
8502 (define_split
8503 [(set (match_operand:V2DF 0 "memory_operand")
8504 (vec_concat:V2DF
8505 (match_operand:DF 1 "register_operand")
8506 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
8507 "TARGET_SSE2 && reload_completed"
8508 [(set (match_dup 0) (match_dup 1))]
8509 "operands[0] = adjust_address (operands[0], DFmode, 0);")
8510
8511 (define_insn "sse2_movsd"
8512 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
8513 (vec_merge:V2DF
8514 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
8515 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
8516 (const_int 1)))]
8517 "TARGET_SSE2"
8518 "@
8519 movsd\t{%2, %0|%0, %2}
8520 vmovsd\t{%2, %1, %0|%0, %1, %2}
8521 movlpd\t{%2, %0|%0, %q2}
8522 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
8523 %vmovlpd\t{%2, %0|%q0, %2}
8524 shufpd\t{$2, %1, %0|%0, %1, 2}
8525 movhps\t{%H1, %0|%0, %H1}
8526 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
8527 %vmovhps\t{%1, %H0|%H0, %1}"
8528 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
8529 (set (attr "type")
8530 (if_then_else
8531 (eq_attr "alternative" "5")
8532 (const_string "sselog")
8533 (const_string "ssemov")))
8534 (set (attr "prefix_data16")
8535 (if_then_else
8536 (and (eq_attr "alternative" "2,4")
8537 (not (match_test "TARGET_AVX")))
8538 (const_string "1")
8539 (const_string "*")))
8540 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
8541 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex,orig,orig,maybe_evex,maybe_vex")
8542 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
8543
8544 (define_insn "vec_dupv2df<mask_name>"
8545 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
8546 (vec_duplicate:V2DF
8547 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
8548 "TARGET_SSE2 && <mask_avx512vl_condition>"
8549 "@
8550 unpcklpd\t%0, %0
8551 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
8552 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8553 [(set_attr "isa" "noavx,sse3,avx512vl")
8554 (set_attr "type" "sselog1")
8555 (set_attr "prefix" "orig,maybe_vex,evex")
8556 (set_attr "mode" "V2DF,DF,DF")])
8557
8558 (define_insn "vec_concatv2df"
8559 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
8560 (vec_concat:V2DF
8561 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,xm,0,0")
8562 (match_operand:DF 2 "vector_move_operand" " x,x,v,1,1,m,m, C,x,m")))]
8563 "TARGET_SSE
8564 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
8565 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
8566 "@
8567 unpcklpd\t{%2, %0|%0, %2}
8568 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8569 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8570 %vmovddup\t{%1, %0|%0, %1}
8571 vmovddup\t{%1, %0|%0, %1}
8572 movhpd\t{%2, %0|%0, %2}
8573 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8574 %vmovq\t{%1, %0|%0, %1}
8575 movlhps\t{%2, %0|%0, %2}
8576 movhps\t{%2, %0|%0, %2}"
8577 [(set_attr "isa" "sse2_noavx,avx,avx512vl,sse3,avx512vl,sse2_noavx,avx,sse2,noavx,noavx")
8578 (set (attr "type")
8579 (if_then_else
8580 (eq_attr "alternative" "0,1,2,3,4")
8581 (const_string "sselog")
8582 (const_string "ssemov")))
8583 (set (attr "prefix_data16")
8584 (if_then_else (eq_attr "alternative" "5")
8585 (const_string "1")
8586 (const_string "*")))
8587 (set_attr "prefix" "orig,vex,evex,maybe_vex,evex,orig,vex,maybe_vex,orig,orig")
8588 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
8589
8590 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8591 ;;
8592 ;; Parallel integer down-conversion operations
8593 ;;
8594 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8595
8596 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
8597 (define_mode_attr pmov_src_mode
8598 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
8599 (define_mode_attr pmov_src_lower
8600 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
8601 (define_mode_attr pmov_suff_1
8602 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
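;; The pmov suffix names the source and destination element widths of the
;; truncation, e.g. "db" is doubleword->byte (vpmovdb) and "qw" is
;; quadword->word (vpmovqw).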
8603
8604 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
8605 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8606 (any_truncate:PMOV_DST_MODE_1
8607 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
8608 "TARGET_AVX512F"
8609 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
8610 [(set_attr "type" "ssemov")
8611 (set_attr "memory" "none,store")
8612 (set_attr "prefix" "evex")
8613 (set_attr "mode" "<sseinsnmode>")])
8614
8615 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
8616 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8617 (vec_merge:PMOV_DST_MODE_1
8618 (any_truncate:PMOV_DST_MODE_1
8619 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
8620 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
8621 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8622 "TARGET_AVX512F"
8623 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8624 [(set_attr "type" "ssemov")
8625 (set_attr "memory" "none,store")
8626 (set_attr "prefix" "evex")
8627 (set_attr "mode" "<sseinsnmode>")])
8628
8629 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
8630 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
8631 (vec_merge:PMOV_DST_MODE_1
8632 (any_truncate:PMOV_DST_MODE_1
8633 (match_operand:<pmov_src_mode> 1 "register_operand"))
8634 (match_dup 0)
8635 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
8636 "TARGET_AVX512F")
8637
8638 (define_insn "avx512bw_<code>v32hiv32qi2"
8639 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8640 (any_truncate:V32QI
8641 (match_operand:V32HI 1 "register_operand" "v,v")))]
8642 "TARGET_AVX512BW"
8643 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
8644 [(set_attr "type" "ssemov")
8645 (set_attr "memory" "none,store")
8646 (set_attr "prefix" "evex")
8647 (set_attr "mode" "XI")])
8648
8649 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
8650 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8651 (vec_merge:V32QI
8652 (any_truncate:V32QI
8653 (match_operand:V32HI 1 "register_operand" "v,v"))
8654 (match_operand:V32QI 2 "vector_move_operand" "0C,0")
8655 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
8656 "TARGET_AVX512BW"
8657 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8658 [(set_attr "type" "ssemov")
8659 (set_attr "memory" "none,store")
8660 (set_attr "prefix" "evex")
8661 (set_attr "mode" "XI")])
8662
8663 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
8664 [(set (match_operand:V32QI 0 "nonimmediate_operand")
8665 (vec_merge:V32QI
8666 (any_truncate:V32QI
8667 (match_operand:V32HI 1 "register_operand"))
8668 (match_dup 0)
8669 (match_operand:SI 2 "register_operand")))]
8670 "TARGET_AVX512BW")
8671
8672 (define_mode_iterator PMOV_DST_MODE_2
8673 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
8674 (define_mode_attr pmov_suff_2
8675 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
8676
8677 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
8678 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8679 (any_truncate:PMOV_DST_MODE_2
8680 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
8681 "TARGET_AVX512VL"
8682 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
8683 [(set_attr "type" "ssemov")
8684 (set_attr "memory" "none,store")
8685 (set_attr "prefix" "evex")
8686 (set_attr "mode" "<sseinsnmode>")])
8687
8688 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
8689 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8690 (vec_merge:PMOV_DST_MODE_2
8691 (any_truncate:PMOV_DST_MODE_2
8692 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
8693 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
8694 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8695 "TARGET_AVX512VL"
8696 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8697 [(set_attr "type" "ssemov")
8698 (set_attr "memory" "none,store")
8699 (set_attr "prefix" "evex")
8700 (set_attr "mode" "<sseinsnmode>")])
8701
8702 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
8703 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
8704 (vec_merge:PMOV_DST_MODE_2
8705 (any_truncate:PMOV_DST_MODE_2
8706 (match_operand:<ssedoublemode> 1 "register_operand"))
8707 (match_dup 0)
8708 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
8709 "TARGET_AVX512VL")
8710
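;; The patterns below truncate to results narrower than a full XMM register;
;; the unused upper part is represented as zeros for a register destination
;; and as the unchanged bytes of the destination for a memory store.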
8711 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
8712 (define_mode_attr pmov_dst_3
8713 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
8714 (define_mode_attr pmov_dst_zeroed_3
8715 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
8716 (define_mode_attr pmov_suff_3
8717 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
8718
8719 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
8720 [(set (match_operand:V16QI 0 "register_operand" "=v")
8721 (vec_concat:V16QI
8722 (any_truncate:<pmov_dst_3>
8723 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
8724 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
8725 "TARGET_AVX512VL"
8726 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8727 [(set_attr "type" "ssemov")
8728 (set_attr "prefix" "evex")
8729 (set_attr "mode" "TI")])
8730
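;; The store form writes only the low two bytes; the vec_select of match_dup 0
;; for elements 2..15 expresses that the rest of the 16-byte destination is
;; left untouched.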
8731 (define_insn "*avx512vl_<code>v2div2qi2_store"
8732 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8733 (vec_concat:V16QI
8734 (any_truncate:V2QI
8735 (match_operand:V2DI 1 "register_operand" "v"))
8736 (vec_select:V14QI
8737 (match_dup 0)
8738 (parallel [(const_int 2) (const_int 3)
8739 (const_int 4) (const_int 5)
8740 (const_int 6) (const_int 7)
8741 (const_int 8) (const_int 9)
8742 (const_int 10) (const_int 11)
8743 (const_int 12) (const_int 13)
8744 (const_int 14) (const_int 15)]))))]
8745 "TARGET_AVX512VL"
8746 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8747 [(set_attr "type" "ssemov")
8748 (set_attr "memory" "store")
8749 (set_attr "prefix" "evex")
8750 (set_attr "mode" "TI")])
8751
8752 (define_insn "avx512vl_<code>v2div2qi2_mask"
8753 [(set (match_operand:V16QI 0 "register_operand" "=v")
8754 (vec_concat:V16QI
8755 (vec_merge:V2QI
8756 (any_truncate:V2QI
8757 (match_operand:V2DI 1 "register_operand" "v"))
8758 (vec_select:V2QI
8759 (match_operand:V16QI 2 "vector_move_operand" "0C")
8760 (parallel [(const_int 0) (const_int 1)]))
8761 (match_operand:QI 3 "register_operand" "Yk"))
8762 (const_vector:V14QI [(const_int 0) (const_int 0)
8763 (const_int 0) (const_int 0)
8764 (const_int 0) (const_int 0)
8765 (const_int 0) (const_int 0)
8766 (const_int 0) (const_int 0)
8767 (const_int 0) (const_int 0)
8768 (const_int 0) (const_int 0)])))]
8769 "TARGET_AVX512VL"
8770 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8771 [(set_attr "type" "ssemov")
8772 (set_attr "prefix" "evex")
8773 (set_attr "mode" "TI")])
8774
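;; The "_1" variants are the zero-masking forms: elements not selected by the
;; mask register are cleared rather than merged from an existing value, hence
;; the %{z%} in the output template.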
8775 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
8776 [(set (match_operand:V16QI 0 "register_operand" "=v")
8777 (vec_concat:V16QI
8778 (vec_merge:V2QI
8779 (any_truncate:V2QI
8780 (match_operand:V2DI 1 "register_operand" "v"))
8781 (const_vector:V2QI [(const_int 0) (const_int 0)])
8782 (match_operand:QI 2 "register_operand" "Yk"))
8783 (const_vector:V14QI [(const_int 0) (const_int 0)
8784 (const_int 0) (const_int 0)
8785 (const_int 0) (const_int 0)
8786 (const_int 0) (const_int 0)
8787 (const_int 0) (const_int 0)
8788 (const_int 0) (const_int 0)
8789 (const_int 0) (const_int 0)])))]
8790 "TARGET_AVX512VL"
8791 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
8792 [(set_attr "type" "ssemov")
8793 (set_attr "prefix" "evex")
8794 (set_attr "mode" "TI")])
8795
8796 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
8797 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8798 (vec_concat:V16QI
8799 (vec_merge:V2QI
8800 (any_truncate:V2QI
8801 (match_operand:V2DI 1 "register_operand" "v"))
8802 (vec_select:V2QI
8803 (match_dup 0)
8804 (parallel [(const_int 0) (const_int 1)]))
8805 (match_operand:QI 2 "register_operand" "Yk"))
8806 (vec_select:V14QI
8807 (match_dup 0)
8808 (parallel [(const_int 2) (const_int 3)
8809 (const_int 4) (const_int 5)
8810 (const_int 6) (const_int 7)
8811 (const_int 8) (const_int 9)
8812 (const_int 10) (const_int 11)
8813 (const_int 12) (const_int 13)
8814 (const_int 14) (const_int 15)]))))]
8815 "TARGET_AVX512VL"
8816 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8817 [(set_attr "type" "ssemov")
8818 (set_attr "memory" "store")
8819 (set_attr "prefix" "evex")
8820 (set_attr "mode" "TI")])
8821
8822 (define_insn "*avx512vl_<code><mode>v4qi2_store"
8823 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8824 (vec_concat:V16QI
8825 (any_truncate:V4QI
8826 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8827 (vec_select:V12QI
8828 (match_dup 0)
8829 (parallel [(const_int 4) (const_int 5)
8830 (const_int 6) (const_int 7)
8831 (const_int 8) (const_int 9)
8832 (const_int 10) (const_int 11)
8833 (const_int 12) (const_int 13)
8834 (const_int 14) (const_int 15)]))))]
8835 "TARGET_AVX512VL"
8836 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8837 [(set_attr "type" "ssemov")
8838 (set_attr "memory" "store")
8839 (set_attr "prefix" "evex")
8840 (set_attr "mode" "TI")])
8841
8842 (define_insn "avx512vl_<code><mode>v4qi2_mask"
8843 [(set (match_operand:V16QI 0 "register_operand" "=v")
8844 (vec_concat:V16QI
8845 (vec_merge:V4QI
8846 (any_truncate:V4QI
8847 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8848 (vec_select:V4QI
8849 (match_operand:V16QI 2 "vector_move_operand" "0C")
8850 (parallel [(const_int 0) (const_int 1)
8851 (const_int 2) (const_int 3)]))
8852 (match_operand:QI 3 "register_operand" "Yk"))
8853 (const_vector:V12QI [(const_int 0) (const_int 0)
8854 (const_int 0) (const_int 0)
8855 (const_int 0) (const_int 0)
8856 (const_int 0) (const_int 0)
8857 (const_int 0) (const_int 0)
8858 (const_int 0) (const_int 0)])))]
8859 "TARGET_AVX512VL"
8860 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8861 [(set_attr "type" "ssemov")
8862 (set_attr "prefix" "evex")
8863 (set_attr "mode" "TI")])
8864
8865 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
8866 [(set (match_operand:V16QI 0 "register_operand" "=v")
8867 (vec_concat:V16QI
8868 (vec_merge:V4QI
8869 (any_truncate:V4QI
8870 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8871 (const_vector:V4QI [(const_int 0) (const_int 0)
8872 (const_int 0) (const_int 0)])
8873 (match_operand:QI 2 "register_operand" "Yk"))
8874 (const_vector:V12QI [(const_int 0) (const_int 0)
8875 (const_int 0) (const_int 0)
8876 (const_int 0) (const_int 0)
8877 (const_int 0) (const_int 0)
8878 (const_int 0) (const_int 0)
8879 (const_int 0) (const_int 0)])))]
8880 "TARGET_AVX512VL"
8881 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
8882 [(set_attr "type" "ssemov")
8883 (set_attr "prefix" "evex")
8884 (set_attr "mode" "TI")])
8885
8886 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
8887 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8888 (vec_concat:V16QI
8889 (vec_merge:V4QI
8890 (any_truncate:V4QI
8891 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8892 (vec_select:V4QI
8893 (match_dup 0)
8894 (parallel [(const_int 0) (const_int 1)
8895 (const_int 2) (const_int 3)]))
8896 (match_operand:QI 2 "register_operand" "Yk"))
8897 (vec_select:V12QI
8898 (match_dup 0)
8899 (parallel [(const_int 4) (const_int 5)
8900 (const_int 6) (const_int 7)
8901 (const_int 8) (const_int 9)
8902 (const_int 10) (const_int 11)
8903 (const_int 12) (const_int 13)
8904 (const_int 14) (const_int 15)]))))]
8905 "TARGET_AVX512VL"
8906 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8907 [(set_attr "type" "ssemov")
8908 (set_attr "memory" "store")
8909 (set_attr "prefix" "evex")
8910 (set_attr "mode" "TI")])
8911
8912 (define_mode_iterator VI2_128_BW_4_256
8913 [(V8HI "TARGET_AVX512BW") V8SI])
8914
8915 (define_insn "*avx512vl_<code><mode>v8qi2_store"
8916 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8917 (vec_concat:V16QI
8918 (any_truncate:V8QI
8919 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8920 (vec_select:V8QI
8921 (match_dup 0)
8922 (parallel [(const_int 8) (const_int 9)
8923 (const_int 10) (const_int 11)
8924 (const_int 12) (const_int 13)
8925 (const_int 14) (const_int 15)]))))]
8926 "TARGET_AVX512VL"
8927 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8928 [(set_attr "type" "ssemov")
8929 (set_attr "memory" "store")
8930 (set_attr "prefix" "evex")
8931 (set_attr "mode" "TI")])
8932
8933 (define_insn "avx512vl_<code><mode>v8qi2_mask"
8934 [(set (match_operand:V16QI 0 "register_operand" "=v")
8935 (vec_concat:V16QI
8936 (vec_merge:V8QI
8937 (any_truncate:V8QI
8938 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8939 (vec_select:V8QI
8940 (match_operand:V16QI 2 "vector_move_operand" "0C")
8941 (parallel [(const_int 0) (const_int 1)
8942 (const_int 2) (const_int 3)
8943 (const_int 4) (const_int 5)
8944 (const_int 6) (const_int 7)]))
8945 (match_operand:QI 3 "register_operand" "Yk"))
8946 (const_vector:V8QI [(const_int 0) (const_int 0)
8947 (const_int 0) (const_int 0)
8948 (const_int 0) (const_int 0)
8949 (const_int 0) (const_int 0)])))]
8950 "TARGET_AVX512VL"
8951 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8952 [(set_attr "type" "ssemov")
8953 (set_attr "prefix" "evex")
8954 (set_attr "mode" "TI")])
8955
8956 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
8957 [(set (match_operand:V16QI 0 "register_operand" "=v")
8958 (vec_concat:V16QI
8959 (vec_merge:V8QI
8960 (any_truncate:V8QI
8961 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8962 (const_vector:V8QI [(const_int 0) (const_int 0)
8963 (const_int 0) (const_int 0)
8964 (const_int 0) (const_int 0)
8965 (const_int 0) (const_int 0)])
8966 (match_operand:QI 2 "register_operand" "Yk"))
8967 (const_vector:V8QI [(const_int 0) (const_int 0)
8968 (const_int 0) (const_int 0)
8969 (const_int 0) (const_int 0)
8970 (const_int 0) (const_int 0)])))]
8971 "TARGET_AVX512VL"
8972 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
8973 [(set_attr "type" "ssemov")
8974 (set_attr "prefix" "evex")
8975 (set_attr "mode" "TI")])
8976
8977 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
8978 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8979 (vec_concat:V16QI
8980 (vec_merge:V8QI
8981 (any_truncate:V8QI
8982 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8983 (vec_select:V8QI
8984 (match_dup 0)
8985 (parallel [(const_int 0) (const_int 1)
8986 (const_int 2) (const_int 3)
8987 (const_int 4) (const_int 5)
8988 (const_int 6) (const_int 7)]))
8989 (match_operand:QI 2 "register_operand" "Yk"))
8990 (vec_select:V8QI
8991 (match_dup 0)
8992 (parallel [(const_int 8) (const_int 9)
8993 (const_int 10) (const_int 11)
8994 (const_int 12) (const_int 13)
8995 (const_int 14) (const_int 15)]))))]
8996 "TARGET_AVX512VL"
8997 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8998 [(set_attr "type" "ssemov")
8999 (set_attr "memory" "store")
9000 (set_attr "prefix" "evex")
9001 (set_attr "mode" "TI")])
9002
9003 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
9004 (define_mode_attr pmov_dst_4
9005 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
9006 (define_mode_attr pmov_dst_zeroed_4
9007 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
9008 (define_mode_attr pmov_suff_4
9009 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
9010
9011 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
9012 [(set (match_operand:V8HI 0 "register_operand" "=v")
9013 (vec_concat:V8HI
9014 (any_truncate:<pmov_dst_4>
9015 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
9016 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
9017 "TARGET_AVX512VL"
9018 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9019 [(set_attr "type" "ssemov")
9020 (set_attr "prefix" "evex")
9021 (set_attr "mode" "TI")])
9022
9023 (define_insn "*avx512vl_<code><mode>v4hi2_store"
9024 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9025 (vec_concat:V8HI
9026 (any_truncate:V4HI
9027 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9028 (vec_select:V4HI
9029 (match_dup 0)
9030 (parallel [(const_int 4) (const_int 5)
9031 (const_int 6) (const_int 7)]))))]
9032 "TARGET_AVX512VL"
9033 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9034 [(set_attr "type" "ssemov")
9035 (set_attr "memory" "store")
9036 (set_attr "prefix" "evex")
9037 (set_attr "mode" "TI")])
9038
9039 (define_insn "avx512vl_<code><mode>v4hi2_mask"
9040 [(set (match_operand:V8HI 0 "register_operand" "=v")
9041 (vec_concat:V8HI
9042 (vec_merge:V4HI
9043 (any_truncate:V4HI
9044 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9045 (vec_select:V4HI
9046 (match_operand:V8HI 2 "vector_move_operand" "0C")
9047 (parallel [(const_int 0) (const_int 1)
9048 (const_int 2) (const_int 3)]))
9049 (match_operand:QI 3 "register_operand" "Yk"))
9050 (const_vector:V4HI [(const_int 0) (const_int 0)
9051 (const_int 0) (const_int 0)])))]
9052 "TARGET_AVX512VL"
9053 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9054 [(set_attr "type" "ssemov")
9055 (set_attr "prefix" "evex")
9056 (set_attr "mode" "TI")])
9057
9058 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
9059 [(set (match_operand:V8HI 0 "register_operand" "=v")
9060 (vec_concat:V8HI
9061 (vec_merge:V4HI
9062 (any_truncate:V4HI
9063 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9064 (const_vector:V4HI [(const_int 0) (const_int 0)
9065 (const_int 0) (const_int 0)])
9066 (match_operand:QI 2 "register_operand" "Yk"))
9067 (const_vector:V4HI [(const_int 0) (const_int 0)
9068 (const_int 0) (const_int 0)])))]
9069 "TARGET_AVX512VL"
9070 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9071 [(set_attr "type" "ssemov")
9072 (set_attr "prefix" "evex")
9073 (set_attr "mode" "TI")])
9074
9075 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
9076 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9077 (vec_concat:V8HI
9078 (vec_merge:V4HI
9079 (any_truncate:V4HI
9080 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9081 (vec_select:V4HI
9082 (match_dup 0)
9083 (parallel [(const_int 0) (const_int 1)
9084 (const_int 2) (const_int 3)]))
9085 (match_operand:QI 2 "register_operand" "Yk"))
9086 (vec_select:V4HI
9087 (match_dup 0)
9088 (parallel [(const_int 4) (const_int 5)
9089 (const_int 6) (const_int 7)]))))]
9090 "TARGET_AVX512VL"
9091 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9092 [(set_attr "type" "ssemov")
9093 (set_attr "memory" "store")
9094 (set_attr "prefix" "evex")
9095 (set_attr "mode" "TI")])
9096
9097 (define_insn "*avx512vl_<code>v2div2hi2_store"
9098 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9099 (vec_concat:V8HI
9100 (any_truncate:V2HI
9101 (match_operand:V2DI 1 "register_operand" "v"))
9102 (vec_select:V6HI
9103 (match_dup 0)
9104 (parallel [(const_int 2) (const_int 3)
9105 (const_int 4) (const_int 5)
9106 (const_int 6) (const_int 7)]))))]
9107 "TARGET_AVX512VL"
9108 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
9109 [(set_attr "type" "ssemov")
9110 (set_attr "memory" "store")
9111 (set_attr "prefix" "evex")
9112 (set_attr "mode" "TI")])
9113
9114 (define_insn "avx512vl_<code>v2div2hi2_mask"
9115 [(set (match_operand:V8HI 0 "register_operand" "=v")
9116 (vec_concat:V8HI
9117 (vec_merge:V2HI
9118 (any_truncate:V2HI
9119 (match_operand:V2DI 1 "register_operand" "v"))
9120 (vec_select:V2HI
9121 (match_operand:V8HI 2 "vector_move_operand" "0C")
9122 (parallel [(const_int 0) (const_int 1)]))
9123 (match_operand:QI 3 "register_operand" "Yk"))
9124 (const_vector:V6HI [(const_int 0) (const_int 0)
9125 (const_int 0) (const_int 0)
9126 (const_int 0) (const_int 0)])))]
9127 "TARGET_AVX512VL"
9128 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9129 [(set_attr "type" "ssemov")
9130 (set_attr "prefix" "evex")
9131 (set_attr "mode" "TI")])
9132
9133 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
9134 [(set (match_operand:V8HI 0 "register_operand" "=v")
9135 (vec_concat:V8HI
9136 (vec_merge:V2HI
9137 (any_truncate:V2HI
9138 (match_operand:V2DI 1 "register_operand" "v"))
9139 (const_vector:V2HI [(const_int 0) (const_int 0)])
9140 (match_operand:QI 2 "register_operand" "Yk"))
9141 (const_vector:V6HI [(const_int 0) (const_int 0)
9142 (const_int 0) (const_int 0)
9143 (const_int 0) (const_int 0)])))]
9144 "TARGET_AVX512VL"
9145 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9146 [(set_attr "type" "ssemov")
9147 (set_attr "prefix" "evex")
9148 (set_attr "mode" "TI")])
9149
9150 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
9151 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9152 (vec_concat:V8HI
9153 (vec_merge:V2HI
9154 (any_truncate:V2HI
9155 (match_operand:V2DI 1 "register_operand" "v"))
9156 (vec_select:V2HI
9157 (match_dup 0)
9158 (parallel [(const_int 0) (const_int 1)]))
9159 (match_operand:QI 2 "register_operand" "Yk"))
9160 (vec_select:V6HI
9161 (match_dup 0)
9162 (parallel [(const_int 2) (const_int 3)
9163 (const_int 4) (const_int 5)
9164 (const_int 6) (const_int 7)]))))]
9165 "TARGET_AVX512VL"
9166 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9167 [(set_attr "type" "ssemov")
9168 (set_attr "memory" "store")
9169 (set_attr "prefix" "evex")
9170 (set_attr "mode" "TI")])
9171
9172 (define_insn "*avx512vl_<code>v2div2si2"
9173 [(set (match_operand:V4SI 0 "register_operand" "=v")
9174 (vec_concat:V4SI
9175 (any_truncate:V2SI
9176 (match_operand:V2DI 1 "register_operand" "v"))
9177 (match_operand:V2SI 2 "const0_operand")))]
9178 "TARGET_AVX512VL"
9179 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9180 [(set_attr "type" "ssemov")
9181 (set_attr "prefix" "evex")
9182 (set_attr "mode" "TI")])
9183
9184 (define_insn "*avx512vl_<code>v2div2si2_store"
9185 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9186 (vec_concat:V4SI
9187 (any_truncate:V2SI
9188 (match_operand:V2DI 1 "register_operand" "v"))
9189 (vec_select:V2SI
9190 (match_dup 0)
9191 (parallel [(const_int 2) (const_int 3)]))))]
9192 "TARGET_AVX512VL"
9193 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9194 [(set_attr "type" "ssemov")
9195 (set_attr "memory" "store")
9196 (set_attr "prefix" "evex")
9197 (set_attr "mode" "TI")])
9198
9199 (define_insn "avx512vl_<code>v2div2si2_mask"
9200 [(set (match_operand:V4SI 0 "register_operand" "=v")
9201 (vec_concat:V4SI
9202 (vec_merge:V2SI
9203 (any_truncate:V2SI
9204 (match_operand:V2DI 1 "register_operand" "v"))
9205 (vec_select:V2SI
9206 (match_operand:V4SI 2 "vector_move_operand" "0C")
9207 (parallel [(const_int 0) (const_int 1)]))
9208 (match_operand:QI 3 "register_operand" "Yk"))
9209 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9210 "TARGET_AVX512VL"
9211 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9212 [(set_attr "type" "ssemov")
9213 (set_attr "prefix" "evex")
9214 (set_attr "mode" "TI")])
9215
9216 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
9217 [(set (match_operand:V4SI 0 "register_operand" "=v")
9218 (vec_concat:V4SI
9219 (vec_merge:V2SI
9220 (any_truncate:V2SI
9221 (match_operand:V2DI 1 "register_operand" "v"))
9222 (const_vector:V2SI [(const_int 0) (const_int 0)])
9223 (match_operand:QI 2 "register_operand" "Yk"))
9224 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9225 "TARGET_AVX512VL"
9226 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9227 [(set_attr "type" "ssemov")
9228 (set_attr "prefix" "evex")
9229 (set_attr "mode" "TI")])
9230
9231 (define_insn "avx512vl_<code>v2div2si2_mask_store"
9232 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9233 (vec_concat:V4SI
9234 (vec_merge:V2SI
9235 (any_truncate:V2SI
9236 (match_operand:V2DI 1 "register_operand" "v"))
9237 (vec_select:V2SI
9238 (match_dup 0)
9239 (parallel [(const_int 0) (const_int 1)]))
9240 (match_operand:QI 2 "register_operand" "Yk"))
9241 (vec_select:V2SI
9242 (match_dup 0)
9243 (parallel [(const_int 2) (const_int 3)]))))]
9244 "TARGET_AVX512VL"
9245 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9246 [(set_attr "type" "ssemov")
9247 (set_attr "memory" "store")
9248 (set_attr "prefix" "evex")
9249 (set_attr "mode" "TI")])
9250
9251 (define_insn "*avx512f_<code>v8div16qi2"
9252 [(set (match_operand:V16QI 0 "register_operand" "=v")
9253 (vec_concat:V16QI
9254 (any_truncate:V8QI
9255 (match_operand:V8DI 1 "register_operand" "v"))
9256 (const_vector:V8QI [(const_int 0) (const_int 0)
9257 (const_int 0) (const_int 0)
9258 (const_int 0) (const_int 0)
9259 (const_int 0) (const_int 0)])))]
9260 "TARGET_AVX512F"
9261 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9262 [(set_attr "type" "ssemov")
9263 (set_attr "prefix" "evex")
9264 (set_attr "mode" "TI")])
9265
9266 (define_insn "*avx512f_<code>v8div16qi2_store"
9267 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9268 (vec_concat:V16QI
9269 (any_truncate:V8QI
9270 (match_operand:V8DI 1 "register_operand" "v"))
9271 (vec_select:V8QI
9272 (match_dup 0)
9273 (parallel [(const_int 8) (const_int 9)
9274 (const_int 10) (const_int 11)
9275 (const_int 12) (const_int 13)
9276 (const_int 14) (const_int 15)]))))]
9277 "TARGET_AVX512F"
9278 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9279 [(set_attr "type" "ssemov")
9280 (set_attr "memory" "store")
9281 (set_attr "prefix" "evex")
9282 (set_attr "mode" "TI")])
9283
9284 (define_insn "avx512f_<code>v8div16qi2_mask"
9285 [(set (match_operand:V16QI 0 "register_operand" "=v")
9286 (vec_concat:V16QI
9287 (vec_merge:V8QI
9288 (any_truncate:V8QI
9289 (match_operand:V8DI 1 "register_operand" "v"))
9290 (vec_select:V8QI
9291 (match_operand:V16QI 2 "vector_move_operand" "0C")
9292 (parallel [(const_int 0) (const_int 1)
9293 (const_int 2) (const_int 3)
9294 (const_int 4) (const_int 5)
9295 (const_int 6) (const_int 7)]))
9296 (match_operand:QI 3 "register_operand" "Yk"))
9297 (const_vector:V8QI [(const_int 0) (const_int 0)
9298 (const_int 0) (const_int 0)
9299 (const_int 0) (const_int 0)
9300 (const_int 0) (const_int 0)])))]
9301 "TARGET_AVX512F"
9302 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9303 [(set_attr "type" "ssemov")
9304 (set_attr "prefix" "evex")
9305 (set_attr "mode" "TI")])
9306
9307 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
9308 [(set (match_operand:V16QI 0 "register_operand" "=v")
9309 (vec_concat:V16QI
9310 (vec_merge:V8QI
9311 (any_truncate:V8QI
9312 (match_operand:V8DI 1 "register_operand" "v"))
9313 (const_vector:V8QI [(const_int 0) (const_int 0)
9314 (const_int 0) (const_int 0)
9315 (const_int 0) (const_int 0)
9316 (const_int 0) (const_int 0)])
9317 (match_operand:QI 2 "register_operand" "Yk"))
9318 (const_vector:V8QI [(const_int 0) (const_int 0)
9319 (const_int 0) (const_int 0)
9320 (const_int 0) (const_int 0)
9321 (const_int 0) (const_int 0)])))]
9322 "TARGET_AVX512F"
9323 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9324 [(set_attr "type" "ssemov")
9325 (set_attr "prefix" "evex")
9326 (set_attr "mode" "TI")])
9327
9328 (define_insn "avx512f_<code>v8div16qi2_mask_store"
9329 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9330 (vec_concat:V16QI
9331 (vec_merge:V8QI
9332 (any_truncate:V8QI
9333 (match_operand:V8DI 1 "register_operand" "v"))
9334 (vec_select:V8QI
9335 (match_dup 0)
9336 (parallel [(const_int 0) (const_int 1)
9337 (const_int 2) (const_int 3)
9338 (const_int 4) (const_int 5)
9339 (const_int 6) (const_int 7)]))
9340 (match_operand:QI 2 "register_operand" "Yk"))
9341 (vec_select:V8QI
9342 (match_dup 0)
9343 (parallel [(const_int 8) (const_int 9)
9344 (const_int 10) (const_int 11)
9345 (const_int 12) (const_int 13)
9346 (const_int 14) (const_int 15)]))))]
9347 "TARGET_AVX512F"
9348 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9349 [(set_attr "type" "ssemov")
9350 (set_attr "memory" "store")
9351 (set_attr "prefix" "evex")
9352 (set_attr "mode" "TI")])
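
;; The vpmov patterns above implement integer down-conversion from wider
;; to narrower elements, writing the result into the low part of a 128-bit
;; register (or memory) and zeroing or preserving the rest.  As a hedged
;; illustration, the V8DI->V8QI truncating form corresponds to the usual
;; AVX-512 intrinsic mapping (an assumption; the saturating variants map
;; to vpmovsqb/vpmovusqb instead):
;;
;;   #include <immintrin.h>
;;   static inline __m128i trunc_qi (__m512i x)
;;   {
;;     /* vpmovqb: keep the low byte of each of the eight 64-bit lanes.  */
;;     return _mm512_cvtepi64_epi8 (x);
;;   }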
9353
9354 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9355 ;;
9356 ;; Parallel integral arithmetic
9357 ;;
9358 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9359
9360 (define_expand "neg<mode>2"
9361 [(set (match_operand:VI_AVX2 0 "register_operand")
9362 (minus:VI_AVX2
9363 (match_dup 2)
9364 (match_operand:VI_AVX2 1 "vector_operand")))]
9365 "TARGET_SSE2"
9366 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
9367
9368 (define_expand "<plusminus_insn><mode>3"
9369 [(set (match_operand:VI_AVX2 0 "register_operand")
9370 (plusminus:VI_AVX2
9371 (match_operand:VI_AVX2 1 "vector_operand")
9372 (match_operand:VI_AVX2 2 "vector_operand")))]
9373 "TARGET_SSE2"
9374 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9375
9376 (define_expand "<plusminus_insn><mode>3_mask"
9377 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9378 (vec_merge:VI48_AVX512VL
9379 (plusminus:VI48_AVX512VL
9380 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
9381 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
9382 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
9383 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9384 "TARGET_AVX512F"
9385 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9386
9387 (define_expand "<plusminus_insn><mode>3_mask"
9388 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
9389 (vec_merge:VI12_AVX512VL
9390 (plusminus:VI12_AVX512VL
9391 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
9392 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
9393 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
9394 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9395 "TARGET_AVX512BW"
9396 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9397
9398 (define_insn "*<plusminus_insn><mode>3"
9399 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
9400 (plusminus:VI_AVX2
9401 (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
9402 (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
9403 "TARGET_SSE2
9404 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9405 "@
9406 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9407 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9408 [(set_attr "isa" "noavx,avx")
9409 (set_attr "type" "sseiadd")
9410 (set_attr "prefix_data16" "1,*")
9411 (set_attr "prefix" "<mask_prefix3>")
9412 (set_attr "mode" "<sseinsnmode>")])
9413
9414 (define_insn "*<plusminus_insn><mode>3_mask"
9415 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9416 (vec_merge:VI48_AVX512VL
9417 (plusminus:VI48_AVX512VL
9418 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9419 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
9420 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
9421 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9422 "TARGET_AVX512F
9423 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9424 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9425 [(set_attr "type" "sseiadd")
9426 (set_attr "prefix" "evex")
9427 (set_attr "mode" "<sseinsnmode>")])
9428
9429 (define_insn "*<plusminus_insn><mode>3_mask"
9430 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
9431 (vec_merge:VI12_AVX512VL
9432 (plusminus:VI12_AVX512VL
9433 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9434 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
9435 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
9436 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9437 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9438 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9439 [(set_attr "type" "sseiadd")
9440 (set_attr "prefix" "evex")
9441 (set_attr "mode" "<sseinsnmode>")])
9442
9443 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9444 [(set (match_operand:VI12_AVX2 0 "register_operand")
9445 (sat_plusminus:VI12_AVX2
9446 (match_operand:VI12_AVX2 1 "vector_operand")
9447 (match_operand:VI12_AVX2 2 "vector_operand")))]
9448 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9449 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9450
9451 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9452 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
9453 (sat_plusminus:VI12_AVX2
9454 (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
9455 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
9456 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
9457 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9458 "@
9459 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9460 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9461 [(set_attr "isa" "noavx,avx")
9462 (set_attr "type" "sseiadd")
9463 (set_attr "prefix_data16" "1,*")
9464 (set_attr "prefix" "orig,maybe_evex")
9465 (set_attr "mode" "TI")])
9466
9467 (define_expand "mul<mode>3<mask_name>"
9468 [(set (match_operand:VI1_AVX512 0 "register_operand")
9469 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
9470 (match_operand:VI1_AVX512 2 "register_operand")))]
9471 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9472 {
9473 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
9474 DONE;
9475 })
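
;; There is no byte-element multiply instruction, so ix86_expand_vecop_qihi
;; synthesizes the V*QI multiply from 16-bit multiplies.  One common way to
;; do this by hand (a sketch only; the helper may emit a different,
;; unpack-based sequence):
;;
;;   #include <emmintrin.h>
;;   static inline __m128i mul_epi8_sse2 (__m128i a, __m128i b)
;;   {
;;     __m128i mask = _mm_set1_epi16 (0x00ff);
;;     /* The low byte of each 16-bit product depends only on the low bytes.  */
;;     __m128i even = _mm_mullo_epi16 (a, b);
;;     __m128i odd  = _mm_mullo_epi16 (_mm_srli_epi16 (a, 8),
;;                                     _mm_srli_epi16 (b, 8));
;;     return _mm_or_si128 (_mm_and_si128 (even, mask),
;;                          _mm_slli_epi16 (odd, 8));
;;   }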
9476
9477 (define_expand "mul<mode>3<mask_name>"
9478 [(set (match_operand:VI2_AVX2 0 "register_operand")
9479 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
9480 (match_operand:VI2_AVX2 2 "vector_operand")))]
9481 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9482 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
9483
9484 (define_insn "*mul<mode>3<mask_name>"
9485 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9486 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
9487 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
9488 "TARGET_SSE2
9489 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9490 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9491 "@
9492 pmullw\t{%2, %0|%0, %2}
9493 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9494 [(set_attr "isa" "noavx,avx")
9495 (set_attr "type" "sseimul")
9496 (set_attr "prefix_data16" "1,*")
9497 (set_attr "prefix" "orig,vex")
9498 (set_attr "mode" "<sseinsnmode>")])
9499
9500 (define_expand "<s>mul<mode>3_highpart<mask_name>"
9501 [(set (match_operand:VI2_AVX2 0 "register_operand")
9502 (truncate:VI2_AVX2
9503 (lshiftrt:<ssedoublemode>
9504 (mult:<ssedoublemode>
9505 (any_extend:<ssedoublemode>
9506 (match_operand:VI2_AVX2 1 "vector_operand"))
9507 (any_extend:<ssedoublemode>
9508 (match_operand:VI2_AVX2 2 "vector_operand")))
9509 (const_int 16))))]
9510 "TARGET_SSE2
9511 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9512 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
9513
9514 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
9515 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9516 (truncate:VI2_AVX2
9517 (lshiftrt:<ssedoublemode>
9518 (mult:<ssedoublemode>
9519 (any_extend:<ssedoublemode>
9520 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
9521 (any_extend:<ssedoublemode>
9522 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
9523 (const_int 16))))]
9524 "TARGET_SSE2
9525 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9526 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9527 "@
9528 pmulh<u>w\t{%2, %0|%0, %2}
9529 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9530 [(set_attr "isa" "noavx,avx")
9531 (set_attr "type" "sseimul")
9532 (set_attr "prefix_data16" "1,*")
9533 (set_attr "prefix" "orig,vex")
9534 (set_attr "mode" "<sseinsnmode>")])
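
;; The highpart patterns keep bits [31:16] of each 16x16 product, which is
;; exactly what pmulhw (signed) and pmulhuw (unsigned) return.  In C, the
;; assumed intrinsic mapping is simply:
;;
;;   #include <emmintrin.h>
;;   static inline __m128i mulhi_epi16 (__m128i a, __m128i b)
;;   {
;;     return _mm_mulhi_epi16 (a, b);  /* pmulhw; _mm_mulhi_epu16 is pmulhuw.  */
;;   }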
9535
9536 (define_expand "vec_widen_umult_even_v16si<mask_name>"
9537 [(set (match_operand:V8DI 0 "register_operand")
9538 (mult:V8DI
9539 (zero_extend:V8DI
9540 (vec_select:V8SI
9541 (match_operand:V16SI 1 "nonimmediate_operand")
9542 (parallel [(const_int 0) (const_int 2)
9543 (const_int 4) (const_int 6)
9544 (const_int 8) (const_int 10)
9545 (const_int 12) (const_int 14)])))
9546 (zero_extend:V8DI
9547 (vec_select:V8SI
9548 (match_operand:V16SI 2 "nonimmediate_operand")
9549 (parallel [(const_int 0) (const_int 2)
9550 (const_int 4) (const_int 6)
9551 (const_int 8) (const_int 10)
9552 (const_int 12) (const_int 14)])))))]
9553 "TARGET_AVX512F"
9554 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
9555
9556 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
9557 [(set (match_operand:V8DI 0 "register_operand" "=v")
9558 (mult:V8DI
9559 (zero_extend:V8DI
9560 (vec_select:V8SI
9561 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9562 (parallel [(const_int 0) (const_int 2)
9563 (const_int 4) (const_int 6)
9564 (const_int 8) (const_int 10)
9565 (const_int 12) (const_int 14)])))
9566 (zero_extend:V8DI
9567 (vec_select:V8SI
9568 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9569 (parallel [(const_int 0) (const_int 2)
9570 (const_int 4) (const_int 6)
9571 (const_int 8) (const_int 10)
9572 (const_int 12) (const_int 14)])))))]
9573 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9574 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9575 [(set_attr "isa" "avx512f")
9576 (set_attr "type" "sseimul")
9577 (set_attr "prefix_extra" "1")
9578 (set_attr "prefix" "evex")
9579 (set_attr "mode" "XI")])
9580
9581 (define_expand "vec_widen_umult_even_v8si<mask_name>"
9582 [(set (match_operand:V4DI 0 "register_operand")
9583 (mult:V4DI
9584 (zero_extend:V4DI
9585 (vec_select:V4SI
9586 (match_operand:V8SI 1 "nonimmediate_operand")
9587 (parallel [(const_int 0) (const_int 2)
9588 (const_int 4) (const_int 6)])))
9589 (zero_extend:V4DI
9590 (vec_select:V4SI
9591 (match_operand:V8SI 2 "nonimmediate_operand")
9592 (parallel [(const_int 0) (const_int 2)
9593 (const_int 4) (const_int 6)])))))]
9594 "TARGET_AVX2 && <mask_avx512vl_condition>"
9595 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
9596
9597 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
9598 [(set (match_operand:V4DI 0 "register_operand" "=v")
9599 (mult:V4DI
9600 (zero_extend:V4DI
9601 (vec_select:V4SI
9602 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9603 (parallel [(const_int 0) (const_int 2)
9604 (const_int 4) (const_int 6)])))
9605 (zero_extend:V4DI
9606 (vec_select:V4SI
9607 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9608 (parallel [(const_int 0) (const_int 2)
9609 (const_int 4) (const_int 6)])))))]
9610 "TARGET_AVX2 && <mask_avx512vl_condition>
9611 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9612 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9613 [(set_attr "type" "sseimul")
9614 (set_attr "prefix" "maybe_evex")
9615 (set_attr "mode" "OI")])
9616
9617 (define_expand "vec_widen_umult_even_v4si<mask_name>"
9618 [(set (match_operand:V2DI 0 "register_operand")
9619 (mult:V2DI
9620 (zero_extend:V2DI
9621 (vec_select:V2SI
9622 (match_operand:V4SI 1 "vector_operand")
9623 (parallel [(const_int 0) (const_int 2)])))
9624 (zero_extend:V2DI
9625 (vec_select:V2SI
9626 (match_operand:V4SI 2 "vector_operand")
9627 (parallel [(const_int 0) (const_int 2)])))))]
9628 "TARGET_SSE2 && <mask_avx512vl_condition>"
9629 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
9630
9631 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
9632 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9633 (mult:V2DI
9634 (zero_extend:V2DI
9635 (vec_select:V2SI
9636 (match_operand:V4SI 1 "vector_operand" "%0,v")
9637 (parallel [(const_int 0) (const_int 2)])))
9638 (zero_extend:V2DI
9639 (vec_select:V2SI
9640 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
9641 (parallel [(const_int 0) (const_int 2)])))))]
9642 "TARGET_SSE2 && <mask_avx512vl_condition>
9643 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9644 "@
9645 pmuludq\t{%2, %0|%0, %2}
9646 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9647 [(set_attr "isa" "noavx,avx")
9648 (set_attr "type" "sseimul")
9649 (set_attr "prefix_data16" "1,*")
9650 (set_attr "prefix" "orig,maybe_evex")
9651 (set_attr "mode" "TI")])
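
;; The even-element widening unsigned multiply is what pmuludq provides
;; directly: elements 0 and 2 of each source are zero-extended and
;; multiplied into two 64-bit lanes.  Assumed intrinsic mapping:
;;
;;   #include <emmintrin.h>
;;   static inline __m128i umul_even (__m128i a, __m128i b)
;;   {
;;     /* { (uint64) a[0] * b[0], (uint64) a[2] * b[2] }  */
;;     return _mm_mul_epu32 (a, b);
;;   }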
9652
9653 (define_expand "vec_widen_smult_even_v16si<mask_name>"
9654 [(set (match_operand:V8DI 0 "register_operand")
9655 (mult:V8DI
9656 (sign_extend:V8DI
9657 (vec_select:V8SI
9658 (match_operand:V16SI 1 "nonimmediate_operand")
9659 (parallel [(const_int 0) (const_int 2)
9660 (const_int 4) (const_int 6)
9661 (const_int 8) (const_int 10)
9662 (const_int 12) (const_int 14)])))
9663 (sign_extend:V8DI
9664 (vec_select:V8SI
9665 (match_operand:V16SI 2 "nonimmediate_operand")
9666 (parallel [(const_int 0) (const_int 2)
9667 (const_int 4) (const_int 6)
9668 (const_int 8) (const_int 10)
9669 (const_int 12) (const_int 14)])))))]
9670 "TARGET_AVX512F"
9671 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
9672
9673 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
9674 [(set (match_operand:V8DI 0 "register_operand" "=v")
9675 (mult:V8DI
9676 (sign_extend:V8DI
9677 (vec_select:V8SI
9678 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9679 (parallel [(const_int 0) (const_int 2)
9680 (const_int 4) (const_int 6)
9681 (const_int 8) (const_int 10)
9682 (const_int 12) (const_int 14)])))
9683 (sign_extend:V8DI
9684 (vec_select:V8SI
9685 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9686 (parallel [(const_int 0) (const_int 2)
9687 (const_int 4) (const_int 6)
9688 (const_int 8) (const_int 10)
9689 (const_int 12) (const_int 14)])))))]
9690 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9691 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9692 [(set_attr "isa" "avx512f")
9693 (set_attr "type" "sseimul")
9694 (set_attr "prefix_extra" "1")
9695 (set_attr "prefix" "evex")
9696 (set_attr "mode" "XI")])
9697
9698 (define_expand "vec_widen_smult_even_v8si<mask_name>"
9699 [(set (match_operand:V4DI 0 "register_operand")
9700 (mult:V4DI
9701 (sign_extend:V4DI
9702 (vec_select:V4SI
9703 (match_operand:V8SI 1 "nonimmediate_operand")
9704 (parallel [(const_int 0) (const_int 2)
9705 (const_int 4) (const_int 6)])))
9706 (sign_extend:V4DI
9707 (vec_select:V4SI
9708 (match_operand:V8SI 2 "nonimmediate_operand")
9709 (parallel [(const_int 0) (const_int 2)
9710 (const_int 4) (const_int 6)])))))]
9711 "TARGET_AVX2 && <mask_avx512vl_condition>"
9712 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
9713
9714 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
9715 [(set (match_operand:V4DI 0 "register_operand" "=v")
9716 (mult:V4DI
9717 (sign_extend:V4DI
9718 (vec_select:V4SI
9719 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9720 (parallel [(const_int 0) (const_int 2)
9721 (const_int 4) (const_int 6)])))
9722 (sign_extend:V4DI
9723 (vec_select:V4SI
9724 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9725 (parallel [(const_int 0) (const_int 2)
9726 (const_int 4) (const_int 6)])))))]
9727 "TARGET_AVX2
9728 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9729 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9730 [(set_attr "type" "sseimul")
9731 (set_attr "prefix_extra" "1")
9732 (set_attr "prefix" "vex")
9733 (set_attr "mode" "OI")])
9734
9735 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
9736 [(set (match_operand:V2DI 0 "register_operand")
9737 (mult:V2DI
9738 (sign_extend:V2DI
9739 (vec_select:V2SI
9740 (match_operand:V4SI 1 "vector_operand")
9741 (parallel [(const_int 0) (const_int 2)])))
9742 (sign_extend:V2DI
9743 (vec_select:V2SI
9744 (match_operand:V4SI 2 "vector_operand")
9745 (parallel [(const_int 0) (const_int 2)])))))]
9746 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
9747 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
9748
9749 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
9750 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
9751 (mult:V2DI
9752 (sign_extend:V2DI
9753 (vec_select:V2SI
9754 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
9755 (parallel [(const_int 0) (const_int 2)])))
9756 (sign_extend:V2DI
9757 (vec_select:V2SI
9758 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
9759 (parallel [(const_int 0) (const_int 2)])))))]
9760 "TARGET_SSE4_1 && <mask_avx512vl_condition>
9761 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9762 "@
9763 pmuldq\t{%2, %0|%0, %2}
9764 pmuldq\t{%2, %0|%0, %2}
9765 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9766 [(set_attr "isa" "noavx,noavx,avx")
9767 (set_attr "type" "sseimul")
9768 (set_attr "prefix_data16" "1,1,*")
9769 (set_attr "prefix_extra" "1")
9770 (set_attr "prefix" "orig,orig,vex")
9771 (set_attr "mode" "TI")])
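
;; pmuldq is the signed counterpart of pmuludq and requires SSE4.1.
;; Assumed intrinsic mapping:
;;
;;   #include <smmintrin.h>
;;   static inline __m128i smul_even (__m128i a, __m128i b)
;;   {
;;     /* { (int64) a[0] * b[0], (int64) a[2] * b[2] }  */
;;     return _mm_mul_epi32 (a, b);
;;   }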
9772
9773 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
9774 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
9775 (unspec:<sseunpackmode>
9776 [(match_operand:VI2_AVX2 1 "register_operand" "v")
9777 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
9778 UNSPEC_PMADDWD512))]
9779 "TARGET_AVX512BW && <mask_mode512bit_condition>"
9780 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
9781 [(set_attr "type" "sseiadd")
9782 (set_attr "prefix" "evex")
9783 (set_attr "mode" "XI")])
9784
9785 (define_expand "avx2_pmaddwd"
9786 [(set (match_operand:V8SI 0 "register_operand")
9787 (plus:V8SI
9788 (mult:V8SI
9789 (sign_extend:V8SI
9790 (vec_select:V8HI
9791 (match_operand:V16HI 1 "nonimmediate_operand")
9792 (parallel [(const_int 0) (const_int 2)
9793 (const_int 4) (const_int 6)
9794 (const_int 8) (const_int 10)
9795 (const_int 12) (const_int 14)])))
9796 (sign_extend:V8SI
9797 (vec_select:V8HI
9798 (match_operand:V16HI 2 "nonimmediate_operand")
9799 (parallel [(const_int 0) (const_int 2)
9800 (const_int 4) (const_int 6)
9801 (const_int 8) (const_int 10)
9802 (const_int 12) (const_int 14)]))))
9803 (mult:V8SI
9804 (sign_extend:V8SI
9805 (vec_select:V8HI (match_dup 1)
9806 (parallel [(const_int 1) (const_int 3)
9807 (const_int 5) (const_int 7)
9808 (const_int 9) (const_int 11)
9809 (const_int 13) (const_int 15)])))
9810 (sign_extend:V8SI
9811 (vec_select:V8HI (match_dup 2)
9812 (parallel [(const_int 1) (const_int 3)
9813 (const_int 5) (const_int 7)
9814 (const_int 9) (const_int 11)
9815 (const_int 13) (const_int 15)]))))))]
9816 "TARGET_AVX2"
9817 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
9818
9819 (define_insn "*avx2_pmaddwd"
9820 [(set (match_operand:V8SI 0 "register_operand" "=x,v")
9821 (plus:V8SI
9822 (mult:V8SI
9823 (sign_extend:V8SI
9824 (vec_select:V8HI
9825 (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
9826 (parallel [(const_int 0) (const_int 2)
9827 (const_int 4) (const_int 6)
9828 (const_int 8) (const_int 10)
9829 (const_int 12) (const_int 14)])))
9830 (sign_extend:V8SI
9831 (vec_select:V8HI
9832 (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
9833 (parallel [(const_int 0) (const_int 2)
9834 (const_int 4) (const_int 6)
9835 (const_int 8) (const_int 10)
9836 (const_int 12) (const_int 14)]))))
9837 (mult:V8SI
9838 (sign_extend:V8SI
9839 (vec_select:V8HI (match_dup 1)
9840 (parallel [(const_int 1) (const_int 3)
9841 (const_int 5) (const_int 7)
9842 (const_int 9) (const_int 11)
9843 (const_int 13) (const_int 15)])))
9844 (sign_extend:V8SI
9845 (vec_select:V8HI (match_dup 2)
9846 (parallel [(const_int 1) (const_int 3)
9847 (const_int 5) (const_int 7)
9848 (const_int 9) (const_int 11)
9849 (const_int 13) (const_int 15)]))))))]
9850 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9851 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9852 [(set_attr "type" "sseiadd")
9853 (set_attr "isa" "*,avx512bw")
9854 (set_attr "prefix" "vex,evex")
9855 (set_attr "mode" "OI")])
9856
9857 (define_expand "sse2_pmaddwd"
9858 [(set (match_operand:V4SI 0 "register_operand")
9859 (plus:V4SI
9860 (mult:V4SI
9861 (sign_extend:V4SI
9862 (vec_select:V4HI
9863 (match_operand:V8HI 1 "vector_operand")
9864 (parallel [(const_int 0) (const_int 2)
9865 (const_int 4) (const_int 6)])))
9866 (sign_extend:V4SI
9867 (vec_select:V4HI
9868 (match_operand:V8HI 2 "vector_operand")
9869 (parallel [(const_int 0) (const_int 2)
9870 (const_int 4) (const_int 6)]))))
9871 (mult:V4SI
9872 (sign_extend:V4SI
9873 (vec_select:V4HI (match_dup 1)
9874 (parallel [(const_int 1) (const_int 3)
9875 (const_int 5) (const_int 7)])))
9876 (sign_extend:V4SI
9877 (vec_select:V4HI (match_dup 2)
9878 (parallel [(const_int 1) (const_int 3)
9879 (const_int 5) (const_int 7)]))))))]
9880 "TARGET_SSE2"
9881 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
9882
9883 (define_insn "*sse2_pmaddwd"
9884 [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
9885 (plus:V4SI
9886 (mult:V4SI
9887 (sign_extend:V4SI
9888 (vec_select:V4HI
9889 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
9890 (parallel [(const_int 0) (const_int 2)
9891 (const_int 4) (const_int 6)])))
9892 (sign_extend:V4SI
9893 (vec_select:V4HI
9894 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
9895 (parallel [(const_int 0) (const_int 2)
9896 (const_int 4) (const_int 6)]))))
9897 (mult:V4SI
9898 (sign_extend:V4SI
9899 (vec_select:V4HI (match_dup 1)
9900 (parallel [(const_int 1) (const_int 3)
9901 (const_int 5) (const_int 7)])))
9902 (sign_extend:V4SI
9903 (vec_select:V4HI (match_dup 2)
9904 (parallel [(const_int 1) (const_int 3)
9905 (const_int 5) (const_int 7)]))))))]
9906 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9907 "@
9908 pmaddwd\t{%2, %0|%0, %2}
9909 vpmaddwd\t{%2, %1, %0|%0, %1, %2}
9910 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9911 [(set_attr "isa" "noavx,avx,avx512bw")
9912 (set_attr "type" "sseiadd")
9913 (set_attr "atom_unit" "simul")
9914 (set_attr "prefix_data16" "1,*,*")
9915 (set_attr "prefix" "orig,vex,evex")
9916 (set_attr "mode" "TI")])
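
;; The pmaddwd patterns compute a[2i]*b[2i] + a[2i+1]*b[2i+1] into each
;; 32-bit result lane, which is why the RTL multiplies the even and odd
;; vec_selects separately and then adds them.  Assumed intrinsic mapping:
;;
;;   #include <emmintrin.h>
;;   static inline __m128i madd_epi16 (__m128i a, __m128i b)
;;   {
;;     /* Four 32-bit lanes, each the sum of two adjacent 16x16 products.  */
;;     return _mm_madd_epi16 (a, b);
;;   }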
9917
9918 (define_insn "avx512dq_mul<mode>3<mask_name>"
9919 [(set (match_operand:VI8 0 "register_operand" "=v")
9920 (mult:VI8
9921 (match_operand:VI8 1 "register_operand" "v")
9922 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
9923 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
9924 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9925 [(set_attr "type" "sseimul")
9926 (set_attr "prefix" "evex")
9927 (set_attr "mode" "<sseinsnmode>")])
9928
9929 (define_expand "mul<mode>3<mask_name>"
9930 [(set (match_operand:VI4_AVX512F 0 "register_operand")
9931 (mult:VI4_AVX512F
9932 (match_operand:VI4_AVX512F 1 "general_vector_operand")
9933 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
9934 "TARGET_SSE2 && <mask_mode512bit_condition>"
9935 {
9936 if (TARGET_SSE4_1)
9937 {
9938 if (!vector_operand (operands[1], <MODE>mode))
9939 operands[1] = force_reg (<MODE>mode, operands[1]);
9940 if (!vector_operand (operands[2], <MODE>mode))
9941 operands[2] = force_reg (<MODE>mode, operands[2]);
9942 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
9943 }
9944 else
9945 {
9946 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
9947 DONE;
9948 }
9949 })
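
;; Without SSE4.1's pmulld, ix86_expand_sse2_mulv4si3 builds the 32-bit
;; element multiply out of even/odd pmuludq and shuffles.  A hand-written
;; sketch of that style of sequence (illustrative; the helper's exact
;; output may differ):
;;
;;   #include <emmintrin.h>
;;   static inline __m128i mullo_epi32_sse2 (__m128i a, __m128i b)
;;   {
;;     __m128i even = _mm_mul_epu32 (a, b);                  /* a0*b0, a2*b2 */
;;     __m128i odd  = _mm_mul_epu32 (_mm_srli_epi64 (a, 32),
;;                                   _mm_srli_epi64 (b, 32)); /* a1*b1, a3*b3 */
;;     /* Keep the low 32 bits of each product and re-interleave.  */
;;     even = _mm_shuffle_epi32 (even, _MM_SHUFFLE (0, 0, 2, 0));
;;     odd  = _mm_shuffle_epi32 (odd,  _MM_SHUFFLE (0, 0, 2, 0));
;;     return _mm_unpacklo_epi32 (even, odd);
;;   }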
9950
9951 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
9952 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
9953 (mult:VI4_AVX512F
9954 (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
9955 (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
9956 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
9957 "@
9958 pmulld\t{%2, %0|%0, %2}
9959 pmulld\t{%2, %0|%0, %2}
9960 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9961 [(set_attr "isa" "noavx,noavx,avx")
9962 (set_attr "type" "sseimul")
9963 (set_attr "prefix_extra" "1")
9964 (set_attr "prefix" "<mask_prefix4>")
9965 (set_attr "btver2_decode" "vector,vector,vector")
9966 (set_attr "mode" "<sseinsnmode>")])
9967
9968 (define_expand "mul<mode>3"
9969 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
9970 (mult:VI8_AVX2_AVX512F
9971 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
9972 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
9973 "TARGET_SSE2"
9974 {
9975 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
9976 DONE;
9977 })
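
;; Before AVX-512DQ there is no full 64x64->64 element multiply, so
;; ix86_expand_sse2_mulvxdi3 is expected to assemble each lane from
;; 32x32->64 pmuludq partial products using the standard identity
;;
;;   a * b  ==  lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)   (mod 2^64)
;;
;; where lo/hi denote the 32-bit halves; the exact instruction sequence is
;; left to the helper.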
9978
9979 (define_expand "vec_widen_<s>mult_hi_<mode>"
9980 [(match_operand:<sseunpackmode> 0 "register_operand")
9981 (any_extend:<sseunpackmode>
9982 (match_operand:VI124_AVX2 1 "register_operand"))
9983 (match_operand:VI124_AVX2 2 "register_operand")]
9984 "TARGET_SSE2"
9985 {
9986 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
9987 <u_bool>, true);
9988 DONE;
9989 })
9990
9991 (define_expand "vec_widen_<s>mult_lo_<mode>"
9992 [(match_operand:<sseunpackmode> 0 "register_operand")
9993 (any_extend:<sseunpackmode>
9994 (match_operand:VI124_AVX2 1 "register_operand"))
9995 (match_operand:VI124_AVX2 2 "register_operand")]
9996 "TARGET_SSE2"
9997 {
9998 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
9999 <u_bool>, false);
10000 DONE;
10001 })
10002
10003 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
10004 ;; named patterns, but signed V4SI needs special help for plain SSE2.
10005 (define_expand "vec_widen_smult_even_v4si"
10006 [(match_operand:V2DI 0 "register_operand")
10007 (match_operand:V4SI 1 "vector_operand")
10008 (match_operand:V4SI 2 "vector_operand")]
10009 "TARGET_SSE2"
10010 {
10011 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10012 false, false);
10013 DONE;
10014 })
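
;; Plain SSE2 only has the unsigned pmuludq, so the signed even-element
;; widening multiply has to be synthesized.  One standard identity, per
;; 64-bit result lane (a sketch of the math only;
;; ix86_expand_mul_widen_evenodd may use a different sequence):
;;
;;   smul_even (a, b) == umul_even (a, b)
;;                       - ((a < 0 ? (uint64_t) (uint32_t) b : 0) << 32)
;;                       - ((b < 0 ? (uint64_t) (uint32_t) a : 0) << 32)   (mod 2^64)
;;
;; i.e. subtract the other operand, shifted up 32 bits, wherever a sign
;; bit was lost to zero extension.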
10015
10016 (define_expand "vec_widen_<s>mult_odd_<mode>"
10017 [(match_operand:<sseunpackmode> 0 "register_operand")
10018 (any_extend:<sseunpackmode>
10019 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
10020 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
10021 "TARGET_SSE2"
10022 {
10023 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10024 <u_bool>, true);
10025 DONE;
10026 })
10027
10028 (define_mode_attr SDOT_PMADD_SUF
10029 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
10030
10031 (define_expand "sdot_prod<mode>"
10032 [(match_operand:<sseunpackmode> 0 "register_operand")
10033 (match_operand:VI2_AVX2 1 "register_operand")
10034 (match_operand:VI2_AVX2 2 "register_operand")
10035 (match_operand:<sseunpackmode> 3 "register_operand")]
10036 "TARGET_SSE2"
10037 {
10038 rtx t = gen_reg_rtx (<sseunpackmode>mode);
10039 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
10040 emit_insn (gen_rtx_SET (operands[0],
10041 gen_rtx_PLUS (<sseunpackmode>mode,
10042 operands[3], t)));
10043 DONE;
10044 })
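
;; The sdot_prod expander is the pmaddwd-then-accumulate form of a dot
;; product.  Loops of roughly this shape are what the vectorizer's
;; DOT_PROD_EXPR, and hence this pattern, is meant to catch (illustrative
;; C; vectorization is of course not guaranteed):
;;
;;   int dot16 (const short *a, const short *b, int n)
;;   {
;;     int sum = 0;
;;     for (int i = 0; i < n; i++)
;;       sum += a[i] * b[i];
;;     return sum;
;;   }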
10045
10046 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
10047 ;; back together when madd is available.
10048 (define_expand "sdot_prodv4si"
10049 [(match_operand:V2DI 0 "register_operand")
10050 (match_operand:V4SI 1 "register_operand")
10051 (match_operand:V4SI 2 "register_operand")
10052 (match_operand:V2DI 3 "register_operand")]
10053 "TARGET_XOP"
10054 {
10055 rtx t = gen_reg_rtx (V2DImode);
10056 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
10057 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
10058 DONE;
10059 })
10060
10061 (define_expand "usadv16qi"
10062 [(match_operand:V4SI 0 "register_operand")
10063 (match_operand:V16QI 1 "register_operand")
10064 (match_operand:V16QI 2 "vector_operand")
10065 (match_operand:V4SI 3 "vector_operand")]
10066 "TARGET_SSE2"
10067 {
10068 rtx t1 = gen_reg_rtx (V2DImode);
10069 rtx t2 = gen_reg_rtx (V4SImode);
10070 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
10071 convert_move (t2, t1, 0);
10072 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
10073 DONE;
10074 })
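
;; usadv16qi is built around psadbw, which sums the absolute differences
;; of the byte pairs in each 64-bit half and leaves the 16-bit sum
;; zero-extended in that half; the expander then widens and adds the
;; accumulator.  Assumed intrinsic mapping of the building block:
;;
;;   #include <emmintrin.h>
;;   static inline __m128i sad_epu8 (__m128i a, __m128i b)
;;   {
;;     /* Two 64-bit lanes, each holding an 8-byte SAD in its low 16 bits.  */
;;     return _mm_sad_epu8 (a, b);
;;   }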
10075
10076 (define_expand "usadv32qi"
10077 [(match_operand:V8SI 0 "register_operand")
10078 (match_operand:V32QI 1 "register_operand")
10079 (match_operand:V32QI 2 "nonimmediate_operand")
10080 (match_operand:V8SI 3 "nonimmediate_operand")]
10081 "TARGET_AVX2"
10082 {
10083 rtx t1 = gen_reg_rtx (V4DImode);
10084 rtx t2 = gen_reg_rtx (V8SImode);
10085 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
10086 convert_move (t2, t1, 0);
10087 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
10088 DONE;
10089 })
10090
10091 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
10092 [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
10093 (ashiftrt:VI24_AVX512BW_1
10094 (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
10095 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
10096 "TARGET_AVX512VL"
10097 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10098 [(set_attr "type" "sseishft")
10099 (set (attr "length_immediate")
10100 (if_then_else (match_operand 2 "const_int_operand")
10101 (const_string "1")
10102 (const_string "0")))
10103 (set_attr "mode" "<sseinsnmode>")])
10104
10105 (define_insn "ashr<mode>3"
10106 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
10107 (ashiftrt:VI24_AVX2
10108 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
10109 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
10110 "TARGET_SSE2"
10111 "@
10112 psra<ssemodesuffix>\t{%2, %0|%0, %2}
10113 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10114 [(set_attr "isa" "noavx,avx")
10115 (set_attr "type" "sseishft")
10116 (set (attr "length_immediate")
10117 (if_then_else (match_operand 2 "const_int_operand")
10118 (const_string "1")
10119 (const_string "0")))
10120 (set_attr "prefix_data16" "1,*")
10121 (set_attr "prefix" "orig,vex")
10122 (set_attr "mode" "<sseinsnmode>")])
10123
10124 (define_insn "<mask_codefor>ashrv2di3<mask_name>"
10125 [(set (match_operand:V2DI 0 "register_operand" "=v,v")
10126 (ashiftrt:V2DI
10127 (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
10128 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10129 "TARGET_AVX512VL"
10130 "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10131 [(set_attr "type" "sseishft")
10132 (set (attr "length_immediate")
10133 (if_then_else (match_operand 2 "const_int_operand")
10134 (const_string "1")
10135 (const_string "0")))
10136 (set_attr "mode" "TI")])
10137
10138 (define_insn "ashr<mode>3<mask_name>"
10139 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
10140 (ashiftrt:VI248_AVX512BW_AVX512VL
10141 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
10142 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
10143 "TARGET_AVX512F"
10144 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10145 [(set_attr "type" "sseishft")
10146 (set (attr "length_immediate")
10147 (if_then_else (match_operand 2 "const_int_operand")
10148 (const_string "1")
10149 (const_string "0")))
10150 (set_attr "mode" "<sseinsnmode>")])
10151
10152 (define_insn "<shift_insn><mode>3<mask_name>"
10153 [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
10154 (any_lshift:VI2_AVX2_AVX512BW
10155 (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
10156 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
10157 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10158 "@
10159 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10160 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10161 [(set_attr "isa" "noavx,avx")
10162 (set_attr "type" "sseishft")
10163 (set (attr "length_immediate")
10164 (if_then_else (match_operand 2 "const_int_operand")
10165 (const_string "1")
10166 (const_string "0")))
10167 (set_attr "prefix_data16" "1,*")
10168 (set_attr "prefix" "orig,vex")
10169 (set_attr "mode" "<sseinsnmode>")])
10170
10171 (define_insn "<shift_insn><mode>3<mask_name>"
10172 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,x,v")
10173 (any_lshift:VI48_AVX2
10174 (match_operand:VI48_AVX2 1 "register_operand" "0,x,v")
10175 (match_operand:SI 2 "nonmemory_operand" "xN,xN,vN")))]
10176 "TARGET_SSE2 && <mask_mode512bit_condition>"
10177 "@
10178 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10179 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
10180 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10181 [(set_attr "isa" "noavx,avx,avx512bw")
10182 (set_attr "type" "sseishft")
10183 (set (attr "length_immediate")
10184 (if_then_else (match_operand 2 "const_int_operand")
10185 (const_string "1")
10186 (const_string "0")))
10187 (set_attr "prefix_data16" "1,*,*")
10188 (set_attr "prefix" "orig,vex,evex")
10189 (set_attr "mode" "<sseinsnmode>")])
10190
10191 (define_insn "<shift_insn><mode>3<mask_name>"
10192 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
10193 (any_lshift:VI48_512
10194 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
10195 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
10196 "TARGET_AVX512F && <mask_mode512bit_condition>"
10197 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10198 [(set_attr "isa" "avx512f")
10199 (set_attr "type" "sseishft")
10200 (set (attr "length_immediate")
10201 (if_then_else (match_operand 2 "const_int_operand")
10202 (const_string "1")
10203 (const_string "0")))
10204 (set_attr "prefix" "evex")
10205 (set_attr "mode" "<sseinsnmode>")])
10206
10207
10208 (define_expand "vec_shl_<mode>"
10209 [(set (match_dup 3)
10210 (ashift:V1TI
10211 (match_operand:VI_128 1 "register_operand")
10212 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10213 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10214 "TARGET_SSE2"
10215 {
10216 operands[1] = gen_lowpart (V1TImode, operands[1]);
10217 operands[3] = gen_reg_rtx (V1TImode);
10218 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10219 })
10220
10221 (define_insn "<sse2_avx2>_ashl<mode>3"
10222 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10223 (ashift:VIMAX_AVX2
10224 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10225 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10226 "TARGET_SSE2"
10227 {
10228 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10229
10230 switch (which_alternative)
10231 {
10232 case 0:
10233 return "pslldq\t{%2, %0|%0, %2}";
10234 case 1:
10235 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
10236 default:
10237 gcc_unreachable ();
10238 }
10239 }
10240 [(set_attr "isa" "noavx,avx")
10241 (set_attr "type" "sseishft")
10242 (set_attr "length_immediate" "1")
10243 (set_attr "prefix_data16" "1,*")
10244 (set_attr "prefix" "orig,vex")
10245 (set_attr "mode" "<sseinsnmode>")])
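
;; Note that operand 2 arrives as a bit count (a multiple of 8, per the
;; const_0_to_255_mul_8_operand predicate) and is divided by 8 in the
;; output code because pslldq/vpslldq shift by whole bytes; e.g. a
;; whole-vector shift by 32 bits is emitted as "pslldq $4, %xmm0".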
10246
10247 (define_expand "vec_shr_<mode>"
10248 [(set (match_dup 3)
10249 (lshiftrt:V1TI
10250 (match_operand:VI_128 1 "register_operand")
10251 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10252 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10253 "TARGET_SSE2"
10254 {
10255 operands[1] = gen_lowpart (V1TImode, operands[1]);
10256 operands[3] = gen_reg_rtx (V1TImode);
10257 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10258 })
10259
10260 (define_insn "<sse2_avx2>_lshr<mode>3"
10261 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10262 (lshiftrt:VIMAX_AVX2
10263 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10264 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10265 "TARGET_SSE2"
10266 {
10267 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10268
10269 switch (which_alternative)
10270 {
10271 case 0:
10272 return "psrldq\t{%2, %0|%0, %2}";
10273 case 1:
10274 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10275 default:
10276 gcc_unreachable ();
10277 }
10278 }
10279 [(set_attr "isa" "noavx,avx")
10280 (set_attr "type" "sseishft")
10281 (set_attr "length_immediate" "1")
10282 (set_attr "atom_unit" "sishuf")
10283 (set_attr "prefix_data16" "1,*")
10284 (set_attr "prefix" "orig,vex")
10285 (set_attr "mode" "<sseinsnmode>")])
10286
10287 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
10288 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10289 (any_rotate:VI48_AVX512VL
10290 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10291 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10292 "TARGET_AVX512F"
10293 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10294 [(set_attr "prefix" "evex")
10295 (set_attr "mode" "<sseinsnmode>")])
10296
10297 (define_insn "<avx512>_<rotate><mode><mask_name>"
10298 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10299 (any_rotate:VI48_AVX512VL
10300 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
10301 (match_operand:SI 2 "const_0_to_255_operand")))]
10302 "TARGET_AVX512F"
10303 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10304 [(set_attr "prefix" "evex")
10305 (set_attr "mode" "<sseinsnmode>")])
10306
10307 (define_expand "<code><mode>3"
10308 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
10309 (maxmin:VI124_256_AVX512F_AVX512BW
10310 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
10311 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
10312 "TARGET_AVX2"
10313 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10314
10315 (define_insn "*avx2_<code><mode>3"
10316 [(set (match_operand:VI124_256 0 "register_operand" "=v")
10317 (maxmin:VI124_256
10318 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10319 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10320 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10321 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10322 [(set_attr "type" "sseiadd")
10323 (set_attr "prefix_extra" "1")
10324 (set_attr "prefix" "vex")
10325 (set_attr "mode" "OI")])
10326
10327 (define_expand "<code><mode>3_mask"
10328 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10329 (vec_merge:VI48_AVX512VL
10330 (maxmin:VI48_AVX512VL
10331 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10332 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10333 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10334 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10335 "TARGET_AVX512F"
10336 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10337
10338 (define_insn "*avx512bw_<code><mode>3<mask_name>"
10339 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10340 (maxmin:VI48_AVX512VL
10341 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10342 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10343 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10344 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10345 [(set_attr "type" "sseiadd")
10346 (set_attr "prefix_extra" "1")
10347 (set_attr "prefix" "maybe_evex")
10348 (set_attr "mode" "<sseinsnmode>")])
10349
10350 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10351 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10352 (maxmin:VI12_AVX512VL
10353 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10354 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10355 "TARGET_AVX512BW"
10356 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10357 [(set_attr "type" "sseiadd")
10358 (set_attr "prefix" "evex")
10359 (set_attr "mode" "<sseinsnmode>")])
10360
10361 (define_expand "<code><mode>3"
10362 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand")
10363 (maxmin:VI8_AVX2_AVX512BW
10364 (match_operand:VI8_AVX2_AVX512BW 1 "register_operand")
10365 (match_operand:VI8_AVX2_AVX512BW 2 "register_operand")))]
10366 "TARGET_SSE4_2"
10367 {
10368 if (TARGET_AVX512F
10369 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10370 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10371 else
10372 {
10373 enum rtx_code code;
10374 rtx xops[6];
10375 bool ok;
10376
10377
10378 xops[0] = operands[0];
10379
10380 if (<CODE> == SMAX || <CODE> == UMAX)
10381 {
10382 xops[1] = operands[1];
10383 xops[2] = operands[2];
10384 }
10385 else
10386 {
10387 xops[1] = operands[2];
10388 xops[2] = operands[1];
10389 }
10390
10391 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
10392
10393 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10394 xops[4] = operands[1];
10395 xops[5] = operands[2];
10396
10397 ok = ix86_expand_int_vcond (xops);
10398 gcc_assert (ok);
10399 DONE;
10400 }
10401 })
10402
10403 (define_expand "<code><mode>3"
10404 [(set (match_operand:VI124_128 0 "register_operand")
10405 (smaxmin:VI124_128
10406 (match_operand:VI124_128 1 "vector_operand")
10407 (match_operand:VI124_128 2 "vector_operand")))]
10408 "TARGET_SSE2"
10409 {
10410 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10411 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10412 else
10413 {
10414 rtx xops[6];
10415 bool ok;
10416
10417 xops[0] = operands[0];
10418 operands[1] = force_reg (<MODE>mode, operands[1]);
10419 operands[2] = force_reg (<MODE>mode, operands[2]);
10420
10421 if (<CODE> == SMAX)
10422 {
10423 xops[1] = operands[1];
10424 xops[2] = operands[2];
10425 }
10426 else
10427 {
10428 xops[1] = operands[2];
10429 xops[2] = operands[1];
10430 }
10431
10432 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
10433 xops[4] = operands[1];
10434 xops[5] = operands[2];
10435
10436 ok = ix86_expand_int_vcond (xops);
10437 gcc_assert (ok);
10438 DONE;
10439 }
10440 })
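
;; Without SSE4.1, the signed max/min expander above goes through
;; ix86_expand_int_vcond, which on plain SSE2 amounts to a
;; compare-and-blend of roughly this shape (illustrative; the helper picks
;; the actual sequence):
;;
;;   #include <emmintrin.h>
;;   static inline __m128i smax_epi32_sse2 (__m128i a, __m128i b)
;;   {
;;     __m128i m = _mm_cmpgt_epi32 (a, b);       /* all-ones where a > b */
;;     return _mm_or_si128 (_mm_and_si128 (m, a),
;;                          _mm_andnot_si128 (m, b));
;;   }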
10441
10442 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10443 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
10444 (smaxmin:VI14_128
10445 (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
10446 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
10447 "TARGET_SSE4_1
10448 && <mask_mode512bit_condition>
10449 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10450 "@
10451 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10452 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10453 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10454 [(set_attr "isa" "noavx,noavx,avx")
10455 (set_attr "type" "sseiadd")
10456 (set_attr "prefix_extra" "1,1,*")
10457 (set_attr "prefix" "orig,orig,vex")
10458 (set_attr "mode" "TI")])
10459
10460 (define_insn "*<code>v8hi3"
10461 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
10462 (smaxmin:V8HI
10463 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
10464 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
10465 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
10466 "@
10467 p<maxmin_int>w\t{%2, %0|%0, %2}
10468 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
10469 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
10470 [(set_attr "isa" "noavx,avx,avx512bw")
10471 (set_attr "type" "sseiadd")
10472 (set_attr "prefix_data16" "1,*,*")
10473 (set_attr "prefix_extra" "*,1,1")
10474 (set_attr "prefix" "orig,vex,evex")
10475 (set_attr "mode" "TI")])
10476
10477 (define_expand "<code><mode>3"
10478 [(set (match_operand:VI124_128 0 "register_operand")
10479 (umaxmin:VI124_128
10480 (match_operand:VI124_128 1 "vector_operand")
10481 (match_operand:VI124_128 2 "vector_operand")))]
10482 "TARGET_SSE2"
10483 {
10484 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
10485 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10486 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
10487 {
10488 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
10489 operands[1] = force_reg (<MODE>mode, operands[1]);
10490 if (rtx_equal_p (op3, op2))
10491 op3 = gen_reg_rtx (V8HImode);
10492 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
10493 emit_insn (gen_addv8hi3 (op0, op3, op2));
10494 DONE;
10495 }
10496 else
10497 {
10498 rtx xops[6];
10499 bool ok;
10500
10501 operands[1] = force_reg (<MODE>mode, operands[1]);
10502 operands[2] = force_reg (<MODE>mode, operands[2]);
10503
10504 xops[0] = operands[0];
10505
10506 if (<CODE> == UMAX)
10507 {
10508 xops[1] = operands[1];
10509 xops[2] = operands[2];
10510 }
10511 else
10512 {
10513 xops[1] = operands[2];
10514 xops[2] = operands[1];
10515 }
10516
10517 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
10518 xops[4] = operands[1];
10519 xops[5] = operands[2];
10520
10521 ok = ix86_expand_int_vcond (xops);
10522 gcc_assert (ok);
10523 DONE;
10524 }
10525 })
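
;; The UMAX V8HI branch above relies on the saturating-subtract identity
;; umax (a, b) == (a -sat b) + b, which avoids a comparison entirely.  In
;; intrinsic terms it corresponds to roughly:
;;
;;   #include <emmintrin.h>
;;   static inline __m128i umax_epu16_sse2 (__m128i a, __m128i b)
;;   {
;;     /* If a >= b this is (a - b) + b == a, otherwise 0 + b == b.  */
;;     return _mm_add_epi16 (_mm_subs_epu16 (a, b), b);
;;   }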
10526
10527 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10528 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
10529 (umaxmin:VI24_128
10530 (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
10531 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
10532 "TARGET_SSE4_1
10533 && <mask_mode512bit_condition>
10534 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10535 "@
10536 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10537 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10538 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10539 [(set_attr "isa" "noavx,noavx,avx")
10540 (set_attr "type" "sseiadd")
10541 (set_attr "prefix_extra" "1,1,*")
10542 (set_attr "prefix" "orig,orig,vex")
10543 (set_attr "mode" "TI")])
10544
10545 (define_insn "*<code>v16qi3"
10546 [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
10547 (umaxmin:V16QI
10548 (match_operand:V16QI 1 "vector_operand" "%0,x,v")
10549 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
10550 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
10551 "@
10552 p<maxmin_int>b\t{%2, %0|%0, %2}
10553 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
10554 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
10555 [(set_attr "isa" "noavx,avx,avx512bw")
10556 (set_attr "type" "sseiadd")
10557 (set_attr "prefix_data16" "1,*,*")
10558 (set_attr "prefix_extra" "*,1,1")
10559 (set_attr "prefix" "orig,vex,evex")
10560 (set_attr "mode" "TI")])
10561
10562 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10563 ;;
10564 ;; Parallel integral comparisons
10565 ;;
10566 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10567
10568 (define_expand "avx2_eq<mode>3"
10569 [(set (match_operand:VI_256 0 "register_operand")
10570 (eq:VI_256
10571 (match_operand:VI_256 1 "nonimmediate_operand")
10572 (match_operand:VI_256 2 "nonimmediate_operand")))]
10573 "TARGET_AVX2"
10574 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10575
10576 (define_insn "*avx2_eq<mode>3"
10577 [(set (match_operand:VI_256 0 "register_operand" "=x")
10578 (eq:VI_256
10579 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
10580 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10581 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10582 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10583 [(set_attr "type" "ssecmp")
10584 (set_attr "prefix_extra" "1")
10585 (set_attr "prefix" "vex")
10586 (set_attr "mode" "OI")])
10587
10588 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10589 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10590 (unspec:<avx512fmaskmode>
10591 [(match_operand:VI12_AVX512VL 1 "register_operand")
10592 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
10593 UNSPEC_MASKED_EQ))]
10594 "TARGET_AVX512BW"
10595 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10596
10597 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10598 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10599 (unspec:<avx512fmaskmode>
10600 [(match_operand:VI48_AVX512VL 1 "register_operand")
10601 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
10602 UNSPEC_MASKED_EQ))]
10603 "TARGET_AVX512F"
10604 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10605
10606 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10607 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10608 (unspec:<avx512fmaskmode>
10609 [(match_operand:VI12_AVX512VL 1 "register_operand" "%v")
10610 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
10611 UNSPEC_MASKED_EQ))]
10612 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10613 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10614 [(set_attr "type" "ssecmp")
10615 (set_attr "prefix_extra" "1")
10616 (set_attr "prefix" "evex")
10617 (set_attr "mode" "<sseinsnmode>")])
10618
10619 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10620 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10621 (unspec:<avx512fmaskmode>
10622 [(match_operand:VI48_AVX512VL 1 "register_operand" "%v")
10623 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
10624 UNSPEC_MASKED_EQ))]
10625 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10626 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10627 [(set_attr "type" "ssecmp")
10628 (set_attr "prefix_extra" "1")
10629 (set_attr "prefix" "evex")
10630 (set_attr "mode" "<sseinsnmode>")])
10631
10632 (define_insn "*sse4_1_eqv2di3"
10633 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10634 (eq:V2DI
10635 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
10636 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
10637 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
10638 "@
10639 pcmpeqq\t{%2, %0|%0, %2}
10640 pcmpeqq\t{%2, %0|%0, %2}
10641 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
10642 [(set_attr "isa" "noavx,noavx,avx")
10643 (set_attr "type" "ssecmp")
10644 (set_attr "prefix_extra" "1")
10645 (set_attr "prefix" "orig,orig,vex")
10646 (set_attr "mode" "TI")])
10647
10648 (define_insn "*sse2_eq<mode>3"
10649 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10650 (eq:VI124_128
10651 (match_operand:VI124_128 1 "vector_operand" "%0,x")
10652 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
10653 "TARGET_SSE2 && !TARGET_XOP
10654 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10655 "@
10656 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
10657 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10658 [(set_attr "isa" "noavx,avx")
10659 (set_attr "type" "ssecmp")
10660 (set_attr "prefix_data16" "1,*")
10661 (set_attr "prefix" "orig,vex")
10662 (set_attr "mode" "TI")])
10663
10664 (define_expand "sse2_eq<mode>3"
10665 [(set (match_operand:VI124_128 0 "register_operand")
10666 (eq:VI124_128
10667 (match_operand:VI124_128 1 "vector_operand")
10668 (match_operand:VI124_128 2 "vector_operand")))]
10669   "TARGET_SSE2 && !TARGET_XOP"
10670 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10671
10672 (define_expand "sse4_1_eqv2di3"
10673 [(set (match_operand:V2DI 0 "register_operand")
10674 (eq:V2DI
10675 (match_operand:V2DI 1 "vector_operand")
10676 (match_operand:V2DI 2 "vector_operand")))]
10677 "TARGET_SSE4_1"
10678 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
10679
10680 (define_insn "sse4_2_gtv2di3"
10681 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10682 (gt:V2DI
10683 (match_operand:V2DI 1 "register_operand" "0,0,x")
10684 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
10685 "TARGET_SSE4_2"
10686 "@
10687 pcmpgtq\t{%2, %0|%0, %2}
10688 pcmpgtq\t{%2, %0|%0, %2}
10689 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
10690 [(set_attr "isa" "noavx,noavx,avx")
10691 (set_attr "type" "ssecmp")
10692 (set_attr "prefix_extra" "1")
10693 (set_attr "prefix" "orig,orig,vex")
10694 (set_attr "mode" "TI")])
10695
10696 (define_insn "avx2_gt<mode>3"
10697 [(set (match_operand:VI_256 0 "register_operand" "=x")
10698 (gt:VI_256
10699 (match_operand:VI_256 1 "register_operand" "x")
10700 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10701 "TARGET_AVX2"
10702 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10703 [(set_attr "type" "ssecmp")
10704 (set_attr "prefix_extra" "1")
10705 (set_attr "prefix" "vex")
10706 (set_attr "mode" "OI")])
10707
10708 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10709 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10710 (unspec:<avx512fmaskmode>
10711 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
10712 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10713 "TARGET_AVX512F"
10714 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10715 [(set_attr "type" "ssecmp")
10716 (set_attr "prefix_extra" "1")
10717 (set_attr "prefix" "evex")
10718 (set_attr "mode" "<sseinsnmode>")])
10719
10720 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10721 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10722 (unspec:<avx512fmaskmode>
10723 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
10724 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10725 "TARGET_AVX512BW"
10726 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10727 [(set_attr "type" "ssecmp")
10728 (set_attr "prefix_extra" "1")
10729 (set_attr "prefix" "evex")
10730 (set_attr "mode" "<sseinsnmode>")])
10731
10732 (define_insn "sse2_gt<mode>3"
10733 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10734 (gt:VI124_128
10735 (match_operand:VI124_128 1 "register_operand" "0,x")
10736 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
10737 "TARGET_SSE2 && !TARGET_XOP"
10738 "@
10739 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
10740 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10741 [(set_attr "isa" "noavx,avx")
10742 (set_attr "type" "ssecmp")
10743 (set_attr "prefix_data16" "1,*")
10744 (set_attr "prefix" "orig,vex")
10745 (set_attr "mode" "TI")])
10746
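;; All of the vcond/vcondu expanders below defer to ix86_expand_int_vcond.
;; The comparison operands only have to agree with the data operands in
;; the number of elements, not in the element mode.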
10747 (define_expand "vcond<V_512:mode><VI_512:mode>"
10748 [(set (match_operand:V_512 0 "register_operand")
10749 (if_then_else:V_512
10750 (match_operator 3 ""
10751 [(match_operand:VI_512 4 "nonimmediate_operand")
10752 (match_operand:VI_512 5 "general_operand")])
10753 (match_operand:V_512 1)
10754 (match_operand:V_512 2)))]
10755 "TARGET_AVX512F
10756 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10757 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10758 {
10759 bool ok = ix86_expand_int_vcond (operands);
10760 gcc_assert (ok);
10761 DONE;
10762 })
10763
10764 (define_expand "vcond<V_256:mode><VI_256:mode>"
10765 [(set (match_operand:V_256 0 "register_operand")
10766 (if_then_else:V_256
10767 (match_operator 3 ""
10768 [(match_operand:VI_256 4 "nonimmediate_operand")
10769 (match_operand:VI_256 5 "general_operand")])
10770 (match_operand:V_256 1)
10771 (match_operand:V_256 2)))]
10772 "TARGET_AVX2
10773 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10774 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10775 {
10776 bool ok = ix86_expand_int_vcond (operands);
10777 gcc_assert (ok);
10778 DONE;
10779 })
10780
10781 (define_expand "vcond<V_128:mode><VI124_128:mode>"
10782 [(set (match_operand:V_128 0 "register_operand")
10783 (if_then_else:V_128
10784 (match_operator 3 ""
10785 [(match_operand:VI124_128 4 "vector_operand")
10786 (match_operand:VI124_128 5 "general_operand")])
10787 (match_operand:V_128 1)
10788 (match_operand:V_128 2)))]
10789 "TARGET_SSE2
10790 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10791 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10792 {
10793 bool ok = ix86_expand_int_vcond (operands);
10794 gcc_assert (ok);
10795 DONE;
10796 })
10797
10798 (define_expand "vcond<VI8F_128:mode>v2di"
10799 [(set (match_operand:VI8F_128 0 "register_operand")
10800 (if_then_else:VI8F_128
10801 (match_operator 3 ""
10802 [(match_operand:V2DI 4 "vector_operand")
10803 (match_operand:V2DI 5 "general_operand")])
10804 (match_operand:VI8F_128 1)
10805 (match_operand:VI8F_128 2)))]
10806 "TARGET_SSE4_2"
10807 {
10808 bool ok = ix86_expand_int_vcond (operands);
10809 gcc_assert (ok);
10810 DONE;
10811 })
10812
10813 (define_expand "vcondu<V_512:mode><VI_512:mode>"
10814 [(set (match_operand:V_512 0 "register_operand")
10815 (if_then_else:V_512
10816 (match_operator 3 ""
10817 [(match_operand:VI_512 4 "nonimmediate_operand")
10818 (match_operand:VI_512 5 "nonimmediate_operand")])
10819 (match_operand:V_512 1 "general_operand")
10820 (match_operand:V_512 2 "general_operand")))]
10821 "TARGET_AVX512F
10822 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10823 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10824 {
10825 bool ok = ix86_expand_int_vcond (operands);
10826 gcc_assert (ok);
10827 DONE;
10828 })
10829
10830 (define_expand "vcondu<V_256:mode><VI_256:mode>"
10831 [(set (match_operand:V_256 0 "register_operand")
10832 (if_then_else:V_256
10833 (match_operator 3 ""
10834 [(match_operand:VI_256 4 "nonimmediate_operand")
10835 (match_operand:VI_256 5 "nonimmediate_operand")])
10836 (match_operand:V_256 1 "general_operand")
10837 (match_operand:V_256 2 "general_operand")))]
10838 "TARGET_AVX2
10839 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10840 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10841 {
10842 bool ok = ix86_expand_int_vcond (operands);
10843 gcc_assert (ok);
10844 DONE;
10845 })
10846
10847 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
10848 [(set (match_operand:V_128 0 "register_operand")
10849 (if_then_else:V_128
10850 (match_operator 3 ""
10851 [(match_operand:VI124_128 4 "vector_operand")
10852 (match_operand:VI124_128 5 "vector_operand")])
10853 (match_operand:V_128 1 "general_operand")
10854 (match_operand:V_128 2 "general_operand")))]
10855 "TARGET_SSE2
10856 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10857 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10858 {
10859 bool ok = ix86_expand_int_vcond (operands);
10860 gcc_assert (ok);
10861 DONE;
10862 })
10863
10864 (define_expand "vcondu<VI8F_128:mode>v2di"
10865 [(set (match_operand:VI8F_128 0 "register_operand")
10866 (if_then_else:VI8F_128
10867 (match_operator 3 ""
10868 [(match_operand:V2DI 4 "vector_operand")
10869 (match_operand:V2DI 5 "vector_operand")])
10870 (match_operand:VI8F_128 1 "general_operand")
10871 (match_operand:VI8F_128 2 "general_operand")))]
10872 "TARGET_SSE4_2"
10873 {
10874 bool ok = ix86_expand_int_vcond (operands);
10875 gcc_assert (ok);
10876 DONE;
10877 })
10878
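;; Modes for which a variable permutation (vec_perm) can be expanded.
;; The constant-permutation expander further down accepts the wider
;; VEC_PERM_CONST set and FAILs when ix86_expand_vec_perm_const cannot
;; synthesize the requested shuffle.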
10879 (define_mode_iterator VEC_PERM_AVX2
10880 [V16QI V8HI V4SI V2DI V4SF V2DF
10881 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10882 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
10883 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
10884 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10885 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10886 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
10887
10888 (define_expand "vec_perm<mode>"
10889 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
10890 (match_operand:VEC_PERM_AVX2 1 "register_operand")
10891 (match_operand:VEC_PERM_AVX2 2 "register_operand")
10892 (match_operand:<sseintvecmode> 3 "register_operand")]
10893 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
10894 {
10895 ix86_expand_vec_perm (operands);
10896 DONE;
10897 })
10898
10899 (define_mode_iterator VEC_PERM_CONST
10900 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
10901 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
10902 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
10903 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
10904 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
10905 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10906 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10907 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10908 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
10909
10910 (define_expand "vec_perm_const<mode>"
10911 [(match_operand:VEC_PERM_CONST 0 "register_operand")
10912 (match_operand:VEC_PERM_CONST 1 "register_operand")
10913 (match_operand:VEC_PERM_CONST 2 "register_operand")
10914 (match_operand:<sseintvecmode> 3)]
10915 ""
10916 {
10917 if (ix86_expand_vec_perm_const (operands))
10918 DONE;
10919 else
10920 FAIL;
10921 })
10922
10923 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10924 ;;
10925 ;; Parallel bitwise logical operations
10926 ;;
10927 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10928
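;; One's complement is expanded as an XOR with an all-ones vector;
;; operand 2 is filled in here with that constant, forced into a register.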
10929 (define_expand "one_cmpl<mode>2"
10930 [(set (match_operand:VI 0 "register_operand")
10931 (xor:VI (match_operand:VI 1 "vector_operand")
10932 (match_dup 2)))]
10933 "TARGET_SSE"
10934 {
10935 int i, n = GET_MODE_NUNITS (<MODE>mode);
10936 rtvec v = rtvec_alloc (n);
10937
10938 for (i = 0; i < n; ++i)
10939 RTVEC_ELT (v, i) = constm1_rtx;
10940
10941 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
10942 })
10943
10944 (define_expand "<sse2_avx2>_andnot<mode>3"
10945 [(set (match_operand:VI_AVX2 0 "register_operand")
10946 (and:VI_AVX2
10947 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
10948 (match_operand:VI_AVX2 2 "vector_operand")))]
10949 "TARGET_SSE2")
10950
10951 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
10952 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10953 (vec_merge:VI48_AVX512VL
10954 (and:VI48_AVX512VL
10955 (not:VI48_AVX512VL
10956 (match_operand:VI48_AVX512VL 1 "register_operand"))
10957 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10958 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10959 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10960 "TARGET_AVX512F")
10961
10962 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
10963 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
10964 (vec_merge:VI12_AVX512VL
10965 (and:VI12_AVX512VL
10966 (not:VI12_AVX512VL
10967 (match_operand:VI12_AVX512VL 1 "register_operand"))
10968 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10969 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
10970 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10971 "TARGET_AVX512BW")
10972
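;; The output template is assembled at print time: the integer pandn form
;; (with an explicit d/q suffix for the EVEX variants) or andnps when the
;; insn mode is a float vector, plus a "v" prefix for the AVX alternative.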
10973 (define_insn "*andnot<mode>3"
10974 [(set (match_operand:VI 0 "register_operand" "=x,v")
10975 (and:VI
10976 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
10977 (match_operand:VI 2 "vector_operand" "xBm,vm")))]
10978 "TARGET_SSE"
10979 {
10980 static char buf[64];
10981 const char *ops;
10982 const char *tmp;
10983
10984 switch (get_attr_mode (insn))
10985 {
10986 case MODE_XI:
10987 gcc_assert (TARGET_AVX512F);
10988 case MODE_OI:
10989 gcc_assert (TARGET_AVX2);
10990 case MODE_TI:
10991 gcc_assert (TARGET_SSE2);
10992 switch (<MODE>mode)
10993 {
10994 case V64QImode:
10995 case V32HImode:
10996 	  /* There is no vpandnb or vpandnw instruction, nor a vpandn for
10997 	     512-bit vectors.  Use vpandnq instead.  */
10998 tmp = "pandnq";
10999 break;
11000 case V16SImode:
11001 case V8DImode:
11002 tmp = "pandn<ssemodesuffix>";
11003 break;
11004 case V8SImode:
11005 case V4DImode:
11006 case V4SImode:
11007 case V2DImode:
11008 tmp = TARGET_AVX512VL ? "pandn<ssemodesuffix>" : "pandn";
11009 break;
11010 default:
11011 tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
11012 break;
11013 }
11014 break;
11015
11016 case MODE_V16SF:
11017 gcc_assert (TARGET_AVX512F);
11018 case MODE_V8SF:
11019 gcc_assert (TARGET_AVX);
11020 case MODE_V4SF:
11021 gcc_assert (TARGET_SSE);
11022
11023 tmp = "andnps";
11024 break;
11025
11026 default:
11027 gcc_unreachable ();
11028 }
11029
11030 switch (which_alternative)
11031 {
11032 case 0:
11033 ops = "%s\t{%%2, %%0|%%0, %%2}";
11034 break;
11035 case 1:
11036 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11037 break;
11038 default:
11039 gcc_unreachable ();
11040 }
11041
11042 snprintf (buf, sizeof (buf), ops, tmp);
11043 return buf;
11044 }
11045 [(set_attr "isa" "noavx,avx")
11046 (set_attr "type" "sselog")
11047 (set (attr "prefix_data16")
11048 (if_then_else
11049 (and (eq_attr "alternative" "0")
11050 (eq_attr "mode" "TI"))
11051 (const_string "1")
11052 (const_string "*")))
11053 (set_attr "prefix" "orig,vex")
11054 (set (attr "mode")
11055 (cond [(and (match_test "<MODE_SIZE> == 16")
11056 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11057 (const_string "<ssePSmode>")
11058 (match_test "TARGET_AVX2")
11059 (const_string "<sseinsnmode>")
11060 (match_test "TARGET_AVX")
11061 (if_then_else
11062 (match_test "<MODE_SIZE> > 16")
11063 (const_string "V8SF")
11064 (const_string "<sseinsnmode>"))
11065 (ior (not (match_test "TARGET_SSE2"))
11066 (match_test "optimize_function_for_size_p (cfun)"))
11067 (const_string "V4SF")
11068 ]
11069 (const_string "<sseinsnmode>")))])
11070
11071 (define_insn "*andnot<mode>3_mask"
11072 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11073 (vec_merge:VI48_AVX512VL
11074 (and:VI48_AVX512VL
11075 (not:VI48_AVX512VL
11076 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
11077 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
11078 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
11079 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11080 "TARGET_AVX512F"
11081   "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11082 [(set_attr "type" "sselog")
11083 (set_attr "prefix" "evex")
11084 (set_attr "mode" "<sseinsnmode>")])
11085
11086 (define_expand "<code><mode>3"
11087 [(set (match_operand:VI 0 "register_operand")
11088 (any_logic:VI
11089 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
11090 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
11091 "TARGET_SSE"
11092 {
11093 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
11094 DONE;
11095 })
11096
11097 (define_insn "<mask_codefor><code><mode>3<mask_name>"
11098 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,v")
11099 (any_logic:VI48_AVX_AVX512F
11100 (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,v")
11101 (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,vm")))]
11102 "TARGET_SSE && <mask_mode512bit_condition>
11103 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11104 {
11105 static char buf[64];
11106 const char *ops;
11107 const char *tmp;
11108
11109 switch (get_attr_mode (insn))
11110 {
11111 case MODE_XI:
11112 gcc_assert (TARGET_AVX512F);
11113 case MODE_OI:
11114 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
11115 case MODE_TI:
11116 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
11117 switch (<MODE>mode)
11118 {
11119 case V16SImode:
11120 case V8DImode:
11121 if (TARGET_AVX512F)
11122 {
11123 tmp = "p<logic><ssemodesuffix>";
11124 break;
11125 }
11126 case V8SImode:
11127 case V4DImode:
11128 case V4SImode:
11129 case V2DImode:
11130 tmp = TARGET_AVX512VL ? "p<logic><ssemodesuffix>" : "p<logic>";
11131 break;
11132 default:
11133 gcc_unreachable ();
11134 }
11135 break;
11136
11137 case MODE_V8SF:
11138 gcc_assert (TARGET_AVX);
11139 case MODE_V4SF:
11140 gcc_assert (TARGET_SSE);
11141 gcc_assert (!<mask_applied>);
11142 tmp = "<logic>ps";
11143 break;
11144
11145 default:
11146 gcc_unreachable ();
11147 }
11148
11149 switch (which_alternative)
11150 {
11151 case 0:
11152 if (<mask_applied>)
11153 ops = "v%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
11154 else
11155 ops = "%s\t{%%2, %%0|%%0, %%2}";
11156 break;
11157 case 1:
11158 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
11159 break;
11160 default:
11161 gcc_unreachable ();
11162 }
11163
11164 snprintf (buf, sizeof (buf), ops, tmp);
11165 return buf;
11166 }
11167 [(set_attr "isa" "noavx,avx")
11168 (set_attr "type" "sselog")
11169 (set (attr "prefix_data16")
11170 (if_then_else
11171 (and (eq_attr "alternative" "0")
11172 (eq_attr "mode" "TI"))
11173 (const_string "1")
11174 (const_string "*")))
11175 (set_attr "prefix" "<mask_prefix3>")
11176 (set (attr "mode")
11177 (cond [(and (match_test "<MODE_SIZE> == 16")
11178 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11179 (const_string "<ssePSmode>")
11180 (match_test "TARGET_AVX2")
11181 (const_string "<sseinsnmode>")
11182 (match_test "TARGET_AVX")
11183 (if_then_else
11184 (match_test "<MODE_SIZE> > 16")
11185 (const_string "V8SF")
11186 (const_string "<sseinsnmode>"))
11187 (ior (not (match_test "TARGET_SSE2"))
11188 (match_test "optimize_function_for_size_p (cfun)"))
11189 (const_string "V4SF")
11190 ]
11191 (const_string "<sseinsnmode>")))])
11192
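;; There are no byte or word variants of the vector logic instructions;
;; the full-width pand/por/pxor (or their q-suffixed EVEX forms) are used
;; for QImode and HImode element vectors as well.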
11193 (define_insn "*<code><mode>3"
11194 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,v")
11195 (any_logic: VI12_AVX_AVX512F
11196 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,v")
11197 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,vm")))]
11198 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11199 {
11200 static char buf[64];
11201 const char *ops;
11202 const char *tmp;
11203 const char *ssesuffix;
11204
11205 switch (get_attr_mode (insn))
11206 {
11207 case MODE_XI:
11208 gcc_assert (TARGET_AVX512F);
11209 case MODE_OI:
11210 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
11211 case MODE_TI:
11212 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
11213 switch (<MODE>mode)
11214 {
11215 case V64QImode:
11216 case V32HImode:
11217 if (TARGET_AVX512F)
11218 {
11219 tmp = "p<logic>";
11220 ssesuffix = "q";
11221 break;
11222 }
11223 case V32QImode:
11224 case V16HImode:
11225 case V16QImode:
11226 case V8HImode:
11227 if (TARGET_AVX512VL || TARGET_AVX2 || TARGET_SSE2)
11228 {
11229 tmp = "p<logic>";
11230 ssesuffix = TARGET_AVX512VL ? "q" : "";
11231 break;
11232 }
11233 default:
11234 gcc_unreachable ();
11235 }
11236 break;
11237
11238 case MODE_V8SF:
11239 gcc_assert (TARGET_AVX);
11240 case MODE_V4SF:
11241 gcc_assert (TARGET_SSE);
11242 tmp = "<logic>ps";
11243 ssesuffix = "";
11244 break;
11245
11246 default:
11247 gcc_unreachable ();
11248 }
11249
11250 switch (which_alternative)
11251 {
11252 case 0:
11253 ops = "%s\t{%%2, %%0|%%0, %%2}";
11254 snprintf (buf, sizeof (buf), ops, tmp);
11255 break;
11256 case 1:
11257 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11258 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11259 break;
11260 default:
11261 gcc_unreachable ();
11262 }
11263
11264 return buf;
11265 }
11266 [(set_attr "isa" "noavx,avx")
11267 (set_attr "type" "sselog")
11268 (set (attr "prefix_data16")
11269 (if_then_else
11270 (and (eq_attr "alternative" "0")
11271 (eq_attr "mode" "TI"))
11272 (const_string "1")
11273 (const_string "*")))
11274 (set_attr "prefix" "<mask_prefix3>")
11275 (set (attr "mode")
11276 (cond [(and (match_test "<MODE_SIZE> == 16")
11277 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11278 (const_string "<ssePSmode>")
11279 (match_test "TARGET_AVX2")
11280 (const_string "<sseinsnmode>")
11281 (match_test "TARGET_AVX")
11282 (if_then_else
11283 (match_test "<MODE_SIZE> > 16")
11284 (const_string "V8SF")
11285 (const_string "<sseinsnmode>"))
11286 (ior (not (match_test "TARGET_SSE2"))
11287 (match_test "optimize_function_for_size_p (cfun)"))
11288 (const_string "V4SF")
11289 ]
11290 (const_string "<sseinsnmode>")))])
11291
11292 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11293 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11294 (unspec:<avx512fmaskmode>
11295 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11296 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11297 UNSPEC_TESTM))]
11298 "TARGET_AVX512BW"
11299 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11300 [(set_attr "prefix" "evex")
11301 (set_attr "mode" "<sseinsnmode>")])
11302
11303 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11304 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11305 (unspec:<avx512fmaskmode>
11306 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11307 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11308 UNSPEC_TESTM))]
11309 "TARGET_AVX512F"
11310 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11311 [(set_attr "prefix" "evex")
11312 (set_attr "mode" "<sseinsnmode>")])
11313
11314 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11315 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11316 (unspec:<avx512fmaskmode>
11317 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11318 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11319 UNSPEC_TESTNM))]
11320 "TARGET_AVX512BW"
11321 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11322 [(set_attr "prefix" "evex")
11323 (set_attr "mode" "<sseinsnmode>")])
11324
11325 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11326 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11327 (unspec:<avx512fmaskmode>
11328 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11329 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11330 UNSPEC_TESTNM))]
11331 "TARGET_AVX512F"
11332 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11333 [(set_attr "prefix" "evex")
11334 (set_attr "mode" "<sseinsnmode>")])
11335
11336 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11337 ;;
11338 ;; Parallel integral element swizzling
11339 ;;
11340 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11341
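;; Pack-with-truncation is implemented by viewing both inputs in the
;; narrower packed mode and extracting the even-numbered elements, which
;; hold the low parts of the original wider elements.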
11342 (define_expand "vec_pack_trunc_<mode>"
11343 [(match_operand:<ssepackmode> 0 "register_operand")
11344 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
11345 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
11346 "TARGET_SSE2"
11347 {
11348 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
11349 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
11350 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
11351 DONE;
11352 })
11353
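;; For the AVX-512 mask modes, packing two masks is simply a shift and
;; an OR in the scalar mask register.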
11354 (define_expand "vec_pack_trunc_qi"
11355 [(set (match_operand:HI 0 ("register_operand"))
11356 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 ("register_operand")))
11357 (const_int 8))
11358 (zero_extend:HI (match_operand:QI 1 ("register_operand")))))]
11359 "TARGET_AVX512F")
11360
11361 (define_expand "vec_pack_trunc_<mode>"
11362 [(set (match_operand:<DOUBLEMASKMODE> 0 ("register_operand"))
11363 (ior:<DOUBLEMASKMODE> (ashift:<DOUBLEMASKMODE> (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 2 ("register_operand")))
11364 (match_dup 3))
11365 (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 1 ("register_operand")))))]
11366 "TARGET_AVX512BW"
11367 {
11368 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
11369 })
11370
11371 (define_insn "<sse2_avx2>_packsswb<mask_name>"
11372 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11373 (vec_concat:VI1_AVX512
11374 (ss_truncate:<ssehalfvecmode>
11375 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11376 (ss_truncate:<ssehalfvecmode>
11377 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,vm"))))]
11378 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11379 "@
11380 packsswb\t{%2, %0|%0, %2}
11381 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11382 [(set_attr "isa" "noavx,avx")
11383 (set_attr "type" "sselog")
11384 (set_attr "prefix_data16" "1,*")
11385 (set_attr "prefix" "orig,maybe_evex")
11386 (set_attr "mode" "<sseinsnmode>")])
11387
11388 (define_insn "<sse2_avx2>_packssdw<mask_name>"
11389 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11390 (vec_concat:VI2_AVX2
11391 (ss_truncate:<ssehalfvecmode>
11392 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11393 (ss_truncate:<ssehalfvecmode>
11394 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,vm"))))]
11395 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11396 "@
11397 packssdw\t{%2, %0|%0, %2}
11398 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11399 [(set_attr "isa" "noavx,avx")
11400 (set_attr "type" "sselog")
11401 (set_attr "prefix_data16" "1,*")
11402 (set_attr "prefix" "orig,vex")
11403 (set_attr "mode" "<sseinsnmode>")])
11404
11405 (define_insn "<sse2_avx2>_packuswb<mask_name>"
11406 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11407 (vec_concat:VI1_AVX512
11408 (us_truncate:<ssehalfvecmode>
11409 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11410 (us_truncate:<ssehalfvecmode>
11411 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,vm"))))]
11412 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11413 "@
11414 packuswb\t{%2, %0|%0, %2}
11415 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11416 [(set_attr "isa" "noavx,avx")
11417 (set_attr "type" "sselog")
11418 (set_attr "prefix_data16" "1,*")
11419 (set_attr "prefix" "orig,vex")
11420 (set_attr "mode" "<sseinsnmode>")])
11421
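;; Like their 128-bit counterparts, the 256-bit and 512-bit punpck
;; instructions interleave within each 128-bit lane; the element indices
;; in the vec_selects below reflect that.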
11422 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
11423 [(set (match_operand:V64QI 0 "register_operand" "=v")
11424 (vec_select:V64QI
11425 (vec_concat:V128QI
11426 (match_operand:V64QI 1 "register_operand" "v")
11427 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11428 (parallel [(const_int 8) (const_int 72)
11429 (const_int 9) (const_int 73)
11430 (const_int 10) (const_int 74)
11431 (const_int 11) (const_int 75)
11432 (const_int 12) (const_int 76)
11433 (const_int 13) (const_int 77)
11434 (const_int 14) (const_int 78)
11435 (const_int 15) (const_int 79)
11436 (const_int 24) (const_int 88)
11437 (const_int 25) (const_int 89)
11438 (const_int 26) (const_int 90)
11439 (const_int 27) (const_int 91)
11440 (const_int 28) (const_int 92)
11441 (const_int 29) (const_int 93)
11442 (const_int 30) (const_int 94)
11443 (const_int 31) (const_int 95)
11444 (const_int 40) (const_int 104)
11445 (const_int 41) (const_int 105)
11446 (const_int 42) (const_int 106)
11447 (const_int 43) (const_int 107)
11448 (const_int 44) (const_int 108)
11449 (const_int 45) (const_int 109)
11450 (const_int 46) (const_int 110)
11451 (const_int 47) (const_int 111)
11452 (const_int 56) (const_int 120)
11453 (const_int 57) (const_int 121)
11454 (const_int 58) (const_int 122)
11455 (const_int 59) (const_int 123)
11456 (const_int 60) (const_int 124)
11457 (const_int 61) (const_int 125)
11458 (const_int 62) (const_int 126)
11459 (const_int 63) (const_int 127)])))]
11460 "TARGET_AVX512BW"
11461 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11462 [(set_attr "type" "sselog")
11463 (set_attr "prefix" "evex")
11464 (set_attr "mode" "XI")])
11465
11466 (define_insn "avx2_interleave_highv32qi<mask_name>"
11467 [(set (match_operand:V32QI 0 "register_operand" "=v")
11468 (vec_select:V32QI
11469 (vec_concat:V64QI
11470 (match_operand:V32QI 1 "register_operand" "v")
11471 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
11472 (parallel [(const_int 8) (const_int 40)
11473 (const_int 9) (const_int 41)
11474 (const_int 10) (const_int 42)
11475 (const_int 11) (const_int 43)
11476 (const_int 12) (const_int 44)
11477 (const_int 13) (const_int 45)
11478 (const_int 14) (const_int 46)
11479 (const_int 15) (const_int 47)
11480 (const_int 24) (const_int 56)
11481 (const_int 25) (const_int 57)
11482 (const_int 26) (const_int 58)
11483 (const_int 27) (const_int 59)
11484 (const_int 28) (const_int 60)
11485 (const_int 29) (const_int 61)
11486 (const_int 30) (const_int 62)
11487 (const_int 31) (const_int 63)])))]
11488 "TARGET_AVX2 && <mask_avx512vl_condition>"
11489 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11490 [(set_attr "type" "sselog")
11491 (set_attr "prefix" "<mask_prefix>")
11492 (set_attr "mode" "OI")])
11493
11494 (define_insn "vec_interleave_highv16qi<mask_name>"
11495 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
11496 (vec_select:V16QI
11497 (vec_concat:V32QI
11498 (match_operand:V16QI 1 "register_operand" "0,v")
11499 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
11500 (parallel [(const_int 8) (const_int 24)
11501 (const_int 9) (const_int 25)
11502 (const_int 10) (const_int 26)
11503 (const_int 11) (const_int 27)
11504 (const_int 12) (const_int 28)
11505 (const_int 13) (const_int 29)
11506 (const_int 14) (const_int 30)
11507 (const_int 15) (const_int 31)])))]
11508 "TARGET_SSE2 && <mask_avx512vl_condition>"
11509 "@
11510 punpckhbw\t{%2, %0|%0, %2}
11511 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11512 [(set_attr "isa" "noavx,avx")
11513 (set_attr "type" "sselog")
11514 (set_attr "prefix_data16" "1,*")
11515 (set_attr "prefix" "orig,<mask_prefix>")
11516 (set_attr "mode" "TI")])
11517
11518 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
11519 [(set (match_operand:V64QI 0 "register_operand" "=v")
11520 (vec_select:V64QI
11521 (vec_concat:V128QI
11522 (match_operand:V64QI 1 "register_operand" "v")
11523 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11524 (parallel [(const_int 0) (const_int 64)
11525 (const_int 1) (const_int 65)
11526 (const_int 2) (const_int 66)
11527 (const_int 3) (const_int 67)
11528 (const_int 4) (const_int 68)
11529 (const_int 5) (const_int 69)
11530 (const_int 6) (const_int 70)
11531 (const_int 7) (const_int 71)
11532 (const_int 16) (const_int 80)
11533 (const_int 17) (const_int 81)
11534 (const_int 18) (const_int 82)
11535 (const_int 19) (const_int 83)
11536 (const_int 20) (const_int 84)
11537 (const_int 21) (const_int 85)
11538 (const_int 22) (const_int 86)
11539 (const_int 23) (const_int 87)
11540 (const_int 32) (const_int 96)
11541 (const_int 33) (const_int 97)
11542 (const_int 34) (const_int 98)
11543 (const_int 35) (const_int 99)
11544 (const_int 36) (const_int 100)
11545 (const_int 37) (const_int 101)
11546 (const_int 38) (const_int 102)
11547 (const_int 39) (const_int 103)
11548 (const_int 48) (const_int 112)
11549 (const_int 49) (const_int 113)
11550 (const_int 50) (const_int 114)
11551 (const_int 51) (const_int 115)
11552 (const_int 52) (const_int 116)
11553 (const_int 53) (const_int 117)
11554 (const_int 54) (const_int 118)
11555 (const_int 55) (const_int 119)])))]
11556 "TARGET_AVX512BW"
11557 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11558 [(set_attr "type" "sselog")
11559 (set_attr "prefix" "evex")
11560 (set_attr "mode" "XI")])
11561
11562 (define_insn "avx2_interleave_lowv32qi<mask_name>"
11563 [(set (match_operand:V32QI 0 "register_operand" "=v")
11564 (vec_select:V32QI
11565 (vec_concat:V64QI
11566 (match_operand:V32QI 1 "register_operand" "v")
11567 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
11568 (parallel [(const_int 0) (const_int 32)
11569 (const_int 1) (const_int 33)
11570 (const_int 2) (const_int 34)
11571 (const_int 3) (const_int 35)
11572 (const_int 4) (const_int 36)
11573 (const_int 5) (const_int 37)
11574 (const_int 6) (const_int 38)
11575 (const_int 7) (const_int 39)
11576 (const_int 16) (const_int 48)
11577 (const_int 17) (const_int 49)
11578 (const_int 18) (const_int 50)
11579 (const_int 19) (const_int 51)
11580 (const_int 20) (const_int 52)
11581 (const_int 21) (const_int 53)
11582 (const_int 22) (const_int 54)
11583 (const_int 23) (const_int 55)])))]
11584 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11585 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11586 [(set_attr "type" "sselog")
11587 (set_attr "prefix" "maybe_vex")
11588 (set_attr "mode" "OI")])
11589
11590 (define_insn "vec_interleave_lowv16qi<mask_name>"
11591 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
11592 (vec_select:V16QI
11593 (vec_concat:V32QI
11594 (match_operand:V16QI 1 "register_operand" "0,v")
11595 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
11596 (parallel [(const_int 0) (const_int 16)
11597 (const_int 1) (const_int 17)
11598 (const_int 2) (const_int 18)
11599 (const_int 3) (const_int 19)
11600 (const_int 4) (const_int 20)
11601 (const_int 5) (const_int 21)
11602 (const_int 6) (const_int 22)
11603 (const_int 7) (const_int 23)])))]
11604 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11605 "@
11606 punpcklbw\t{%2, %0|%0, %2}
11607 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11608 [(set_attr "isa" "noavx,avx")
11609 (set_attr "type" "sselog")
11610 (set_attr "prefix_data16" "1,*")
11611 (set_attr "prefix" "orig,vex")
11612 (set_attr "mode" "TI")])
11613
11614 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
11615 [(set (match_operand:V32HI 0 "register_operand" "=v")
11616 (vec_select:V32HI
11617 (vec_concat:V64HI
11618 (match_operand:V32HI 1 "register_operand" "v")
11619 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11620 (parallel [(const_int 4) (const_int 36)
11621 (const_int 5) (const_int 37)
11622 (const_int 6) (const_int 38)
11623 (const_int 7) (const_int 39)
11624 (const_int 12) (const_int 44)
11625 (const_int 13) (const_int 45)
11626 (const_int 14) (const_int 46)
11627 (const_int 15) (const_int 47)
11628 (const_int 20) (const_int 52)
11629 (const_int 21) (const_int 53)
11630 (const_int 22) (const_int 54)
11631 (const_int 23) (const_int 55)
11632 (const_int 28) (const_int 60)
11633 (const_int 29) (const_int 61)
11634 (const_int 30) (const_int 62)
11635 (const_int 31) (const_int 63)])))]
11636 "TARGET_AVX512BW"
11637 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11638 [(set_attr "type" "sselog")
11639 (set_attr "prefix" "evex")
11640 (set_attr "mode" "XI")])
11641
11642 (define_insn "avx2_interleave_highv16hi<mask_name>"
11643 [(set (match_operand:V16HI 0 "register_operand" "=v")
11644 (vec_select:V16HI
11645 (vec_concat:V32HI
11646 (match_operand:V16HI 1 "register_operand" "v")
11647 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11648 (parallel [(const_int 4) (const_int 20)
11649 (const_int 5) (const_int 21)
11650 (const_int 6) (const_int 22)
11651 (const_int 7) (const_int 23)
11652 (const_int 12) (const_int 28)
11653 (const_int 13) (const_int 29)
11654 (const_int 14) (const_int 30)
11655 (const_int 15) (const_int 31)])))]
11656 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11657 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11658 [(set_attr "type" "sselog")
11659 (set_attr "prefix" "maybe_evex")
11660 (set_attr "mode" "OI")])
11661
11662 (define_insn "vec_interleave_highv8hi<mask_name>"
11663 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11664 (vec_select:V8HI
11665 (vec_concat:V16HI
11666 (match_operand:V8HI 1 "register_operand" "0,v")
11667 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
11668 (parallel [(const_int 4) (const_int 12)
11669 (const_int 5) (const_int 13)
11670 (const_int 6) (const_int 14)
11671 (const_int 7) (const_int 15)])))]
11672 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11673 "@
11674 punpckhwd\t{%2, %0|%0, %2}
11675 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11676 [(set_attr "isa" "noavx,avx")
11677 (set_attr "type" "sselog")
11678 (set_attr "prefix_data16" "1,*")
11679 (set_attr "prefix" "orig,maybe_vex")
11680 (set_attr "mode" "TI")])
11681
11682 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
11683 [(set (match_operand:V32HI 0 "register_operand" "=v")
11684 (vec_select:V32HI
11685 (vec_concat:V64HI
11686 (match_operand:V32HI 1 "register_operand" "v")
11687 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11688 (parallel [(const_int 0) (const_int 32)
11689 (const_int 1) (const_int 33)
11690 (const_int 2) (const_int 34)
11691 (const_int 3) (const_int 35)
11692 (const_int 8) (const_int 40)
11693 (const_int 9) (const_int 41)
11694 (const_int 10) (const_int 42)
11695 (const_int 11) (const_int 43)
11696 (const_int 16) (const_int 48)
11697 (const_int 17) (const_int 49)
11698 (const_int 18) (const_int 50)
11699 (const_int 19) (const_int 51)
11700 (const_int 24) (const_int 56)
11701 (const_int 25) (const_int 57)
11702 (const_int 26) (const_int 58)
11703 (const_int 27) (const_int 59)])))]
11704 "TARGET_AVX512BW"
11705 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11706 [(set_attr "type" "sselog")
11707 (set_attr "prefix" "evex")
11708 (set_attr "mode" "XI")])
11709
11710 (define_insn "avx2_interleave_lowv16hi<mask_name>"
11711 [(set (match_operand:V16HI 0 "register_operand" "=v")
11712 (vec_select:V16HI
11713 (vec_concat:V32HI
11714 (match_operand:V16HI 1 "register_operand" "v")
11715 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11716 (parallel [(const_int 0) (const_int 16)
11717 (const_int 1) (const_int 17)
11718 (const_int 2) (const_int 18)
11719 (const_int 3) (const_int 19)
11720 (const_int 8) (const_int 24)
11721 (const_int 9) (const_int 25)
11722 (const_int 10) (const_int 26)
11723 (const_int 11) (const_int 27)])))]
11724 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11725 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11726 [(set_attr "type" "sselog")
11727 (set_attr "prefix" "maybe_evex")
11728 (set_attr "mode" "OI")])
11729
11730 (define_insn "vec_interleave_lowv8hi<mask_name>"
11731 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11732 (vec_select:V8HI
11733 (vec_concat:V16HI
11734 (match_operand:V8HI 1 "register_operand" "0,v")
11735 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
11736 (parallel [(const_int 0) (const_int 8)
11737 (const_int 1) (const_int 9)
11738 (const_int 2) (const_int 10)
11739 (const_int 3) (const_int 11)])))]
11740 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11741 "@
11742 punpcklwd\t{%2, %0|%0, %2}
11743 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11744 [(set_attr "isa" "noavx,avx")
11745 (set_attr "type" "sselog")
11746 (set_attr "prefix_data16" "1,*")
11747 (set_attr "prefix" "orig,maybe_evex")
11748 (set_attr "mode" "TI")])
11749
11750 (define_insn "avx2_interleave_highv8si<mask_name>"
11751 [(set (match_operand:V8SI 0 "register_operand" "=v")
11752 (vec_select:V8SI
11753 (vec_concat:V16SI
11754 (match_operand:V8SI 1 "register_operand" "v")
11755 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11756 (parallel [(const_int 2) (const_int 10)
11757 (const_int 3) (const_int 11)
11758 (const_int 6) (const_int 14)
11759 (const_int 7) (const_int 15)])))]
11760 "TARGET_AVX2 && <mask_avx512vl_condition>"
11761 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11762 [(set_attr "type" "sselog")
11763 (set_attr "prefix" "maybe_evex")
11764 (set_attr "mode" "OI")])
11765
11766 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
11767 [(set (match_operand:V16SI 0 "register_operand" "=v")
11768 (vec_select:V16SI
11769 (vec_concat:V32SI
11770 (match_operand:V16SI 1 "register_operand" "v")
11771 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11772 (parallel [(const_int 2) (const_int 18)
11773 (const_int 3) (const_int 19)
11774 (const_int 6) (const_int 22)
11775 (const_int 7) (const_int 23)
11776 (const_int 10) (const_int 26)
11777 (const_int 11) (const_int 27)
11778 (const_int 14) (const_int 30)
11779 (const_int 15) (const_int 31)])))]
11780 "TARGET_AVX512F"
11781 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11782 [(set_attr "type" "sselog")
11783 (set_attr "prefix" "evex")
11784 (set_attr "mode" "XI")])
11785
11786
11787 (define_insn "vec_interleave_highv4si<mask_name>"
11788 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11789 (vec_select:V4SI
11790 (vec_concat:V8SI
11791 (match_operand:V4SI 1 "register_operand" "0,v")
11792 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
11793 (parallel [(const_int 2) (const_int 6)
11794 (const_int 3) (const_int 7)])))]
11795 "TARGET_SSE2 && <mask_avx512vl_condition>"
11796 "@
11797 punpckhdq\t{%2, %0|%0, %2}
11798 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11799 [(set_attr "isa" "noavx,avx")
11800 (set_attr "type" "sselog")
11801 (set_attr "prefix_data16" "1,*")
11802 (set_attr "prefix" "orig,maybe_vex")
11803 (set_attr "mode" "TI")])
11804
11805 (define_insn "avx2_interleave_lowv8si<mask_name>"
11806 [(set (match_operand:V8SI 0 "register_operand" "=v")
11807 (vec_select:V8SI
11808 (vec_concat:V16SI
11809 (match_operand:V8SI 1 "register_operand" "v")
11810 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11811 (parallel [(const_int 0) (const_int 8)
11812 (const_int 1) (const_int 9)
11813 (const_int 4) (const_int 12)
11814 (const_int 5) (const_int 13)])))]
11815 "TARGET_AVX2 && <mask_avx512vl_condition>"
11816 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11817 [(set_attr "type" "sselog")
11818 (set_attr "prefix" "maybe_evex")
11819 (set_attr "mode" "OI")])
11820
11821 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
11822 [(set (match_operand:V16SI 0 "register_operand" "=v")
11823 (vec_select:V16SI
11824 (vec_concat:V32SI
11825 (match_operand:V16SI 1 "register_operand" "v")
11826 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11827 (parallel [(const_int 0) (const_int 16)
11828 (const_int 1) (const_int 17)
11829 (const_int 4) (const_int 20)
11830 (const_int 5) (const_int 21)
11831 (const_int 8) (const_int 24)
11832 (const_int 9) (const_int 25)
11833 (const_int 12) (const_int 28)
11834 (const_int 13) (const_int 29)])))]
11835 "TARGET_AVX512F"
11836 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11837 [(set_attr "type" "sselog")
11838 (set_attr "prefix" "evex")
11839 (set_attr "mode" "XI")])
11840
11841 (define_insn "vec_interleave_lowv4si<mask_name>"
11842 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11843 (vec_select:V4SI
11844 (vec_concat:V8SI
11845 (match_operand:V4SI 1 "register_operand" "0,v")
11846 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
11847 (parallel [(const_int 0) (const_int 4)
11848 (const_int 1) (const_int 5)])))]
11849 "TARGET_SSE2 && <mask_avx512vl_condition>"
11850 "@
11851 punpckldq\t{%2, %0|%0, %2}
11852 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11853 [(set_attr "isa" "noavx,avx")
11854 (set_attr "type" "sselog")
11855 (set_attr "prefix_data16" "1,*")
11856 (set_attr "prefix" "orig,vex")
11857 (set_attr "mode" "TI")])
11858
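;; Because the 256-bit punpck instructions only interleave within their
;; 128-bit lanes, the full-width interleave is built from the in-lane low
;; and high interleaves followed by a vperm2i128 that picks the matching
;; lanes of the two temporaries.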
11859 (define_expand "vec_interleave_high<mode>"
11860 [(match_operand:VI_256 0 "register_operand" "=x")
11861 (match_operand:VI_256 1 "register_operand" "x")
11862 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11863 "TARGET_AVX2"
11864 {
11865 rtx t1 = gen_reg_rtx (<MODE>mode);
11866 rtx t2 = gen_reg_rtx (<MODE>mode);
11867 rtx t3 = gen_reg_rtx (V4DImode);
11868 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11869 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
11870 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11871 gen_lowpart (V4DImode, t2),
11872 GEN_INT (1 + (3 << 4))));
11873 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
11874 DONE;
11875 })
11876
11877 (define_expand "vec_interleave_low<mode>"
11878 [(match_operand:VI_256 0 "register_operand" "=x")
11879 (match_operand:VI_256 1 "register_operand" "x")
11880 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11881 "TARGET_AVX2"
11882 {
11883 rtx t1 = gen_reg_rtx (<MODE>mode);
11884 rtx t2 = gen_reg_rtx (<MODE>mode);
11885 rtx t3 = gen_reg_rtx (V4DImode);
11886 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11887 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
11888 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11889 gen_lowpart (V4DImode, t2),
11890 GEN_INT (0 + (2 << 4))));
11891 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
11892 DONE;
11893 })
11894
11895 ;; Modes handled by pinsr patterns.
11896 (define_mode_iterator PINSR_MODE
11897 [(V16QI "TARGET_SSE4_1") V8HI
11898 (V4SI "TARGET_SSE4_1")
11899 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
11900
11901 (define_mode_attr sse2p4_1
11902 [(V16QI "sse4_1") (V8HI "sse2")
11903 (V4SI "sse4_1") (V2DI "sse4_1")])
11904
11905 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
11906 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
11907 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
11908 (vec_merge:PINSR_MODE
11909 (vec_duplicate:PINSR_MODE
11910 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
11911 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
11912 (match_operand:SI 3 "const_int_operand")))]
11913 "TARGET_SSE2
11914 && ((unsigned) exact_log2 (INTVAL (operands[3]))
11915 < GET_MODE_NUNITS (<MODE>mode))"
11916 {
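  /* operands[3] is the vec_merge selector: a single set bit (guaranteed by
     the insn condition) marking the element being replaced.  Convert it to
     the element index expected as the pinsr immediate.  */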
11917 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
11918
11919 switch (which_alternative)
11920 {
11921 case 0:
11922 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11923 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
11924 /* FALLTHRU */
11925 case 1:
11926 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
11927 case 2:
11928 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11929 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
11930 /* FALLTHRU */
11931 case 3:
11932 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11933 default:
11934 gcc_unreachable ();
11935 }
11936 }
11937 [(set_attr "isa" "noavx,noavx,avx,avx")
11938 (set_attr "type" "sselog")
11939 (set (attr "prefix_rex")
11940 (if_then_else
11941 (and (not (match_test "TARGET_AVX"))
11942 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
11943 (const_string "1")
11944 (const_string "*")))
11945 (set (attr "prefix_data16")
11946 (if_then_else
11947 (and (not (match_test "TARGET_AVX"))
11948 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11949 (const_string "1")
11950 (const_string "*")))
11951 (set (attr "prefix_extra")
11952 (if_then_else
11953 (and (not (match_test "TARGET_AVX"))
11954 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11955 (const_string "*")
11956 (const_string "1")))
11957 (set_attr "length_immediate" "1")
11958 (set_attr "prefix" "orig,orig,vex,vex")
11959 (set_attr "mode" "TI")])
11960
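;; The masked vinsert expander encodes the inserted quarter as a vec_merge
;; selector; the *_1 pattern below decodes that selector back into the
;; immediate operand of vinsert<shuffletype><extract_suf>.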
11961 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
11962 [(match_operand:AVX512_VEC 0 "register_operand")
11963 (match_operand:AVX512_VEC 1 "register_operand")
11964 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
11965 (match_operand:SI 3 "const_0_to_3_operand")
11966 (match_operand:AVX512_VEC 4 "register_operand")
11967 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11968 "TARGET_AVX512F"
11969 {
11970   int mask, selector;
11971   mask = INTVAL (operands[3]);
11972   selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
11973 	      ? 0xFFFF ^ (0xF000 >> mask * 4)
11974 	      : 0xFF ^ (0xC0 >> mask * 2));
11975 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
11976 (operands[0], operands[1], operands[2], GEN_INT (selector),
11977 operands[4], operands[5]));
11978 DONE;
11979 })
11980
11981 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
11982 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
11983 (vec_merge:AVX512_VEC
11984 (match_operand:AVX512_VEC 1 "register_operand" "v")
11985 (vec_duplicate:AVX512_VEC
11986 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
11987 (match_operand:SI 3 "const_int_operand" "n")))]
11988 "TARGET_AVX512F"
11989 {
11990 int mask;
11991 int selector = INTVAL (operands[3]);
11992
11993 if (selector == 0xFFF || selector == 0x3F)
11994 mask = 0;
11995   else if (selector == 0xF0FF || selector == 0xCF)
11996     mask = 1;
11997   else if (selector == 0xFF0F || selector == 0xF3)
11998     mask = 2;
11999   else if (selector == 0xFFF0 || selector == 0xFC)
12000 mask = 3;
12001 else
12002 gcc_unreachable ();
12003
12004 operands[3] = GEN_INT (mask);
12005
12006 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
12007 }
12008 [(set_attr "type" "sselog")
12009 (set_attr "length_immediate" "1")
12010 (set_attr "prefix" "evex")
12011 (set_attr "mode" "<sseinsnmode>")])
12012
12013 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
12014 [(match_operand:AVX512_VEC_2 0 "register_operand")
12015 (match_operand:AVX512_VEC_2 1 "register_operand")
12016 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
12017 (match_operand:SI 3 "const_0_to_1_operand")
12018 (match_operand:AVX512_VEC_2 4 "register_operand")
12019 (match_operand:<avx512fmaskmode> 5 "register_operand")]
12020 "TARGET_AVX512F"
12021 {
12022 int mask = INTVAL (operands[3]);
12023 if (mask == 0)
12024 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
12025 operands[2], operands[4],
12026 operands[5]));
12027 else
12028 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
12029 operands[2], operands[4],
12030 operands[5]));
12031 DONE;
12032 })
12033
12034 (define_insn "vec_set_lo_<mode><mask_name>"
12035 [(set (match_operand:V16FI 0 "register_operand" "=v")
12036 (vec_concat:V16FI
12037 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12038 (vec_select:<ssehalfvecmode>
12039 (match_operand:V16FI 1 "register_operand" "v")
12040 (parallel [(const_int 8) (const_int 9)
12041 (const_int 10) (const_int 11)
12042 (const_int 12) (const_int 13)
12043 (const_int 14) (const_int 15)]))))]
12044 "TARGET_AVX512DQ"
12045 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
12046 [(set_attr "type" "sselog")
12047 (set_attr "length_immediate" "1")
12048 (set_attr "prefix" "evex")
12049 (set_attr "mode" "<sseinsnmode>")])
12050
12051 (define_insn "vec_set_hi_<mode><mask_name>"
12052 [(set (match_operand:V16FI 0 "register_operand" "=v")
12053 (vec_concat:V16FI
12054 (vec_select:<ssehalfvecmode>
12055 (match_operand:V16FI 1 "register_operand" "v")
12056 (parallel [(const_int 0) (const_int 1)
12057 (const_int 2) (const_int 3)
12058 (const_int 4) (const_int 5)
12059 (const_int 6) (const_int 7)]))
12060 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12061 "TARGET_AVX512DQ"
12062 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
12063 [(set_attr "type" "sselog")
12064 (set_attr "length_immediate" "1")
12065 (set_attr "prefix" "evex")
12066 (set_attr "mode" "<sseinsnmode>")])
12067
12068 (define_insn "vec_set_lo_<mode><mask_name>"
12069 [(set (match_operand:V8FI 0 "register_operand" "=v")
12070 (vec_concat:V8FI
12071 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12072 (vec_select:<ssehalfvecmode>
12073 (match_operand:V8FI 1 "register_operand" "v")
12074 (parallel [(const_int 4) (const_int 5)
12075 (const_int 6) (const_int 7)]))))]
12076 "TARGET_AVX512F"
12077 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
12078 [(set_attr "type" "sselog")
12079 (set_attr "length_immediate" "1")
12080 (set_attr "prefix" "evex")
12081 (set_attr "mode" "XI")])
12082
12083 (define_insn "vec_set_hi_<mode><mask_name>"
12084 [(set (match_operand:V8FI 0 "register_operand" "=v")
12085 (vec_concat:V8FI
12086 (vec_select:<ssehalfvecmode>
12087 (match_operand:V8FI 1 "register_operand" "v")
12088 (parallel [(const_int 0) (const_int 1)
12089 (const_int 2) (const_int 3)]))
12090 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12091 "TARGET_AVX512F"
12092 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
12093 [(set_attr "type" "sselog")
12094 (set_attr "length_immediate" "1")
12095 (set_attr "prefix" "evex")
12096 (set_attr "mode" "XI")])
12097
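;; The vshuf{i,f}{64x2,32x4} immediate selects 128-bit chunks of the two
;; source operands.  The expanders below translate that immediate into
;; explicit element indices for the vec_select form, and the *_1 patterns
;; reconstruct the immediate from the indices at output time.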
12098 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
12099 [(match_operand:VI8F_256 0 "register_operand")
12100 (match_operand:VI8F_256 1 "register_operand")
12101 (match_operand:VI8F_256 2 "nonimmediate_operand")
12102 (match_operand:SI 3 "const_0_to_3_operand")
12103 (match_operand:VI8F_256 4 "register_operand")
12104 (match_operand:QI 5 "register_operand")]
12105 "TARGET_AVX512DQ"
12106 {
12107 int mask = INTVAL (operands[3]);
12108 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
12109 (operands[0], operands[1], operands[2],
12110 GEN_INT (((mask >> 0) & 1) * 2 + 0),
12111 GEN_INT (((mask >> 0) & 1) * 2 + 1),
12112 GEN_INT (((mask >> 1) & 1) * 2 + 4),
12113 GEN_INT (((mask >> 1) & 1) * 2 + 5),
12114 operands[4], operands[5]));
12115 DONE;
12116 })
12117
12118 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
12119 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
12120 (vec_select:VI8F_256
12121 (vec_concat:<ssedoublemode>
12122 (match_operand:VI8F_256 1 "register_operand" "v")
12123 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
12124 (parallel [(match_operand 3 "const_0_to_3_operand")
12125 (match_operand 4 "const_0_to_3_operand")
12126 (match_operand 5 "const_4_to_7_operand")
12127 (match_operand 6 "const_4_to_7_operand")])))]
12128 "TARGET_AVX512VL
12129 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12130 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
12131 {
12132 int mask;
12133 mask = INTVAL (operands[3]) / 2;
12134 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
12135 operands[3] = GEN_INT (mask);
12136 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
12137 }
12138 [(set_attr "type" "sselog")
12139 (set_attr "length_immediate" "1")
12140 (set_attr "prefix" "evex")
12141 (set_attr "mode" "XI")])
12142
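;; 512-bit vshuff64x2/vshufi64x2.  The 8-bit immediate is split into four
;; 2-bit fields: fields 0 and 1 select 128-bit lanes of operand 1 for the
;; two low result lanes, fields 2 and 3 select lanes of operand 2 for the
;; two high result lanes.  E.g. 0x1b (fields 3,2,1,0 from the low bits up)
;; yields element indices {6,7, 4,5, 10,11, 8,9} of the concatenated input.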
12143 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
12144 [(match_operand:V8FI 0 "register_operand")
12145 (match_operand:V8FI 1 "register_operand")
12146 (match_operand:V8FI 2 "nonimmediate_operand")
12147 (match_operand:SI 3 "const_0_to_255_operand")
12148 (match_operand:V8FI 4 "register_operand")
12149 (match_operand:QI 5 "register_operand")]
12150 "TARGET_AVX512F"
12151 {
12152 int mask = INTVAL (operands[3]);
12153 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
12154 (operands[0], operands[1], operands[2],
12155 GEN_INT (((mask >> 0) & 3) * 2),
12156 GEN_INT (((mask >> 0) & 3) * 2 + 1),
12157 GEN_INT (((mask >> 2) & 3) * 2),
12158 GEN_INT (((mask >> 2) & 3) * 2 + 1),
12159 GEN_INT (((mask >> 4) & 3) * 2 + 8),
12160 GEN_INT (((mask >> 4) & 3) * 2 + 9),
12161 GEN_INT (((mask >> 6) & 3) * 2 + 8),
12162 GEN_INT (((mask >> 6) & 3) * 2 + 9),
12163 operands[4], operands[5]));
12164 DONE;
12165 })
12166
12167 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
12168 [(set (match_operand:V8FI 0 "register_operand" "=v")
12169 (vec_select:V8FI
12170 (vec_concat:<ssedoublemode>
12171 (match_operand:V8FI 1 "register_operand" "v")
12172 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
12173 (parallel [(match_operand 3 "const_0_to_7_operand")
12174 (match_operand 4 "const_0_to_7_operand")
12175 (match_operand 5 "const_0_to_7_operand")
12176 (match_operand 6 "const_0_to_7_operand")
12177 (match_operand 7 "const_8_to_15_operand")
12178 (match_operand 8 "const_8_to_15_operand")
12179 (match_operand 9 "const_8_to_15_operand")
12180 (match_operand 10 "const_8_to_15_operand")])))]
12181 "TARGET_AVX512F
12182 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12183 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
12184 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12185 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
12186 {
12187 int mask;
12188 mask = INTVAL (operands[3]) / 2;
12189 mask |= INTVAL (operands[5]) / 2 << 2;
12190 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
12191 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
12192 operands[3] = GEN_INT (mask);
12193
12194 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12195 }
12196 [(set_attr "type" "sselog")
12197 (set_attr "length_immediate" "1")
12198 (set_attr "prefix" "evex")
12199 (set_attr "mode" "<sseinsnmode>")])
12200
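;; vshuff32x4/vshufi32x4 use the same lane-selection scheme as the 64x2
;; patterns above, except that each selected 128-bit lane carries four
;; 32-bit elements, so every immediate field expands to four consecutive
;; element indices.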
12201 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
12202 [(match_operand:VI4F_256 0 "register_operand")
12203 (match_operand:VI4F_256 1 "register_operand")
12204 (match_operand:VI4F_256 2 "nonimmediate_operand")
12205 (match_operand:SI 3 "const_0_to_3_operand")
12206 (match_operand:VI4F_256 4 "register_operand")
12207 (match_operand:QI 5 "register_operand")]
12208 "TARGET_AVX512VL"
12209 {
12210 int mask = INTVAL (operands[3]);
12211 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
12212 (operands[0], operands[1], operands[2],
12213 GEN_INT (((mask >> 0) & 1) * 4 + 0),
12214 GEN_INT (((mask >> 0) & 1) * 4 + 1),
12215 GEN_INT (((mask >> 0) & 1) * 4 + 2),
12216 GEN_INT (((mask >> 0) & 1) * 4 + 3),
12217 GEN_INT (((mask >> 1) & 1) * 4 + 8),
12218 GEN_INT (((mask >> 1) & 1) * 4 + 9),
12219 GEN_INT (((mask >> 1) & 1) * 4 + 10),
12220 GEN_INT (((mask >> 1) & 1) * 4 + 11),
12221 operands[4], operands[5]));
12222 DONE;
12223 })
12224
12225 (define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
12226 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
12227 (vec_select:VI4F_256
12228 (vec_concat:<ssedoublemode>
12229 (match_operand:VI4F_256 1 "register_operand" "v")
12230 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
12231 (parallel [(match_operand 3 "const_0_to_7_operand")
12232 (match_operand 4 "const_0_to_7_operand")
12233 (match_operand 5 "const_0_to_7_operand")
12234 (match_operand 6 "const_0_to_7_operand")
12235 (match_operand 7 "const_8_to_15_operand")
12236 (match_operand 8 "const_8_to_15_operand")
12237 (match_operand 9 "const_8_to_15_operand")
12238 (match_operand 10 "const_8_to_15_operand")])))]
12239 "TARGET_AVX512VL
12240 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12241 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12242 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12243 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12244 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12245 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
12246 {
12247 int mask;
12248 mask = INTVAL (operands[3]) / 4;
12249 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
12250 operands[3] = GEN_INT (mask);
12251
12252 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12253 }
12254 [(set_attr "type" "sselog")
12255 (set_attr "length_immediate" "1")
12256 (set_attr "prefix" "evex")
12257 (set_attr "mode" "<sseinsnmode>")])
12258
12259 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
12260 [(match_operand:V16FI 0 "register_operand")
12261 (match_operand:V16FI 1 "register_operand")
12262 (match_operand:V16FI 2 "nonimmediate_operand")
12263 (match_operand:SI 3 "const_0_to_255_operand")
12264 (match_operand:V16FI 4 "register_operand")
12265 (match_operand:HI 5 "register_operand")]
12266 "TARGET_AVX512F"
12267 {
12268 int mask = INTVAL (operands[3]);
12269 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
12270 (operands[0], operands[1], operands[2],
12271 GEN_INT (((mask >> 0) & 3) * 4),
12272 GEN_INT (((mask >> 0) & 3) * 4 + 1),
12273 GEN_INT (((mask >> 0) & 3) * 4 + 2),
12274 GEN_INT (((mask >> 0) & 3) * 4 + 3),
12275 GEN_INT (((mask >> 2) & 3) * 4),
12276 GEN_INT (((mask >> 2) & 3) * 4 + 1),
12277 GEN_INT (((mask >> 2) & 3) * 4 + 2),
12278 GEN_INT (((mask >> 2) & 3) * 4 + 3),
12279 GEN_INT (((mask >> 4) & 3) * 4 + 16),
12280 GEN_INT (((mask >> 4) & 3) * 4 + 17),
12281 GEN_INT (((mask >> 4) & 3) * 4 + 18),
12282 GEN_INT (((mask >> 4) & 3) * 4 + 19),
12283 GEN_INT (((mask >> 6) & 3) * 4 + 16),
12284 GEN_INT (((mask >> 6) & 3) * 4 + 17),
12285 GEN_INT (((mask >> 6) & 3) * 4 + 18),
12286 GEN_INT (((mask >> 6) & 3) * 4 + 19),
12287 operands[4], operands[5]));
12288 DONE;
12289 })
12290
12291 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
12292 [(set (match_operand:V16FI 0 "register_operand" "=v")
12293 (vec_select:V16FI
12294 (vec_concat:<ssedoublemode>
12295 (match_operand:V16FI 1 "register_operand" "v")
12296 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
12297 (parallel [(match_operand 3 "const_0_to_15_operand")
12298 (match_operand 4 "const_0_to_15_operand")
12299 (match_operand 5 "const_0_to_15_operand")
12300 (match_operand 6 "const_0_to_15_operand")
12301 (match_operand 7 "const_0_to_15_operand")
12302 (match_operand 8 "const_0_to_15_operand")
12303 (match_operand 9 "const_0_to_15_operand")
12304 (match_operand 10 "const_0_to_15_operand")
12305 (match_operand 11 "const_16_to_31_operand")
12306 (match_operand 12 "const_16_to_31_operand")
12307 (match_operand 13 "const_16_to_31_operand")
12308 (match_operand 14 "const_16_to_31_operand")
12309 (match_operand 15 "const_16_to_31_operand")
12310 (match_operand 16 "const_16_to_31_operand")
12311 (match_operand 17 "const_16_to_31_operand")
12312 (match_operand 18 "const_16_to_31_operand")])))]
12313 "TARGET_AVX512F
12314 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12315 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12316 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12317 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12318 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12319 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
12320 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
12321 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
12322 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
12323 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
12324 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
12325 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
12326 {
12327 int mask;
12328 mask = INTVAL (operands[3]) / 4;
12329 mask |= INTVAL (operands[7]) / 4 << 2;
12330 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
12331 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
12332 operands[3] = GEN_INT (mask);
12333
12334 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
12335 }
12336 [(set_attr "type" "sselog")
12337 (set_attr "length_immediate" "1")
12338 (set_attr "prefix" "evex")
12339 (set_attr "mode" "<sseinsnmode>")])
12340
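;; vpshufd.  The 8-bit immediate holds four 2-bit source selectors, one per
;; destination dword within a 128-bit lane; wider vectors repeat the same
;; selection in every lane, which is why the expanders below add 4, 8 and 12
;; to the decoded fields.  E.g. 0x1b (fields 3,2,1,0) reverses the dwords of
;; each lane.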
12341 (define_expand "avx512f_pshufdv3_mask"
12342 [(match_operand:V16SI 0 "register_operand")
12343 (match_operand:V16SI 1 "nonimmediate_operand")
12344 (match_operand:SI 2 "const_0_to_255_operand")
12345 (match_operand:V16SI 3 "register_operand")
12346 (match_operand:HI 4 "register_operand")]
12347 "TARGET_AVX512F"
12348 {
12349 int mask = INTVAL (operands[2]);
12350 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
12351 GEN_INT ((mask >> 0) & 3),
12352 GEN_INT ((mask >> 2) & 3),
12353 GEN_INT ((mask >> 4) & 3),
12354 GEN_INT ((mask >> 6) & 3),
12355 GEN_INT (((mask >> 0) & 3) + 4),
12356 GEN_INT (((mask >> 2) & 3) + 4),
12357 GEN_INT (((mask >> 4) & 3) + 4),
12358 GEN_INT (((mask >> 6) & 3) + 4),
12359 GEN_INT (((mask >> 0) & 3) + 8),
12360 GEN_INT (((mask >> 2) & 3) + 8),
12361 GEN_INT (((mask >> 4) & 3) + 8),
12362 GEN_INT (((mask >> 6) & 3) + 8),
12363 GEN_INT (((mask >> 0) & 3) + 12),
12364 GEN_INT (((mask >> 2) & 3) + 12),
12365 GEN_INT (((mask >> 4) & 3) + 12),
12366 GEN_INT (((mask >> 6) & 3) + 12),
12367 operands[3], operands[4]));
12368 DONE;
12369 })
12370
12371 (define_insn "avx512f_pshufd_1<mask_name>"
12372 [(set (match_operand:V16SI 0 "register_operand" "=v")
12373 (vec_select:V16SI
12374 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
12375 (parallel [(match_operand 2 "const_0_to_3_operand")
12376 (match_operand 3 "const_0_to_3_operand")
12377 (match_operand 4 "const_0_to_3_operand")
12378 (match_operand 5 "const_0_to_3_operand")
12379 (match_operand 6 "const_4_to_7_operand")
12380 (match_operand 7 "const_4_to_7_operand")
12381 (match_operand 8 "const_4_to_7_operand")
12382 (match_operand 9 "const_4_to_7_operand")
12383 (match_operand 10 "const_8_to_11_operand")
12384 (match_operand 11 "const_8_to_11_operand")
12385 (match_operand 12 "const_8_to_11_operand")
12386 (match_operand 13 "const_8_to_11_operand")
12387 (match_operand 14 "const_12_to_15_operand")
12388 (match_operand 15 "const_12_to_15_operand")
12389 (match_operand 16 "const_12_to_15_operand")
12390 (match_operand 17 "const_12_to_15_operand")])))]
12391 "TARGET_AVX512F
12392 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12393 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12394 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12395 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
12396 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
12397 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
12398 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
12399 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
12400 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
12401 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
12402 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
12403 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
12404 {
12405 int mask = 0;
12406 mask |= INTVAL (operands[2]) << 0;
12407 mask |= INTVAL (operands[3]) << 2;
12408 mask |= INTVAL (operands[4]) << 4;
12409 mask |= INTVAL (operands[5]) << 6;
12410 operands[2] = GEN_INT (mask);
12411
12412 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
12413 }
12414 [(set_attr "type" "sselog1")
12415 (set_attr "prefix" "evex")
12416 (set_attr "length_immediate" "1")
12417 (set_attr "mode" "XI")])
12418
12419 (define_expand "avx512vl_pshufdv3_mask"
12420 [(match_operand:V8SI 0 "register_operand")
12421 (match_operand:V8SI 1 "nonimmediate_operand")
12422 (match_operand:SI 2 "const_0_to_255_operand")
12423 (match_operand:V8SI 3 "register_operand")
12424 (match_operand:QI 4 "register_operand")]
12425 "TARGET_AVX512VL"
12426 {
12427 int mask = INTVAL (operands[2]);
12428 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
12429 GEN_INT ((mask >> 0) & 3),
12430 GEN_INT ((mask >> 2) & 3),
12431 GEN_INT ((mask >> 4) & 3),
12432 GEN_INT ((mask >> 6) & 3),
12433 GEN_INT (((mask >> 0) & 3) + 4),
12434 GEN_INT (((mask >> 2) & 3) + 4),
12435 GEN_INT (((mask >> 4) & 3) + 4),
12436 GEN_INT (((mask >> 6) & 3) + 4),
12437 operands[3], operands[4]));
12438 DONE;
12439 })
12440
12441 (define_expand "avx2_pshufdv3"
12442 [(match_operand:V8SI 0 "register_operand")
12443 (match_operand:V8SI 1 "nonimmediate_operand")
12444 (match_operand:SI 2 "const_0_to_255_operand")]
12445 "TARGET_AVX2"
12446 {
12447 int mask = INTVAL (operands[2]);
12448 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
12449 GEN_INT ((mask >> 0) & 3),
12450 GEN_INT ((mask >> 2) & 3),
12451 GEN_INT ((mask >> 4) & 3),
12452 GEN_INT ((mask >> 6) & 3),
12453 GEN_INT (((mask >> 0) & 3) + 4),
12454 GEN_INT (((mask >> 2) & 3) + 4),
12455 GEN_INT (((mask >> 4) & 3) + 4),
12456 GEN_INT (((mask >> 6) & 3) + 4)));
12457 DONE;
12458 })
12459
12460 (define_insn "avx2_pshufd_1<mask_name>"
12461 [(set (match_operand:V8SI 0 "register_operand" "=v")
12462 (vec_select:V8SI
12463 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
12464 (parallel [(match_operand 2 "const_0_to_3_operand")
12465 (match_operand 3 "const_0_to_3_operand")
12466 (match_operand 4 "const_0_to_3_operand")
12467 (match_operand 5 "const_0_to_3_operand")
12468 (match_operand 6 "const_4_to_7_operand")
12469 (match_operand 7 "const_4_to_7_operand")
12470 (match_operand 8 "const_4_to_7_operand")
12471 (match_operand 9 "const_4_to_7_operand")])))]
12472 "TARGET_AVX2
12473 && <mask_avx512vl_condition>
12474 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12475 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12476 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12477 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
12478 {
12479 int mask = 0;
12480 mask |= INTVAL (operands[2]) << 0;
12481 mask |= INTVAL (operands[3]) << 2;
12482 mask |= INTVAL (operands[4]) << 4;
12483 mask |= INTVAL (operands[5]) << 6;
12484 operands[2] = GEN_INT (mask);
12485
12486 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12487 }
12488 [(set_attr "type" "sselog1")
12489 (set_attr "prefix" "maybe_evex")
12490 (set_attr "length_immediate" "1")
12491 (set_attr "mode" "OI")])
12492
12493 (define_expand "avx512vl_pshufd_mask"
12494 [(match_operand:V4SI 0 "register_operand")
12495 (match_operand:V4SI 1 "nonimmediate_operand")
12496 (match_operand:SI 2 "const_0_to_255_operand")
12497 (match_operand:V4SI 3 "register_operand")
12498 (match_operand:QI 4 "register_operand")]
12499 "TARGET_AVX512VL"
12500 {
12501 int mask = INTVAL (operands[2]);
12502 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
12503 GEN_INT ((mask >> 0) & 3),
12504 GEN_INT ((mask >> 2) & 3),
12505 GEN_INT ((mask >> 4) & 3),
12506 GEN_INT ((mask >> 6) & 3),
12507 operands[3], operands[4]));
12508 DONE;
12509 })
12510
12511 (define_expand "sse2_pshufd"
12512 [(match_operand:V4SI 0 "register_operand")
12513 (match_operand:V4SI 1 "vector_operand")
12514 (match_operand:SI 2 "const_int_operand")]
12515 "TARGET_SSE2"
12516 {
12517 int mask = INTVAL (operands[2]);
12518 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
12519 GEN_INT ((mask >> 0) & 3),
12520 GEN_INT ((mask >> 2) & 3),
12521 GEN_INT ((mask >> 4) & 3),
12522 GEN_INT ((mask >> 6) & 3)));
12523 DONE;
12524 })
12525
12526 (define_insn "sse2_pshufd_1<mask_name>"
12527 [(set (match_operand:V4SI 0 "register_operand" "=v")
12528 (vec_select:V4SI
12529 (match_operand:V4SI 1 "vector_operand" "vBm")
12530 (parallel [(match_operand 2 "const_0_to_3_operand")
12531 (match_operand 3 "const_0_to_3_operand")
12532 (match_operand 4 "const_0_to_3_operand")
12533 (match_operand 5 "const_0_to_3_operand")])))]
12534 "TARGET_SSE2 && <mask_avx512vl_condition>"
12535 {
12536 int mask = 0;
12537 mask |= INTVAL (operands[2]) << 0;
12538 mask |= INTVAL (operands[3]) << 2;
12539 mask |= INTVAL (operands[4]) << 4;
12540 mask |= INTVAL (operands[5]) << 6;
12541 operands[2] = GEN_INT (mask);
12542
12543 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12544 }
12545 [(set_attr "type" "sselog1")
12546 (set_attr "prefix_data16" "1")
12547 (set_attr "prefix" "<mask_prefix2>")
12548 (set_attr "length_immediate" "1")
12549 (set_attr "mode" "TI")])
12550
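;; vpshuflw shuffles the four low words of each 128-bit lane according to
;; four 2-bit immediate fields and passes the four high words through
;; unchanged (the const_int 4..7 selectors in the _1 patterns below).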
12551 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
12552 [(set (match_operand:V32HI 0 "register_operand" "=v")
12553 (unspec:V32HI
12554 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12555 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12556 UNSPEC_PSHUFLW))]
12557 "TARGET_AVX512BW"
12558 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12559 [(set_attr "type" "sselog")
12560 (set_attr "prefix" "evex")
12561 (set_attr "mode" "XI")])
12562
12563 (define_expand "avx512vl_pshuflwv3_mask"
12564 [(match_operand:V16HI 0 "register_operand")
12565 (match_operand:V16HI 1 "nonimmediate_operand")
12566 (match_operand:SI 2 "const_0_to_255_operand")
12567 (match_operand:V16HI 3 "register_operand")
12568 (match_operand:HI 4 "register_operand")]
12569 "TARGET_AVX512VL && TARGET_AVX512BW"
12570 {
12571 int mask = INTVAL (operands[2]);
12572 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
12573 GEN_INT ((mask >> 0) & 3),
12574 GEN_INT ((mask >> 2) & 3),
12575 GEN_INT ((mask >> 4) & 3),
12576 GEN_INT ((mask >> 6) & 3),
12577 GEN_INT (((mask >> 0) & 3) + 8),
12578 GEN_INT (((mask >> 2) & 3) + 8),
12579 GEN_INT (((mask >> 4) & 3) + 8),
12580 GEN_INT (((mask >> 6) & 3) + 8),
12581 operands[3], operands[4]));
12582 DONE;
12583 })
12584
12585 (define_expand "avx2_pshuflwv3"
12586 [(match_operand:V16HI 0 "register_operand")
12587 (match_operand:V16HI 1 "nonimmediate_operand")
12588 (match_operand:SI 2 "const_0_to_255_operand")]
12589 "TARGET_AVX2"
12590 {
12591 int mask = INTVAL (operands[2]);
12592 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
12593 GEN_INT ((mask >> 0) & 3),
12594 GEN_INT ((mask >> 2) & 3),
12595 GEN_INT ((mask >> 4) & 3),
12596 GEN_INT ((mask >> 6) & 3),
12597 GEN_INT (((mask >> 0) & 3) + 8),
12598 GEN_INT (((mask >> 2) & 3) + 8),
12599 GEN_INT (((mask >> 4) & 3) + 8),
12600 GEN_INT (((mask >> 6) & 3) + 8)));
12601 DONE;
12602 })
12603
12604 (define_insn "avx2_pshuflw_1<mask_name>"
12605 [(set (match_operand:V16HI 0 "register_operand" "=v")
12606 (vec_select:V16HI
12607 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12608 (parallel [(match_operand 2 "const_0_to_3_operand")
12609 (match_operand 3 "const_0_to_3_operand")
12610 (match_operand 4 "const_0_to_3_operand")
12611 (match_operand 5 "const_0_to_3_operand")
12612 (const_int 4)
12613 (const_int 5)
12614 (const_int 6)
12615 (const_int 7)
12616 (match_operand 6 "const_8_to_11_operand")
12617 (match_operand 7 "const_8_to_11_operand")
12618 (match_operand 8 "const_8_to_11_operand")
12619 (match_operand 9 "const_8_to_11_operand")
12620 (const_int 12)
12621 (const_int 13)
12622 (const_int 14)
12623 (const_int 15)])))]
12624 "TARGET_AVX2
12625 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12626 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12627 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12628 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12629 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12630 {
12631 int mask = 0;
12632 mask |= INTVAL (operands[2]) << 0;
12633 mask |= INTVAL (operands[3]) << 2;
12634 mask |= INTVAL (operands[4]) << 4;
12635 mask |= INTVAL (operands[5]) << 6;
12636 operands[2] = GEN_INT (mask);
12637
12638 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12639 }
12640 [(set_attr "type" "sselog")
12641 (set_attr "prefix" "maybe_evex")
12642 (set_attr "length_immediate" "1")
12643 (set_attr "mode" "OI")])
12644
12645 (define_expand "avx512vl_pshuflw_mask"
12646 [(match_operand:V8HI 0 "register_operand")
12647 (match_operand:V8HI 1 "nonimmediate_operand")
12648 (match_operand:SI 2 "const_0_to_255_operand")
12649 (match_operand:V8HI 3 "register_operand")
12650 (match_operand:QI 4 "register_operand")]
12651 "TARGET_AVX512VL && TARGET_AVX512BW"
12652 {
12653 int mask = INTVAL (operands[2]);
12654 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
12655 GEN_INT ((mask >> 0) & 3),
12656 GEN_INT ((mask >> 2) & 3),
12657 GEN_INT ((mask >> 4) & 3),
12658 GEN_INT ((mask >> 6) & 3),
12659 operands[3], operands[4]));
12660 DONE;
12661 })
12662
12663 (define_expand "sse2_pshuflw"
12664 [(match_operand:V8HI 0 "register_operand")
12665 (match_operand:V8HI 1 "vector_operand")
12666 (match_operand:SI 2 "const_int_operand")]
12667 "TARGET_SSE2"
12668 {
12669 int mask = INTVAL (operands[2]);
12670 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
12671 GEN_INT ((mask >> 0) & 3),
12672 GEN_INT ((mask >> 2) & 3),
12673 GEN_INT ((mask >> 4) & 3),
12674 GEN_INT ((mask >> 6) & 3)));
12675 DONE;
12676 })
12677
12678 (define_insn "sse2_pshuflw_1<mask_name>"
12679 [(set (match_operand:V8HI 0 "register_operand" "=v")
12680 (vec_select:V8HI
12681 (match_operand:V8HI 1 "vector_operand" "vBm")
12682 (parallel [(match_operand 2 "const_0_to_3_operand")
12683 (match_operand 3 "const_0_to_3_operand")
12684 (match_operand 4 "const_0_to_3_operand")
12685 (match_operand 5 "const_0_to_3_operand")
12686 (const_int 4)
12687 (const_int 5)
12688 (const_int 6)
12689 (const_int 7)])))]
12690 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12691 {
12692 int mask = 0;
12693 mask |= INTVAL (operands[2]) << 0;
12694 mask |= INTVAL (operands[3]) << 2;
12695 mask |= INTVAL (operands[4]) << 4;
12696 mask |= INTVAL (operands[5]) << 6;
12697 operands[2] = GEN_INT (mask);
12698
12699 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12700 }
12701 [(set_attr "type" "sselog")
12702 (set_attr "prefix_data16" "0")
12703 (set_attr "prefix_rep" "1")
12704 (set_attr "prefix" "maybe_vex")
12705 (set_attr "length_immediate" "1")
12706 (set_attr "mode" "TI")])
12707
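;; vpshufhw is the converse of vpshuflw: the four high words of each
;; 128-bit lane are shuffled and the low words pass through unchanged,
;; which is why the _1 patterns subtract 4 when reassembling the immediate.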
12708 (define_expand "avx2_pshufhwv3"
12709 [(match_operand:V16HI 0 "register_operand")
12710 (match_operand:V16HI 1 "nonimmediate_operand")
12711 (match_operand:SI 2 "const_0_to_255_operand")]
12712 "TARGET_AVX2"
12713 {
12714 int mask = INTVAL (operands[2]);
12715 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
12716 GEN_INT (((mask >> 0) & 3) + 4),
12717 GEN_INT (((mask >> 2) & 3) + 4),
12718 GEN_INT (((mask >> 4) & 3) + 4),
12719 GEN_INT (((mask >> 6) & 3) + 4),
12720 GEN_INT (((mask >> 0) & 3) + 12),
12721 GEN_INT (((mask >> 2) & 3) + 12),
12722 GEN_INT (((mask >> 4) & 3) + 12),
12723 GEN_INT (((mask >> 6) & 3) + 12)));
12724 DONE;
12725 })
12726
12727 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
12728 [(set (match_operand:V32HI 0 "register_operand" "=v")
12729 (unspec:V32HI
12730 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12731 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12732 UNSPEC_PSHUFHW))]
12733 "TARGET_AVX512BW"
12734 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12735 [(set_attr "type" "sselog")
12736 (set_attr "prefix" "evex")
12737 (set_attr "mode" "XI")])
12738
12739 (define_expand "avx512vl_pshufhwv3_mask"
12740 [(match_operand:V16HI 0 "register_operand")
12741 (match_operand:V16HI 1 "nonimmediate_operand")
12742 (match_operand:SI 2 "const_0_to_255_operand")
12743 (match_operand:V16HI 3 "register_operand")
12744 (match_operand:HI 4 "register_operand")]
12745 "TARGET_AVX512VL && TARGET_AVX512BW"
12746 {
12747 int mask = INTVAL (operands[2]);
12748 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
12749 GEN_INT (((mask >> 0) & 3) + 4),
12750 GEN_INT (((mask >> 2) & 3) + 4),
12751 GEN_INT (((mask >> 4) & 3) + 4),
12752 GEN_INT (((mask >> 6) & 3) + 4),
12753 GEN_INT (((mask >> 0) & 3) + 12),
12754 GEN_INT (((mask >> 2) & 3) + 12),
12755 GEN_INT (((mask >> 4) & 3) + 12),
12756 GEN_INT (((mask >> 6) & 3) + 12),
12757 operands[3], operands[4]));
12758 DONE;
12759 })
12760
12761 (define_insn "avx2_pshufhw_1<mask_name>"
12762 [(set (match_operand:V16HI 0 "register_operand" "=v")
12763 (vec_select:V16HI
12764 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12765 (parallel [(const_int 0)
12766 (const_int 1)
12767 (const_int 2)
12768 (const_int 3)
12769 (match_operand 2 "const_4_to_7_operand")
12770 (match_operand 3 "const_4_to_7_operand")
12771 (match_operand 4 "const_4_to_7_operand")
12772 (match_operand 5 "const_4_to_7_operand")
12773 (const_int 8)
12774 (const_int 9)
12775 (const_int 10)
12776 (const_int 11)
12777 (match_operand 6 "const_12_to_15_operand")
12778 (match_operand 7 "const_12_to_15_operand")
12779 (match_operand 8 "const_12_to_15_operand")
12780 (match_operand 9 "const_12_to_15_operand")])))]
12781 "TARGET_AVX2
12782 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12783 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12784 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12785 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12786 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12787 {
12788 int mask = 0;
12789 mask |= (INTVAL (operands[2]) - 4) << 0;
12790 mask |= (INTVAL (operands[3]) - 4) << 2;
12791 mask |= (INTVAL (operands[4]) - 4) << 4;
12792 mask |= (INTVAL (operands[5]) - 4) << 6;
12793 operands[2] = GEN_INT (mask);
12794
12795 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12796 }
12797 [(set_attr "type" "sselog")
12798 (set_attr "prefix" "maybe_evex")
12799 (set_attr "length_immediate" "1")
12800 (set_attr "mode" "OI")])
12801
12802 (define_expand "avx512vl_pshufhw_mask"
12803 [(match_operand:V8HI 0 "register_operand")
12804 (match_operand:V8HI 1 "nonimmediate_operand")
12805 (match_operand:SI 2 "const_0_to_255_operand")
12806 (match_operand:V8HI 3 "register_operand")
12807 (match_operand:QI 4 "register_operand")]
12808 "TARGET_AVX512VL && TARGET_AVX512BW"
12809 {
12810 int mask = INTVAL (operands[2]);
12811 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
12812 GEN_INT (((mask >> 0) & 3) + 4),
12813 GEN_INT (((mask >> 2) & 3) + 4),
12814 GEN_INT (((mask >> 4) & 3) + 4),
12815 GEN_INT (((mask >> 6) & 3) + 4),
12816 operands[3], operands[4]));
12817 DONE;
12818 })
12819
12820 (define_expand "sse2_pshufhw"
12821 [(match_operand:V8HI 0 "register_operand")
12822 (match_operand:V8HI 1 "vector_operand")
12823 (match_operand:SI 2 "const_int_operand")]
12824 "TARGET_SSE2"
12825 {
12826 int mask = INTVAL (operands[2]);
12827 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
12828 GEN_INT (((mask >> 0) & 3) + 4),
12829 GEN_INT (((mask >> 2) & 3) + 4),
12830 GEN_INT (((mask >> 4) & 3) + 4),
12831 GEN_INT (((mask >> 6) & 3) + 4)));
12832 DONE;
12833 })
12834
12835 (define_insn "sse2_pshufhw_1<mask_name>"
12836 [(set (match_operand:V8HI 0 "register_operand" "=v")
12837 (vec_select:V8HI
12838 (match_operand:V8HI 1 "vector_operand" "vBm")
12839 (parallel [(const_int 0)
12840 (const_int 1)
12841 (const_int 2)
12842 (const_int 3)
12843 (match_operand 2 "const_4_to_7_operand")
12844 (match_operand 3 "const_4_to_7_operand")
12845 (match_operand 4 "const_4_to_7_operand")
12846 (match_operand 5 "const_4_to_7_operand")])))]
12847 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12848 {
12849 int mask = 0;
12850 mask |= (INTVAL (operands[2]) - 4) << 0;
12851 mask |= (INTVAL (operands[3]) - 4) << 2;
12852 mask |= (INTVAL (operands[4]) - 4) << 4;
12853 mask |= (INTVAL (operands[5]) - 4) << 6;
12854 operands[2] = GEN_INT (mask);
12855
12856 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12857 }
12858 [(set_attr "type" "sselog")
12859 (set_attr "prefix_rep" "1")
12860 (set_attr "prefix_data16" "0")
12861 (set_attr "prefix" "maybe_vex")
12862 (set_attr "length_immediate" "1")
12863 (set_attr "mode" "TI")])
12864
12865 (define_expand "sse2_loadd"
12866 [(set (match_operand:V4SI 0 "register_operand")
12867 (vec_merge:V4SI
12868 (vec_duplicate:V4SI
12869 (match_operand:SI 1 "nonimmediate_operand"))
12870 (match_dup 2)
12871 (const_int 1)))]
12872 "TARGET_SSE"
12873 "operands[2] = CONST0_RTX (V4SImode);")
12874
12875 (define_insn "sse2_loadld"
12876 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
12877 (vec_merge:V4SI
12878 (vec_duplicate:V4SI
12879 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
12880 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
12881 (const_int 1)))]
12882 "TARGET_SSE"
12883 "@
12884 %vmovd\t{%2, %0|%0, %2}
12885 %vmovd\t{%2, %0|%0, %2}
12886 movss\t{%2, %0|%0, %2}
12887 movss\t{%2, %0|%0, %2}
12888 vmovss\t{%2, %1, %0|%0, %1, %2}"
12889 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
12890 (set_attr "type" "ssemov")
12891 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
12892 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
12893
12894 ;; QI and HI modes handled by pextr patterns.
12895 (define_mode_iterator PEXTR_MODE12
12896 [(V16QI "TARGET_SSE4_1") V8HI])
12897
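;; Extraction of a HImode element to a general register only needs SSE2
;; (pextrw); extracting a QImode element or storing straight to memory
;; additionally requires SSE4.1, hence the iterator condition above and the
;; "sse4" isa attribute on the memory alternative below.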
12898 (define_insn "*vec_extract<mode>"
12899 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m")
12900 (vec_select:<ssescalarmode>
12901 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x")
12902 (parallel
12903 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
12904 "TARGET_SSE2"
12905 "@
12906 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
12907 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12908 [(set_attr "isa" "*,sse4")
12909 (set_attr "type" "sselog1")
12910 (set_attr "prefix_data16" "1")
12911 (set (attr "prefix_extra")
12912 (if_then_else
12913 (and (eq_attr "alternative" "0")
12914 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12915 (const_string "*")
12916 (const_string "1")))
12917 (set_attr "length_immediate" "1")
12918 (set_attr "prefix" "maybe_vex")
12919 (set_attr "mode" "TI")])
12920
12921 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
12922 [(set (match_operand:SWI48 0 "register_operand" "=r")
12923 (zero_extend:SWI48
12924 (vec_select:<PEXTR_MODE12:ssescalarmode>
12925 (match_operand:PEXTR_MODE12 1 "register_operand" "x")
12926 (parallel
12927 [(match_operand:SI 2
12928 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
12929 "TARGET_SSE2"
12930 "%vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
12931 [(set_attr "type" "sselog1")
12932 (set_attr "prefix_data16" "1")
12933 (set (attr "prefix_extra")
12934 (if_then_else
12935 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
12936 (const_string "*")
12937 (const_string "1")))
12938 (set_attr "length_immediate" "1")
12939 (set_attr "prefix" "maybe_vex")
12940 (set_attr "mode" "TI")])
12941
12942 (define_insn "*vec_extract<mode>_mem"
12943 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
12944 (vec_select:<ssescalarmode>
12945 (match_operand:VI12_128 1 "memory_operand" "o")
12946 (parallel
12947 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
12948 "TARGET_SSE"
12949 "#")
12950
12951 (define_insn "*vec_extract<ssevecmodelower>_0"
12952 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
12953 (vec_select:SWI48
12954 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
12955 (parallel [(const_int 0)])))]
12956 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12957 "#"
12958 [(set_attr "isa" "*,sse4,*,*")])
12959
12960 (define_insn_and_split "*vec_extractv4si_0_zext"
12961 [(set (match_operand:DI 0 "register_operand" "=r")
12962 (zero_extend:DI
12963 (vec_select:SI
12964 (match_operand:V4SI 1 "register_operand" "x")
12965 (parallel [(const_int 0)]))))]
12966 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
12967 "#"
12968 "&& reload_completed"
12969 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12970 "operands[1] = gen_lowpart (SImode, operands[1]);")
12971
12972 (define_insn "*vec_extractv2di_0_sse"
12973 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
12974 (vec_select:DI
12975 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
12976 (parallel [(const_int 0)])))]
12977 "TARGET_SSE && !TARGET_64BIT
12978 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12979 "#")
12980
12981 (define_split
12982 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
12983 (vec_select:SWI48x
12984 (match_operand:<ssevecmode> 1 "register_operand")
12985 (parallel [(const_int 0)])))]
12986 "TARGET_SSE && reload_completed"
12987 [(set (match_dup 0) (match_dup 1))]
12988 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
12989
12990 (define_insn "*vec_extractv4si"
12991 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x")
12992 (vec_select:SI
12993 (match_operand:V4SI 1 "register_operand" "x,0,0,x")
12994 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
12995 "TARGET_SSE4_1"
12996 {
12997 switch (which_alternative)
12998 {
12999 case 0:
13000 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
13001
13002 case 1:
13003 case 2:
13004 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
13005 return "psrldq\t{%2, %0|%0, %2}";
13006
13007 case 3:
13008 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
13009 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
13010
13011 default:
13012 gcc_unreachable ();
13013 }
13014 }
13015 [(set_attr "isa" "*,noavx,noavx,avx")
13016 (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1")
13017 (set_attr "prefix_extra" "1,*,*,*")
13018 (set_attr "length_immediate" "1")
13019 (set_attr "prefix" "maybe_vex,orig,orig,vex")
13020 (set_attr "mode" "TI")])
13021
13022 (define_insn "*vec_extractv4si_zext"
13023 [(set (match_operand:DI 0 "register_operand" "=r")
13024 (zero_extend:DI
13025 (vec_select:SI
13026 (match_operand:V4SI 1 "register_operand" "x")
13027 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13028 "TARGET_64BIT && TARGET_SSE4_1"
13029 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
13030 [(set_attr "type" "sselog1")
13031 (set_attr "prefix_extra" "1")
13032 (set_attr "length_immediate" "1")
13033 (set_attr "prefix" "maybe_vex")
13034 (set_attr "mode" "TI")])
13035
13036 (define_insn "*vec_extractv4si_mem"
13037 [(set (match_operand:SI 0 "register_operand" "=x,r")
13038 (vec_select:SI
13039 (match_operand:V4SI 1 "memory_operand" "o,o")
13040 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
13041 "TARGET_SSE"
13042 "#")
13043
13044 (define_insn_and_split "*vec_extractv4si_zext_mem"
13045 [(set (match_operand:DI 0 "register_operand" "=x,r")
13046 (zero_extend:DI
13047 (vec_select:SI
13048 (match_operand:V4SI 1 "memory_operand" "o,o")
13049 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13050 "TARGET_64BIT && TARGET_SSE"
13051 "#"
13052 "&& reload_completed"
13053 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13054 {
13055 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
13056 })
13057
13058 (define_insn "*vec_extractv2di_1"
13059 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
13060 (vec_select:DI
13061 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
13062 (parallel [(const_int 1)])))]
13063 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13064 "@
13065 %vpextrq\t{$1, %1, %0|%0, %1, 1}
13066 %vmovhps\t{%1, %0|%0, %1}
13067 psrldq\t{$8, %0|%0, 8}
13068 vpsrldq\t{$8, %1, %0|%0, %1, 8}
13069 movhlps\t{%1, %0|%0, %1}
13070 #
13071 #"
13072 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
13073 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
13074 (set_attr "length_immediate" "1,*,1,1,*,*,*")
13075 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
13076 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
13077 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
13078 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
13079
13080 (define_split
13081 [(set (match_operand:<ssescalarmode> 0 "register_operand")
13082 (vec_select:<ssescalarmode>
13083 (match_operand:VI_128 1 "memory_operand")
13084 (parallel
13085 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13086 "TARGET_SSE && reload_completed"
13087 [(set (match_dup 0) (match_dup 1))]
13088 {
13089 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
13090
13091 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
13092 })
13093
13094 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
13095 ;; vector modes into vec_extract*.
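;; For 32- and 64-byte vectors the split first narrows the source to its
;; low 16-byte half (via vec_extract_lo, once or twice) and then takes a
;; lowpart subreg, so the final extraction is always from a 128-bit vector.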
13096 (define_split
13097 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13098 (match_operand:SWI48x 1 "register_operand"))]
13099 "can_create_pseudo_p ()
13100 && SUBREG_P (operands[1])
13101 && REG_P (SUBREG_REG (operands[1]))
13102 && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) == MODE_VECTOR_INT
13103 || (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1])))
13104 == MODE_VECTOR_FLOAT))
13105 && SUBREG_BYTE (operands[1]) == 0
13106 && TARGET_SSE
13107 && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 16
13108 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 32
13109 && TARGET_AVX)
13110 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 64
13111 && TARGET_AVX512F))
13112 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
13113 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
13114 (parallel [(const_int 0)])))]
13115 {
13116 rtx tmp;
13117 operands[1] = SUBREG_REG (operands[1]);
13118 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
13119 {
13120 case 64:
13121 if (<MODE>mode == SImode)
13122 {
13123 tmp = gen_reg_rtx (V8SImode);
13124 emit_insn (gen_vec_extract_lo_v16si (tmp,
13125 gen_lowpart (V16SImode,
13126 operands[1])));
13127 }
13128 else
13129 {
13130 tmp = gen_reg_rtx (V4DImode);
13131 emit_insn (gen_vec_extract_lo_v8di (tmp,
13132 gen_lowpart (V8DImode,
13133 operands[1])));
13134 }
13135 operands[1] = tmp;
13136 /* FALLTHRU */
13137 case 32:
13138 tmp = gen_reg_rtx (<ssevecmode>mode);
13139 if (<MODE>mode == SImode)
13140 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
13141 operands[1])));
13142 else
13143 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
13144 operands[1])));
13145 operands[1] = tmp;
13146 break;
13147 case 16:
13148 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
13149 break;
13150 }
13151 })
13152
13153 (define_insn "*vec_concatv2si_sse4_1"
13154 [(set (match_operand:V2SI 0 "register_operand"
13155 "=Yr,*x,x, Yr,*x,x, x, *y,*y")
13156 (vec_concat:V2SI
13157 (match_operand:SI 1 "nonimmediate_operand"
13158 " 0, 0,x, 0,0, x,rm, 0,rm")
13159 (match_operand:SI 2 "vector_move_operand"
13160 " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
13161 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13162 "@
13163 pinsrd\t{$1, %2, %0|%0, %2, 1}
13164 pinsrd\t{$1, %2, %0|%0, %2, 1}
13165 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
13166 punpckldq\t{%2, %0|%0, %2}
13167 punpckldq\t{%2, %0|%0, %2}
13168 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
13169 %vmovd\t{%1, %0|%0, %1}
13170 punpckldq\t{%2, %0|%0, %2}
13171 movd\t{%1, %0|%0, %1}"
13172 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
13173 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
13174 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
13175 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
13176 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
13177 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
13178
13179 ;; ??? In theory we can match memory for the MMX alternative, but allowing
13180 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
13181 ;; alternatives pretty much forces the MMX alternative to be chosen.
13182 (define_insn "*vec_concatv2si"
13183 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
13184 (vec_concat:V2SI
13185 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
13186 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
13187 "TARGET_SSE && !TARGET_SSE4_1"
13188 "@
13189 punpckldq\t{%2, %0|%0, %2}
13190 movd\t{%1, %0|%0, %1}
13191 movd\t{%1, %0|%0, %1}
13192 unpcklps\t{%2, %0|%0, %2}
13193 movss\t{%1, %0|%0, %1}
13194 punpckldq\t{%2, %0|%0, %2}
13195 movd\t{%1, %0|%0, %1}"
13196 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
13197 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
13198 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
13199
13200 (define_insn "*vec_concatv4si"
13201 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
13202 (vec_concat:V4SI
13203 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
13204 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
13205 "TARGET_SSE"
13206 "@
13207 punpcklqdq\t{%2, %0|%0, %2}
13208 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
13209 movlhps\t{%2, %0|%0, %2}
13210 movhps\t{%2, %0|%0, %q2}
13211 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
13212 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
13213 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
13214 (set_attr "prefix" "orig,vex,orig,orig,vex")
13215 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
13216
13217 ;; movd instead of movq is required to handle assemblers that reject an
13217 ;; interunit movq (see the HAVE_AS_IX86_INTERUNIT_MOVQ check below).
13218 (define_insn "vec_concatv2di"
13219 [(set (match_operand:V2DI 0 "register_operand"
13220 "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x")
13221 (vec_concat:V2DI
13222 (match_operand:DI 1 "nonimmediate_operand"
13223 " 0, 0,x ,r ,xm,*y,0,x,0,0,x")
13224 (match_operand:DI 2 "vector_move_operand"
13225 "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))]
13226 "TARGET_SSE"
13227 "@
13228 pinsrq\t{$1, %2, %0|%0, %2, 1}
13229 pinsrq\t{$1, %2, %0|%0, %2, 1}
13230 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
13231 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
13232 %vmovq\t{%1, %0|%0, %1}
13233 movq2dq\t{%1, %0|%0, %1}
13234 punpcklqdq\t{%2, %0|%0, %2}
13235 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
13236 movlhps\t{%2, %0|%0, %2}
13237 movhps\t{%2, %0|%0, %2}
13238 vmovhps\t{%2, %1, %0|%0, %1, %2}"
13239 [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
13240 (set (attr "type")
13241 (if_then_else
13242 (eq_attr "alternative" "0,1,2,6,7")
13243 (const_string "sselog")
13244 (const_string "ssemov")))
13245 (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*")
13246 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*")
13247 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*")
13248 (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
13249 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
13250
13251 (define_expand "vec_unpacks_lo_<mode>"
13252 [(match_operand:<sseunpackmode> 0 "register_operand")
13253 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13254 "TARGET_SSE2"
13255 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
13256
13257 (define_expand "vec_unpacks_hi_<mode>"
13258 [(match_operand:<sseunpackmode> 0 "register_operand")
13259 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13260 "TARGET_SSE2"
13261 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
13262
13263 (define_expand "vec_unpacku_lo_<mode>"
13264 [(match_operand:<sseunpackmode> 0 "register_operand")
13265 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13266 "TARGET_SSE2"
13267 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
13268
13269 (define_expand "vec_unpacks_lo_hi"
13270 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
13271 (match_operand:HI 1 "register_operand"))]
13272 "TARGET_AVX512F")
13273
13274 (define_expand "vec_unpacks_lo_si"
13275 [(set (match_operand:HI 0 "register_operand")
13276 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
13277 "TARGET_AVX512F")
13278
13279 (define_expand "vec_unpacks_lo_di"
13280 [(set (match_operand:SI 0 "register_operand")
13281 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
13282 "TARGET_AVX512BW")
13283
13284 (define_expand "vec_unpacku_hi_<mode>"
13285 [(match_operand:<sseunpackmode> 0 "register_operand")
13286 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13287 "TARGET_SSE2"
13288 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
13289
13290 (define_expand "vec_unpacks_hi_hi"
13291 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
13292 (lshiftrt:HI (match_operand:HI 1 "register_operand")
13293 (const_int 8)))]
13294 "TARGET_AVX512F")
13295
13296 (define_expand "vec_unpacks_hi_<mode>"
13297 [(set (subreg:SWI48x (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
13298 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
13299 (match_dup 2)))]
13300 "TARGET_AVX512BW"
13301 {
13302 operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));
13303 })
13304
13305 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13306 ;;
13307 ;; Miscellaneous
13308 ;;
13309 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13310
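;; pavgb/pavgw compute the unsigned rounding average (a + b + 1) >> 1.  The
;; RTL below spells this out on zero-extended operands so that the
;; intermediate sum cannot wrap.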
13311 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
13312 [(set (match_operand:VI12_AVX2 0 "register_operand")
13313 (truncate:VI12_AVX2
13314 (lshiftrt:<ssedoublemode>
13315 (plus:<ssedoublemode>
13316 (plus:<ssedoublemode>
13317 (zero_extend:<ssedoublemode>
13318 (match_operand:VI12_AVX2 1 "vector_operand"))
13319 (zero_extend:<ssedoublemode>
13320 (match_operand:VI12_AVX2 2 "vector_operand")))
13321 (match_dup <mask_expand_op3>))
13322 (const_int 1))))]
13323 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13324 {
13325 rtx tmp;
13326 if (<mask_applied>)
13327 tmp = operands[3];
13328 operands[3] = CONST1_RTX(<MODE>mode);
13329 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
13330
13331 if (<mask_applied>)
13332 {
13333 operands[5] = operands[3];
13334 operands[3] = tmp;
13335 }
13336 })
13337
13338 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
13339 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
13340 (truncate:VI12_AVX2
13341 (lshiftrt:<ssedoublemode>
13342 (plus:<ssedoublemode>
13343 (plus:<ssedoublemode>
13344 (zero_extend:<ssedoublemode>
13345 (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
13346 (zero_extend:<ssedoublemode>
13347 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
13348 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
13349 (const_int 1))))]
13350 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13351 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
13352 "@
13353 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
13354 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13355 [(set_attr "isa" "noavx,avx")
13356 (set_attr "type" "sseiadd")
13357 (set_attr "prefix_data16" "1,*")
13358 (set_attr "prefix" "orig,<mask_prefix>")
13359 (set_attr "mode" "<sseinsnmode>")])
13360
13361 ;; The correct representation for this is absolutely enormous, and
13362 ;; surely not generally useful.
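;; What psadbw computes: for each 64-bit chunk, the sum of the absolute
;; differences of the eight unsigned byte pairs, zero-extended into the
;; corresponding 64-bit result element.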
13363 (define_insn "<sse2_avx2>_psadbw"
13364 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
13365 (unspec:VI8_AVX2_AVX512BW
13366 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
13367 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
13368 UNSPEC_PSADBW))]
13369 "TARGET_SSE2"
13370 "@
13371 psadbw\t{%2, %0|%0, %2}
13372 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
13373 [(set_attr "isa" "noavx,avx")
13374 (set_attr "type" "sseiadd")
13375 (set_attr "atom_unit" "simul")
13376 (set_attr "prefix_data16" "1,*")
13377 (set_attr "prefix" "orig,maybe_evex")
13378 (set_attr "mode" "<sseinsnmode>")])
13379
13380 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
13381 [(set (match_operand:SI 0 "register_operand" "=r")
13382 (unspec:SI
13383 [(match_operand:VF_128_256 1 "register_operand" "x")]
13384 UNSPEC_MOVMSK))]
13385 "TARGET_SSE"
13386 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
13387 [(set_attr "type" "ssemov")
13388 (set_attr "prefix" "maybe_vex")
13389 (set_attr "mode" "<MODE>")])
13390
13391 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
13392 [(set (match_operand:DI 0 "register_operand" "=r")
13393 (zero_extend:DI
13394 (unspec:SI
13395 [(match_operand:VF_128_256 1 "register_operand" "x")]
13396 UNSPEC_MOVMSK)))]
13397 "TARGET_64BIT && TARGET_SSE"
13398 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
13399 [(set_attr "type" "ssemov")
13400 (set_attr "prefix" "maybe_vex")
13401 (set_attr "mode" "<MODE>")])
13402
13403 (define_insn "<sse2_avx2>_pmovmskb"
13404 [(set (match_operand:SI 0 "register_operand" "=r")
13405 (unspec:SI
13406 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
13407 UNSPEC_MOVMSK))]
13408 "TARGET_SSE2"
13409 "%vpmovmskb\t{%1, %0|%0, %1}"
13410 [(set_attr "type" "ssemov")
13411 (set (attr "prefix_data16")
13412 (if_then_else
13413 (match_test "TARGET_AVX")
13414 (const_string "*")
13415 (const_string "1")))
13416 (set_attr "prefix" "maybe_vex")
13417 (set_attr "mode" "SI")])
13418
13419 (define_insn "*<sse2_avx2>_pmovmskb_zext"
13420 [(set (match_operand:DI 0 "register_operand" "=r")
13421 (zero_extend:DI
13422 (unspec:SI
13423 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
13424 UNSPEC_MOVMSK)))]
13425 "TARGET_64BIT && TARGET_SSE2"
13426 "%vpmovmskb\t{%1, %k0|%k0, %1}"
13427 [(set_attr "type" "ssemov")
13428 (set (attr "prefix_data16")
13429 (if_then_else
13430 (match_test "TARGET_AVX")
13431 (const_string "*")
13432 (const_string "1")))
13433 (set_attr "prefix" "maybe_vex")
13434 (set_attr "mode" "SI")])
13435
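;; maskmovdqu performs a byte-granular masked store to the address in
;; %edi/%rdi: a data byte is written only if the most significant bit of
;; the corresponding mask byte is set.  The memory operand also appears as
;; an input to the unspec because unselected bytes keep their previous
;; contents.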
13436 (define_expand "sse2_maskmovdqu"
13437 [(set (match_operand:V16QI 0 "memory_operand")
13438 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
13439 (match_operand:V16QI 2 "register_operand")
13440 (match_dup 0)]
13441 UNSPEC_MASKMOV))]
13442 "TARGET_SSE2")
13443
13444 (define_insn "*sse2_maskmovdqu"
13445 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
13446 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
13447 (match_operand:V16QI 2 "register_operand" "x")
13448 (mem:V16QI (match_dup 0))]
13449 UNSPEC_MASKMOV))]
13450 "TARGET_SSE2"
13451 {
13452 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
13453 that requires %v to be at the beginning of the opcode name. */
13454 if (Pmode != word_mode)
13455 fputs ("\taddr32", asm_out_file);
13456 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
13457 }
13458 [(set_attr "type" "ssemov")
13459 (set_attr "prefix_data16" "1")
13460 (set (attr "length_address")
13461 (symbol_ref ("Pmode != word_mode")))
13462 ;; The implicit %rdi operand confuses default length_vex computation.
13463 (set (attr "length_vex")
13464 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
13465 (set_attr "prefix" "maybe_vex")
13466 (set_attr "znver1_decode" "vector")
13467 (set_attr "mode" "TI")])
13468
13469 (define_insn "sse_ldmxcsr"
13470 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
13471 UNSPECV_LDMXCSR)]
13472 "TARGET_SSE"
13473 "%vldmxcsr\t%0"
13474 [(set_attr "type" "sse")
13475 (set_attr "atom_sse_attr" "mxcsr")
13476 (set_attr "prefix" "maybe_vex")
13477 (set_attr "memory" "load")])
13478
13479 (define_insn "sse_stmxcsr"
13480 [(set (match_operand:SI 0 "memory_operand" "=m")
13481 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
13482 "TARGET_SSE"
13483 "%vstmxcsr\t%0"
13484 [(set_attr "type" "sse")
13485 (set_attr "atom_sse_attr" "mxcsr")
13486 (set_attr "prefix" "maybe_vex")
13487 (set_attr "memory" "store")])
13488
13489 (define_insn "sse2_clflush"
13490 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
13491 UNSPECV_CLFLUSH)]
13492 "TARGET_SSE2"
13493 "clflush\t%a0"
13494 [(set_attr "type" "sse")
13495 (set_attr "atom_sse_attr" "fence")
13496 (set_attr "memory" "unknown")])
13497
13498 ;; As per the AMD and Intel ISA manuals, the first operand is the extensions
13499 ;; word and goes in %ecx; the second operand is the hints word and goes
13500 ;; in %eax.
13501 (define_insn "sse3_mwait"
13502 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
13503 (match_operand:SI 1 "register_operand" "a")]
13504 UNSPECV_MWAIT)]
13505 "TARGET_SSE3"
13506 ;; The 64-bit form is "mwait %rax,%rcx", but only the lower 32 bits are used.
13507 ;; Since 32-bit register operands are implicitly zero-extended to 64 bits,
13508 ;; we only need to set up the 32-bit registers.
13509 "mwait"
13510 [(set_attr "length" "3")])
13511
13512 (define_insn "sse3_monitor_<mode>"
13513 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
13514 (match_operand:SI 1 "register_operand" "c")
13515 (match_operand:SI 2 "register_operand" "d")]
13516 UNSPECV_MONITOR)]
13517 "TARGET_SSE3"
13518 ;; The 64-bit form is "monitor %rax,%rcx,%rdx", but only the lower 32 bits
13519 ;; of RCX and RDX are used.  Since 32-bit register operands are implicitly
13520 ;; zero-extended to 64 bits, we only need to set up the 32-bit registers.
13521 "%^monitor"
13522 [(set (attr "length")
13523 (symbol_ref ("(Pmode != word_mode) + 3")))])
13524
13525 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13526 ;;
13527 ;; SSSE3 instructions
13528 ;;
13529 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13530
13531 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
13532
13533 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
13534 [(set (match_operand:V16HI 0 "register_operand" "=x")
13535 (vec_concat:V16HI
13536 (vec_concat:V8HI
13537 (vec_concat:V4HI
13538 (vec_concat:V2HI
13539 (ssse3_plusminus:HI
13540 (vec_select:HI
13541 (match_operand:V16HI 1 "register_operand" "x")
13542 (parallel [(const_int 0)]))
13543 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13544 (ssse3_plusminus:HI
13545 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13546 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13547 (vec_concat:V2HI
13548 (ssse3_plusminus:HI
13549 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13550 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13551 (ssse3_plusminus:HI
13552 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13553 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13554 (vec_concat:V4HI
13555 (vec_concat:V2HI
13556 (ssse3_plusminus:HI
13557 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
13558 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
13559 (ssse3_plusminus:HI
13560 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
13561 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
13562 (vec_concat:V2HI
13563 (ssse3_plusminus:HI
13564 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
13565 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
13566 (ssse3_plusminus:HI
13567 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
13568 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
13569 (vec_concat:V8HI
13570 (vec_concat:V4HI
13571 (vec_concat:V2HI
13572 (ssse3_plusminus:HI
13573 (vec_select:HI
13574 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
13575 (parallel [(const_int 0)]))
13576 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13577 (ssse3_plusminus:HI
13578 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13579 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13580 (vec_concat:V2HI
13581 (ssse3_plusminus:HI
13582 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13583 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13584 (ssse3_plusminus:HI
13585 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13586 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
13587 (vec_concat:V4HI
13588 (vec_concat:V2HI
13589 (ssse3_plusminus:HI
13590 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
13591 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
13592 (ssse3_plusminus:HI
13593 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
13594 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
13595 (vec_concat:V2HI
13596 (ssse3_plusminus:HI
13597 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
13598 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
13599 (ssse3_plusminus:HI
13600 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
13601 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
13602 "TARGET_AVX2"
13603 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13604 [(set_attr "type" "sseiadd")
13605 (set_attr "prefix_extra" "1")
13606 (set_attr "prefix" "vex")
13607 (set_attr "mode" "OI")])
13608
13609 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
13610 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13611 (vec_concat:V8HI
13612 (vec_concat:V4HI
13613 (vec_concat:V2HI
13614 (ssse3_plusminus:HI
13615 (vec_select:HI
13616 (match_operand:V8HI 1 "register_operand" "0,x")
13617 (parallel [(const_int 0)]))
13618 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13619 (ssse3_plusminus:HI
13620 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13621 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13622 (vec_concat:V2HI
13623 (ssse3_plusminus:HI
13624 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13625 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13626 (ssse3_plusminus:HI
13627 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13628 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13629 (vec_concat:V4HI
13630 (vec_concat:V2HI
13631 (ssse3_plusminus:HI
13632 (vec_select:HI
13633 (match_operand:V8HI 2 "vector_operand" "xBm,xm")
13634 (parallel [(const_int 0)]))
13635 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13636 (ssse3_plusminus:HI
13637 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13638 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13639 (vec_concat:V2HI
13640 (ssse3_plusminus:HI
13641 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13642 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13643 (ssse3_plusminus:HI
13644 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13645 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
13646 "TARGET_SSSE3"
13647 "@
13648 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
13649 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13650 [(set_attr "isa" "noavx,avx")
13651 (set_attr "type" "sseiadd")
13652 (set_attr "atom_unit" "complex")
13653 (set_attr "prefix_data16" "1,*")
13654 (set_attr "prefix_extra" "1")
13655 (set_attr "prefix" "orig,vex")
13656 (set_attr "mode" "TI")])
13657
13658 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
13659 [(set (match_operand:V4HI 0 "register_operand" "=y")
13660 (vec_concat:V4HI
13661 (vec_concat:V2HI
13662 (ssse3_plusminus:HI
13663 (vec_select:HI
13664 (match_operand:V4HI 1 "register_operand" "0")
13665 (parallel [(const_int 0)]))
13666 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13667 (ssse3_plusminus:HI
13668 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13669 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13670 (vec_concat:V2HI
13671 (ssse3_plusminus:HI
13672 (vec_select:HI
13673 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
13674 (parallel [(const_int 0)]))
13675 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13676 (ssse3_plusminus:HI
13677 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13678 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
13679 "TARGET_SSSE3"
13680 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
13681 [(set_attr "type" "sseiadd")
13682 (set_attr "atom_unit" "complex")
13683 (set_attr "prefix_extra" "1")
13684 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13685 (set_attr "mode" "DI")])
13686
13687 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
13688 [(set (match_operand:V8SI 0 "register_operand" "=x")
13689 (vec_concat:V8SI
13690 (vec_concat:V4SI
13691 (vec_concat:V2SI
13692 (plusminus:SI
13693 (vec_select:SI
13694 (match_operand:V8SI 1 "register_operand" "x")
13695 (parallel [(const_int 0)]))
13696 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13697 (plusminus:SI
13698 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13699 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13700 (vec_concat:V2SI
13701 (plusminus:SI
13702 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
13703 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
13704 (plusminus:SI
13705 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
13706 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
13707 (vec_concat:V4SI
13708 (vec_concat:V2SI
13709 (plusminus:SI
13710 (vec_select:SI
13711 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
13712 (parallel [(const_int 0)]))
13713 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13714 (plusminus:SI
13715 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13716 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
13717 (vec_concat:V2SI
13718 (plusminus:SI
13719 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
13720 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
13721 (plusminus:SI
13722 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
13723 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
13724 "TARGET_AVX2"
13725 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13726 [(set_attr "type" "sseiadd")
13727 (set_attr "prefix_extra" "1")
13728 (set_attr "prefix" "vex")
13729 (set_attr "mode" "OI")])
13730
13731 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
13732 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
13733 (vec_concat:V4SI
13734 (vec_concat:V2SI
13735 (plusminus:SI
13736 (vec_select:SI
13737 (match_operand:V4SI 1 "register_operand" "0,x")
13738 (parallel [(const_int 0)]))
13739 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13740 (plusminus:SI
13741 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13742 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13743 (vec_concat:V2SI
13744 (plusminus:SI
13745 (vec_select:SI
13746 (match_operand:V4SI 2 "vector_operand" "xBm,xm")
13747 (parallel [(const_int 0)]))
13748 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13749 (plusminus:SI
13750 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13751 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
13752 "TARGET_SSSE3"
13753 "@
13754 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
13755 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13756 [(set_attr "isa" "noavx,avx")
13757 (set_attr "type" "sseiadd")
13758 (set_attr "atom_unit" "complex")
13759 (set_attr "prefix_data16" "1,*")
13760 (set_attr "prefix_extra" "1")
13761 (set_attr "prefix" "orig,vex")
13762 (set_attr "mode" "TI")])
13763
13764 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
13765 [(set (match_operand:V2SI 0 "register_operand" "=y")
13766 (vec_concat:V2SI
13767 (plusminus:SI
13768 (vec_select:SI
13769 (match_operand:V2SI 1 "register_operand" "0")
13770 (parallel [(const_int 0)]))
13771 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13772 (plusminus:SI
13773 (vec_select:SI
13774 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
13775 (parallel [(const_int 0)]))
13776 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
13777 "TARGET_SSSE3"
13778 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
13779 [(set_attr "type" "sseiadd")
13780 (set_attr "atom_unit" "complex")
13781 (set_attr "prefix_extra" "1")
13782 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13783 (set_attr "mode" "DI")])
13784
13785 (define_insn "avx2_pmaddubsw256"
13786 [(set (match_operand:V16HI 0 "register_operand" "=x")
13787 (ss_plus:V16HI
13788 (mult:V16HI
13789 (zero_extend:V16HI
13790 (vec_select:V16QI
13791 (match_operand:V32QI 1 "register_operand" "x")
13792 (parallel [(const_int 0) (const_int 2)
13793 (const_int 4) (const_int 6)
13794 (const_int 8) (const_int 10)
13795 (const_int 12) (const_int 14)
13796 (const_int 16) (const_int 18)
13797 (const_int 20) (const_int 22)
13798 (const_int 24) (const_int 26)
13799 (const_int 28) (const_int 30)])))
13800 (sign_extend:V16HI
13801 (vec_select:V16QI
13802 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
13803 (parallel [(const_int 0) (const_int 2)
13804 (const_int 4) (const_int 6)
13805 (const_int 8) (const_int 10)
13806 (const_int 12) (const_int 14)
13807 (const_int 16) (const_int 18)
13808 (const_int 20) (const_int 22)
13809 (const_int 24) (const_int 26)
13810 (const_int 28) (const_int 30)]))))
13811 (mult:V16HI
13812 (zero_extend:V16HI
13813 (vec_select:V16QI (match_dup 1)
13814 (parallel [(const_int 1) (const_int 3)
13815 (const_int 5) (const_int 7)
13816 (const_int 9) (const_int 11)
13817 (const_int 13) (const_int 15)
13818 (const_int 17) (const_int 19)
13819 (const_int 21) (const_int 23)
13820 (const_int 25) (const_int 27)
13821 (const_int 29) (const_int 31)])))
13822 (sign_extend:V16HI
13823 (vec_select:V16QI (match_dup 2)
13824 (parallel [(const_int 1) (const_int 3)
13825 (const_int 5) (const_int 7)
13826 (const_int 9) (const_int 11)
13827 (const_int 13) (const_int 15)
13828 (const_int 17) (const_int 19)
13829 (const_int 21) (const_int 23)
13830 (const_int 25) (const_int 27)
13831 (const_int 29) (const_int 31)]))))))]
13832 "TARGET_AVX2"
13833 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13834 [(set_attr "type" "sseiadd")
13835 (set_attr "prefix_extra" "1")
13836 (set_attr "prefix" "vex")
13837 (set_attr "mode" "OI")])
13838
13839 ;; The correct RTL representation for this is absolutely enormous and
13840 ;; surely not generally useful, so an unspec is used instead.
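;; As a rough per-element sketch of what vpmaddubsw computes (this is what
;; avx2_pmaddubsw256 above spells out in full):
;;
;;   r[i] = ss16 ((uint8_t) a[2*i]     * (int8_t) b[2*i]
;;                + (uint8_t) a[2*i+1] * (int8_t) b[2*i+1]);
;;
;; where ss16 () denotes signed 16-bit saturation, a is operand 1 and b is
;; operand 2.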
13841 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
13842 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
13843 (unspec:VI2_AVX512VL
13844 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
13845 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
13846 UNSPEC_PMADDUBSW512))]
13847 "TARGET_AVX512BW"
13848 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
13849 [(set_attr "type" "sseiadd")
13850 (set_attr "prefix" "evex")
13851 (set_attr "mode" "XI")])
13852
13853 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
13854 [(set (match_operand:V32HI 0 "register_operand" "=v")
13855 (truncate:V32HI
13856 (lshiftrt:V32SI
13857 (plus:V32SI
13858 (lshiftrt:V32SI
13859 (mult:V32SI
13860 (sign_extend:V32SI
13861 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
13862 (sign_extend:V32SI
13863 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
13864 (const_int 14))
13865 (const_vector:V32HI [(const_int 1) (const_int 1)
13866 (const_int 1) (const_int 1)
13867 (const_int 1) (const_int 1)
13868 (const_int 1) (const_int 1)
13869 (const_int 1) (const_int 1)
13870 (const_int 1) (const_int 1)
13871 (const_int 1) (const_int 1)
13872 (const_int 1) (const_int 1)
13873 (const_int 1) (const_int 1)
13874 (const_int 1) (const_int 1)
13875 (const_int 1) (const_int 1)
13876 (const_int 1) (const_int 1)
13877 (const_int 1) (const_int 1)
13878 (const_int 1) (const_int 1)
13879 (const_int 1) (const_int 1)
13880 (const_int 1) (const_int 1)]))
13881 (const_int 1))))]
13882 "TARGET_AVX512BW"
13883 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13884 [(set_attr "type" "sseimul")
13885 (set_attr "prefix" "evex")
13886 (set_attr "mode" "XI")])
13887
13888 (define_insn "ssse3_pmaddubsw128"
13889 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13890 (ss_plus:V8HI
13891 (mult:V8HI
13892 (zero_extend:V8HI
13893 (vec_select:V8QI
13894 (match_operand:V16QI 1 "register_operand" "0,x")
13895 (parallel [(const_int 0) (const_int 2)
13896 (const_int 4) (const_int 6)
13897 (const_int 8) (const_int 10)
13898 (const_int 12) (const_int 14)])))
13899 (sign_extend:V8HI
13900 (vec_select:V8QI
13901 (match_operand:V16QI 2 "vector_operand" "xBm,xm")
13902 (parallel [(const_int 0) (const_int 2)
13903 (const_int 4) (const_int 6)
13904 (const_int 8) (const_int 10)
13905 (const_int 12) (const_int 14)]))))
13906 (mult:V8HI
13907 (zero_extend:V8HI
13908 (vec_select:V8QI (match_dup 1)
13909 (parallel [(const_int 1) (const_int 3)
13910 (const_int 5) (const_int 7)
13911 (const_int 9) (const_int 11)
13912 (const_int 13) (const_int 15)])))
13913 (sign_extend:V8HI
13914 (vec_select:V8QI (match_dup 2)
13915 (parallel [(const_int 1) (const_int 3)
13916 (const_int 5) (const_int 7)
13917 (const_int 9) (const_int 11)
13918 (const_int 13) (const_int 15)]))))))]
13919 "TARGET_SSSE3"
13920 "@
13921 pmaddubsw\t{%2, %0|%0, %2}
13922 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13923 [(set_attr "isa" "noavx,avx")
13924 (set_attr "type" "sseiadd")
13925 (set_attr "atom_unit" "simul")
13926 (set_attr "prefix_data16" "1,*")
13927 (set_attr "prefix_extra" "1")
13928 (set_attr "prefix" "orig,vex")
13929 (set_attr "mode" "TI")])
13930
13931 (define_insn "ssse3_pmaddubsw"
13932 [(set (match_operand:V4HI 0 "register_operand" "=y")
13933 (ss_plus:V4HI
13934 (mult:V4HI
13935 (zero_extend:V4HI
13936 (vec_select:V4QI
13937 (match_operand:V8QI 1 "register_operand" "0")
13938 (parallel [(const_int 0) (const_int 2)
13939 (const_int 4) (const_int 6)])))
13940 (sign_extend:V4HI
13941 (vec_select:V4QI
13942 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
13943 (parallel [(const_int 0) (const_int 2)
13944 (const_int 4) (const_int 6)]))))
13945 (mult:V4HI
13946 (zero_extend:V4HI
13947 (vec_select:V4QI (match_dup 1)
13948 (parallel [(const_int 1) (const_int 3)
13949 (const_int 5) (const_int 7)])))
13950 (sign_extend:V4HI
13951 (vec_select:V4QI (match_dup 2)
13952 (parallel [(const_int 1) (const_int 3)
13953 (const_int 5) (const_int 7)]))))))]
13954 "TARGET_SSSE3"
13955 "pmaddubsw\t{%2, %0|%0, %2}"
13956 [(set_attr "type" "sseiadd")
13957 (set_attr "atom_unit" "simul")
13958 (set_attr "prefix_extra" "1")
13959 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13960 (set_attr "mode" "DI")])
13961
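;; pmulhrsw: multiply the signed 16-bit elements, round, and keep the high
;; half.  Per element the patterns below encode, roughly,
;;
;;   r[i] = (int16_t) (((((int32_t) a[i] * (int32_t) b[i]) >> 14) + 1) >> 1);
;;
;; the "+ 1 then >> 1" rounding step is why the expanders supply a
;; constant-1 vector (CONST1_RTX) as the extra operand.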
13962 (define_mode_iterator PMULHRSW
13963 [V4HI V8HI (V16HI "TARGET_AVX2")])
13964
13965 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
13966 [(set (match_operand:PMULHRSW 0 "register_operand")
13967 (vec_merge:PMULHRSW
13968 (truncate:PMULHRSW
13969 (lshiftrt:<ssedoublemode>
13970 (plus:<ssedoublemode>
13971 (lshiftrt:<ssedoublemode>
13972 (mult:<ssedoublemode>
13973 (sign_extend:<ssedoublemode>
13974 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13975 (sign_extend:<ssedoublemode>
13976 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13977 (const_int 14))
13978 (match_dup 5))
13979 (const_int 1)))
13980 (match_operand:PMULHRSW 3 "register_operand")
13981 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13982 "TARGET_AVX512BW && TARGET_AVX512VL"
13983 {
13984 operands[5] = CONST1_RTX(<MODE>mode);
13985 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
13986 })
13987
13988 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
13989 [(set (match_operand:PMULHRSW 0 "register_operand")
13990 (truncate:PMULHRSW
13991 (lshiftrt:<ssedoublemode>
13992 (plus:<ssedoublemode>
13993 (lshiftrt:<ssedoublemode>
13994 (mult:<ssedoublemode>
13995 (sign_extend:<ssedoublemode>
13996 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13997 (sign_extend:<ssedoublemode>
13998 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13999 (const_int 14))
14000 (match_dup 3))
14001 (const_int 1))))]
14002 "TARGET_AVX2"
14003 {
14004 operands[3] = CONST1_RTX(<MODE>mode);
14005 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14006 })
14007
14008 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
14009 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
14010 (truncate:VI2_AVX2
14011 (lshiftrt:<ssedoublemode>
14012 (plus:<ssedoublemode>
14013 (lshiftrt:<ssedoublemode>
14014 (mult:<ssedoublemode>
14015 (sign_extend:<ssedoublemode>
14016 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
14017 (sign_extend:<ssedoublemode>
14018 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
14019 (const_int 14))
14020 (match_operand:VI2_AVX2 3 "const1_operand"))
14021 (const_int 1))))]
14022 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14023 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
14024 "@
14025 pmulhrsw\t{%2, %0|%0, %2}
14026 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
14027 [(set_attr "isa" "noavx,avx")
14028 (set_attr "type" "sseimul")
14029 (set_attr "prefix_data16" "1,*")
14030 (set_attr "prefix_extra" "1")
14031 (set_attr "prefix" "orig,maybe_evex")
14032 (set_attr "mode" "<sseinsnmode>")])
14033
14034 (define_insn "*ssse3_pmulhrswv4hi3"
14035 [(set (match_operand:V4HI 0 "register_operand" "=y")
14036 (truncate:V4HI
14037 (lshiftrt:V4SI
14038 (plus:V4SI
14039 (lshiftrt:V4SI
14040 (mult:V4SI
14041 (sign_extend:V4SI
14042 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
14043 (sign_extend:V4SI
14044 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
14045 (const_int 14))
14046 (match_operand:V4HI 3 "const1_operand"))
14047 (const_int 1))))]
14048 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
14049 "pmulhrsw\t{%2, %0|%0, %2}"
14050 [(set_attr "type" "sseimul")
14051 (set_attr "prefix_extra" "1")
14052 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14053 (set_attr "mode" "DI")])
14054
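;; pshufb: byte shuffle under control of the second operand.  Roughly, for
;; the 128-bit form,
;;
;;   r[i] = (mask[i] & 0x80) ? 0 : op1[mask[i] & 0x0f];
;;
;; the 256/512-bit forms apply the same operation within each 128-bit lane.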
14055 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
14056 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v")
14057 (unspec:VI1_AVX512
14058 [(match_operand:VI1_AVX512 1 "register_operand" "0,v")
14059 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,vm")]
14060 UNSPEC_PSHUFB))]
14061 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14062 "@
14063 pshufb\t{%2, %0|%0, %2}
14064 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14065 [(set_attr "isa" "noavx,avx")
14066 (set_attr "type" "sselog1")
14067 (set_attr "prefix_data16" "1,*")
14068 (set_attr "prefix_extra" "1")
14069 (set_attr "prefix" "orig,maybe_evex")
14070 (set_attr "btver2_decode" "vector,vector")
14071 (set_attr "mode" "<sseinsnmode>")])
14072
14073 (define_insn "ssse3_pshufbv8qi3"
14074 [(set (match_operand:V8QI 0 "register_operand" "=y")
14075 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
14076 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
14077 UNSPEC_PSHUFB))]
14078 "TARGET_SSSE3"
14079 "pshufb\t{%2, %0|%0, %2}";
14080 [(set_attr "type" "sselog1")
14081 (set_attr "prefix_extra" "1")
14082 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14083 (set_attr "mode" "DI")])
14084
14085 (define_insn "<ssse3_avx2>_psign<mode>3"
14086 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
14087 (unspec:VI124_AVX2
14088 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
14089 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
14090 UNSPEC_PSIGN))]
14091 "TARGET_SSSE3"
14092 "@
14093 psign<ssemodesuffix>\t{%2, %0|%0, %2}
14094 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14095 [(set_attr "isa" "noavx,avx")
14096 (set_attr "type" "sselog1")
14097 (set_attr "prefix_data16" "1,*")
14098 (set_attr "prefix_extra" "1")
14099 (set_attr "prefix" "orig,vex")
14100 (set_attr "mode" "<sseinsnmode>")])
14101
14102 (define_insn "ssse3_psign<mode>3"
14103 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
14104 (unspec:MMXMODEI
14105 [(match_operand:MMXMODEI 1 "register_operand" "0")
14106 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
14107 UNSPEC_PSIGN))]
14108 "TARGET_SSSE3"
14109 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
14110 [(set_attr "type" "sselog1")
14111 (set_attr "prefix_extra" "1")
14112 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14113 (set_attr "mode" "DI")])
14114
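;; palignr: the shift count reaching these patterns is expressed in bits
;; (const_0_to_255_mul_8_operand), so each pattern divides it by 8 to get the
;; byte count the instruction takes.  Conceptually the operation concatenates
;; operand 1 (high) with operand 2 (low) and extracts a destination-sized
;; window starting at that byte offset; the wider forms do this within each
;; 128-bit lane.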
14115 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
14116 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
14117 (vec_merge:VI1_AVX512
14118 (unspec:VI1_AVX512
14119 [(match_operand:VI1_AVX512 1 "register_operand" "v")
14120 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
14121 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14122 UNSPEC_PALIGNR)
14123 (match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
14124 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
14125 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
14126 {
14127 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14128 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
14129 }
14130 [(set_attr "type" "sseishft")
14131 (set_attr "atom_unit" "sishuf")
14132 (set_attr "prefix_extra" "1")
14133 (set_attr "length_immediate" "1")
14134 (set_attr "prefix" "evex")
14135 (set_attr "mode" "<sseinsnmode>")])
14136
14137 (define_insn "<ssse3_avx2>_palignr<mode>"
14138 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
14139 (unspec:SSESCALARMODE
14140 [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
14141 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,vm")
14142 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
14143 UNSPEC_PALIGNR))]
14144 "TARGET_SSSE3"
14145 {
14146 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14147
14148 switch (which_alternative)
14149 {
14150 case 0:
14151 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14152 case 1:
14153 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14154 default:
14155 gcc_unreachable ();
14156 }
14157 }
14158 [(set_attr "isa" "noavx,avx")
14159 (set_attr "type" "sseishft")
14160 (set_attr "atom_unit" "sishuf")
14161 (set_attr "prefix_data16" "1,*")
14162 (set_attr "prefix_extra" "1")
14163 (set_attr "length_immediate" "1")
14164 (set_attr "prefix" "orig,vex")
14165 (set_attr "mode" "<sseinsnmode>")])
14166
14167 (define_insn "ssse3_palignrdi"
14168 [(set (match_operand:DI 0 "register_operand" "=y")
14169 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
14170 (match_operand:DI 2 "nonimmediate_operand" "ym")
14171 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14172 UNSPEC_PALIGNR))]
14173 "TARGET_SSSE3"
14174 {
14175 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14176 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14177 }
14178 [(set_attr "type" "sseishft")
14179 (set_attr "atom_unit" "sishuf")
14180 (set_attr "prefix_extra" "1")
14181 (set_attr "length_immediate" "1")
14182 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14183 (set_attr "mode" "DI")])
14184
14185 ;; Mode iterator to handle the absence of V2DI and V4DI modes for the
14186 ;; abs instruction on pre-AVX512 targets (they are only enabled with AVX512VL).
14187 (define_mode_iterator VI1248_AVX512VL_AVX512BW
14188 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
14189 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
14190 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
14191 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
14192
14193 (define_insn "*abs<mode>2"
14194 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
14195 (abs:VI1248_AVX512VL_AVX512BW
14196 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
14197 "TARGET_SSSE3"
14198 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
14199 [(set_attr "type" "sselog1")
14200 (set_attr "prefix_data16" "1")
14201 (set_attr "prefix_extra" "1")
14202 (set_attr "prefix" "maybe_vex")
14203 (set_attr "mode" "<sseinsnmode>")])
14204
14205 (define_insn "abs<mode>2_mask"
14206 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
14207 (vec_merge:VI48_AVX512VL
14208 (abs:VI48_AVX512VL
14209 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
14210 (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
14211 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14212 "TARGET_AVX512F"
14213 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14214 [(set_attr "type" "sselog1")
14215 (set_attr "prefix" "evex")
14216 (set_attr "mode" "<sseinsnmode>")])
14217
14218 (define_insn "abs<mode>2_mask"
14219 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
14220 (vec_merge:VI12_AVX512VL
14221 (abs:VI12_AVX512VL
14222 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
14223 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
14224 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14225 "TARGET_AVX512BW"
14226 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14227 [(set_attr "type" "sselog1")
14228 (set_attr "prefix" "evex")
14229 (set_attr "mode" "<sseinsnmode>")])
14230
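;; The expander below falls back to ix86_expand_sse2_abs when the SSSE3 pabs*
;; instructions are not available; that helper emits a plain SSE2 sequence
;; (conceptually abs (x) = max (x, 0 - x), or the equivalent sign-mask
;; xor/subtract trick, depending on the element size).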
14231 (define_expand "abs<mode>2"
14232 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
14233 (abs:VI1248_AVX512VL_AVX512BW
14234 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand")))]
14235 "TARGET_SSE2"
14236 {
14237 if (!TARGET_SSSE3)
14238 {
14239 ix86_expand_sse2_abs (operands[0], operands[1]);
14240 DONE;
14241 }
14242 })
14243
14244 (define_insn "abs<mode>2"
14245 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
14246 (abs:MMXMODEI
14247 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
14248 "TARGET_SSSE3"
14249 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
14250 [(set_attr "type" "sselog1")
14251 (set_attr "prefix_rep" "0")
14252 (set_attr "prefix_extra" "1")
14253 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14254 (set_attr "mode" "DI")])
14255
14256 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14257 ;;
14258 ;; AMD SSE4A instructions
14259 ;;
14260 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14261
14262 (define_insn "sse4a_movnt<mode>"
14263 [(set (match_operand:MODEF 0 "memory_operand" "=m")
14264 (unspec:MODEF
14265 [(match_operand:MODEF 1 "register_operand" "x")]
14266 UNSPEC_MOVNT))]
14267 "TARGET_SSE4A"
14268 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
14269 [(set_attr "type" "ssemov")
14270 (set_attr "mode" "<MODE>")])
14271
14272 (define_insn "sse4a_vmmovnt<mode>"
14273 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
14274 (unspec:<ssescalarmode>
14275 [(vec_select:<ssescalarmode>
14276 (match_operand:VF_128 1 "register_operand" "x")
14277 (parallel [(const_int 0)]))]
14278 UNSPEC_MOVNT))]
14279 "TARGET_SSE4A"
14280 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
14281 [(set_attr "type" "ssemov")
14282 (set_attr "mode" "<ssescalarmode>")])
14283
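;; For the immediate forms below, the two const_0_to_255 operands give the
;; bit-field length and the starting bit position within the low quadword
;; (cf. the _mm_extracti_si64 / _mm_inserti_si64 intrinsics); extrq extracts
;; such a field, insertq deposits one.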
14284 (define_insn "sse4a_extrqi"
14285 [(set (match_operand:V2DI 0 "register_operand" "=x")
14286 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14287 (match_operand 2 "const_0_to_255_operand")
14288 (match_operand 3 "const_0_to_255_operand")]
14289 UNSPEC_EXTRQI))]
14290 "TARGET_SSE4A"
14291 "extrq\t{%3, %2, %0|%0, %2, %3}"
14292 [(set_attr "type" "sse")
14293 (set_attr "prefix_data16" "1")
14294 (set_attr "length_immediate" "2")
14295 (set_attr "mode" "TI")])
14296
14297 (define_insn "sse4a_extrq"
14298 [(set (match_operand:V2DI 0 "register_operand" "=x")
14299 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14300 (match_operand:V16QI 2 "register_operand" "x")]
14301 UNSPEC_EXTRQ))]
14302 "TARGET_SSE4A"
14303 "extrq\t{%2, %0|%0, %2}"
14304 [(set_attr "type" "sse")
14305 (set_attr "prefix_data16" "1")
14306 (set_attr "mode" "TI")])
14307
14308 (define_insn "sse4a_insertqi"
14309 [(set (match_operand:V2DI 0 "register_operand" "=x")
14310 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14311 (match_operand:V2DI 2 "register_operand" "x")
14312 (match_operand 3 "const_0_to_255_operand")
14313 (match_operand 4 "const_0_to_255_operand")]
14314 UNSPEC_INSERTQI))]
14315 "TARGET_SSE4A"
14316 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
14317 [(set_attr "type" "sseins")
14318 (set_attr "prefix_data16" "0")
14319 (set_attr "prefix_rep" "1")
14320 (set_attr "length_immediate" "2")
14321 (set_attr "mode" "TI")])
14322
14323 (define_insn "sse4a_insertq"
14324 [(set (match_operand:V2DI 0 "register_operand" "=x")
14325 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14326 (match_operand:V2DI 2 "register_operand" "x")]
14327 UNSPEC_INSERTQ))]
14328 "TARGET_SSE4A"
14329 "insertq\t{%2, %0|%0, %2}"
14330 [(set_attr "type" "sseins")
14331 (set_attr "prefix_data16" "0")
14332 (set_attr "prefix_rep" "1")
14333 (set_attr "mode" "TI")])
14334
14335 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14336 ;;
14337 ;; Intel SSE4.1 instructions
14338 ;;
14339 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14340
14341 ;; Mapping of immediate bits for blend instructions
14342 (define_mode_attr blendbits
14343 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
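;; i.e. one selector bit per vector element: 8 bits for V8SF, 4 for
;; V4SF/V4DF and 2 for V2DF.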
14344
14345 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
14346 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14347 (vec_merge:VF_128_256
14348 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
14349 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
14350 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
14351 "TARGET_SSE4_1"
14352 "@
14353 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14354 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14355 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14356 [(set_attr "isa" "noavx,noavx,avx")
14357 (set_attr "type" "ssemov")
14358 (set_attr "length_immediate" "1")
14359 (set_attr "prefix_data16" "1,1,*")
14360 (set_attr "prefix_extra" "1")
14361 (set_attr "prefix" "orig,orig,vex")
14362 (set_attr "mode" "<MODE>")])
14363
14364 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
14365 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14366 (unspec:VF_128_256
14367 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
14368 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
14369 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
14370 UNSPEC_BLENDV))]
14371 "TARGET_SSE4_1"
14372 "@
14373 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14374 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14375 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14376 [(set_attr "isa" "noavx,noavx,avx")
14377 (set_attr "type" "ssemov")
14378 (set_attr "length_immediate" "1")
14379 (set_attr "prefix_data16" "1,1,*")
14380 (set_attr "prefix_extra" "1")
14381 (set_attr "prefix" "orig,orig,vex")
14382 (set_attr "btver2_decode" "vector,vector,vector")
14383 (set_attr "mode" "<MODE>")])
14384
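;; For dpps/dppd the immediate's high nibble selects which input elements
;; enter the dot product and the low nibble selects which result elements
;; receive the sum (the others are zeroed).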
14385 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
14386 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14387 (unspec:VF_128_256
14388 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
14389 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
14390 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
14391 UNSPEC_DP))]
14392 "TARGET_SSE4_1"
14393 "@
14394 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14395 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14396 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14397 [(set_attr "isa" "noavx,noavx,avx")
14398 (set_attr "type" "ssemul")
14399 (set_attr "length_immediate" "1")
14400 (set_attr "prefix_data16" "1,1,*")
14401 (set_attr "prefix_extra" "1")
14402 (set_attr "prefix" "orig,orig,vex")
14403 (set_attr "btver2_decode" "vector,vector,vector")
14404 (set_attr "znver1_decode" "vector,vector,vector")
14405 (set_attr "mode" "<MODE>")])
14406
14407 ;; Mode attribute used by `vmovntdqa' pattern
14408 (define_mode_attr vi8_sse4_1_avx2_avx512
14409 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
14410
14411 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
14412 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v")
14413 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")]
14414 UNSPEC_MOVNTDQA))]
14415 "TARGET_SSE4_1"
14416 "%vmovntdqa\t{%1, %0|%0, %1}"
14417 [(set_attr "type" "ssemov")
14418 (set_attr "prefix_extra" "1,1,*")
14419 (set_attr "prefix" "maybe_vex,maybe_vex,evex")
14420 (set_attr "mode" "<sseinsnmode>")])
14421
14422 (define_insn "<sse4_1_avx2>_mpsadbw"
14423 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
14424 (unspec:VI1_AVX2
14425 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14426 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
14427 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
14428 UNSPEC_MPSADBW))]
14429 "TARGET_SSE4_1"
14430 "@
14431 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14432 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14433 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14434 [(set_attr "isa" "noavx,noavx,avx")
14435 (set_attr "type" "sselog1")
14436 (set_attr "length_immediate" "1")
14437 (set_attr "prefix_extra" "1")
14438 (set_attr "prefix" "orig,orig,vex")
14439 (set_attr "btver2_decode" "vector,vector,vector")
14440 (set_attr "znver1_decode" "vector,vector,vector")
14441 (set_attr "mode" "<sseinsnmode>")])
14442
14443 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
14444 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v")
14445 (vec_concat:VI2_AVX2
14446 (us_truncate:<ssehalfvecmode>
14447 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v"))
14448 (us_truncate:<ssehalfvecmode>
14449 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,vm"))))]
14450 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14451 "@
14452 packusdw\t{%2, %0|%0, %2}
14453 packusdw\t{%2, %0|%0, %2}
14454 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14455 [(set_attr "isa" "noavx,noavx,avx")
14456 (set_attr "type" "sselog")
14457 (set_attr "prefix_extra" "1")
14458 (set_attr "prefix" "orig,orig,maybe_evex")
14459 (set_attr "mode" "<sseinsnmode>")])
14460
14461 (define_insn "<sse4_1_avx2>_pblendvb"
14462 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
14463 (unspec:VI1_AVX2
14464 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14465 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
14466 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
14467 UNSPEC_BLENDV))]
14468 "TARGET_SSE4_1"
14469 "@
14470 pblendvb\t{%3, %2, %0|%0, %2, %3}
14471 pblendvb\t{%3, %2, %0|%0, %2, %3}
14472 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14473 [(set_attr "isa" "noavx,noavx,avx")
14474 (set_attr "type" "ssemov")
14475 (set_attr "prefix_extra" "1")
14476 (set_attr "length_immediate" "*,*,1")
14477 (set_attr "prefix" "orig,orig,vex")
14478 (set_attr "btver2_decode" "vector,vector,vector")
14479 (set_attr "mode" "<sseinsnmode>")])
14480
14481 (define_insn "sse4_1_pblendw"
14482 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
14483 (vec_merge:V8HI
14484 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
14485 (match_operand:V8HI 1 "register_operand" "0,0,x")
14486 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
14487 "TARGET_SSE4_1"
14488 "@
14489 pblendw\t{%3, %2, %0|%0, %2, %3}
14490 pblendw\t{%3, %2, %0|%0, %2, %3}
14491 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14492 [(set_attr "isa" "noavx,noavx,avx")
14493 (set_attr "type" "ssemov")
14494 (set_attr "prefix_extra" "1")
14495 (set_attr "length_immediate" "1")
14496 (set_attr "prefix" "orig,orig,vex")
14497 (set_attr "mode" "TI")])
14498
14499 ;; The builtin takes an 8-bit immediate; duplicate it into both bytes of the 16-bit selector, since vpblendw reuses the same mask for each 128-bit lane.
14500 (define_expand "avx2_pblendw"
14501 [(set (match_operand:V16HI 0 "register_operand")
14502 (vec_merge:V16HI
14503 (match_operand:V16HI 2 "nonimmediate_operand")
14504 (match_operand:V16HI 1 "register_operand")
14505 (match_operand:SI 3 "const_0_to_255_operand")))]
14506 "TARGET_AVX2"
14507 {
14508 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
14509 operands[3] = GEN_INT (val << 8 | val);
14510 })
14511
14512 (define_insn "*avx2_pblendw"
14513 [(set (match_operand:V16HI 0 "register_operand" "=x")
14514 (vec_merge:V16HI
14515 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14516 (match_operand:V16HI 1 "register_operand" "x")
14517 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
14518 "TARGET_AVX2"
14519 {
14520 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
14521 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14522 }
14523 [(set_attr "type" "ssemov")
14524 (set_attr "prefix_extra" "1")
14525 (set_attr "length_immediate" "1")
14526 (set_attr "prefix" "vex")
14527 (set_attr "mode" "OI")])
14528
14529 (define_insn "avx2_pblendd<mode>"
14530 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
14531 (vec_merge:VI4_AVX2
14532 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
14533 (match_operand:VI4_AVX2 1 "register_operand" "x")
14534 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
14535 "TARGET_AVX2"
14536 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14537 [(set_attr "type" "ssemov")
14538 (set_attr "prefix_extra" "1")
14539 (set_attr "length_immediate" "1")
14540 (set_attr "prefix" "vex")
14541 (set_attr "mode" "<sseinsnmode>")])
14542
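;; phminposuw: the result's low word holds the smallest of the eight unsigned
;; word elements of the operand, the next word holds its index, and the
;; remaining elements are zeroed.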
14543 (define_insn "sse4_1_phminposuw"
14544 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x")
14545 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm")]
14546 UNSPEC_PHMINPOSUW))]
14547 "TARGET_SSE4_1"
14548 "%vphminposuw\t{%1, %0|%0, %1}"
14549 [(set_attr "type" "sselog1")
14550 (set_attr "prefix_extra" "1")
14551 (set_attr "prefix" "maybe_vex")
14552 (set_attr "mode" "TI")])
14553
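;; Sign/zero extension (vpmovsx*/vpmovzx*).  The AVX2/AVX512 forms extend a
;; whole narrower vector; the SSE4.1 forms instead select the low elements of
;; a full-width operand via vec_select, since the instruction only reads the
;; low part of its 128-bit source.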
14554 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
14555 [(set (match_operand:V16HI 0 "register_operand" "=v")
14556 (any_extend:V16HI
14557 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14558 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14559 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14560 [(set_attr "type" "ssemov")
14561 (set_attr "prefix_extra" "1")
14562 (set_attr "prefix" "maybe_evex")
14563 (set_attr "mode" "OI")])
14564
14565 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
14566 [(set (match_operand:V32HI 0 "register_operand" "=v")
14567 (any_extend:V32HI
14568 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
14569 "TARGET_AVX512BW"
14570 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14571 [(set_attr "type" "ssemov")
14572 (set_attr "prefix_extra" "1")
14573 (set_attr "prefix" "evex")
14574 (set_attr "mode" "XI")])
14575
14576 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
14577 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v")
14578 (any_extend:V8HI
14579 (vec_select:V8QI
14580 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14581 (parallel [(const_int 0) (const_int 1)
14582 (const_int 2) (const_int 3)
14583 (const_int 4) (const_int 5)
14584 (const_int 6) (const_int 7)]))))]
14585 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14586 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14587 [(set_attr "type" "ssemov")
14588 (set_attr "prefix_extra" "1")
14589 (set_attr "prefix" "maybe_vex")
14590 (set_attr "mode" "TI")])
14591
14592 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
14593 [(set (match_operand:V16SI 0 "register_operand" "=v")
14594 (any_extend:V16SI
14595 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14596 "TARGET_AVX512F"
14597 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14598 [(set_attr "type" "ssemov")
14599 (set_attr "prefix" "evex")
14600 (set_attr "mode" "XI")])
14601
14602 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
14603 [(set (match_operand:V8SI 0 "register_operand" "=v")
14604 (any_extend:V8SI
14605 (vec_select:V8QI
14606 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14607 (parallel [(const_int 0) (const_int 1)
14608 (const_int 2) (const_int 3)
14609 (const_int 4) (const_int 5)
14610 (const_int 6) (const_int 7)]))))]
14611 "TARGET_AVX2 && <mask_avx512vl_condition>"
14612 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14613 [(set_attr "type" "ssemov")
14614 (set_attr "prefix_extra" "1")
14615 (set_attr "prefix" "maybe_evex")
14616 (set_attr "mode" "OI")])
14617
14618 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
14619 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
14620 (any_extend:V4SI
14621 (vec_select:V4QI
14622 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14623 (parallel [(const_int 0) (const_int 1)
14624 (const_int 2) (const_int 3)]))))]
14625 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14626 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14627 [(set_attr "type" "ssemov")
14628 (set_attr "prefix_extra" "1")
14629 (set_attr "prefix" "maybe_vex")
14630 (set_attr "mode" "TI")])
14631
14632 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
14633 [(set (match_operand:V16SI 0 "register_operand" "=v")
14634 (any_extend:V16SI
14635 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
14636 "TARGET_AVX512F"
14637 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14638 [(set_attr "type" "ssemov")
14639 (set_attr "prefix" "evex")
14640 (set_attr "mode" "XI")])
14641
14642 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
14643 [(set (match_operand:V8SI 0 "register_operand" "=v")
14644 (any_extend:V8SI
14645 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14646 "TARGET_AVX2 && <mask_avx512vl_condition>"
14647 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14648 [(set_attr "type" "ssemov")
14649 (set_attr "prefix_extra" "1")
14650 (set_attr "prefix" "maybe_evex")
14651 (set_attr "mode" "OI")])
14652
14653 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
14654 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
14655 (any_extend:V4SI
14656 (vec_select:V4HI
14657 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
14658 (parallel [(const_int 0) (const_int 1)
14659 (const_int 2) (const_int 3)]))))]
14660 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14661 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14662 [(set_attr "type" "ssemov")
14663 (set_attr "prefix_extra" "1")
14664 (set_attr "prefix" "maybe_vex")
14665 (set_attr "mode" "TI")])
14666
14667 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
14668 [(set (match_operand:V8DI 0 "register_operand" "=v")
14669 (any_extend:V8DI
14670 (vec_select:V8QI
14671 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14672 (parallel [(const_int 0) (const_int 1)
14673 (const_int 2) (const_int 3)
14674 (const_int 4) (const_int 5)
14675 (const_int 6) (const_int 7)]))))]
14676 "TARGET_AVX512F"
14677 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14678 [(set_attr "type" "ssemov")
14679 (set_attr "prefix" "evex")
14680 (set_attr "mode" "XI")])
14681
14682 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
14683 [(set (match_operand:V4DI 0 "register_operand" "=v")
14684 (any_extend:V4DI
14685 (vec_select:V4QI
14686 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14687 (parallel [(const_int 0) (const_int 1)
14688 (const_int 2) (const_int 3)]))))]
14689 "TARGET_AVX2 && <mask_avx512vl_condition>"
14690 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14691 [(set_attr "type" "ssemov")
14692 (set_attr "prefix_extra" "1")
14693 (set_attr "prefix" "maybe_evex")
14694 (set_attr "mode" "OI")])
14695
14696 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
14697 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14698 (any_extend:V2DI
14699 (vec_select:V2QI
14700 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14701 (parallel [(const_int 0) (const_int 1)]))))]
14702 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14703 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
14704 [(set_attr "type" "ssemov")
14705 (set_attr "prefix_extra" "1")
14706 (set_attr "prefix" "maybe_vex")
14707 (set_attr "mode" "TI")])
14708
14709 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
14710 [(set (match_operand:V8DI 0 "register_operand" "=v")
14711 (any_extend:V8DI
14712 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14713 "TARGET_AVX512F"
14714 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14715 [(set_attr "type" "ssemov")
14716 (set_attr "prefix" "evex")
14717 (set_attr "mode" "XI")])
14718
14719 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
14720 [(set (match_operand:V4DI 0 "register_operand" "=v")
14721 (any_extend:V4DI
14722 (vec_select:V4HI
14723 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
14724 (parallel [(const_int 0) (const_int 1)
14725 (const_int 2) (const_int 3)]))))]
14726 "TARGET_AVX2 && <mask_avx512vl_condition>"
14727 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14728 [(set_attr "type" "ssemov")
14729 (set_attr "prefix_extra" "1")
14730 (set_attr "prefix" "maybe_evex")
14731 (set_attr "mode" "OI")])
14732
14733 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
14734 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14735 (any_extend:V2DI
14736 (vec_select:V2HI
14737 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
14738 (parallel [(const_int 0) (const_int 1)]))))]
14739 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14740 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14741 [(set_attr "type" "ssemov")
14742 (set_attr "prefix_extra" "1")
14743 (set_attr "prefix" "maybe_vex")
14744 (set_attr "mode" "TI")])
14745
14746 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
14747 [(set (match_operand:V8DI 0 "register_operand" "=v")
14748 (any_extend:V8DI
14749 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
14750 "TARGET_AVX512F"
14751 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14752 [(set_attr "type" "ssemov")
14753 (set_attr "prefix" "evex")
14754 (set_attr "mode" "XI")])
14755
14756 (define_insn "avx2_<code>v4siv4di2<mask_name>"
14757 [(set (match_operand:V4DI 0 "register_operand" "=v")
14758 (any_extend:V4DI
14759 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
14760 "TARGET_AVX2 && <mask_avx512vl_condition>"
14761 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14762 [(set_attr "type" "ssemov")
14763 (set_attr "prefix" "maybe_evex")
14764 (set_attr "prefix_extra" "1")
14765 (set_attr "mode" "OI")])
14766
14767 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
14768 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14769 (any_extend:V2DI
14770 (vec_select:V2SI
14771 (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm")
14772 (parallel [(const_int 0) (const_int 1)]))))]
14773 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14774 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14775 [(set_attr "type" "ssemov")
14776 (set_attr "prefix_extra" "1")
14777 (set_attr "prefix" "maybe_vex")
14778 (set_attr "mode" "TI")])
14779
14780 ;; vtestps/vtestpd are very similar to comiss and ucomiss when setting
14781 ;; FLAGS_REG, but they are not really compare instructions.
14782 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
14783 [(set (reg:CC FLAGS_REG)
14784 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
14785 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
14786 UNSPEC_VTESTP))]
14787 "TARGET_AVX"
14788 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
14789 [(set_attr "type" "ssecomi")
14790 (set_attr "prefix_extra" "1")
14791 (set_attr "prefix" "vex")
14792 (set_attr "mode" "<MODE>")])
14793
14794 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
14795 ;; but it is not really a compare instruction.
14796 (define_insn "<sse4_1>_ptest<mode>"
14797 [(set (reg:CC FLAGS_REG)
14798 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
14799 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
14800 UNSPEC_PTEST))]
14801 "TARGET_SSE4_1"
14802 "%vptest\t{%1, %0|%0, %1}"
14803 [(set_attr "isa" "*,*,avx")
14804 (set_attr "type" "ssecomi")
14805 (set_attr "prefix_extra" "1")
14806 (set_attr "prefix" "maybe_vex")
14807 (set (attr "btver2_decode")
14808 (if_then_else
14809 (match_test "<sseinsnmode>mode==OImode")
14810 (const_string "vector")
14811 (const_string "*")))
14812 (set_attr "mode" "<sseinsnmode>")])
14813
14814 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
14815 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x")
14816 (unspec:VF_128_256
14817 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm")
14818 (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
14819 UNSPEC_ROUND))]
14820 "TARGET_ROUND"
14821 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14822 [(set_attr "type" "ssecvt")
14823 (set (attr "prefix_data16")
14824 (if_then_else
14825 (match_test "TARGET_AVX")
14826 (const_string "*")
14827 (const_string "1")))
14828 (set_attr "prefix_extra" "1")
14829 (set_attr "length_immediate" "1")
14830 (set_attr "prefix" "maybe_vex")
14831 (set_attr "mode" "<MODE>")])
14832
14833 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
14834 [(match_operand:<sseintvecmode> 0 "register_operand")
14835 (match_operand:VF1_128_256 1 "vector_operand")
14836 (match_operand:SI 2 "const_0_to_15_operand")]
14837 "TARGET_ROUND"
14838 {
14839 rtx tmp = gen_reg_rtx (<MODE>mode);
14840
14841 emit_insn
14842 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
14843 operands[2]));
14844 emit_insn
14845 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
14846 DONE;
14847 })
14848
14849 (define_expand "avx512f_roundpd512"
14850 [(match_operand:V8DF 0 "register_operand")
14851 (match_operand:V8DF 1 "nonimmediate_operand")
14852 (match_operand:SI 2 "const_0_to_15_operand")]
14853 "TARGET_AVX512F"
14854 {
14855 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
14856 DONE;
14857 })
14858
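;; Round two double vectors and pack the truncated results into one vector of
;; ints.  For V2DF with 256-bit AVX available (and 128-bit operation not
;; preferred), the two inputs are first concatenated so a single V4DF round
;; and conversion suffice; otherwise each input is rounded separately and the
;; results are packed.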
14859 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
14860 [(match_operand:<ssepackfltmode> 0 "register_operand")
14861 (match_operand:VF2 1 "vector_operand")
14862 (match_operand:VF2 2 "vector_operand")
14863 (match_operand:SI 3 "const_0_to_15_operand")]
14864 "TARGET_ROUND"
14865 {
14866 rtx tmp0, tmp1;
14867
14868 if (<MODE>mode == V2DFmode
14869 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
14870 {
14871 rtx tmp2 = gen_reg_rtx (V4DFmode);
14872
14873 tmp0 = gen_reg_rtx (V4DFmode);
14874 tmp1 = force_reg (V2DFmode, operands[1]);
14875
14876 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14877 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
14878 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14879 }
14880 else
14881 {
14882 tmp0 = gen_reg_rtx (<MODE>mode);
14883 tmp1 = gen_reg_rtx (<MODE>mode);
14884
14885 emit_insn
14886 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
14887 operands[3]));
14888 emit_insn
14889 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
14890 operands[3]));
14891 emit_insn
14892 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14893 }
14894 DONE;
14895 })
14896
14897 (define_insn "sse4_1_round<ssescalarmodesuffix>"
14898 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
14899 (vec_merge:VF_128
14900 (unspec:VF_128
14901 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
14902 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
14903 UNSPEC_ROUND)
14904 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
14905 (const_int 1)))]
14906 "TARGET_ROUND"
14907 "@
14908 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14909 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14910 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
14911 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14912 [(set_attr "isa" "noavx,noavx,avx,avx512f")
14913 (set_attr "type" "ssecvt")
14914 (set_attr "length_immediate" "1")
14915 (set_attr "prefix_data16" "1,1,*,*")
14916 (set_attr "prefix_extra" "1")
14917 (set_attr "prefix" "orig,orig,vex,evex")
14918 (set_attr "mode" "<MODE>")])
14919
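;; Expand round-to-nearest (ties away from zero) without changing the
;; rounding mode: add copysign (nextafter (0.5, 0.0), x) to x and then
;; truncate (ROUND_TRUNC).  Using the value just below 0.5 keeps inputs
;; slightly under 0.5 from being pushed up to 1.0 when the addition itself
;; rounds.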
14920 (define_expand "round<mode>2"
14921 [(set (match_dup 4)
14922 (plus:VF
14923 (match_operand:VF 1 "register_operand")
14924 (match_dup 3)))
14925 (set (match_operand:VF 0 "register_operand")
14926 (unspec:VF
14927 [(match_dup 4) (match_dup 5)]
14928 UNSPEC_ROUND))]
14929 "TARGET_ROUND && !flag_trapping_math"
14930 {
14931 machine_mode scalar_mode;
14932 const struct real_format *fmt;
14933 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
14934 rtx half, vec_half;
14935
14936 scalar_mode = GET_MODE_INNER (<MODE>mode);
14937
14938 /* load nextafter (0.5, 0.0) */
14939 fmt = REAL_MODE_FORMAT (scalar_mode);
14940 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
14941 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
14942 half = const_double_from_real_value (pred_half, scalar_mode);
14943
14944 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
14945 vec_half = force_reg (<MODE>mode, vec_half);
14946
14947 operands[3] = gen_reg_rtx (<MODE>mode);
14948 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
14949
14950 operands[4] = gen_reg_rtx (<MODE>mode);
14951 operands[5] = GEN_INT (ROUND_TRUNC);
14952 })
14953
14954 (define_expand "round<mode>2_sfix"
14955 [(match_operand:<sseintvecmode> 0 "register_operand")
14956 (match_operand:VF1_128_256 1 "register_operand")]
14957 "TARGET_ROUND && !flag_trapping_math"
14958 {
14959 rtx tmp = gen_reg_rtx (<MODE>mode);
14960
14961 emit_insn (gen_round<mode>2 (tmp, operands[1]));
14962
14963 emit_insn
14964 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
14965 DONE;
14966 })
14967
14968 (define_expand "round<mode>2_vec_pack_sfix"
14969 [(match_operand:<ssepackfltmode> 0 "register_operand")
14970 (match_operand:VF2 1 "register_operand")
14971 (match_operand:VF2 2 "register_operand")]
14972 "TARGET_ROUND && !flag_trapping_math"
14973 {
14974 rtx tmp0, tmp1;
14975
14976 if (<MODE>mode == V2DFmode
14977 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
14978 {
14979 rtx tmp2 = gen_reg_rtx (V4DFmode);
14980
14981 tmp0 = gen_reg_rtx (V4DFmode);
14982 tmp1 = force_reg (V2DFmode, operands[1]);
14983
14984 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14985 emit_insn (gen_roundv4df2 (tmp2, tmp0));
14986 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14987 }
14988 else
14989 {
14990 tmp0 = gen_reg_rtx (<MODE>mode);
14991 tmp1 = gen_reg_rtx (<MODE>mode);
14992
14993 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
14994 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
14995
14996 emit_insn
14997 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14998 }
14999 DONE;
15000 })
15001
15002 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15003 ;;
15004 ;; Intel SSE4.2 string/text processing instructions
15005 ;;
15006 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15007
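;; The combined pcmpestr pattern below computes all three results (ECX, XMM0
;; and the flags); it is split into the pcmpestri, pcmpestrm or flags-only
;; form depending on which results are actually used, i.e. which outputs lack
;; a REG_UNUSED note.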
15008 (define_insn_and_split "sse4_2_pcmpestr"
15009 [(set (match_operand:SI 0 "register_operand" "=c,c")
15010 (unspec:SI
15011 [(match_operand:V16QI 2 "register_operand" "x,x")
15012 (match_operand:SI 3 "register_operand" "a,a")
15013 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
15014 (match_operand:SI 5 "register_operand" "d,d")
15015 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
15016 UNSPEC_PCMPESTR))
15017 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15018 (unspec:V16QI
15019 [(match_dup 2)
15020 (match_dup 3)
15021 (match_dup 4)
15022 (match_dup 5)
15023 (match_dup 6)]
15024 UNSPEC_PCMPESTR))
15025 (set (reg:CC FLAGS_REG)
15026 (unspec:CC
15027 [(match_dup 2)
15028 (match_dup 3)
15029 (match_dup 4)
15030 (match_dup 5)
15031 (match_dup 6)]
15032 UNSPEC_PCMPESTR))]
15033 "TARGET_SSE4_2
15034 && can_create_pseudo_p ()"
15035 "#"
15036 "&& 1"
15037 [(const_int 0)]
15038 {
15039 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15040 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15041 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15042
15043 if (ecx)
15044 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
15045 operands[3], operands[4],
15046 operands[5], operands[6]));
15047 if (xmm0)
15048 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
15049 operands[3], operands[4],
15050 operands[5], operands[6]));
15051 if (flags && !(ecx || xmm0))
15052 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
15053 operands[2], operands[3],
15054 operands[4], operands[5],
15055 operands[6]));
15056 if (!(flags || ecx || xmm0))
15057 emit_note (NOTE_INSN_DELETED);
15058
15059 DONE;
15060 }
15061 [(set_attr "type" "sselog")
15062 (set_attr "prefix_data16" "1")
15063 (set_attr "prefix_extra" "1")
15064 (set_attr "length_immediate" "1")
15065 (set_attr "memory" "none,load")
15066 (set_attr "mode" "TI")])
15067
15068 (define_insn "sse4_2_pcmpestri"
15069 [(set (match_operand:SI 0 "register_operand" "=c,c")
15070 (unspec:SI
15071 [(match_operand:V16QI 1 "register_operand" "x,x")
15072 (match_operand:SI 2 "register_operand" "a,a")
15073 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15074 (match_operand:SI 4 "register_operand" "d,d")
15075 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15076 UNSPEC_PCMPESTR))
15077 (set (reg:CC FLAGS_REG)
15078 (unspec:CC
15079 [(match_dup 1)
15080 (match_dup 2)
15081 (match_dup 3)
15082 (match_dup 4)
15083 (match_dup 5)]
15084 UNSPEC_PCMPESTR))]
15085 "TARGET_SSE4_2"
15086 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
15087 [(set_attr "type" "sselog")
15088 (set_attr "prefix_data16" "1")
15089 (set_attr "prefix_extra" "1")
15090 (set_attr "prefix" "maybe_vex")
15091 (set_attr "length_immediate" "1")
15092 (set_attr "btver2_decode" "vector")
15093 (set_attr "memory" "none,load")
15094 (set_attr "mode" "TI")])
15095
15096 (define_insn "sse4_2_pcmpestrm"
15097 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15098 (unspec:V16QI
15099 [(match_operand:V16QI 1 "register_operand" "x,x")
15100 (match_operand:SI 2 "register_operand" "a,a")
15101 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15102 (match_operand:SI 4 "register_operand" "d,d")
15103 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15104 UNSPEC_PCMPESTR))
15105 (set (reg:CC FLAGS_REG)
15106 (unspec:CC
15107 [(match_dup 1)
15108 (match_dup 2)
15109 (match_dup 3)
15110 (match_dup 4)
15111 (match_dup 5)]
15112 UNSPEC_PCMPESTR))]
15113 "TARGET_SSE4_2"
15114 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
15115 [(set_attr "type" "sselog")
15116 (set_attr "prefix_data16" "1")
15117 (set_attr "prefix_extra" "1")
15118 (set_attr "length_immediate" "1")
15119 (set_attr "prefix" "maybe_vex")
15120 (set_attr "btver2_decode" "vector")
15121 (set_attr "memory" "none,load")
15122 (set_attr "mode" "TI")])
15123
15124 (define_insn "sse4_2_pcmpestr_cconly"
15125 [(set (reg:CC FLAGS_REG)
15126 (unspec:CC
15127 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15128 (match_operand:SI 3 "register_operand" "a,a,a,a")
15129 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
15130 (match_operand:SI 5 "register_operand" "d,d,d,d")
15131 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
15132 UNSPEC_PCMPESTR))
15133 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15134 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
15135 "TARGET_SSE4_2"
15136 "@
15137 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15138 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15139 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
15140 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
15141 [(set_attr "type" "sselog")
15142 (set_attr "prefix_data16" "1")
15143 (set_attr "prefix_extra" "1")
15144 (set_attr "length_immediate" "1")
15145 (set_attr "memory" "none,load,none,load")
15146 (set_attr "btver2_decode" "vector,vector,vector,vector")
15147 (set_attr "prefix" "maybe_vex")
15148 (set_attr "mode" "TI")])
15149
15150 (define_insn_and_split "sse4_2_pcmpistr"
15151 [(set (match_operand:SI 0 "register_operand" "=c,c")
15152 (unspec:SI
15153 [(match_operand:V16QI 2 "register_operand" "x,x")
15154 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15155 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
15156 UNSPEC_PCMPISTR))
15157 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15158 (unspec:V16QI
15159 [(match_dup 2)
15160 (match_dup 3)
15161 (match_dup 4)]
15162 UNSPEC_PCMPISTR))
15163 (set (reg:CC FLAGS_REG)
15164 (unspec:CC
15165 [(match_dup 2)
15166 (match_dup 3)
15167 (match_dup 4)]
15168 UNSPEC_PCMPISTR))]
15169 "TARGET_SSE4_2
15170 && can_create_pseudo_p ()"
15171 "#"
15172 "&& 1"
15173 [(const_int 0)]
15174 {
15175 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15176 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15177 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15178
15179 if (ecx)
15180 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
15181 operands[3], operands[4]));
15182 if (xmm0)
15183 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
15184 operands[3], operands[4]));
15185 if (flags && !(ecx || xmm0))
15186 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
15187 operands[2], operands[3],
15188 operands[4]));
15189 if (!(flags || ecx || xmm0))
15190 emit_note (NOTE_INSN_DELETED);
15191
15192 DONE;
15193 }
15194 [(set_attr "type" "sselog")
15195 (set_attr "prefix_data16" "1")
15196 (set_attr "prefix_extra" "1")
15197 (set_attr "length_immediate" "1")
15198 (set_attr "memory" "none,load")
15199 (set_attr "mode" "TI")])
15200
15201 (define_insn "sse4_2_pcmpistri"
15202 [(set (match_operand:SI 0 "register_operand" "=c,c")
15203 (unspec:SI
15204 [(match_operand:V16QI 1 "register_operand" "x,x")
15205 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15206 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15207 UNSPEC_PCMPISTR))
15208 (set (reg:CC FLAGS_REG)
15209 (unspec:CC
15210 [(match_dup 1)
15211 (match_dup 2)
15212 (match_dup 3)]
15213 UNSPEC_PCMPISTR))]
15214 "TARGET_SSE4_2"
15215 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
15216 [(set_attr "type" "sselog")
15217 (set_attr "prefix_data16" "1")
15218 (set_attr "prefix_extra" "1")
15219 (set_attr "length_immediate" "1")
15220 (set_attr "prefix" "maybe_vex")
15221 (set_attr "memory" "none,load")
15222 (set_attr "btver2_decode" "vector")
15223 (set_attr "mode" "TI")])
15224
15225 (define_insn "sse4_2_pcmpistrm"
15226 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15227 (unspec:V16QI
15228 [(match_operand:V16QI 1 "register_operand" "x,x")
15229 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15230 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15231 UNSPEC_PCMPISTR))
15232 (set (reg:CC FLAGS_REG)
15233 (unspec:CC
15234 [(match_dup 1)
15235 (match_dup 2)
15236 (match_dup 3)]
15237 UNSPEC_PCMPISTR))]
15238 "TARGET_SSE4_2"
15239 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
15240 [(set_attr "type" "sselog")
15241 (set_attr "prefix_data16" "1")
15242 (set_attr "prefix_extra" "1")
15243 (set_attr "length_immediate" "1")
15244 (set_attr "prefix" "maybe_vex")
15245 (set_attr "memory" "none,load")
15246 (set_attr "btver2_decode" "vector")
15247 (set_attr "mode" "TI")])
15248
15249 (define_insn "sse4_2_pcmpistr_cconly"
15250 [(set (reg:CC FLAGS_REG)
15251 (unspec:CC
15252 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15253 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
15254 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
15255 UNSPEC_PCMPISTR))
15256 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15257 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
15258 "TARGET_SSE4_2"
15259 "@
15260 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15261 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15262 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
15263 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
15264 [(set_attr "type" "sselog")
15265 (set_attr "prefix_data16" "1")
15266 (set_attr "prefix_extra" "1")
15267 (set_attr "length_immediate" "1")
15268 (set_attr "memory" "none,load,none,load")
15269 (set_attr "prefix" "maybe_vex")
15270 (set_attr "btver2_decode" "vector,vector,vector,vector")
15271 (set_attr "mode" "TI")])
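
;; The pcmpistr forms use implicit-length (NUL-terminated) operands, so only
;; the two vectors and the immediate are needed.  A sketch, assuming the
;; standard intrinsics from <immintrin.h> (compile with -msse4.2) and a
;; buffer that can safely be read in 16-byte chunks up to and past its
;; terminator:
;;
;;   #include <immintrin.h>
;;   #include <stddef.h>
;;
;;   /* Return the number of leading decimal digits in P.  */
;;   static size_t
;;   skip_digits (const char *p)
;;   {
;;     const __m128i digits = _mm_setr_epi8 ('0', '9', 0, 0, 0, 0, 0, 0,
;;                                           0, 0, 0, 0, 0, 0, 0, 0);
;;     size_t n = 0;
;;     for (;;)
;;       {
;;         __m128i chunk = _mm_loadu_si128 ((const __m128i *) (p + n));
;;         int idx = _mm_cmpistri (digits, chunk,
;;                                 _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES
;;                                 | _SIDD_NEGATIVE_POLARITY);
;;         n += idx;
;;         if (idx < 16)            /* hit a non-digit or the NUL */
;;           return n;
;;       }
;;   }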
15272
15273 ;; Packed float variants
15274 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
15275 [(V8DI "V8SF") (V16SI "V16SF")])
15276
15277 (define_expand "avx512pf_gatherpf<mode>sf"
15278 [(unspec
15279 [(match_operand:<avx512fmaskmode> 0 "register_operand")
15280 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15281 (match_par_dup 5
15282 [(match_operand 2 "vsib_address_operand")
15283 (match_operand:VI48_512 1 "register_operand")
15284 (match_operand:SI 3 "const1248_operand")]))
15285 (match_operand:SI 4 "const_2_to_3_operand")]
15286 UNSPEC_GATHER_PREFETCH)]
15287 "TARGET_AVX512PF"
15288 {
15289 operands[5]
15290 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15291 operands[3]), UNSPEC_VSIBADDR);
15292 })
15293
15294 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
15295 [(unspec
15296 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15297 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15298 [(unspec:P
15299 [(match_operand:P 2 "vsib_address_operand" "Tv")
15300 (match_operand:VI48_512 1 "register_operand" "v")
15301 (match_operand:SI 3 "const1248_operand" "n")]
15302 UNSPEC_VSIBADDR)])
15303 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15304 UNSPEC_GATHER_PREFETCH)]
15305 "TARGET_AVX512PF"
15306 {
15307 switch (INTVAL (operands[4]))
15308 {
15309 case 3:
15310 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15311 case 2:
15312 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15313 default:
15314 gcc_unreachable ();
15315 }
15316 }
15317 [(set_attr "type" "sse")
15318 (set_attr "prefix" "evex")
15319 (set_attr "mode" "XI")])
15320
15321 ;; Packed double variants
15322 (define_expand "avx512pf_gatherpf<mode>df"
15323 [(unspec
15324 [(match_operand:<avx512fmaskmode> 0 "register_operand")
15325 (mem:V8DF
15326 (match_par_dup 5
15327 [(match_operand 2 "vsib_address_operand")
15328 (match_operand:VI4_256_8_512 1 "register_operand")
15329 (match_operand:SI 3 "const1248_operand")]))
15330 (match_operand:SI 4 "const_2_to_3_operand")]
15331 UNSPEC_GATHER_PREFETCH)]
15332 "TARGET_AVX512PF"
15333 {
15334 operands[5]
15335 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15336 operands[3]), UNSPEC_VSIBADDR);
15337 })
15338
15339 (define_insn "*avx512pf_gatherpf<mode>df_mask"
15340 [(unspec
15341 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15342 (match_operator:V8DF 5 "vsib_mem_operator"
15343 [(unspec:P
15344 [(match_operand:P 2 "vsib_address_operand" "Tv")
15345 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15346 (match_operand:SI 3 "const1248_operand" "n")]
15347 UNSPEC_VSIBADDR)])
15348 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15349 UNSPEC_GATHER_PREFETCH)]
15350 "TARGET_AVX512PF"
15351 {
15352 switch (INTVAL (operands[4]))
15353 {
15354 case 3:
15355 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15356 case 2:
15357 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15358 default:
15359 gcc_unreachable ();
15360 }
15361 }
15362 [(set_attr "type" "sse")
15363 (set_attr "prefix" "evex")
15364 (set_attr "mode" "XI")])
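
;; The const_2_to_3_operand immediate is the prefetch hint: 3 (_MM_HINT_T0)
;; selects the vgatherpf0* forms and 2 (_MM_HINT_T1) the vgatherpf1* forms;
;; the scatter-prefetch patterns below additionally accept 6 and 7, the
;; corresponding exclusive (ET) hints.  A sketch in C; the intrinsic name
;; and argument order follow the usual avx512pfintrin.h declarations and
;; should be treated as an assumption here (compile with -mavx512pf):
;;
;;   #include <immintrin.h>
;;
;;   /* Prefetch the doubles BASE[IDX[i]] selected by mask M into L1.  */
;;   static void
;;   prefetch_rows (const double *base, __m256i idx, __mmask8 m)
;;   {
;;     _mm512_mask_prefetch_i32gather_pd (idx, m, base, 8, _MM_HINT_T0);
;;   }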
15365
15366 ;; Packed float variants
15367 (define_expand "avx512pf_scatterpf<mode>sf"
15368 [(unspec
15369 [(match_operand:<avx512fmaskmode> 0 "register_operand")
15370 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15371 (match_par_dup 5
15372 [(match_operand 2 "vsib_address_operand")
15373 (match_operand:VI48_512 1 "register_operand")
15374 (match_operand:SI 3 "const1248_operand")]))
15375 (match_operand:SI 4 "const2367_operand")]
15376 UNSPEC_SCATTER_PREFETCH)]
15377 "TARGET_AVX512PF"
15378 {
15379 operands[5]
15380 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15381 operands[3]), UNSPEC_VSIBADDR);
15382 })
15383
15384 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
15385 [(unspec
15386 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15387 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15388 [(unspec:P
15389 [(match_operand:P 2 "vsib_address_operand" "Tv")
15390 (match_operand:VI48_512 1 "register_operand" "v")
15391 (match_operand:SI 3 "const1248_operand" "n")]
15392 UNSPEC_VSIBADDR)])
15393 (match_operand:SI 4 "const2367_operand" "n")]
15394 UNSPEC_SCATTER_PREFETCH)]
15395 "TARGET_AVX512PF"
15396 {
15397 switch (INTVAL (operands[4]))
15398 {
15399 case 3:
15400 case 7:
15401 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15402 case 2:
15403 case 6:
15404 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15405 default:
15406 gcc_unreachable ();
15407 }
15408 }
15409 [(set_attr "type" "sse")
15410 (set_attr "prefix" "evex")
15411 (set_attr "mode" "XI")])
15412
15413 ;; Packed double variants
15414 (define_expand "avx512pf_scatterpf<mode>df"
15415 [(unspec
15416 [(match_operand:<avx512fmaskmode> 0 "register_operand")
15417 (mem:V8DF
15418 (match_par_dup 5
15419 [(match_operand 2 "vsib_address_operand")
15420 (match_operand:VI4_256_8_512 1 "register_operand")
15421 (match_operand:SI 3 "const1248_operand")]))
15422 (match_operand:SI 4 "const2367_operand")]
15423 UNSPEC_SCATTER_PREFETCH)]
15424 "TARGET_AVX512PF"
15425 {
15426 operands[5]
15427 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15428 operands[3]), UNSPEC_VSIBADDR);
15429 })
15430
15431 (define_insn "*avx512pf_scatterpf<mode>df_mask"
15432 [(unspec
15433 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15434 (match_operator:V8DF 5 "vsib_mem_operator"
15435 [(unspec:P
15436 [(match_operand:P 2 "vsib_address_operand" "Tv")
15437 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15438 (match_operand:SI 3 "const1248_operand" "n")]
15439 UNSPEC_VSIBADDR)])
15440 (match_operand:SI 4 "const2367_operand" "n")]
15441 UNSPEC_SCATTER_PREFETCH)]
15442 "TARGET_AVX512PF"
15443 {
15444 switch (INTVAL (operands[4]))
15445 {
15446 case 3:
15447 case 7:
15448 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15449 case 2:
15450 case 6:
15451 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15452 default:
15453 gcc_unreachable ();
15454 }
15455 }
15456 [(set_attr "type" "sse")
15457 (set_attr "prefix" "evex")
15458 (set_attr "mode" "XI")])
15459
15460 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
15461 [(set (match_operand:VF_512 0 "register_operand" "=v")
15462 (unspec:VF_512
15463 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15464 UNSPEC_EXP2))]
15465 "TARGET_AVX512ER"
15466 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15467 [(set_attr "prefix" "evex")
15468 (set_attr "type" "sse")
15469 (set_attr "mode" "<MODE>")])
15470
15471 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
15472 [(set (match_operand:VF_512 0 "register_operand" "=v")
15473 (unspec:VF_512
15474 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15475 UNSPEC_RCP28))]
15476 "TARGET_AVX512ER"
15477 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15478 [(set_attr "prefix" "evex")
15479 (set_attr "type" "sse")
15480 (set_attr "mode" "<MODE>")])
15481
15482 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
15483 [(set (match_operand:VF_128 0 "register_operand" "=v")
15484 (vec_merge:VF_128
15485 (unspec:VF_128
15486 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15487 UNSPEC_RCP28)
15488 (match_operand:VF_128 2 "register_operand" "v")
15489 (const_int 1)))]
15490 "TARGET_AVX512ER"
15491 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15492 [(set_attr "length_immediate" "1")
15493 (set_attr "prefix" "evex")
15494 (set_attr "type" "sse")
15495 (set_attr "mode" "<MODE>")])
15496
15497 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
15498 [(set (match_operand:VF_512 0 "register_operand" "=v")
15499 (unspec:VF_512
15500 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15501 UNSPEC_RSQRT28))]
15502 "TARGET_AVX512ER"
15503 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15504 [(set_attr "prefix" "evex")
15505 (set_attr "type" "sse")
15506 (set_attr "mode" "<MODE>")])
15507
15508 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
15509 [(set (match_operand:VF_128 0 "register_operand" "=v")
15510 (vec_merge:VF_128
15511 (unspec:VF_128
15512 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15513 UNSPEC_RSQRT28)
15514 (match_operand:VF_128 2 "register_operand" "v")
15515 (const_int 1)))]
15516 "TARGET_AVX512ER"
15517 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15518 [(set_attr "length_immediate" "1")
15519 (set_attr "type" "sse")
15520 (set_attr "prefix" "evex")
15521 (set_attr "mode" "<MODE>")])
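
;; The AVX-512ER approximations have a maximum relative error of 2^-28, so
;; for single precision they are often usable directly, without a
;; Newton-Raphson refinement step.  A sketch; the intrinsic names follow
;; the usual avx512erintrin.h declarations and are an assumption here
;; (compile with -mavx512er):
;;
;;   #include <immintrin.h>
;;
;;   /* Approximate a/b as a * (1/b) to about 2^-28 relative error,
;;      avoiding the much higher latency of vdivps.  */
;;   static __m512
;;   fast_div_ps (__m512 a, __m512 b)
;;   {
;;     return _mm512_mul_ps (a, _mm512_rcp28_ps (b));
;;   }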
15522
15523 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15524 ;;
15525 ;; XOP instructions
15526 ;;
15527 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15528
15529 (define_code_iterator xop_plus [plus ss_plus])
15530
15531 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
15532 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
15533
15534 ;; XOP parallel integer multiply/add instructions.
15535
15536 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
15537 [(set (match_operand:VI24_128 0 "register_operand" "=x")
15538 (xop_plus:VI24_128
15539 (mult:VI24_128
15540 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
15541 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
15542 (match_operand:VI24_128 3 "register_operand" "x")))]
15543 "TARGET_XOP"
15544 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15545 [(set_attr "type" "ssemuladd")
15546 (set_attr "mode" "TI")])
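
;; The xop_plus code iterator lets one template cover both the wrapping
;; (vpmacsww/vpmacsdd) and the signed-saturating (vpmacssww/vpmacssdd)
;; multiply-accumulate forms.  A scalar reference model of one 16-bit lane,
;; written to mirror the RTL above (HImode product, then plus or ss_plus):
;;
;;   #include <stdint.h>
;;
;;   static int16_t
;;   macs_w_lane (int16_t a, int16_t b, int16_t c, int saturate)
;;   {
;;     int16_t prod = (int16_t) (a * b);          /* mult:HI, wraps */
;;     int32_t sum  = (int32_t) prod + c;
;;     if (!saturate)
;;       return (int16_t) sum;                    /* plus: wraps */
;;     if (sum > INT16_MAX) return INT16_MAX;     /* ss_plus: clamps */
;;     if (sum < INT16_MIN) return INT16_MIN;
;;     return (int16_t) sum;
;;   }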
15547
15548 (define_insn "xop_p<macs>dql"
15549 [(set (match_operand:V2DI 0 "register_operand" "=x")
15550 (xop_plus:V2DI
15551 (mult:V2DI
15552 (sign_extend:V2DI
15553 (vec_select:V2SI
15554 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15555 (parallel [(const_int 0) (const_int 2)])))
15556 (sign_extend:V2DI
15557 (vec_select:V2SI
15558 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15559 (parallel [(const_int 0) (const_int 2)]))))
15560 (match_operand:V2DI 3 "register_operand" "x")))]
15561 "TARGET_XOP"
15562 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15563 [(set_attr "type" "ssemuladd")
15564 (set_attr "mode" "TI")])
15565
15566 (define_insn "xop_p<macs>dqh"
15567 [(set (match_operand:V2DI 0 "register_operand" "=x")
15568 (xop_plus:V2DI
15569 (mult:V2DI
15570 (sign_extend:V2DI
15571 (vec_select:V2SI
15572 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15573 (parallel [(const_int 1) (const_int 3)])))
15574 (sign_extend:V2DI
15575 (vec_select:V2SI
15576 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15577 (parallel [(const_int 1) (const_int 3)]))))
15578 (match_operand:V2DI 3 "register_operand" "x")))]
15579 "TARGET_XOP"
15580 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15581 [(set_attr "type" "ssemuladd")
15582 (set_attr "mode" "TI")])
15583
15584 ;; XOP parallel integer multiply/add instructions for the intrinsics.
15585 (define_insn "xop_p<macs>wd"
15586 [(set (match_operand:V4SI 0 "register_operand" "=x")
15587 (xop_plus:V4SI
15588 (mult:V4SI
15589 (sign_extend:V4SI
15590 (vec_select:V4HI
15591 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15592 (parallel [(const_int 1) (const_int 3)
15593 (const_int 5) (const_int 7)])))
15594 (sign_extend:V4SI
15595 (vec_select:V4HI
15596 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15597 (parallel [(const_int 1) (const_int 3)
15598 (const_int 5) (const_int 7)]))))
15599 (match_operand:V4SI 3 "register_operand" "x")))]
15600 "TARGET_XOP"
15601 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15602 [(set_attr "type" "ssemuladd")
15603 (set_attr "mode" "TI")])
15604
15605 (define_insn "xop_p<madcs>wd"
15606 [(set (match_operand:V4SI 0 "register_operand" "=x")
15607 (xop_plus:V4SI
15608 (plus:V4SI
15609 (mult:V4SI
15610 (sign_extend:V4SI
15611 (vec_select:V4HI
15612 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15613 (parallel [(const_int 0) (const_int 2)
15614 (const_int 4) (const_int 6)])))
15615 (sign_extend:V4SI
15616 (vec_select:V4HI
15617 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15618 (parallel [(const_int 0) (const_int 2)
15619 (const_int 4) (const_int 6)]))))
15620 (mult:V4SI
15621 (sign_extend:V4SI
15622 (vec_select:V4HI
15623 (match_dup 1)
15624 (parallel [(const_int 1) (const_int 3)
15625 (const_int 5) (const_int 7)])))
15626 (sign_extend:V4SI
15627 (vec_select:V4HI
15628 (match_dup 2)
15629 (parallel [(const_int 1) (const_int 3)
15630 (const_int 5) (const_int 7)])))))
15631 (match_operand:V4SI 3 "register_operand" "x")))]
15632 "TARGET_XOP"
15633 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15634 [(set_attr "type" "ssemuladd")
15635 (set_attr "mode" "TI")])
15636
15637 ;; XOP parallel XMM conditional moves
15638 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
15639 [(set (match_operand:V 0 "register_operand" "=x,x")
15640 (if_then_else:V
15641 (match_operand:V 3 "nonimmediate_operand" "x,m")
15642 (match_operand:V 1 "register_operand" "x,x")
15643 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
15644 "TARGET_XOP"
15645 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15646 [(set_attr "type" "sse4arg")])
15647
15648 ;; XOP horizontal add/subtract instructions
15649 (define_insn "xop_phadd<u>bw"
15650 [(set (match_operand:V8HI 0 "register_operand" "=x")
15651 (plus:V8HI
15652 (any_extend:V8HI
15653 (vec_select:V8QI
15654 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15655 (parallel [(const_int 0) (const_int 2)
15656 (const_int 4) (const_int 6)
15657 (const_int 8) (const_int 10)
15658 (const_int 12) (const_int 14)])))
15659 (any_extend:V8HI
15660 (vec_select:V8QI
15661 (match_dup 1)
15662 (parallel [(const_int 1) (const_int 3)
15663 (const_int 5) (const_int 7)
15664 (const_int 9) (const_int 11)
15665 (const_int 13) (const_int 15)])))))]
15666 "TARGET_XOP"
15667 "vphadd<u>bw\t{%1, %0|%0, %1}"
15668 [(set_attr "type" "sseiadd1")])
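
;; vphaddbw/vphaddubw widen and add adjacent byte pairs, which is what the
;; even/odd vec_selects plus any_extend express above.  A scalar model of
;; output lane I:
;;
;;   #include <stdint.h>
;;
;;   static int16_t
;;   phaddbw_lane (const int8_t src[16], int i, int is_unsigned)
;;   {
;;     int lo = is_unsigned ? (uint8_t) src[2 * i]     : src[2 * i];
;;     int hi = is_unsigned ? (uint8_t) src[2 * i + 1] : src[2 * i + 1];
;;     return (int16_t) (lo + hi);
;;   }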
15669
15670 (define_insn "xop_phadd<u>bd"
15671 [(set (match_operand:V4SI 0 "register_operand" "=x")
15672 (plus:V4SI
15673 (plus:V4SI
15674 (any_extend:V4SI
15675 (vec_select:V4QI
15676 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15677 (parallel [(const_int 0) (const_int 4)
15678 (const_int 8) (const_int 12)])))
15679 (any_extend:V4SI
15680 (vec_select:V4QI
15681 (match_dup 1)
15682 (parallel [(const_int 1) (const_int 5)
15683 (const_int 9) (const_int 13)]))))
15684 (plus:V4SI
15685 (any_extend:V4SI
15686 (vec_select:V4QI
15687 (match_dup 1)
15688 (parallel [(const_int 2) (const_int 6)
15689 (const_int 10) (const_int 14)])))
15690 (any_extend:V4SI
15691 (vec_select:V4QI
15692 (match_dup 1)
15693 (parallel [(const_int 3) (const_int 7)
15694 (const_int 11) (const_int 15)]))))))]
15695 "TARGET_XOP"
15696 "vphadd<u>bd\t{%1, %0|%0, %1}"
15697 [(set_attr "type" "sseiadd1")])
15698
15699 (define_insn "xop_phadd<u>bq"
15700 [(set (match_operand:V2DI 0 "register_operand" "=x")
15701 (plus:V2DI
15702 (plus:V2DI
15703 (plus:V2DI
15704 (any_extend:V2DI
15705 (vec_select:V2QI
15706 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15707 (parallel [(const_int 0) (const_int 8)])))
15708 (any_extend:V2DI
15709 (vec_select:V2QI
15710 (match_dup 1)
15711 (parallel [(const_int 1) (const_int 9)]))))
15712 (plus:V2DI
15713 (any_extend:V2DI
15714 (vec_select:V2QI
15715 (match_dup 1)
15716 (parallel [(const_int 2) (const_int 10)])))
15717 (any_extend:V2DI
15718 (vec_select:V2QI
15719 (match_dup 1)
15720 (parallel [(const_int 3) (const_int 11)])))))
15721 (plus:V2DI
15722 (plus:V2DI
15723 (any_extend:V2DI
15724 (vec_select:V2QI
15725 (match_dup 1)
15726 (parallel [(const_int 4) (const_int 12)])))
15727 (any_extend:V2DI
15728 (vec_select:V2QI
15729 (match_dup 1)
15730 (parallel [(const_int 5) (const_int 13)]))))
15731 (plus:V2DI
15732 (any_extend:V2DI
15733 (vec_select:V2QI
15734 (match_dup 1)
15735 (parallel [(const_int 6) (const_int 14)])))
15736 (any_extend:V2DI
15737 (vec_select:V2QI
15738 (match_dup 1)
15739 (parallel [(const_int 7) (const_int 15)])))))))]
15740 "TARGET_XOP"
15741 "vphadd<u>bq\t{%1, %0|%0, %1}"
15742 [(set_attr "type" "sseiadd1")])
15743
15744 (define_insn "xop_phadd<u>wd"
15745 [(set (match_operand:V4SI 0 "register_operand" "=x")
15746 (plus:V4SI
15747 (any_extend:V4SI
15748 (vec_select:V4HI
15749 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15750 (parallel [(const_int 0) (const_int 2)
15751 (const_int 4) (const_int 6)])))
15752 (any_extend:V4SI
15753 (vec_select:V4HI
15754 (match_dup 1)
15755 (parallel [(const_int 1) (const_int 3)
15756 (const_int 5) (const_int 7)])))))]
15757 "TARGET_XOP"
15758 "vphadd<u>wd\t{%1, %0|%0, %1}"
15759 [(set_attr "type" "sseiadd1")])
15760
15761 (define_insn "xop_phadd<u>wq"
15762 [(set (match_operand:V2DI 0 "register_operand" "=x")
15763 (plus:V2DI
15764 (plus:V2DI
15765 (any_extend:V2DI
15766 (vec_select:V2HI
15767 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15768 (parallel [(const_int 0) (const_int 4)])))
15769 (any_extend:V2DI
15770 (vec_select:V2HI
15771 (match_dup 1)
15772 (parallel [(const_int 1) (const_int 5)]))))
15773 (plus:V2DI
15774 (any_extend:V2DI
15775 (vec_select:V2HI
15776 (match_dup 1)
15777 (parallel [(const_int 2) (const_int 6)])))
15778 (any_extend:V2DI
15779 (vec_select:V2HI
15780 (match_dup 1)
15781 (parallel [(const_int 3) (const_int 7)]))))))]
15782 "TARGET_XOP"
15783 "vphadd<u>wq\t{%1, %0|%0, %1}"
15784 [(set_attr "type" "sseiadd1")])
15785
15786 (define_insn "xop_phadd<u>dq"
15787 [(set (match_operand:V2DI 0 "register_operand" "=x")
15788 (plus:V2DI
15789 (any_extend:V2DI
15790 (vec_select:V2SI
15791 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15792 (parallel [(const_int 0) (const_int 2)])))
15793 (any_extend:V2DI
15794 (vec_select:V2SI
15795 (match_dup 1)
15796 (parallel [(const_int 1) (const_int 3)])))))]
15797 "TARGET_XOP"
15798 "vphadd<u>dq\t{%1, %0|%0, %1}"
15799 [(set_attr "type" "sseiadd1")])
15800
15801 (define_insn "xop_phsubbw"
15802 [(set (match_operand:V8HI 0 "register_operand" "=x")
15803 (minus:V8HI
15804 (sign_extend:V8HI
15805 (vec_select:V8QI
15806 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15807 (parallel [(const_int 0) (const_int 2)
15808 (const_int 4) (const_int 6)
15809 (const_int 8) (const_int 10)
15810 (const_int 12) (const_int 14)])))
15811 (sign_extend:V8HI
15812 (vec_select:V8QI
15813 (match_dup 1)
15814 (parallel [(const_int 1) (const_int 3)
15815 (const_int 5) (const_int 7)
15816 (const_int 9) (const_int 11)
15817 (const_int 13) (const_int 15)])))))]
15818 "TARGET_XOP"
15819 "vphsubbw\t{%1, %0|%0, %1}"
15820 [(set_attr "type" "sseiadd1")])
15821
15822 (define_insn "xop_phsubwd"
15823 [(set (match_operand:V4SI 0 "register_operand" "=x")
15824 (minus:V4SI
15825 (sign_extend:V4SI
15826 (vec_select:V4HI
15827 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15828 (parallel [(const_int 0) (const_int 2)
15829 (const_int 4) (const_int 6)])))
15830 (sign_extend:V4SI
15831 (vec_select:V4HI
15832 (match_dup 1)
15833 (parallel [(const_int 1) (const_int 3)
15834 (const_int 5) (const_int 7)])))))]
15835 "TARGET_XOP"
15836 "vphsubwd\t{%1, %0|%0, %1}"
15837 [(set_attr "type" "sseiadd1")])
15838
15839 (define_insn "xop_phsubdq"
15840 [(set (match_operand:V2DI 0 "register_operand" "=x")
15841 (minus:V2DI
15842 (sign_extend:V2DI
15843 (vec_select:V2SI
15844 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15845 (parallel [(const_int 0) (const_int 2)])))
15846 (sign_extend:V2DI
15847 (vec_select:V2SI
15848 (match_dup 1)
15849 (parallel [(const_int 1) (const_int 3)])))))]
15850 "TARGET_XOP"
15851 "vphsubdq\t{%1, %0|%0, %1}"
15852 [(set_attr "type" "sseiadd1")])
15853
15854 ;; XOP permute instructions
15855 (define_insn "xop_pperm"
15856 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15857 (unspec:V16QI
15858 [(match_operand:V16QI 1 "register_operand" "x,x")
15859 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15860 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
15861 UNSPEC_XOP_PERMUTE))]
15862 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15863 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15864 [(set_attr "type" "sse4arg")
15865 (set_attr "mode" "TI")])
15866
15867 ;; XOP pack instructions that combine two vectors into a smaller vector
15868 (define_insn "xop_pperm_pack_v2di_v4si"
15869 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
15870 (vec_concat:V4SI
15871 (truncate:V2SI
15872 (match_operand:V2DI 1 "register_operand" "x,x"))
15873 (truncate:V2SI
15874 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
15875 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15876 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15877 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15878 [(set_attr "type" "sse4arg")
15879 (set_attr "mode" "TI")])
15880
15881 (define_insn "xop_pperm_pack_v4si_v8hi"
15882 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
15883 (vec_concat:V8HI
15884 (truncate:V4HI
15885 (match_operand:V4SI 1 "register_operand" "x,x"))
15886 (truncate:V4HI
15887 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
15888 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15889 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15890 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15891 [(set_attr "type" "sse4arg")
15892 (set_attr "mode" "TI")])
15893
15894 (define_insn "xop_pperm_pack_v8hi_v16qi"
15895 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15896 (vec_concat:V16QI
15897 (truncate:V8QI
15898 (match_operand:V8HI 1 "register_operand" "x,x"))
15899 (truncate:V8QI
15900 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
15901 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15902 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15903 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15904 [(set_attr "type" "sse4arg")
15905 (set_attr "mode" "TI")])
15906
15907 ;; XOP packed rotate instructions
15908 (define_expand "rotl<mode>3"
15909 [(set (match_operand:VI_128 0 "register_operand")
15910 (rotate:VI_128
15911 (match_operand:VI_128 1 "nonimmediate_operand")
15912 (match_operand:SI 2 "general_operand")))]
15913 "TARGET_XOP"
15914 {
15915 /* If we were given a scalar, convert it to a parallel.  */
15916 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15917 {
15918 rtvec vs = rtvec_alloc (<ssescalarnum>);
15919 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15920 rtx reg = gen_reg_rtx (<MODE>mode);
15921 rtx op2 = operands[2];
15922 int i;
15923
15924 if (GET_MODE (op2) != <ssescalarmode>mode)
15925 {
15926 op2 = gen_reg_rtx (<ssescalarmode>mode);
15927 convert_move (op2, operands[2], false);
15928 }
15929
15930 for (i = 0; i < <ssescalarnum>; i++)
15931 RTVEC_ELT (vs, i) = op2;
15932
15933 emit_insn (gen_vec_init<mode> (reg, par));
15934 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
15935 DONE;
15936 }
15937 })
15938
15939 (define_expand "rotr<mode>3"
15940 [(set (match_operand:VI_128 0 "register_operand")
15941 (rotatert:VI_128
15942 (match_operand:VI_128 1 "nonimmediate_operand")
15943 (match_operand:SI 2 "general_operand")))]
15944 "TARGET_XOP"
15945 {
15946 /* If we were given a scalar, convert it to a parallel.  */
15947 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15948 {
15949 rtvec vs = rtvec_alloc (<ssescalarnum>);
15950 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15951 rtx neg = gen_reg_rtx (<MODE>mode);
15952 rtx reg = gen_reg_rtx (<MODE>mode);
15953 rtx op2 = operands[2];
15954 int i;
15955
15956 if (GET_MODE (op2) != <ssescalarmode>mode)
15957 {
15958 op2 = gen_reg_rtx (<ssescalarmode>mode);
15959 convert_move (op2, operands[2], false);
15960 }
15961
15962 for (i = 0; i < <ssescalarnum>; i++)
15963 RTVEC_ELT (vs, i) = op2;
15964
15965 emit_insn (gen_vec_init<mode> (reg, par));
15966 emit_insn (gen_neg<mode>2 (neg, reg));
15967 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
15968 DONE;
15969 }
15970 })
15971
15972 (define_insn "xop_rotl<mode>3"
15973 [(set (match_operand:VI_128 0 "register_operand" "=x")
15974 (rotate:VI_128
15975 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15976 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15977 "TARGET_XOP"
15978 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15979 [(set_attr "type" "sseishft")
15980 (set_attr "length_immediate" "1")
15981 (set_attr "mode" "TI")])
15982
15983 (define_insn "xop_rotr<mode>3"
15984 [(set (match_operand:VI_128 0 "register_operand" "=x")
15985 (rotatert:VI_128
15986 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15987 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15988 "TARGET_XOP"
15989 {
15990 operands[3]
15991 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
15992 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
15993 }
15994 [(set_attr "type" "sseishft")
15995 (set_attr "length_immediate" "1")
15996 (set_attr "mode" "TI")])
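
;; xop_rotr<mode>3 above converts a rotate right by N into the equivalent
;; rotate left by (element width - N) before printing the vprot immediate.
;; The same identity for a 32-bit scalar:
;;
;;   static unsigned int
;;   rotr32 (unsigned int x, unsigned int n)      /* 1 <= n <= 31 */
;;   {
;;     return (x >> n) | (x << (32 - n));         /* == rotl32 (x, 32 - n) */
;;   }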
15997
15998 (define_expand "vrotr<mode>3"
15999 [(match_operand:VI_128 0 "register_operand")
16000 (match_operand:VI_128 1 "register_operand")
16001 (match_operand:VI_128 2 "register_operand")]
16002 "TARGET_XOP"
16003 {
16004 rtx reg = gen_reg_rtx (<MODE>mode);
16005 emit_insn (gen_neg<mode>2 (reg, operands[2]));
16006 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
16007 DONE;
16008 })
16009
16010 (define_expand "vrotl<mode>3"
16011 [(match_operand:VI_128 0 "register_operand")
16012 (match_operand:VI_128 1 "register_operand")
16013 (match_operand:VI_128 2 "register_operand")]
16014 "TARGET_XOP"
16015 {
16016 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
16017 DONE;
16018 })
16019
16020 (define_insn "xop_vrotl<mode>3"
16021 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16022 (if_then_else:VI_128
16023 (ge:VI_128
16024 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16025 (const_int 0))
16026 (rotate:VI_128
16027 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16028 (match_dup 2))
16029 (rotatert:VI_128
16030 (match_dup 1)
16031 (neg:VI_128 (match_dup 2)))))]
16032 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16033 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16034 [(set_attr "type" "sseishft")
16035 (set_attr "prefix_data16" "0")
16036 (set_attr "prefix_extra" "2")
16037 (set_attr "mode" "TI")])
16038
16039 ;; XOP packed shift instructions.
16040 (define_expand "vlshr<mode>3"
16041 [(set (match_operand:VI12_128 0 "register_operand")
16042 (lshiftrt:VI12_128
16043 (match_operand:VI12_128 1 "register_operand")
16044 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16045 "TARGET_XOP"
16046 {
16047 rtx neg = gen_reg_rtx (<MODE>mode);
16048 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16049 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
16050 DONE;
16051 })
16052
16053 (define_expand "vlshr<mode>3"
16054 [(set (match_operand:VI48_128 0 "register_operand")
16055 (lshiftrt:VI48_128
16056 (match_operand:VI48_128 1 "register_operand")
16057 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16058 "TARGET_AVX2 || TARGET_XOP"
16059 {
16060 if (!TARGET_AVX2)
16061 {
16062 rtx neg = gen_reg_rtx (<MODE>mode);
16063 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16064 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
16065 DONE;
16066 }
16067 })
16068
16069 (define_expand "vlshr<mode>3"
16070 [(set (match_operand:VI48_512 0 "register_operand")
16071 (lshiftrt:VI48_512
16072 (match_operand:VI48_512 1 "register_operand")
16073 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16074 "TARGET_AVX512F")
16075
16076 (define_expand "vlshr<mode>3"
16077 [(set (match_operand:VI48_256 0 "register_operand")
16078 (lshiftrt:VI48_256
16079 (match_operand:VI48_256 1 "register_operand")
16080 (match_operand:VI48_256 2 "nonimmediate_operand")))]
16081 "TARGET_AVX2")
16082
16083 (define_expand "vashrv8hi3<mask_name>"
16084 [(set (match_operand:V8HI 0 "register_operand")
16085 (ashiftrt:V8HI
16086 (match_operand:V8HI 1 "register_operand")
16087 (match_operand:V8HI 2 "nonimmediate_operand")))]
16088 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
16089 {
16090 if (TARGET_XOP)
16091 {
16092 rtx neg = gen_reg_rtx (V8HImode);
16093 emit_insn (gen_negv8hi2 (neg, operands[2]));
16094 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
16095 DONE;
16096 }
16097 })
16098
16099 (define_expand "vashrv16qi3"
16100 [(set (match_operand:V16QI 0 "register_operand")
16101 (ashiftrt:V16QI
16102 (match_operand:V16QI 1 "register_operand")
16103 (match_operand:V16QI 2 "nonimmediate_operand")))]
16104 "TARGET_XOP"
16105 {
16106 rtx neg = gen_reg_rtx (V16QImode);
16107 emit_insn (gen_negv16qi2 (neg, operands[2]));
16108 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
16109 DONE;
16110 })
16111
16112 (define_expand "vashrv2di3<mask_name>"
16113 [(set (match_operand:V2DI 0 "register_operand")
16114 (ashiftrt:V2DI
16115 (match_operand:V2DI 1 "register_operand")
16116 (match_operand:V2DI 2 "nonimmediate_operand")))]
16117 "TARGET_XOP || TARGET_AVX512VL"
16118 {
16119 if (TARGET_XOP)
16120 {
16121 rtx neg = gen_reg_rtx (V2DImode);
16122 emit_insn (gen_negv2di2 (neg, operands[2]));
16123 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
16124 DONE;
16125 }
16126 })
16127
16128 (define_expand "vashrv4si3"
16129 [(set (match_operand:V4SI 0 "register_operand")
16130 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
16131 (match_operand:V4SI 2 "nonimmediate_operand")))]
16132 "TARGET_AVX2 || TARGET_XOP"
16133 {
16134 if (!TARGET_AVX2)
16135 {
16136 rtx neg = gen_reg_rtx (V4SImode);
16137 emit_insn (gen_negv4si2 (neg, operands[2]));
16138 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
16139 DONE;
16140 }
16141 })
16142
16143 (define_expand "vashrv16si3"
16144 [(set (match_operand:V16SI 0 "register_operand")
16145 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
16146 (match_operand:V16SI 2 "nonimmediate_operand")))]
16147 "TARGET_AVX512F")
16148
16149 (define_expand "vashrv8si3"
16150 [(set (match_operand:V8SI 0 "register_operand")
16151 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
16152 (match_operand:V8SI 2 "nonimmediate_operand")))]
16153 "TARGET_AVX2")
16154
16155 (define_expand "vashl<mode>3"
16156 [(set (match_operand:VI12_128 0 "register_operand")
16157 (ashift:VI12_128
16158 (match_operand:VI12_128 1 "register_operand")
16159 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16160 "TARGET_XOP"
16161 {
16162 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16163 DONE;
16164 })
16165
16166 (define_expand "vashl<mode>3"
16167 [(set (match_operand:VI48_128 0 "register_operand")
16168 (ashift:VI48_128
16169 (match_operand:VI48_128 1 "register_operand")
16170 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16171 "TARGET_AVX2 || TARGET_XOP"
16172 {
16173 if (!TARGET_AVX2)
16174 {
16175 operands[2] = force_reg (<MODE>mode, operands[2]);
16176 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16177 DONE;
16178 }
16179 })
16180
16181 (define_expand "vashl<mode>3"
16182 [(set (match_operand:VI48_512 0 "register_operand")
16183 (ashift:VI48_512
16184 (match_operand:VI48_512 1 "register_operand")
16185 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16186 "TARGET_AVX512F")
16187
16188 (define_expand "vashl<mode>3"
16189 [(set (match_operand:VI48_256 0 "register_operand")
16190 (ashift:VI48_256
16191 (match_operand:VI48_256 1 "register_operand")
16192 (match_operand:VI48_256 2 "nonimmediate_operand")))]
16193 "TARGET_AVX2")
16194
16195 (define_insn "xop_sha<mode>3"
16196 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16197 (if_then_else:VI_128
16198 (ge:VI_128
16199 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16200 (const_int 0))
16201 (ashift:VI_128
16202 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16203 (match_dup 2))
16204 (ashiftrt:VI_128
16205 (match_dup 1)
16206 (neg:VI_128 (match_dup 2)))))]
16207 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16208 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16209 [(set_attr "type" "sseishft")
16210 (set_attr "prefix_data16" "0")
16211 (set_attr "prefix_extra" "2")
16212 (set_attr "mode" "TI")])
16213
16214 (define_insn "xop_shl<mode>3"
16215 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16216 (if_then_else:VI_128
16217 (ge:VI_128
16218 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16219 (const_int 0))
16220 (ashift:VI_128
16221 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16222 (match_dup 2))
16223 (lshiftrt:VI_128
16224 (match_dup 1)
16225 (neg:VI_128 (match_dup 2)))))]
16226 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16227 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16228 [(set_attr "type" "sseishft")
16229 (set_attr "prefix_data16" "0")
16230 (set_attr "prefix_extra" "2")
16231 (set_attr "mode" "TI")])
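
;; The if_then_else RTL in xop_sha<mode>3/xop_shl<mode>3 encodes the XOP
;; convention that each per-element shift count is signed: a non-negative
;; count shifts left, a negative count shifts right by its magnitude
;; (arithmetic for vpsha*, logical for vpshl*).  A scalar model of one
;; 32-bit lane, assuming counts within the element width:
;;
;;   #include <stdint.h>
;;
;;   static int32_t
;;   xop_shift_lane (int32_t x, int32_t count, int arithmetic)
;;   {
;;     if (count >= 0)
;;       return (int32_t) ((uint32_t) x << count);   /* shift left */
;;     if (arithmetic)
;;       return x >> -count;                         /* vpsha*: sign-fill */
;;     return (int32_t) ((uint32_t) x >> -count);    /* vpshl*: zero-fill */
;;   }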
16232
16233 (define_expand "<shift_insn><mode>3"
16234 [(set (match_operand:VI1_AVX512 0 "register_operand")
16235 (any_shift:VI1_AVX512
16236 (match_operand:VI1_AVX512 1 "register_operand")
16237 (match_operand:SI 2 "nonmemory_operand")))]
16238 "TARGET_SSE2"
16239 {
16240 if (TARGET_XOP && <MODE>mode == V16QImode)
16241 {
16242 bool negate = false;
16243 rtx (*gen) (rtx, rtx, rtx);
16244 rtx tmp, par;
16245 int i;
16246
16247 if (<CODE> != ASHIFT)
16248 {
16249 if (CONST_INT_P (operands[2]))
16250 operands[2] = GEN_INT (-INTVAL (operands[2]));
16251 else
16252 negate = true;
16253 }
16254 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
16255 for (i = 0; i < 16; i++)
16256 XVECEXP (par, 0, i) = operands[2];
16257
16258 tmp = gen_reg_rtx (V16QImode);
16259 emit_insn (gen_vec_initv16qi (tmp, par));
16260
16261 if (negate)
16262 emit_insn (gen_negv16qi2 (tmp, tmp));
16263
16264 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
16265 emit_insn (gen (operands[0], operands[1], tmp));
16266 }
16267 else
16268 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
16269 DONE;
16270 })
16271
16272 (define_expand "ashrv2di3"
16273 [(set (match_operand:V2DI 0 "register_operand")
16274 (ashiftrt:V2DI
16275 (match_operand:V2DI 1 "register_operand")
16276 (match_operand:DI 2 "nonmemory_operand")))]
16277 "TARGET_XOP || TARGET_AVX512VL"
16278 {
16279 if (!TARGET_AVX512VL)
16280 {
16281 rtx reg = gen_reg_rtx (V2DImode);
16282 rtx par;
16283 bool negate = false;
16284 int i;
16285
16286 if (CONST_INT_P (operands[2]))
16287 operands[2] = GEN_INT (-INTVAL (operands[2]));
16288 else
16289 negate = true;
16290
16291 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
16292 for (i = 0; i < 2; i++)
16293 XVECEXP (par, 0, i) = operands[2];
16294
16295 emit_insn (gen_vec_initv2di (reg, par));
16296
16297 if (negate)
16298 emit_insn (gen_negv2di2 (reg, reg));
16299
16300 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
16301 DONE;
16302 }
16303 })
16304
16305 ;; XOP FRCZ support
16306 (define_insn "xop_frcz<mode>2"
16307 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
16308 (unspec:FMAMODE
16309 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
16310 UNSPEC_FRCZ))]
16311 "TARGET_XOP"
16312 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
16313 [(set_attr "type" "ssecvt1")
16314 (set_attr "mode" "<MODE>")])
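
;; vfrcz extracts the fractional portion of each element.  A rough scalar
;; model for normal finite inputs (special values such as infinities and
;; NaNs are not modeled here): the value minus its truncation toward zero.
;;
;;   #include <math.h>
;;
;;   static float
;;   frcz_model (float x)
;;   {
;;     return x - truncf (x);
;;   }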
16315
16316 (define_expand "xop_vmfrcz<mode>2"
16317 [(set (match_operand:VF_128 0 "register_operand")
16318 (vec_merge:VF_128
16319 (unspec:VF_128
16320 [(match_operand:VF_128 1 "nonimmediate_operand")]
16321 UNSPEC_FRCZ)
16322 (match_dup 2)
16323 (const_int 1)))]
16324 "TARGET_XOP"
16325 "operands[2] = CONST0_RTX (<MODE>mode);")
16326
16327 (define_insn "*xop_vmfrcz<mode>2"
16328 [(set (match_operand:VF_128 0 "register_operand" "=x")
16329 (vec_merge:VF_128
16330 (unspec:VF_128
16331 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
16332 UNSPEC_FRCZ)
16333 (match_operand:VF_128 2 "const0_operand")
16334 (const_int 1)))]
16335 "TARGET_XOP"
16336 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
16337 [(set_attr "type" "ssecvt1")
16338 (set_attr "mode" "<MODE>")])
16339
16340 (define_insn "xop_maskcmp<mode>3"
16341 [(set (match_operand:VI_128 0 "register_operand" "=x")
16342 (match_operator:VI_128 1 "ix86_comparison_int_operator"
16343 [(match_operand:VI_128 2 "register_operand" "x")
16344 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16345 "TARGET_XOP"
16346 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16347 [(set_attr "type" "sse4arg")
16348 (set_attr "prefix_data16" "0")
16349 (set_attr "prefix_rep" "0")
16350 (set_attr "prefix_extra" "2")
16351 (set_attr "length_immediate" "1")
16352 (set_attr "mode" "TI")])
16353
16354 (define_insn "xop_maskcmp_uns<mode>3"
16355 [(set (match_operand:VI_128 0 "register_operand" "=x")
16356 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
16357 [(match_operand:VI_128 2 "register_operand" "x")
16358 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16359 "TARGET_XOP"
16360 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16361 [(set_attr "type" "ssecmp")
16362 (set_attr "prefix_data16" "0")
16363 (set_attr "prefix_rep" "0")
16364 (set_attr "prefix_extra" "2")
16365 (set_attr "length_immediate" "1")
16366 (set_attr "mode" "TI")])
16367
16368 ;; Version of pcom*u* used by the intrinsics.  It keeps pcomequ* and
16369 ;; pcomneu* from being converted to their signed equivalents, in case
16370 ;; somebody needs the exact instruction generated for the intrinsic.
16371 (define_insn "xop_maskcmp_uns2<mode>3"
16372 [(set (match_operand:VI_128 0 "register_operand" "=x")
16373 (unspec:VI_128
16374 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
16375 [(match_operand:VI_128 2 "register_operand" "x")
16376 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
16377 UNSPEC_XOP_UNSIGNED_CMP))]
16378 "TARGET_XOP"
16379 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16380 [(set_attr "type" "ssecmp")
16381 (set_attr "prefix_data16" "0")
16382 (set_attr "prefix_extra" "2")
16383 (set_attr "length_immediate" "1")
16384 (set_attr "mode" "TI")])
16385
16386 ;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
16387 ;; included here for completeness.
16388 (define_insn "xop_pcom_tf<mode>3"
16389 [(set (match_operand:VI_128 0 "register_operand" "=x")
16390 (unspec:VI_128
16391 [(match_operand:VI_128 1 "register_operand" "x")
16392 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
16393 (match_operand:SI 3 "const_int_operand" "n")]
16394 UNSPEC_XOP_TRUEFALSE))]
16395 "TARGET_XOP"
16396 {
16397 return ((INTVAL (operands[3]) != 0)
16398 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16399 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
16400 }
16401 [(set_attr "type" "ssecmp")
16402 (set_attr "prefix_data16" "0")
16403 (set_attr "prefix_extra" "2")
16404 (set_attr "length_immediate" "1")
16405 (set_attr "mode" "TI")])
16406
16407 (define_insn "xop_vpermil2<mode>3"
16408 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
16409 (unspec:VF_128_256
16410 [(match_operand:VF_128_256 1 "register_operand" "x")
16411 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
16412 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
16413 (match_operand:SI 4 "const_0_to_3_operand" "n")]
16414 UNSPEC_VPERMIL2))]
16415 "TARGET_XOP"
16416 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
16417 [(set_attr "type" "sse4arg")
16418 (set_attr "length_immediate" "1")
16419 (set_attr "mode" "<MODE>")])
16420
16421 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16422
16423 (define_insn "aesenc"
16424 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16425 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16426 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
16427 UNSPEC_AESENC))]
16428 "TARGET_AES"
16429 "@
16430 aesenc\t{%2, %0|%0, %2}
16431 vaesenc\t{%2, %1, %0|%0, %1, %2}"
16432 [(set_attr "isa" "noavx,avx")
16433 (set_attr "type" "sselog1")
16434 (set_attr "prefix_extra" "1")
16435 (set_attr "prefix" "orig,vex")
16436 (set_attr "btver2_decode" "double,double")
16437 (set_attr "mode" "TI")])
16438
16439 (define_insn "aesenclast"
16440 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16441 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16442 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
16443 UNSPEC_AESENCLAST))]
16444 "TARGET_AES"
16445 "@
16446 aesenclast\t{%2, %0|%0, %2}
16447 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
16448 [(set_attr "isa" "noavx,avx")
16449 (set_attr "type" "sselog1")
16450 (set_attr "prefix_extra" "1")
16451 (set_attr "prefix" "orig,vex")
16452 (set_attr "btver2_decode" "double,double")
16453 (set_attr "mode" "TI")])
16454
16455 (define_insn "aesdec"
16456 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16457 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16458 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
16459 UNSPEC_AESDEC))]
16460 "TARGET_AES"
16461 "@
16462 aesdec\t{%2, %0|%0, %2}
16463 vaesdec\t{%2, %1, %0|%0, %1, %2}"
16464 [(set_attr "isa" "noavx,avx")
16465 (set_attr "type" "sselog1")
16466 (set_attr "prefix_extra" "1")
16467 (set_attr "prefix" "orig,vex")
16468 (set_attr "btver2_decode" "double,double")
16469 (set_attr "mode" "TI")])
16470
16471 (define_insn "aesdeclast"
16472 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16473 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16474 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
16475 UNSPEC_AESDECLAST))]
16476 "TARGET_AES"
16477 "@
16478 aesdeclast\t{%2, %0|%0, %2}
16479 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
16480 [(set_attr "isa" "noavx,avx")
16481 (set_attr "type" "sselog1")
16482 (set_attr "prefix_extra" "1")
16483 (set_attr "prefix" "orig,vex")
16484 (set_attr "btver2_decode" "double,double")
16485 (set_attr "mode" "TI")])
16486
16487 (define_insn "aesimc"
16488 [(set (match_operand:V2DI 0 "register_operand" "=x")
16489 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
16490 UNSPEC_AESIMC))]
16491 "TARGET_AES"
16492 "%vaesimc\t{%1, %0|%0, %1}"
16493 [(set_attr "type" "sselog1")
16494 (set_attr "prefix_extra" "1")
16495 (set_attr "prefix" "maybe_vex")
16496 (set_attr "mode" "TI")])
16497
16498 (define_insn "aeskeygenassist"
16499 [(set (match_operand:V2DI 0 "register_operand" "=x")
16500 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
16501 (match_operand:SI 2 "const_0_to_255_operand" "n")]
16502 UNSPEC_AESKEYGENASSIST))]
16503 "TARGET_AES"
16504 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
16505 [(set_attr "type" "sselog1")
16506 (set_attr "prefix_extra" "1")
16507 (set_attr "length_immediate" "1")
16508 (set_attr "prefix" "maybe_vex")
16509 (set_attr "mode" "TI")])
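
;; A usage sketch for the AES patterns above, using the standard
;; <immintrin.h> intrinsics (compile with -maes): one AES-128 block
;; encryption with precomputed round keys RK[0..10].
;;
;;   #include <immintrin.h>
;;
;;   static __m128i
;;   aes128_encrypt_block (__m128i block, const __m128i rk[11])
;;   {
;;     block = _mm_xor_si128 (block, rk[0]);          /* initial AddRoundKey */
;;     for (int i = 1; i < 10; i++)
;;       block = _mm_aesenc_si128 (block, rk[i]);     /* rounds 1..9 */
;;     return _mm_aesenclast_si128 (block, rk[10]);   /* final round */
;;   }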
16510
16511 (define_insn "pclmulqdq"
16512 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16513 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16514 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
16515 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16516 UNSPEC_PCLMUL))]
16517 "TARGET_PCLMUL"
16518 "@
16519 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
16520 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16521 [(set_attr "isa" "noavx,avx")
16522 (set_attr "type" "sselog1")
16523 (set_attr "prefix_extra" "1")
16524 (set_attr "length_immediate" "1")
16525 (set_attr "prefix" "orig,vex")
16526 (set_attr "mode" "TI")])
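
;; A usage sketch for the pclmulqdq pattern above, using the standard
;; <immintrin.h> intrinsic (compile with -mpclmul): a carry-less
;; (GF(2)[x]) multiply of two 64-bit polynomials; the 0x00 immediate
;; selects the low quadword of each operand.
;;
;;   #include <immintrin.h>
;;
;;   static __m128i
;;   clmul_64x64 (unsigned long long a, unsigned long long b)
;;   {
;;     return _mm_clmulepi64_si128 (_mm_set_epi64x (0, (long long) a),
;;                                  _mm_set_epi64x (0, (long long) b), 0x00);
;;   }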
16527
16528 (define_expand "avx_vzeroall"
16529 [(match_par_dup 0 [(const_int 0)])]
16530 "TARGET_AVX"
16531 {
16532 int nregs = TARGET_64BIT ? 16 : 8;
16533 int regno;
16534
16535 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
16536
16537 XVECEXP (operands[0], 0, 0)
16538 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
16539 UNSPECV_VZEROALL);
16540
16541 for (regno = 0; regno < nregs; regno++)
16542 XVECEXP (operands[0], 0, regno + 1)
16543 = gen_rtx_SET (gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
16544 CONST0_RTX (V8SImode));
16545 })
16546
16547 (define_insn "*avx_vzeroall"
16548 [(match_parallel 0 "vzeroall_operation"
16549 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
16550 "TARGET_AVX"
16551 "vzeroall"
16552 [(set_attr "type" "sse")
16553 (set_attr "modrm" "0")
16554 (set_attr "memory" "none")
16555 (set_attr "prefix" "vex")
16556 (set_attr "btver2_decode" "vector")
16557 (set_attr "mode" "OI")])
16558
16559 ;; Clear the upper 128 bits of the AVX registers; equivalent to a NOP
16560 ;; if the upper 128 bits are unused.
16561 (define_insn "avx_vzeroupper"
16562 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
16563 "TARGET_AVX"
16564 "vzeroupper"
16565 [(set_attr "type" "sse")
16566 (set_attr "modrm" "0")
16567 (set_attr "memory" "none")
16568 (set_attr "prefix" "vex")
16569 (set_attr "btver2_decode" "vector")
16570 (set_attr "mode" "OI")])
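
;; User-level view of the two patterns above (compile with -mavx):
;; _mm256_zeroall clears every vector register, _mm256_zeroupper only the
;; upper halves.  GCC normally inserts vzeroupper automatically before
;; transitions to legacy SSE code, so explicit calls are rarely needed.
;;
;;   #include <immintrin.h>
;;
;;   void
;;   leave_avx_region (void)
;;   {
;;     _mm256_zeroupper ();   /* avoid the AVX/SSE transition penalty */
;;   }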
16571
16572 (define_insn "avx2_pbroadcast<mode>"
16573 [(set (match_operand:VI 0 "register_operand" "=x")
16574 (vec_duplicate:VI
16575 (vec_select:<ssescalarmode>
16576 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
16577 (parallel [(const_int 0)]))))]
16578 "TARGET_AVX2"
16579 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
16580 [(set_attr "type" "ssemov")
16581 (set_attr "prefix_extra" "1")
16582 (set_attr "prefix" "vex")
16583 (set_attr "mode" "<sseinsnmode>")])
16584
16585 (define_insn "avx2_pbroadcast<mode>_1"
16586 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
16587 (vec_duplicate:VI_256
16588 (vec_select:<ssescalarmode>
16589 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
16590 (parallel [(const_int 0)]))))]
16591 "TARGET_AVX2"
16592 "@
16593 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
16594 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
16595 [(set_attr "type" "ssemov")
16596 (set_attr "prefix_extra" "1")
16597 (set_attr "prefix" "vex")
16598 (set_attr "mode" "<sseinsnmode>")])
16599
16600 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
16601 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
16602 (unspec:VI48F_256_512
16603 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
16604 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16605 UNSPEC_VPERMVAR))]
16606 "TARGET_AVX2 && <mask_mode512bit_condition>"
16607 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16608 [(set_attr "type" "sselog")
16609 (set_attr "prefix" "<mask_prefix2>")
16610 (set_attr "mode" "<sseinsnmode>")])
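
;; A usage sketch for the AVX2 form of the pattern above (vpermd), using
;; the standard <immintrin.h> intrinsic (compile with -mavx2): a full
;; cross-lane dword shuffle that gathers the even elements before the odd
;; ones.
;;
;;   #include <immintrin.h>
;;
;;   static __m256i
;;   even_then_odd (__m256i x)
;;   {
;;     const __m256i idx = _mm256_setr_epi32 (0, 2, 4, 6, 1, 3, 5, 7);
;;     return _mm256_permutevar8x32_epi32 (x, idx);
;;   }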
16611
16612 (define_insn "<avx512>_permvar<mode><mask_name>"
16613 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
16614 (unspec:VI1_AVX512VL
16615 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
16616 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16617 UNSPEC_VPERMVAR))]
16618 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
16619 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16620 [(set_attr "type" "sselog")
16621 (set_attr "prefix" "<mask_prefix2>")
16622 (set_attr "mode" "<sseinsnmode>")])
16623
16624 (define_insn "<avx512>_permvar<mode><mask_name>"
16625 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16626 (unspec:VI2_AVX512VL
16627 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
16628 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16629 UNSPEC_VPERMVAR))]
16630 "TARGET_AVX512BW && <mask_mode512bit_condition>"
16631 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16632 [(set_attr "type" "sselog")
16633 (set_attr "prefix" "<mask_prefix2>")
16634 (set_attr "mode" "<sseinsnmode>")])
16635
16636 (define_expand "<avx2_avx512>_perm<mode>"
16637 [(match_operand:VI8F_256_512 0 "register_operand")
16638 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
16639 (match_operand:SI 2 "const_0_to_255_operand")]
16640 "TARGET_AVX2"
16641 {
16642 int mask = INTVAL (operands[2]);
16643 emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
16644 GEN_INT ((mask >> 0) & 3),
16645 GEN_INT ((mask >> 2) & 3),
16646 GEN_INT ((mask >> 4) & 3),
16647 GEN_INT ((mask >> 6) & 3)));
16648 DONE;
16649 })
16650
16651 (define_expand "<avx512>_perm<mode>_mask"
16652 [(match_operand:VI8F_256_512 0 "register_operand")
16653 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
16654 (match_operand:SI 2 "const_0_to_255_operand")
16655 (match_operand:VI8F_256_512 3 "vector_move_operand")
16656 (match_operand:<avx512fmaskmode> 4 "register_operand")]
16657 "TARGET_AVX512F"
16658 {
16659 int mask = INTVAL (operands[2]);
16660 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
16661 GEN_INT ((mask >> 0) & 3),
16662 GEN_INT ((mask >> 2) & 3),
16663 GEN_INT ((mask >> 4) & 3),
16664 GEN_INT ((mask >> 6) & 3),
16665 operands[3], operands[4]));
16666 DONE;
16667 })
16668
16669 (define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
16670 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
16671 (vec_select:VI8F_256_512
16672 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
16673 (parallel [(match_operand 2 "const_0_to_3_operand")
16674 (match_operand 3 "const_0_to_3_operand")
16675 (match_operand 4 "const_0_to_3_operand")
16676 (match_operand 5 "const_0_to_3_operand")])))]
16677 "TARGET_AVX2 && <mask_mode512bit_condition>"
16678 {
16679 int mask = 0;
16680 mask |= INTVAL (operands[2]) << 0;
16681 mask |= INTVAL (operands[3]) << 2;
16682 mask |= INTVAL (operands[4]) << 4;
16683 mask |= INTVAL (operands[5]) << 6;
16684 operands[2] = GEN_INT (mask);
16685 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
16686 }
16687 [(set_attr "type" "sselog")
16688 (set_attr "prefix" "<mask_prefix2>")
16689 (set_attr "mode" "<sseinsnmode>")])
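
;; The expanders above split the vpermq/vpermpd control byte into four
;; 2-bit selectors, ((mask >> 2*i) & 3) for destination lane i, and the
;; insn just above reassembles them.  For example 0x1b encodes the
;; selectors 3,2,1,0, which reverses the four 64-bit lanes
;; (compile with -mavx2):
;;
;;   #include <immintrin.h>
;;
;;   static __m256i
;;   reverse_qword_lanes (__m256i x)
;;   {
;;     return _mm256_permute4x64_epi64 (x, 0x1b);
;;   }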
16690
16691 (define_insn "avx2_permv2ti"
16692 [(set (match_operand:V4DI 0 "register_operand" "=x")
16693 (unspec:V4DI
16694 [(match_operand:V4DI 1 "register_operand" "x")
16695 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
16696 (match_operand:SI 3 "const_0_to_255_operand" "n")]
16697 UNSPEC_VPERMTI))]
16698 "TARGET_AVX2"
16699 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16700 [(set_attr "type" "sselog")
16701 (set_attr "prefix" "vex")
16702 (set_attr "mode" "OI")])
16703
16704 (define_insn "avx2_vec_dupv4df"
16705 [(set (match_operand:V4DF 0 "register_operand" "=x")
16706 (vec_duplicate:V4DF
16707 (vec_select:DF
16708 (match_operand:V2DF 1 "register_operand" "x")
16709 (parallel [(const_int 0)]))))]
16710 "TARGET_AVX2"
16711 "vbroadcastsd\t{%1, %0|%0, %1}"
16712 [(set_attr "type" "sselog1")
16713 (set_attr "prefix" "vex")
16714 (set_attr "mode" "V4DF")])
16715
16716 (define_insn "<avx512>_vec_dup<mode>_1"
16717 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
16718 (vec_duplicate:VI_AVX512BW
16719 (vec_select:VI_AVX512BW
16720 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
16721 (parallel [(const_int 0)]))))]
16722 "TARGET_AVX512F"
16723 "@
16724 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
16725 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
16726 [(set_attr "type" "ssemov")
16727 (set_attr "prefix" "evex")
16728 (set_attr "mode" "<sseinsnmode>")])
16729
16730 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16731 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
16732 (vec_duplicate:V48_AVX512VL
16733 (vec_select:<ssescalarmode>
16734 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16735 (parallel [(const_int 0)]))))]
16736 "TARGET_AVX512F"
16737 {
16738 /* There is no DF broadcast (in AVX-512*) to a 128-bit register.
16739 Mimic it with the integer variant. */
16740 if (<MODE>mode == V2DFmode)
16741 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
16742
16743 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
16744 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}";
16745 else
16746 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
16747 }
16748 [(set_attr "type" "ssemov")
16749 (set_attr "prefix" "evex")
16750 (set_attr "mode" "<sseinsnmode>")])
16751
16752 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16753 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16754 (vec_duplicate:VI12_AVX512VL
16755 (vec_select:<ssescalarmode>
16756 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16757 (parallel [(const_int 0)]))))]
16758 "TARGET_AVX512BW"
16759 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16760 [(set_attr "type" "ssemov")
16761 (set_attr "prefix" "evex")
16762 (set_attr "mode" "<sseinsnmode>")])
16763
16764 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16765 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16766 (vec_duplicate:V16FI
16767 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16768 "TARGET_AVX512F"
16769 "@
16770 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
16771 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16772 [(set_attr "type" "ssemov")
16773 (set_attr "prefix" "evex")
16774 (set_attr "mode" "<sseinsnmode>")])
16775
16776 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16777 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
16778 (vec_duplicate:V8FI
16779 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16780 "TARGET_AVX512F"
16781 "@
16782 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16783 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16784 [(set_attr "type" "ssemov")
16785 (set_attr "prefix" "evex")
16786 (set_attr "mode" "<sseinsnmode>")])
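;; In the two broadcast patterns above, the register alternative goes through
;; vshuf<i/f>32x4 / vshuf<i/f>64x2 on the 512-bit view of the source (%g1)
;; because vbroadcast<i/f>32x4 and vbroadcast<i/f>64x4 only accept a memory
;; source operand.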
16787
16788 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16789 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
16790 (vec_duplicate:VI12_AVX512VL
16791 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
16792 "TARGET_AVX512BW"
16793 "@
16794 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
16795 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
16796 [(set_attr "type" "ssemov")
16797 (set_attr "prefix" "evex")
16798 (set_attr "mode" "<sseinsnmode>")])
16799
16800 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16801 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
16802 (vec_duplicate:V48_AVX512VL
16803 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
16804 "TARGET_AVX512F"
16805 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16806 [(set_attr "type" "ssemov")
16807 (set_attr "prefix" "evex")
16808 (set_attr "mode" "<sseinsnmode>")
16809 (set (attr "enabled")
16810 (if_then_else (eq_attr "alternative" "1")
16811 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
16812 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
16813 (const_int 1)))])
16814
16815 (define_insn "vec_dupv4sf"
16816 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
16817 (vec_duplicate:V4SF
16818 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
16819 "TARGET_SSE"
16820 "@
16821 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
16822 vbroadcastss\t{%1, %0|%0, %1}
16823 shufps\t{$0, %0, %0|%0, %0, 0}"
16824 [(set_attr "isa" "avx,avx,noavx")
16825 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
16826 (set_attr "length_immediate" "1,0,1")
16827 (set_attr "prefix_extra" "0,1,*")
16828 (set_attr "prefix" "vex,vex,orig")
16829 (set_attr "mode" "V4SF")])
16830
16831 (define_insn "*vec_dupv4si"
16832 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
16833 (vec_duplicate:V4SI
16834 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
16835 "TARGET_SSE"
16836 "@
16837 %vpshufd\t{$0, %1, %0|%0, %1, 0}
16838 vbroadcastss\t{%1, %0|%0, %1}
16839 shufps\t{$0, %0, %0|%0, %0, 0}"
16840 [(set_attr "isa" "sse2,avx,noavx")
16841 (set_attr "type" "sselog1,ssemov,sselog1")
16842 (set_attr "length_immediate" "1,0,1")
16843 (set_attr "prefix_extra" "0,1,*")
16844 (set_attr "prefix" "maybe_vex,vex,orig")
16845 (set_attr "mode" "TI,V4SF,V4SF")])
16846
16847 (define_insn "*vec_dupv2di"
16848 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
16849 (vec_duplicate:V2DI
16850 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
16851 "TARGET_SSE"
16852 "@
16853 punpcklqdq\t%0, %0
16854 vpunpcklqdq\t{%d1, %0|%0, %d1}
16855 %vmovddup\t{%1, %0|%0, %1}
16856 movlhps\t%0, %0"
16857 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
16858 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
16859 (set_attr "prefix" "orig,vex,maybe_vex,orig")
16860 (set_attr "mode" "TI,TI,DF,V4SF")])
16861
16862 (define_insn "avx2_vbroadcasti128_<mode>"
16863 [(set (match_operand:VI_256 0 "register_operand" "=x")
16864 (vec_concat:VI_256
16865 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
16866 (match_dup 1)))]
16867 "TARGET_AVX2"
16868 "vbroadcasti128\t{%1, %0|%0, %1}"
16869 [(set_attr "type" "ssemov")
16870 (set_attr "prefix_extra" "1")
16871 (set_attr "prefix" "vex")
16872 (set_attr "mode" "OI")])
16873
16874 ;; Modes handled by AVX vec_dup patterns.
16875 (define_mode_iterator AVX_VEC_DUP_MODE
16876 [V8SI V8SF V4DI V4DF])
16877 ;; Modes handled by AVX2 vec_dup patterns.
16878 (define_mode_iterator AVX2_VEC_DUP_MODE
16879 [V32QI V16QI V16HI V8HI V8SI V4SI])
16880
16881 (define_insn "*vec_dup<mode>"
16882 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
16883 (vec_duplicate:AVX2_VEC_DUP_MODE
16884 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
16885 "TARGET_AVX2"
16886 "@
16887 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
16888 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
16889 #"
16890 [(set_attr "isa" "*,*,noavx512vl")
16891 (set_attr "type" "ssemov")
16892 (set_attr "prefix_extra" "1")
16893 (set_attr "prefix" "maybe_evex")
16894 (set_attr "mode" "<sseinsnmode>")])
16895
16896 (define_insn "vec_dup<mode>"
16897 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
16898 (vec_duplicate:AVX_VEC_DUP_MODE
16899 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
16900 "TARGET_AVX"
16901 "@
16902 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
16903 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
16904 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
16905 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
16906 #"
16907 [(set_attr "type" "ssemov")
16908 (set_attr "prefix_extra" "1")
16909 (set_attr "prefix" "maybe_evex")
16910 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
16911 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
16912
16913 (define_split
16914 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
16915 (vec_duplicate:AVX2_VEC_DUP_MODE
16916 (match_operand:<ssescalarmode> 1 "register_operand")))]
16917 "TARGET_AVX2
16918 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
16919 available, because then we can broadcast from GPRs directly.
16920 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
16921 for V*SI mode it requires just -mavx512vl. */
16922 && !(TARGET_AVX512VL
16923 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
16924 && reload_completed && GENERAL_REG_P (operands[1])"
16925 [(const_int 0)]
16926 {
16927 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
16928 CONST0_RTX (V4SImode),
16929 gen_lowpart (SImode, operands[1])));
16930 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
16931 gen_lowpart (<ssexmmmode>mode,
16932 operands[0])));
16933 DONE;
16934 })
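;; The split above synthesizes a broadcast from a general register on AVX2
;; (when the AVX512VL GPR broadcast patterns are not available) by first
;; moving the scalar into element 0 of an XMM register and then broadcasting
;; that register with vpbroadcast.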
16935
16936 (define_split
16937 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
16938 (vec_duplicate:AVX_VEC_DUP_MODE
16939 (match_operand:<ssescalarmode> 1 "register_operand")))]
16940 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
16941 [(set (match_dup 2)
16942 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
16943 (set (match_dup 0)
16944 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
16945 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
16946
16947 (define_insn "avx_vbroadcastf128_<mode>"
16948 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
16949 (vec_concat:V_256
16950 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
16951 (match_dup 1)))]
16952 "TARGET_AVX"
16953 "@
16954 vbroadcast<i128>\t{%1, %0|%0, %1}
16955 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
16956 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
16957 [(set_attr "type" "ssemov,sselog1,sselog1")
16958 (set_attr "prefix_extra" "1")
16959 (set_attr "length_immediate" "0,1,1")
16960 (set_attr "prefix" "vex")
16961 (set_attr "mode" "<sseinsnmode>")])
16962
16963 ;; For broadcast[i|f]32x2. Note that there is no V4SF variant, only V4SI.
16964 (define_mode_iterator VI4F_BRCST32x2
16965 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
16966 V16SF (V8SF "TARGET_AVX512VL")])
16967
16968 (define_mode_attr 64x2mode
16969 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
16970
16971 (define_mode_attr 32x2mode
16972 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
16973 (V8SF "V2SF") (V4SI "V2SI")])
16974
16975 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
16976 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
16977 (vec_duplicate:VI4F_BRCST32x2
16978 (vec_select:<32x2mode>
16979 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16980 (parallel [(const_int 0) (const_int 1)]))))]
16981 "TARGET_AVX512DQ"
16982 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16983 [(set_attr "type" "ssemov")
16984 (set_attr "prefix_extra" "1")
16985 (set_attr "prefix" "evex")
16986 (set_attr "mode" "<sseinsnmode>")])
16987
16988 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
16989 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
16990 (vec_duplicate:VI4F_256
16991 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16992 "TARGET_AVX512VL"
16993 "@
16994 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
16995 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16996 [(set_attr "type" "ssemov")
16997 (set_attr "prefix_extra" "1")
16998 (set_attr "prefix" "evex")
16999 (set_attr "mode" "<sseinsnmode>")])
17000
17001 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
17002 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17003 (vec_duplicate:V16FI
17004 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17005 "TARGET_AVX512DQ"
17006 "@
17007 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17008 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17009 [(set_attr "type" "ssemov")
17010 (set_attr "prefix_extra" "1")
17011 (set_attr "prefix" "evex")
17012 (set_attr "mode" "<sseinsnmode>")])
17013
17014 ;; For broadcast[i|f]64x2
17015 (define_mode_iterator VI8F_BRCST64x2
17016 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
17017
17018 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
17019 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
17020 (vec_duplicate:VI8F_BRCST64x2
17021 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
17022 "TARGET_AVX512DQ"
17023 "@
17024 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
17025 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17026 [(set_attr "type" "ssemov")
17027 (set_attr "prefix_extra" "1")
17028 (set_attr "prefix" "evex")
17029 (set_attr "mode" "<sseinsnmode>")])
17030
17031 (define_insn "avx512cd_maskb_vec_dup<mode>"
17032 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
17033 (vec_duplicate:VI8_AVX512VL
17034 (zero_extend:DI
17035 (match_operand:QI 1 "register_operand" "Yk"))))]
17036 "TARGET_AVX512CD"
17037 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
17038 [(set_attr "type" "mskmov")
17039 (set_attr "prefix" "evex")
17040 (set_attr "mode" "XI")])
17041
17042 (define_insn "avx512cd_maskw_vec_dup<mode>"
17043 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
17044 (vec_duplicate:VI4_AVX512VL
17045 (zero_extend:SI
17046 (match_operand:HI 1 "register_operand" "Yk"))))]
17047 "TARGET_AVX512CD"
17048 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
17049 [(set_attr "type" "mskmov")
17050 (set_attr "prefix" "evex")
17051 (set_attr "mode" "XI")])
17052
17053 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
17054 ;; If it so happens that the input is in memory, use vbroadcast.
17055 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
17056 (define_insn "*avx_vperm_broadcast_v4sf"
17057 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
17058 (vec_select:V4SF
17059 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
17060 (match_parallel 2 "avx_vbroadcast_operand"
17061 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17062 "TARGET_AVX"
17063 {
17064 int elt = INTVAL (operands[3]);
17065 switch (which_alternative)
17066 {
17067 case 0:
17068 case 1:
17069 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
17070 return "vbroadcastss\t{%1, %0|%0, %k1}";
17071 case 2:
17072 operands[2] = GEN_INT (elt * 0x55);
17073 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
17074 default:
17075 gcc_unreachable ();
17076 }
17077 }
17078 [(set_attr "type" "ssemov,ssemov,sselog1")
17079 (set_attr "prefix_extra" "1")
17080 (set_attr "length_immediate" "0,0,1")
17081 (set_attr "prefix" "vex")
17082 (set_attr "mode" "SF,SF,V4SF")])
17083
17084 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
17085 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
17086 (vec_select:VF_256
17087 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
17088 (match_parallel 2 "avx_vbroadcast_operand"
17089 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17090 "TARGET_AVX"
17091 "#"
17092 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
17093 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
17094 {
17095 rtx op0 = operands[0], op1 = operands[1];
17096 int elt = INTVAL (operands[3]);
17097
17098 if (REG_P (op1))
17099 {
17100 int mask;
17101
17102 if (TARGET_AVX2 && elt == 0)
17103 {
17104 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
17105 op1)));
17106 DONE;
17107 }
17108
17109 /* Shuffle the element we care about into all elements of its 128-bit lane.
17110 The other lane gets shuffled too, but we don't care. */
17111 if (<MODE>mode == V4DFmode)
17112 mask = (elt & 1 ? 15 : 0);
17113 else
17114 mask = (elt & 3) * 0x55;
17115 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
17116
17117 /* Shuffle the lane we care about into both lanes of the dest. */
17118 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
17119 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
17120 DONE;
17121 }
17122
17123 operands[1] = adjust_address (op1, <ssescalarmode>mode,
17124 elt * GET_MODE_SIZE (<ssescalarmode>mode));
17125 })
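;; For a register source the split above (unless AVX2 is available and element
;; 0 is selected, in which case vec_dup<mode> is used directly) first
;; replicates the selected element within each 128-bit lane with
;; vpermilps/vpermilpd and then copies the lane that holds it into both lanes
;; with vperm2f128.  For example, broadcasting element 5 of a V8SF register
;; uses vpermilps with mask 0x55 (element 1 of each lane) followed by
;; vperm2f128 with mask 0x11 (high lane into both lanes).  A memory source is
;; simply narrowed to the scalar element and left to the vec_duplicate
;; patterns (vbroadcastss/vbroadcastsd).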
17126
17127 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
17128 [(set (match_operand:VF2 0 "register_operand")
17129 (vec_select:VF2
17130 (match_operand:VF2 1 "nonimmediate_operand")
17131 (match_operand:SI 2 "const_0_to_255_operand")))]
17132 "TARGET_AVX && <mask_mode512bit_condition>"
17133 {
17134 int mask = INTVAL (operands[2]);
17135 rtx perm[<ssescalarnum>];
17136
17137 int i;
17138 for (i = 0; i < <ssescalarnum>; i = i + 2)
17139 {
17140 perm[i] = GEN_INT (((mask >> i) & 1) + i);
17141 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
17142 }
17143
17144 operands[2]
17145 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
17146 })
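;; The VF2 expand above turns the vpermilpd immediate into a vec_select
;; parallel with one immediate bit per element, each bit selecting within its
;; pair.  For example, for V4DF an immediate of 0x5 (0b0101) yields the
;; selection (1 0 3 2), i.e. the two elements of each 128-bit lane are swapped.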
17147
17148 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
17149 [(set (match_operand:VF1 0 "register_operand")
17150 (vec_select:VF1
17151 (match_operand:VF1 1 "nonimmediate_operand")
17152 (match_operand:SI 2 "const_0_to_255_operand")))]
17153 "TARGET_AVX && <mask_mode512bit_condition>"
17154 {
17155 int mask = INTVAL (operands[2]);
17156 rtx perm[<ssescalarnum>];
17157
17158 int i;
17159 for (i = 0; i < <ssescalarnum>; i = i + 4)
17160 {
17161 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
17162 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
17163 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
17164 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
17165 }
17166
17167 operands[2]
17168 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
17169 })
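;; Likewise for the VF1 expand: the same four 2-bit selectors are applied to
;; every group of four elements.  For V8SF an immediate of 0x1b (0b00011011)
;; yields (3 2 1 0 7 6 5 4), reversing the elements within each 128-bit lane.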
17170
17171 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
17172 [(set (match_operand:VF 0 "register_operand" "=v")
17173 (vec_select:VF
17174 (match_operand:VF 1 "nonimmediate_operand" "vm")
17175 (match_parallel 2 ""
17176 [(match_operand 3 "const_int_operand")])))]
17177 "TARGET_AVX && <mask_mode512bit_condition>
17178 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
17179 {
17180 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
17181 operands[2] = GEN_INT (mask);
17182 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
17183 }
17184 [(set_attr "type" "sselog")
17185 (set_attr "prefix_extra" "1")
17186 (set_attr "length_immediate" "1")
17187 (set_attr "prefix" "<mask_prefix>")
17188 (set_attr "mode" "<sseinsnmode>")])
17189
17190 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
17191 [(set (match_operand:VF 0 "register_operand" "=v")
17192 (unspec:VF
17193 [(match_operand:VF 1 "register_operand" "v")
17194 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
17195 UNSPEC_VPERMIL))]
17196 "TARGET_AVX && <mask_mode512bit_condition>"
17197 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17198 [(set_attr "type" "sselog")
17199 (set_attr "prefix_extra" "1")
17200 (set_attr "btver2_decode" "vector")
17201 (set_attr "prefix" "<mask_prefix>")
17202 (set_attr "mode" "<sseinsnmode>")])
17203
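;; vpermi2var/vpermt2var implement the AVX-512 two-table permutes: the index
;; vector selects elements from the concatenation of two data operands.  In
;; the vpermi2var patterns the index (operand 2) is tied to the destination
;; and is overwritten; in the vpermt2var patterns the first data table
;; (operand 2) is tied to the destination instead.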
17204 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17205 [(match_operand:VI48F 0 "register_operand" "=v")
17206 (match_operand:VI48F 1 "register_operand" "v")
17207 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17208 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
17209 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17210 "TARGET_AVX512F"
17211 {
17212 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17213 operands[0], operands[1], operands[2], operands[3],
17214 CONST0_RTX (<MODE>mode), operands[4]));
17215 DONE;
17216 })
17217
17218 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17219 [(match_operand:VI1_AVX512VL 0 "register_operand")
17220 (match_operand:VI1_AVX512VL 1 "register_operand")
17221 (match_operand:<sseintvecmode> 2 "register_operand")
17222 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
17223 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17224 "TARGET_AVX512VBMI"
17225 {
17226 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17227 operands[0], operands[1], operands[2], operands[3],
17228 CONST0_RTX (<MODE>mode), operands[4]));
17229 DONE;
17230 })
17231
17232 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17233 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17234 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17235 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17236 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
17237 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17238 "TARGET_AVX512BW"
17239 {
17240 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17241 operands[0], operands[1], operands[2], operands[3],
17242 CONST0_RTX (<MODE>mode), operands[4]));
17243 DONE;
17244 })
17245
17246 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17247 [(set (match_operand:VI48F 0 "register_operand" "=v")
17248 (unspec:VI48F
17249 [(match_operand:VI48F 1 "register_operand" "v")
17250 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17251 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17252 UNSPEC_VPERMI2))]
17253 "TARGET_AVX512F"
17254 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17255 [(set_attr "type" "sselog")
17256 (set_attr "prefix" "evex")
17257 (set_attr "mode" "<sseinsnmode>")])
17258
17259 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17260 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17261 (unspec:VI1_AVX512VL
17262 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17263 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17264 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17265 UNSPEC_VPERMI2))]
17266 "TARGET_AVX512VBMI"
17267 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17268 [(set_attr "type" "sselog")
17269 (set_attr "prefix" "evex")
17270 (set_attr "mode" "<sseinsnmode>")])
17271
17272 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17273 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17274 (unspec:VI2_AVX512VL
17275 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17276 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17277 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17278 UNSPEC_VPERMI2))]
17279 "TARGET_AVX512BW"
17280 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17281 [(set_attr "type" "sselog")
17282 (set_attr "prefix" "evex")
17283 (set_attr "mode" "<sseinsnmode>")])
17284
17285 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17286 [(set (match_operand:VI48F 0 "register_operand" "=v")
17287 (vec_merge:VI48F
17288 (unspec:VI48F
17289 [(match_operand:VI48F 1 "register_operand" "v")
17290 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17291 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17292 UNSPEC_VPERMI2_MASK)
17293 (match_dup 0)
17294 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17295 "TARGET_AVX512F"
17296 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17297 [(set_attr "type" "sselog")
17298 (set_attr "prefix" "evex")
17299 (set_attr "mode" "<sseinsnmode>")])
17300
17301 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17302 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17303 (vec_merge:VI1_AVX512VL
17304 (unspec:VI1_AVX512VL
17305 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17306 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17307 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17308 UNSPEC_VPERMI2_MASK)
17309 (match_dup 0)
17310 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17311 "TARGET_AVX512VBMI"
17312 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17313 [(set_attr "type" "sselog")
17314 (set_attr "prefix" "evex")
17315 (set_attr "mode" "<sseinsnmode>")])
17316
17317 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17318 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17319 (vec_merge:VI2_AVX512VL
17320 (unspec:VI2_AVX512VL
17321 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17322 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17323 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17324 UNSPEC_VPERMI2_MASK)
17325 (match_dup 0)
17326 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17327 "TARGET_AVX512BW"
17328 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17329 [(set_attr "type" "sselog")
17330 (set_attr "prefix" "evex")
17331 (set_attr "mode" "<sseinsnmode>")])
17332
17333 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17334 [(match_operand:VI48F 0 "register_operand" "=v")
17335 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17336 (match_operand:VI48F 2 "register_operand" "0")
17337 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
17338 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17339 "TARGET_AVX512F"
17340 {
17341 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17342 operands[0], operands[1], operands[2], operands[3],
17343 CONST0_RTX (<MODE>mode), operands[4]));
17344 DONE;
17345 })
17346
17347 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17348 [(match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17349 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17350 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17351 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")
17352 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17353 "TARGET_AVX512VBMI"
17354 {
17355 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17356 operands[0], operands[1], operands[2], operands[3],
17357 CONST0_RTX (<MODE>mode), operands[4]));
17358 DONE;
17359 })
17360
17361 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17362 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17363 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17364 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17365 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
17366 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17367 "TARGET_AVX512BW"
17368 {
17369 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17370 operands[0], operands[1], operands[2], operands[3],
17371 CONST0_RTX (<MODE>mode), operands[4]));
17372 DONE;
17373 })
17374
17375 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17376 [(set (match_operand:VI48F 0 "register_operand" "=v")
17377 (unspec:VI48F
17378 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17379 (match_operand:VI48F 2 "register_operand" "0")
17380 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17381 UNSPEC_VPERMT2))]
17382 "TARGET_AVX512F"
17383 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17384 [(set_attr "type" "sselog")
17385 (set_attr "prefix" "evex")
17386 (set_attr "mode" "<sseinsnmode>")])
17387
17388 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17389 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17390 (unspec:VI1_AVX512VL
17391 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17392 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17393 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17394 UNSPEC_VPERMT2))]
17395 "TARGET_AVX512VBMI"
17396 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17397 [(set_attr "type" "sselog")
17398 (set_attr "prefix" "evex")
17399 (set_attr "mode" "<sseinsnmode>")])
17400
17401 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17402 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17403 (unspec:VI2_AVX512VL
17404 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17405 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17406 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17407 UNSPEC_VPERMT2))]
17408 "TARGET_AVX512BW"
17409 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17410 [(set_attr "type" "sselog")
17411 (set_attr "prefix" "evex")
17412 (set_attr "mode" "<sseinsnmode>")])
17413
17414 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17415 [(set (match_operand:VI48F 0 "register_operand" "=v")
17416 (vec_merge:VI48F
17417 (unspec:VI48F
17418 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17419 (match_operand:VI48F 2 "register_operand" "0")
17420 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17421 UNSPEC_VPERMT2)
17422 (match_dup 2)
17423 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17424 "TARGET_AVX512F"
17425 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17426 [(set_attr "type" "sselog")
17427 (set_attr "prefix" "evex")
17428 (set_attr "mode" "<sseinsnmode>")])
17429
17430 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17431 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17432 (vec_merge:VI1_AVX512VL
17433 (unspec:VI1_AVX512VL
17434 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17435 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17436 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17437 UNSPEC_VPERMT2)
17438 (match_dup 2)
17439 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17440 "TARGET_AVX512VBMI"
17441 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17442 [(set_attr "type" "sselog")
17443 (set_attr "prefix" "evex")
17444 (set_attr "mode" "<sseinsnmode>")])
17445
17446 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17447 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17448 (vec_merge:VI2_AVX512VL
17449 (unspec:VI2_AVX512VL
17450 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17451 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17452 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17453 UNSPEC_VPERMT2)
17454 (match_dup 2)
17455 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17456 "TARGET_AVX512BW"
17457 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17458 [(set_attr "type" "sselog")
17459 (set_attr "prefix" "evex")
17460 (set_attr "mode" "<sseinsnmode>")])
17461
17462 (define_expand "avx_vperm2f128<mode>3"
17463 [(set (match_operand:AVX256MODE2P 0 "register_operand")
17464 (unspec:AVX256MODE2P
17465 [(match_operand:AVX256MODE2P 1 "register_operand")
17466 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
17467 (match_operand:SI 3 "const_0_to_255_operand")]
17468 UNSPEC_VPERMIL2F128))]
17469 "TARGET_AVX"
17470 {
17471 int mask = INTVAL (operands[3]);
17472 if ((mask & 0x88) == 0)
17473 {
17474 rtx perm[<ssescalarnum>], t1, t2;
17475 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
17476
17477 base = (mask & 3) * nelt2;
17478 for (i = 0; i < nelt2; ++i)
17479 perm[i] = GEN_INT (base + i);
17480
17481 base = ((mask >> 4) & 3) * nelt2;
17482 for (i = 0; i < nelt2; ++i)
17483 perm[i + nelt2] = GEN_INT (base + i);
17484
17485 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
17486 operands[1], operands[2]);
17487 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
17488 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
17489 t2 = gen_rtx_SET (operands[0], t2);
17490 emit_insn (t2);
17491 DONE;
17492 }
17493 })
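;; When no zeroing bits (0x88) are set in the immediate, the expand above
;; lowers vperm2f128 to a plain vec_select from the vec_concat of the two
;; operands, which *avx_vperm2f128<mode>_nozero then matches.  For example,
;; imm 0x20 picks the low half of operand 1 for the low lane and the low half
;; of operand 2 for the high lane, which that pattern emits as vinsertf128 $1.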
17494
17495 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
17496 ;; means that in order to represent this properly in rtl we'd have to
17497 ;; nest *another* vec_concat with a zero operand and do the select from
17498 ;; a 4x wide vector. That doesn't seem very nice.
17499 (define_insn "*avx_vperm2f128<mode>_full"
17500 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17501 (unspec:AVX256MODE2P
17502 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
17503 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
17504 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17505 UNSPEC_VPERMIL2F128))]
17506 "TARGET_AVX"
17507 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17508 [(set_attr "type" "sselog")
17509 (set_attr "prefix_extra" "1")
17510 (set_attr "length_immediate" "1")
17511 (set_attr "prefix" "vex")
17512 (set_attr "mode" "<sseinsnmode>")])
17513
17514 (define_insn "*avx_vperm2f128<mode>_nozero"
17515 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17516 (vec_select:AVX256MODE2P
17517 (vec_concat:<ssedoublevecmode>
17518 (match_operand:AVX256MODE2P 1 "register_operand" "x")
17519 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
17520 (match_parallel 3 ""
17521 [(match_operand 4 "const_int_operand")])))]
17522 "TARGET_AVX
17523 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
17524 {
17525 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
17526 if (mask == 0x12)
17527 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
17528 if (mask == 0x20)
17529 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
17530 operands[3] = GEN_INT (mask);
17531 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17532 }
17533 [(set_attr "type" "sselog")
17534 (set_attr "prefix_extra" "1")
17535 (set_attr "length_immediate" "1")
17536 (set_attr "prefix" "vex")
17537 (set_attr "mode" "<sseinsnmode>")])
17538
17539 (define_insn "*ssse3_palignr<mode>_perm"
17540 [(set (match_operand:V_128 0 "register_operand" "=x,x")
17541 (vec_select:V_128
17542 (match_operand:V_128 1 "register_operand" "0,x")
17543 (match_parallel 2 "palignr_operand"
17544 [(match_operand 3 "const_int_operand" "n, n")])))]
17545 "TARGET_SSSE3"
17546 {
17547 operands[2] =
17548 GEN_INT (INTVAL (operands[3]) * GET_MODE_UNIT_SIZE (GET_MODE (operands[0])));
17549
17550 switch (which_alternative)
17551 {
17552 case 0:
17553 return "palignr\t{%2, %1, %0|%0, %1, %2}";
17554 case 1:
17555 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
17556 default:
17557 gcc_unreachable ();
17558 }
17559 }
17560 [(set_attr "isa" "noavx,avx")
17561 (set_attr "type" "sseishft")
17562 (set_attr "atom_unit" "sishuf")
17563 (set_attr "prefix_data16" "1,*")
17564 (set_attr "prefix_extra" "1")
17565 (set_attr "length_immediate" "1")
17566 (set_attr "prefix" "orig,vex")])
17567
17568 (define_expand "avx512vl_vinsert<mode>"
17569 [(match_operand:VI48F_256 0 "register_operand")
17570 (match_operand:VI48F_256 1 "register_operand")
17571 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17572 (match_operand:SI 3 "const_0_to_1_operand")
17573 (match_operand:VI48F_256 4 "register_operand")
17574 (match_operand:<avx512fmaskmode> 5 "register_operand")]
17575 "TARGET_AVX512VL"
17576 {
17577 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17578
17579 switch (INTVAL (operands[3]))
17580 {
17581 case 0:
17582 insn = gen_vec_set_lo_<mode>_mask;
17583 break;
17584 case 1:
17585 insn = gen_vec_set_hi_<mode>_mask;
17586 break;
17587 default:
17588 gcc_unreachable ();
17589 }
17590
17591 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
17592 operands[5]));
17593 DONE;
17594 })
17595
17596 (define_expand "avx_vinsertf128<mode>"
17597 [(match_operand:V_256 0 "register_operand")
17598 (match_operand:V_256 1 "register_operand")
17599 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17600 (match_operand:SI 3 "const_0_to_1_operand")]
17601 "TARGET_AVX"
17602 {
17603 rtx (*insn)(rtx, rtx, rtx);
17604
17605 switch (INTVAL (operands[3]))
17606 {
17607 case 0:
17608 insn = gen_vec_set_lo_<mode>;
17609 break;
17610 case 1:
17611 insn = gen_vec_set_hi_<mode>;
17612 break;
17613 default:
17614 gcc_unreachable ();
17615 }
17616
17617 emit_insn (insn (operands[0], operands[1], operands[2]));
17618 DONE;
17619 })
17620
17621 (define_insn "vec_set_lo_<mode><mask_name>"
17622 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17623 (vec_concat:VI8F_256
17624 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
17625 (vec_select:<ssehalfvecmode>
17626 (match_operand:VI8F_256 1 "register_operand" "v")
17627 (parallel [(const_int 2) (const_int 3)]))))]
17628 "TARGET_AVX"
17629 {
17630 if (TARGET_AVX512VL)
17631 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17632 else
17633 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17634 }
17635 [(set_attr "type" "sselog")
17636 (set_attr "prefix_extra" "1")
17637 (set_attr "length_immediate" "1")
17638 (set_attr "prefix" "vex")
17639 (set_attr "mode" "<sseinsnmode>")])
17640
17641 (define_insn "vec_set_hi_<mode><mask_name>"
17642 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17643 (vec_concat:VI8F_256
17644 (vec_select:<ssehalfvecmode>
17645 (match_operand:VI8F_256 1 "register_operand" "v")
17646 (parallel [(const_int 0) (const_int 1)]))
17647 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
17648 "TARGET_AVX"
17649 {
17650 if (TARGET_AVX512VL)
17651 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17652 else
17653 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17654 }
17655 [(set_attr "type" "sselog")
17656 (set_attr "prefix_extra" "1")
17657 (set_attr "length_immediate" "1")
17658 (set_attr "prefix" "vex")
17659 (set_attr "mode" "<sseinsnmode>")])
17660
17661 (define_insn "vec_set_lo_<mode><mask_name>"
17662 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
17663 (vec_concat:VI4F_256
17664 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
17665 (vec_select:<ssehalfvecmode>
17666 (match_operand:VI4F_256 1 "register_operand" "v")
17667 (parallel [(const_int 4) (const_int 5)
17668 (const_int 6) (const_int 7)]))))]
17669 "TARGET_AVX"
17670 {
17671 if (TARGET_AVX512VL)
17672 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17673 else
17674 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17675 }
17676 [(set_attr "type" "sselog")
17677 (set_attr "prefix_extra" "1")
17678 (set_attr "length_immediate" "1")
17679 (set_attr "prefix" "vex")
17680 (set_attr "mode" "<sseinsnmode>")])
17681
17682 (define_insn "vec_set_hi_<mode><mask_name>"
17683 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
17684 (vec_concat:VI4F_256
17685 (vec_select:<ssehalfvecmode>
17686 (match_operand:VI4F_256 1 "register_operand" "v")
17687 (parallel [(const_int 0) (const_int 1)
17688 (const_int 2) (const_int 3)]))
17689 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
17690 "TARGET_AVX"
17691 {
17692 if (TARGET_AVX512VL)
17693 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17694 else
17695 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17696 }
17697 [(set_attr "type" "sselog")
17698 (set_attr "prefix_extra" "1")
17699 (set_attr "length_immediate" "1")
17700 (set_attr "prefix" "vex")
17701 (set_attr "mode" "<sseinsnmode>")])
17702
17703 (define_insn "vec_set_lo_v16hi"
17704 [(set (match_operand:V16HI 0 "register_operand" "=x")
17705 (vec_concat:V16HI
17706 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
17707 (vec_select:V8HI
17708 (match_operand:V16HI 1 "register_operand" "x")
17709 (parallel [(const_int 8) (const_int 9)
17710 (const_int 10) (const_int 11)
17711 (const_int 12) (const_int 13)
17712 (const_int 14) (const_int 15)]))))]
17713 "TARGET_AVX"
17714 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
17715 [(set_attr "type" "sselog")
17716 (set_attr "prefix_extra" "1")
17717 (set_attr "length_immediate" "1")
17718 (set_attr "prefix" "vex")
17719 (set_attr "mode" "OI")])
17720
17721 (define_insn "vec_set_hi_v16hi"
17722 [(set (match_operand:V16HI 0 "register_operand" "=x")
17723 (vec_concat:V16HI
17724 (vec_select:V8HI
17725 (match_operand:V16HI 1 "register_operand" "x")
17726 (parallel [(const_int 0) (const_int 1)
17727 (const_int 2) (const_int 3)
17728 (const_int 4) (const_int 5)
17729 (const_int 6) (const_int 7)]))
17730 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
17731 "TARGET_AVX"
17732 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
17733 [(set_attr "type" "sselog")
17734 (set_attr "prefix_extra" "1")
17735 (set_attr "length_immediate" "1")
17736 (set_attr "prefix" "vex")
17737 (set_attr "mode" "OI")])
17738
17739 (define_insn "vec_set_lo_v32qi"
17740 [(set (match_operand:V32QI 0 "register_operand" "=x")
17741 (vec_concat:V32QI
17742 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
17743 (vec_select:V16QI
17744 (match_operand:V32QI 1 "register_operand" "x")
17745 (parallel [(const_int 16) (const_int 17)
17746 (const_int 18) (const_int 19)
17747 (const_int 20) (const_int 21)
17748 (const_int 22) (const_int 23)
17749 (const_int 24) (const_int 25)
17750 (const_int 26) (const_int 27)
17751 (const_int 28) (const_int 29)
17752 (const_int 30) (const_int 31)]))))]
17753 "TARGET_AVX"
17754 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
17755 [(set_attr "type" "sselog")
17756 (set_attr "prefix_extra" "1")
17757 (set_attr "length_immediate" "1")
17758 (set_attr "prefix" "vex")
17759 (set_attr "mode" "OI")])
17760
17761 (define_insn "vec_set_hi_v32qi"
17762 [(set (match_operand:V32QI 0 "register_operand" "=x")
17763 (vec_concat:V32QI
17764 (vec_select:V16QI
17765 (match_operand:V32QI 1 "register_operand" "x")
17766 (parallel [(const_int 0) (const_int 1)
17767 (const_int 2) (const_int 3)
17768 (const_int 4) (const_int 5)
17769 (const_int 6) (const_int 7)
17770 (const_int 8) (const_int 9)
17771 (const_int 10) (const_int 11)
17772 (const_int 12) (const_int 13)
17773 (const_int 14) (const_int 15)]))
17774 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
17775 "TARGET_AVX"
17776 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
17777 [(set_attr "type" "sselog")
17778 (set_attr "prefix_extra" "1")
17779 (set_attr "length_immediate" "1")
17780 (set_attr "prefix" "vex")
17781 (set_attr "mode" "OI")])
17782
17783 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
17784 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
17785 (unspec:V48_AVX2
17786 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
17787 (match_operand:V48_AVX2 1 "memory_operand" "m")]
17788 UNSPEC_MASKMOV))]
17789 "TARGET_AVX"
17790 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
17791 [(set_attr "type" "sselog1")
17792 (set_attr "prefix_extra" "1")
17793 (set_attr "prefix" "vex")
17794 (set_attr "btver2_decode" "vector")
17795 (set_attr "mode" "<sseinsnmode>")])
17796
17797 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
17798 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
17799 (unspec:V48_AVX2
17800 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
17801 (match_operand:V48_AVX2 2 "register_operand" "x")
17802 (match_dup 0)]
17803 UNSPEC_MASKMOV))]
17804 "TARGET_AVX"
17805 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17806 [(set_attr "type" "sselog1")
17807 (set_attr "prefix_extra" "1")
17808 (set_attr "prefix" "vex")
17809 (set_attr "btver2_decode" "vector")
17810 (set_attr "mode" "<sseinsnmode>")])
17811
17812 (define_expand "maskload<mode><sseintvecmodelower>"
17813 [(set (match_operand:V48_AVX2 0 "register_operand")
17814 (unspec:V48_AVX2
17815 [(match_operand:<sseintvecmode> 2 "register_operand")
17816 (match_operand:V48_AVX2 1 "memory_operand")]
17817 UNSPEC_MASKMOV))]
17818 "TARGET_AVX")
17819
17820 (define_expand "maskload<mode><avx512fmaskmodelower>"
17821 [(set (match_operand:V48_AVX512VL 0 "register_operand")
17822 (vec_merge:V48_AVX512VL
17823 (match_operand:V48_AVX512VL 1 "memory_operand")
17824 (match_dup 0)
17825 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
17826 "TARGET_AVX512F")
17827
17828 (define_expand "maskload<mode><avx512fmaskmodelower>"
17829 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
17830 (vec_merge:VI12_AVX512VL
17831 (match_operand:VI12_AVX512VL 1 "memory_operand")
17832 (match_dup 0)
17833 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
17834 "TARGET_AVX512BW")
17835
17836 (define_expand "maskstore<mode><sseintvecmodelower>"
17837 [(set (match_operand:V48_AVX2 0 "memory_operand")
17838 (unspec:V48_AVX2
17839 [(match_operand:<sseintvecmode> 2 "register_operand")
17840 (match_operand:V48_AVX2 1 "register_operand")
17841 (match_dup 0)]
17842 UNSPEC_MASKMOV))]
17843 "TARGET_AVX")
17844
17845 (define_expand "maskstore<mode><avx512fmaskmodelower>"
17846 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
17847 (vec_merge:V48_AVX512VL
17848 (match_operand:V48_AVX512VL 1 "register_operand")
17849 (match_dup 0)
17850 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
17851 "TARGET_AVX512F")
17852
17853 (define_expand "maskstore<mode><avx512fmaskmodelower>"
17854 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
17855 (vec_merge:VI12_AVX512VL
17856 (match_operand:VI12_AVX512VL 1 "register_operand")
17857 (match_dup 0)
17858 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
17859 "TARGET_AVX512BW")
17860
17861 (define_expand "cbranch<mode>4"
17862 [(set (reg:CC FLAGS_REG)
17863 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
17864 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
17865 (set (pc) (if_then_else
17866 (match_operator 0 "bt_comparison_operator"
17867 [(reg:CC FLAGS_REG) (const_int 0)])
17868 (label_ref (match_operand 3))
17869 (pc)))]
17870 "TARGET_SSE4_1"
17871 {
17872 ix86_expand_branch (GET_CODE (operands[0]),
17873 operands[1], operands[2], operands[3]);
17874 DONE;
17875 })
17876
17877
17878 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
17879 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
17880 (unspec:AVX256MODE2P
17881 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
17882 UNSPEC_CAST))]
17883 "TARGET_AVX"
17884 "#"
17885 "&& reload_completed"
17886 [(set (match_dup 0) (match_dup 1))]
17887 {
17888 if (REG_P (operands[0]))
17889 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
17890 else
17891 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
17892 <ssehalfvecmode>mode);
17893 })
17894
17895 (define_expand "vec_init<mode>"
17896 [(match_operand:V_256 0 "register_operand")
17897 (match_operand 1)]
17898 "TARGET_AVX"
17899 {
17900 ix86_expand_vector_init (false, operands[0], operands[1]);
17901 DONE;
17902 })
17903
17904 (define_expand "vec_init<mode>"
17905 [(match_operand:VF48_I1248 0 "register_operand")
17906 (match_operand 1)]
17907 "TARGET_AVX512F"
17908 {
17909 ix86_expand_vector_init (false, operands[0], operands[1]);
17910 DONE;
17911 })
17912
17913 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
17914 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
17915 (ashiftrt:VI48_AVX512F_AVX512VL
17916 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
17917 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
17918 "TARGET_AVX2 && <mask_mode512bit_condition>"
17919 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17920 [(set_attr "type" "sseishft")
17921 (set_attr "prefix" "maybe_evex")
17922 (set_attr "mode" "<sseinsnmode>")])
17923
17924 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
17925 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17926 (ashiftrt:VI2_AVX512VL
17927 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17928 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17929 "TARGET_AVX512BW"
17930 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17931 [(set_attr "type" "sseishft")
17932 (set_attr "prefix" "maybe_evex")
17933 (set_attr "mode" "<sseinsnmode>")])
17934
17935 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
17936 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
17937 (any_lshift:VI48_AVX512F
17938 (match_operand:VI48_AVX512F 1 "register_operand" "v")
17939 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
17940 "TARGET_AVX2 && <mask_mode512bit_condition>"
17941 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17942 [(set_attr "type" "sseishft")
17943 (set_attr "prefix" "maybe_evex")
17944 (set_attr "mode" "<sseinsnmode>")])
17945
17946 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
17947 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17948 (any_lshift:VI2_AVX512VL
17949 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17950 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17951 "TARGET_AVX512BW"
17952 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17953 [(set_attr "type" "sseishft")
17954 (set_attr "prefix" "maybe_evex")
17955 (set_attr "mode" "<sseinsnmode>")])
17956
17957 (define_insn "avx_vec_concat<mode>"
17958 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
17959 (vec_concat:V_256_512
17960 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
17961 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
17962 "TARGET_AVX"
17963 {
17964 switch (which_alternative)
17965 {
17966 case 0:
17967 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
17968 case 1:
17969 switch (get_attr_mode (insn))
17970 {
17971 case MODE_V16SF:
17972 return "vmovaps\t{%1, %t0|%t0, %1}";
17973 case MODE_V8DF:
17974 return "vmovapd\t{%1, %t0|%t0, %1}";
17975 case MODE_V8SF:
17976 return "vmovaps\t{%1, %x0|%x0, %1}";
17977 case MODE_V4DF:
17978 return "vmovapd\t{%1, %x0|%x0, %1}";
17979 case MODE_XI:
17980 return "vmovdqa\t{%1, %t0|%t0, %1}";
17981 case MODE_OI:
17982 return "vmovdqa\t{%1, %x0|%x0, %1}";
17983 default:
17984 gcc_unreachable ();
17985 }
17986 default:
17987 gcc_unreachable ();
17988 }
17989 }
17990 [(set_attr "type" "sselog,ssemov")
17991 (set_attr "prefix_extra" "1,*")
17992 (set_attr "length_immediate" "1,*")
17993 (set_attr "prefix" "maybe_evex")
17994 (set_attr "mode" "<sseinsnmode>")])
17995
17996 (define_insn "vcvtph2ps<mask_name>"
17997 [(set (match_operand:V4SF 0 "register_operand" "=v")
17998 (vec_select:V4SF
17999 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
18000 UNSPEC_VCVTPH2PS)
18001 (parallel [(const_int 0) (const_int 1)
18002 (const_int 2) (const_int 3)])))]
18003 "TARGET_F16C || TARGET_AVX512VL"
18004 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18005 [(set_attr "type" "ssecvt")
18006 (set_attr "prefix" "maybe_evex")
18007 (set_attr "mode" "V4SF")])
18008
18009 (define_insn "*vcvtph2ps_load<mask_name>"
18010 [(set (match_operand:V4SF 0 "register_operand" "=v")
18011 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
18012 UNSPEC_VCVTPH2PS))]
18013 "TARGET_F16C || TARGET_AVX512VL"
18014 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18015 [(set_attr "type" "ssecvt")
18016 (set_attr "prefix" "vex")
18017 (set_attr "mode" "V8SF")])
18018
18019 (define_insn "vcvtph2ps256<mask_name>"
18020 [(set (match_operand:V8SF 0 "register_operand" "=v")
18021 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
18022 UNSPEC_VCVTPH2PS))]
18023 "TARGET_F16C || TARGET_AVX512VL"
18024 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18025 [(set_attr "type" "ssecvt")
18026 (set_attr "prefix" "vex")
18027 (set_attr "btver2_decode" "double")
18028 (set_attr "mode" "V8SF")])
18029
18030 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
18031 [(set (match_operand:V16SF 0 "register_operand" "=v")
18032 (unspec:V16SF
18033 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18034 UNSPEC_VCVTPH2PS))]
18035 "TARGET_AVX512F"
18036 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18037 [(set_attr "type" "ssecvt")
18038 (set_attr "prefix" "evex")
18039 (set_attr "mode" "V16SF")])
18040
18041 (define_expand "vcvtps2ph_mask"
18042 [(set (match_operand:V8HI 0 "register_operand")
18043 (vec_merge:V8HI
18044 (vec_concat:V8HI
18045 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
18046 (match_operand:SI 2 "const_0_to_255_operand")]
18047 UNSPEC_VCVTPS2PH)
18048 (match_dup 5))
18049 (match_operand:V8HI 3 "vector_move_operand")
18050 (match_operand:QI 4 "register_operand")))]
18051 "TARGET_AVX512VL"
18052 "operands[5] = CONST0_RTX (V4HImode);")
18053
18054 (define_expand "vcvtps2ph"
18055 [(set (match_operand:V8HI 0 "register_operand")
18056 (vec_concat:V8HI
18057 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
18058 (match_operand:SI 2 "const_0_to_255_operand")]
18059 UNSPEC_VCVTPS2PH)
18060 (match_dup 3)))]
18061 "TARGET_F16C"
18062 "operands[3] = CONST0_RTX (V4HImode);")
18063
18064 (define_insn "*vcvtps2ph<mask_name>"
18065 [(set (match_operand:V8HI 0 "register_operand" "=v")
18066 (vec_concat:V8HI
18067 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
18068 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18069 UNSPEC_VCVTPS2PH)
18070 (match_operand:V4HI 3 "const0_operand")))]
18071 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
18072 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
18073 [(set_attr "type" "ssecvt")
18074 (set_attr "prefix" "maybe_evex")
18075 (set_attr "mode" "V4SF")])
18076
18077 (define_insn "*vcvtps2ph_store<mask_name>"
18078 [(set (match_operand:V4HI 0 "memory_operand" "=m")
18079 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
18080 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18081 UNSPEC_VCVTPS2PH))]
18082 "TARGET_F16C || TARGET_AVX512VL"
18083 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18084 [(set_attr "type" "ssecvt")
18085 (set_attr "prefix" "maybe_evex")
18086 (set_attr "mode" "V4SF")])
18087
18088 (define_insn "vcvtps2ph256<mask_name>"
18089 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
18090 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
18091 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18092 UNSPEC_VCVTPS2PH))]
18093 "TARGET_F16C || TARGET_AVX512VL"
18094 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18095 [(set_attr "type" "ssecvt")
18096 (set_attr "prefix" "maybe_evex")
18097 (set_attr "btver2_decode" "vector")
18098 (set_attr "mode" "V8SF")])
18099
18100 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
18101 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
18102 (unspec:V16HI
18103 [(match_operand:V16SF 1 "register_operand" "v")
18104 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18105 UNSPEC_VCVTPS2PH))]
18106 "TARGET_AVX512F"
18107 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18108 [(set_attr "type" "ssecvt")
18109 (set_attr "prefix" "evex")
18110 (set_attr "mode" "V16SF")])
18111
18112 ;; For gather* insn patterns
18113 (define_mode_iterator VEC_GATHER_MODE
18114 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
18115 (define_mode_attr VEC_GATHER_IDXSI
18116 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
18117 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
18118 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
18119 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
18120
18121 (define_mode_attr VEC_GATHER_IDXDI
18122 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18123 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
18124 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
18125 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
18126
18127 (define_mode_attr VEC_GATHER_SRCDI
18128 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18129 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
18130 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
18131 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
18132
18133 (define_expand "avx2_gathersi<mode>"
18134 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
18135 (unspec:VEC_GATHER_MODE
18136 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
18137 (mem:<ssescalarmode>
18138 (match_par_dup 7
18139 [(match_operand 2 "vsib_address_operand")
18140 (match_operand:<VEC_GATHER_IDXSI>
18141 3 "register_operand")
18142 (match_operand:SI 5 "const1248_operand")]))
18143 (mem:BLK (scratch))
18144 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
18145 UNSPEC_GATHER))
18146 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
18147 "TARGET_AVX2"
18148 {
18149 operands[7]
18150 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18151 operands[5]), UNSPEC_VSIBADDR);
18152 })
18153
18154 (define_insn "*avx2_gathersi<mode>"
18155 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18156 (unspec:VEC_GATHER_MODE
18157 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
18158 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18159 [(unspec:P
18160 [(match_operand:P 3 "vsib_address_operand" "Tv")
18161 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
18162 (match_operand:SI 6 "const1248_operand" "n")]
18163 UNSPEC_VSIBADDR)])
18164 (mem:BLK (scratch))
18165 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
18166 UNSPEC_GATHER))
18167 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18168 "TARGET_AVX2"
18169 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
18170 [(set_attr "type" "ssemov")
18171 (set_attr "prefix" "vex")
18172 (set_attr "mode" "<sseinsnmode>")])
18173
18174 (define_insn "*avx2_gathersi<mode>_2"
18175 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18176 (unspec:VEC_GATHER_MODE
18177 [(pc)
18178 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18179 [(unspec:P
18180 [(match_operand:P 2 "vsib_address_operand" "Tv")
18181 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
18182 (match_operand:SI 5 "const1248_operand" "n")]
18183 UNSPEC_VSIBADDR)])
18184 (mem:BLK (scratch))
18185 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
18186 UNSPEC_GATHER))
18187 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18188 "TARGET_AVX2"
18189 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
18190 [(set_attr "type" "ssemov")
18191 (set_attr "prefix" "vex")
18192 (set_attr "mode" "<sseinsnmode>")])
18193
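;; Illustration only: a dword-index AVX2 gather as typically emitted via
;; the Intel intrinsics, assuming <immintrin.h> and -mavx2 (identifiers
;; other than the intrinsic are made up).  The scale argument must be 1,
;; 2, 4 or 8, matching const1248_operand above.
;;
;;   #include <immintrin.h>
;;   __m256 gather8 (const float *base, __m256i idx)
;;   {
;;     /* Loads base[idx[0]], ..., base[idx[7]]; scale 4 = sizeof (float).  */
;;     return _mm256_i32gather_ps (base, idx, 4);
;;   }
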
18194 (define_expand "avx2_gatherdi<mode>"
18195 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
18196 (unspec:VEC_GATHER_MODE
18197 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18198 (mem:<ssescalarmode>
18199 (match_par_dup 7
18200 [(match_operand 2 "vsib_address_operand")
18201 (match_operand:<VEC_GATHER_IDXDI>
18202 3 "register_operand")
18203 (match_operand:SI 5 "const1248_operand")]))
18204 (mem:BLK (scratch))
18205 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
18206 UNSPEC_GATHER))
18207 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
18208 "TARGET_AVX2"
18209 {
18210 operands[7]
18211 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18212 operands[5]), UNSPEC_VSIBADDR);
18213 })
18214
18215 (define_insn "*avx2_gatherdi<mode>"
18216 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18217 (unspec:VEC_GATHER_MODE
18218 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18219 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18220 [(unspec:P
18221 [(match_operand:P 3 "vsib_address_operand" "Tv")
18222 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18223 (match_operand:SI 6 "const1248_operand" "n")]
18224 UNSPEC_VSIBADDR)])
18225 (mem:BLK (scratch))
18226 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18227 UNSPEC_GATHER))
18228 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18229 "TARGET_AVX2"
18230 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
18231 [(set_attr "type" "ssemov")
18232 (set_attr "prefix" "vex")
18233 (set_attr "mode" "<sseinsnmode>")])
18234
18235 (define_insn "*avx2_gatherdi<mode>_2"
18236 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18237 (unspec:VEC_GATHER_MODE
18238 [(pc)
18239 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18240 [(unspec:P
18241 [(match_operand:P 2 "vsib_address_operand" "Tv")
18242 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18243 (match_operand:SI 5 "const1248_operand" "n")]
18244 UNSPEC_VSIBADDR)])
18245 (mem:BLK (scratch))
18246 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18247 UNSPEC_GATHER))
18248 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18249 "TARGET_AVX2"
18250 {
18251 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18252 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
18253 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
18254 }
18255 [(set_attr "type" "ssemov")
18256 (set_attr "prefix" "vex")
18257 (set_attr "mode" "<sseinsnmode>")])
18258
18259 (define_insn "*avx2_gatherdi<mode>_3"
18260 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18261 (vec_select:<VEC_GATHER_SRCDI>
18262 (unspec:VI4F_256
18263 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18264 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18265 [(unspec:P
18266 [(match_operand:P 3 "vsib_address_operand" "Tv")
18267 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18268 (match_operand:SI 6 "const1248_operand" "n")]
18269 UNSPEC_VSIBADDR)])
18270 (mem:BLK (scratch))
18271 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18272 UNSPEC_GATHER)
18273 (parallel [(const_int 0) (const_int 1)
18274 (const_int 2) (const_int 3)])))
18275 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18276 "TARGET_AVX2"
18277 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
18278 [(set_attr "type" "ssemov")
18279 (set_attr "prefix" "vex")
18280 (set_attr "mode" "<sseinsnmode>")])
18281
18282 (define_insn "*avx2_gatherdi<mode>_4"
18283 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18284 (vec_select:<VEC_GATHER_SRCDI>
18285 (unspec:VI4F_256
18286 [(pc)
18287 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18288 [(unspec:P
18289 [(match_operand:P 2 "vsib_address_operand" "Tv")
18290 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18291 (match_operand:SI 5 "const1248_operand" "n")]
18292 UNSPEC_VSIBADDR)])
18293 (mem:BLK (scratch))
18294 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18295 UNSPEC_GATHER)
18296 (parallel [(const_int 0) (const_int 1)
18297 (const_int 2) (const_int 3)])))
18298 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18299 "TARGET_AVX2"
18300 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
18301 [(set_attr "type" "ssemov")
18302 (set_attr "prefix" "vex")
18303 (set_attr "mode" "<sseinsnmode>")])
18304
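;; Illustration only: with qword indices a 256-bit index vector addresses
;; only four SI/SF elements, so the corresponding intrinsic returns a
;; 128-bit value (the <VEC_GATHER_SRCDI> halving above).  Sketch assuming
;; <immintrin.h> and -mavx2:
;;
;;   #include <immintrin.h>
;;   __m128 gather4 (const float *base, __m256i idx64)
;;   {
;;     return _mm256_i64gather_ps (base, idx64, 4);
;;   }
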
18305 (define_expand "<avx512>_gathersi<mode>"
18306 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18307 (unspec:VI48F
18308 [(match_operand:VI48F 1 "register_operand")
18309 (match_operand:<avx512fmaskmode> 4 "register_operand")
18310 (mem:<ssescalarmode>
18311 (match_par_dup 6
18312 [(match_operand 2 "vsib_address_operand")
18313 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
18314 (match_operand:SI 5 "const1248_operand")]))]
18315 UNSPEC_GATHER))
18316 (clobber (match_scratch:<avx512fmaskmode> 7))])]
18317 "TARGET_AVX512F"
18318 {
18319 operands[6]
18320 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18321 operands[5]), UNSPEC_VSIBADDR);
18322 })
18323
18324 (define_insn "*avx512f_gathersi<mode>"
18325 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18326 (unspec:VI48F
18327 [(match_operand:VI48F 1 "register_operand" "0")
18328 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
18329 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18330 [(unspec:P
18331 [(match_operand:P 4 "vsib_address_operand" "Tv")
18332 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
18333 (match_operand:SI 5 "const1248_operand" "n")]
18334 UNSPEC_VSIBADDR)])]
18335 UNSPEC_GATHER))
18336 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
18337 "TARGET_AVX512F"
18338 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
18339 [(set_attr "type" "ssemov")
18340 (set_attr "prefix" "evex")
18341 (set_attr "mode" "<sseinsnmode>")])
18342
18343 (define_insn "*avx512f_gathersi<mode>_2"
18344 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18345 (unspec:VI48F
18346 [(pc)
18347 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18348 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18349 [(unspec:P
18350 [(match_operand:P 3 "vsib_address_operand" "Tv")
18351 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18352 (match_operand:SI 4 "const1248_operand" "n")]
18353 UNSPEC_VSIBADDR)])]
18354 UNSPEC_GATHER))
18355 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18356 "TARGET_AVX512F"
18357 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
18358 [(set_attr "type" "ssemov")
18359 (set_attr "prefix" "evex")
18360 (set_attr "mode" "<sseinsnmode>")])
18361
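;; Illustration only: the AVX-512 form carries the mask as an explicit
;; <avx512fmaskmode> operand rather than as a vector.  Sketch assuming
;; <immintrin.h> and -mavx512f (identifiers other than the intrinsic are
;; made up):
;;
;;   #include <immintrin.h>
;;   __m512 gather16 (__m512 src, __mmask16 k, const float *base, __m512i idx)
;;   {
;;     /* Elements whose mask bit is zero keep the value from SRC.  */
;;     return _mm512_mask_i32gather_ps (src, k, idx, base, 4);
;;   }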
18362
18363 (define_expand "<avx512>_gatherdi<mode>"
18364 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18365 (unspec:VI48F
18366 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18367 (match_operand:QI 4 "register_operand")
18368 (mem:<ssescalarmode>
18369 (match_par_dup 6
18370 [(match_operand 2 "vsib_address_operand")
18371 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
18372 (match_operand:SI 5 "const1248_operand")]))]
18373 UNSPEC_GATHER))
18374 (clobber (match_scratch:QI 7))])]
18375 "TARGET_AVX512F"
18376 {
18377 operands[6]
18378 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18379 operands[5]), UNSPEC_VSIBADDR);
18380 })
18381
18382 (define_insn "*avx512f_gatherdi<mode>"
18383 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18384 (unspec:VI48F
18385 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
18386 (match_operand:QI 7 "register_operand" "2")
18387 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18388 [(unspec:P
18389 [(match_operand:P 4 "vsib_address_operand" "Tv")
18390 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
18391 (match_operand:SI 5 "const1248_operand" "n")]
18392 UNSPEC_VSIBADDR)])]
18393 UNSPEC_GATHER))
18394 (clobber (match_scratch:QI 2 "=&Yk"))]
18395 "TARGET_AVX512F"
18396 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
18397 [(set_attr "type" "ssemov")
18398 (set_attr "prefix" "evex")
18399 (set_attr "mode" "<sseinsnmode>")])
18400
18401 (define_insn "*avx512f_gatherdi<mode>_2"
18402 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18403 (unspec:VI48F
18404 [(pc)
18405 (match_operand:QI 6 "register_operand" "1")
18406 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18407 [(unspec:P
18408 [(match_operand:P 3 "vsib_address_operand" "Tv")
18409 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18410 (match_operand:SI 4 "const1248_operand" "n")]
18411 UNSPEC_VSIBADDR)])]
18412 UNSPEC_GATHER))
18413 (clobber (match_scratch:QI 1 "=&Yk"))]
18414 "TARGET_AVX512F"
18415 {
18416 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18417 {
18418 if (<MODE_SIZE> != 64)
18419 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}";
18420 else
18421 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
18422 }
18423 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
18424 }
18425 [(set_attr "type" "ssemov")
18426 (set_attr "prefix" "evex")
18427 (set_attr "mode" "<sseinsnmode>")])
18428
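;; Note on the output template above: when the data mode is wider than
;; <VEC_GATHER_SRCDI> (qword indices with dword data), only the low half
;; of operand 0 is written, so the destination is printed as %x0 for the
;; 128/256-bit modes and as %t0 for the 512-bit modes.
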
18429 (define_expand "<avx512>_scattersi<mode>"
18430 [(parallel [(set (mem:VI48F
18431 (match_par_dup 5
18432 [(match_operand 0 "vsib_address_operand")
18433 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
18434 (match_operand:SI 4 "const1248_operand")]))
18435 (unspec:VI48F
18436 [(match_operand:<avx512fmaskmode> 1 "register_operand")
18437 (match_operand:VI48F 3 "register_operand")]
18438 UNSPEC_SCATTER))
18439 (clobber (match_scratch:<avx512fmaskmode> 6))])]
18440 "TARGET_AVX512F"
18441 {
18442 operands[5]
18443 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18444 operands[4]), UNSPEC_VSIBADDR);
18445 })
18446
18447 (define_insn "*avx512f_scattersi<mode>"
18448 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18449 [(unspec:P
18450 [(match_operand:P 0 "vsib_address_operand" "Tv")
18451 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18452 (match_operand:SI 4 "const1248_operand" "n")]
18453 UNSPEC_VSIBADDR)])
18454 (unspec:VI48F
18455 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18456 (match_operand:VI48F 3 "register_operand" "v")]
18457 UNSPEC_SCATTER))
18458 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18459 "TARGET_AVX512F"
18460 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18461 [(set_attr "type" "ssemov")
18462 (set_attr "prefix" "evex")
18463 (set_attr "mode" "<sseinsnmode>")])
18464
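;; Illustration only: scatters are the store-side counterpart of the
;; gathers above.  Sketch assuming <immintrin.h> and -mavx512f:
;;
;;   #include <immintrin.h>
;;   void scatter16 (float *base, __m512i idx, __m512 v)
;;   {
;;     /* Stores v[i] to base[idx[i]] for i = 0..15 (scale 4).  */
;;     _mm512_i32scatter_ps (base, idx, v, 4);
;;   }
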
18465 (define_expand "<avx512>_scatterdi<mode>"
18466 [(parallel [(set (mem:VI48F
18467 (match_par_dup 5
18468 [(match_operand 0 "vsib_address_operand")
18469 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
18470 (match_operand:SI 4 "const1248_operand")]))
18471 (unspec:VI48F
18472 [(match_operand:QI 1 "register_operand")
18473 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
18474 UNSPEC_SCATTER))
18475 (clobber (match_scratch:QI 6))])]
18476 "TARGET_AVX512F"
18477 {
18478 operands[5]
18479 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18480 operands[4]), UNSPEC_VSIBADDR);
18481 })
18482
18483 (define_insn "*avx512f_scatterdi<mode>"
18484 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18485 [(unspec:P
18486 [(match_operand:P 0 "vsib_address_operand" "Tv")
18487 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18488 (match_operand:SI 4 "const1248_operand" "n")]
18489 UNSPEC_VSIBADDR)])
18490 (unspec:VI48F
18491 [(match_operand:QI 6 "register_operand" "1")
18492 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
18493 UNSPEC_SCATTER))
18494 (clobber (match_scratch:QI 1 "=&Yk"))]
18495 "TARGET_AVX512F"
18496 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18497 [(set_attr "type" "ssemov")
18498 (set_attr "prefix" "evex")
18499 (set_attr "mode" "<sseinsnmode>")])
18500
18501 (define_insn "<avx512>_compress<mode>_mask"
18502 [(set (match_operand:VI48F 0 "register_operand" "=v")
18503 (unspec:VI48F
18504 [(match_operand:VI48F 1 "register_operand" "v")
18505 (match_operand:VI48F 2 "vector_move_operand" "0C")
18506 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
18507 UNSPEC_COMPRESS))]
18508 "TARGET_AVX512F"
18509 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18510 [(set_attr "type" "ssemov")
18511 (set_attr "prefix" "evex")
18512 (set_attr "mode" "<sseinsnmode>")])
18513
18514 (define_insn "<avx512>_compressstore<mode>_mask"
18515 [(set (match_operand:VI48F 0 "memory_operand" "=m")
18516 (unspec:VI48F
18517 [(match_operand:VI48F 1 "register_operand" "x")
18518 (match_dup 0)
18519 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
18520 UNSPEC_COMPRESS_STORE))]
18521 "TARGET_AVX512F"
18522 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
18523 [(set_attr "type" "ssemov")
18524 (set_attr "prefix" "evex")
18525 (set_attr "memory" "store")
18526 (set_attr "mode" "<sseinsnmode>")])
18527
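;; Illustration only: vpcompress packs the active (mask = 1) elements
;; into the low positions of a register, or into consecutive memory for
;; the compressstore form.  Sketch assuming <immintrin.h> and -mavx512f:
;;
;;   #include <immintrin.h>
;;   void keep_selected (int *dst, __mmask16 k, __m512i v)
;;   {
;;     /* Writes only the selected elements, contiguously.  */
;;     _mm512_mask_compressstoreu_epi32 (dst, k, v);
;;   }
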
18528 (define_expand "<avx512>_expand<mode>_maskz"
18529 [(set (match_operand:VI48F 0 "register_operand")
18530 (unspec:VI48F
18531 [(match_operand:VI48F 1 "nonimmediate_operand")
18532 (match_operand:VI48F 2 "vector_move_operand")
18533 (match_operand:<avx512fmaskmode> 3 "register_operand")]
18534 UNSPEC_EXPAND))]
18535 "TARGET_AVX512F"
18536 "operands[2] = CONST0_RTX (<MODE>mode);")
18537
18538 (define_insn "<avx512>_expand<mode>_mask"
18539 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
18540 (unspec:VI48F
18541 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
18542 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
18543 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
18544 UNSPEC_EXPAND))]
18545 "TARGET_AVX512F"
18546 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18547 [(set_attr "type" "ssemov")
18548 (set_attr "prefix" "evex")
18549 (set_attr "memory" "none,load")
18550 (set_attr "mode" "<sseinsnmode>")])
18551
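;; Illustration only: vpexpand is the inverse of vpcompress, spreading
;; consecutive source elements to the mask-selected positions; the memory
;; alternative above corresponds to the expand-load intrinsics.  Sketch
;; assuming <immintrin.h> and -mavx512f:
;;
;;   #include <immintrin.h>
;;   __m512i expand_selected (__mmask16 k, const int *src)
;;   {
;;     /* Unselected elements are zeroed (maskz form).  */
;;     return _mm512_maskz_expandloadu_epi32 (k, src);
;;   }
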
18552 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
18553 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18554 (unspec:VF_AVX512VL
18555 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18556 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18557 (match_operand:SI 3 "const_0_to_15_operand")]
18558 UNSPEC_RANGE))]
18559 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
18560 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
18561 [(set_attr "type" "sse")
18562 (set_attr "prefix" "evex")
18563 (set_attr "mode" "<MODE>")])
18564
18565 (define_insn "avx512dq_ranges<mode><round_saeonly_name>"
18566 [(set (match_operand:VF_128 0 "register_operand" "=v")
18567 (vec_merge:VF_128
18568 (unspec:VF_128
18569 [(match_operand:VF_128 1 "register_operand" "v")
18570 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18571 (match_operand:SI 3 "const_0_to_15_operand")]
18572 UNSPEC_RANGE)
18573 (match_dup 1)
18574 (const_int 1)))]
18575 "TARGET_AVX512DQ"
18576 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
18577 [(set_attr "type" "sse")
18578 (set_attr "prefix" "evex")
18579 (set_attr "mode" "<MODE>")])
18580
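;; Illustration only: vrangeps/vrangepd select a min/max/abs-min/abs-max
;; result per element according to the low bits of the immediate.  Sketch
;; assuming <immintrin.h> and -mavx512dq:
;;
;;   #include <immintrin.h>
;;   __m512 range_min (__m512 a, __m512 b)
;;   {
;;     /* Immediate 0 selects the per-element minimum; higher immediate
;;        bits control the abs/sign handling.  */
;;     return _mm512_range_ps (a, b, 0);
;;   }
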
18581 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
18582 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
18583 (unspec:<avx512fmaskmode>
18584 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18585 (match_operand:QI 2 "const_0_to_255_operand" "n")]
18586 UNSPEC_FPCLASS))]
18587 "TARGET_AVX512DQ"
18588 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
18589 [(set_attr "type" "sse")
18590 (set_attr "length_immediate" "1")
18591 (set_attr "prefix" "evex")
18592 (set_attr "mode" "<MODE>")])
18593
18594 (define_insn "avx512dq_vmfpclass<mode>"
18595 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
18596 (and:<avx512fmaskmode>
18597 (unspec:<avx512fmaskmode>
18598 [(match_operand:VF_128 1 "register_operand" "v")
18599 (match_operand:QI 2 "const_0_to_255_operand" "n")]
18600 UNSPEC_FPCLASS)
18601 (const_int 1)))]
18602 "TARGET_AVX512DQ"
18603 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
18604 [(set_attr "type" "sse")
18605 (set_attr "length_immediate" "1")
18606 (set_attr "prefix" "evex")
18607 (set_attr "mode" "<MODE>")])
18608
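;; Illustration only: vfpclass tests each element against the IEEE
;; categories selected by the immediate and returns a mask.  Sketch
;; assuming <immintrin.h> and -mavx512dq:
;;
;;   #include <immintrin.h>
;;   __mmask16 find_nan (__m512 v)
;;   {
;;     /* 0x81 selects quiet and signaling NaNs.  */
;;     return _mm512_fpclass_ps_mask (v, 0x81);
;;   }
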
18609 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
18610 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18611 (unspec:VF_AVX512VL
18612 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
18613 (match_operand:SI 2 "const_0_to_15_operand")]
18614 UNSPEC_GETMANT))]
18615 "TARGET_AVX512F"
18616 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
18617 [(set_attr "prefix" "evex")
18618 (set_attr "mode" "<MODE>")])
18619
18620 (define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
18621 [(set (match_operand:VF_128 0 "register_operand" "=v")
18622 (vec_merge:VF_128
18623 (unspec:VF_128
18624 [(match_operand:VF_128 1 "register_operand" "v")
18625 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18626 (match_operand:SI 3 "const_0_to_15_operand")]
18627 UNSPEC_GETMANT)
18628 (match_dup 1)
18629 (const_int 1)))]
18630 "TARGET_AVX512F"
18631 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}";
18632 [(set_attr "prefix" "evex")
18633 (set_attr "mode" "<ssescalarmode>")])
18634
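;; Illustration only: vgetmant extracts the normalized mantissa, with the
;; target interval and sign treatment chosen by the immediate.  Sketch
;; assuming <immintrin.h> and -mavx512f:
;;
;;   #include <immintrin.h>
;;   __m512 mantissa (__m512 v)
;;   {
;;     /* Mantissa normalized to [1, 2), sign copied from the source.  */
;;     return _mm512_getmant_ps (v, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
;;   }
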
18635 ;; The correct representation for this is absolutely enormous, and
18636 ;; surely not generally useful.
18637 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
18638 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18639 (unspec:VI2_AVX512VL
18640 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
18641 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
18642 (match_operand:SI 3 "const_0_to_255_operand")]
18643 UNSPEC_DBPSADBW))]
18644 "TARGET_AVX512BW"
18645 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
18646 [(set_attr "isa" "avx")
18647 (set_attr "type" "sselog1")
18648 (set_attr "length_immediate" "1")
18649 (set_attr "prefix" "evex")
18650 (set_attr "mode" "<sseinsnmode>")])
18651
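;; Illustration only: vdbpsadbw computes sums of absolute differences on
;; byte blocks selected by the immediate.  Sketch assuming <immintrin.h>
;; and -mavx512bw:
;;
;;   #include <immintrin.h>
;;   __m512i sad_blocks (__m512i a, __m512i b)
;;   {
;;     return _mm512_dbsad_epu8 (a, b, 0);
;;   }
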
18652 (define_insn "clz<mode>2<mask_name>"
18653 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18654 (clz:VI48_AVX512VL
18655 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
18656 "TARGET_AVX512CD"
18657 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18658 [(set_attr "type" "sse")
18659 (set_attr "prefix" "evex")
18660 (set_attr "mode" "<sseinsnmode>")])
18661
18662 (define_insn "<mask_codefor>conflict<mode><mask_name>"
18663 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18664 (unspec:VI48_AVX512VL
18665 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
18666 UNSPEC_CONFLICT))]
18667 "TARGET_AVX512CD"
18668 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18669 [(set_attr "type" "sse")
18670 (set_attr "prefix" "evex")
18671 (set_attr "mode" "<sseinsnmode>")])
18672
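;; Illustration only: the AVX-512CD patterns above count leading zeros
;; (_mm512_lzcnt_epi32) and detect duplicate indices within a vector.
;; Sketch assuming <immintrin.h> and -mavx512cd:
;;
;;   #include <immintrin.h>
;;   __m512i dup_bits (__m512i idx)
;;   {
;;     /* Each element gets a bit set for every earlier equal element.  */
;;     return _mm512_conflict_epi32 (idx);
;;   }
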
18673 (define_insn "sha1msg1"
18674 [(set (match_operand:V4SI 0 "register_operand" "=x")
18675 (unspec:V4SI
18676 [(match_operand:V4SI 1 "register_operand" "0")
18677 (match_operand:V4SI 2 "vector_operand" "xBm")]
18678 UNSPEC_SHA1MSG1))]
18679 "TARGET_SHA"
18680 "sha1msg1\t{%2, %0|%0, %2}"
18681 [(set_attr "type" "sselog1")
18682 (set_attr "mode" "TI")])
18683
18684 (define_insn "sha1msg2"
18685 [(set (match_operand:V4SI 0 "register_operand" "=x")
18686 (unspec:V4SI
18687 [(match_operand:V4SI 1 "register_operand" "0")
18688 (match_operand:V4SI 2 "vector_operand" "xBm")]
18689 UNSPEC_SHA1MSG2))]
18690 "TARGET_SHA"
18691 "sha1msg2\t{%2, %0|%0, %2}"
18692 [(set_attr "type" "sselog1")
18693 (set_attr "mode" "TI")])
18694
18695 (define_insn "sha1nexte"
18696 [(set (match_operand:V4SI 0 "register_operand" "=x")
18697 (unspec:V4SI
18698 [(match_operand:V4SI 1 "register_operand" "0")
18699 (match_operand:V4SI 2 "vector_operand" "xBm")]
18700 UNSPEC_SHA1NEXTE))]
18701 "TARGET_SHA"
18702 "sha1nexte\t{%2, %0|%0, %2}"
18703 [(set_attr "type" "sselog1")
18704 (set_attr "mode" "TI")])
18705
18706 (define_insn "sha1rnds4"
18707 [(set (match_operand:V4SI 0 "register_operand" "=x")
18708 (unspec:V4SI
18709 [(match_operand:V4SI 1 "register_operand" "0")
18710 (match_operand:V4SI 2 "vector_operand" "xBm")
18711 (match_operand:SI 3 "const_0_to_3_operand" "n")]
18712 UNSPEC_SHA1RNDS4))]
18713 "TARGET_SHA"
18714 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
18715 [(set_attr "type" "sselog1")
18716 (set_attr "length_immediate" "1")
18717 (set_attr "mode" "TI")])
18718
18719 (define_insn "sha256msg1"
18720 [(set (match_operand:V4SI 0 "register_operand" "=x")
18721 (unspec:V4SI
18722 [(match_operand:V4SI 1 "register_operand" "0")
18723 (match_operand:V4SI 2 "vector_operand" "xBm")]
18724 UNSPEC_SHA256MSG1))]
18725 "TARGET_SHA"
18726 "sha256msg1\t{%2, %0|%0, %2}"
18727 [(set_attr "type" "sselog1")
18728 (set_attr "mode" "TI")])
18729
18730 (define_insn "sha256msg2"
18731 [(set (match_operand:V4SI 0 "register_operand" "=x")
18732 (unspec:V4SI
18733 [(match_operand:V4SI 1 "register_operand" "0")
18734 (match_operand:V4SI 2 "vector_operand" "xBm")]
18735 UNSPEC_SHA256MSG2))]
18736 "TARGET_SHA"
18737 "sha256msg2\t{%2, %0|%0, %2}"
18738 [(set_attr "type" "sselog1")
18739 (set_attr "mode" "TI")])
18740
18741 (define_insn "sha256rnds2"
18742 [(set (match_operand:V4SI 0 "register_operand" "=x")
18743 (unspec:V4SI
18744 [(match_operand:V4SI 1 "register_operand" "0")
18745 (match_operand:V4SI 2 "vector_operand" "xBm")
18746 (match_operand:V4SI 3 "register_operand" "Yz")]
18747 UNSPEC_SHA256RNDS2))]
18748 "TARGET_SHA"
18749 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
18750 [(set_attr "type" "sselog1")
18751 (set_attr "length_immediate" "1")
18752 (set_attr "mode" "TI")])
18753
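;; Illustration only: the SHA patterns map directly to the SHA-NI
;; intrinsics; note the "Yz" constraint above, which pins the third
;; source of sha256rnds2 to %xmm0 as the ISA requires.  Sketch assuming
;; <immintrin.h> and -msha:
;;
;;   #include <immintrin.h>
;;   __m128i two_rounds (__m128i state0, __m128i state1, __m128i wk)
;;   {
;;     /* wk (message words plus round constants) must end up in xmm0.  */
;;     return _mm_sha256rnds2_epu32 (state0, state1, wk);
;;   }
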
18754 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
18755 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18756 (unspec:AVX512MODE2P
18757 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
18758 UNSPEC_CAST))]
18759 "TARGET_AVX512F"
18760 "#"
18761 "&& reload_completed"
18762 [(set (match_dup 0) (match_dup 1))]
18763 {
18764 if (REG_P (operands[0]))
18765 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
18766 else
18767 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
18768 <ssequartermode>mode);
18769 })
18770
18771 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
18772 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18773 (unspec:AVX512MODE2P
18774 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
18775 UNSPEC_CAST))]
18776 "TARGET_AVX512F"
18777 "#"
18778 "&& reload_completed"
18779 [(set (match_dup 0) (match_dup 1))]
18780 {
18781 if (REG_P (operands[0]))
18782 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
18783 else
18784 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
18785 <ssehalfvecmode>mode);
18786 })
18787
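;; Illustration only: these splitters back the 512-bit cast intrinsics,
;; which reinterpret a 128/256-bit value as the low part of a 512-bit one
;; and leave the upper bits undefined, so after reload they reduce to a
;; plain lowpart move.  Sketch assuming <immintrin.h> and -mavx512f:
;;
;;   #include <immintrin.h>
;;   __m512 widen (__m256 lo)
;;   {
;;     /* No instruction needed; the upper 256 bits are undefined.  */
;;     return _mm512_castps256_ps512 (lo);
;;   }
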
18788 (define_int_iterator VPMADD52
18789 [UNSPEC_VPMADD52LUQ
18790 UNSPEC_VPMADD52HUQ])
18791
18792 (define_int_attr vpmadd52type
18793 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
18794
18795 (define_expand "vpamdd52huq<mode>_maskz"
18796 [(match_operand:VI8_AVX512VL 0 "register_operand")
18797 (match_operand:VI8_AVX512VL 1 "register_operand")
18798 (match_operand:VI8_AVX512VL 2 "register_operand")
18799 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
18800 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18801 "TARGET_AVX512IFMA"
18802 {
18803 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
18804 operands[0], operands[1], operands[2], operands[3],
18805 CONST0_RTX (<MODE>mode), operands[4]));
18806 DONE;
18807 })
18808
18809 (define_expand "vpamdd52luq<mode>_maskz"
18810 [(match_operand:VI8_AVX512VL 0 "register_operand")
18811 (match_operand:VI8_AVX512VL 1 "register_operand")
18812 (match_operand:VI8_AVX512VL 2 "register_operand")
18813 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
18814 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18815 "TARGET_AVX512IFMA"
18816 {
18817 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
18818 operands[0], operands[1], operands[2], operands[3],
18819 CONST0_RTX (<MODE>mode), operands[4]));
18820 DONE;
18821 })
18822
18823 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
18824 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18825 (unspec:VI8_AVX512VL
18826 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
18827 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
18828 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
18829 VPMADD52))]
18830 "TARGET_AVX512IFMA"
18831 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
18832 [(set_attr "type" "ssemuladd")
18833 (set_attr "prefix" "evex")
18834 (set_attr "mode" "<sseinsnmode>")])
18835
18836 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
18837 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18838 (vec_merge:VI8_AVX512VL
18839 (unspec:VI8_AVX512VL
18840 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
18841 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
18842 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
18843 VPMADD52)
18844 (match_dup 1)
18845 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18846 "TARGET_AVX512IFMA"
18847 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
18848 [(set_attr "type" "ssemuladd")
18849 (set_attr "prefix" "evex")
18850 (set_attr "mode" "<sseinsnmode>")])
18851
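;; Illustration only: vpmadd52luq/vpmadd52huq multiply 52-bit fields and
;; accumulate into 64-bit lanes, as used for large-integer arithmetic.
;; Sketch assuming <immintrin.h> and -mavx512ifma:
;;
;;   #include <immintrin.h>
;;   __m512i mul_add_lo (__m512i acc, __m512i a, __m512i b)
;;   {
;;     /* acc += low 52 bits of (a[51:0] * b[51:0]) per qword lane.  */
;;     return _mm512_madd52lo_epu64 (acc, a, b);
;;   }
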
18852 (define_insn "vpmultishiftqb<mode><mask_name>"
18853 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18854 (unspec:VI1_AVX512VL
18855 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
18856 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
18857 UNSPEC_VPMULTISHIFT))]
18858 "TARGET_AVX512VBMI"
18859 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18860 [(set_attr "type" "sselog")
18861 (set_attr "prefix" "evex")
18862 (set_attr "mode" "<sseinsnmode>")])
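
;; Illustration only: vpmultishiftqb extracts, for each destination byte,
;; an unaligned 8-bit field from the corresponding source qword at a
;; per-byte bit offset.  Sketch assuming <immintrin.h> and -mavx512vbmi:
;;
;;   #include <immintrin.h>
;;   __m512i extract_fields (__m512i offsets, __m512i data)
;;   {
;;     return _mm512_multishift_epi64_epi8 (offsets, data);
;;   }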