tree-vectorizer.h (struct _loop_vec_info): Add scalar_loop field.
[gcc.git] / gcc / config / i386 / sse.md
1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
20 (define_c_enum "unspec" [
21 ;; SSE
22 UNSPEC_MOVNT
23 UNSPEC_LOADU
24 UNSPEC_STOREU
25
26 ;; SSE3
27 UNSPEC_LDDQU
28
29 ;; SSSE3
30 UNSPEC_PSHUFB
31 UNSPEC_PSIGN
32 UNSPEC_PALIGNR
33
34 ;; For SSE4A support
35 UNSPEC_EXTRQI
36 UNSPEC_EXTRQ
37 UNSPEC_INSERTQI
38 UNSPEC_INSERTQ
39
40 ;; For SSE4.1 support
41 UNSPEC_BLENDV
42 UNSPEC_INSERTPS
43 UNSPEC_DP
44 UNSPEC_MOVNTDQA
45 UNSPEC_MPSADBW
46 UNSPEC_PHMINPOSUW
47 UNSPEC_PTEST
48
49 ;; For SSE4.2 support
50 UNSPEC_PCMPESTR
51 UNSPEC_PCMPISTR
52
53 ;; For FMA4 support
54 UNSPEC_FMADDSUB
55 UNSPEC_XOP_UNSIGNED_CMP
56 UNSPEC_XOP_TRUEFALSE
57 UNSPEC_XOP_PERMUTE
58 UNSPEC_FRCZ
59
60 ;; For AES support
61 UNSPEC_AESENC
62 UNSPEC_AESENCLAST
63 UNSPEC_AESDEC
64 UNSPEC_AESDECLAST
65 UNSPEC_AESIMC
66 UNSPEC_AESKEYGENASSIST
67
68 ;; For PCLMUL support
69 UNSPEC_PCLMUL
70
71 ;; For AVX support
72 UNSPEC_PCMP
73 UNSPEC_VPERMIL
74 UNSPEC_VPERMIL2
75 UNSPEC_VPERMIL2F128
76 UNSPEC_CAST
77 UNSPEC_VTESTP
78 UNSPEC_VCVTPH2PS
79 UNSPEC_VCVTPS2PH
80
81 ;; For AVX2 support
82 UNSPEC_VPERMVAR
83 UNSPEC_VPERMTI
84 UNSPEC_GATHER
85 UNSPEC_VSIBADDR
86
87 ;; For AVX512F support
88 UNSPEC_VPERMI2
89 UNSPEC_VPERMT2
90 UNSPEC_VPERMI2_MASK
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
92 UNSPEC_UNSIGNED_PCMP
93 UNSPEC_TESTM
94 UNSPEC_TESTNM
95 UNSPEC_SCATTER
96 UNSPEC_RCP14
97 UNSPEC_RSQRT14
98 UNSPEC_FIXUPIMM
99 UNSPEC_SCALEF
100 UNSPEC_VTERNLOG
101 UNSPEC_GETEXP
102 UNSPEC_GETMANT
103 UNSPEC_ALIGN
104 UNSPEC_CONFLICT
105 UNSPEC_COMPRESS
106 UNSPEC_COMPRESS_STORE
107 UNSPEC_EXPAND
108 UNSPEC_MASKED_EQ
109 UNSPEC_MASKED_GT
110
111 ;; For embed. rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
113
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
117
118 ;; For AVX512ER support
119 UNSPEC_EXP2
120 UNSPEC_RCP28
121 UNSPEC_RSQRT28
122 ])
123
124 (define_c_enum "unspecv" [
125 UNSPECV_LDMXCSR
126 UNSPECV_STMXCSR
127 UNSPECV_CLFLUSH
128 UNSPECV_MONITOR
129 UNSPECV_MWAIT
130 UNSPECV_VZEROALL
131 UNSPECV_VZEROUPPER
132 ])
133
134 ;; All vector modes including V?TImode, used in move patterns.
135 (define_mode_iterator VMOVE
136 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
137 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
138 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
139 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
140 (V2TI "TARGET_AVX") V1TI
141 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
142 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
143
144 ;; All vector modes
145 (define_mode_iterator V
146 [(V32QI "TARGET_AVX") V16QI
147 (V16HI "TARGET_AVX") V8HI
148 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
149 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
150 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
151 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
152
153 ;; All 128bit vector modes
154 (define_mode_iterator V_128
155 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
156
157 ;; All 256bit vector modes
158 (define_mode_iterator V_256
159 [V32QI V16HI V8SI V4DI V8SF V4DF])
160
161 ;; All 512bit vector modes
162 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
163
164 ;; All 256bit and 512bit vector modes
165 (define_mode_iterator V_256_512
166 [V32QI V16HI V8SI V4DI V8SF V4DF
167 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
168 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
169
170 ;; All vector float modes
171 (define_mode_iterator VF
172 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
173 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
174
175 ;; 128- and 256-bit float vector modes
176 (define_mode_iterator VF_128_256
177 [(V8SF "TARGET_AVX") V4SF
178 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
179
180 ;; All SFmode vector float modes
181 (define_mode_iterator VF1
182 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
183
184 ;; 128- and 256-bit SF vector modes
185 (define_mode_iterator VF1_128_256
186 [(V8SF "TARGET_AVX") V4SF])
187
188 ;; All DFmode vector float modes
189 (define_mode_iterator VF2
190 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
191
192 ;; 128- and 256-bit DF vector modes
193 (define_mode_iterator VF2_128_256
194 [(V4DF "TARGET_AVX") V2DF])
195
196 (define_mode_iterator VF2_512_256
197 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")])
198
199 ;; All 128bit vector float modes
200 (define_mode_iterator VF_128
201 [V4SF (V2DF "TARGET_SSE2")])
202
203 ;; All 256bit vector float modes
204 (define_mode_iterator VF_256
205 [V8SF V4DF])
206
207 ;; All 512bit vector float modes
208 (define_mode_iterator VF_512
209 [V16SF V8DF])
210
211 ;; All vector integer modes
212 (define_mode_iterator VI
213 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
214 (V32QI "TARGET_AVX") V16QI
215 (V16HI "TARGET_AVX") V8HI
216 (V8SI "TARGET_AVX") V4SI
217 (V4DI "TARGET_AVX") V2DI])
218
219 (define_mode_iterator VI_AVX2
220 [(V32QI "TARGET_AVX2") V16QI
221 (V16HI "TARGET_AVX2") V8HI
222 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
223 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
224
225 ;; All QImode vector integer modes
226 (define_mode_iterator VI1
227 [(V32QI "TARGET_AVX") V16QI])
228
229 (define_mode_iterator VI_UNALIGNED_LOADSTORE
230 [(V32QI "TARGET_AVX") V16QI
231 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
232
233 ;; All DImode vector integer modes
234 (define_mode_iterator VI8
235 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
236
237 (define_mode_iterator VI1_AVX2
238 [(V32QI "TARGET_AVX2") V16QI])
239
240 (define_mode_iterator VI2_AVX2
241 [(V16HI "TARGET_AVX2") V8HI])
242
243 (define_mode_iterator VI2_AVX512F
244 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
245
246 (define_mode_iterator VI4_AVX
247 [(V8SI "TARGET_AVX") V4SI])
248
249 (define_mode_iterator VI4_AVX2
250 [(V8SI "TARGET_AVX2") V4SI])
251
252 (define_mode_iterator VI4_AVX512F
253 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
254
255 (define_mode_iterator VI48_AVX512F
256 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
257 (V8DI "TARGET_AVX512F")])
258
259 (define_mode_iterator VI8_AVX2
260 [(V4DI "TARGET_AVX2") V2DI])
261
262 (define_mode_iterator VI8_AVX2_AVX512F
263 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
264
265 ;; All V8D* modes
266 (define_mode_iterator V8FI
267 [V8DF V8DI])
268
269 ;; All V16S* modes
270 (define_mode_iterator V16FI
271 [V16SF V16SI])
272
273 ;; ??? We should probably use TImode instead.
274 (define_mode_iterator VIMAX_AVX2
275 [(V2TI "TARGET_AVX2") V1TI])
276
277 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
278 (define_mode_iterator SSESCALARMODE
279 [(V2TI "TARGET_AVX2") TI])
280
281 (define_mode_iterator VI12_AVX2
282 [(V32QI "TARGET_AVX2") V16QI
283 (V16HI "TARGET_AVX2") V8HI])
284
285 (define_mode_iterator VI24_AVX2
286 [(V16HI "TARGET_AVX2") V8HI
287 (V8SI "TARGET_AVX2") V4SI])
288
289 (define_mode_iterator VI124_AVX2_48_AVX512F
290 [(V32QI "TARGET_AVX2") V16QI
291 (V16HI "TARGET_AVX2") V8HI
292 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
293 (V8DI "TARGET_AVX512F")])
294
295 (define_mode_iterator VI124_AVX512F
296 [(V32QI "TARGET_AVX2") V16QI
297 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
298 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
299
300 (define_mode_iterator VI124_AVX2
301 [(V32QI "TARGET_AVX2") V16QI
302 (V16HI "TARGET_AVX2") V8HI
303 (V8SI "TARGET_AVX2") V4SI])
304
305 (define_mode_iterator VI248_AVX2
306 [(V16HI "TARGET_AVX2") V8HI
307 (V8SI "TARGET_AVX2") V4SI
308 (V4DI "TARGET_AVX2") V2DI])
309
310 (define_mode_iterator VI248_AVX2_8_AVX512F
311 [(V16HI "TARGET_AVX2") V8HI
312 (V8SI "TARGET_AVX2") V4SI
313 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
314
315 (define_mode_iterator VI48_AVX2_48_AVX512F
316 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
317 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
318
319 (define_mode_iterator V48_AVX2
320 [V4SF V2DF
321 V8SF V4DF
322 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
323 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
324
325 (define_mode_attr sse2_avx_avx512f
326 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
327 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
328 (V8DI "avx512f")
329 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
330 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
331
332 (define_mode_attr sse2_avx2
333 [(V16QI "sse2") (V32QI "avx2")
334 (V8HI "sse2") (V16HI "avx2")
335 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
336 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
337 (V1TI "sse2") (V2TI "avx2")])
338
339 (define_mode_attr ssse3_avx2
340 [(V16QI "ssse3") (V32QI "avx2")
341 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2")
342 (V4SI "ssse3") (V8SI "avx2")
343 (V2DI "ssse3") (V4DI "avx2")
344 (TI "ssse3") (V2TI "avx2")])
345
346 (define_mode_attr sse4_1_avx2
347 [(V16QI "sse4_1") (V32QI "avx2")
348 (V8HI "sse4_1") (V16HI "avx2")
349 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
350 (V2DI "sse4_1") (V4DI "avx2")])
351
352 (define_mode_attr avx_avx2
353 [(V4SF "avx") (V2DF "avx")
354 (V8SF "avx") (V4DF "avx")
355 (V4SI "avx2") (V2DI "avx2")
356 (V8SI "avx2") (V4DI "avx2")])
357
358 (define_mode_attr vec_avx2
359 [(V16QI "vec") (V32QI "avx2")
360 (V8HI "vec") (V16HI "avx2")
361 (V4SI "vec") (V8SI "avx2")
362 (V2DI "vec") (V4DI "avx2")])
363
364 (define_mode_attr avx2_avx512f
365 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
366 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
367 (V8SF "avx2") (V16SF "avx512f")
368 (V4DF "avx2") (V8DF "avx512f")])
369
370 (define_mode_attr shuffletype
371 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
372 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
373 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
374 (V32QI "i") (V16HI "u") (V16QI "i") (V8HI "i")
375 (V64QI "i") (V1TI "i") (V2TI "i")])
376
377 (define_mode_attr ssequartermode
378 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
379
380 (define_mode_attr ssedoublemode
381 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
382 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
383 (V32QI "V32HI") (V16QI "V16HI")])
384
385 (define_mode_attr ssebytemode
386 [(V4DI "V32QI") (V2DI "V16QI")])
387
388 ;; All 128bit vector integer modes
389 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
390
391 ;; All 256bit vector integer modes
392 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
393
394 ;; All 512bit vector integer modes
395 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
396
397 ;; Various 128bit vector integer mode combinations
398 (define_mode_iterator VI12_128 [V16QI V8HI])
399 (define_mode_iterator VI14_128 [V16QI V4SI])
400 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
401 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
402 (define_mode_iterator VI24_128 [V8HI V4SI])
403 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
404 (define_mode_iterator VI48_128 [V4SI V2DI])
405
406 ;; Various 256bit and 512 vector integer mode combinations
407 (define_mode_iterator VI124_256_48_512
408 [V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")])
409 (define_mode_iterator VI48_256 [V8SI V4DI])
410 (define_mode_iterator VI48_512 [V16SI V8DI])
411
412 ;; Int-float size matches
413 (define_mode_iterator VI4F_128 [V4SI V4SF])
414 (define_mode_iterator VI8F_128 [V2DI V2DF])
415 (define_mode_iterator VI4F_256 [V8SI V8SF])
416 (define_mode_iterator VI8F_256 [V4DI V4DF])
417 (define_mode_iterator VI8F_256_512
418 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
419 (define_mode_iterator VI48F_256_512
420 [V8SI V8SF
421 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
422 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
423 (define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
424
425 ;; Mapping from float mode to required SSE level
426 (define_mode_attr sse
427 [(SF "sse") (DF "sse2")
428 (V4SF "sse") (V2DF "sse2")
429 (V16SF "avx512f") (V8SF "avx")
430 (V8DF "avx512f") (V4DF "avx")])
431
432 (define_mode_attr sse2
433 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
434 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
435
436 (define_mode_attr sse3
437 [(V16QI "sse3") (V32QI "avx")])
438
439 (define_mode_attr sse4_1
440 [(V4SF "sse4_1") (V2DF "sse4_1")
441 (V8SF "avx") (V4DF "avx")
442 (V8DF "avx512f")])
443
444 (define_mode_attr avxsizesuffix
445 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
446 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
447 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
448 (V16SF "512") (V8DF "512")
449 (V8SF "256") (V4DF "256")
450 (V4SF "") (V2DF "")])
451
452 ;; SSE instruction mode
453 (define_mode_attr sseinsnmode
454 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI")
455 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
456 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
457 (V16SF "V16SF") (V8DF "V8DF")
458 (V8SF "V8SF") (V4DF "V4DF")
459 (V4SF "V4SF") (V2DF "V2DF")
460 (TI "TI")])
461
462 ;; Mapping of vector modes to corresponding mask size
463 (define_mode_attr avx512fmaskmode
464 [(V16QI "HI")
465 (V16HI "HI") (V8HI "QI")
466 (V16SI "HI") (V8SI "QI") (V4SI "QI")
467 (V8DI "QI") (V4DI "QI") (V2DI "QI")
468 (V16SF "HI") (V8SF "QI") (V4SF "QI")
469 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
470
471 ;; Mapping of vector float modes to an integer mode of the same size
472 (define_mode_attr sseintvecmode
473 [(V16SF "V16SI") (V8DF "V8DI")
474 (V8SF "V8SI") (V4DF "V4DI")
475 (V4SF "V4SI") (V2DF "V2DI")
476 (V16SI "V16SI") (V8DI "V8DI")
477 (V8SI "V8SI") (V4DI "V4DI")
478 (V4SI "V4SI") (V2DI "V2DI")
479 (V16HI "V16HI") (V8HI "V8HI")
480 (V32QI "V32QI") (V16QI "V16QI")])
481
482 (define_mode_attr sseintvecmodelower
483 [(V16SF "v16si")
484 (V8SF "v8si") (V4DF "v4di")
485 (V4SF "v4si") (V2DF "v2di")
486 (V8SI "v8si") (V4DI "v4di")
487 (V4SI "v4si") (V2DI "v2di")
488 (V16HI "v16hi") (V8HI "v8hi")
489 (V32QI "v32qi") (V16QI "v16qi")])
490
491 ;; Mapping of vector modes to a vector mode of double size
492 (define_mode_attr ssedoublevecmode
493 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
494 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
495 (V8SF "V16SF") (V4DF "V8DF")
496 (V4SF "V8SF") (V2DF "V4DF")])
497
498 ;; Mapping of vector modes to a vector mode of half size
499 (define_mode_attr ssehalfvecmode
500 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
501 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
502 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
503 (V16SF "V8SF") (V8DF "V4DF")
504 (V8SF "V4SF") (V4DF "V2DF")
505 (V4SF "V2SF")])
506
507 ;; Mapping of vector modes ti packed single mode of the same size
508 (define_mode_attr ssePSmode
509 [(V16SI "V16SF") (V8DF "V16SF")
510 (V16SF "V16SF") (V8DI "V16SF")
511 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
512 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
513 (V8SI "V8SF") (V4SI "V4SF")
514 (V4DI "V8SF") (V2DI "V4SF")
515 (V2TI "V8SF") (V1TI "V4SF")
516 (V8SF "V8SF") (V4SF "V4SF")
517 (V4DF "V8SF") (V2DF "V4SF")])
518
519 ;; Mapping of vector modes back to the scalar modes
520 (define_mode_attr ssescalarmode
521 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
522 (V32HI "HI") (V16HI "HI") (V8HI "HI")
523 (V16SI "SI") (V8SI "SI") (V4SI "SI")
524 (V8DI "DI") (V4DI "DI") (V2DI "DI")
525 (V16SF "SF") (V8SF "SF") (V4SF "SF")
526 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
527
528 ;; Mapping of vector modes to the 128bit modes
529 (define_mode_attr ssexmmmode
530 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
531 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
532 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
533 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
534 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
535 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
536
537 ;; Pointer size override for scalar modes (Intel asm dialect)
538 (define_mode_attr iptr
539 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
540 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
541 (V8SF "k") (V4DF "q")
542 (V4SF "k") (V2DF "q")
543 (SF "k") (DF "q")])
544
545 ;; Number of scalar elements in each vector type
546 (define_mode_attr ssescalarnum
547 [(V64QI "64") (V16SI "16") (V8DI "8")
548 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
549 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
550 (V16SF "16") (V8DF "8")
551 (V8SF "8") (V4DF "4")
552 (V4SF "4") (V2DF "2")])
553
554 ;; Mask of scalar elements in each vector type
555 (define_mode_attr ssescalarnummask
556 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
557 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
558 (V8SF "7") (V4DF "3")
559 (V4SF "3") (V2DF "1")])
560
561 (define_mode_attr ssescalarsize
562 [(V8DI "64") (V4DI "64") (V2DI "64")
563 (V32HI "16") (V16HI "16") (V8HI "16")
564 (V16SI "32") (V8SI "32") (V4SI "32")
565 (V16SF "32") (V8DF "64")])
566
567 ;; SSE prefix for integer vector modes
568 (define_mode_attr sseintprefix
569 [(V2DI "p") (V2DF "")
570 (V4DI "p") (V4DF "")
571 (V8DI "p") (V8DF "")
572 (V4SI "p") (V4SF "")
573 (V8SI "p") (V8SF "")
574 (V16SI "p") (V16SF "")])
575
576 ;; SSE scalar suffix for vector modes
577 (define_mode_attr ssescalarmodesuffix
578 [(SF "ss") (DF "sd")
579 (V8SF "ss") (V4DF "sd")
580 (V4SF "ss") (V2DF "sd")
581 (V8SI "ss") (V4DI "sd")
582 (V4SI "d")])
583
584 ;; Pack/unpack vector modes
585 (define_mode_attr sseunpackmode
586 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
587 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
588 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
589
590 (define_mode_attr ssepackmode
591 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
592 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
593 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
594
595 ;; Mapping of the max integer size for xop rotate immediate constraint
596 (define_mode_attr sserotatemax
597 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
598
599 ;; Mapping of mode to cast intrinsic name
600 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
601
602 ;; Instruction suffix for sign and zero extensions.
603 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
604
605 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
606 ;; i64x4 or f64x4 for 512bit modes.
607 (define_mode_attr i128
608 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
609 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
610 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
611
612 ;; Mix-n-match
613 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
614
615 ;; Mapping of immediate bits for blend instructions
616 (define_mode_attr blendbits
617 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
618
619 ;; Mapping suffixes for broadcast
620 (define_mode_attr bcstscalarsuff
621 [(V16SI "d") (V16SF "ss") (V8DI "q") (V8DF "sd")])
622
623 ;; Include define_subst patterns for instructions with mask
624 (include "subst.md")
625
626 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
627
628 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
629 ;;
630 ;; Move patterns
631 ;;
632 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
633
634 ;; All of these patterns are enabled for SSE1 as well as SSE2.
635 ;; This is essential for maintaining stable calling conventions.
636
637 (define_expand "mov<mode>"
638 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
639 (match_operand:VMOVE 1 "nonimmediate_operand"))]
640 "TARGET_SSE"
641 {
642 ix86_expand_vector_move (<MODE>mode, operands);
643 DONE;
644 })
645
646 (define_insn "*mov<mode>_internal"
647 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
648 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
649 "TARGET_SSE
650 && (register_operand (operands[0], <MODE>mode)
651 || register_operand (operands[1], <MODE>mode))"
652 {
653 int mode = get_attr_mode (insn);
654 switch (which_alternative)
655 {
656 case 0:
657 return standard_sse_constant_opcode (insn, operands[1]);
658 case 1:
659 case 2:
660 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
661 in avx512f, so we need to use workarounds, to access sse registers
662 16-31, which are evex-only. */
663 if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64
664 && (EXT_REX_SSE_REGNO_P (REGNO (operands[0]))
665 || EXT_REX_SSE_REGNO_P (REGNO (operands[1]))))
666 {
667 if (memory_operand (operands[0], <MODE>mode))
668 {
669 if (GET_MODE_SIZE (<MODE>mode) == 32)
670 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
671 else if (GET_MODE_SIZE (<MODE>mode) == 16)
672 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
673 else
674 gcc_unreachable ();
675 }
676 else if (memory_operand (operands[1], <MODE>mode))
677 {
678 if (GET_MODE_SIZE (<MODE>mode) == 32)
679 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
680 else if (GET_MODE_SIZE (<MODE>mode) == 16)
681 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
682 else
683 gcc_unreachable ();
684 }
685 else
686 /* Reg -> reg move is always aligned. Just use wider move. */
687 switch (mode)
688 {
689 case MODE_V8SF:
690 case MODE_V4SF:
691 return "vmovaps\t{%g1, %g0|%g0, %g1}";
692 case MODE_V4DF:
693 case MODE_V2DF:
694 return "vmovapd\t{%g1, %g0|%g0, %g1}";
695 case MODE_OI:
696 case MODE_TI:
697 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
698 default:
699 gcc_unreachable ();
700 }
701 }
702 switch (mode)
703 {
704 case MODE_V16SF:
705 case MODE_V8SF:
706 case MODE_V4SF:
707 if (TARGET_AVX
708 && (misaligned_operand (operands[0], <MODE>mode)
709 || misaligned_operand (operands[1], <MODE>mode)))
710 return "vmovups\t{%1, %0|%0, %1}";
711 else
712 return "%vmovaps\t{%1, %0|%0, %1}";
713
714 case MODE_V8DF:
715 case MODE_V4DF:
716 case MODE_V2DF:
717 if (TARGET_AVX
718 && (misaligned_operand (operands[0], <MODE>mode)
719 || misaligned_operand (operands[1], <MODE>mode)))
720 return "vmovupd\t{%1, %0|%0, %1}";
721 else
722 return "%vmovapd\t{%1, %0|%0, %1}";
723
724 case MODE_OI:
725 case MODE_TI:
726 if (TARGET_AVX
727 && (misaligned_operand (operands[0], <MODE>mode)
728 || misaligned_operand (operands[1], <MODE>mode)))
729 return "vmovdqu\t{%1, %0|%0, %1}";
730 else
731 return "%vmovdqa\t{%1, %0|%0, %1}";
732 case MODE_XI:
733 if (misaligned_operand (operands[0], <MODE>mode)
734 || misaligned_operand (operands[1], <MODE>mode))
735 return "vmovdqu64\t{%1, %0|%0, %1}";
736 else
737 return "vmovdqa64\t{%1, %0|%0, %1}";
738
739 default:
740 gcc_unreachable ();
741 }
742 default:
743 gcc_unreachable ();
744 }
745 }
746 [(set_attr "type" "sselog1,ssemov,ssemov")
747 (set_attr "prefix" "maybe_vex")
748 (set (attr "mode")
749 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
750 (const_string "<ssePSmode>")
751 (and (eq_attr "alternative" "2")
752 (match_test "TARGET_SSE_TYPELESS_STORES"))
753 (const_string "<ssePSmode>")
754 (match_test "TARGET_AVX")
755 (const_string "<sseinsnmode>")
756 (ior (not (match_test "TARGET_SSE2"))
757 (match_test "optimize_function_for_size_p (cfun)"))
758 (const_string "V4SF")
759 (and (eq_attr "alternative" "0")
760 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
761 (const_string "TI")
762 ]
763 (const_string "<sseinsnmode>")))])
764
765 (define_insn "avx512f_load<mode>_mask"
766 [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
767 (vec_merge:VI48F_512
768 (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
769 (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
770 (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))]
771 "TARGET_AVX512F"
772 {
773 switch (MODE_<sseinsnmode>)
774 {
775 case MODE_V8DF:
776 case MODE_V16SF:
777 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
778 default:
779 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
780 }
781 }
782 [(set_attr "type" "ssemov")
783 (set_attr "prefix" "evex")
784 (set_attr "memory" "none,load")
785 (set_attr "mode" "<sseinsnmode>")])
786
787 (define_insn "avx512f_blendm<mode>"
788 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
789 (vec_merge:VI48F_512
790 (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
791 (match_operand:VI48F_512 1 "register_operand" "v")
792 (match_operand:<avx512fmaskmode> 3 "register_operand" "k")))]
793 "TARGET_AVX512F"
794 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
795 [(set_attr "type" "ssemov")
796 (set_attr "prefix" "evex")
797 (set_attr "mode" "<sseinsnmode>")])
798
799 (define_insn "avx512f_store<mode>_mask"
800 [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
801 (vec_merge:VI48F_512
802 (match_operand:VI48F_512 1 "register_operand" "v")
803 (match_dup 0)
804 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
805 "TARGET_AVX512F"
806 {
807 switch (MODE_<sseinsnmode>)
808 {
809 case MODE_V8DF:
810 case MODE_V16SF:
811 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
812 default:
813 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
814 }
815 }
816 [(set_attr "type" "ssemov")
817 (set_attr "prefix" "evex")
818 (set_attr "memory" "store")
819 (set_attr "mode" "<sseinsnmode>")])
820
821 (define_insn "sse2_movq128"
822 [(set (match_operand:V2DI 0 "register_operand" "=x")
823 (vec_concat:V2DI
824 (vec_select:DI
825 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
826 (parallel [(const_int 0)]))
827 (const_int 0)))]
828 "TARGET_SSE2"
829 "%vmovq\t{%1, %0|%0, %q1}"
830 [(set_attr "type" "ssemov")
831 (set_attr "prefix" "maybe_vex")
832 (set_attr "mode" "TI")])
833
834 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
835 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
836 ;; from memory, we'd prefer to load the memory directly into the %xmm
837 ;; register. To facilitate this happy circumstance, this pattern won't
838 ;; split until after register allocation. If the 64-bit value didn't
839 ;; come from memory, this is the best we can do. This is much better
840 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
841 ;; from there.
842
843 (define_insn_and_split "movdi_to_sse"
844 [(parallel
845 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
846 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
847 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
848 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
849 "#"
850 "&& reload_completed"
851 [(const_int 0)]
852 {
853 if (register_operand (operands[1], DImode))
854 {
855 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
856 Assemble the 64-bit DImode value in an xmm register. */
857 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
858 gen_rtx_SUBREG (SImode, operands[1], 0)));
859 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
860 gen_rtx_SUBREG (SImode, operands[1], 4)));
861 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
862 operands[2]));
863 }
864 else if (memory_operand (operands[1], DImode))
865 {
866 rtx tmp = gen_reg_rtx (V2DImode);
867 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
868 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
869 }
870 else
871 gcc_unreachable ();
872 })
873
874 (define_split
875 [(set (match_operand:V4SF 0 "register_operand")
876 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
877 "TARGET_SSE && reload_completed"
878 [(set (match_dup 0)
879 (vec_merge:V4SF
880 (vec_duplicate:V4SF (match_dup 1))
881 (match_dup 2)
882 (const_int 1)))]
883 {
884 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
885 operands[2] = CONST0_RTX (V4SFmode);
886 })
887
888 (define_split
889 [(set (match_operand:V2DF 0 "register_operand")
890 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
891 "TARGET_SSE2 && reload_completed"
892 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
893 {
894 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
895 operands[2] = CONST0_RTX (DFmode);
896 })
897
898 (define_expand "push<mode>1"
899 [(match_operand:VMOVE 0 "register_operand")]
900 "TARGET_SSE"
901 {
902 ix86_expand_push (<MODE>mode, operands[0]);
903 DONE;
904 })
905
906 (define_expand "movmisalign<mode>"
907 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
908 (match_operand:VMOVE 1 "nonimmediate_operand"))]
909 "TARGET_SSE"
910 {
911 ix86_expand_vector_move_misalign (<MODE>mode, operands);
912 DONE;
913 })
914
915 (define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
916 [(set (match_operand:VF 0 "register_operand" "=v")
917 (unspec:VF
918 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
919 UNSPEC_LOADU))]
920 "TARGET_SSE && <mask_mode512bit_condition>"
921 {
922 switch (get_attr_mode (insn))
923 {
924 case MODE_V16SF:
925 case MODE_V8SF:
926 case MODE_V4SF:
927 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
928 default:
929 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
930 }
931 }
932 [(set_attr "type" "ssemov")
933 (set_attr "movu" "1")
934 (set_attr "ssememalign" "8")
935 (set_attr "prefix" "maybe_vex")
936 (set (attr "mode")
937 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
938 (const_string "<ssePSmode>")
939 (match_test "TARGET_AVX")
940 (const_string "<MODE>")
941 (match_test "optimize_function_for_size_p (cfun)")
942 (const_string "V4SF")
943 ]
944 (const_string "<MODE>")))])
945
946 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
947 [(set (match_operand:VF 0 "memory_operand" "=m")
948 (unspec:VF
949 [(match_operand:VF 1 "register_operand" "v")]
950 UNSPEC_STOREU))]
951 "TARGET_SSE"
952 {
953 switch (get_attr_mode (insn))
954 {
955 case MODE_V16SF:
956 case MODE_V8SF:
957 case MODE_V4SF:
958 return "%vmovups\t{%1, %0|%0, %1}";
959 default:
960 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
961 }
962 }
963 [(set_attr "type" "ssemov")
964 (set_attr "movu" "1")
965 (set_attr "ssememalign" "8")
966 (set_attr "prefix" "maybe_vex")
967 (set (attr "mode")
968 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
969 (match_test "TARGET_SSE_TYPELESS_STORES"))
970 (const_string "<ssePSmode>")
971 (match_test "TARGET_AVX")
972 (const_string "<MODE>")
973 (match_test "optimize_function_for_size_p (cfun)")
974 (const_string "V4SF")
975 ]
976 (const_string "<MODE>")))])
977
978 (define_insn "avx512f_storeu<ssemodesuffix>512_mask"
979 [(set (match_operand:VF_512 0 "memory_operand" "=m")
980 (vec_merge:VF_512
981 (unspec:VF_512
982 [(match_operand:VF_512 1 "register_operand" "v")]
983 UNSPEC_STOREU)
984 (match_dup 0)
985 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
986 "TARGET_AVX512F"
987 {
988 switch (get_attr_mode (insn))
989 {
990 case MODE_V16SF:
991 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
992 default:
993 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
994 }
995 }
996 [(set_attr "type" "ssemov")
997 (set_attr "movu" "1")
998 (set_attr "memory" "store")
999 (set_attr "prefix" "evex")
1000 (set_attr "mode" "<sseinsnmode>")])
1001
1002 (define_insn "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1003 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
1004 (unspec:VI_UNALIGNED_LOADSTORE
1005 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
1006 UNSPEC_LOADU))]
1007 "TARGET_SSE2 && <mask_mode512bit_condition>"
1008 {
1009 switch (get_attr_mode (insn))
1010 {
1011 case MODE_V8SF:
1012 case MODE_V4SF:
1013 return "%vmovups\t{%1, %0|%0, %1}";
1014 case MODE_XI:
1015 if (<MODE>mode == V8DImode)
1016 return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1017 else
1018 return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1019 default:
1020 return "%vmovdqu\t{%1, %0|%0, %1}";
1021 }
1022 }
1023 [(set_attr "type" "ssemov")
1024 (set_attr "movu" "1")
1025 (set_attr "ssememalign" "8")
1026 (set (attr "prefix_data16")
1027 (if_then_else
1028 (match_test "TARGET_AVX")
1029 (const_string "*")
1030 (const_string "1")))
1031 (set_attr "prefix" "maybe_vex")
1032 (set (attr "mode")
1033 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1034 (const_string "<ssePSmode>")
1035 (match_test "TARGET_AVX")
1036 (const_string "<sseinsnmode>")
1037 (match_test "optimize_function_for_size_p (cfun)")
1038 (const_string "V4SF")
1039 ]
1040 (const_string "<sseinsnmode>")))])
1041
1042 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1043 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m")
1044 (unspec:VI_UNALIGNED_LOADSTORE
1045 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")]
1046 UNSPEC_STOREU))]
1047 "TARGET_SSE2"
1048 {
1049 switch (get_attr_mode (insn))
1050 {
1051 case MODE_V8SF:
1052 case MODE_V4SF:
1053 return "%vmovups\t{%1, %0|%0, %1}";
1054 case MODE_XI:
1055 if (<MODE>mode == V8DImode)
1056 return "vmovdqu64\t{%1, %0|%0, %1}";
1057 else
1058 return "vmovdqu32\t{%1, %0|%0, %1}";
1059 default:
1060 return "%vmovdqu\t{%1, %0|%0, %1}";
1061 }
1062 }
1063 [(set_attr "type" "ssemov")
1064 (set_attr "movu" "1")
1065 (set_attr "ssememalign" "8")
1066 (set (attr "prefix_data16")
1067 (if_then_else
1068 (match_test "TARGET_AVX")
1069 (const_string "*")
1070 (const_string "1")))
1071 (set_attr "prefix" "maybe_vex")
1072 (set (attr "mode")
1073 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1074 (match_test "TARGET_SSE_TYPELESS_STORES"))
1075 (const_string "<ssePSmode>")
1076 (match_test "TARGET_AVX")
1077 (const_string "<sseinsnmode>")
1078 (match_test "optimize_function_for_size_p (cfun)")
1079 (const_string "V4SF")
1080 ]
1081 (const_string "<sseinsnmode>")))])
1082
1083 (define_insn "avx512f_storedqu<mode>_mask"
1084 [(set (match_operand:VI48_512 0 "memory_operand" "=m")
1085 (vec_merge:VI48_512
1086 (unspec:VI48_512
1087 [(match_operand:VI48_512 1 "register_operand" "v")]
1088 UNSPEC_STOREU)
1089 (match_dup 0)
1090 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
1091 "TARGET_AVX512F"
1092 {
1093 if (<MODE>mode == V8DImode)
1094 return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1095 else
1096 return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1097 }
1098 [(set_attr "type" "ssemov")
1099 (set_attr "movu" "1")
1100 (set_attr "memory" "store")
1101 (set_attr "prefix" "evex")
1102 (set_attr "mode" "<sseinsnmode>")])
1103
1104 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1105 [(set (match_operand:VI1 0 "register_operand" "=x")
1106 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1107 UNSPEC_LDDQU))]
1108 "TARGET_SSE3"
1109 "%vlddqu\t{%1, %0|%0, %1}"
1110 [(set_attr "type" "ssemov")
1111 (set_attr "movu" "1")
1112 (set_attr "ssememalign" "8")
1113 (set (attr "prefix_data16")
1114 (if_then_else
1115 (match_test "TARGET_AVX")
1116 (const_string "*")
1117 (const_string "0")))
1118 (set (attr "prefix_rep")
1119 (if_then_else
1120 (match_test "TARGET_AVX")
1121 (const_string "*")
1122 (const_string "1")))
1123 (set_attr "prefix" "maybe_vex")
1124 (set_attr "mode" "<sseinsnmode>")])
1125
1126 (define_insn "sse2_movnti<mode>"
1127 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1128 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1129 UNSPEC_MOVNT))]
1130 "TARGET_SSE2"
1131 "movnti\t{%1, %0|%0, %1}"
1132 [(set_attr "type" "ssemov")
1133 (set_attr "prefix_data16" "0")
1134 (set_attr "mode" "<MODE>")])
1135
1136 (define_insn "<sse>_movnt<mode>"
1137 [(set (match_operand:VF 0 "memory_operand" "=m")
1138 (unspec:VF
1139 [(match_operand:VF 1 "register_operand" "v")]
1140 UNSPEC_MOVNT))]
1141 "TARGET_SSE"
1142 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1143 [(set_attr "type" "ssemov")
1144 (set_attr "prefix" "maybe_vex")
1145 (set_attr "mode" "<MODE>")])
1146
1147 (define_insn "<sse2>_movnt<mode>"
1148 [(set (match_operand:VI8 0 "memory_operand" "=m")
1149 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1150 UNSPEC_MOVNT))]
1151 "TARGET_SSE2"
1152 "%vmovntdq\t{%1, %0|%0, %1}"
1153 [(set_attr "type" "ssecvt")
1154 (set (attr "prefix_data16")
1155 (if_then_else
1156 (match_test "TARGET_AVX")
1157 (const_string "*")
1158 (const_string "1")))
1159 (set_attr "prefix" "maybe_vex")
1160 (set_attr "mode" "<sseinsnmode>")])
1161
1162 ; Expand patterns for non-temporal stores. At the moment, only those
1163 ; that directly map to insns are defined; it would be possible to
1164 ; define patterns for other modes that would expand to several insns.
1165
1166 ;; Modes handled by storent patterns.
1167 (define_mode_iterator STORENT_MODE
1168 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1169 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1170 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1171 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1172 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1173
1174 (define_expand "storent<mode>"
1175 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1176 (unspec:STORENT_MODE
1177 [(match_operand:STORENT_MODE 1 "register_operand")]
1178 UNSPEC_MOVNT))]
1179 "TARGET_SSE")
1180
1181 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1182 ;;
1183 ;; Parallel floating point arithmetic
1184 ;;
1185 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1186
1187 (define_expand "<code><mode>2"
1188 [(set (match_operand:VF 0 "register_operand")
1189 (absneg:VF
1190 (match_operand:VF 1 "register_operand")))]
1191 "TARGET_SSE"
1192 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1193
1194 (define_insn_and_split "*absneg<mode>2"
1195 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1196 (match_operator:VF 3 "absneg_operator"
1197 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1198 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1199 "TARGET_SSE"
1200 "#"
1201 "&& reload_completed"
1202 [(const_int 0)]
1203 {
1204 enum rtx_code absneg_op;
1205 rtx op1, op2;
1206 rtx t;
1207
1208 if (TARGET_AVX)
1209 {
1210 if (MEM_P (operands[1]))
1211 op1 = operands[2], op2 = operands[1];
1212 else
1213 op1 = operands[1], op2 = operands[2];
1214 }
1215 else
1216 {
1217 op1 = operands[0];
1218 if (rtx_equal_p (operands[0], operands[1]))
1219 op2 = operands[2];
1220 else
1221 op2 = operands[1];
1222 }
1223
1224 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1225 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1226 t = gen_rtx_SET (VOIDmode, operands[0], t);
1227 emit_insn (t);
1228 DONE;
1229 }
1230 [(set_attr "isa" "noavx,noavx,avx,avx")])
1231
1232 (define_expand "<plusminus_insn><mode>3<mask_name>"
1233 [(set (match_operand:VF 0 "register_operand")
1234 (plusminus:VF
1235 (match_operand:VF 1 "nonimmediate_operand")
1236 (match_operand:VF 2 "nonimmediate_operand")))]
1237 "TARGET_SSE && <mask_mode512bit_condition>"
1238 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1239
1240 (define_insn "*<plusminus_insn><mode>3<mask_name>"
1241 [(set (match_operand:VF 0 "register_operand" "=x,v")
1242 (plusminus:VF
1243 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,v")
1244 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1245 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
1246 "@
1247 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1248 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1249 [(set_attr "isa" "noavx,avx")
1250 (set_attr "type" "sseadd")
1251 (set_attr "prefix" "<mask_prefix3>")
1252 (set_attr "mode" "<MODE>")])
1253
1254 (define_insn "<sse>_vm<plusminus_insn><mode>3"
1255 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1256 (vec_merge:VF_128
1257 (plusminus:VF_128
1258 (match_operand:VF_128 1 "register_operand" "0,v")
1259 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1260 (match_dup 1)
1261 (const_int 1)))]
1262 "TARGET_SSE"
1263 "@
1264 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1265 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1266 [(set_attr "isa" "noavx,avx")
1267 (set_attr "type" "sseadd")
1268 (set_attr "prefix" "orig,vex")
1269 (set_attr "mode" "<ssescalarmode>")])
1270
1271 (define_expand "mul<mode>3<mask_name>"
1272 [(set (match_operand:VF 0 "register_operand")
1273 (mult:VF
1274 (match_operand:VF 1 "nonimmediate_operand")
1275 (match_operand:VF 2 "nonimmediate_operand")))]
1276 "TARGET_SSE && <mask_mode512bit_condition>"
1277 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1278
1279 (define_insn "*mul<mode>3<mask_name>"
1280 [(set (match_operand:VF 0 "register_operand" "=x,v")
1281 (mult:VF
1282 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
1283 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1284 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
1285 "@
1286 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1287 vmul<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1288 [(set_attr "isa" "noavx,avx")
1289 (set_attr "type" "ssemul")
1290 (set_attr "prefix" "<mask_prefix3>")
1291 (set_attr "btver2_decode" "direct,double")
1292 (set_attr "mode" "<MODE>")])
1293
1294 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3"
1295 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1296 (vec_merge:VF_128
1297 (multdiv:VF_128
1298 (match_operand:VF_128 1 "register_operand" "0,v")
1299 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1300 (match_dup 1)
1301 (const_int 1)))]
1302 "TARGET_SSE"
1303 "@
1304 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1305 v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1306 [(set_attr "isa" "noavx,avx")
1307 (set_attr "type" "sse<multdiv_mnemonic>")
1308 (set_attr "prefix" "orig,vex")
1309 (set_attr "btver2_decode" "direct,double")
1310 (set_attr "mode" "<ssescalarmode>")])
1311
1312 (define_expand "div<mode>3"
1313 [(set (match_operand:VF2 0 "register_operand")
1314 (div:VF2 (match_operand:VF2 1 "register_operand")
1315 (match_operand:VF2 2 "nonimmediate_operand")))]
1316 "TARGET_SSE2"
1317 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1318
1319 (define_expand "div<mode>3"
1320 [(set (match_operand:VF1 0 "register_operand")
1321 (div:VF1 (match_operand:VF1 1 "register_operand")
1322 (match_operand:VF1 2 "nonimmediate_operand")))]
1323 "TARGET_SSE"
1324 {
1325 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1326
1327 if (TARGET_SSE_MATH
1328 && TARGET_RECIP_VEC_DIV
1329 && !optimize_insn_for_size_p ()
1330 && flag_finite_math_only && !flag_trapping_math
1331 && flag_unsafe_math_optimizations)
1332 {
1333 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1334 DONE;
1335 }
1336 })
1337
1338 (define_insn "<sse>_div<mode>3<mask_name>"
1339 [(set (match_operand:VF 0 "register_operand" "=x,v")
1340 (div:VF
1341 (match_operand:VF 1 "register_operand" "0,v")
1342 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1343 "TARGET_SSE && <mask_mode512bit_condition>"
1344 "@
1345 div<ssemodesuffix>\t{%2, %0|%0, %2}
1346 vdiv<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1347 [(set_attr "isa" "noavx,avx")
1348 (set_attr "type" "ssediv")
1349 (set_attr "prefix" "<mask_prefix3>")
1350 (set_attr "mode" "<MODE>")])
1351
1352 (define_insn "<sse>_rcp<mode>2"
1353 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1354 (unspec:VF1_128_256
1355 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1356 "TARGET_SSE"
1357 "%vrcpps\t{%1, %0|%0, %1}"
1358 [(set_attr "type" "sse")
1359 (set_attr "atom_sse_attr" "rcp")
1360 (set_attr "btver2_sse_attr" "rcp")
1361 (set_attr "prefix" "maybe_vex")
1362 (set_attr "mode" "<MODE>")])
1363
1364 (define_insn "sse_vmrcpv4sf2"
1365 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1366 (vec_merge:V4SF
1367 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1368 UNSPEC_RCP)
1369 (match_operand:V4SF 2 "register_operand" "0,x")
1370 (const_int 1)))]
1371 "TARGET_SSE"
1372 "@
1373 rcpss\t{%1, %0|%0, %k1}
1374 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1375 [(set_attr "isa" "noavx,avx")
1376 (set_attr "type" "sse")
1377 (set_attr "ssememalign" "32")
1378 (set_attr "atom_sse_attr" "rcp")
1379 (set_attr "btver2_sse_attr" "rcp")
1380 (set_attr "prefix" "orig,vex")
1381 (set_attr "mode" "SF")])
1382
1383 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1384 [(set (match_operand:VF_512 0 "register_operand" "=v")
1385 (unspec:VF_512
1386 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1387 UNSPEC_RCP14))]
1388 "TARGET_AVX512F"
1389 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1390 [(set_attr "type" "sse")
1391 (set_attr "prefix" "evex")
1392 (set_attr "mode" "<MODE>")])
1393
1394 (define_insn "*srcp14<mode>"
1395 [(set (match_operand:VF_128 0 "register_operand" "=v")
1396 (vec_merge:VF_128
1397 (unspec:VF_128
1398 [(match_operand:VF_128 1 "register_operand" "v")
1399 (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
1400 UNSPEC_RCP14)
1401 (match_dup 1)
1402 (const_int 1)))]
1403 "TARGET_AVX512F"
1404 "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1405 [(set_attr "type" "sse")
1406 (set_attr "prefix" "evex")
1407 (set_attr "mode" "<MODE>")])
1408
1409 (define_expand "sqrt<mode>2"
1410 [(set (match_operand:VF2 0 "register_operand")
1411 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
1412 "TARGET_SSE2")
1413
1414 (define_expand "sqrt<mode>2"
1415 [(set (match_operand:VF1 0 "register_operand")
1416 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1417 "TARGET_SSE"
1418 {
1419 if (TARGET_SSE_MATH
1420 && TARGET_RECIP_VEC_SQRT
1421 && !optimize_insn_for_size_p ()
1422 && flag_finite_math_only && !flag_trapping_math
1423 && flag_unsafe_math_optimizations)
1424 {
1425 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
1426 DONE;
1427 }
1428 })
1429
1430 (define_insn "<sse>_sqrt<mode>2<mask_name>"
1431 [(set (match_operand:VF 0 "register_operand" "=v")
1432 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "vm")))]
1433 "TARGET_SSE && <mask_mode512bit_condition>"
1434 "%vsqrt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1435 [(set_attr "type" "sse")
1436 (set_attr "atom_sse_attr" "sqrt")
1437 (set_attr "btver2_sse_attr" "sqrt")
1438 (set_attr "prefix" "maybe_vex")
1439 (set_attr "mode" "<MODE>")])
1440
1441 (define_insn "<sse>_vmsqrt<mode>2"
1442 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1443 (vec_merge:VF_128
1444 (sqrt:VF_128
1445 (match_operand:VF_128 1 "nonimmediate_operand" "xm,vm"))
1446 (match_operand:VF_128 2 "register_operand" "0,v")
1447 (const_int 1)))]
1448 "TARGET_SSE"
1449 "@
1450 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1451 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1452 [(set_attr "isa" "noavx,avx")
1453 (set_attr "type" "sse")
1454 (set_attr "atom_sse_attr" "sqrt")
1455 (set_attr "prefix" "orig,vex")
1456 (set_attr "btver2_sse_attr" "sqrt")
1457 (set_attr "mode" "<ssescalarmode>")])
1458
1459 (define_expand "rsqrt<mode>2"
1460 [(set (match_operand:VF1_128_256 0 "register_operand")
1461 (unspec:VF1_128_256
1462 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1463 "TARGET_SSE_MATH"
1464 {
1465 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1466 DONE;
1467 })
1468
1469 (define_insn "<sse>_rsqrt<mode>2"
1470 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1471 (unspec:VF1_128_256
1472 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1473 "TARGET_SSE"
1474 "%vrsqrtps\t{%1, %0|%0, %1}"
1475 [(set_attr "type" "sse")
1476 (set_attr "prefix" "maybe_vex")
1477 (set_attr "mode" "<MODE>")])
1478
1479 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1480 [(set (match_operand:VF_512 0 "register_operand" "=v")
1481 (unspec:VF_512
1482 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1483 UNSPEC_RSQRT14))]
1484 "TARGET_AVX512F"
1485 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1486 [(set_attr "type" "sse")
1487 (set_attr "prefix" "evex")
1488 (set_attr "mode" "<MODE>")])
1489
1490 (define_insn "*rsqrt14<mode>"
1491 [(set (match_operand:VF_128 0 "register_operand" "=v")
1492 (vec_merge:VF_128
1493 (unspec:VF_128
1494 [(match_operand:VF_128 1 "register_operand" "v")
1495 (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
1496 UNSPEC_RSQRT14)
1497 (match_dup 1)
1498 (const_int 1)))]
1499 "TARGET_AVX512F"
1500 "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1501 [(set_attr "type" "sse")
1502 (set_attr "prefix" "evex")
1503 (set_attr "mode" "<MODE>")])
1504
1505 (define_insn "sse_vmrsqrtv4sf2"
1506 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1507 (vec_merge:V4SF
1508 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1509 UNSPEC_RSQRT)
1510 (match_operand:V4SF 2 "register_operand" "0,x")
1511 (const_int 1)))]
1512 "TARGET_SSE"
1513 "@
1514 rsqrtss\t{%1, %0|%0, %k1}
1515 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1516 [(set_attr "isa" "noavx,avx")
1517 (set_attr "type" "sse")
1518 (set_attr "ssememalign" "32")
1519 (set_attr "prefix" "orig,vex")
1520 (set_attr "mode" "SF")])
1521
1522 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1523 ;; isn't really correct, as those rtl operators aren't defined when
1524 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1525
1526 (define_expand "<code><mode>3<mask_name>"
1527 [(set (match_operand:VF 0 "register_operand")
1528 (smaxmin:VF
1529 (match_operand:VF 1 "nonimmediate_operand")
1530 (match_operand:VF 2 "nonimmediate_operand")))]
1531 "TARGET_SSE && <mask_mode512bit_condition>"
1532 {
1533 if (!flag_finite_math_only)
1534 operands[1] = force_reg (<MODE>mode, operands[1]);
1535 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1536 })
1537
1538 (define_insn "*<code><mode>3_finite<mask_name>"
1539 [(set (match_operand:VF 0 "register_operand" "=x,v")
1540 (smaxmin:VF
1541 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
1542 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1543 "TARGET_SSE && flag_finite_math_only
1544 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1545 && <mask_mode512bit_condition>"
1546 "@
1547 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1548 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1549 [(set_attr "isa" "noavx,avx")
1550 (set_attr "type" "sseadd")
1551 (set_attr "btver2_sse_attr" "maxmin")
1552 (set_attr "prefix" "<mask_prefix3>")
1553 (set_attr "mode" "<MODE>")])
1554
1555 (define_insn "*<code><mode>3<mask_name>"
1556 [(set (match_operand:VF 0 "register_operand" "=x,v")
1557 (smaxmin:VF
1558 (match_operand:VF 1 "register_operand" "0,v")
1559 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1560 "TARGET_SSE && !flag_finite_math_only
1561 && <mask_mode512bit_condition>"
1562 "@
1563 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1564 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1565 [(set_attr "isa" "noavx,avx")
1566 (set_attr "type" "sseadd")
1567 (set_attr "btver2_sse_attr" "maxmin")
1568 (set_attr "prefix" "<mask_prefix3>")
1569 (set_attr "mode" "<MODE>")])
1570
1571 (define_insn "<sse>_vm<code><mode>3"
1572 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1573 (vec_merge:VF_128
1574 (smaxmin:VF_128
1575 (match_operand:VF_128 1 "register_operand" "0,v")
1576 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1577 (match_dup 1)
1578 (const_int 1)))]
1579 "TARGET_SSE"
1580 "@
1581 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1582 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1583 [(set_attr "isa" "noavx,avx")
1584 (set_attr "type" "sse")
1585 (set_attr "btver2_sse_attr" "maxmin")
1586 (set_attr "prefix" "orig,vex")
1587 (set_attr "mode" "<ssescalarmode>")])
1588
1589 ;; These versions of the min/max patterns implement exactly the operations
1590 ;; min = (op1 < op2 ? op1 : op2)
1591 ;; max = (!(op1 < op2) ? op1 : op2)
1592 ;; Their operands are not commutative, and thus they may be used in the
1593 ;; presence of -0.0 and NaN.
1594
1595 (define_insn "*ieee_smin<mode>3"
1596 [(set (match_operand:VF 0 "register_operand" "=v,v")
1597 (unspec:VF
1598 [(match_operand:VF 1 "register_operand" "0,v")
1599 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1600 UNSPEC_IEEE_MIN))]
1601 "TARGET_SSE"
1602 "@
1603 min<ssemodesuffix>\t{%2, %0|%0, %2}
1604 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1605 [(set_attr "isa" "noavx,avx")
1606 (set_attr "type" "sseadd")
1607 (set_attr "prefix" "orig,vex")
1608 (set_attr "mode" "<MODE>")])
1609
1610 (define_insn "*ieee_smax<mode>3"
1611 [(set (match_operand:VF 0 "register_operand" "=v,v")
1612 (unspec:VF
1613 [(match_operand:VF 1 "register_operand" "0,v")
1614 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1615 UNSPEC_IEEE_MAX))]
1616 "TARGET_SSE"
1617 "@
1618 max<ssemodesuffix>\t{%2, %0|%0, %2}
1619 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1620 [(set_attr "isa" "noavx,avx")
1621 (set_attr "type" "sseadd")
1622 (set_attr "prefix" "orig,vex")
1623 (set_attr "mode" "<MODE>")])
1624
1625 (define_insn "avx_addsubv4df3"
1626 [(set (match_operand:V4DF 0 "register_operand" "=x")
1627 (vec_merge:V4DF
1628 (plus:V4DF
1629 (match_operand:V4DF 1 "register_operand" "x")
1630 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1631 (minus:V4DF (match_dup 1) (match_dup 2))
1632 (const_int 10)))]
1633 "TARGET_AVX"
1634 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1635 [(set_attr "type" "sseadd")
1636 (set_attr "prefix" "vex")
1637 (set_attr "mode" "V4DF")])
1638
1639 (define_insn "sse3_addsubv2df3"
1640 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1641 (vec_merge:V2DF
1642 (plus:V2DF
1643 (match_operand:V2DF 1 "register_operand" "0,x")
1644 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1645 (minus:V2DF (match_dup 1) (match_dup 2))
1646 (const_int 2)))]
1647 "TARGET_SSE3"
1648 "@
1649 addsubpd\t{%2, %0|%0, %2}
1650 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1651 [(set_attr "isa" "noavx,avx")
1652 (set_attr "type" "sseadd")
1653 (set_attr "atom_unit" "complex")
1654 (set_attr "prefix" "orig,vex")
1655 (set_attr "mode" "V2DF")])
1656
1657 (define_insn "avx_addsubv8sf3"
1658 [(set (match_operand:V8SF 0 "register_operand" "=x")
1659 (vec_merge:V8SF
1660 (plus:V8SF
1661 (match_operand:V8SF 1 "register_operand" "x")
1662 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1663 (minus:V8SF (match_dup 1) (match_dup 2))
1664 (const_int 170)))]
1665 "TARGET_AVX"
1666 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1667 [(set_attr "type" "sseadd")
1668 (set_attr "prefix" "vex")
1669 (set_attr "mode" "V8SF")])
1670
1671 (define_insn "sse3_addsubv4sf3"
1672 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1673 (vec_merge:V4SF
1674 (plus:V4SF
1675 (match_operand:V4SF 1 "register_operand" "0,x")
1676 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1677 (minus:V4SF (match_dup 1) (match_dup 2))
1678 (const_int 10)))]
1679 "TARGET_SSE3"
1680 "@
1681 addsubps\t{%2, %0|%0, %2}
1682 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1683 [(set_attr "isa" "noavx,avx")
1684 (set_attr "type" "sseadd")
1685 (set_attr "prefix" "orig,vex")
1686 (set_attr "prefix_rep" "1,*")
1687 (set_attr "mode" "V4SF")])
1688
1689 (define_insn "avx_h<plusminus_insn>v4df3"
1690 [(set (match_operand:V4DF 0 "register_operand" "=x")
1691 (vec_concat:V4DF
1692 (vec_concat:V2DF
1693 (plusminus:DF
1694 (vec_select:DF
1695 (match_operand:V4DF 1 "register_operand" "x")
1696 (parallel [(const_int 0)]))
1697 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1698 (plusminus:DF
1699 (vec_select:DF
1700 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1701 (parallel [(const_int 0)]))
1702 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1703 (vec_concat:V2DF
1704 (plusminus:DF
1705 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1706 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1707 (plusminus:DF
1708 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1709 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1710 "TARGET_AVX"
1711 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1712 [(set_attr "type" "sseadd")
1713 (set_attr "prefix" "vex")
1714 (set_attr "mode" "V4DF")])
1715
1716 (define_expand "sse3_haddv2df3"
1717 [(set (match_operand:V2DF 0 "register_operand")
1718 (vec_concat:V2DF
1719 (plus:DF
1720 (vec_select:DF
1721 (match_operand:V2DF 1 "register_operand")
1722 (parallel [(const_int 0)]))
1723 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1724 (plus:DF
1725 (vec_select:DF
1726 (match_operand:V2DF 2 "nonimmediate_operand")
1727 (parallel [(const_int 0)]))
1728 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1729 "TARGET_SSE3")
1730
1731 (define_insn "*sse3_haddv2df3"
1732 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1733 (vec_concat:V2DF
1734 (plus:DF
1735 (vec_select:DF
1736 (match_operand:V2DF 1 "register_operand" "0,x")
1737 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1738 (vec_select:DF
1739 (match_dup 1)
1740 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1741 (plus:DF
1742 (vec_select:DF
1743 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1744 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1745 (vec_select:DF
1746 (match_dup 2)
1747 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1748 "TARGET_SSE3
1749 && INTVAL (operands[3]) != INTVAL (operands[4])
1750 && INTVAL (operands[5]) != INTVAL (operands[6])"
1751 "@
1752 haddpd\t{%2, %0|%0, %2}
1753 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1754 [(set_attr "isa" "noavx,avx")
1755 (set_attr "type" "sseadd")
1756 (set_attr "prefix" "orig,vex")
1757 (set_attr "mode" "V2DF")])
1758
1759 (define_insn "sse3_hsubv2df3"
1760 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1761 (vec_concat:V2DF
1762 (minus:DF
1763 (vec_select:DF
1764 (match_operand:V2DF 1 "register_operand" "0,x")
1765 (parallel [(const_int 0)]))
1766 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1767 (minus:DF
1768 (vec_select:DF
1769 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1770 (parallel [(const_int 0)]))
1771 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1772 "TARGET_SSE3"
1773 "@
1774 hsubpd\t{%2, %0|%0, %2}
1775 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
1776 [(set_attr "isa" "noavx,avx")
1777 (set_attr "type" "sseadd")
1778 (set_attr "prefix" "orig,vex")
1779 (set_attr "mode" "V2DF")])
1780
1781 (define_insn "*sse3_haddv2df3_low"
1782 [(set (match_operand:DF 0 "register_operand" "=x,x")
1783 (plus:DF
1784 (vec_select:DF
1785 (match_operand:V2DF 1 "register_operand" "0,x")
1786 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
1787 (vec_select:DF
1788 (match_dup 1)
1789 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
1790 "TARGET_SSE3
1791 && INTVAL (operands[2]) != INTVAL (operands[3])"
1792 "@
1793 haddpd\t{%0, %0|%0, %0}
1794 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
1795 [(set_attr "isa" "noavx,avx")
1796 (set_attr "type" "sseadd1")
1797 (set_attr "prefix" "orig,vex")
1798 (set_attr "mode" "V2DF")])
1799
1800 (define_insn "*sse3_hsubv2df3_low"
1801 [(set (match_operand:DF 0 "register_operand" "=x,x")
1802 (minus:DF
1803 (vec_select:DF
1804 (match_operand:V2DF 1 "register_operand" "0,x")
1805 (parallel [(const_int 0)]))
1806 (vec_select:DF
1807 (match_dup 1)
1808 (parallel [(const_int 1)]))))]
1809 "TARGET_SSE3"
1810 "@
1811 hsubpd\t{%0, %0|%0, %0}
1812 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
1813 [(set_attr "isa" "noavx,avx")
1814 (set_attr "type" "sseadd1")
1815 (set_attr "prefix" "orig,vex")
1816 (set_attr "mode" "V2DF")])
1817
1818 (define_insn "avx_h<plusminus_insn>v8sf3"
1819 [(set (match_operand:V8SF 0 "register_operand" "=x")
1820 (vec_concat:V8SF
1821 (vec_concat:V4SF
1822 (vec_concat:V2SF
1823 (plusminus:SF
1824 (vec_select:SF
1825 (match_operand:V8SF 1 "register_operand" "x")
1826 (parallel [(const_int 0)]))
1827 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1828 (plusminus:SF
1829 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1830 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1831 (vec_concat:V2SF
1832 (plusminus:SF
1833 (vec_select:SF
1834 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1835 (parallel [(const_int 0)]))
1836 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1837 (plusminus:SF
1838 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1839 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1840 (vec_concat:V4SF
1841 (vec_concat:V2SF
1842 (plusminus:SF
1843 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1844 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1845 (plusminus:SF
1846 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1847 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1848 (vec_concat:V2SF
1849 (plusminus:SF
1850 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1851 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1852 (plusminus:SF
1853 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1854 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1855 "TARGET_AVX"
1856 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1857 [(set_attr "type" "sseadd")
1858 (set_attr "prefix" "vex")
1859 (set_attr "mode" "V8SF")])
1860
1861 (define_insn "sse3_h<plusminus_insn>v4sf3"
1862 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1863 (vec_concat:V4SF
1864 (vec_concat:V2SF
1865 (plusminus:SF
1866 (vec_select:SF
1867 (match_operand:V4SF 1 "register_operand" "0,x")
1868 (parallel [(const_int 0)]))
1869 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1870 (plusminus:SF
1871 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1872 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1873 (vec_concat:V2SF
1874 (plusminus:SF
1875 (vec_select:SF
1876 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1877 (parallel [(const_int 0)]))
1878 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1879 (plusminus:SF
1880 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1881 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1882 "TARGET_SSE3"
1883 "@
1884 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1885 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1886 [(set_attr "isa" "noavx,avx")
1887 (set_attr "type" "sseadd")
1888 (set_attr "atom_unit" "complex")
1889 (set_attr "prefix" "orig,vex")
1890 (set_attr "prefix_rep" "1,*")
1891 (set_attr "mode" "V4SF")])
1892
1893 (define_expand "reduc_splus_v8df"
1894 [(match_operand:V8DF 0 "register_operand")
1895 (match_operand:V8DF 1 "register_operand")]
1896 "TARGET_AVX512F"
1897 {
1898 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
1899 DONE;
1900 })
1901
1902 (define_expand "reduc_splus_v4df"
1903 [(match_operand:V4DF 0 "register_operand")
1904 (match_operand:V4DF 1 "register_operand")]
1905 "TARGET_AVX"
1906 {
1907 rtx tmp = gen_reg_rtx (V4DFmode);
1908 rtx tmp2 = gen_reg_rtx (V4DFmode);
1909 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1910 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1911 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
1912 DONE;
1913 })
1914
1915 (define_expand "reduc_splus_v2df"
1916 [(match_operand:V2DF 0 "register_operand")
1917 (match_operand:V2DF 1 "register_operand")]
1918 "TARGET_SSE3"
1919 {
1920 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1921 DONE;
1922 })
1923
1924 (define_expand "reduc_splus_v16sf"
1925 [(match_operand:V16SF 0 "register_operand")
1926 (match_operand:V16SF 1 "register_operand")]
1927 "TARGET_AVX512F"
1928 {
1929 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
1930 DONE;
1931 })
1932
1933 (define_expand "reduc_splus_v8sf"
1934 [(match_operand:V8SF 0 "register_operand")
1935 (match_operand:V8SF 1 "register_operand")]
1936 "TARGET_AVX"
1937 {
1938 rtx tmp = gen_reg_rtx (V8SFmode);
1939 rtx tmp2 = gen_reg_rtx (V8SFmode);
1940 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1941 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1942 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1943 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
1944 DONE;
1945 })
1946
1947 (define_expand "reduc_splus_v4sf"
1948 [(match_operand:V4SF 0 "register_operand")
1949 (match_operand:V4SF 1 "register_operand")]
1950 "TARGET_SSE"
1951 {
1952 if (TARGET_SSE3)
1953 {
1954 rtx tmp = gen_reg_rtx (V4SFmode);
1955 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1956 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1957 }
1958 else
1959 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1960 DONE;
1961 })
1962
1963 ;; Modes handled by reduc_sm{in,ax}* patterns.
1964 (define_mode_iterator REDUC_SMINMAX_MODE
1965 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1966 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1967 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1968 (V4SF "TARGET_SSE") (V16SI "TARGET_AVX512F")
1969 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
1970 (V8DF "TARGET_AVX512F")])
1971
1972 (define_expand "reduc_<code>_<mode>"
1973 [(smaxmin:REDUC_SMINMAX_MODE
1974 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
1975 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
1976 ""
1977 {
1978 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1979 DONE;
1980 })
1981
1982 (define_expand "reduc_<code>_<mode>"
1983 [(umaxmin:VI48_512
1984 (match_operand:VI48_512 0 "register_operand")
1985 (match_operand:VI48_512 1 "register_operand"))]
1986 "TARGET_AVX512F"
1987 {
1988 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1989 DONE;
1990 })
1991
1992 (define_expand "reduc_<code>_<mode>"
1993 [(umaxmin:VI_256
1994 (match_operand:VI_256 0 "register_operand")
1995 (match_operand:VI_256 1 "register_operand"))]
1996 "TARGET_AVX2"
1997 {
1998 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1999 DONE;
2000 })
2001
2002 (define_expand "reduc_umin_v8hi"
2003 [(umin:V8HI
2004 (match_operand:V8HI 0 "register_operand")
2005 (match_operand:V8HI 1 "register_operand"))]
2006 "TARGET_SSE4_1"
2007 {
2008 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
2009 DONE;
2010 })
2011
2012 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2013 ;;
2014 ;; Parallel floating point comparisons
2015 ;;
2016 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2017
2018 (define_insn "avx_cmp<mode>3"
2019 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2020 (unspec:VF_128_256
2021 [(match_operand:VF_128_256 1 "register_operand" "x")
2022 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2023 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2024 UNSPEC_PCMP))]
2025 "TARGET_AVX"
2026 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2027 [(set_attr "type" "ssecmp")
2028 (set_attr "length_immediate" "1")
2029 (set_attr "prefix" "vex")
2030 (set_attr "mode" "<MODE>")])
2031
2032 (define_insn "avx_vmcmp<mode>3"
2033 [(set (match_operand:VF_128 0 "register_operand" "=x")
2034 (vec_merge:VF_128
2035 (unspec:VF_128
2036 [(match_operand:VF_128 1 "register_operand" "x")
2037 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2038 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2039 UNSPEC_PCMP)
2040 (match_dup 1)
2041 (const_int 1)))]
2042 "TARGET_AVX"
2043 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2044 [(set_attr "type" "ssecmp")
2045 (set_attr "length_immediate" "1")
2046 (set_attr "prefix" "vex")
2047 (set_attr "mode" "<ssescalarmode>")])
2048
2049 (define_insn "*<sse>_maskcmp<mode>3_comm"
2050 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2051 (match_operator:VF_128_256 3 "sse_comparison_operator"
2052 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2053 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2054 "TARGET_SSE
2055 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2056 "@
2057 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2058 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2059 [(set_attr "isa" "noavx,avx")
2060 (set_attr "type" "ssecmp")
2061 (set_attr "length_immediate" "1")
2062 (set_attr "prefix" "orig,vex")
2063 (set_attr "mode" "<MODE>")])
2064
2065 (define_insn "<sse>_maskcmp<mode>3"
2066 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2067 (match_operator:VF_128_256 3 "sse_comparison_operator"
2068 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2069 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2070 "TARGET_SSE"
2071 "@
2072 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2073 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2074 [(set_attr "isa" "noavx,avx")
2075 (set_attr "type" "ssecmp")
2076 (set_attr "length_immediate" "1")
2077 (set_attr "prefix" "orig,vex")
2078 (set_attr "mode" "<MODE>")])
2079
2080 (define_insn "<sse>_vmmaskcmp<mode>3"
2081 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2082 (vec_merge:VF_128
2083 (match_operator:VF_128 3 "sse_comparison_operator"
2084 [(match_operand:VF_128 1 "register_operand" "0,x")
2085 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2086 (match_dup 1)
2087 (const_int 1)))]
2088 "TARGET_SSE"
2089 "@
2090 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2091 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2092 [(set_attr "isa" "noavx,avx")
2093 (set_attr "type" "ssecmp")
2094 (set_attr "length_immediate" "1,*")
2095 (set_attr "prefix" "orig,vex")
2096 (set_attr "mode" "<ssescalarmode>")])
2097
2098 (define_mode_attr cmp_imm_predicate
2099 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2100 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")])
2101
2102 (define_insn "avx512f_cmp<mode>3"
2103 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2104 (unspec:<avx512fmaskmode>
2105 [(match_operand:VI48F_512 1 "register_operand" "v")
2106 (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
2107 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2108 UNSPEC_PCMP))]
2109 "TARGET_AVX512F"
2110 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2111 [(set_attr "type" "ssecmp")
2112 (set_attr "length_immediate" "1")
2113 (set_attr "prefix" "evex")
2114 (set_attr "mode" "<sseinsnmode>")])
2115
2116 (define_insn "avx512f_ucmp<mode>3"
2117 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2118 (unspec:<avx512fmaskmode>
2119 [(match_operand:VI48_512 1 "register_operand" "v")
2120 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
2121 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2122 UNSPEC_UNSIGNED_PCMP))]
2123 "TARGET_AVX512F"
2124 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2125 [(set_attr "type" "ssecmp")
2126 (set_attr "length_immediate" "1")
2127 (set_attr "prefix" "evex")
2128 (set_attr "mode" "<sseinsnmode>")])
2129
2130 (define_insn "avx512f_vmcmp<mode>3"
2131 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2132 (and:<avx512fmaskmode>
2133 (unspec:<avx512fmaskmode>
2134 [(match_operand:VF_128 1 "register_operand" "v")
2135 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2136 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2137 UNSPEC_PCMP)
2138 (const_int 1)))]
2139 "TARGET_AVX512F"
2140 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2141 [(set_attr "type" "ssecmp")
2142 (set_attr "length_immediate" "1")
2143 (set_attr "prefix" "evex")
2144 (set_attr "mode" "<ssescalarmode>")])
2145
2146 (define_insn "avx512f_vmcmp<mode>3_mask"
2147 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2148 (and:<avx512fmaskmode>
2149 (unspec:<avx512fmaskmode>
2150 [(match_operand:VF_128 1 "register_operand" "v")
2151 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2152 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2153 UNSPEC_PCMP)
2154 (and:<avx512fmaskmode>
2155 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")
2156 (const_int 1))))]
2157 "TARGET_AVX512F"
2158 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0%{%4%}|%0%{%4%}, %1, %2, %3}"
2159 [(set_attr "type" "ssecmp")
2160 (set_attr "length_immediate" "1")
2161 (set_attr "prefix" "evex")
2162 (set_attr "mode" "<ssescalarmode>")])
2163
2164 (define_insn "avx512f_maskcmp<mode>3"
2165 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2166 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2167 [(match_operand:VF 1 "register_operand" "v")
2168 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2169 "TARGET_SSE"
2170 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2171 [(set_attr "type" "ssecmp")
2172 (set_attr "length_immediate" "1")
2173 (set_attr "prefix" "evex")
2174 (set_attr "mode" "<sseinsnmode>")])
2175
2176 (define_insn "<sse>_comi"
2177 [(set (reg:CCFP FLAGS_REG)
2178 (compare:CCFP
2179 (vec_select:MODEF
2180 (match_operand:<ssevecmode> 0 "register_operand" "v")
2181 (parallel [(const_int 0)]))
2182 (vec_select:MODEF
2183 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "vm")
2184 (parallel [(const_int 0)]))))]
2185 "SSE_FLOAT_MODE_P (<MODE>mode)"
2186 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
2187 [(set_attr "type" "ssecomi")
2188 (set_attr "prefix" "maybe_vex")
2189 (set_attr "prefix_rep" "0")
2190 (set (attr "prefix_data16")
2191 (if_then_else (eq_attr "mode" "DF")
2192 (const_string "1")
2193 (const_string "0")))
2194 (set_attr "mode" "<MODE>")])
2195
2196 (define_insn "<sse>_ucomi"
2197 [(set (reg:CCFPU FLAGS_REG)
2198 (compare:CCFPU
2199 (vec_select:MODEF
2200 (match_operand:<ssevecmode> 0 "register_operand" "v")
2201 (parallel [(const_int 0)]))
2202 (vec_select:MODEF
2203 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "vm")
2204 (parallel [(const_int 0)]))))]
2205 "SSE_FLOAT_MODE_P (<MODE>mode)"
2206 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
2207 [(set_attr "type" "ssecomi")
2208 (set_attr "prefix" "maybe_vex")
2209 (set_attr "prefix_rep" "0")
2210 (set (attr "prefix_data16")
2211 (if_then_else (eq_attr "mode" "DF")
2212 (const_string "1")
2213 (const_string "0")))
2214 (set_attr "mode" "<MODE>")])
2215
2216 (define_expand "vcond<V_512:mode><VF_512:mode>"
2217 [(set (match_operand:V_512 0 "register_operand")
2218 (if_then_else:V_512
2219 (match_operator 3 ""
2220 [(match_operand:VF_512 4 "nonimmediate_operand")
2221 (match_operand:VF_512 5 "nonimmediate_operand")])
2222 (match_operand:V_512 1 "general_operand")
2223 (match_operand:V_512 2 "general_operand")))]
2224 "TARGET_AVX512F
2225 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2226 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2227 {
2228 bool ok = ix86_expand_fp_vcond (operands);
2229 gcc_assert (ok);
2230 DONE;
2231 })
2232
2233 (define_expand "vcond<V_256:mode><VF_256:mode>"
2234 [(set (match_operand:V_256 0 "register_operand")
2235 (if_then_else:V_256
2236 (match_operator 3 ""
2237 [(match_operand:VF_256 4 "nonimmediate_operand")
2238 (match_operand:VF_256 5 "nonimmediate_operand")])
2239 (match_operand:V_256 1 "general_operand")
2240 (match_operand:V_256 2 "general_operand")))]
2241 "TARGET_AVX
2242 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2243 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2244 {
2245 bool ok = ix86_expand_fp_vcond (operands);
2246 gcc_assert (ok);
2247 DONE;
2248 })
2249
2250 (define_expand "vcond<V_128:mode><VF_128:mode>"
2251 [(set (match_operand:V_128 0 "register_operand")
2252 (if_then_else:V_128
2253 (match_operator 3 ""
2254 [(match_operand:VF_128 4 "nonimmediate_operand")
2255 (match_operand:VF_128 5 "nonimmediate_operand")])
2256 (match_operand:V_128 1 "general_operand")
2257 (match_operand:V_128 2 "general_operand")))]
2258 "TARGET_SSE
2259 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2260 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2261 {
2262 bool ok = ix86_expand_fp_vcond (operands);
2263 gcc_assert (ok);
2264 DONE;
2265 })
2266
2267 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2268 ;;
2269 ;; Parallel floating point logical operations
2270 ;;
2271 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2272
2273 (define_insn "<sse>_andnot<mode>3"
2274 [(set (match_operand:VF 0 "register_operand" "=x,v")
2275 (and:VF
2276 (not:VF
2277 (match_operand:VF 1 "register_operand" "0,v"))
2278 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2279 "TARGET_SSE"
2280 {
2281 static char buf[32];
2282 const char *ops;
2283 const char *suffix;
2284
2285 switch (get_attr_mode (insn))
2286 {
2287 case MODE_V8SF:
2288 case MODE_V4SF:
2289 suffix = "ps";
2290 break;
2291 default:
2292 suffix = "<ssemodesuffix>";
2293 }
2294
2295 switch (which_alternative)
2296 {
2297 case 0:
2298 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2299 break;
2300 case 1:
2301 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2302 break;
2303 default:
2304 gcc_unreachable ();
2305 }
2306
2307 /* There is no vandnp[sd]. Use vpandnq. */
2308 if (GET_MODE_SIZE (<MODE>mode) == 64)
2309 {
2310 suffix = "q";
2311 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2312 }
2313
2314 snprintf (buf, sizeof (buf), ops, suffix);
2315 return buf;
2316 }
2317 [(set_attr "isa" "noavx,avx")
2318 (set_attr "type" "sselog")
2319 (set_attr "prefix" "orig,maybe_evex")
2320 (set (attr "mode")
2321 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2322 (const_string "<ssePSmode>")
2323 (match_test "TARGET_AVX")
2324 (const_string "<MODE>")
2325 (match_test "optimize_function_for_size_p (cfun)")
2326 (const_string "V4SF")
2327 ]
2328 (const_string "<MODE>")))])
2329
2330 (define_expand "<code><mode>3"
2331 [(set (match_operand:VF_128_256 0 "register_operand")
2332 (any_logic:VF_128_256
2333 (match_operand:VF_128_256 1 "nonimmediate_operand")
2334 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
2335 "TARGET_SSE"
2336 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
2337
2338 (define_expand "<code><mode>3"
2339 [(set (match_operand:VF_512 0 "register_operand")
2340 (fpint_logic:VF_512
2341 (match_operand:VF_512 1 "nonimmediate_operand")
2342 (match_operand:VF_512 2 "nonimmediate_operand")))]
2343 "TARGET_AVX512F"
2344 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
2345
2346 (define_insn "*<code><mode>3"
2347 [(set (match_operand:VF 0 "register_operand" "=x,v")
2348 (any_logic:VF
2349 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
2350 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2351 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2352 {
2353 static char buf[32];
2354 const char *ops;
2355 const char *suffix;
2356
2357 switch (get_attr_mode (insn))
2358 {
2359 case MODE_V8SF:
2360 case MODE_V4SF:
2361 suffix = "ps";
2362 break;
2363 default:
2364 suffix = "<ssemodesuffix>";
2365 }
2366
2367 switch (which_alternative)
2368 {
2369 case 0:
2370 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2371 break;
2372 case 1:
2373 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2374 break;
2375 default:
2376 gcc_unreachable ();
2377 }
2378
2379 /* There is no v<logic>p[sd]. Use vp<logic>q. */
2380 if (GET_MODE_SIZE (<MODE>mode) == 64)
2381 {
2382 suffix = "q";
2383 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2384 }
2385
2386 snprintf (buf, sizeof (buf), ops, suffix);
2387 return buf;
2388 }
2389 [(set_attr "isa" "noavx,avx")
2390 (set_attr "type" "sselog")
2391 (set_attr "prefix" "orig,maybe_evex")
2392 (set (attr "mode")
2393 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2394 (const_string "<ssePSmode>")
2395 (match_test "TARGET_AVX")
2396 (const_string "<MODE>")
2397 (match_test "optimize_function_for_size_p (cfun)")
2398 (const_string "V4SF")
2399 ]
2400 (const_string "<MODE>")))])
2401
2402 (define_expand "copysign<mode>3"
2403 [(set (match_dup 4)
2404 (and:VF
2405 (not:VF (match_dup 3))
2406 (match_operand:VF 1 "nonimmediate_operand")))
2407 (set (match_dup 5)
2408 (and:VF (match_dup 3)
2409 (match_operand:VF 2 "nonimmediate_operand")))
2410 (set (match_operand:VF 0 "register_operand")
2411 (ior:VF (match_dup 4) (match_dup 5)))]
2412 "TARGET_SSE"
2413 {
2414 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2415
2416 operands[4] = gen_reg_rtx (<MODE>mode);
2417 operands[5] = gen_reg_rtx (<MODE>mode);
2418 })
2419
2420 ;; Also define scalar versions. These are used for abs, neg, and
2421 ;; conditional move. Using subregs into vector modes causes register
2422 ;; allocation lossage. These patterns do not allow memory operands
2423 ;; because the native instructions read the full 128-bits.
2424
2425 (define_insn "*andnot<mode>3"
2426 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2427 (and:MODEF
2428 (not:MODEF
2429 (match_operand:MODEF 1 "register_operand" "0,x"))
2430 (match_operand:MODEF 2 "register_operand" "x,x")))]
2431 "SSE_FLOAT_MODE_P (<MODE>mode)"
2432 {
2433 static char buf[32];
2434 const char *ops;
2435 const char *suffix
2436 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2437
2438 switch (which_alternative)
2439 {
2440 case 0:
2441 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2442 break;
2443 case 1:
2444 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2445 break;
2446 default:
2447 gcc_unreachable ();
2448 }
2449
2450 snprintf (buf, sizeof (buf), ops, suffix);
2451 return buf;
2452 }
2453 [(set_attr "isa" "noavx,avx")
2454 (set_attr "type" "sselog")
2455 (set_attr "prefix" "orig,vex")
2456 (set (attr "mode")
2457 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2458 (const_string "V4SF")
2459 (match_test "TARGET_AVX")
2460 (const_string "<ssevecmode>")
2461 (match_test "optimize_function_for_size_p (cfun)")
2462 (const_string "V4SF")
2463 ]
2464 (const_string "<ssevecmode>")))])
2465
2466 (define_insn "*andnottf3"
2467 [(set (match_operand:TF 0 "register_operand" "=x,x")
2468 (and:TF
2469 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2470 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2471 "TARGET_SSE"
2472 {
2473 static char buf[32];
2474 const char *ops;
2475 const char *tmp
2476 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2477
2478 switch (which_alternative)
2479 {
2480 case 0:
2481 ops = "%s\t{%%2, %%0|%%0, %%2}";
2482 break;
2483 case 1:
2484 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2485 break;
2486 default:
2487 gcc_unreachable ();
2488 }
2489
2490 snprintf (buf, sizeof (buf), ops, tmp);
2491 return buf;
2492 }
2493 [(set_attr "isa" "noavx,avx")
2494 (set_attr "type" "sselog")
2495 (set (attr "prefix_data16")
2496 (if_then_else
2497 (and (eq_attr "alternative" "0")
2498 (eq_attr "mode" "TI"))
2499 (const_string "1")
2500 (const_string "*")))
2501 (set_attr "prefix" "orig,vex")
2502 (set (attr "mode")
2503 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2504 (const_string "V4SF")
2505 (match_test "TARGET_AVX")
2506 (const_string "TI")
2507 (ior (not (match_test "TARGET_SSE2"))
2508 (match_test "optimize_function_for_size_p (cfun)"))
2509 (const_string "V4SF")
2510 ]
2511 (const_string "TI")))])
2512
2513 (define_insn "*<code><mode>3"
2514 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2515 (any_logic:MODEF
2516 (match_operand:MODEF 1 "register_operand" "%0,x")
2517 (match_operand:MODEF 2 "register_operand" "x,x")))]
2518 "SSE_FLOAT_MODE_P (<MODE>mode)"
2519 {
2520 static char buf[32];
2521 const char *ops;
2522 const char *suffix
2523 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2524
2525 switch (which_alternative)
2526 {
2527 case 0:
2528 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2529 break;
2530 case 1:
2531 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2532 break;
2533 default:
2534 gcc_unreachable ();
2535 }
2536
2537 snprintf (buf, sizeof (buf), ops, suffix);
2538 return buf;
2539 }
2540 [(set_attr "isa" "noavx,avx")
2541 (set_attr "type" "sselog")
2542 (set_attr "prefix" "orig,vex")
2543 (set (attr "mode")
2544 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2545 (const_string "V4SF")
2546 (match_test "TARGET_AVX")
2547 (const_string "<ssevecmode>")
2548 (match_test "optimize_function_for_size_p (cfun)")
2549 (const_string "V4SF")
2550 ]
2551 (const_string "<ssevecmode>")))])
2552
2553 (define_expand "<code>tf3"
2554 [(set (match_operand:TF 0 "register_operand")
2555 (any_logic:TF
2556 (match_operand:TF 1 "nonimmediate_operand")
2557 (match_operand:TF 2 "nonimmediate_operand")))]
2558 "TARGET_SSE"
2559 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
2560
2561 (define_insn "*<code>tf3"
2562 [(set (match_operand:TF 0 "register_operand" "=x,x")
2563 (any_logic:TF
2564 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
2565 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2566 "TARGET_SSE
2567 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
2568 {
2569 static char buf[32];
2570 const char *ops;
2571 const char *tmp
2572 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
2573
2574 switch (which_alternative)
2575 {
2576 case 0:
2577 ops = "%s\t{%%2, %%0|%%0, %%2}";
2578 break;
2579 case 1:
2580 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2581 break;
2582 default:
2583 gcc_unreachable ();
2584 }
2585
2586 snprintf (buf, sizeof (buf), ops, tmp);
2587 return buf;
2588 }
2589 [(set_attr "isa" "noavx,avx")
2590 (set_attr "type" "sselog")
2591 (set (attr "prefix_data16")
2592 (if_then_else
2593 (and (eq_attr "alternative" "0")
2594 (eq_attr "mode" "TI"))
2595 (const_string "1")
2596 (const_string "*")))
2597 (set_attr "prefix" "orig,vex")
2598 (set (attr "mode")
2599 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2600 (const_string "V4SF")
2601 (match_test "TARGET_AVX")
2602 (const_string "TI")
2603 (ior (not (match_test "TARGET_SSE2"))
2604 (match_test "optimize_function_for_size_p (cfun)"))
2605 (const_string "V4SF")
2606 ]
2607 (const_string "TI")))])
2608
2609 ;; There are no floating point xor for V16SF and V8DF in avx512f
2610 ;; but we need them for negation. Instead we use int versions of
2611 ;; xor. Maybe there could be a better way to do that.
2612
2613 (define_mode_attr avx512flogicsuff
2614 [(V16SF "d") (V8DF "q")])
2615
2616 (define_insn "avx512f_<logic><mode>"
2617 [(set (match_operand:VF_512 0 "register_operand" "=v")
2618 (fpint_logic:VF_512
2619 (match_operand:VF_512 1 "register_operand" "v")
2620 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2621 "TARGET_AVX512F"
2622 "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
2623 [(set_attr "type" "sselog")
2624 (set_attr "prefix" "evex")])
2625
2626 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2627 ;;
2628 ;; FMA floating point multiply/accumulate instructions. These include
2629 ;; scalar versions of the instructions as well as vector versions.
2630 ;;
2631 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2632
2633 ;; The standard names for scalar FMA are only available with SSE math enabled.
2634 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
2635 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
2636 ;; and TARGET_FMA4 are both false.
2637 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
2638 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
2639 ;; GAS to allow proper prefix selection. However, for the moment all hardware
2640 ;; that supports AVX512F also supports FMA so we can ignore this for now.
2641 (define_mode_iterator FMAMODEM
2642 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2643 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2644 (V4SF "TARGET_FMA || TARGET_FMA4")
2645 (V2DF "TARGET_FMA || TARGET_FMA4")
2646 (V8SF "TARGET_FMA || TARGET_FMA4")
2647 (V4DF "TARGET_FMA || TARGET_FMA4")
2648 (V16SF "TARGET_AVX512F")
2649 (V8DF "TARGET_AVX512F")])
2650
2651 (define_expand "fma<mode>4"
2652 [(set (match_operand:FMAMODEM 0 "register_operand")
2653 (fma:FMAMODEM
2654 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2655 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2656 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
2657 "")
2658
2659 (define_expand "fms<mode>4"
2660 [(set (match_operand:FMAMODEM 0 "register_operand")
2661 (fma:FMAMODEM
2662 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2663 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2664 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2665 "")
2666
2667 (define_expand "fnma<mode>4"
2668 [(set (match_operand:FMAMODEM 0 "register_operand")
2669 (fma:FMAMODEM
2670 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2671 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2672 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
2673 "")
2674
2675 (define_expand "fnms<mode>4"
2676 [(set (match_operand:FMAMODEM 0 "register_operand")
2677 (fma:FMAMODEM
2678 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2679 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2680 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2681 "")
2682
2683 ;; The builtins for intrinsics are not constrained by SSE math enabled.
2684 (define_mode_iterator FMAMODE [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2685 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2686 (V4SF "TARGET_FMA || TARGET_FMA4")
2687 (V2DF "TARGET_FMA || TARGET_FMA4")
2688 (V8SF "TARGET_FMA || TARGET_FMA4")
2689 (V4DF "TARGET_FMA || TARGET_FMA4")
2690 (V16SF "TARGET_AVX512F")
2691 (V8DF "TARGET_AVX512F")])
2692
2693 (define_expand "fma4i_fmadd_<mode>"
2694 [(set (match_operand:FMAMODE 0 "register_operand")
2695 (fma:FMAMODE
2696 (match_operand:FMAMODE 1 "nonimmediate_operand")
2697 (match_operand:FMAMODE 2 "nonimmediate_operand")
2698 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2699 "")
2700
2701 (define_insn "*fma_fmadd_<mode>"
2702 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2703 (fma:FMAMODE
2704 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
2705 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2706 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
2707 ""
2708 "@
2709 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2710 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2711 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2712 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2713 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2714 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2715 (set_attr "type" "ssemuladd")
2716 (set_attr "mode" "<MODE>")])
2717
2718 (define_insn "avx512f_fmadd_<mode>_mask"
2719 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2720 (vec_merge:VF_512
2721 (fma:VF_512
2722 (match_operand:VF_512 1 "register_operand" "0,0")
2723 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2724 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))
2725 (match_dup 1)
2726 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2727 "TARGET_AVX512F"
2728 "@
2729 vfmadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2730 vfmadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2731 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2732 (set_attr "type" "ssemuladd")
2733 (set_attr "mode" "<MODE>")])
2734
2735 (define_insn "avx512f_fmadd_<mode>_mask3"
2736 [(set (match_operand:VF_512 0 "register_operand" "=x")
2737 (vec_merge:VF_512
2738 (fma:VF_512
2739 (match_operand:VF_512 1 "register_operand" "x")
2740 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2741 (match_operand:VF_512 3 "register_operand" "0"))
2742 (match_dup 3)
2743 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2744 "TARGET_AVX512F"
2745 "vfmadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2746 [(set_attr "isa" "fma_avx512f")
2747 (set_attr "type" "ssemuladd")
2748 (set_attr "mode" "<MODE>")])
2749
2750 (define_insn "*fma_fmsub_<mode>"
2751 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2752 (fma:FMAMODE
2753 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
2754 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2755 (neg:FMAMODE
2756 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
2757 ""
2758 "@
2759 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2760 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2761 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2762 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2763 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2764 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2765 (set_attr "type" "ssemuladd")
2766 (set_attr "mode" "<MODE>")])
2767
2768 (define_insn "avx512f_fmsub_<mode>_mask"
2769 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2770 (vec_merge:VF_512
2771 (fma:VF_512
2772 (match_operand:VF_512 1 "register_operand" "0,0")
2773 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2774 (neg:VF_512
2775 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")))
2776 (match_dup 1)
2777 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2778 "TARGET_AVX512F"
2779 "@
2780 vfmsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2781 vfmsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2782 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2783 (set_attr "type" "ssemuladd")
2784 (set_attr "mode" "<MODE>")])
2785
2786 (define_insn "avx512f_fmsub_<mode>_mask3"
2787 [(set (match_operand:VF_512 0 "register_operand" "=v")
2788 (vec_merge:VF_512
2789 (fma:VF_512
2790 (match_operand:VF_512 1 "register_operand" "v")
2791 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2792 (neg:VF_512
2793 (match_operand:VF_512 3 "register_operand" "0")))
2794 (match_dup 3)
2795 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2796 "TARGET_AVX512F"
2797 "vfmsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2798 [(set_attr "isa" "fma_avx512f")
2799 (set_attr "type" "ssemuladd")
2800 (set_attr "mode" "<MODE>")])
2801
2802 (define_insn "*fma_fnmadd_<mode>"
2803 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2804 (fma:FMAMODE
2805 (neg:FMAMODE
2806 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
2807 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2808 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
2809 ""
2810 "@
2811 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2812 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2813 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2814 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2815 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2816 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2817 (set_attr "type" "ssemuladd")
2818 (set_attr "mode" "<MODE>")])
2819
2820 (define_insn "avx512f_fnmadd_<mode>_mask"
2821 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2822 (vec_merge:VF_512
2823 (fma:VF_512
2824 (neg:VF_512
2825 (match_operand:VF_512 1 "register_operand" "0,0"))
2826 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2827 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))
2828 (match_dup 1)
2829 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2830 "TARGET_AVX512F"
2831 "@
2832 vfnmadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2833 vfnmadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2834 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2835 (set_attr "type" "ssemuladd")
2836 (set_attr "mode" "<MODE>")])
2837
2838 (define_insn "avx512f_fnmadd_<mode>_mask3"
2839 [(set (match_operand:VF_512 0 "register_operand" "=v")
2840 (vec_merge:VF_512
2841 (fma:VF_512
2842 (neg:VF_512
2843 (match_operand:VF_512 1 "register_operand" "v"))
2844 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2845 (match_operand:VF_512 3 "register_operand" "0"))
2846 (match_dup 3)
2847 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2848 "TARGET_AVX512F"
2849 "vfnmadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2850 [(set_attr "isa" "fma_avx512f")
2851 (set_attr "type" "ssemuladd")
2852 (set_attr "mode" "<MODE>")])
2853
2854 (define_insn "*fma_fnmsub_<mode>"
2855 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2856 (fma:FMAMODE
2857 (neg:FMAMODE
2858 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
2859 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2860 (neg:FMAMODE
2861 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
2862 ""
2863 "@
2864 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2865 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2866 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2867 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2868 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2869 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2870 (set_attr "type" "ssemuladd")
2871 (set_attr "mode" "<MODE>")])
2872
2873 (define_insn "avx512f_fnmsub_<mode>_mask"
2874 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2875 (vec_merge:VF_512
2876 (fma:VF_512
2877 (neg:VF_512
2878 (match_operand:VF_512 1 "register_operand" "0,0"))
2879 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2880 (neg:VF_512
2881 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")))
2882 (match_dup 1)
2883 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2884 "TARGET_AVX512F"
2885 "@
2886 vfnmsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2887 vfnmsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2888 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2889 (set_attr "type" "ssemuladd")
2890 (set_attr "mode" "<MODE>")])
2891
2892 (define_insn "avx512f_fnmsub_<mode>_mask3"
2893 [(set (match_operand:VF_512 0 "register_operand" "=v")
2894 (vec_merge:VF_512
2895 (fma:VF_512
2896 (neg:VF_512
2897 (match_operand:VF_512 1 "register_operand" "v"))
2898 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2899 (neg:VF_512
2900 (match_operand:VF_512 3 "register_operand" "0")))
2901 (match_dup 3)
2902 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2903 "TARGET_AVX512F"
2904 "vfnmsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2905 [(set_attr "isa" "fma_avx512f")
2906 (set_attr "type" "ssemuladd")
2907 (set_attr "mode" "<MODE>")])
2908
2909 ;; FMA parallel floating point multiply addsub and subadd operations.
2910
2911 ;; It would be possible to represent these without the UNSPEC as
2912 ;;
2913 ;; (vec_merge
2914 ;; (fma op1 op2 op3)
2915 ;; (fma op1 op2 (neg op3))
2916 ;; (merge-const))
2917 ;;
2918 ;; But this doesn't seem useful in practice.
2919
2920 (define_expand "fmaddsub_<mode>"
2921 [(set (match_operand:VF 0 "register_operand")
2922 (unspec:VF
2923 [(match_operand:VF 1 "nonimmediate_operand")
2924 (match_operand:VF 2 "nonimmediate_operand")
2925 (match_operand:VF 3 "nonimmediate_operand")]
2926 UNSPEC_FMADDSUB))]
2927 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2928
2929 (define_insn "*fma_fmaddsub_<mode>"
2930 [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
2931 (unspec:VF
2932 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x")
2933 (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m")
2934 (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x")]
2935 UNSPEC_FMADDSUB))]
2936 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)"
2937 "@
2938 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2939 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2940 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2941 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2942 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2943 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2944 (set_attr "type" "ssemuladd")
2945 (set_attr "mode" "<MODE>")])
2946
2947 (define_insn "avx512f_fmaddsub_<mode>_mask"
2948 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2949 (vec_merge:VF_512
2950 (unspec:VF_512
2951 [(match_operand:VF_512 1 "register_operand" "0,0")
2952 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2953 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")]
2954 UNSPEC_FMADDSUB)
2955 (match_dup 1)
2956 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2957 "TARGET_AVX512F"
2958 "@
2959 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2960 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2961 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2962 (set_attr "type" "ssemuladd")
2963 (set_attr "mode" "<MODE>")])
2964
2965 (define_insn "avx512f_fmaddsub_<mode>_mask3"
2966 [(set (match_operand:VF_512 0 "register_operand" "=v")
2967 (vec_merge:VF_512
2968 (unspec:VF_512
2969 [(match_operand:VF_512 1 "register_operand" "v")
2970 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2971 (match_operand:VF_512 3 "register_operand" "0")]
2972 UNSPEC_FMADDSUB)
2973 (match_dup 3)
2974 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2975 "TARGET_AVX512F"
2976 "vfmaddsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2977 [(set_attr "isa" "fma_avx512f")
2978 (set_attr "type" "ssemuladd")
2979 (set_attr "mode" "<MODE>")])
2980
2981 (define_insn "*fma_fmsubadd_<mode>"
2982 [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
2983 (unspec:VF
2984 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x")
2985 (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m")
2986 (neg:VF
2987 (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x"))]
2988 UNSPEC_FMADDSUB))]
2989 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)"
2990 "@
2991 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2992 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2993 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2994 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2995 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2996 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2997 (set_attr "type" "ssemuladd")
2998 (set_attr "mode" "<MODE>")])
2999
3000 (define_insn "avx512f_fmsubadd_<mode>_mask"
3001 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3002 (vec_merge:VF_512
3003 (unspec:VF_512
3004 [(match_operand:VF_512 1 "register_operand" "0,0")
3005 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
3006 (neg:VF_512
3007 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))]
3008 UNSPEC_FMADDSUB)
3009 (match_dup 1)
3010 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
3011 "TARGET_AVX512F"
3012 "@
3013 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
3014 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
3015 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3016 (set_attr "type" "ssemuladd")
3017 (set_attr "mode" "<MODE>")])
3018
3019 (define_insn "avx512f_fmsubadd_<mode>_mask3"
3020 [(set (match_operand:VF_512 0 "register_operand" "=v")
3021 (vec_merge:VF_512
3022 (unspec:VF_512
3023 [(match_operand:VF_512 1 "register_operand" "v")
3024 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
3025 (neg:VF_512
3026 (match_operand:VF_512 3 "register_operand" "0"))]
3027 UNSPEC_FMADDSUB)
3028 (match_dup 3)
3029 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
3030 "TARGET_AVX512F"
3031 "vfmsubadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
3032 [(set_attr "isa" "fma_avx512f")
3033 (set_attr "type" "ssemuladd")
3034 (set_attr "mode" "<MODE>")])
3035
3036 ;; FMA3 floating point scalar intrinsics. These merge result with
3037 ;; high-order elements from the destination register.
3038
3039 (define_expand "fmai_vmfmadd_<mode>"
3040 [(set (match_operand:VF_128 0 "register_operand")
3041 (vec_merge:VF_128
3042 (fma:VF_128
3043 (match_operand:VF_128 1 "nonimmediate_operand")
3044 (match_operand:VF_128 2 "nonimmediate_operand")
3045 (match_operand:VF_128 3 "nonimmediate_operand"))
3046 (match_dup 1)
3047 (const_int 1)))]
3048 "TARGET_FMA")
3049
3050 (define_insn "*fmai_fmadd_<mode>"
3051 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3052 (vec_merge:VF_128
3053 (fma:VF_128
3054 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
3055 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
3056 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))
3057 (match_dup 1)
3058 (const_int 1)))]
3059 "TARGET_FMA || TARGET_AVX512F"
3060 "@
3061 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
3062 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
3063 [(set_attr "type" "ssemuladd")
3064 (set_attr "mode" "<MODE>")])
3065
3066 (define_insn "*fmai_fmsub_<mode>"
3067 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3068 (vec_merge:VF_128
3069 (fma:VF_128
3070 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
3071 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
3072 (neg:VF_128
3073 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
3074 (match_dup 1)
3075 (const_int 1)))]
3076 "TARGET_FMA || TARGET_AVX512F"
3077 "@
3078 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
3079 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
3080 [(set_attr "type" "ssemuladd")
3081 (set_attr "mode" "<MODE>")])
3082
3083 (define_insn "*fmai_fnmadd_<mode>"
3084 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3085 (vec_merge:VF_128
3086 (fma:VF_128
3087 (neg:VF_128
3088 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v"))
3089 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
3090 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))
3091 (match_dup 1)
3092 (const_int 1)))]
3093 "TARGET_FMA || TARGET_AVX512F"
3094 "@
3095 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
3096 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
3097 [(set_attr "type" "ssemuladd")
3098 (set_attr "mode" "<MODE>")])
3099
3100 (define_insn "*fmai_fnmsub_<mode>"
3101 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3102 (vec_merge:VF_128
3103 (fma:VF_128
3104 (neg:VF_128
3105 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v"))
3106 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
3107 (neg:VF_128
3108 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
3109 (match_dup 1)
3110 (const_int 1)))]
3111 "TARGET_FMA || TARGET_AVX512F"
3112 "@
3113 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
3114 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
3115 [(set_attr "type" "ssemuladd")
3116 (set_attr "mode" "<MODE>")])
3117
3118 ;; FMA4 floating point scalar intrinsics. These write the
3119 ;; entire destination register, with the high-order elements zeroed.
3120
3121 (define_expand "fma4i_vmfmadd_<mode>"
3122 [(set (match_operand:VF_128 0 "register_operand")
3123 (vec_merge:VF_128
3124 (fma:VF_128
3125 (match_operand:VF_128 1 "nonimmediate_operand")
3126 (match_operand:VF_128 2 "nonimmediate_operand")
3127 (match_operand:VF_128 3 "nonimmediate_operand"))
3128 (match_dup 4)
3129 (const_int 1)))]
3130 "TARGET_FMA4"
3131 "operands[4] = CONST0_RTX (<MODE>mode);")
3132
3133 (define_insn "*fma4i_vmfmadd_<mode>"
3134 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3135 (vec_merge:VF_128
3136 (fma:VF_128
3137 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3138 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3139 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3140 (match_operand:VF_128 4 "const0_operand")
3141 (const_int 1)))]
3142 "TARGET_FMA4"
3143 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3144 [(set_attr "type" "ssemuladd")
3145 (set_attr "mode" "<MODE>")])
3146
3147 (define_insn "*fma4i_vmfmsub_<mode>"
3148 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3149 (vec_merge:VF_128
3150 (fma:VF_128
3151 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3152 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3153 (neg:VF_128
3154 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3155 (match_operand:VF_128 4 "const0_operand")
3156 (const_int 1)))]
3157 "TARGET_FMA4"
3158 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3159 [(set_attr "type" "ssemuladd")
3160 (set_attr "mode" "<MODE>")])
3161
3162 (define_insn "*fma4i_vmfnmadd_<mode>"
3163 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3164 (vec_merge:VF_128
3165 (fma:VF_128
3166 (neg:VF_128
3167 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3168 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3169 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3170 (match_operand:VF_128 4 "const0_operand")
3171 (const_int 1)))]
3172 "TARGET_FMA4"
3173 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3174 [(set_attr "type" "ssemuladd")
3175 (set_attr "mode" "<MODE>")])
3176
3177 (define_insn "*fma4i_vmfnmsub_<mode>"
3178 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3179 (vec_merge:VF_128
3180 (fma:VF_128
3181 (neg:VF_128
3182 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3183 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3184 (neg:VF_128
3185 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3186 (match_operand:VF_128 4 "const0_operand")
3187 (const_int 1)))]
3188 "TARGET_FMA4"
3189 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3190 [(set_attr "type" "ssemuladd")
3191 (set_attr "mode" "<MODE>")])
3192
3193 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3194 ;;
3195 ;; Parallel single-precision floating point conversion operations
3196 ;;
3197 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3198
3199 (define_insn "sse_cvtpi2ps"
3200 [(set (match_operand:V4SF 0 "register_operand" "=x")
3201 (vec_merge:V4SF
3202 (vec_duplicate:V4SF
3203 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3204 (match_operand:V4SF 1 "register_operand" "0")
3205 (const_int 3)))]
3206 "TARGET_SSE"
3207 "cvtpi2ps\t{%2, %0|%0, %2}"
3208 [(set_attr "type" "ssecvt")
3209 (set_attr "mode" "V4SF")])
3210
3211 (define_insn "sse_cvtps2pi"
3212 [(set (match_operand:V2SI 0 "register_operand" "=y")
3213 (vec_select:V2SI
3214 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3215 UNSPEC_FIX_NOTRUNC)
3216 (parallel [(const_int 0) (const_int 1)])))]
3217 "TARGET_SSE"
3218 "cvtps2pi\t{%1, %0|%0, %q1}"
3219 [(set_attr "type" "ssecvt")
3220 (set_attr "unit" "mmx")
3221 (set_attr "mode" "DI")])
3222
3223 (define_insn "sse_cvttps2pi"
3224 [(set (match_operand:V2SI 0 "register_operand" "=y")
3225 (vec_select:V2SI
3226 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3227 (parallel [(const_int 0) (const_int 1)])))]
3228 "TARGET_SSE"
3229 "cvttps2pi\t{%1, %0|%0, %q1}"
3230 [(set_attr "type" "ssecvt")
3231 (set_attr "unit" "mmx")
3232 (set_attr "prefix_rep" "0")
3233 (set_attr "mode" "SF")])
3234
3235 (define_insn "sse_cvtsi2ss"
3236 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3237 (vec_merge:V4SF
3238 (vec_duplicate:V4SF
3239 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
3240 (match_operand:V4SF 1 "register_operand" "0,0,v")
3241 (const_int 1)))]
3242 "TARGET_SSE"
3243 "@
3244 cvtsi2ss\t{%2, %0|%0, %2}
3245 cvtsi2ss\t{%2, %0|%0, %2}
3246 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
3247 [(set_attr "isa" "noavx,noavx,avx")
3248 (set_attr "type" "sseicvt")
3249 (set_attr "athlon_decode" "vector,double,*")
3250 (set_attr "amdfam10_decode" "vector,double,*")
3251 (set_attr "bdver1_decode" "double,direct,*")
3252 (set_attr "btver2_decode" "double,double,double")
3253 (set_attr "prefix" "orig,orig,maybe_evex")
3254 (set_attr "mode" "SF")])
3255
3256 (define_insn "sse_cvtsi2ssq"
3257 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3258 (vec_merge:V4SF
3259 (vec_duplicate:V4SF
3260 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
3261 (match_operand:V4SF 1 "register_operand" "0,0,v")
3262 (const_int 1)))]
3263 "TARGET_SSE && TARGET_64BIT"
3264 "@
3265 cvtsi2ssq\t{%2, %0|%0, %2}
3266 cvtsi2ssq\t{%2, %0|%0, %2}
3267 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
3268 [(set_attr "isa" "noavx,noavx,avx")
3269 (set_attr "type" "sseicvt")
3270 (set_attr "athlon_decode" "vector,double,*")
3271 (set_attr "amdfam10_decode" "vector,double,*")
3272 (set_attr "bdver1_decode" "double,direct,*")
3273 (set_attr "btver2_decode" "double,double,double")
3274 (set_attr "length_vex" "*,*,4")
3275 (set_attr "prefix_rex" "1,1,*")
3276 (set_attr "prefix" "orig,orig,maybe_evex")
3277 (set_attr "mode" "SF")])
3278
3279 (define_insn "sse_cvtss2si"
3280 [(set (match_operand:SI 0 "register_operand" "=r,r")
3281 (unspec:SI
3282 [(vec_select:SF
3283 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
3284 (parallel [(const_int 0)]))]
3285 UNSPEC_FIX_NOTRUNC))]
3286 "TARGET_SSE"
3287 "%vcvtss2si\t{%1, %0|%0, %k1}"
3288 [(set_attr "type" "sseicvt")
3289 (set_attr "athlon_decode" "double,vector")
3290 (set_attr "bdver1_decode" "double,double")
3291 (set_attr "prefix_rep" "1")
3292 (set_attr "prefix" "maybe_vex")
3293 (set_attr "mode" "SI")])
3294
3295 (define_insn "sse_cvtss2si_2"
3296 [(set (match_operand:SI 0 "register_operand" "=r,r")
3297 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3298 UNSPEC_FIX_NOTRUNC))]
3299 "TARGET_SSE"
3300 "%vcvtss2si\t{%1, %0|%0, %k1}"
3301 [(set_attr "type" "sseicvt")
3302 (set_attr "athlon_decode" "double,vector")
3303 (set_attr "amdfam10_decode" "double,double")
3304 (set_attr "bdver1_decode" "double,double")
3305 (set_attr "prefix_rep" "1")
3306 (set_attr "prefix" "maybe_vex")
3307 (set_attr "mode" "SI")])
3308
3309 (define_insn "sse_cvtss2siq"
3310 [(set (match_operand:DI 0 "register_operand" "=r,r")
3311 (unspec:DI
3312 [(vec_select:SF
3313 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
3314 (parallel [(const_int 0)]))]
3315 UNSPEC_FIX_NOTRUNC))]
3316 "TARGET_SSE && TARGET_64BIT"
3317 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3318 [(set_attr "type" "sseicvt")
3319 (set_attr "athlon_decode" "double,vector")
3320 (set_attr "bdver1_decode" "double,double")
3321 (set_attr "prefix_rep" "1")
3322 (set_attr "prefix" "maybe_vex")
3323 (set_attr "mode" "DI")])
3324
3325 (define_insn "sse_cvtss2siq_2"
3326 [(set (match_operand:DI 0 "register_operand" "=r,r")
3327 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3328 UNSPEC_FIX_NOTRUNC))]
3329 "TARGET_SSE && TARGET_64BIT"
3330 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3331 [(set_attr "type" "sseicvt")
3332 (set_attr "athlon_decode" "double,vector")
3333 (set_attr "amdfam10_decode" "double,double")
3334 (set_attr "bdver1_decode" "double,double")
3335 (set_attr "prefix_rep" "1")
3336 (set_attr "prefix" "maybe_vex")
3337 (set_attr "mode" "DI")])
3338
3339 (define_insn "sse_cvttss2si"
3340 [(set (match_operand:SI 0 "register_operand" "=r,r")
3341 (fix:SI
3342 (vec_select:SF
3343 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
3344 (parallel [(const_int 0)]))))]
3345 "TARGET_SSE"
3346 "%vcvttss2si\t{%1, %0|%0, %k1}"
3347 [(set_attr "type" "sseicvt")
3348 (set_attr "athlon_decode" "double,vector")
3349 (set_attr "amdfam10_decode" "double,double")
3350 (set_attr "bdver1_decode" "double,double")
3351 (set_attr "prefix_rep" "1")
3352 (set_attr "prefix" "maybe_vex")
3353 (set_attr "mode" "SI")])
3354
3355 (define_insn "sse_cvttss2siq"
3356 [(set (match_operand:DI 0 "register_operand" "=r,r")
3357 (fix:DI
3358 (vec_select:SF
3359 (match_operand:V4SF 1 "nonimmediate_operand" "v,vm")
3360 (parallel [(const_int 0)]))))]
3361 "TARGET_SSE && TARGET_64BIT"
3362 "%vcvttss2si{q}\t{%1, %0|%0, %k1}"
3363 [(set_attr "type" "sseicvt")
3364 (set_attr "athlon_decode" "double,vector")
3365 (set_attr "amdfam10_decode" "double,double")
3366 (set_attr "bdver1_decode" "double,double")
3367 (set_attr "prefix_rep" "1")
3368 (set_attr "prefix" "maybe_vex")
3369 (set_attr "mode" "DI")])
3370
3371 (define_insn "cvtusi2<ssescalarmodesuffix>32"
3372 [(set (match_operand:VF_128 0 "register_operand" "=v")
3373 (vec_merge:VF_128
3374 (vec_duplicate:VF_128
3375 (unsigned_float:<ssescalarmode>
3376 (match_operand:SI 2 "nonimmediate_operand" "rm")))
3377 (match_operand:VF_128 1 "register_operand" "v")
3378 (const_int 1)))]
3379 "TARGET_AVX512F"
3380 "vcvtusi2<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3381 [(set_attr "type" "sseicvt")
3382 (set_attr "prefix" "evex")
3383 (set_attr "mode" "<ssescalarmode>")])
3384
3385 (define_insn "cvtusi2<ssescalarmodesuffix>64"
3386 [(set (match_operand:VF_128 0 "register_operand" "=v")
3387 (vec_merge:VF_128
3388 (vec_duplicate:VF_128
3389 (unsigned_float:<ssescalarmode>
3390 (match_operand:DI 2 "nonimmediate_operand" "rm")))
3391 (match_operand:VF_128 1 "register_operand" "v")
3392 (const_int 1)))]
3393 "TARGET_AVX512F && TARGET_64BIT"
3394 "vcvtusi2<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3395 [(set_attr "type" "sseicvt")
3396 (set_attr "prefix" "evex")
3397 (set_attr "mode" "<ssescalarmode>")])
3398
3399 (define_insn "float<sseintvecmodelower><mode>2<mask_name>"
3400 [(set (match_operand:VF1 0 "register_operand" "=v")
3401 (float:VF1
3402 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))]
3403 "TARGET_SSE2 && <mask_mode512bit_condition>"
3404 "%vcvtdq2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3405 [(set_attr "type" "ssecvt")
3406 (set_attr "prefix" "maybe_vex")
3407 (set_attr "mode" "<sseinsnmode>")])
3408
3409 (define_insn "ufloatv16siv16sf2<mask_name>"
3410 [(set (match_operand:V16SF 0 "register_operand" "=v")
3411 (unsigned_float:V16SF
3412 (match_operand:V16SI 1 "nonimmediate_operand" "vm")))]
3413 "TARGET_AVX512F"
3414 "vcvtudq2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3415 [(set_attr "type" "ssecvt")
3416 (set_attr "prefix" "evex")
3417 (set_attr "mode" "V16SF")])
3418
3419 (define_expand "floatuns<sseintvecmodelower><mode>2"
3420 [(match_operand:VF1 0 "register_operand")
3421 (match_operand:<sseintvecmode> 1 "register_operand")]
3422 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
3423 {
3424 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
3425 DONE;
3426 })
3427
3428
3429 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
3430 (define_mode_attr sf2simodelower
3431 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
3432
3433 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode>"
3434 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
3435 (unspec:VI4_AVX
3436 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
3437 UNSPEC_FIX_NOTRUNC))]
3438 "TARGET_SSE2"
3439 "%vcvtps2dq\t{%1, %0|%0, %1}"
3440 [(set_attr "type" "ssecvt")
3441 (set (attr "prefix_data16")
3442 (if_then_else
3443 (match_test "TARGET_AVX")
3444 (const_string "*")
3445 (const_string "1")))
3446 (set_attr "prefix" "maybe_vex")
3447 (set_attr "mode" "<sseinsnmode>")])
3448
3449 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name>"
3450 [(set (match_operand:V16SI 0 "register_operand" "=v")
3451 (unspec:V16SI
3452 [(match_operand:V16SF 1 "nonimmediate_operand" "vm")]
3453 UNSPEC_FIX_NOTRUNC))]
3454 "TARGET_AVX512F"
3455 "vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3456 [(set_attr "type" "ssecvt")
3457 (set_attr "prefix" "evex")
3458 (set_attr "mode" "XI")])
3459
3460 (define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name>"
3461 [(set (match_operand:V16SI 0 "register_operand" "=v")
3462 (unspec:V16SI
3463 [(match_operand:V16SF 1 "nonimmediate_operand" "vm")]
3464 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3465 "TARGET_AVX512F"
3466 "vcvtps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3467 [(set_attr "type" "ssecvt")
3468 (set_attr "prefix" "evex")
3469 (set_attr "mode" "XI")])
3470
3471 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name>"
3472 [(set (match_operand:V16SI 0 "register_operand" "=v")
3473 (any_fix:V16SI
3474 (match_operand:V16SF 1 "nonimmediate_operand" "vm")))]
3475 "TARGET_AVX512F"
3476 "vcvttps2<fixsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3477 [(set_attr "type" "ssecvt")
3478 (set_attr "prefix" "evex")
3479 (set_attr "mode" "XI")])
3480
3481 (define_insn "fix_truncv8sfv8si2"
3482 [(set (match_operand:V8SI 0 "register_operand" "=x")
3483 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
3484 "TARGET_AVX"
3485 "vcvttps2dq\t{%1, %0|%0, %1}"
3486 [(set_attr "type" "ssecvt")
3487 (set_attr "prefix" "vex")
3488 (set_attr "mode" "OI")])
3489
3490 (define_insn "fix_truncv4sfv4si2"
3491 [(set (match_operand:V4SI 0 "register_operand" "=x")
3492 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3493 "TARGET_SSE2"
3494 "%vcvttps2dq\t{%1, %0|%0, %1}"
3495 [(set_attr "type" "ssecvt")
3496 (set (attr "prefix_rep")
3497 (if_then_else
3498 (match_test "TARGET_AVX")
3499 (const_string "*")
3500 (const_string "1")))
3501 (set (attr "prefix_data16")
3502 (if_then_else
3503 (match_test "TARGET_AVX")
3504 (const_string "*")
3505 (const_string "0")))
3506 (set_attr "prefix_data16" "0")
3507 (set_attr "prefix" "maybe_vex")
3508 (set_attr "mode" "TI")])
3509
3510 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
3511 [(match_operand:<sseintvecmode> 0 "register_operand")
3512 (match_operand:VF1 1 "register_operand")]
3513 "TARGET_SSE2"
3514 {
3515 rtx tmp[3];
3516 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3517 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
3518 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
3519 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
3520 DONE;
3521 })
3522
3523 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3524 ;;
3525 ;; Parallel double-precision floating point conversion operations
3526 ;;
3527 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3528
3529 (define_insn "sse2_cvtpi2pd"
3530 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3531 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
3532 "TARGET_SSE2"
3533 "cvtpi2pd\t{%1, %0|%0, %1}"
3534 [(set_attr "type" "ssecvt")
3535 (set_attr "unit" "mmx,*")
3536 (set_attr "prefix_data16" "1,*")
3537 (set_attr "mode" "V2DF")])
3538
3539 (define_insn "sse2_cvtpd2pi"
3540 [(set (match_operand:V2SI 0 "register_operand" "=y")
3541 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3542 UNSPEC_FIX_NOTRUNC))]
3543 "TARGET_SSE2"
3544 "cvtpd2pi\t{%1, %0|%0, %1}"
3545 [(set_attr "type" "ssecvt")
3546 (set_attr "unit" "mmx")
3547 (set_attr "bdver1_decode" "double")
3548 (set_attr "btver2_decode" "direct")
3549 (set_attr "prefix_data16" "1")
3550 (set_attr "mode" "DI")])
3551
3552 (define_insn "sse2_cvttpd2pi"
3553 [(set (match_operand:V2SI 0 "register_operand" "=y")
3554 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
3555 "TARGET_SSE2"
3556 "cvttpd2pi\t{%1, %0|%0, %1}"
3557 [(set_attr "type" "ssecvt")
3558 (set_attr "unit" "mmx")
3559 (set_attr "bdver1_decode" "double")
3560 (set_attr "prefix_data16" "1")
3561 (set_attr "mode" "TI")])
3562
3563 (define_insn "sse2_cvtsi2sd"
3564 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
3565 (vec_merge:V2DF
3566 (vec_duplicate:V2DF
3567 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
3568 (match_operand:V2DF 1 "register_operand" "0,0,x")
3569 (const_int 1)))]
3570 "TARGET_SSE2"
3571 "@
3572 cvtsi2sd\t{%2, %0|%0, %2}
3573 cvtsi2sd\t{%2, %0|%0, %2}
3574 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
3575 [(set_attr "isa" "noavx,noavx,avx")
3576 (set_attr "type" "sseicvt")
3577 (set_attr "athlon_decode" "double,direct,*")
3578 (set_attr "amdfam10_decode" "vector,double,*")
3579 (set_attr "bdver1_decode" "double,direct,*")
3580 (set_attr "btver2_decode" "double,double,double")
3581 (set_attr "prefix" "orig,orig,vex")
3582 (set_attr "mode" "DF")])
3583
3584 (define_insn "sse2_cvtsi2sdq"
3585 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
3586 (vec_merge:V2DF
3587 (vec_duplicate:V2DF
3588 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
3589 (match_operand:V2DF 1 "register_operand" "0,0,v")
3590 (const_int 1)))]
3591 "TARGET_SSE2 && TARGET_64BIT"
3592 "@
3593 cvtsi2sdq\t{%2, %0|%0, %2}
3594 cvtsi2sdq\t{%2, %0|%0, %2}
3595 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
3596 [(set_attr "isa" "noavx,noavx,avx")
3597 (set_attr "type" "sseicvt")
3598 (set_attr "athlon_decode" "double,direct,*")
3599 (set_attr "amdfam10_decode" "vector,double,*")
3600 (set_attr "bdver1_decode" "double,direct,*")
3601 (set_attr "length_vex" "*,*,4")
3602 (set_attr "prefix_rex" "1,1,*")
3603 (set_attr "prefix" "orig,orig,maybe_evex")
3604 (set_attr "mode" "DF")])
3605
3606 (define_insn "avx512f_vcvtss2usi"
3607 [(set (match_operand:SI 0 "register_operand" "=r")
3608 (unspec:SI
3609 [(vec_select:SF
3610 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3611 (parallel [(const_int 0)]))]
3612 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3613 "TARGET_AVX512F"
3614 "vcvtss2usi\t{%1, %0|%0, %1}"
3615 [(set_attr "type" "sseicvt")
3616 (set_attr "prefix" "evex")
3617 (set_attr "mode" "SI")])
3618
3619 (define_insn "avx512f_vcvtss2usiq"
3620 [(set (match_operand:DI 0 "register_operand" "=r")
3621 (unspec:DI
3622 [(vec_select:SF
3623 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3624 (parallel [(const_int 0)]))]
3625 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3626 "TARGET_AVX512F && TARGET_64BIT"
3627 "vcvtss2usi\t{%1, %0|%0, %1}"
3628 [(set_attr "type" "sseicvt")
3629 (set_attr "prefix" "evex")
3630 (set_attr "mode" "DI")])
3631
3632 (define_insn "avx512f_vcvttss2usi"
3633 [(set (match_operand:SI 0 "register_operand" "=r")
3634 (unsigned_fix:SI
3635 (vec_select:SF
3636 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3637 (parallel [(const_int 0)]))))]
3638 "TARGET_AVX512F"
3639 "vcvttss2usi\t{%1, %0|%0, %1}"
3640 [(set_attr "type" "sseicvt")
3641 (set_attr "prefix" "evex")
3642 (set_attr "mode" "SI")])
3643
3644 (define_insn "avx512f_vcvttss2usiq"
3645 [(set (match_operand:DI 0 "register_operand" "=r")
3646 (unsigned_fix:DI
3647 (vec_select:SF
3648 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3649 (parallel [(const_int 0)]))))]
3650 "TARGET_AVX512F && TARGET_64BIT"
3651 "vcvttss2usi\t{%1, %0|%0, %1}"
3652 [(set_attr "type" "sseicvt")
3653 (set_attr "prefix" "evex")
3654 (set_attr "mode" "DI")])
3655
3656 (define_insn "avx512f_vcvtsd2usi"
3657 [(set (match_operand:SI 0 "register_operand" "=r")
3658 (unspec:SI
3659 [(vec_select:DF
3660 (match_operand:V2DF 1 "nonimmediate_operand" "vm")
3661 (parallel [(const_int 0)]))]
3662 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3663 "TARGET_AVX512F"
3664 "vcvtsd2usi\t{%1, %0|%0, %1}"
3665 [(set_attr "type" "sseicvt")
3666 (set_attr "prefix" "evex")
3667 (set_attr "mode" "SI")])
3668
3669 (define_insn "avx512f_vcvtsd2usiq"
3670 [(set (match_operand:DI 0 "register_operand" "=r")
3671 (unspec:DI
3672 [(vec_select:DF
3673 (match_operand:V2DF 1 "nonimmediate_operand" "vm")
3674 (parallel [(const_int 0)]))]
3675 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3676 "TARGET_AVX512F && TARGET_64BIT"
3677 "vcvtsd2usi\t{%1, %0|%0, %1}"
3678 [(set_attr "type" "sseicvt")
3679 (set_attr "prefix" "evex")
3680 (set_attr "mode" "DI")])
3681
3682 (define_insn "avx512f_vcvttsd2usi"
3683 [(set (match_operand:SI 0 "register_operand" "=r")
3684 (unsigned_fix:SI
3685 (vec_select:DF
3686 (match_operand:V2DF 1 "nonimmediate_operand" "vm")
3687 (parallel [(const_int 0)]))))]
3688 "TARGET_AVX512F"
3689 "vcvttsd2usi\t{%1, %0|%0, %1}"
3690 [(set_attr "type" "sseicvt")
3691 (set_attr "prefix" "evex")
3692 (set_attr "mode" "SI")])
3693
3694 (define_insn "avx512f_vcvttsd2usiq"
3695 [(set (match_operand:DI 0 "register_operand" "=r")
3696 (unsigned_fix:DI
3697 (vec_select:DF
3698 (match_operand:V2DF 1 "nonimmediate_operand" "vm")
3699 (parallel [(const_int 0)]))))]
3700 "TARGET_AVX512F && TARGET_64BIT"
3701 "vcvttsd2usi\t{%1, %0|%0, %1}"
3702 [(set_attr "type" "sseicvt")
3703 (set_attr "prefix" "evex")
3704 (set_attr "mode" "DI")])
3705
3706 (define_insn "sse2_cvtsd2si"
3707 [(set (match_operand:SI 0 "register_operand" "=r,r")
3708 (unspec:SI
3709 [(vec_select:DF
3710 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
3711 (parallel [(const_int 0)]))]
3712 UNSPEC_FIX_NOTRUNC))]
3713 "TARGET_SSE2"
3714 "%vcvtsd2si\t{%1, %0|%0, %q1}"
3715 [(set_attr "type" "sseicvt")
3716 (set_attr "athlon_decode" "double,vector")
3717 (set_attr "bdver1_decode" "double,double")
3718 (set_attr "btver2_decode" "double,double")
3719 (set_attr "prefix_rep" "1")
3720 (set_attr "prefix" "maybe_vex")
3721 (set_attr "mode" "SI")])
3722
3723 (define_insn "sse2_cvtsd2si_2"
3724 [(set (match_operand:SI 0 "register_operand" "=r,r")
3725 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
3726 UNSPEC_FIX_NOTRUNC))]
3727 "TARGET_SSE2"
3728 "%vcvtsd2si\t{%1, %0|%0, %q1}"
3729 [(set_attr "type" "sseicvt")
3730 (set_attr "athlon_decode" "double,vector")
3731 (set_attr "amdfam10_decode" "double,double")
3732 (set_attr "bdver1_decode" "double,double")
3733 (set_attr "prefix_rep" "1")
3734 (set_attr "prefix" "maybe_vex")
3735 (set_attr "mode" "SI")])
3736
3737 (define_insn "sse2_cvtsd2siq"
3738 [(set (match_operand:DI 0 "register_operand" "=r,r")
3739 (unspec:DI
3740 [(vec_select:DF
3741 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
3742 (parallel [(const_int 0)]))]
3743 UNSPEC_FIX_NOTRUNC))]
3744 "TARGET_SSE2 && TARGET_64BIT"
3745 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
3746 [(set_attr "type" "sseicvt")
3747 (set_attr "athlon_decode" "double,vector")
3748 (set_attr "bdver1_decode" "double,double")
3749 (set_attr "prefix_rep" "1")
3750 (set_attr "prefix" "maybe_vex")
3751 (set_attr "mode" "DI")])
3752
3753 (define_insn "sse2_cvtsd2siq_2"
3754 [(set (match_operand:DI 0 "register_operand" "=r,r")
3755 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
3756 UNSPEC_FIX_NOTRUNC))]
3757 "TARGET_SSE2 && TARGET_64BIT"
3758 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
3759 [(set_attr "type" "sseicvt")
3760 (set_attr "athlon_decode" "double,vector")
3761 (set_attr "amdfam10_decode" "double,double")
3762 (set_attr "bdver1_decode" "double,double")
3763 (set_attr "prefix_rep" "1")
3764 (set_attr "prefix" "maybe_vex")
3765 (set_attr "mode" "DI")])
3766
3767 (define_insn "sse2_cvttsd2si"
3768 [(set (match_operand:SI 0 "register_operand" "=r,r")
3769 (fix:SI
3770 (vec_select:DF
3771 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
3772 (parallel [(const_int 0)]))))]
3773 "TARGET_SSE2"
3774 "%vcvttsd2si\t{%1, %0|%0, %q1}"
3775 [(set_attr "type" "sseicvt")
3776 (set_attr "athlon_decode" "double,vector")
3777 (set_attr "amdfam10_decode" "double,double")
3778 (set_attr "bdver1_decode" "double,double")
3779 (set_attr "btver2_decode" "double,double")
3780 (set_attr "prefix_rep" "1")
3781 (set_attr "prefix" "maybe_vex")
3782 (set_attr "mode" "SI")])
3783
3784 (define_insn "sse2_cvttsd2siq"
3785 [(set (match_operand:DI 0 "register_operand" "=r,r")
3786 (fix:DI
3787 (vec_select:DF
3788 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
3789 (parallel [(const_int 0)]))))]
3790 "TARGET_SSE2 && TARGET_64BIT"
3791 "%vcvttsd2si{q}\t{%1, %0|%0, %q1}"
3792 [(set_attr "type" "sseicvt")
3793 (set_attr "athlon_decode" "double,vector")
3794 (set_attr "amdfam10_decode" "double,double")
3795 (set_attr "bdver1_decode" "double,double")
3796 (set_attr "prefix_rep" "1")
3797 (set_attr "prefix" "maybe_vex")
3798 (set_attr "mode" "DI")])
3799
3800 ;; For float<si2dfmode><mode>2 insn pattern
3801 (define_mode_attr si2dfmode
3802 [(V8DF "V8SI") (V4DF "V4SI")])
3803 (define_mode_attr si2dfmodelower
3804 [(V8DF "v8si") (V4DF "v4si")])
3805
3806 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
3807 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
3808 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
3809 "TARGET_AVX && <mask_mode512bit_condition>"
3810 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3811 [(set_attr "type" "ssecvt")
3812 (set_attr "prefix" "maybe_vex")
3813 (set_attr "mode" "<MODE>")])
3814
3815 (define_insn "ufloatv8siv8df<mask_name>"
3816 [(set (match_operand:V8DF 0 "register_operand" "=v")
3817 (unsigned_float:V8DF
3818 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
3819 "TARGET_AVX512F"
3820 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3821 [(set_attr "type" "ssecvt")
3822 (set_attr "prefix" "evex")
3823 (set_attr "mode" "V8DF")])
3824
3825 (define_insn "avx512f_cvtdq2pd512_2"
3826 [(set (match_operand:V8DF 0 "register_operand" "=v")
3827 (float:V8DF
3828 (vec_select:V8SI
3829 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
3830 (parallel [(const_int 0) (const_int 1)
3831 (const_int 2) (const_int 3)
3832 (const_int 4) (const_int 5)
3833 (const_int 6) (const_int 7)]))))]
3834 "TARGET_AVX"
3835 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
3836 [(set_attr "type" "ssecvt")
3837 (set_attr "prefix" "evex")
3838 (set_attr "mode" "V8DF")])
3839
3840 (define_insn "avx_cvtdq2pd256_2"
3841 [(set (match_operand:V4DF 0 "register_operand" "=x")
3842 (float:V4DF
3843 (vec_select:V4SI
3844 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
3845 (parallel [(const_int 0) (const_int 1)
3846 (const_int 2) (const_int 3)]))))]
3847 "TARGET_AVX"
3848 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
3849 [(set_attr "type" "ssecvt")
3850 (set_attr "prefix" "vex")
3851 (set_attr "mode" "V4DF")])
3852
3853 (define_insn "sse2_cvtdq2pd"
3854 [(set (match_operand:V2DF 0 "register_operand" "=x")
3855 (float:V2DF
3856 (vec_select:V2SI
3857 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3858 (parallel [(const_int 0) (const_int 1)]))))]
3859 "TARGET_SSE2"
3860 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
3861 [(set_attr "type" "ssecvt")
3862 (set_attr "prefix" "maybe_vex")
3863 (set_attr "ssememalign" "64")
3864 (set_attr "mode" "V2DF")])
3865
3866 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name>"
3867 [(set (match_operand:V8SI 0 "register_operand" "=v")
3868 (unspec:V8SI
3869 [(match_operand:V8DF 1 "nonimmediate_operand" "vm")]
3870 UNSPEC_FIX_NOTRUNC))]
3871 "TARGET_AVX512F"
3872 "vcvtpd2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3873 [(set_attr "type" "ssecvt")
3874 (set_attr "prefix" "evex")
3875 (set_attr "mode" "OI")])
3876
3877 (define_insn "avx_cvtpd2dq256"
3878 [(set (match_operand:V4SI 0 "register_operand" "=x")
3879 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3880 UNSPEC_FIX_NOTRUNC))]
3881 "TARGET_AVX"
3882 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
3883 [(set_attr "type" "ssecvt")
3884 (set_attr "prefix" "vex")
3885 (set_attr "mode" "OI")])
3886
3887 (define_expand "avx_cvtpd2dq256_2"
3888 [(set (match_operand:V8SI 0 "register_operand")
3889 (vec_concat:V8SI
3890 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
3891 UNSPEC_FIX_NOTRUNC)
3892 (match_dup 2)))]
3893 "TARGET_AVX"
3894 "operands[2] = CONST0_RTX (V4SImode);")
3895
3896 (define_insn "*avx_cvtpd2dq256_2"
3897 [(set (match_operand:V8SI 0 "register_operand" "=x")
3898 (vec_concat:V8SI
3899 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3900 UNSPEC_FIX_NOTRUNC)
3901 (match_operand:V4SI 2 "const0_operand")))]
3902 "TARGET_AVX"
3903 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
3904 [(set_attr "type" "ssecvt")
3905 (set_attr "prefix" "vex")
3906 (set_attr "btver2_decode" "vector")
3907 (set_attr "mode" "OI")])
3908
3909 (define_expand "sse2_cvtpd2dq"
3910 [(set (match_operand:V4SI 0 "register_operand")
3911 (vec_concat:V4SI
3912 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
3913 UNSPEC_FIX_NOTRUNC)
3914 (match_dup 2)))]
3915 "TARGET_SSE2"
3916 "operands[2] = CONST0_RTX (V2SImode);")
3917
3918 (define_insn "*sse2_cvtpd2dq"
3919 [(set (match_operand:V4SI 0 "register_operand" "=x")
3920 (vec_concat:V4SI
3921 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3922 UNSPEC_FIX_NOTRUNC)
3923 (match_operand:V2SI 2 "const0_operand")))]
3924 "TARGET_SSE2"
3925 {
3926 if (TARGET_AVX)
3927 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
3928 else
3929 return "cvtpd2dq\t{%1, %0|%0, %1}";
3930 }
3931 [(set_attr "type" "ssecvt")
3932 (set_attr "prefix_rep" "1")
3933 (set_attr "prefix_data16" "0")
3934 (set_attr "prefix" "maybe_vex")
3935 (set_attr "mode" "TI")
3936 (set_attr "amdfam10_decode" "double")
3937 (set_attr "athlon_decode" "vector")
3938 (set_attr "bdver1_decode" "double")])
3939
3940 (define_insn "avx512f_ufix_notruncv8dfv8si<mask_name>"
3941 [(set (match_operand:V8SI 0 "register_operand" "=v")
3942 (unspec:V8SI
3943 [(match_operand:V8DF 1 "nonimmediate_operand" "vm")]
3944 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3945 "TARGET_AVX512F"
3946 "vcvtpd2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3947 [(set_attr "type" "ssecvt")
3948 (set_attr "prefix" "evex")
3949 (set_attr "mode" "OI")])
3950
3951 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name>"
3952 [(set (match_operand:V8SI 0 "register_operand" "=v")
3953 (any_fix:V8SI
3954 (match_operand:V8DF 1 "nonimmediate_operand" "vm")))]
3955 "TARGET_AVX512F"
3956 "vcvttpd2<fixsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3957 [(set_attr "type" "ssecvt")
3958 (set_attr "prefix" "evex")
3959 (set_attr "mode" "OI")])
3960
3961 (define_insn "fix_truncv4dfv4si2"
3962 [(set (match_operand:V4SI 0 "register_operand" "=x")
3963 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3964 "TARGET_AVX"
3965 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
3966 [(set_attr "type" "ssecvt")
3967 (set_attr "prefix" "vex")
3968 (set_attr "mode" "OI")])
3969
3970 (define_expand "avx_cvttpd2dq256_2"
3971 [(set (match_operand:V8SI 0 "register_operand")
3972 (vec_concat:V8SI
3973 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
3974 (match_dup 2)))]
3975 "TARGET_AVX"
3976 "operands[2] = CONST0_RTX (V4SImode);")
3977
3978 (define_insn "*avx_cvttpd2dq256_2"
3979 [(set (match_operand:V8SI 0 "register_operand" "=x")
3980 (vec_concat:V8SI
3981 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
3982 (match_operand:V4SI 2 "const0_operand")))]
3983 "TARGET_AVX"
3984 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
3985 [(set_attr "type" "ssecvt")
3986 (set_attr "prefix" "vex")
3987 (set_attr "btver2_decode" "vector")
3988 (set_attr "mode" "OI")])
3989
3990 (define_expand "sse2_cvttpd2dq"
3991 [(set (match_operand:V4SI 0 "register_operand")
3992 (vec_concat:V4SI
3993 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
3994 (match_dup 2)))]
3995 "TARGET_SSE2"
3996 "operands[2] = CONST0_RTX (V2SImode);")
3997
3998 (define_insn "*sse2_cvttpd2dq"
3999 [(set (match_operand:V4SI 0 "register_operand" "=x")
4000 (vec_concat:V4SI
4001 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4002 (match_operand:V2SI 2 "const0_operand")))]
4003 "TARGET_SSE2"
4004 {
4005 if (TARGET_AVX)
4006 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
4007 else
4008 return "cvttpd2dq\t{%1, %0|%0, %1}";
4009 }
4010 [(set_attr "type" "ssecvt")
4011 (set_attr "amdfam10_decode" "double")
4012 (set_attr "athlon_decode" "vector")
4013 (set_attr "bdver1_decode" "double")
4014 (set_attr "prefix" "maybe_vex")
4015 (set_attr "mode" "TI")])
4016
4017 (define_insn "sse2_cvtsd2ss"
4018 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4019 (vec_merge:V4SF
4020 (vec_duplicate:V4SF
4021 (float_truncate:V2SF
4022 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,vm")))
4023 (match_operand:V4SF 1 "register_operand" "0,0,v")
4024 (const_int 1)))]
4025 "TARGET_SSE2"
4026 "@
4027 cvtsd2ss\t{%2, %0|%0, %2}
4028 cvtsd2ss\t{%2, %0|%0, %q2}
4029 vcvtsd2ss\t{%2, %1, %0|%0, %1, %q2}"
4030 [(set_attr "isa" "noavx,noavx,avx")
4031 (set_attr "type" "ssecvt")
4032 (set_attr "athlon_decode" "vector,double,*")
4033 (set_attr "amdfam10_decode" "vector,double,*")
4034 (set_attr "bdver1_decode" "direct,direct,*")
4035 (set_attr "btver2_decode" "double,double,double")
4036 (set_attr "prefix" "orig,orig,vex")
4037 (set_attr "mode" "SF")])
4038
4039 (define_insn "sse2_cvtss2sd"
4040 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4041 (vec_merge:V2DF
4042 (float_extend:V2DF
4043 (vec_select:V2SF
4044 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,vm")
4045 (parallel [(const_int 0) (const_int 1)])))
4046 (match_operand:V2DF 1 "register_operand" "0,0,v")
4047 (const_int 1)))]
4048 "TARGET_SSE2"
4049 "@
4050 cvtss2sd\t{%2, %0|%0, %2}
4051 cvtss2sd\t{%2, %0|%0, %k2}
4052 vcvtss2sd\t{%2, %1, %0|%0, %1, %k2}"
4053 [(set_attr "isa" "noavx,noavx,avx")
4054 (set_attr "type" "ssecvt")
4055 (set_attr "amdfam10_decode" "vector,double,*")
4056 (set_attr "athlon_decode" "direct,direct,*")
4057 (set_attr "bdver1_decode" "direct,direct,*")
4058 (set_attr "btver2_decode" "double,double,double")
4059 (set_attr "prefix" "orig,orig,vex")
4060 (set_attr "mode" "DF")])
4061
4062 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name>"
4063 [(set (match_operand:V8SF 0 "register_operand" "=v")
4064 (float_truncate:V8SF
4065 (match_operand:V8DF 1 "nonimmediate_operand" "vm")))]
4066 "TARGET_AVX512F"
4067 "vcvtpd2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4068 [(set_attr "type" "ssecvt")
4069 (set_attr "prefix" "evex")
4070 (set_attr "mode" "V8SF")])
4071
4072 (define_insn "avx_cvtpd2ps256"
4073 [(set (match_operand:V4SF 0 "register_operand" "=x")
4074 (float_truncate:V4SF
4075 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
4076 "TARGET_AVX"
4077 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
4078 [(set_attr "type" "ssecvt")
4079 (set_attr "prefix" "vex")
4080 (set_attr "btver2_decode" "vector")
4081 (set_attr "mode" "V4SF")])
4082
4083 (define_expand "sse2_cvtpd2ps"
4084 [(set (match_operand:V4SF 0 "register_operand")
4085 (vec_concat:V4SF
4086 (float_truncate:V2SF
4087 (match_operand:V2DF 1 "nonimmediate_operand"))
4088 (match_dup 2)))]
4089 "TARGET_SSE2"
4090 "operands[2] = CONST0_RTX (V2SFmode);")
4091
4092 (define_insn "*sse2_cvtpd2ps"
4093 [(set (match_operand:V4SF 0 "register_operand" "=x")
4094 (vec_concat:V4SF
4095 (float_truncate:V2SF
4096 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4097 (match_operand:V2SF 2 "const0_operand")))]
4098 "TARGET_SSE2"
4099 {
4100 if (TARGET_AVX)
4101 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
4102 else
4103 return "cvtpd2ps\t{%1, %0|%0, %1}";
4104 }
4105 [(set_attr "type" "ssecvt")
4106 (set_attr "amdfam10_decode" "double")
4107 (set_attr "athlon_decode" "vector")
4108 (set_attr "bdver1_decode" "double")
4109 (set_attr "prefix_data16" "1")
4110 (set_attr "prefix" "maybe_vex")
4111 (set_attr "mode" "V4SF")])
4112
4113 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
4114 (define_mode_attr sf2dfmode
4115 [(V8DF "V8SF") (V4DF "V4SF")])
4116
4117 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name>"
4118 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4119 (float_extend:VF2_512_256
4120 (match_operand:<sf2dfmode> 1 "nonimmediate_operand" "vm")))]
4121 "TARGET_AVX && <mask_mode512bit_condition>"
4122 "vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4123 [(set_attr "type" "ssecvt")
4124 (set_attr "prefix" "maybe_vex")
4125 (set_attr "mode" "<MODE>")])
4126
4127 (define_insn "*avx_cvtps2pd256_2"
4128 [(set (match_operand:V4DF 0 "register_operand" "=x")
4129 (float_extend:V4DF
4130 (vec_select:V4SF
4131 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4132 (parallel [(const_int 0) (const_int 1)
4133 (const_int 2) (const_int 3)]))))]
4134 "TARGET_AVX"
4135 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4136 [(set_attr "type" "ssecvt")
4137 (set_attr "prefix" "vex")
4138 (set_attr "mode" "V4DF")])
4139
4140 (define_insn "vec_unpacks_lo_v16sf"
4141 [(set (match_operand:V8DF 0 "register_operand" "=v")
4142 (float_extend:V8DF
4143 (vec_select:V8SF
4144 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4145 (parallel [(const_int 0) (const_int 1)
4146 (const_int 2) (const_int 3)
4147 (const_int 4) (const_int 5)
4148 (const_int 6) (const_int 7)]))))]
4149 "TARGET_AVX512F"
4150 "vcvtps2pd\t{%t1, %0|%0, %t1}"
4151 [(set_attr "type" "ssecvt")
4152 (set_attr "prefix" "evex")
4153 (set_attr "mode" "V8DF")])
4154
4155 (define_insn "sse2_cvtps2pd"
4156 [(set (match_operand:V2DF 0 "register_operand" "=x")
4157 (float_extend:V2DF
4158 (vec_select:V2SF
4159 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4160 (parallel [(const_int 0) (const_int 1)]))))]
4161 "TARGET_SSE2"
4162 "%vcvtps2pd\t{%1, %0|%0, %q1}"
4163 [(set_attr "type" "ssecvt")
4164 (set_attr "amdfam10_decode" "direct")
4165 (set_attr "athlon_decode" "double")
4166 (set_attr "bdver1_decode" "double")
4167 (set_attr "prefix_data16" "0")
4168 (set_attr "prefix" "maybe_vex")
4169 (set_attr "mode" "V2DF")])
4170
4171 (define_expand "vec_unpacks_hi_v4sf"
4172 [(set (match_dup 2)
4173 (vec_select:V4SF
4174 (vec_concat:V8SF
4175 (match_dup 2)
4176 (match_operand:V4SF 1 "nonimmediate_operand"))
4177 (parallel [(const_int 6) (const_int 7)
4178 (const_int 2) (const_int 3)])))
4179 (set (match_operand:V2DF 0 "register_operand")
4180 (float_extend:V2DF
4181 (vec_select:V2SF
4182 (match_dup 2)
4183 (parallel [(const_int 0) (const_int 1)]))))]
4184 "TARGET_SSE2"
4185 "operands[2] = gen_reg_rtx (V4SFmode);")
4186
4187 (define_expand "vec_unpacks_hi_v8sf"
4188 [(set (match_dup 2)
4189 (vec_select:V4SF
4190 (match_operand:V8SF 1 "nonimmediate_operand")
4191 (parallel [(const_int 4) (const_int 5)
4192 (const_int 6) (const_int 7)])))
4193 (set (match_operand:V4DF 0 "register_operand")
4194 (float_extend:V4DF
4195 (match_dup 2)))]
4196 "TARGET_AVX"
4197 "operands[2] = gen_reg_rtx (V4SFmode);")
4198
4199 (define_expand "vec_unpacks_hi_v16sf"
4200 [(set (match_dup 2)
4201 (vec_select:V8SF
4202 (match_operand:V16SF 1 "nonimmediate_operand")
4203 (parallel [(const_int 8) (const_int 9)
4204 (const_int 10) (const_int 11)
4205 (const_int 12) (const_int 13)
4206 (const_int 14) (const_int 15)])))
4207 (set (match_operand:V8DF 0 "register_operand")
4208 (float_extend:V8DF
4209 (match_dup 2)))]
4210 "TARGET_AVX512F"
4211 "operands[2] = gen_reg_rtx (V8SFmode);")
4212
4213 (define_expand "vec_unpacks_lo_v4sf"
4214 [(set (match_operand:V2DF 0 "register_operand")
4215 (float_extend:V2DF
4216 (vec_select:V2SF
4217 (match_operand:V4SF 1 "nonimmediate_operand")
4218 (parallel [(const_int 0) (const_int 1)]))))]
4219 "TARGET_SSE2")
4220
4221 (define_expand "vec_unpacks_lo_v8sf"
4222 [(set (match_operand:V4DF 0 "register_operand")
4223 (float_extend:V4DF
4224 (vec_select:V4SF
4225 (match_operand:V8SF 1 "nonimmediate_operand")
4226 (parallel [(const_int 0) (const_int 1)
4227 (const_int 2) (const_int 3)]))))]
4228 "TARGET_AVX")
4229
4230 (define_mode_attr sseunpackfltmode
4231 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
4232 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
4233
4234 (define_expand "vec_unpacks_float_hi_<mode>"
4235 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4236 (match_operand:VI2_AVX512F 1 "register_operand")]
4237 "TARGET_SSE2"
4238 {
4239 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4240
4241 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
4242 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4243 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
4244 DONE;
4245 })
4246
4247 (define_expand "vec_unpacks_float_lo_<mode>"
4248 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4249 (match_operand:VI2_AVX512F 1 "register_operand")]
4250 "TARGET_SSE2"
4251 {
4252 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4253
4254 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
4255 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4256 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
4257 DONE;
4258 })
4259
4260 (define_expand "vec_unpacku_float_hi_<mode>"
4261 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4262 (match_operand:VI2_AVX512F 1 "register_operand")]
4263 "TARGET_SSE2"
4264 {
4265 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4266
4267 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
4268 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4269 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
4270 DONE;
4271 })
4272
4273 (define_expand "vec_unpacku_float_lo_<mode>"
4274 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4275 (match_operand:VI2_AVX512F 1 "register_operand")]
4276 "TARGET_SSE2"
4277 {
4278 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4279
4280 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
4281 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4282 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
4283 DONE;
4284 })
4285
4286 (define_expand "vec_unpacks_float_hi_v4si"
4287 [(set (match_dup 2)
4288 (vec_select:V4SI
4289 (match_operand:V4SI 1 "nonimmediate_operand")
4290 (parallel [(const_int 2) (const_int 3)
4291 (const_int 2) (const_int 3)])))
4292 (set (match_operand:V2DF 0 "register_operand")
4293 (float:V2DF
4294 (vec_select:V2SI
4295 (match_dup 2)
4296 (parallel [(const_int 0) (const_int 1)]))))]
4297 "TARGET_SSE2"
4298 "operands[2] = gen_reg_rtx (V4SImode);")
4299
4300 (define_expand "vec_unpacks_float_lo_v4si"
4301 [(set (match_operand:V2DF 0 "register_operand")
4302 (float:V2DF
4303 (vec_select:V2SI
4304 (match_operand:V4SI 1 "nonimmediate_operand")
4305 (parallel [(const_int 0) (const_int 1)]))))]
4306 "TARGET_SSE2")
4307
4308 (define_expand "vec_unpacks_float_hi_v8si"
4309 [(set (match_dup 2)
4310 (vec_select:V4SI
4311 (match_operand:V8SI 1 "nonimmediate_operand")
4312 (parallel [(const_int 4) (const_int 5)
4313 (const_int 6) (const_int 7)])))
4314 (set (match_operand:V4DF 0 "register_operand")
4315 (float:V4DF
4316 (match_dup 2)))]
4317 "TARGET_AVX"
4318 "operands[2] = gen_reg_rtx (V4SImode);")
4319
4320 (define_expand "vec_unpacks_float_lo_v8si"
4321 [(set (match_operand:V4DF 0 "register_operand")
4322 (float:V4DF
4323 (vec_select:V4SI
4324 (match_operand:V8SI 1 "nonimmediate_operand")
4325 (parallel [(const_int 0) (const_int 1)
4326 (const_int 2) (const_int 3)]))))]
4327 "TARGET_AVX")
4328
4329 (define_expand "vec_unpacks_float_hi_v16si"
4330 [(set (match_dup 2)
4331 (vec_select:V8SI
4332 (match_operand:V16SI 1 "nonimmediate_operand")
4333 (parallel [(const_int 8) (const_int 9)
4334 (const_int 10) (const_int 11)
4335 (const_int 12) (const_int 13)
4336 (const_int 14) (const_int 15)])))
4337 (set (match_operand:V8DF 0 "register_operand")
4338 (float:V8DF
4339 (match_dup 2)))]
4340 "TARGET_AVX512F"
4341 "operands[2] = gen_reg_rtx (V8SImode);")
4342
4343 (define_expand "vec_unpacks_float_lo_v16si"
4344 [(set (match_operand:V8DF 0 "register_operand")
4345 (float:V8DF
4346 (vec_select:V8SI
4347 (match_operand:V16SI 1 "nonimmediate_operand")
4348 (parallel [(const_int 0) (const_int 1)
4349 (const_int 2) (const_int 3)
4350 (const_int 4) (const_int 5)
4351 (const_int 6) (const_int 7)]))))]
4352 "TARGET_AVX512F")
4353
4354 (define_expand "vec_unpacku_float_hi_v4si"
4355 [(set (match_dup 5)
4356 (vec_select:V4SI
4357 (match_operand:V4SI 1 "nonimmediate_operand")
4358 (parallel [(const_int 2) (const_int 3)
4359 (const_int 2) (const_int 3)])))
4360 (set (match_dup 6)
4361 (float:V2DF
4362 (vec_select:V2SI
4363 (match_dup 5)
4364 (parallel [(const_int 0) (const_int 1)]))))
4365 (set (match_dup 7)
4366 (lt:V2DF (match_dup 6) (match_dup 3)))
4367 (set (match_dup 8)
4368 (and:V2DF (match_dup 7) (match_dup 4)))
4369 (set (match_operand:V2DF 0 "register_operand")
4370 (plus:V2DF (match_dup 6) (match_dup 8)))]
4371 "TARGET_SSE2"
4372 {
4373 REAL_VALUE_TYPE TWO32r;
4374 rtx x;
4375 int i;
4376
4377 real_ldexp (&TWO32r, &dconst1, 32);
4378 x = const_double_from_real_value (TWO32r, DFmode);
4379
4380 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4381 operands[4] = force_reg (V2DFmode,
4382 ix86_build_const_vector (V2DFmode, 1, x));
4383
4384 operands[5] = gen_reg_rtx (V4SImode);
4385
4386 for (i = 6; i < 9; i++)
4387 operands[i] = gen_reg_rtx (V2DFmode);
4388 })
4389
4390 (define_expand "vec_unpacku_float_lo_v4si"
4391 [(set (match_dup 5)
4392 (float:V2DF
4393 (vec_select:V2SI
4394 (match_operand:V4SI 1 "nonimmediate_operand")
4395 (parallel [(const_int 0) (const_int 1)]))))
4396 (set (match_dup 6)
4397 (lt:V2DF (match_dup 5) (match_dup 3)))
4398 (set (match_dup 7)
4399 (and:V2DF (match_dup 6) (match_dup 4)))
4400 (set (match_operand:V2DF 0 "register_operand")
4401 (plus:V2DF (match_dup 5) (match_dup 7)))]
4402 "TARGET_SSE2"
4403 {
4404 REAL_VALUE_TYPE TWO32r;
4405 rtx x;
4406 int i;
4407
4408 real_ldexp (&TWO32r, &dconst1, 32);
4409 x = const_double_from_real_value (TWO32r, DFmode);
4410
4411 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4412 operands[4] = force_reg (V2DFmode,
4413 ix86_build_const_vector (V2DFmode, 1, x));
4414
4415 for (i = 5; i < 8; i++)
4416 operands[i] = gen_reg_rtx (V2DFmode);
4417 })
4418
4419 (define_expand "vec_unpacku_float_hi_v8si"
4420 [(match_operand:V4DF 0 "register_operand")
4421 (match_operand:V8SI 1 "register_operand")]
4422 "TARGET_AVX"
4423 {
4424 REAL_VALUE_TYPE TWO32r;
4425 rtx x, tmp[6];
4426 int i;
4427
4428 real_ldexp (&TWO32r, &dconst1, 32);
4429 x = const_double_from_real_value (TWO32r, DFmode);
4430
4431 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
4432 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
4433 tmp[5] = gen_reg_rtx (V4SImode);
4434
4435 for (i = 2; i < 5; i++)
4436 tmp[i] = gen_reg_rtx (V4DFmode);
4437 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
4438 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
4439 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
4440 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
4441 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
4442 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
4443 DONE;
4444 })
4445
4446 (define_expand "vec_unpacku_float_lo_v8si"
4447 [(match_operand:V4DF 0 "register_operand")
4448 (match_operand:V8SI 1 "nonimmediate_operand")]
4449 "TARGET_AVX"
4450 {
4451 REAL_VALUE_TYPE TWO32r;
4452 rtx x, tmp[5];
4453 int i;
4454
4455 real_ldexp (&TWO32r, &dconst1, 32);
4456 x = const_double_from_real_value (TWO32r, DFmode);
4457
4458 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
4459 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
4460
4461 for (i = 2; i < 5; i++)
4462 tmp[i] = gen_reg_rtx (V4DFmode);
4463 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
4464 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
4465 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
4466 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
4467 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
4468 DONE;
4469 })
4470
4471 (define_expand "vec_unpacku_float_lo_v16si"
4472 [(match_operand:V8DF 0 "register_operand")
4473 (match_operand:V16SI 1 "nonimmediate_operand")]
4474 "TARGET_AVX512F"
4475 {
4476 REAL_VALUE_TYPE TWO32r;
4477 rtx k, x, tmp[3];
4478
4479 real_ldexp (&TWO32r, &dconst1, 32);
4480 x = const_double_from_real_value (TWO32r, DFmode);
4481
4482 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
4483 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
4484 tmp[2] = gen_reg_rtx (V8DFmode);
4485 k = gen_reg_rtx (QImode);
4486
4487 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
4488 emit_insn (gen_rtx_SET (VOIDmode, k,
4489 gen_rtx_LT (QImode, tmp[2], tmp[0])));
4490 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
4491 emit_move_insn (operands[0], tmp[2]);
4492 DONE;
4493 })
4494
4495 (define_expand "vec_pack_trunc_<mode>"
4496 [(set (match_dup 3)
4497 (float_truncate:<sf2dfmode>
4498 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
4499 (set (match_dup 4)
4500 (float_truncate:<sf2dfmode>
4501 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
4502 (set (match_operand:<ssePSmode> 0 "register_operand")
4503 (vec_concat:<ssePSmode>
4504 (match_dup 3)
4505 (match_dup 4)))]
4506 "TARGET_AVX"
4507 {
4508 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
4509 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
4510 })
4511
4512 (define_expand "vec_pack_trunc_v2df"
4513 [(match_operand:V4SF 0 "register_operand")
4514 (match_operand:V2DF 1 "nonimmediate_operand")
4515 (match_operand:V2DF 2 "nonimmediate_operand")]
4516 "TARGET_SSE2"
4517 {
4518 rtx tmp0, tmp1;
4519
4520 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4521 {
4522 tmp0 = gen_reg_rtx (V4DFmode);
4523 tmp1 = force_reg (V2DFmode, operands[1]);
4524
4525 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4526 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
4527 }
4528 else
4529 {
4530 tmp0 = gen_reg_rtx (V4SFmode);
4531 tmp1 = gen_reg_rtx (V4SFmode);
4532
4533 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
4534 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
4535 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
4536 }
4537 DONE;
4538 })
4539
4540 (define_expand "vec_pack_sfix_trunc_v8df"
4541 [(match_operand:V16SI 0 "register_operand")
4542 (match_operand:V8DF 1 "nonimmediate_operand")
4543 (match_operand:V8DF 2 "nonimmediate_operand")]
4544 "TARGET_AVX512F"
4545 {
4546 rtx r1, r2;
4547
4548 r1 = gen_reg_rtx (V8SImode);
4549 r2 = gen_reg_rtx (V8SImode);
4550
4551 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
4552 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
4553 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
4554 DONE;
4555 })
4556
4557 (define_expand "vec_pack_sfix_trunc_v4df"
4558 [(match_operand:V8SI 0 "register_operand")
4559 (match_operand:V4DF 1 "nonimmediate_operand")
4560 (match_operand:V4DF 2 "nonimmediate_operand")]
4561 "TARGET_AVX"
4562 {
4563 rtx r1, r2;
4564
4565 r1 = gen_reg_rtx (V4SImode);
4566 r2 = gen_reg_rtx (V4SImode);
4567
4568 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
4569 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
4570 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
4571 DONE;
4572 })
4573
4574 (define_expand "vec_pack_sfix_trunc_v2df"
4575 [(match_operand:V4SI 0 "register_operand")
4576 (match_operand:V2DF 1 "nonimmediate_operand")
4577 (match_operand:V2DF 2 "nonimmediate_operand")]
4578 "TARGET_SSE2"
4579 {
4580 rtx tmp0, tmp1, tmp2;
4581
4582 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4583 {
4584 tmp0 = gen_reg_rtx (V4DFmode);
4585 tmp1 = force_reg (V2DFmode, operands[1]);
4586
4587 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4588 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
4589 }
4590 else
4591 {
4592 tmp0 = gen_reg_rtx (V4SImode);
4593 tmp1 = gen_reg_rtx (V4SImode);
4594 tmp2 = gen_reg_rtx (V2DImode);
4595
4596 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
4597 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
4598 emit_insn (gen_vec_interleave_lowv2di (tmp2,
4599 gen_lowpart (V2DImode, tmp0),
4600 gen_lowpart (V2DImode, tmp1)));
4601 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
4602 }
4603 DONE;
4604 })
4605
4606 (define_mode_attr ssepackfltmode
4607 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
4608
4609 (define_expand "vec_pack_ufix_trunc_<mode>"
4610 [(match_operand:<ssepackfltmode> 0 "register_operand")
4611 (match_operand:VF2_128_256 1 "register_operand")
4612 (match_operand:VF2_128_256 2 "register_operand")]
4613 "TARGET_SSE2"
4614 {
4615 rtx tmp[7];
4616 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4617 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
4618 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
4619 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
4620 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
4621 {
4622 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
4623 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
4624 }
4625 else
4626 {
4627 tmp[5] = gen_reg_rtx (V8SFmode);
4628 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
4629 gen_lowpart (V8SFmode, tmp[3]), 0);
4630 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
4631 }
4632 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
4633 operands[0], 0, OPTAB_DIRECT);
4634 if (tmp[6] != operands[0])
4635 emit_move_insn (operands[0], tmp[6]);
4636 DONE;
4637 })
4638
4639 (define_expand "vec_pack_sfix_v4df"
4640 [(match_operand:V8SI 0 "register_operand")
4641 (match_operand:V4DF 1 "nonimmediate_operand")
4642 (match_operand:V4DF 2 "nonimmediate_operand")]
4643 "TARGET_AVX"
4644 {
4645 rtx r1, r2;
4646
4647 r1 = gen_reg_rtx (V4SImode);
4648 r2 = gen_reg_rtx (V4SImode);
4649
4650 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
4651 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
4652 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
4653 DONE;
4654 })
4655
4656 (define_expand "vec_pack_sfix_v2df"
4657 [(match_operand:V4SI 0 "register_operand")
4658 (match_operand:V2DF 1 "nonimmediate_operand")
4659 (match_operand:V2DF 2 "nonimmediate_operand")]
4660 "TARGET_SSE2"
4661 {
4662 rtx tmp0, tmp1, tmp2;
4663
4664 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4665 {
4666 tmp0 = gen_reg_rtx (V4DFmode);
4667 tmp1 = force_reg (V2DFmode, operands[1]);
4668
4669 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4670 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
4671 }
4672 else
4673 {
4674 tmp0 = gen_reg_rtx (V4SImode);
4675 tmp1 = gen_reg_rtx (V4SImode);
4676 tmp2 = gen_reg_rtx (V2DImode);
4677
4678 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
4679 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
4680 emit_insn (gen_vec_interleave_lowv2di (tmp2,
4681 gen_lowpart (V2DImode, tmp0),
4682 gen_lowpart (V2DImode, tmp1)));
4683 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
4684 }
4685 DONE;
4686 })
4687
4688 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4689 ;;
4690 ;; Parallel single-precision floating point element swizzling
4691 ;;
4692 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4693
4694 (define_expand "sse_movhlps_exp"
4695 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4696 (vec_select:V4SF
4697 (vec_concat:V8SF
4698 (match_operand:V4SF 1 "nonimmediate_operand")
4699 (match_operand:V4SF 2 "nonimmediate_operand"))
4700 (parallel [(const_int 6)
4701 (const_int 7)
4702 (const_int 2)
4703 (const_int 3)])))]
4704 "TARGET_SSE"
4705 {
4706 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4707
4708 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
4709
4710 /* Fix up the destination if needed. */
4711 if (dst != operands[0])
4712 emit_move_insn (operands[0], dst);
4713
4714 DONE;
4715 })
4716
4717 (define_insn "sse_movhlps"
4718 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
4719 (vec_select:V4SF
4720 (vec_concat:V8SF
4721 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
4722 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
4723 (parallel [(const_int 6)
4724 (const_int 7)
4725 (const_int 2)
4726 (const_int 3)])))]
4727 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4728 "@
4729 movhlps\t{%2, %0|%0, %2}
4730 vmovhlps\t{%2, %1, %0|%0, %1, %2}
4731 movlps\t{%H2, %0|%0, %H2}
4732 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
4733 %vmovhps\t{%2, %0|%q0, %2}"
4734 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4735 (set_attr "type" "ssemov")
4736 (set_attr "ssememalign" "64")
4737 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4738 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
4739
4740 (define_expand "sse_movlhps_exp"
4741 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4742 (vec_select:V4SF
4743 (vec_concat:V8SF
4744 (match_operand:V4SF 1 "nonimmediate_operand")
4745 (match_operand:V4SF 2 "nonimmediate_operand"))
4746 (parallel [(const_int 0)
4747 (const_int 1)
4748 (const_int 4)
4749 (const_int 5)])))]
4750 "TARGET_SSE"
4751 {
4752 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4753
4754 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
4755
4756 /* Fix up the destination if needed. */
4757 if (dst != operands[0])
4758 emit_move_insn (operands[0], dst);
4759
4760 DONE;
4761 })
4762
4763 (define_insn "sse_movlhps"
4764 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
4765 (vec_select:V4SF
4766 (vec_concat:V8SF
4767 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
4768 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
4769 (parallel [(const_int 0)
4770 (const_int 1)
4771 (const_int 4)
4772 (const_int 5)])))]
4773 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
4774 "@
4775 movlhps\t{%2, %0|%0, %2}
4776 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4777 movhps\t{%2, %0|%0, %q2}
4778 vmovhps\t{%2, %1, %0|%0, %1, %q2}
4779 %vmovlps\t{%2, %H0|%H0, %2}"
4780 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4781 (set_attr "type" "ssemov")
4782 (set_attr "ssememalign" "64")
4783 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4784 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
4785
4786 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
4787 [(set (match_operand:V16SF 0 "register_operand" "=v")
4788 (vec_select:V16SF
4789 (vec_concat:V32SF
4790 (match_operand:V16SF 1 "register_operand" "v")
4791 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
4792 (parallel [(const_int 2) (const_int 18)
4793 (const_int 3) (const_int 19)
4794 (const_int 6) (const_int 22)
4795 (const_int 7) (const_int 23)
4796 (const_int 10) (const_int 26)
4797 (const_int 11) (const_int 27)
4798 (const_int 14) (const_int 30)
4799 (const_int 15) (const_int 31)])))]
4800 "TARGET_AVX512F"
4801 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
4802 [(set_attr "type" "sselog")
4803 (set_attr "prefix" "evex")
4804 (set_attr "mode" "V16SF")])
4805
4806 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
4807 (define_insn "avx_unpckhps256"
4808 [(set (match_operand:V8SF 0 "register_operand" "=x")
4809 (vec_select:V8SF
4810 (vec_concat:V16SF
4811 (match_operand:V8SF 1 "register_operand" "x")
4812 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4813 (parallel [(const_int 2) (const_int 10)
4814 (const_int 3) (const_int 11)
4815 (const_int 6) (const_int 14)
4816 (const_int 7) (const_int 15)])))]
4817 "TARGET_AVX"
4818 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
4819 [(set_attr "type" "sselog")
4820 (set_attr "prefix" "vex")
4821 (set_attr "mode" "V8SF")])
4822
4823 (define_expand "vec_interleave_highv8sf"
4824 [(set (match_dup 3)
4825 (vec_select:V8SF
4826 (vec_concat:V16SF
4827 (match_operand:V8SF 1 "register_operand" "x")
4828 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4829 (parallel [(const_int 0) (const_int 8)
4830 (const_int 1) (const_int 9)
4831 (const_int 4) (const_int 12)
4832 (const_int 5) (const_int 13)])))
4833 (set (match_dup 4)
4834 (vec_select:V8SF
4835 (vec_concat:V16SF
4836 (match_dup 1)
4837 (match_dup 2))
4838 (parallel [(const_int 2) (const_int 10)
4839 (const_int 3) (const_int 11)
4840 (const_int 6) (const_int 14)
4841 (const_int 7) (const_int 15)])))
4842 (set (match_operand:V8SF 0 "register_operand")
4843 (vec_select:V8SF
4844 (vec_concat:V16SF
4845 (match_dup 3)
4846 (match_dup 4))
4847 (parallel [(const_int 4) (const_int 5)
4848 (const_int 6) (const_int 7)
4849 (const_int 12) (const_int 13)
4850 (const_int 14) (const_int 15)])))]
4851 "TARGET_AVX"
4852 {
4853 operands[3] = gen_reg_rtx (V8SFmode);
4854 operands[4] = gen_reg_rtx (V8SFmode);
4855 })
4856
4857 (define_insn "vec_interleave_highv4sf"
4858 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4859 (vec_select:V4SF
4860 (vec_concat:V8SF
4861 (match_operand:V4SF 1 "register_operand" "0,x")
4862 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
4863 (parallel [(const_int 2) (const_int 6)
4864 (const_int 3) (const_int 7)])))]
4865 "TARGET_SSE"
4866 "@
4867 unpckhps\t{%2, %0|%0, %2}
4868 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
4869 [(set_attr "isa" "noavx,avx")
4870 (set_attr "type" "sselog")
4871 (set_attr "prefix" "orig,vex")
4872 (set_attr "mode" "V4SF")])
4873
4874 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
4875 [(set (match_operand:V16SF 0 "register_operand" "=v")
4876 (vec_select:V16SF
4877 (vec_concat:V32SF
4878 (match_operand:V16SF 1 "register_operand" "v")
4879 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
4880 (parallel [(const_int 0) (const_int 16)
4881 (const_int 1) (const_int 17)
4882 (const_int 4) (const_int 20)
4883 (const_int 5) (const_int 21)
4884 (const_int 8) (const_int 24)
4885 (const_int 9) (const_int 25)
4886 (const_int 12) (const_int 28)
4887 (const_int 13) (const_int 29)])))]
4888 "TARGET_AVX512F"
4889 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
4890 [(set_attr "type" "sselog")
4891 (set_attr "prefix" "evex")
4892 (set_attr "mode" "V16SF")])
4893
4894 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
4895 (define_insn "avx_unpcklps256"
4896 [(set (match_operand:V8SF 0 "register_operand" "=x")
4897 (vec_select:V8SF
4898 (vec_concat:V16SF
4899 (match_operand:V8SF 1 "register_operand" "x")
4900 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4901 (parallel [(const_int 0) (const_int 8)
4902 (const_int 1) (const_int 9)
4903 (const_int 4) (const_int 12)
4904 (const_int 5) (const_int 13)])))]
4905 "TARGET_AVX"
4906 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
4907 [(set_attr "type" "sselog")
4908 (set_attr "prefix" "vex")
4909 (set_attr "mode" "V8SF")])
4910
4911 (define_expand "vec_interleave_lowv8sf"
4912 [(set (match_dup 3)
4913 (vec_select:V8SF
4914 (vec_concat:V16SF
4915 (match_operand:V8SF 1 "register_operand" "x")
4916 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4917 (parallel [(const_int 0) (const_int 8)
4918 (const_int 1) (const_int 9)
4919 (const_int 4) (const_int 12)
4920 (const_int 5) (const_int 13)])))
4921 (set (match_dup 4)
4922 (vec_select:V8SF
4923 (vec_concat:V16SF
4924 (match_dup 1)
4925 (match_dup 2))
4926 (parallel [(const_int 2) (const_int 10)
4927 (const_int 3) (const_int 11)
4928 (const_int 6) (const_int 14)
4929 (const_int 7) (const_int 15)])))
4930 (set (match_operand:V8SF 0 "register_operand")
4931 (vec_select:V8SF
4932 (vec_concat:V16SF
4933 (match_dup 3)
4934 (match_dup 4))
4935 (parallel [(const_int 0) (const_int 1)
4936 (const_int 2) (const_int 3)
4937 (const_int 8) (const_int 9)
4938 (const_int 10) (const_int 11)])))]
4939 "TARGET_AVX"
4940 {
4941 operands[3] = gen_reg_rtx (V8SFmode);
4942 operands[4] = gen_reg_rtx (V8SFmode);
4943 })
4944
4945 (define_insn "vec_interleave_lowv4sf"
4946 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4947 (vec_select:V4SF
4948 (vec_concat:V8SF
4949 (match_operand:V4SF 1 "register_operand" "0,x")
4950 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
4951 (parallel [(const_int 0) (const_int 4)
4952 (const_int 1) (const_int 5)])))]
4953 "TARGET_SSE"
4954 "@
4955 unpcklps\t{%2, %0|%0, %2}
4956 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
4957 [(set_attr "isa" "noavx,avx")
4958 (set_attr "type" "sselog")
4959 (set_attr "prefix" "orig,vex")
4960 (set_attr "mode" "V4SF")])
4961
4962 ;; These are modeled with the same vec_concat as the others so that we
4963 ;; capture users of shufps that can use the new instructions
4964 (define_insn "avx_movshdup256"
4965 [(set (match_operand:V8SF 0 "register_operand" "=x")
4966 (vec_select:V8SF
4967 (vec_concat:V16SF
4968 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4969 (match_dup 1))
4970 (parallel [(const_int 1) (const_int 1)
4971 (const_int 3) (const_int 3)
4972 (const_int 5) (const_int 5)
4973 (const_int 7) (const_int 7)])))]
4974 "TARGET_AVX"
4975 "vmovshdup\t{%1, %0|%0, %1}"
4976 [(set_attr "type" "sse")
4977 (set_attr "prefix" "vex")
4978 (set_attr "mode" "V8SF")])
4979
4980 (define_insn "sse3_movshdup"
4981 [(set (match_operand:V4SF 0 "register_operand" "=x")
4982 (vec_select:V4SF
4983 (vec_concat:V8SF
4984 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4985 (match_dup 1))
4986 (parallel [(const_int 1)
4987 (const_int 1)
4988 (const_int 7)
4989 (const_int 7)])))]
4990 "TARGET_SSE3"
4991 "%vmovshdup\t{%1, %0|%0, %1}"
4992 [(set_attr "type" "sse")
4993 (set_attr "prefix_rep" "1")
4994 (set_attr "prefix" "maybe_vex")
4995 (set_attr "mode" "V4SF")])
4996
4997 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
4998 [(set (match_operand:V16SF 0 "register_operand" "=v")
4999 (vec_select:V16SF
5000 (vec_concat:V32SF
5001 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5002 (match_dup 1))
5003 (parallel [(const_int 1) (const_int 1)
5004 (const_int 3) (const_int 3)
5005 (const_int 5) (const_int 5)
5006 (const_int 7) (const_int 7)
5007 (const_int 9) (const_int 9)
5008 (const_int 11) (const_int 11)
5009 (const_int 13) (const_int 13)
5010 (const_int 15) (const_int 15)])))]
5011 "TARGET_AVX512F"
5012 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5013 [(set_attr "type" "sse")
5014 (set_attr "prefix" "evex")
5015 (set_attr "mode" "V16SF")])
5016
5017 (define_insn "avx_movsldup256"
5018 [(set (match_operand:V8SF 0 "register_operand" "=x")
5019 (vec_select:V8SF
5020 (vec_concat:V16SF
5021 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5022 (match_dup 1))
5023 (parallel [(const_int 0) (const_int 0)
5024 (const_int 2) (const_int 2)
5025 (const_int 4) (const_int 4)
5026 (const_int 6) (const_int 6)])))]
5027 "TARGET_AVX"
5028 "vmovsldup\t{%1, %0|%0, %1}"
5029 [(set_attr "type" "sse")
5030 (set_attr "prefix" "vex")
5031 (set_attr "mode" "V8SF")])
5032
5033 (define_insn "sse3_movsldup"
5034 [(set (match_operand:V4SF 0 "register_operand" "=x")
5035 (vec_select:V4SF
5036 (vec_concat:V8SF
5037 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
5038 (match_dup 1))
5039 (parallel [(const_int 0)
5040 (const_int 0)
5041 (const_int 6)
5042 (const_int 6)])))]
5043 "TARGET_SSE3"
5044 "%vmovsldup\t{%1, %0|%0, %1}"
5045 [(set_attr "type" "sse")
5046 (set_attr "prefix_rep" "1")
5047 (set_attr "prefix" "maybe_vex")
5048 (set_attr "mode" "V4SF")])
5049
5050 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
5051 [(set (match_operand:V16SF 0 "register_operand" "=v")
5052 (vec_select:V16SF
5053 (vec_concat:V32SF
5054 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5055 (match_dup 1))
5056 (parallel [(const_int 0) (const_int 0)
5057 (const_int 2) (const_int 2)
5058 (const_int 4) (const_int 4)
5059 (const_int 6) (const_int 6)
5060 (const_int 8) (const_int 8)
5061 (const_int 10) (const_int 10)
5062 (const_int 12) (const_int 12)
5063 (const_int 14) (const_int 14)])))]
5064 "TARGET_AVX512F"
5065 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5066 [(set_attr "type" "sse")
5067 (set_attr "prefix" "evex")
5068 (set_attr "mode" "V16SF")])
5069
5070 (define_expand "avx_shufps256"
5071 [(match_operand:V8SF 0 "register_operand")
5072 (match_operand:V8SF 1 "register_operand")
5073 (match_operand:V8SF 2 "nonimmediate_operand")
5074 (match_operand:SI 3 "const_int_operand")]
5075 "TARGET_AVX"
5076 {
5077 int mask = INTVAL (operands[3]);
5078 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
5079 GEN_INT ((mask >> 0) & 3),
5080 GEN_INT ((mask >> 2) & 3),
5081 GEN_INT (((mask >> 4) & 3) + 8),
5082 GEN_INT (((mask >> 6) & 3) + 8),
5083 GEN_INT (((mask >> 0) & 3) + 4),
5084 GEN_INT (((mask >> 2) & 3) + 4),
5085 GEN_INT (((mask >> 4) & 3) + 12),
5086 GEN_INT (((mask >> 6) & 3) + 12)));
5087 DONE;
5088 })
5089
5090 ;; One bit in mask selects 2 elements.
5091 (define_insn "avx_shufps256_1"
5092 [(set (match_operand:V8SF 0 "register_operand" "=x")
5093 (vec_select:V8SF
5094 (vec_concat:V16SF
5095 (match_operand:V8SF 1 "register_operand" "x")
5096 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5097 (parallel [(match_operand 3 "const_0_to_3_operand" )
5098 (match_operand 4 "const_0_to_3_operand" )
5099 (match_operand 5 "const_8_to_11_operand" )
5100 (match_operand 6 "const_8_to_11_operand" )
5101 (match_operand 7 "const_4_to_7_operand" )
5102 (match_operand 8 "const_4_to_7_operand" )
5103 (match_operand 9 "const_12_to_15_operand")
5104 (match_operand 10 "const_12_to_15_operand")])))]
5105 "TARGET_AVX
5106 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
5107 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
5108 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
5109 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
5110 {
5111 int mask;
5112 mask = INTVAL (operands[3]);
5113 mask |= INTVAL (operands[4]) << 2;
5114 mask |= (INTVAL (operands[5]) - 8) << 4;
5115 mask |= (INTVAL (operands[6]) - 8) << 6;
5116 operands[3] = GEN_INT (mask);
5117
5118 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5119 }
5120 [(set_attr "type" "sseshuf")
5121 (set_attr "length_immediate" "1")
5122 (set_attr "prefix" "vex")
5123 (set_attr "mode" "V8SF")])
5124
5125 (define_expand "sse_shufps"
5126 [(match_operand:V4SF 0 "register_operand")
5127 (match_operand:V4SF 1 "register_operand")
5128 (match_operand:V4SF 2 "nonimmediate_operand")
5129 (match_operand:SI 3 "const_int_operand")]
5130 "TARGET_SSE"
5131 {
5132 int mask = INTVAL (operands[3]);
5133 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
5134 GEN_INT ((mask >> 0) & 3),
5135 GEN_INT ((mask >> 2) & 3),
5136 GEN_INT (((mask >> 4) & 3) + 4),
5137 GEN_INT (((mask >> 6) & 3) + 4)));
5138 DONE;
5139 })
5140
5141 (define_insn "sse_shufps_<mode>"
5142 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
5143 (vec_select:VI4F_128
5144 (vec_concat:<ssedoublevecmode>
5145 (match_operand:VI4F_128 1 "register_operand" "0,x")
5146 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
5147 (parallel [(match_operand 3 "const_0_to_3_operand")
5148 (match_operand 4 "const_0_to_3_operand")
5149 (match_operand 5 "const_4_to_7_operand")
5150 (match_operand 6 "const_4_to_7_operand")])))]
5151 "TARGET_SSE"
5152 {
5153 int mask = 0;
5154 mask |= INTVAL (operands[3]) << 0;
5155 mask |= INTVAL (operands[4]) << 2;
5156 mask |= (INTVAL (operands[5]) - 4) << 4;
5157 mask |= (INTVAL (operands[6]) - 4) << 6;
5158 operands[3] = GEN_INT (mask);
5159
5160 switch (which_alternative)
5161 {
5162 case 0:
5163 return "shufps\t{%3, %2, %0|%0, %2, %3}";
5164 case 1:
5165 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5166 default:
5167 gcc_unreachable ();
5168 }
5169 }
5170 [(set_attr "isa" "noavx,avx")
5171 (set_attr "type" "sseshuf")
5172 (set_attr "length_immediate" "1")
5173 (set_attr "prefix" "orig,vex")
5174 (set_attr "mode" "V4SF")])
5175
5176 (define_insn "sse_storehps"
5177 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5178 (vec_select:V2SF
5179 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
5180 (parallel [(const_int 2) (const_int 3)])))]
5181 "TARGET_SSE"
5182 "@
5183 %vmovhps\t{%1, %0|%q0, %1}
5184 %vmovhlps\t{%1, %d0|%d0, %1}
5185 %vmovlps\t{%H1, %d0|%d0, %H1}"
5186 [(set_attr "type" "ssemov")
5187 (set_attr "ssememalign" "64")
5188 (set_attr "prefix" "maybe_vex")
5189 (set_attr "mode" "V2SF,V4SF,V2SF")])
5190
5191 (define_expand "sse_loadhps_exp"
5192 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5193 (vec_concat:V4SF
5194 (vec_select:V2SF
5195 (match_operand:V4SF 1 "nonimmediate_operand")
5196 (parallel [(const_int 0) (const_int 1)]))
5197 (match_operand:V2SF 2 "nonimmediate_operand")))]
5198 "TARGET_SSE"
5199 {
5200 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5201
5202 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
5203
5204 /* Fix up the destination if needed. */
5205 if (dst != operands[0])
5206 emit_move_insn (operands[0], dst);
5207
5208 DONE;
5209 })
5210
5211 (define_insn "sse_loadhps"
5212 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5213 (vec_concat:V4SF
5214 (vec_select:V2SF
5215 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5216 (parallel [(const_int 0) (const_int 1)]))
5217 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
5218 "TARGET_SSE"
5219 "@
5220 movhps\t{%2, %0|%0, %q2}
5221 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5222 movlhps\t{%2, %0|%0, %2}
5223 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5224 %vmovlps\t{%2, %H0|%H0, %2}"
5225 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5226 (set_attr "type" "ssemov")
5227 (set_attr "ssememalign" "64")
5228 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5229 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
5230
5231 (define_insn "sse_storelps"
5232 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5233 (vec_select:V2SF
5234 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
5235 (parallel [(const_int 0) (const_int 1)])))]
5236 "TARGET_SSE"
5237 "@
5238 %vmovlps\t{%1, %0|%q0, %1}
5239 %vmovaps\t{%1, %0|%0, %1}
5240 %vmovlps\t{%1, %d0|%d0, %q1}"
5241 [(set_attr "type" "ssemov")
5242 (set_attr "prefix" "maybe_vex")
5243 (set_attr "mode" "V2SF,V4SF,V2SF")])
5244
5245 (define_expand "sse_loadlps_exp"
5246 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5247 (vec_concat:V4SF
5248 (match_operand:V2SF 2 "nonimmediate_operand")
5249 (vec_select:V2SF
5250 (match_operand:V4SF 1 "nonimmediate_operand")
5251 (parallel [(const_int 2) (const_int 3)]))))]
5252 "TARGET_SSE"
5253 {
5254 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5255
5256 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
5257
5258 /* Fix up the destination if needed. */
5259 if (dst != operands[0])
5260 emit_move_insn (operands[0], dst);
5261
5262 DONE;
5263 })
5264
5265 (define_insn "sse_loadlps"
5266 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5267 (vec_concat:V4SF
5268 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
5269 (vec_select:V2SF
5270 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
5271 (parallel [(const_int 2) (const_int 3)]))))]
5272 "TARGET_SSE"
5273 "@
5274 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
5275 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
5276 movlps\t{%2, %0|%0, %q2}
5277 vmovlps\t{%2, %1, %0|%0, %1, %q2}
5278 %vmovlps\t{%2, %0|%q0, %2}"
5279 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5280 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
5281 (set_attr "ssememalign" "64")
5282 (set_attr "length_immediate" "1,1,*,*,*")
5283 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5284 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
5285
5286 (define_insn "sse_movss"
5287 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5288 (vec_merge:V4SF
5289 (match_operand:V4SF 2 "register_operand" " x,x")
5290 (match_operand:V4SF 1 "register_operand" " 0,x")
5291 (const_int 1)))]
5292 "TARGET_SSE"
5293 "@
5294 movss\t{%2, %0|%0, %2}
5295 vmovss\t{%2, %1, %0|%0, %1, %2}"
5296 [(set_attr "isa" "noavx,avx")
5297 (set_attr "type" "ssemov")
5298 (set_attr "prefix" "orig,vex")
5299 (set_attr "mode" "SF")])
5300
5301 (define_insn "avx2_vec_dup<mode>"
5302 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
5303 (vec_duplicate:VF1_128_256
5304 (vec_select:SF
5305 (match_operand:V4SF 1 "register_operand" "x")
5306 (parallel [(const_int 0)]))))]
5307 "TARGET_AVX2"
5308 "vbroadcastss\t{%1, %0|%0, %1}"
5309 [(set_attr "type" "sselog1")
5310 (set_attr "prefix" "vex")
5311 (set_attr "mode" "<MODE>")])
5312
5313 (define_insn "avx2_vec_dupv8sf_1"
5314 [(set (match_operand:V8SF 0 "register_operand" "=x")
5315 (vec_duplicate:V8SF
5316 (vec_select:SF
5317 (match_operand:V8SF 1 "register_operand" "x")
5318 (parallel [(const_int 0)]))))]
5319 "TARGET_AVX2"
5320 "vbroadcastss\t{%x1, %0|%0, %x1}"
5321 [(set_attr "type" "sselog1")
5322 (set_attr "prefix" "vex")
5323 (set_attr "mode" "V8SF")])
5324
5325 (define_insn "vec_dupv4sf"
5326 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
5327 (vec_duplicate:V4SF
5328 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
5329 "TARGET_SSE"
5330 "@
5331 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
5332 vbroadcastss\t{%1, %0|%0, %1}
5333 shufps\t{$0, %0, %0|%0, %0, 0}"
5334 [(set_attr "isa" "avx,avx,noavx")
5335 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
5336 (set_attr "length_immediate" "1,0,1")
5337 (set_attr "prefix_extra" "0,1,*")
5338 (set_attr "prefix" "vex,vex,orig")
5339 (set_attr "mode" "V4SF")])
5340
5341 ;; Although insertps takes register source, we prefer
5342 ;; unpcklps with register source since it is shorter.
5343 (define_insn "*vec_concatv2sf_sse4_1"
5344 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
5345 (vec_concat:V2SF
5346 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
5347 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
5348 "TARGET_SSE4_1"
5349 "@
5350 unpcklps\t{%2, %0|%0, %2}
5351 vunpcklps\t{%2, %1, %0|%0, %1, %2}
5352 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
5353 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
5354 %vmovss\t{%1, %0|%0, %1}
5355 punpckldq\t{%2, %0|%0, %2}
5356 movd\t{%1, %0|%0, %1}"
5357 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
5358 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
5359 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
5360 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
5361 (set_attr "length_immediate" "*,*,1,1,*,*,*")
5362 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
5363 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
5364
5365 ;; ??? In theory we can match memory for the MMX alternative, but allowing
5366 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
5367 ;; alternatives pretty much forces the MMX alternative to be chosen.
5368 (define_insn "*vec_concatv2sf_sse"
5369 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
5370 (vec_concat:V2SF
5371 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
5372 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
5373 "TARGET_SSE"
5374 "@
5375 unpcklps\t{%2, %0|%0, %2}
5376 movss\t{%1, %0|%0, %1}
5377 punpckldq\t{%2, %0|%0, %2}
5378 movd\t{%1, %0|%0, %1}"
5379 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5380 (set_attr "mode" "V4SF,SF,DI,DI")])
5381
5382 (define_insn "*vec_concatv4sf"
5383 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
5384 (vec_concat:V4SF
5385 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
5386 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
5387 "TARGET_SSE"
5388 "@
5389 movlhps\t{%2, %0|%0, %2}
5390 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5391 movhps\t{%2, %0|%0, %q2}
5392 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
5393 [(set_attr "isa" "noavx,avx,noavx,avx")
5394 (set_attr "type" "ssemov")
5395 (set_attr "prefix" "orig,vex,orig,vex")
5396 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
5397
5398 (define_expand "vec_init<mode>"
5399 [(match_operand:V_128 0 "register_operand")
5400 (match_operand 1)]
5401 "TARGET_SSE"
5402 {
5403 ix86_expand_vector_init (false, operands[0], operands[1]);
5404 DONE;
5405 })
5406
5407 ;; Avoid combining registers from different units in a single alternative,
5408 ;; see comment above inline_secondary_memory_needed function in i386.c
5409 (define_insn "vec_set<mode>_0"
5410 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
5411 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
5412 (vec_merge:VI4F_128
5413 (vec_duplicate:VI4F_128
5414 (match_operand:<ssescalarmode> 2 "general_operand"
5415 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
5416 (match_operand:VI4F_128 1 "vector_move_operand"
5417 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
5418 (const_int 1)))]
5419 "TARGET_SSE"
5420 "@
5421 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
5422 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
5423 %vmovd\t{%2, %0|%0, %2}
5424 movss\t{%2, %0|%0, %2}
5425 movss\t{%2, %0|%0, %2}
5426 vmovss\t{%2, %1, %0|%0, %1, %2}
5427 pinsrd\t{$0, %2, %0|%0, %2, 0}
5428 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
5429 #
5430 #
5431 #"
5432 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
5433 (set (attr "type")
5434 (cond [(eq_attr "alternative" "0,6,7")
5435 (const_string "sselog")
5436 (eq_attr "alternative" "9")
5437 (const_string "imov")
5438 (eq_attr "alternative" "10")
5439 (const_string "fmov")
5440 ]
5441 (const_string "ssemov")))
5442 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
5443 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
5444 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
5445 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
5446
5447 ;; A subset is vec_setv4sf.
5448 (define_insn "*vec_setv4sf_sse4_1"
5449 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5450 (vec_merge:V4SF
5451 (vec_duplicate:V4SF
5452 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
5453 (match_operand:V4SF 1 "register_operand" "0,x")
5454 (match_operand:SI 3 "const_int_operand")))]
5455 "TARGET_SSE4_1
5456 && ((unsigned) exact_log2 (INTVAL (operands[3]))
5457 < GET_MODE_NUNITS (V4SFmode))"
5458 {
5459 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
5460 switch (which_alternative)
5461 {
5462 case 0:
5463 return "insertps\t{%3, %2, %0|%0, %2, %3}";
5464 case 1:
5465 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5466 default:
5467 gcc_unreachable ();
5468 }
5469 }
5470 [(set_attr "isa" "noavx,avx")
5471 (set_attr "type" "sselog")
5472 (set_attr "prefix_data16" "1,*")
5473 (set_attr "prefix_extra" "1")
5474 (set_attr "length_immediate" "1")
5475 (set_attr "prefix" "orig,vex")
5476 (set_attr "mode" "V4SF")])
5477
5478 (define_insn "sse4_1_insertps"
5479 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5480 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
5481 (match_operand:V4SF 1 "register_operand" "0,x")
5482 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
5483 UNSPEC_INSERTPS))]
5484 "TARGET_SSE4_1"
5485 {
5486 if (MEM_P (operands[2]))
5487 {
5488 unsigned count_s = INTVAL (operands[3]) >> 6;
5489 if (count_s)
5490 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
5491 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
5492 }
5493 switch (which_alternative)
5494 {
5495 case 0:
5496 return "insertps\t{%3, %2, %0|%0, %2, %3}";
5497 case 1:
5498 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5499 default:
5500 gcc_unreachable ();
5501 }
5502 }
5503 [(set_attr "isa" "noavx,avx")
5504 (set_attr "type" "sselog")
5505 (set_attr "prefix_data16" "1,*")
5506 (set_attr "prefix_extra" "1")
5507 (set_attr "length_immediate" "1")
5508 (set_attr "prefix" "orig,vex")
5509 (set_attr "mode" "V4SF")])
5510
5511 (define_split
5512 [(set (match_operand:VI4F_128 0 "memory_operand")
5513 (vec_merge:VI4F_128
5514 (vec_duplicate:VI4F_128
5515 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
5516 (match_dup 0)
5517 (const_int 1)))]
5518 "TARGET_SSE && reload_completed"
5519 [(set (match_dup 0) (match_dup 1))]
5520 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
5521
5522 (define_expand "vec_set<mode>"
5523 [(match_operand:V 0 "register_operand")
5524 (match_operand:<ssescalarmode> 1 "register_operand")
5525 (match_operand 2 "const_int_operand")]
5526 "TARGET_SSE"
5527 {
5528 ix86_expand_vector_set (false, operands[0], operands[1],
5529 INTVAL (operands[2]));
5530 DONE;
5531 })
5532
5533 (define_insn_and_split "*vec_extractv4sf_0"
5534 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
5535 (vec_select:SF
5536 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
5537 (parallel [(const_int 0)])))]
5538 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5539 "#"
5540 "&& reload_completed"
5541 [(set (match_dup 0) (match_dup 1))]
5542 {
5543 if (REG_P (operands[1]))
5544 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
5545 else
5546 operands[1] = adjust_address (operands[1], SFmode, 0);
5547 })
5548
5549 (define_insn_and_split "*sse4_1_extractps"
5550 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
5551 (vec_select:SF
5552 (match_operand:V4SF 1 "register_operand" "x,0,x")
5553 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
5554 "TARGET_SSE4_1"
5555 "@
5556 %vextractps\t{%2, %1, %0|%0, %1, %2}
5557 #
5558 #"
5559 "&& reload_completed && SSE_REG_P (operands[0])"
5560 [(const_int 0)]
5561 {
5562 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
5563 switch (INTVAL (operands[2]))
5564 {
5565 case 1:
5566 case 3:
5567 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
5568 operands[2], operands[2],
5569 GEN_INT (INTVAL (operands[2]) + 4),
5570 GEN_INT (INTVAL (operands[2]) + 4)));
5571 break;
5572 case 2:
5573 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
5574 break;
5575 default:
5576 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
5577 gcc_unreachable ();
5578 }
5579 DONE;
5580 }
5581 [(set_attr "isa" "*,noavx,avx")
5582 (set_attr "type" "sselog,*,*")
5583 (set_attr "prefix_data16" "1,*,*")
5584 (set_attr "prefix_extra" "1,*,*")
5585 (set_attr "length_immediate" "1,*,*")
5586 (set_attr "prefix" "maybe_vex,*,*")
5587 (set_attr "mode" "V4SF,*,*")])
5588
5589 (define_insn_and_split "*vec_extractv4sf_mem"
5590 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
5591 (vec_select:SF
5592 (match_operand:V4SF 1 "memory_operand" "o,o,o")
5593 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
5594 "TARGET_SSE"
5595 "#"
5596 "&& reload_completed"
5597 [(set (match_dup 0) (match_dup 1))]
5598 {
5599 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
5600 })
5601
5602 (define_expand "avx512f_vextract<shuffletype>32x4_mask"
5603 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
5604 (match_operand:V16FI 1 "register_operand")
5605 (match_operand:SI 2 "const_0_to_3_operand")
5606 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
5607 (match_operand:QI 4 "register_operand")]
5608 "TARGET_AVX512F"
5609 {
5610 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
5611 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
5612 switch (INTVAL (operands[2]))
5613 {
5614 case 0:
5615 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5616 operands[1], GEN_INT (0), GEN_INT (1), GEN_INT (2),
5617 GEN_INT (3), operands[3], operands[4]));
5618 break;
5619 case 1:
5620 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5621 operands[1], GEN_INT (4), GEN_INT (5), GEN_INT (6),
5622 GEN_INT (7), operands[3], operands[4]));
5623 break;
5624 case 2:
5625 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5626 operands[1], GEN_INT (8), GEN_INT (9), GEN_INT (10),
5627 GEN_INT (11), operands[3], operands[4]));
5628 break;
5629 case 3:
5630 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5631 operands[1], GEN_INT (12), GEN_INT (13), GEN_INT (14),
5632 GEN_INT (15), operands[3], operands[4]));
5633 break;
5634 default:
5635 gcc_unreachable ();
5636 }
5637 DONE;
5638 })
5639
5640 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
5641 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
5642 (vec_merge:<ssequartermode>
5643 (vec_select:<ssequartermode>
5644 (match_operand:V16FI 1 "register_operand" "v")
5645 (parallel [(match_operand 2 "const_0_to_15_operand")
5646 (match_operand 3 "const_0_to_15_operand")
5647 (match_operand 4 "const_0_to_15_operand")
5648 (match_operand 5 "const_0_to_15_operand")]))
5649 (match_operand:<ssequartermode> 6 "memory_operand" "0")
5650 (match_operand:QI 7 "register_operand" "k")))]
5651 "TARGET_AVX512F && (INTVAL (operands[2]) = INTVAL (operands[3]) - 1)
5652 && (INTVAL (operands[3]) = INTVAL (operands[4]) - 1)
5653 && (INTVAL (operands[4]) = INTVAL (operands[5]) - 1)"
5654 {
5655 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5656 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
5657 }
5658 [(set_attr "type" "sselog")
5659 (set_attr "prefix_extra" "1")
5660 (set_attr "length_immediate" "1")
5661 (set_attr "memory" "store")
5662 (set_attr "prefix" "evex")
5663 (set_attr "mode" "<sseinsnmode>")])
5664
5665 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
5666 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5667 (vec_select:<ssequartermode>
5668 (match_operand:V16FI 1 "register_operand" "v")
5669 (parallel [(match_operand 2 "const_0_to_15_operand")
5670 (match_operand 3 "const_0_to_15_operand")
5671 (match_operand 4 "const_0_to_15_operand")
5672 (match_operand 5 "const_0_to_15_operand")])))]
5673 "TARGET_AVX512F && (INTVAL (operands[2]) = INTVAL (operands[3]) - 1)
5674 && (INTVAL (operands[3]) = INTVAL (operands[4]) - 1)
5675 && (INTVAL (operands[4]) = INTVAL (operands[5]) - 1)"
5676 {
5677 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5678 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
5679 }
5680 [(set_attr "type" "sselog")
5681 (set_attr "prefix_extra" "1")
5682 (set_attr "length_immediate" "1")
5683 (set (attr "memory")
5684 (if_then_else (match_test "MEM_P (operands[0])")
5685 (const_string "store")
5686 (const_string "none")))
5687 (set_attr "prefix" "evex")
5688 (set_attr "mode" "<sseinsnmode>")])
5689
5690 (define_expand "avx512f_vextract<shuffletype>64x4_mask"
5691 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5692 (match_operand:V8FI 1 "register_operand")
5693 (match_operand:SI 2 "const_0_to_1_operand")
5694 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
5695 (match_operand:QI 4 "register_operand")]
5696 "TARGET_AVX512F"
5697 {
5698 rtx (*insn)(rtx, rtx, rtx, rtx);
5699
5700 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
5701 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
5702
5703 switch (INTVAL (operands[2]))
5704 {
5705 case 0:
5706 insn = gen_vec_extract_lo_<mode>_mask;
5707 break;
5708 case 1:
5709 insn = gen_vec_extract_hi_<mode>_mask;
5710 break;
5711 default:
5712 gcc_unreachable ();
5713 }
5714
5715 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
5716 DONE;
5717 })
5718
5719 (define_split
5720 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5721 (vec_select:<ssehalfvecmode>
5722 (match_operand:V8FI 1 "nonimmediate_operand")
5723 (parallel [(const_int 0) (const_int 1)
5724 (const_int 2) (const_int 3)])))]
5725 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
5726 && reload_completed"
5727 [(const_int 0)]
5728 {
5729 rtx op1 = operands[1];
5730 if (REG_P (op1))
5731 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
5732 else
5733 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
5734 emit_move_insn (operands[0], op1);
5735 DONE;
5736 })
5737
5738 (define_insn "vec_extract_lo_<mode>_maskm"
5739 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
5740 (vec_merge:<ssehalfvecmode>
5741 (vec_select:<ssehalfvecmode>
5742 (match_operand:V8FI 1 "register_operand" "v")
5743 (parallel [(const_int 0) (const_int 1)
5744 (const_int 2) (const_int 3)]))
5745 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
5746 (match_operand:QI 3 "register_operand" "k")))]
5747 "TARGET_AVX512F"
5748 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
5749 [(set_attr "type" "sselog")
5750 (set_attr "prefix_extra" "1")
5751 (set_attr "length_immediate" "1")
5752 (set_attr "prefix" "evex")
5753 (set_attr "mode" "<sseinsnmode>")])
5754
5755 (define_insn "vec_extract_lo_<mode><mask_name>"
5756 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5757 (vec_select:<ssehalfvecmode>
5758 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
5759 (parallel [(const_int 0) (const_int 1)
5760 (const_int 2) (const_int 3)])))]
5761 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5762 {
5763 if (<mask_applied>)
5764 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
5765 else
5766 return "#";
5767 }
5768 [(set_attr "type" "sselog")
5769 (set_attr "prefix_extra" "1")
5770 (set_attr "length_immediate" "1")
5771 (set (attr "memory")
5772 (if_then_else (match_test "MEM_P (operands[0])")
5773 (const_string "store")
5774 (const_string "none")))
5775 (set_attr "prefix" "evex")
5776 (set_attr "mode" "<sseinsnmode>")])
5777
5778 (define_insn "vec_extract_hi_<mode>_maskm"
5779 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
5780 (vec_merge:<ssehalfvecmode>
5781 (vec_select:<ssehalfvecmode>
5782 (match_operand:V8FI 1 "register_operand" "v")
5783 (parallel [(const_int 4) (const_int 5)
5784 (const_int 6) (const_int 7)]))
5785 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
5786 (match_operand:QI 3 "register_operand" "k")))]
5787 "TARGET_AVX512F"
5788 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
5789 [(set_attr "type" "sselog")
5790 (set_attr "prefix_extra" "1")
5791 (set_attr "length_immediate" "1")
5792 (set_attr "memory" "store")
5793 (set_attr "prefix" "evex")
5794 (set_attr "mode" "<sseinsnmode>")])
5795
5796 (define_insn "vec_extract_hi_<mode><mask_name>"
5797 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5798 (vec_select:<ssehalfvecmode>
5799 (match_operand:V8FI 1 "register_operand" "v")
5800 (parallel [(const_int 4) (const_int 5)
5801 (const_int 6) (const_int 7)])))]
5802 "TARGET_AVX512F"
5803 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
5804 [(set_attr "type" "sselog")
5805 (set_attr "prefix_extra" "1")
5806 (set_attr "length_immediate" "1")
5807 (set (attr "memory")
5808 (if_then_else (match_test "MEM_P (operands[0])")
5809 (const_string "store")
5810 (const_string "none")))
5811 (set_attr "prefix" "evex")
5812 (set_attr "mode" "<sseinsnmode>")])
5813
5814 (define_expand "avx_vextractf128<mode>"
5815 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5816 (match_operand:V_256 1 "register_operand")
5817 (match_operand:SI 2 "const_0_to_1_operand")]
5818 "TARGET_AVX"
5819 {
5820 rtx (*insn)(rtx, rtx);
5821
5822 switch (INTVAL (operands[2]))
5823 {
5824 case 0:
5825 insn = gen_vec_extract_lo_<mode>;
5826 break;
5827 case 1:
5828 insn = gen_vec_extract_hi_<mode>;
5829 break;
5830 default:
5831 gcc_unreachable ();
5832 }
5833
5834 emit_insn (insn (operands[0], operands[1]));
5835 DONE;
5836 })
5837
5838 (define_insn_and_split "vec_extract_lo_<mode>"
5839 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
5840 (vec_select:<ssehalfvecmode>
5841 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
5842 (parallel [(const_int 0) (const_int 1)
5843 (const_int 2) (const_int 3)
5844 (const_int 4) (const_int 5)
5845 (const_int 6) (const_int 7)])))]
5846 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5847 "#"
5848 "&& reload_completed"
5849 [(const_int 0)]
5850 {
5851 rtx op1 = operands[1];
5852 if (REG_P (op1))
5853 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
5854 else
5855 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
5856 emit_move_insn (operands[0], op1);
5857 DONE;
5858 })
5859
5860 (define_insn "vec_extract_hi_<mode>"
5861 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
5862 (vec_select:<ssehalfvecmode>
5863 (match_operand:V16FI 1 "nonimmediate_operand" "v,v")
5864 (parallel [(const_int 8) (const_int 9)
5865 (const_int 10) (const_int 11)
5866 (const_int 12) (const_int 13)
5867 (const_int 14) (const_int 15)])))]
5868 "TARGET_AVX512F"
5869 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
5870 [(set_attr "type" "sselog")
5871 (set_attr "prefix_extra" "1")
5872 (set_attr "length_immediate" "1")
5873 (set_attr "memory" "none,store")
5874 (set_attr "prefix" "evex")
5875 (set_attr "mode" "XI")])
5876
5877 (define_insn_and_split "vec_extract_lo_<mode>"
5878 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
5879 (vec_select:<ssehalfvecmode>
5880 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
5881 (parallel [(const_int 0) (const_int 1)])))]
5882 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5883 "#"
5884 "&& reload_completed"
5885 [(set (match_dup 0) (match_dup 1))]
5886 {
5887 if (REG_P (operands[1]))
5888 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
5889 else
5890 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
5891 })
5892
5893 (define_insn "vec_extract_hi_<mode>"
5894 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
5895 (vec_select:<ssehalfvecmode>
5896 (match_operand:VI8F_256 1 "register_operand" "x,x")
5897 (parallel [(const_int 2) (const_int 3)])))]
5898 "TARGET_AVX"
5899 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
5900 [(set_attr "type" "sselog")
5901 (set_attr "prefix_extra" "1")
5902 (set_attr "length_immediate" "1")
5903 (set_attr "memory" "none,store")
5904 (set_attr "prefix" "vex")
5905 (set_attr "mode" "<sseinsnmode>")])
5906
5907 (define_insn_and_split "vec_extract_lo_<mode>"
5908 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
5909 (vec_select:<ssehalfvecmode>
5910 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
5911 (parallel [(const_int 0) (const_int 1)
5912 (const_int 2) (const_int 3)])))]
5913 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5914 "#"
5915 "&& reload_completed"
5916 [(set (match_dup 0) (match_dup 1))]
5917 {
5918 if (REG_P (operands[1]))
5919 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
5920 else
5921 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
5922 })
5923
5924 (define_insn "vec_extract_hi_<mode>"
5925 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
5926 (vec_select:<ssehalfvecmode>
5927 (match_operand:VI4F_256 1 "register_operand" "x,x")
5928 (parallel [(const_int 4) (const_int 5)
5929 (const_int 6) (const_int 7)])))]
5930 "TARGET_AVX"
5931 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
5932 [(set_attr "type" "sselog")
5933 (set_attr "prefix_extra" "1")
5934 (set_attr "length_immediate" "1")
5935 (set_attr "memory" "none,store")
5936 (set_attr "prefix" "vex")
5937 (set_attr "mode" "<sseinsnmode>")])
5938
5939 (define_insn_and_split "vec_extract_lo_v32hi"
5940 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
5941 (vec_select:V16HI
5942 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
5943 (parallel [(const_int 0) (const_int 1)
5944 (const_int 2) (const_int 3)
5945 (const_int 4) (const_int 5)
5946 (const_int 6) (const_int 7)
5947 (const_int 8) (const_int 9)
5948 (const_int 10) (const_int 11)
5949 (const_int 12) (const_int 13)
5950 (const_int 14) (const_int 15)])))]
5951 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5952 "#"
5953 "&& reload_completed"
5954 [(set (match_dup 0) (match_dup 1))]
5955 {
5956 if (REG_P (operands[1]))
5957 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
5958 else
5959 operands[1] = adjust_address (operands[1], V16HImode, 0);
5960 })
5961
5962 (define_insn "vec_extract_hi_v32hi"
5963 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
5964 (vec_select:V16HI
5965 (match_operand:V32HI 1 "nonimmediate_operand" "v,v")
5966 (parallel [(const_int 16) (const_int 17)
5967 (const_int 18) (const_int 19)
5968 (const_int 20) (const_int 21)
5969 (const_int 22) (const_int 23)
5970 (const_int 24) (const_int 25)
5971 (const_int 26) (const_int 27)
5972 (const_int 28) (const_int 29)
5973 (const_int 30) (const_int 31)])))]
5974 "TARGET_AVX512F"
5975 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
5976 [(set_attr "type" "sselog")
5977 (set_attr "prefix_extra" "1")
5978 (set_attr "length_immediate" "1")
5979 (set_attr "memory" "none,store")
5980 (set_attr "prefix" "evex")
5981 (set_attr "mode" "XI")])
5982
5983 (define_insn_and_split "vec_extract_lo_v16hi"
5984 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
5985 (vec_select:V8HI
5986 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
5987 (parallel [(const_int 0) (const_int 1)
5988 (const_int 2) (const_int 3)
5989 (const_int 4) (const_int 5)
5990 (const_int 6) (const_int 7)])))]
5991 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5992 "#"
5993 "&& reload_completed"
5994 [(set (match_dup 0) (match_dup 1))]
5995 {
5996 if (REG_P (operands[1]))
5997 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
5998 else
5999 operands[1] = adjust_address (operands[1], V8HImode, 0);
6000 })
6001
6002 (define_insn "vec_extract_hi_v16hi"
6003 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6004 (vec_select:V8HI
6005 (match_operand:V16HI 1 "register_operand" "x,x")
6006 (parallel [(const_int 8) (const_int 9)
6007 (const_int 10) (const_int 11)
6008 (const_int 12) (const_int 13)
6009 (const_int 14) (const_int 15)])))]
6010 "TARGET_AVX"
6011 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6012 [(set_attr "type" "sselog")
6013 (set_attr "prefix_extra" "1")
6014 (set_attr "length_immediate" "1")
6015 (set_attr "memory" "none,store")
6016 (set_attr "prefix" "vex")
6017 (set_attr "mode" "OI")])
6018
6019 (define_insn_and_split "vec_extract_lo_v64qi"
6020 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6021 (vec_select:V32QI
6022 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
6023 (parallel [(const_int 0) (const_int 1)
6024 (const_int 2) (const_int 3)
6025 (const_int 4) (const_int 5)
6026 (const_int 6) (const_int 7)
6027 (const_int 8) (const_int 9)
6028 (const_int 10) (const_int 11)
6029 (const_int 12) (const_int 13)
6030 (const_int 14) (const_int 15)
6031 (const_int 16) (const_int 17)
6032 (const_int 18) (const_int 19)
6033 (const_int 20) (const_int 21)
6034 (const_int 22) (const_int 23)
6035 (const_int 24) (const_int 25)
6036 (const_int 26) (const_int 27)
6037 (const_int 28) (const_int 29)
6038 (const_int 30) (const_int 31)])))]
6039 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6040 "#"
6041 "&& reload_completed"
6042 [(set (match_dup 0) (match_dup 1))]
6043 {
6044 if (REG_P (operands[1]))
6045 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
6046 else
6047 operands[1] = adjust_address (operands[1], V32QImode, 0);
6048 })
6049
6050 (define_insn "vec_extract_hi_v64qi"
6051 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6052 (vec_select:V32QI
6053 (match_operand:V64QI 1 "nonimmediate_operand" "v,v")
6054 (parallel [(const_int 32) (const_int 33)
6055 (const_int 34) (const_int 35)
6056 (const_int 36) (const_int 37)
6057 (const_int 38) (const_int 39)
6058 (const_int 40) (const_int 41)
6059 (const_int 42) (const_int 43)
6060 (const_int 44) (const_int 45)
6061 (const_int 46) (const_int 47)
6062 (const_int 48) (const_int 49)
6063 (const_int 50) (const_int 51)
6064 (const_int 52) (const_int 53)
6065 (const_int 54) (const_int 55)
6066 (const_int 56) (const_int 57)
6067 (const_int 58) (const_int 59)
6068 (const_int 60) (const_int 61)
6069 (const_int 62) (const_int 63)])))]
6070 "TARGET_AVX512F"
6071 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6072 [(set_attr "type" "sselog")
6073 (set_attr "prefix_extra" "1")
6074 (set_attr "length_immediate" "1")
6075 (set_attr "memory" "none,store")
6076 (set_attr "prefix" "evex")
6077 (set_attr "mode" "XI")])
6078
6079 (define_insn_and_split "vec_extract_lo_v32qi"
6080 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6081 (vec_select:V16QI
6082 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
6083 (parallel [(const_int 0) (const_int 1)
6084 (const_int 2) (const_int 3)
6085 (const_int 4) (const_int 5)
6086 (const_int 6) (const_int 7)
6087 (const_int 8) (const_int 9)
6088 (const_int 10) (const_int 11)
6089 (const_int 12) (const_int 13)
6090 (const_int 14) (const_int 15)])))]
6091 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6092 "#"
6093 "&& reload_completed"
6094 [(set (match_dup 0) (match_dup 1))]
6095 {
6096 if (REG_P (operands[1]))
6097 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
6098 else
6099 operands[1] = adjust_address (operands[1], V16QImode, 0);
6100 })
6101
6102 (define_insn "vec_extract_hi_v32qi"
6103 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6104 (vec_select:V16QI
6105 (match_operand:V32QI 1 "register_operand" "x,x")
6106 (parallel [(const_int 16) (const_int 17)
6107 (const_int 18) (const_int 19)
6108 (const_int 20) (const_int 21)
6109 (const_int 22) (const_int 23)
6110 (const_int 24) (const_int 25)
6111 (const_int 26) (const_int 27)
6112 (const_int 28) (const_int 29)
6113 (const_int 30) (const_int 31)])))]
6114 "TARGET_AVX"
6115 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6116 [(set_attr "type" "sselog")
6117 (set_attr "prefix_extra" "1")
6118 (set_attr "length_immediate" "1")
6119 (set_attr "memory" "none,store")
6120 (set_attr "prefix" "vex")
6121 (set_attr "mode" "OI")])
6122
6123 ;; Modes handled by vec_extract patterns.
6124 (define_mode_iterator VEC_EXTRACT_MODE
6125 [(V32QI "TARGET_AVX") V16QI
6126 (V16HI "TARGET_AVX") V8HI
6127 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
6128 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
6129 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
6130 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
6131
6132 (define_expand "vec_extract<mode>"
6133 [(match_operand:<ssescalarmode> 0 "register_operand")
6134 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
6135 (match_operand 2 "const_int_operand")]
6136 "TARGET_SSE"
6137 {
6138 ix86_expand_vector_extract (false, operands[0], operands[1],
6139 INTVAL (operands[2]));
6140 DONE;
6141 })
6142
6143 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6144 ;;
6145 ;; Parallel double-precision floating point element swizzling
6146 ;;
6147 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6148
6149 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
6150 [(set (match_operand:V8DF 0 "register_operand" "=v")
6151 (vec_select:V8DF
6152 (vec_concat:V16DF
6153 (match_operand:V8DF 1 "nonimmediate_operand" "v")
6154 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
6155 (parallel [(const_int 1) (const_int 9)
6156 (const_int 3) (const_int 11)
6157 (const_int 5) (const_int 13)
6158 (const_int 7) (const_int 15)])))]
6159 "TARGET_AVX512F"
6160 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6161 [(set_attr "type" "sselog")
6162 (set_attr "prefix" "evex")
6163 (set_attr "mode" "V8DF")])
6164
6165 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
6166 (define_insn "avx_unpckhpd256"
6167 [(set (match_operand:V4DF 0 "register_operand" "=x")
6168 (vec_select:V4DF
6169 (vec_concat:V8DF
6170 (match_operand:V4DF 1 "register_operand" "x")
6171 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6172 (parallel [(const_int 1) (const_int 5)
6173 (const_int 3) (const_int 7)])))]
6174 "TARGET_AVX"
6175 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
6176 [(set_attr "type" "sselog")
6177 (set_attr "prefix" "vex")
6178 (set_attr "mode" "V4DF")])
6179
6180 (define_expand "vec_interleave_highv4df"
6181 [(set (match_dup 3)
6182 (vec_select:V4DF
6183 (vec_concat:V8DF
6184 (match_operand:V4DF 1 "register_operand" "x")
6185 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6186 (parallel [(const_int 0) (const_int 4)
6187 (const_int 2) (const_int 6)])))
6188 (set (match_dup 4)
6189 (vec_select:V4DF
6190 (vec_concat:V8DF
6191 (match_dup 1)
6192 (match_dup 2))
6193 (parallel [(const_int 1) (const_int 5)
6194 (const_int 3) (const_int 7)])))
6195 (set (match_operand:V4DF 0 "register_operand")
6196 (vec_select:V4DF
6197 (vec_concat:V8DF
6198 (match_dup 3)
6199 (match_dup 4))
6200 (parallel [(const_int 2) (const_int 3)
6201 (const_int 6) (const_int 7)])))]
6202 "TARGET_AVX"
6203 {
6204 operands[3] = gen_reg_rtx (V4DFmode);
6205 operands[4] = gen_reg_rtx (V4DFmode);
6206 })
6207
6208
6209 (define_expand "vec_interleave_highv2df"
6210 [(set (match_operand:V2DF 0 "register_operand")
6211 (vec_select:V2DF
6212 (vec_concat:V4DF
6213 (match_operand:V2DF 1 "nonimmediate_operand")
6214 (match_operand:V2DF 2 "nonimmediate_operand"))
6215 (parallel [(const_int 1)
6216 (const_int 3)])))]
6217 "TARGET_SSE2"
6218 {
6219 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
6220 operands[2] = force_reg (V2DFmode, operands[2]);
6221 })
6222
6223 (define_insn "*vec_interleave_highv2df"
6224 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
6225 (vec_select:V2DF
6226 (vec_concat:V4DF
6227 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
6228 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
6229 (parallel [(const_int 1)
6230 (const_int 3)])))]
6231 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
6232 "@
6233 unpckhpd\t{%2, %0|%0, %2}
6234 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
6235 %vmovddup\t{%H1, %0|%0, %H1}
6236 movlpd\t{%H1, %0|%0, %H1}
6237 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
6238 %vmovhpd\t{%1, %0|%q0, %1}"
6239 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6240 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6241 (set_attr "ssememalign" "64")
6242 (set_attr "prefix_data16" "*,*,*,1,*,1")
6243 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6244 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
6245
6246 (define_expand "avx512f_movddup512<mask_name>"
6247 [(set (match_operand:V8DF 0 "register_operand")
6248 (vec_select:V8DF
6249 (vec_concat:V16DF
6250 (match_operand:V8DF 1 "nonimmediate_operand")
6251 (match_dup 1))
6252 (parallel [(const_int 0) (const_int 8)
6253 (const_int 2) (const_int 10)
6254 (const_int 4) (const_int 12)
6255 (const_int 6) (const_int 14)])))]
6256 "TARGET_AVX512F")
6257
6258 (define_expand "avx512f_unpcklpd512<mask_name>"
6259 [(set (match_operand:V8DF 0 "register_operand")
6260 (vec_select:V8DF
6261 (vec_concat:V16DF
6262 (match_operand:V8DF 1 "register_operand")
6263 (match_operand:V8DF 2 "nonimmediate_operand"))
6264 (parallel [(const_int 0) (const_int 8)
6265 (const_int 2) (const_int 10)
6266 (const_int 4) (const_int 12)
6267 (const_int 6) (const_int 14)])))]
6268 "TARGET_AVX512F")
6269
6270 (define_insn "*avx512f_unpcklpd512<mask_name>"
6271 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
6272 (vec_select:V8DF
6273 (vec_concat:V16DF
6274 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
6275 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
6276 (parallel [(const_int 0) (const_int 8)
6277 (const_int 2) (const_int 10)
6278 (const_int 4) (const_int 12)
6279 (const_int 6) (const_int 14)])))]
6280 "TARGET_AVX512F"
6281 "@
6282 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
6283 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6284 [(set_attr "type" "sselog")
6285 (set_attr "prefix" "evex")
6286 (set_attr "mode" "V8DF")])
6287
6288 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
6289 (define_expand "avx_movddup256"
6290 [(set (match_operand:V4DF 0 "register_operand")
6291 (vec_select:V4DF
6292 (vec_concat:V8DF
6293 (match_operand:V4DF 1 "nonimmediate_operand")
6294 (match_dup 1))
6295 (parallel [(const_int 0) (const_int 4)
6296 (const_int 2) (const_int 6)])))]
6297 "TARGET_AVX")
6298
6299 (define_expand "avx_unpcklpd256"
6300 [(set (match_operand:V4DF 0 "register_operand")
6301 (vec_select:V4DF
6302 (vec_concat:V8DF
6303 (match_operand:V4DF 1 "register_operand")
6304 (match_operand:V4DF 2 "nonimmediate_operand"))
6305 (parallel [(const_int 0) (const_int 4)
6306 (const_int 2) (const_int 6)])))]
6307 "TARGET_AVX")
6308
6309 (define_insn "*avx_unpcklpd256"
6310 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
6311 (vec_select:V4DF
6312 (vec_concat:V8DF
6313 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
6314 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
6315 (parallel [(const_int 0) (const_int 4)
6316 (const_int 2) (const_int 6)])))]
6317 "TARGET_AVX"
6318 "@
6319 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6320 vmovddup\t{%1, %0|%0, %1}"
6321 [(set_attr "type" "sselog")
6322 (set_attr "prefix" "vex")
6323 (set_attr "mode" "V4DF")])
6324
6325 (define_expand "vec_interleave_lowv4df"
6326 [(set (match_dup 3)
6327 (vec_select:V4DF
6328 (vec_concat:V8DF
6329 (match_operand:V4DF 1 "register_operand" "x")
6330 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6331 (parallel [(const_int 0) (const_int 4)
6332 (const_int 2) (const_int 6)])))
6333 (set (match_dup 4)
6334 (vec_select:V4DF
6335 (vec_concat:V8DF
6336 (match_dup 1)
6337 (match_dup 2))
6338 (parallel [(const_int 1) (const_int 5)
6339 (const_int 3) (const_int 7)])))
6340 (set (match_operand:V4DF 0 "register_operand")
6341 (vec_select:V4DF
6342 (vec_concat:V8DF
6343 (match_dup 3)
6344 (match_dup 4))
6345 (parallel [(const_int 0) (const_int 1)
6346 (const_int 4) (const_int 5)])))]
6347 "TARGET_AVX"
6348 {
6349 operands[3] = gen_reg_rtx (V4DFmode);
6350 operands[4] = gen_reg_rtx (V4DFmode);
6351 })
6352
6353 (define_expand "vec_interleave_lowv2df"
6354 [(set (match_operand:V2DF 0 "register_operand")
6355 (vec_select:V2DF
6356 (vec_concat:V4DF
6357 (match_operand:V2DF 1 "nonimmediate_operand")
6358 (match_operand:V2DF 2 "nonimmediate_operand"))
6359 (parallel [(const_int 0)
6360 (const_int 2)])))]
6361 "TARGET_SSE2"
6362 {
6363 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
6364 operands[1] = force_reg (V2DFmode, operands[1]);
6365 })
6366
6367 (define_insn "*vec_interleave_lowv2df"
6368 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
6369 (vec_select:V2DF
6370 (vec_concat:V4DF
6371 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
6372 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
6373 (parallel [(const_int 0)
6374 (const_int 2)])))]
6375 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
6376 "@
6377 unpcklpd\t{%2, %0|%0, %2}
6378 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6379 %vmovddup\t{%1, %0|%0, %q1}
6380 movhpd\t{%2, %0|%0, %q2}
6381 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
6382 %vmovlpd\t{%2, %H0|%H0, %2}"
6383 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6384 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6385 (set_attr "ssememalign" "64")
6386 (set_attr "prefix_data16" "*,*,*,1,*,1")
6387 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6388 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
6389
6390 (define_split
6391 [(set (match_operand:V2DF 0 "memory_operand")
6392 (vec_select:V2DF
6393 (vec_concat:V4DF
6394 (match_operand:V2DF 1 "register_operand")
6395 (match_dup 1))
6396 (parallel [(const_int 0)
6397 (const_int 2)])))]
6398 "TARGET_SSE3 && reload_completed"
6399 [(const_int 0)]
6400 {
6401 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
6402 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
6403 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
6404 DONE;
6405 })
6406
6407 (define_split
6408 [(set (match_operand:V2DF 0 "register_operand")
6409 (vec_select:V2DF
6410 (vec_concat:V4DF
6411 (match_operand:V2DF 1 "memory_operand")
6412 (match_dup 1))
6413 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
6414 (match_operand:SI 3 "const_int_operand")])))]
6415 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
6416 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
6417 {
6418 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
6419 })
6420
6421 (define_insn "*avx512f_vmscalef<mode>"
6422 [(set (match_operand:VF_128 0 "register_operand" "=v")
6423 (vec_merge:VF_128
6424 (unspec:VF_128
6425 [(match_operand:VF_128 1 "register_operand" "v")
6426 (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
6427 UNSPEC_SCALEF)
6428 (match_dup 1)
6429 (const_int 1)))]
6430 "TARGET_AVX512F"
6431 "%vscalef<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6432 [(set_attr "prefix" "evex")
6433 (set_attr "mode" "<ssescalarmode>")])
6434
6435 (define_insn "avx512f_scalef<mode><mask_name>"
6436 [(set (match_operand:VF_512 0 "register_operand" "=v")
6437 (unspec:VF_512
6438 [(match_operand:VF_512 1 "register_operand" "v")
6439 (match_operand:VF_512 2 "nonimmediate_operand" "vm")]
6440 UNSPEC_SCALEF))]
6441 "TARGET_AVX512F"
6442 "%vscalef<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6443 [(set_attr "prefix" "evex")
6444 (set_attr "mode" "<MODE>")])
6445
6446 (define_insn "avx512f_vternlog<mode>"
6447 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6448 (unspec:VI48_512
6449 [(match_operand:VI48_512 1 "register_operand" "0")
6450 (match_operand:VI48_512 2 "register_operand" "v")
6451 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
6452 (match_operand:SI 4 "const_0_to_255_operand")]
6453 UNSPEC_VTERNLOG))]
6454 "TARGET_AVX512F"
6455 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6456 [(set_attr "type" "sselog")
6457 (set_attr "prefix" "evex")
6458 (set_attr "mode" "<sseinsnmode>")])
6459
6460 (define_insn "avx512f_vternlog<mode>_mask"
6461 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6462 (vec_merge:VI48_512
6463 (unspec:VI48_512
6464 [(match_operand:VI48_512 1 "register_operand" "0")
6465 (match_operand:VI48_512 2 "register_operand" "v")
6466 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
6467 (match_operand:SI 4 "const_0_to_255_operand")]
6468 UNSPEC_VTERNLOG)
6469 (match_dup 1)
6470 (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
6471 "TARGET_AVX512F"
6472 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
6473 [(set_attr "type" "sselog")
6474 (set_attr "prefix" "evex")
6475 (set_attr "mode" "<sseinsnmode>")])
6476
6477 (define_insn "avx512f_getexp<mode><mask_name>"
6478 [(set (match_operand:VF_512 0 "register_operand" "=v")
6479 (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
6480 UNSPEC_GETEXP))]
6481 "TARGET_AVX512F"
6482 "vgetexp<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
6483 [(set_attr "prefix" "evex")
6484 (set_attr "mode" "<MODE>")])
6485
6486 (define_insn "avx512f_sgetexp<mode>"
6487 [(set (match_operand:VF_128 0 "register_operand" "=v")
6488 (vec_merge:VF_128
6489 (unspec:VF_128
6490 [(match_operand:VF_128 1 "register_operand" "v")
6491 (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
6492 UNSPEC_GETEXP)
6493 (match_dup 1)
6494 (const_int 1)))]
6495 "TARGET_AVX512F"
6496 "vgetexp<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
6497 [(set_attr "prefix" "evex")
6498 (set_attr "mode" "<ssescalarmode>")])
6499
6500 (define_insn "<mask_codefor>avx512f_align<mode><mask_name>"
6501 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6502 (unspec:VI48_512 [(match_operand:VI48_512 1 "register_operand" "v")
6503 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
6504 (match_operand:SI 3 "const_0_to_255_operand")]
6505 UNSPEC_ALIGN))]
6506 "TARGET_AVX512F"
6507 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
6508 [(set_attr "prefix" "evex")
6509 (set_attr "mode" "<sseinsnmode>")])
6510
6511 (define_expand "avx512f_shufps512_mask"
6512 [(match_operand:V16SF 0 "register_operand")
6513 (match_operand:V16SF 1 "register_operand")
6514 (match_operand:V16SF 2 "nonimmediate_operand")
6515 (match_operand:SI 3 "const_0_to_255_operand")
6516 (match_operand:V16SF 4 "register_operand")
6517 (match_operand:HI 5 "register_operand")]
6518 "TARGET_AVX512F"
6519 {
6520 int mask = INTVAL (operands[3]);
6521 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
6522 GEN_INT ((mask >> 0) & 3),
6523 GEN_INT ((mask >> 2) & 3),
6524 GEN_INT (((mask >> 4) & 3) + 16),
6525 GEN_INT (((mask >> 6) & 3) + 16),
6526 GEN_INT (((mask >> 0) & 3) + 4),
6527 GEN_INT (((mask >> 2) & 3) + 4),
6528 GEN_INT (((mask >> 4) & 3) + 20),
6529 GEN_INT (((mask >> 6) & 3) + 20),
6530 GEN_INT (((mask >> 0) & 3) + 8),
6531 GEN_INT (((mask >> 2) & 3) + 8),
6532 GEN_INT (((mask >> 4) & 3) + 24),
6533 GEN_INT (((mask >> 6) & 3) + 24),
6534 GEN_INT (((mask >> 0) & 3) + 12),
6535 GEN_INT (((mask >> 2) & 3) + 12),
6536 GEN_INT (((mask >> 4) & 3) + 28),
6537 GEN_INT (((mask >> 6) & 3) + 28),
6538 operands[4], operands[5]));
6539 DONE;
6540 })
6541
6542 (define_insn "avx512f_fixupimm<mode>"
6543 [(set (match_operand:VF_512 0 "register_operand" "=v")
6544 (unspec:VF_512
6545 [(match_operand:VF_512 1 "register_operand" "0")
6546 (match_operand:VF_512 2 "register_operand" "v")
6547 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "vm")
6548 (match_operand:SI 4 "const_0_to_255_operand")]
6549 UNSPEC_FIXUPIMM))]
6550 "TARGET_AVX512F"
6551 "vfixupimm<ssemodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}";
6552 [(set_attr "prefix" "evex")
6553 (set_attr "mode" "<MODE>")])
6554
6555 (define_insn "avx512f_fixupimm<mode>_mask"
6556 [(set (match_operand:VF_512 0 "register_operand" "=v")
6557 (vec_merge:VF_512
6558 (unspec:VF_512
6559 [(match_operand:VF_512 1 "register_operand" "0")
6560 (match_operand:VF_512 2 "register_operand" "v")
6561 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "vm")
6562 (match_operand:SI 4 "const_0_to_255_operand")]
6563 UNSPEC_FIXUPIMM)
6564 (match_dup 1)
6565 (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
6566 "TARGET_AVX512F"
6567 "vfixupimm<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}";
6568 [(set_attr "prefix" "evex")
6569 (set_attr "mode" "<MODE>")])
6570
6571 (define_insn "avx512f_sfixupimm<mode>"
6572 [(set (match_operand:VF_128 0 "register_operand" "=v")
6573 (vec_merge:VF_128
6574 (unspec:VF_128
6575 [(match_operand:VF_128 1 "register_operand" "0")
6576 (match_operand:VF_128 2 "register_operand" "v")
6577 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "vm")
6578 (match_operand:SI 4 "const_0_to_255_operand")]
6579 UNSPEC_FIXUPIMM)
6580 (match_dup 1)
6581 (const_int 1)))]
6582 "TARGET_AVX512F"
6583 "vfixupimm<ssescalarmodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}";
6584 [(set_attr "prefix" "evex")
6585 (set_attr "mode" "<ssescalarmode>")])
6586
6587 (define_insn "avx512f_sfixupimm<mode>_mask"
6588 [(set (match_operand:VF_128 0 "register_operand" "=v")
6589 (vec_merge:VF_128
6590 (vec_merge:VF_128
6591 (unspec:VF_128
6592 [(match_operand:VF_128 1 "register_operand" "0")
6593 (match_operand:VF_128 2 "register_operand" "v")
6594 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "vm")
6595 (match_operand:SI 4 "const_0_to_255_operand")]
6596 UNSPEC_FIXUPIMM)
6597 (match_dup 1)
6598 (const_int 1))
6599 (match_dup 1)
6600 (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
6601 "TARGET_AVX512F"
6602 "vfixupimm<ssescalarmodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}";
6603 [(set_attr "prefix" "evex")
6604 (set_attr "mode" "<ssescalarmode>")])
6605
6606 (define_insn "avx512f_rndscale<mode><mask_name>"
6607 [(set (match_operand:VF_512 0 "register_operand" "=v")
6608 (unspec:VF_512
6609 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")
6610 (match_operand:SI 2 "const_0_to_255_operand")]
6611 UNSPEC_ROUND))]
6612 "TARGET_AVX512F"
6613 "vrndscale<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6614 [(set_attr "length_immediate" "1")
6615 (set_attr "prefix" "evex")
6616 (set_attr "mode" "<MODE>")])
6617
6618 (define_insn "*avx512f_rndscale<mode>"
6619 [(set (match_operand:VF_128 0 "register_operand" "=v")
6620 (vec_merge:VF_128
6621 (unspec:VF_128
6622 [(match_operand:VF_128 1 "register_operand" "v")
6623 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
6624 (match_operand:SI 3 "const_0_to_255_operand")]
6625 UNSPEC_ROUND)
6626 (match_dup 1)
6627 (const_int 1)))]
6628 "TARGET_AVX512F"
6629 "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
6630 [(set_attr "length_immediate" "1")
6631 (set_attr "prefix" "evex")
6632 (set_attr "mode" "<MODE>")])
6633
6634 ;; One bit in mask selects 2 elements.
6635 (define_insn "avx512f_shufps512_1<mask_name>"
6636 [(set (match_operand:V16SF 0 "register_operand" "=v")
6637 (vec_select:V16SF
6638 (vec_concat:V32SF
6639 (match_operand:V16SF 1 "register_operand" "v")
6640 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6641 (parallel [(match_operand 3 "const_0_to_3_operand")
6642 (match_operand 4 "const_0_to_3_operand")
6643 (match_operand 5 "const_16_to_19_operand")
6644 (match_operand 6 "const_16_to_19_operand")
6645 (match_operand 7 "const_4_to_7_operand")
6646 (match_operand 8 "const_4_to_7_operand")
6647 (match_operand 9 "const_20_to_23_operand")
6648 (match_operand 10 "const_20_to_23_operand")
6649 (match_operand 11 "const_8_to_11_operand")
6650 (match_operand 12 "const_8_to_11_operand")
6651 (match_operand 13 "const_24_to_27_operand")
6652 (match_operand 14 "const_24_to_27_operand")
6653 (match_operand 15 "const_12_to_15_operand")
6654 (match_operand 16 "const_12_to_15_operand")
6655 (match_operand 17 "const_28_to_31_operand")
6656 (match_operand 18 "const_28_to_31_operand")])))]
6657 "TARGET_AVX512F
6658 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6659 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6660 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6661 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
6662 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
6663 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
6664 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
6665 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
6666 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
6667 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
6668 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
6669 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
6670 {
6671 int mask;
6672 mask = INTVAL (operands[3]);
6673 mask |= INTVAL (operands[4]) << 2;
6674 mask |= (INTVAL (operands[5]) - 16) << 4;
6675 mask |= (INTVAL (operands[6]) - 16) << 6;
6676 operands[3] = GEN_INT (mask);
6677
6678 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
6679 }
6680 [(set_attr "type" "sselog")
6681 (set_attr "length_immediate" "1")
6682 (set_attr "prefix" "evex")
6683 (set_attr "mode" "V16SF")])
6684
6685 (define_expand "avx512f_shufpd512_mask"
6686 [(match_operand:V8DF 0 "register_operand")
6687 (match_operand:V8DF 1 "register_operand")
6688 (match_operand:V8DF 2 "nonimmediate_operand")
6689 (match_operand:SI 3 "const_0_to_255_operand")
6690 (match_operand:V8DF 4 "register_operand")
6691 (match_operand:QI 5 "register_operand")]
6692 "TARGET_AVX512F"
6693 {
6694 int mask = INTVAL (operands[3]);
6695 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
6696 GEN_INT (mask & 1),
6697 GEN_INT (mask & 2 ? 9 : 8),
6698 GEN_INT (mask & 4 ? 3 : 2),
6699 GEN_INT (mask & 8 ? 11 : 10),
6700 GEN_INT (mask & 16 ? 5 : 4),
6701 GEN_INT (mask & 32 ? 13 : 12),
6702 GEN_INT (mask & 64 ? 7 : 6),
6703 GEN_INT (mask & 128 ? 15 : 14),
6704 operands[4], operands[5]));
6705 DONE;
6706 })
6707
6708 (define_insn "avx512f_shufpd512_1<mask_name>"
6709 [(set (match_operand:V8DF 0 "register_operand" "=v")
6710 (vec_select:V8DF
6711 (vec_concat:V16DF
6712 (match_operand:V8DF 1 "register_operand" "v")
6713 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
6714 (parallel [(match_operand 3 "const_0_to_1_operand")
6715 (match_operand 4 "const_8_to_9_operand")
6716 (match_operand 5 "const_2_to_3_operand")
6717 (match_operand 6 "const_10_to_11_operand")
6718 (match_operand 7 "const_4_to_5_operand")
6719 (match_operand 8 "const_12_to_13_operand")
6720 (match_operand 9 "const_6_to_7_operand")
6721 (match_operand 10 "const_14_to_15_operand")])))]
6722 "TARGET_AVX512F"
6723 {
6724 int mask;
6725 mask = INTVAL (operands[3]);
6726 mask |= (INTVAL (operands[4]) - 8) << 1;
6727 mask |= (INTVAL (operands[5]) - 2) << 2;
6728 mask |= (INTVAL (operands[6]) - 10) << 3;
6729 mask |= (INTVAL (operands[7]) - 4) << 4;
6730 mask |= (INTVAL (operands[8]) - 12) << 5;
6731 mask |= (INTVAL (operands[9]) - 6) << 6;
6732 mask |= (INTVAL (operands[10]) - 14) << 7;
6733 operands[3] = GEN_INT (mask);
6734
6735 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6736 }
6737 [(set_attr "type" "sselog")
6738 (set_attr "length_immediate" "1")
6739 (set_attr "prefix" "evex")
6740 (set_attr "mode" "V8DF")])
6741
6742 (define_expand "avx_shufpd256"
6743 [(match_operand:V4DF 0 "register_operand")
6744 (match_operand:V4DF 1 "register_operand")
6745 (match_operand:V4DF 2 "nonimmediate_operand")
6746 (match_operand:SI 3 "const_int_operand")]
6747 "TARGET_AVX"
6748 {
6749 int mask = INTVAL (operands[3]);
6750 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
6751 GEN_INT (mask & 1),
6752 GEN_INT (mask & 2 ? 5 : 4),
6753 GEN_INT (mask & 4 ? 3 : 2),
6754 GEN_INT (mask & 8 ? 7 : 6)));
6755 DONE;
6756 })
6757
6758 (define_insn "avx_shufpd256_1"
6759 [(set (match_operand:V4DF 0 "register_operand" "=x")
6760 (vec_select:V4DF
6761 (vec_concat:V8DF
6762 (match_operand:V4DF 1 "register_operand" "x")
6763 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6764 (parallel [(match_operand 3 "const_0_to_1_operand")
6765 (match_operand 4 "const_4_to_5_operand")
6766 (match_operand 5 "const_2_to_3_operand")
6767 (match_operand 6 "const_6_to_7_operand")])))]
6768 "TARGET_AVX"
6769 {
6770 int mask;
6771 mask = INTVAL (operands[3]);
6772 mask |= (INTVAL (operands[4]) - 4) << 1;
6773 mask |= (INTVAL (operands[5]) - 2) << 2;
6774 mask |= (INTVAL (operands[6]) - 6) << 3;
6775 operands[3] = GEN_INT (mask);
6776
6777 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6778 }
6779 [(set_attr "type" "sseshuf")
6780 (set_attr "length_immediate" "1")
6781 (set_attr "prefix" "vex")
6782 (set_attr "mode" "V4DF")])
6783
6784 (define_expand "sse2_shufpd"
6785 [(match_operand:V2DF 0 "register_operand")
6786 (match_operand:V2DF 1 "register_operand")
6787 (match_operand:V2DF 2 "nonimmediate_operand")
6788 (match_operand:SI 3 "const_int_operand")]
6789 "TARGET_SSE2"
6790 {
6791 int mask = INTVAL (operands[3]);
6792 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
6793 GEN_INT (mask & 1),
6794 GEN_INT (mask & 2 ? 3 : 2)));
6795 DONE;
6796 })
6797
6798 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
6799 (define_insn "avx2_interleave_highv4di"
6800 [(set (match_operand:V4DI 0 "register_operand" "=x")
6801 (vec_select:V4DI
6802 (vec_concat:V8DI
6803 (match_operand:V4DI 1 "register_operand" "x")
6804 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
6805 (parallel [(const_int 1)
6806 (const_int 5)
6807 (const_int 3)
6808 (const_int 7)])))]
6809 "TARGET_AVX2"
6810 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
6811 [(set_attr "type" "sselog")
6812 (set_attr "prefix" "vex")
6813 (set_attr "mode" "OI")])
6814
6815 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
6816 [(set (match_operand:V8DI 0 "register_operand" "=v")
6817 (vec_select:V8DI
6818 (vec_concat:V16DI
6819 (match_operand:V8DI 1 "register_operand" "v")
6820 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
6821 (parallel [(const_int 1) (const_int 9)
6822 (const_int 3) (const_int 11)
6823 (const_int 5) (const_int 13)
6824 (const_int 7) (const_int 15)])))]
6825 "TARGET_AVX512F"
6826 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6827 [(set_attr "type" "sselog")
6828 (set_attr "prefix" "evex")
6829 (set_attr "mode" "XI")])
6830
6831 (define_insn "vec_interleave_highv2di"
6832 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6833 (vec_select:V2DI
6834 (vec_concat:V4DI
6835 (match_operand:V2DI 1 "register_operand" "0,x")
6836 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
6837 (parallel [(const_int 1)
6838 (const_int 3)])))]
6839 "TARGET_SSE2"
6840 "@
6841 punpckhqdq\t{%2, %0|%0, %2}
6842 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
6843 [(set_attr "isa" "noavx,avx")
6844 (set_attr "type" "sselog")
6845 (set_attr "prefix_data16" "1,*")
6846 (set_attr "prefix" "orig,vex")
6847 (set_attr "mode" "TI")])
6848
6849 (define_insn "avx2_interleave_lowv4di"
6850 [(set (match_operand:V4DI 0 "register_operand" "=x")
6851 (vec_select:V4DI
6852 (vec_concat:V8DI
6853 (match_operand:V4DI 1 "register_operand" "x")
6854 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
6855 (parallel [(const_int 0)
6856 (const_int 4)
6857 (const_int 2)
6858 (const_int 6)])))]
6859 "TARGET_AVX2"
6860 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
6861 [(set_attr "type" "sselog")
6862 (set_attr "prefix" "vex")
6863 (set_attr "mode" "OI")])
6864
6865 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
6866 [(set (match_operand:V8DI 0 "register_operand" "=v")
6867 (vec_select:V8DI
6868 (vec_concat:V16DI
6869 (match_operand:V8DI 1 "register_operand" "v")
6870 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
6871 (parallel [(const_int 0) (const_int 8)
6872 (const_int 2) (const_int 10)
6873 (const_int 4) (const_int 12)
6874 (const_int 6) (const_int 14)])))]
6875 "TARGET_AVX512F"
6876 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6877 [(set_attr "type" "sselog")
6878 (set_attr "prefix" "evex")
6879 (set_attr "mode" "XI")])
6880
6881 (define_insn "vec_interleave_lowv2di"
6882 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6883 (vec_select:V2DI
6884 (vec_concat:V4DI
6885 (match_operand:V2DI 1 "register_operand" "0,x")
6886 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
6887 (parallel [(const_int 0)
6888 (const_int 2)])))]
6889 "TARGET_SSE2"
6890 "@
6891 punpcklqdq\t{%2, %0|%0, %2}
6892 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
6893 [(set_attr "isa" "noavx,avx")
6894 (set_attr "type" "sselog")
6895 (set_attr "prefix_data16" "1,*")
6896 (set_attr "prefix" "orig,vex")
6897 (set_attr "mode" "TI")])
6898
6899 (define_insn "sse2_shufpd_<mode>"
6900 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
6901 (vec_select:VI8F_128
6902 (vec_concat:<ssedoublevecmode>
6903 (match_operand:VI8F_128 1 "register_operand" "0,x")
6904 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
6905 (parallel [(match_operand 3 "const_0_to_1_operand")
6906 (match_operand 4 "const_2_to_3_operand")])))]
6907 "TARGET_SSE2"
6908 {
6909 int mask;
6910 mask = INTVAL (operands[3]);
6911 mask |= (INTVAL (operands[4]) - 2) << 1;
6912 operands[3] = GEN_INT (mask);
6913
6914 switch (which_alternative)
6915 {
6916 case 0:
6917 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
6918 case 1:
6919 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6920 default:
6921 gcc_unreachable ();
6922 }
6923 }
6924 [(set_attr "isa" "noavx,avx")
6925 (set_attr "type" "sseshuf")
6926 (set_attr "length_immediate" "1")
6927 (set_attr "prefix" "orig,vex")
6928 (set_attr "mode" "V2DF")])
6929
6930 ;; Avoid combining registers from different units in a single alternative,
6931 ;; see comment above inline_secondary_memory_needed function in i386.c
6932 (define_insn "sse2_storehpd"
6933 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
6934 (vec_select:DF
6935 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
6936 (parallel [(const_int 1)])))]
6937 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6938 "@
6939 %vmovhpd\t{%1, %0|%0, %1}
6940 unpckhpd\t%0, %0
6941 vunpckhpd\t{%d1, %0|%0, %d1}
6942 #
6943 #
6944 #"
6945 [(set_attr "isa" "*,noavx,avx,*,*,*")
6946 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
6947 (set (attr "prefix_data16")
6948 (if_then_else
6949 (and (eq_attr "alternative" "0")
6950 (not (match_test "TARGET_AVX")))
6951 (const_string "1")
6952 (const_string "*")))
6953 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
6954 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
6955
6956 (define_split
6957 [(set (match_operand:DF 0 "register_operand")
6958 (vec_select:DF
6959 (match_operand:V2DF 1 "memory_operand")
6960 (parallel [(const_int 1)])))]
6961 "TARGET_SSE2 && reload_completed"
6962 [(set (match_dup 0) (match_dup 1))]
6963 "operands[1] = adjust_address (operands[1], DFmode, 8);")
6964
6965 (define_insn "*vec_extractv2df_1_sse"
6966 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
6967 (vec_select:DF
6968 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
6969 (parallel [(const_int 1)])))]
6970 "!TARGET_SSE2 && TARGET_SSE
6971 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6972 "@
6973 movhps\t{%1, %0|%q0, %1}
6974 movhlps\t{%1, %0|%0, %1}
6975 movlps\t{%H1, %0|%0, %H1}"
6976 [(set_attr "type" "ssemov")
6977 (set_attr "ssememalign" "64")
6978 (set_attr "mode" "V2SF,V4SF,V2SF")])
6979
6980 ;; Avoid combining registers from different units in a single alternative,
6981 ;; see comment above inline_secondary_memory_needed function in i386.c
6982 (define_insn "sse2_storelpd"
6983 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
6984 (vec_select:DF
6985 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
6986 (parallel [(const_int 0)])))]
6987 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6988 "@
6989 %vmovlpd\t{%1, %0|%0, %1}
6990 #
6991 #
6992 #
6993 #"
6994 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
6995 (set_attr "prefix_data16" "1,*,*,*,*")
6996 (set_attr "prefix" "maybe_vex")
6997 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
6998
6999 (define_split
7000 [(set (match_operand:DF 0 "register_operand")
7001 (vec_select:DF
7002 (match_operand:V2DF 1 "nonimmediate_operand")
7003 (parallel [(const_int 0)])))]
7004 "TARGET_SSE2 && reload_completed"
7005 [(set (match_dup 0) (match_dup 1))]
7006 {
7007 if (REG_P (operands[1]))
7008 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
7009 else
7010 operands[1] = adjust_address (operands[1], DFmode, 0);
7011 })
7012
7013 (define_insn "*vec_extractv2df_0_sse"
7014 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
7015 (vec_select:DF
7016 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
7017 (parallel [(const_int 0)])))]
7018 "!TARGET_SSE2 && TARGET_SSE
7019 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7020 "@
7021 movlps\t{%1, %0|%0, %1}
7022 movaps\t{%1, %0|%0, %1}
7023 movlps\t{%1, %0|%0, %q1}"
7024 [(set_attr "type" "ssemov")
7025 (set_attr "mode" "V2SF,V4SF,V2SF")])
7026
7027 (define_expand "sse2_loadhpd_exp"
7028 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7029 (vec_concat:V2DF
7030 (vec_select:DF
7031 (match_operand:V2DF 1 "nonimmediate_operand")
7032 (parallel [(const_int 0)]))
7033 (match_operand:DF 2 "nonimmediate_operand")))]
7034 "TARGET_SSE2"
7035 {
7036 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7037
7038 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
7039
7040 /* Fix up the destination if needed. */
7041 if (dst != operands[0])
7042 emit_move_insn (operands[0], dst);
7043
7044 DONE;
7045 })
7046
7047 ;; Avoid combining registers from different units in a single alternative,
7048 ;; see comment above inline_secondary_memory_needed function in i386.c
7049 (define_insn "sse2_loadhpd"
7050 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7051 "=x,x,x,x,o,o ,o")
7052 (vec_concat:V2DF
7053 (vec_select:DF
7054 (match_operand:V2DF 1 "nonimmediate_operand"
7055 " 0,x,0,x,0,0 ,0")
7056 (parallel [(const_int 0)]))
7057 (match_operand:DF 2 "nonimmediate_operand"
7058 " m,m,x,x,x,*f,r")))]
7059 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7060 "@
7061 movhpd\t{%2, %0|%0, %2}
7062 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7063 unpcklpd\t{%2, %0|%0, %2}
7064 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7065 #
7066 #
7067 #"
7068 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7069 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
7070 (set_attr "ssememalign" "64")
7071 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
7072 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
7073 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
7074
7075 (define_split
7076 [(set (match_operand:V2DF 0 "memory_operand")
7077 (vec_concat:V2DF
7078 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
7079 (match_operand:DF 1 "register_operand")))]
7080 "TARGET_SSE2 && reload_completed"
7081 [(set (match_dup 0) (match_dup 1))]
7082 "operands[0] = adjust_address (operands[0], DFmode, 8);")
7083
7084 (define_expand "sse2_loadlpd_exp"
7085 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7086 (vec_concat:V2DF
7087 (match_operand:DF 2 "nonimmediate_operand")
7088 (vec_select:DF
7089 (match_operand:V2DF 1 "nonimmediate_operand")
7090 (parallel [(const_int 1)]))))]
7091 "TARGET_SSE2"
7092 {
7093 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7094
7095 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
7096
7097 /* Fix up the destination if needed. */
7098 if (dst != operands[0])
7099 emit_move_insn (operands[0], dst);
7100
7101 DONE;
7102 })
7103
7104 ;; Avoid combining registers from different units in a single alternative,
7105 ;; see comment above inline_secondary_memory_needed function in i386.c
7106 (define_insn "sse2_loadlpd"
7107 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7108 "=x,x,x,x,x,x,x,x,m,m ,m")
7109 (vec_concat:V2DF
7110 (match_operand:DF 2 "nonimmediate_operand"
7111 " m,m,m,x,x,0,0,x,x,*f,r")
7112 (vec_select:DF
7113 (match_operand:V2DF 1 "vector_move_operand"
7114 " C,0,x,0,x,x,o,o,0,0 ,0")
7115 (parallel [(const_int 1)]))))]
7116 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7117 "@
7118 %vmovsd\t{%2, %0|%0, %2}
7119 movlpd\t{%2, %0|%0, %2}
7120 vmovlpd\t{%2, %1, %0|%0, %1, %2}
7121 movsd\t{%2, %0|%0, %2}
7122 vmovsd\t{%2, %1, %0|%0, %1, %2}
7123 shufpd\t{$2, %1, %0|%0, %1, 2}
7124 movhpd\t{%H1, %0|%0, %H1}
7125 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
7126 #
7127 #
7128 #"
7129 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
7130 (set (attr "type")
7131 (cond [(eq_attr "alternative" "5")
7132 (const_string "sselog")
7133 (eq_attr "alternative" "9")
7134 (const_string "fmov")
7135 (eq_attr "alternative" "10")
7136 (const_string "imov")
7137 ]
7138 (const_string "ssemov")))
7139 (set_attr "ssememalign" "64")
7140 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
7141 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
7142 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
7143 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
7144
7145 (define_split
7146 [(set (match_operand:V2DF 0 "memory_operand")
7147 (vec_concat:V2DF
7148 (match_operand:DF 1 "register_operand")
7149 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
7150 "TARGET_SSE2 && reload_completed"
7151 [(set (match_dup 0) (match_dup 1))]
7152 "operands[0] = adjust_address (operands[0], DFmode, 0);")
7153
7154 (define_insn "sse2_movsd"
7155 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
7156 (vec_merge:V2DF
7157 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
7158 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
7159 (const_int 1)))]
7160 "TARGET_SSE2"
7161 "@
7162 movsd\t{%2, %0|%0, %2}
7163 vmovsd\t{%2, %1, %0|%0, %1, %2}
7164 movlpd\t{%2, %0|%0, %q2}
7165 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
7166 %vmovlpd\t{%2, %0|%q0, %2}
7167 shufpd\t{$2, %1, %0|%0, %1, 2}
7168 movhps\t{%H1, %0|%0, %H1}
7169 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
7170 %vmovhps\t{%1, %H0|%H0, %1}"
7171 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
7172 (set (attr "type")
7173 (if_then_else
7174 (eq_attr "alternative" "5")
7175 (const_string "sselog")
7176 (const_string "ssemov")))
7177 (set (attr "prefix_data16")
7178 (if_then_else
7179 (and (eq_attr "alternative" "2,4")
7180 (not (match_test "TARGET_AVX")))
7181 (const_string "1")
7182 (const_string "*")))
7183 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
7184 (set_attr "ssememalign" "64")
7185 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
7186 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
7187
7188 (define_insn "vec_dupv2df"
7189 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
7190 (vec_duplicate:V2DF
7191 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
7192 "TARGET_SSE2"
7193 "@
7194 unpcklpd\t%0, %0
7195 %vmovddup\t{%1, %0|%0, %1}"
7196 [(set_attr "isa" "noavx,sse3")
7197 (set_attr "type" "sselog1")
7198 (set_attr "prefix" "orig,maybe_vex")
7199 (set_attr "mode" "V2DF,DF")])
7200
7201 (define_insn "*vec_concatv2df"
7202 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
7203 (vec_concat:V2DF
7204 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
7205 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
7206 "TARGET_SSE"
7207 "@
7208 unpcklpd\t{%2, %0|%0, %2}
7209 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7210 %vmovddup\t{%1, %0|%0, %1}
7211 movhpd\t{%2, %0|%0, %2}
7212 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7213 %vmovsd\t{%1, %0|%0, %1}
7214 movlhps\t{%2, %0|%0, %2}
7215 movhps\t{%2, %0|%0, %2}"
7216 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
7217 (set (attr "type")
7218 (if_then_else
7219 (eq_attr "alternative" "0,1,2")
7220 (const_string "sselog")
7221 (const_string "ssemov")))
7222 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
7223 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
7224 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
7225
7226 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7227 ;;
7228 ;; Parallel integer down-conversion operations
7229 ;;
7230 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7231
7232 (define_mode_iterator PMOV_DST_MODE [V16QI V16HI V8SI V8HI])
7233 (define_mode_attr pmov_src_mode
7234 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
7235 (define_mode_attr pmov_src_lower
7236 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
7237 (define_mode_attr pmov_suff
7238 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
7239
7240 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
7241 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7242 (any_truncate:PMOV_DST_MODE
7243 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
7244 "TARGET_AVX512F"
7245 "vpmov<trunsuffix><pmov_suff>\t{%1, %0|%0, %1}"
7246 [(set_attr "type" "ssemov")
7247 (set_attr "memory" "none,store")
7248 (set_attr "prefix" "evex")
7249 (set_attr "mode" "<sseinsnmode>")])
7250
7251 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
7252 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7253 (vec_merge:PMOV_DST_MODE
7254 (any_truncate:PMOV_DST_MODE
7255 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
7256 (match_operand:PMOV_DST_MODE 2 "vector_move_operand" "0C,0")
7257 (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))]
7258 "TARGET_AVX512F"
7259 "vpmov<trunsuffix><pmov_suff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7260 [(set_attr "type" "ssemov")
7261 (set_attr "memory" "none,store")
7262 (set_attr "prefix" "evex")
7263 (set_attr "mode" "<sseinsnmode>")])
7264
7265 (define_insn "*avx512f_<code>v8div16qi2"
7266 [(set (match_operand:V16QI 0 "register_operand" "=v")
7267 (vec_concat:V16QI
7268 (any_truncate:V8QI
7269 (match_operand:V8DI 1 "register_operand" "v"))
7270 (const_vector:V8QI [(const_int 0) (const_int 0)
7271 (const_int 0) (const_int 0)
7272 (const_int 0) (const_int 0)
7273 (const_int 0) (const_int 0)])))]
7274 "TARGET_AVX512F"
7275 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
7276 [(set_attr "type" "ssemov")
7277 (set_attr "prefix" "evex")
7278 (set_attr "mode" "TI")])
7279
7280 (define_insn "*avx512f_<code>v8div16qi2_store"
7281 [(set (match_operand:V16QI 0 "memory_operand" "=m")
7282 (vec_concat:V16QI
7283 (any_truncate:V8QI
7284 (match_operand:V8DI 1 "register_operand" "v"))
7285 (vec_select:V8QI
7286 (match_dup 0)
7287 (parallel [(const_int 8) (const_int 9)
7288 (const_int 10) (const_int 11)
7289 (const_int 12) (const_int 13)
7290 (const_int 14) (const_int 15)]))))]
7291 "TARGET_AVX512F"
7292 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
7293 [(set_attr "type" "ssemov")
7294 (set_attr "memory" "store")
7295 (set_attr "prefix" "evex")
7296 (set_attr "mode" "TI")])
7297
7298 (define_insn "avx512f_<code>v8div16qi2_mask"
7299 [(set (match_operand:V16QI 0 "register_operand" "=v")
7300 (vec_concat:V16QI
7301 (vec_merge:V8QI
7302 (any_truncate:V8QI
7303 (match_operand:V8DI 1 "register_operand" "v"))
7304 (vec_select:V8QI
7305 (match_operand:V16QI 2 "vector_move_operand" "0C")
7306 (parallel [(const_int 0) (const_int 1)
7307 (const_int 2) (const_int 3)
7308 (const_int 4) (const_int 5)
7309 (const_int 6) (const_int 7)]))
7310 (match_operand:QI 3 "register_operand" "k"))
7311 (const_vector:V8QI [(const_int 0) (const_int 0)
7312 (const_int 0) (const_int 0)
7313 (const_int 0) (const_int 0)
7314 (const_int 0) (const_int 0)])))]
7315 "TARGET_AVX512F"
7316 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7317 [(set_attr "type" "ssemov")
7318 (set_attr "prefix" "evex")
7319 (set_attr "mode" "TI")])
7320
7321 (define_insn "*avx512f_<code>v8div16qi2_store_mask"
7322 [(set (match_operand:V16QI 0 "memory_operand" "=m")
7323 (vec_concat:V16QI
7324 (vec_merge:V8QI
7325 (any_truncate:V8QI
7326 (match_operand:V8DI 1 "register_operand" "v"))
7327 (vec_select:V8QI
7328 (match_dup 0)
7329 (parallel [(const_int 0) (const_int 1)
7330 (const_int 2) (const_int 3)
7331 (const_int 4) (const_int 5)
7332 (const_int 6) (const_int 7)]))
7333 (match_operand:QI 2 "register_operand" "k"))
7334 (vec_select:V8QI
7335 (match_dup 0)
7336 (parallel [(const_int 8) (const_int 9)
7337 (const_int 10) (const_int 11)
7338 (const_int 12) (const_int 13)
7339 (const_int 14) (const_int 15)]))))]
7340 "TARGET_AVX512F"
7341 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
7342 [(set_attr "type" "ssemov")
7343 (set_attr "memory" "store")
7344 (set_attr "prefix" "evex")
7345 (set_attr "mode" "TI")])
7346
7347 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7348 ;;
7349 ;; Parallel integral arithmetic
7350 ;;
7351 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7352
7353 (define_expand "neg<mode>2"
7354 [(set (match_operand:VI_AVX2 0 "register_operand")
7355 (minus:VI_AVX2
7356 (match_dup 2)
7357 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
7358 "TARGET_SSE2"
7359 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
7360
7361 (define_expand "<plusminus_insn><mode>3<mask_name>"
7362 [(set (match_operand:VI_AVX2 0 "register_operand")
7363 (plusminus:VI_AVX2
7364 (match_operand:VI_AVX2 1 "nonimmediate_operand")
7365 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
7366 "TARGET_SSE2 && <mask_mode512bit_condition>"
7367 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
7368
7369 (define_insn "*<plusminus_insn><mode>3<mask_name>"
7370 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
7371 (plusminus:VI_AVX2
7372 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
7373 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
7374 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
7375 "@
7376 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
7377 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7378 [(set_attr "isa" "noavx,avx")
7379 (set_attr "type" "sseiadd")
7380 (set_attr "prefix_data16" "1,*")
7381 (set_attr "prefix" "<mask_prefix3>")
7382 (set_attr "mode" "<sseinsnmode>")])
7383
7384 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
7385 [(set (match_operand:VI12_AVX2 0 "register_operand")
7386 (sat_plusminus:VI12_AVX2
7387 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
7388 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
7389 "TARGET_SSE2"
7390 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
7391
7392 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
7393 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
7394 (sat_plusminus:VI12_AVX2
7395 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
7396 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
7397 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
7398 "@
7399 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
7400 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7401 [(set_attr "isa" "noavx,avx")
7402 (set_attr "type" "sseiadd")
7403 (set_attr "prefix_data16" "1,*")
7404 (set_attr "prefix" "orig,vex")
7405 (set_attr "mode" "TI")])
7406
7407 (define_expand "mul<mode>3"
7408 [(set (match_operand:VI1_AVX2 0 "register_operand")
7409 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
7410 (match_operand:VI1_AVX2 2 "register_operand")))]
7411 "TARGET_SSE2"
7412 {
7413 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
7414 DONE;
7415 })
7416
7417 (define_expand "mul<mode>3"
7418 [(set (match_operand:VI2_AVX2 0 "register_operand")
7419 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
7420 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
7421 "TARGET_SSE2"
7422 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
7423
7424 (define_insn "*mul<mode>3"
7425 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
7426 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
7427 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
7428 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
7429 "@
7430 pmullw\t{%2, %0|%0, %2}
7431 vpmullw\t{%2, %1, %0|%0, %1, %2}"
7432 [(set_attr "isa" "noavx,avx")
7433 (set_attr "type" "sseimul")
7434 (set_attr "prefix_data16" "1,*")
7435 (set_attr "prefix" "orig,vex")
7436 (set_attr "mode" "<sseinsnmode>")])
7437
7438 (define_expand "<s>mul<mode>3_highpart"
7439 [(set (match_operand:VI2_AVX2 0 "register_operand")
7440 (truncate:VI2_AVX2
7441 (lshiftrt:<ssedoublemode>
7442 (mult:<ssedoublemode>
7443 (any_extend:<ssedoublemode>
7444 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
7445 (any_extend:<ssedoublemode>
7446 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
7447 (const_int 16))))]
7448 "TARGET_SSE2"
7449 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
7450
7451 (define_insn "*<s>mul<mode>3_highpart"
7452 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
7453 (truncate:VI2_AVX2
7454 (lshiftrt:<ssedoublemode>
7455 (mult:<ssedoublemode>
7456 (any_extend:<ssedoublemode>
7457 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
7458 (any_extend:<ssedoublemode>
7459 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
7460 (const_int 16))))]
7461 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
7462 "@
7463 pmulh<u>w\t{%2, %0|%0, %2}
7464 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
7465 [(set_attr "isa" "noavx,avx")
7466 (set_attr "type" "sseimul")
7467 (set_attr "prefix_data16" "1,*")
7468 (set_attr "prefix" "orig,vex")
7469 (set_attr "mode" "<sseinsnmode>")])
7470
7471 (define_expand "vec_widen_umult_even_v16si<mask_name>"
7472 [(set (match_operand:V8DI 0 "register_operand")
7473 (mult:V8DI
7474 (zero_extend:V8DI
7475 (vec_select:V8SI
7476 (match_operand:V16SI 1 "nonimmediate_operand")
7477 (parallel [(const_int 0) (const_int 2)
7478 (const_int 4) (const_int 6)
7479 (const_int 8) (const_int 10)
7480 (const_int 12) (const_int 14)])))
7481 (zero_extend:V8DI
7482 (vec_select:V8SI
7483 (match_operand:V16SI 2 "nonimmediate_operand")
7484 (parallel [(const_int 0) (const_int 2)
7485 (const_int 4) (const_int 6)
7486 (const_int 8) (const_int 10)
7487 (const_int 12) (const_int 14)])))))]
7488 "TARGET_AVX512F"
7489 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
7490
7491 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
7492 [(set (match_operand:V8DI 0 "register_operand" "=v")
7493 (mult:V8DI
7494 (zero_extend:V8DI
7495 (vec_select:V8SI
7496 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
7497 (parallel [(const_int 0) (const_int 2)
7498 (const_int 4) (const_int 6)
7499 (const_int 8) (const_int 10)
7500 (const_int 12) (const_int 14)])))
7501 (zero_extend:V8DI
7502 (vec_select:V8SI
7503 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
7504 (parallel [(const_int 0) (const_int 2)
7505 (const_int 4) (const_int 6)
7506 (const_int 8) (const_int 10)
7507 (const_int 12) (const_int 14)])))))]
7508 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
7509 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7510 [(set_attr "isa" "avx512f")
7511 (set_attr "type" "sseimul")
7512 (set_attr "prefix_extra" "1")
7513 (set_attr "prefix" "evex")
7514 (set_attr "mode" "XI")])
7515
7516 (define_expand "vec_widen_umult_even_v8si"
7517 [(set (match_operand:V4DI 0 "register_operand")
7518 (mult:V4DI
7519 (zero_extend:V4DI
7520 (vec_select:V4SI
7521 (match_operand:V8SI 1 "nonimmediate_operand")
7522 (parallel [(const_int 0) (const_int 2)
7523 (const_int 4) (const_int 6)])))
7524 (zero_extend:V4DI
7525 (vec_select:V4SI
7526 (match_operand:V8SI 2 "nonimmediate_operand")
7527 (parallel [(const_int 0) (const_int 2)
7528 (const_int 4) (const_int 6)])))))]
7529 "TARGET_AVX2"
7530 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
7531
7532 (define_insn "*vec_widen_umult_even_v8si"
7533 [(set (match_operand:V4DI 0 "register_operand" "=x")
7534 (mult:V4DI
7535 (zero_extend:V4DI
7536 (vec_select:V4SI
7537 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
7538 (parallel [(const_int 0) (const_int 2)
7539 (const_int 4) (const_int 6)])))
7540 (zero_extend:V4DI
7541 (vec_select:V4SI
7542 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
7543 (parallel [(const_int 0) (const_int 2)
7544 (const_int 4) (const_int 6)])))))]
7545 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
7546 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
7547 [(set_attr "type" "sseimul")
7548 (set_attr "prefix" "vex")
7549 (set_attr "mode" "OI")])
7550
7551 (define_expand "vec_widen_umult_even_v4si"
7552 [(set (match_operand:V2DI 0 "register_operand")
7553 (mult:V2DI
7554 (zero_extend:V2DI
7555 (vec_select:V2SI
7556 (match_operand:V4SI 1 "nonimmediate_operand")
7557 (parallel [(const_int 0) (const_int 2)])))
7558 (zero_extend:V2DI
7559 (vec_select:V2SI
7560 (match_operand:V4SI 2 "nonimmediate_operand")
7561 (parallel [(const_int 0) (const_int 2)])))))]
7562 "TARGET_SSE2"
7563 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
7564
7565 (define_insn "*vec_widen_umult_even_v4si"
7566 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7567 (mult:V2DI
7568 (zero_extend:V2DI
7569 (vec_select:V2SI
7570 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
7571 (parallel [(const_int 0) (const_int 2)])))
7572 (zero_extend:V2DI
7573 (vec_select:V2SI
7574 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7575 (parallel [(const_int 0) (const_int 2)])))))]
7576 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
7577 "@
7578 pmuludq\t{%2, %0|%0, %2}
7579 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
7580 [(set_attr "isa" "noavx,avx")
7581 (set_attr "type" "sseimul")
7582 (set_attr "prefix_data16" "1,*")
7583 (set_attr "prefix" "orig,vex")
7584 (set_attr "mode" "TI")])
7585
7586 (define_expand "vec_widen_smult_even_v16si<mask_name>"
7587 [(set (match_operand:V8DI 0 "register_operand")
7588 (mult:V8DI
7589 (sign_extend:V8DI
7590 (vec_select:V8SI
7591 (match_operand:V16SI 1 "nonimmediate_operand")
7592 (parallel [(const_int 0) (const_int 2)
7593 (const_int 4) (const_int 6)
7594 (const_int 8) (const_int 10)
7595 (const_int 12) (const_int 14)])))
7596 (sign_extend:V8DI
7597 (vec_select:V8SI
7598 (match_operand:V16SI 2 "nonimmediate_operand")
7599 (parallel [(const_int 0) (const_int 2)
7600 (const_int 4) (const_int 6)
7601 (const_int 8) (const_int 10)
7602 (const_int 12) (const_int 14)])))))]
7603 "TARGET_AVX512F"
7604 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
7605
7606 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
7607 [(set (match_operand:V8DI 0 "register_operand" "=v")
7608 (mult:V8DI
7609 (sign_extend:V8DI
7610 (vec_select:V8SI
7611 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
7612 (parallel [(const_int 0) (const_int 2)
7613 (const_int 4) (const_int 6)
7614 (const_int 8) (const_int 10)
7615 (const_int 12) (const_int 14)])))
7616 (sign_extend:V8DI
7617 (vec_select:V8SI
7618 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
7619 (parallel [(const_int 0) (const_int 2)
7620 (const_int 4) (const_int 6)
7621 (const_int 8) (const_int 10)
7622 (const_int 12) (const_int 14)])))))]
7623 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
7624 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7625 [(set_attr "isa" "avx512f")
7626 (set_attr "type" "sseimul")
7627 (set_attr "prefix_extra" "1")
7628 (set_attr "prefix" "evex")
7629 (set_attr "mode" "XI")])
7630
7631 (define_expand "vec_widen_smult_even_v8si"
7632 [(set (match_operand:V4DI 0 "register_operand")
7633 (mult:V4DI
7634 (sign_extend:V4DI
7635 (vec_select:V4SI
7636 (match_operand:V8SI 1 "nonimmediate_operand")
7637 (parallel [(const_int 0) (const_int 2)
7638 (const_int 4) (const_int 6)])))
7639 (sign_extend:V4DI
7640 (vec_select:V4SI
7641 (match_operand:V8SI 2 "nonimmediate_operand")
7642 (parallel [(const_int 0) (const_int 2)
7643 (const_int 4) (const_int 6)])))))]
7644 "TARGET_AVX2"
7645 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
7646
7647 (define_insn "*vec_widen_smult_even_v8si"
7648 [(set (match_operand:V4DI 0 "register_operand" "=x")
7649 (mult:V4DI
7650 (sign_extend:V4DI
7651 (vec_select:V4SI
7652 (match_operand:V8SI 1 "nonimmediate_operand" "x")
7653 (parallel [(const_int 0) (const_int 2)
7654 (const_int 4) (const_int 6)])))
7655 (sign_extend:V4DI
7656 (vec_select:V4SI
7657 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
7658 (parallel [(const_int 0) (const_int 2)
7659 (const_int 4) (const_int 6)])))))]
7660 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
7661 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
7662 [(set_attr "type" "sseimul")
7663 (set_attr "prefix_extra" "1")
7664 (set_attr "prefix" "vex")
7665 (set_attr "mode" "OI")])
7666
7667 (define_expand "sse4_1_mulv2siv2di3"
7668 [(set (match_operand:V2DI 0 "register_operand")
7669 (mult:V2DI
7670 (sign_extend:V2DI
7671 (vec_select:V2SI
7672 (match_operand:V4SI 1 "nonimmediate_operand")
7673 (parallel [(const_int 0) (const_int 2)])))
7674 (sign_extend:V2DI
7675 (vec_select:V2SI
7676 (match_operand:V4SI 2 "nonimmediate_operand")
7677 (parallel [(const_int 0) (const_int 2)])))))]
7678 "TARGET_SSE4_1"
7679 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
7680
7681 (define_insn "*sse4_1_mulv2siv2di3"
7682 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7683 (mult:V2DI
7684 (sign_extend:V2DI
7685 (vec_select:V2SI
7686 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
7687 (parallel [(const_int 0) (const_int 2)])))
7688 (sign_extend:V2DI
7689 (vec_select:V2SI
7690 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7691 (parallel [(const_int 0) (const_int 2)])))))]
7692 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
7693 "@
7694 pmuldq\t{%2, %0|%0, %2}
7695 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
7696 [(set_attr "isa" "noavx,avx")
7697 (set_attr "type" "sseimul")
7698 (set_attr "prefix_data16" "1,*")
7699 (set_attr "prefix_extra" "1")
7700 (set_attr "prefix" "orig,vex")
7701 (set_attr "mode" "TI")])
7702
7703 (define_expand "avx2_pmaddwd"
7704 [(set (match_operand:V8SI 0 "register_operand")
7705 (plus:V8SI
7706 (mult:V8SI
7707 (sign_extend:V8SI
7708 (vec_select:V8HI
7709 (match_operand:V16HI 1 "nonimmediate_operand")
7710 (parallel [(const_int 0) (const_int 2)
7711 (const_int 4) (const_int 6)
7712 (const_int 8) (const_int 10)
7713 (const_int 12) (const_int 14)])))
7714 (sign_extend:V8SI
7715 (vec_select:V8HI
7716 (match_operand:V16HI 2 "nonimmediate_operand")
7717 (parallel [(const_int 0) (const_int 2)
7718 (const_int 4) (const_int 6)
7719 (const_int 8) (const_int 10)
7720 (const_int 12) (const_int 14)]))))
7721 (mult:V8SI
7722 (sign_extend:V8SI
7723 (vec_select:V8HI (match_dup 1)
7724 (parallel [(const_int 1) (const_int 3)
7725 (const_int 5) (const_int 7)
7726 (const_int 9) (const_int 11)
7727 (const_int 13) (const_int 15)])))
7728 (sign_extend:V8SI
7729 (vec_select:V8HI (match_dup 2)
7730 (parallel [(const_int 1) (const_int 3)
7731 (const_int 5) (const_int 7)
7732 (const_int 9) (const_int 11)
7733 (const_int 13) (const_int 15)]))))))]
7734 "TARGET_AVX2"
7735 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
7736
7737 (define_insn "*avx2_pmaddwd"
7738 [(set (match_operand:V8SI 0 "register_operand" "=x")
7739 (plus:V8SI
7740 (mult:V8SI
7741 (sign_extend:V8SI
7742 (vec_select:V8HI
7743 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
7744 (parallel [(const_int 0) (const_int 2)
7745 (const_int 4) (const_int 6)
7746 (const_int 8) (const_int 10)
7747 (const_int 12) (const_int 14)])))
7748 (sign_extend:V8SI
7749 (vec_select:V8HI
7750 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
7751 (parallel [(const_int 0) (const_int 2)
7752 (const_int 4) (const_int 6)
7753 (const_int 8) (const_int 10)
7754 (const_int 12) (const_int 14)]))))
7755 (mult:V8SI
7756 (sign_extend:V8SI
7757 (vec_select:V8HI (match_dup 1)
7758 (parallel [(const_int 1) (const_int 3)
7759 (const_int 5) (const_int 7)
7760 (const_int 9) (const_int 11)
7761 (const_int 13) (const_int 15)])))
7762 (sign_extend:V8SI
7763 (vec_select:V8HI (match_dup 2)
7764 (parallel [(const_int 1) (const_int 3)
7765 (const_int 5) (const_int 7)
7766 (const_int 9) (const_int 11)
7767 (const_int 13) (const_int 15)]))))))]
7768 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
7769 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
7770 [(set_attr "type" "sseiadd")
7771 (set_attr "prefix" "vex")
7772 (set_attr "mode" "OI")])
7773
7774 (define_expand "sse2_pmaddwd"
7775 [(set (match_operand:V4SI 0 "register_operand")
7776 (plus:V4SI
7777 (mult:V4SI
7778 (sign_extend:V4SI
7779 (vec_select:V4HI
7780 (match_operand:V8HI 1 "nonimmediate_operand")
7781 (parallel [(const_int 0) (const_int 2)
7782 (const_int 4) (const_int 6)])))
7783 (sign_extend:V4SI
7784 (vec_select:V4HI
7785 (match_operand:V8HI 2 "nonimmediate_operand")
7786 (parallel [(const_int 0) (const_int 2)
7787 (const_int 4) (const_int 6)]))))
7788 (mult:V4SI
7789 (sign_extend:V4SI
7790 (vec_select:V4HI (match_dup 1)
7791 (parallel [(const_int 1) (const_int 3)
7792 (const_int 5) (const_int 7)])))
7793 (sign_extend:V4SI
7794 (vec_select:V4HI (match_dup 2)
7795 (parallel [(const_int 1) (const_int 3)
7796 (const_int 5) (const_int 7)]))))))]
7797 "TARGET_SSE2"
7798 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
7799
7800 (define_insn "*sse2_pmaddwd"
7801 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7802 (plus:V4SI
7803 (mult:V4SI
7804 (sign_extend:V4SI
7805 (vec_select:V4HI
7806 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
7807 (parallel [(const_int 0) (const_int 2)
7808 (const_int 4) (const_int 6)])))
7809 (sign_extend:V4SI
7810 (vec_select:V4HI
7811 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7812 (parallel [(const_int 0) (const_int 2)
7813 (const_int 4) (const_int 6)]))))
7814 (mult:V4SI
7815 (sign_extend:V4SI
7816 (vec_select:V4HI (match_dup 1)
7817 (parallel [(const_int 1) (const_int 3)
7818 (const_int 5) (const_int 7)])))
7819 (sign_extend:V4SI
7820 (vec_select:V4HI (match_dup 2)
7821 (parallel [(const_int 1) (const_int 3)
7822 (const_int 5) (const_int 7)]))))))]
7823 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
7824 "@
7825 pmaddwd\t{%2, %0|%0, %2}
7826 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
7827 [(set_attr "isa" "noavx,avx")
7828 (set_attr "type" "sseiadd")
7829 (set_attr "atom_unit" "simul")
7830 (set_attr "prefix_data16" "1,*")
7831 (set_attr "prefix" "orig,vex")
7832 (set_attr "mode" "TI")])
7833
7834 (define_expand "mul<mode>3<mask_name>"
7835 [(set (match_operand:VI4_AVX512F 0 "register_operand")
7836 (mult:VI4_AVX512F
7837 (match_operand:VI4_AVX512F 1 "general_vector_operand")
7838 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
7839 "TARGET_SSE2 && <mask_mode512bit_condition>"
7840 {
7841 if (TARGET_SSE4_1)
7842 {
7843 if (!nonimmediate_operand (operands[1], <MODE>mode))
7844 operands[1] = force_reg (<MODE>mode, operands[1]);
7845 if (!nonimmediate_operand (operands[2], <MODE>mode))
7846 operands[2] = force_reg (<MODE>mode, operands[2]);
7847 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
7848 }
7849 else
7850 {
7851 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
7852 DONE;
7853 }
7854 })
7855
7856 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
7857 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
7858 (mult:VI4_AVX512F
7859 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
7860 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
7861 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
7862 "@
7863 pmulld\t{%2, %0|%0, %2}
7864 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7865 [(set_attr "isa" "noavx,avx")
7866 (set_attr "type" "sseimul")
7867 (set_attr "prefix_extra" "1")
7868 (set_attr "prefix" "<mask_prefix3>")
7869 (set_attr "btver2_decode" "vector,vector")
7870 (set_attr "mode" "<sseinsnmode>")])
7871
7872 (define_expand "mul<mode>3"
7873 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
7874 (mult:VI8_AVX2_AVX512F
7875 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
7876 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
7877 "TARGET_SSE2"
7878 {
7879 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
7880 DONE;
7881 })
7882
7883 (define_expand "vec_widen_<s>mult_hi_<mode>"
7884 [(match_operand:<sseunpackmode> 0 "register_operand")
7885 (any_extend:<sseunpackmode>
7886 (match_operand:VI124_AVX2 1 "register_operand"))
7887 (match_operand:VI124_AVX2 2 "register_operand")]
7888 "TARGET_SSE2"
7889 {
7890 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
7891 <u_bool>, true);
7892 DONE;
7893 })
7894
7895 (define_expand "vec_widen_<s>mult_lo_<mode>"
7896 [(match_operand:<sseunpackmode> 0 "register_operand")
7897 (any_extend:<sseunpackmode>
7898 (match_operand:VI124_AVX2 1 "register_operand"))
7899 (match_operand:VI124_AVX2 2 "register_operand")]
7900 "TARGET_SSE2"
7901 {
7902 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
7903 <u_bool>, false);
7904 DONE;
7905 })
7906
7907 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
7908 ;; named patterns, but signed V4SI needs special help for plain SSE2.
7909 (define_expand "vec_widen_smult_even_v4si"
7910 [(match_operand:V2DI 0 "register_operand")
7911 (match_operand:V4SI 1 "nonimmediate_operand")
7912 (match_operand:V4SI 2 "nonimmediate_operand")]
7913 "TARGET_SSE2"
7914 {
7915 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
7916 false, false);
7917 DONE;
7918 })
7919
7920 (define_expand "vec_widen_<s>mult_odd_<mode>"
7921 [(match_operand:<sseunpackmode> 0 "register_operand")
7922 (any_extend:<sseunpackmode>
7923 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
7924 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
7925 "TARGET_SSE2"
7926 {
7927 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
7928 <u_bool>, true);
7929 DONE;
7930 })
7931
7932 (define_expand "sdot_prod<mode>"
7933 [(match_operand:<sseunpackmode> 0 "register_operand")
7934 (match_operand:VI2_AVX2 1 "register_operand")
7935 (match_operand:VI2_AVX2 2 "register_operand")
7936 (match_operand:<sseunpackmode> 3 "register_operand")]
7937 "TARGET_SSE2"
7938 {
7939 rtx t = gen_reg_rtx (<sseunpackmode>mode);
7940 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
7941 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
7942 gen_rtx_PLUS (<sseunpackmode>mode,
7943 operands[3], t)));
7944 DONE;
7945 })
7946
7947 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
7948 ;; back together when madd is available.
7949 (define_expand "sdot_prodv4si"
7950 [(match_operand:V2DI 0 "register_operand")
7951 (match_operand:V4SI 1 "register_operand")
7952 (match_operand:V4SI 2 "register_operand")
7953 (match_operand:V2DI 3 "register_operand")]
7954 "TARGET_XOP"
7955 {
7956 rtx t = gen_reg_rtx (V2DImode);
7957 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
7958 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
7959 DONE;
7960 })
7961
7962 (define_insn "ashr<mode>3"
7963 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
7964 (ashiftrt:VI24_AVX2
7965 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
7966 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
7967 "TARGET_SSE2"
7968 "@
7969 psra<ssemodesuffix>\t{%2, %0|%0, %2}
7970 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7971 [(set_attr "isa" "noavx,avx")
7972 (set_attr "type" "sseishft")
7973 (set (attr "length_immediate")
7974 (if_then_else (match_operand 2 "const_int_operand")
7975 (const_string "1")
7976 (const_string "0")))
7977 (set_attr "prefix_data16" "1,*")
7978 (set_attr "prefix" "orig,vex")
7979 (set_attr "mode" "<sseinsnmode>")])
7980
7981 (define_insn "ashr<mode>3<mask_name>"
7982 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
7983 (ashiftrt:VI48_512
7984 (match_operand:VI48_512 1 "nonimmediate_operand" "v,vm")
7985 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
7986 "TARGET_AVX512F && <mask_mode512bit_condition>"
7987 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7988 [(set_attr "type" "sseishft")
7989 (set (attr "length_immediate")
7990 (if_then_else (match_operand 2 "const_int_operand")
7991 (const_string "1")
7992 (const_string "0")))
7993 (set_attr "mode" "<sseinsnmode>")])
7994
7995 (define_insn "<shift_insn><mode>3"
7996 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
7997 (any_lshift:VI248_AVX2
7998 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
7999 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
8000 "TARGET_SSE2"
8001 "@
8002 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
8003 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8004 [(set_attr "isa" "noavx,avx")
8005 (set_attr "type" "sseishft")
8006 (set (attr "length_immediate")
8007 (if_then_else (match_operand 2 "const_int_operand")
8008 (const_string "1")
8009 (const_string "0")))
8010 (set_attr "prefix_data16" "1,*")
8011 (set_attr "prefix" "orig,vex")
8012 (set_attr "mode" "<sseinsnmode>")])
8013
8014 (define_insn "<shift_insn><mode>3<mask_name>"
8015 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
8016 (any_lshift:VI48_512
8017 (match_operand:VI48_512 1 "register_operand" "v,m")
8018 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
8019 "TARGET_AVX512F && <mask_mode512bit_condition>"
8020 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8021 [(set_attr "isa" "avx512f")
8022 (set_attr "type" "sseishft")
8023 (set (attr "length_immediate")
8024 (if_then_else (match_operand 2 "const_int_operand")
8025 (const_string "1")
8026 (const_string "0")))
8027 (set_attr "prefix" "evex")
8028 (set_attr "mode" "<sseinsnmode>")])
8029
8030
8031 (define_expand "vec_shl_<mode>"
8032 [(set (match_dup 3)
8033 (ashift:V1TI
8034 (match_operand:VI_128 1 "register_operand")
8035 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8036 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8037 "TARGET_SSE2"
8038 {
8039 operands[1] = gen_lowpart (V1TImode, operands[1]);
8040 operands[3] = gen_reg_rtx (V1TImode);
8041 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
8042 })
8043
8044 (define_insn "<sse2_avx2>_ashl<mode>3"
8045 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8046 (ashift:VIMAX_AVX2
8047 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8048 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8049 "TARGET_SSE2"
8050 {
8051 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8052
8053 switch (which_alternative)
8054 {
8055 case 0:
8056 return "pslldq\t{%2, %0|%0, %2}";
8057 case 1:
8058 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
8059 default:
8060 gcc_unreachable ();
8061 }
8062 }
8063 [(set_attr "isa" "noavx,avx")
8064 (set_attr "type" "sseishft")
8065 (set_attr "length_immediate" "1")
8066 (set_attr "prefix_data16" "1,*")
8067 (set_attr "prefix" "orig,vex")
8068 (set_attr "mode" "<sseinsnmode>")])
8069
8070 (define_expand "vec_shr_<mode>"
8071 [(set (match_dup 3)
8072 (lshiftrt:V1TI
8073 (match_operand:VI_128 1 "register_operand")
8074 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8075 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8076 "TARGET_SSE2"
8077 {
8078 operands[1] = gen_lowpart (V1TImode, operands[1]);
8079 operands[3] = gen_reg_rtx (V1TImode);
8080 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
8081 })
8082
8083 (define_insn "<sse2_avx2>_lshr<mode>3"
8084 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8085 (lshiftrt:VIMAX_AVX2
8086 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8087 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8088 "TARGET_SSE2"
8089 {
8090 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8091
8092 switch (which_alternative)
8093 {
8094 case 0:
8095 return "psrldq\t{%2, %0|%0, %2}";
8096 case 1:
8097 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
8098 default:
8099 gcc_unreachable ();
8100 }
8101 }
8102 [(set_attr "isa" "noavx,avx")
8103 (set_attr "type" "sseishft")
8104 (set_attr "length_immediate" "1")
8105 (set_attr "atom_unit" "sishuf")
8106 (set_attr "prefix_data16" "1,*")
8107 (set_attr "prefix" "orig,vex")
8108 (set_attr "mode" "<sseinsnmode>")])
8109
8110 (define_insn "avx512f_<rotate>v<mode><mask_name>"
8111 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8112 (any_rotate:VI48_512
8113 (match_operand:VI48_512 1 "register_operand" "v")
8114 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")))]
8115 "TARGET_AVX512F"
8116 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8117 [(set_attr "prefix" "evex")
8118 (set_attr "mode" "<sseinsnmode>")])
8119
8120 (define_insn "avx512f_<rotate><mode><mask_name>"
8121 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8122 (any_rotate:VI48_512
8123 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")
8124 (match_operand:SI 2 "const_0_to_255_operand")))]
8125 "TARGET_AVX512F"
8126 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8127 [(set_attr "prefix" "evex")
8128 (set_attr "mode" "<sseinsnmode>")])
8129
8130 (define_expand "<code><mode>3<mask_name>"
8131 [(set (match_operand:VI124_256_48_512 0 "register_operand")
8132 (maxmin:VI124_256_48_512
8133 (match_operand:VI124_256_48_512 1 "nonimmediate_operand")
8134 (match_operand:VI124_256_48_512 2 "nonimmediate_operand")))]
8135 "TARGET_AVX2 && <mask_mode512bit_condition>"
8136 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
8137
8138 (define_insn "*avx2_<code><mode>3<mask_name>"
8139 [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v")
8140 (maxmin:VI124_256_48_512
8141 (match_operand:VI124_256_48_512 1 "nonimmediate_operand" "%v")
8142 (match_operand:VI124_256_48_512 2 "nonimmediate_operand" "vm")))]
8143 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
8144 && <mask_mode512bit_condition>"
8145 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8146 [(set_attr "type" "sseiadd")
8147 (set_attr "prefix_extra" "1")
8148 (set_attr "prefix" "maybe_evex")
8149 (set_attr "mode" "OI")])
8150
8151 (define_expand "<code><mode>3"
8152 [(set (match_operand:VI8_AVX2 0 "register_operand")
8153 (maxmin:VI8_AVX2
8154 (match_operand:VI8_AVX2 1 "register_operand")
8155 (match_operand:VI8_AVX2 2 "register_operand")))]
8156 "TARGET_SSE4_2"
8157 {
8158 enum rtx_code code;
8159 rtx xops[6];
8160 bool ok;
8161
8162 xops[0] = operands[0];
8163
8164 if (<CODE> == SMAX || <CODE> == UMAX)
8165 {
8166 xops[1] = operands[1];
8167 xops[2] = operands[2];
8168 }
8169 else
8170 {
8171 xops[1] = operands[2];
8172 xops[2] = operands[1];
8173 }
8174
8175 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
8176
8177 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
8178 xops[4] = operands[1];
8179 xops[5] = operands[2];
8180
8181 ok = ix86_expand_int_vcond (xops);
8182 gcc_assert (ok);
8183 DONE;
8184 })
8185
8186 (define_expand "<code><mode>3"
8187 [(set (match_operand:VI124_128 0 "register_operand")
8188 (smaxmin:VI124_128
8189 (match_operand:VI124_128 1 "nonimmediate_operand")
8190 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8191 "TARGET_SSE2"
8192 {
8193 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
8194 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8195 else
8196 {
8197 rtx xops[6];
8198 bool ok;
8199
8200 xops[0] = operands[0];
8201 operands[1] = force_reg (<MODE>mode, operands[1]);
8202 operands[2] = force_reg (<MODE>mode, operands[2]);
8203
8204 if (<CODE> == SMAX)
8205 {
8206 xops[1] = operands[1];
8207 xops[2] = operands[2];
8208 }
8209 else
8210 {
8211 xops[1] = operands[2];
8212 xops[2] = operands[1];
8213 }
8214
8215 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
8216 xops[4] = operands[1];
8217 xops[5] = operands[2];
8218
8219 ok = ix86_expand_int_vcond (xops);
8220 gcc_assert (ok);
8221 DONE;
8222 }
8223 })
8224
8225 (define_insn "*sse4_1_<code><mode>3"
8226 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
8227 (smaxmin:VI14_128
8228 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
8229 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
8230 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8231 "@
8232 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
8233 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8234 [(set_attr "isa" "noavx,avx")
8235 (set_attr "type" "sseiadd")
8236 (set_attr "prefix_extra" "1,*")
8237 (set_attr "prefix" "orig,vex")
8238 (set_attr "mode" "TI")])
8239
8240 (define_insn "*<code>v8hi3"
8241 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8242 (smaxmin:V8HI
8243 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
8244 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
8245 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
8246 "@
8247 p<maxmin_int>w\t{%2, %0|%0, %2}
8248 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
8249 [(set_attr "isa" "noavx,avx")
8250 (set_attr "type" "sseiadd")
8251 (set_attr "prefix_data16" "1,*")
8252 (set_attr "prefix_extra" "*,1")
8253 (set_attr "prefix" "orig,vex")
8254 (set_attr "mode" "TI")])
8255
8256 (define_expand "<code><mode>3"
8257 [(set (match_operand:VI124_128 0 "register_operand")
8258 (umaxmin:VI124_128
8259 (match_operand:VI124_128 1 "nonimmediate_operand")
8260 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8261 "TARGET_SSE2"
8262 {
8263 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
8264 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8265 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
8266 {
8267 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
8268 operands[1] = force_reg (<MODE>mode, operands[1]);
8269 if (rtx_equal_p (op3, op2))
8270 op3 = gen_reg_rtx (V8HImode);
8271 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
8272 emit_insn (gen_addv8hi3 (op0, op3, op2));
8273 DONE;
8274 }
8275 else
8276 {
8277 rtx xops[6];
8278 bool ok;
8279
8280 operands[1] = force_reg (<MODE>mode, operands[1]);
8281 operands[2] = force_reg (<MODE>mode, operands[2]);
8282
8283 xops[0] = operands[0];
8284
8285 if (<CODE> == UMAX)
8286 {
8287 xops[1] = operands[1];
8288 xops[2] = operands[2];
8289 }
8290 else
8291 {
8292 xops[1] = operands[2];
8293 xops[2] = operands[1];
8294 }
8295
8296 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
8297 xops[4] = operands[1];
8298 xops[5] = operands[2];
8299
8300 ok = ix86_expand_int_vcond (xops);
8301 gcc_assert (ok);
8302 DONE;
8303 }
8304 })
8305
8306 (define_insn "*sse4_1_<code><mode>3"
8307 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
8308 (umaxmin:VI24_128
8309 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
8310 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
8311 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8312 "@
8313 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
8314 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8315 [(set_attr "isa" "noavx,avx")
8316 (set_attr "type" "sseiadd")
8317 (set_attr "prefix_extra" "1,*")
8318 (set_attr "prefix" "orig,vex")
8319 (set_attr "mode" "TI")])
8320
8321 (define_insn "*<code>v16qi3"
8322 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
8323 (umaxmin:V16QI
8324 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
8325 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
8326 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
8327 "@
8328 p<maxmin_int>b\t{%2, %0|%0, %2}
8329 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
8330 [(set_attr "isa" "noavx,avx")
8331 (set_attr "type" "sseiadd")
8332 (set_attr "prefix_data16" "1,*")
8333 (set_attr "prefix_extra" "*,1")
8334 (set_attr "prefix" "orig,vex")
8335 (set_attr "mode" "TI")])
8336
8337 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8338 ;;
8339 ;; Parallel integral comparisons
8340 ;;
8341 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8342
8343 (define_expand "avx2_eq<mode>3"
8344 [(set (match_operand:VI_256 0 "register_operand")
8345 (eq:VI_256
8346 (match_operand:VI_256 1 "nonimmediate_operand")
8347 (match_operand:VI_256 2 "nonimmediate_operand")))]
8348 "TARGET_AVX2"
8349 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
8350
8351 (define_insn "*avx2_eq<mode>3"
8352 [(set (match_operand:VI_256 0 "register_operand" "=x")
8353 (eq:VI_256
8354 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
8355 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
8356 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8357 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8358 [(set_attr "type" "ssecmp")
8359 (set_attr "prefix_extra" "1")
8360 (set_attr "prefix" "vex")
8361 (set_attr "mode" "OI")])
8362
8363 (define_expand "avx512f_eq<mode>3"
8364 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
8365 (unspec:<avx512fmaskmode>
8366 [(match_operand:VI48_512 1 "register_operand")
8367 (match_operand:VI48_512 2 "nonimmediate_operand")]
8368 UNSPEC_MASKED_EQ))]
8369 "TARGET_AVX512F"
8370 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
8371
8372 (define_insn "avx512f_eq<mode>3_1"
8373 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
8374 (unspec:<avx512fmaskmode>
8375 [(match_operand:VI48_512 1 "register_operand" "%v")
8376 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
8377 UNSPEC_MASKED_EQ))]
8378 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8379 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8380 [(set_attr "type" "ssecmp")
8381 (set_attr "prefix_extra" "1")
8382 (set_attr "prefix" "evex")
8383 (set_attr "mode" "<sseinsnmode>")])
8384
8385 (define_insn "*sse4_1_eqv2di3"
8386 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8387 (eq:V2DI
8388 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
8389 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
8390 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
8391 "@
8392 pcmpeqq\t{%2, %0|%0, %2}
8393 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
8394 [(set_attr "isa" "noavx,avx")
8395 (set_attr "type" "ssecmp")
8396 (set_attr "prefix_extra" "1")
8397 (set_attr "prefix" "orig,vex")
8398 (set_attr "mode" "TI")])
8399
8400 (define_insn "*sse2_eq<mode>3"
8401 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
8402 (eq:VI124_128
8403 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
8404 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
8405 "TARGET_SSE2 && !TARGET_XOP
8406 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8407 "@
8408 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
8409 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8410 [(set_attr "isa" "noavx,avx")
8411 (set_attr "type" "ssecmp")
8412 (set_attr "prefix_data16" "1,*")
8413 (set_attr "prefix" "orig,vex")
8414 (set_attr "mode" "TI")])
8415
8416 (define_expand "sse2_eq<mode>3"
8417 [(set (match_operand:VI124_128 0 "register_operand")
8418 (eq:VI124_128
8419 (match_operand:VI124_128 1 "nonimmediate_operand")
8420 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8421 "TARGET_SSE2 && !TARGET_XOP "
8422 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
8423
8424 (define_expand "sse4_1_eqv2di3"
8425 [(set (match_operand:V2DI 0 "register_operand")
8426 (eq:V2DI
8427 (match_operand:V2DI 1 "nonimmediate_operand")
8428 (match_operand:V2DI 2 "nonimmediate_operand")))]
8429 "TARGET_SSE4_1"
8430 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
8431
8432 (define_insn "sse4_2_gtv2di3"
8433 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8434 (gt:V2DI
8435 (match_operand:V2DI 1 "register_operand" "0,x")
8436 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
8437 "TARGET_SSE4_2"
8438 "@
8439 pcmpgtq\t{%2, %0|%0, %2}
8440 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
8441 [(set_attr "isa" "noavx,avx")
8442 (set_attr "type" "ssecmp")
8443 (set_attr "prefix_extra" "1")
8444 (set_attr "prefix" "orig,vex")
8445 (set_attr "mode" "TI")])
8446
8447 (define_insn "avx2_gt<mode>3"
8448 [(set (match_operand:VI_256 0 "register_operand" "=x")
8449 (gt:VI_256
8450 (match_operand:VI_256 1 "register_operand" "x")
8451 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
8452 "TARGET_AVX2"
8453 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8454 [(set_attr "type" "ssecmp")
8455 (set_attr "prefix_extra" "1")
8456 (set_attr "prefix" "vex")
8457 (set_attr "mode" "OI")])
8458
8459 (define_insn "avx512f_gt<mode>3"
8460 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
8461 (unspec:<avx512fmaskmode>
8462 [(match_operand:VI48_512 1 "register_operand" "v")
8463 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
8464 "TARGET_AVX512F"
8465 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8466 [(set_attr "type" "ssecmp")
8467 (set_attr "prefix_extra" "1")
8468 (set_attr "prefix" "evex")
8469 (set_attr "mode" "<sseinsnmode>")])
8470
8471 (define_insn "sse2_gt<mode>3"
8472 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
8473 (gt:VI124_128
8474 (match_operand:VI124_128 1 "register_operand" "0,x")
8475 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
8476 "TARGET_SSE2 && !TARGET_XOP"
8477 "@
8478 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
8479 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8480 [(set_attr "isa" "noavx,avx")
8481 (set_attr "type" "ssecmp")
8482 (set_attr "prefix_data16" "1,*")
8483 (set_attr "prefix" "orig,vex")
8484 (set_attr "mode" "TI")])
8485
8486 (define_expand "vcond<V_512:mode><VI_512:mode>"
8487 [(set (match_operand:V_512 0 "register_operand")
8488 (if_then_else:V_512
8489 (match_operator 3 ""
8490 [(match_operand:VI_512 4 "nonimmediate_operand")
8491 (match_operand:VI_512 5 "general_operand")])
8492 (match_operand:V_512 1)
8493 (match_operand:V_512 2)))]
8494 "TARGET_AVX512F
8495 && (GET_MODE_NUNITS (<V_512:MODE>mode)
8496 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
8497 {
8498 bool ok = ix86_expand_int_vcond (operands);
8499 gcc_assert (ok);
8500 DONE;
8501 })
8502
8503 (define_expand "vcond<V_256:mode><VI_256:mode>"
8504 [(set (match_operand:V_256 0 "register_operand")
8505 (if_then_else:V_256
8506 (match_operator 3 ""
8507 [(match_operand:VI_256 4 "nonimmediate_operand")
8508 (match_operand:VI_256 5 "general_operand")])
8509 (match_operand:V_256 1)
8510 (match_operand:V_256 2)))]
8511 "TARGET_AVX2
8512 && (GET_MODE_NUNITS (<V_256:MODE>mode)
8513 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
8514 {
8515 bool ok = ix86_expand_int_vcond (operands);
8516 gcc_assert (ok);
8517 DONE;
8518 })
8519
8520 (define_expand "vcond<V_128:mode><VI124_128:mode>"
8521 [(set (match_operand:V_128 0 "register_operand")
8522 (if_then_else:V_128
8523 (match_operator 3 ""
8524 [(match_operand:VI124_128 4 "nonimmediate_operand")
8525 (match_operand:VI124_128 5 "general_operand")])
8526 (match_operand:V_128 1)
8527 (match_operand:V_128 2)))]
8528 "TARGET_SSE2
8529 && (GET_MODE_NUNITS (<V_128:MODE>mode)
8530 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
8531 {
8532 bool ok = ix86_expand_int_vcond (operands);
8533 gcc_assert (ok);
8534 DONE;
8535 })
8536
8537 (define_expand "vcond<VI8F_128:mode>v2di"
8538 [(set (match_operand:VI8F_128 0 "register_operand")
8539 (if_then_else:VI8F_128
8540 (match_operator 3 ""
8541 [(match_operand:V2DI 4 "nonimmediate_operand")
8542 (match_operand:V2DI 5 "general_operand")])
8543 (match_operand:VI8F_128 1)
8544 (match_operand:VI8F_128 2)))]
8545 "TARGET_SSE4_2"
8546 {
8547 bool ok = ix86_expand_int_vcond (operands);
8548 gcc_assert (ok);
8549 DONE;
8550 })
8551
8552 (define_expand "vcondu<V_512:mode><VI_512:mode>"
8553 [(set (match_operand:V_512 0 "register_operand")
8554 (if_then_else:V_512
8555 (match_operator 3 ""
8556 [(match_operand:VI_512 4 "nonimmediate_operand")
8557 (match_operand:VI_512 5 "nonimmediate_operand")])
8558 (match_operand:V_512 1 "general_operand")
8559 (match_operand:V_512 2 "general_operand")))]
8560 "TARGET_AVX512F
8561 && (GET_MODE_NUNITS (<V_512:MODE>mode)
8562 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
8563 {
8564 bool ok = ix86_expand_int_vcond (operands);
8565 gcc_assert (ok);
8566 DONE;
8567 })
8568
8569 (define_expand "vcondu<V_256:mode><VI_256:mode>"
8570 [(set (match_operand:V_256 0 "register_operand")
8571 (if_then_else:V_256
8572 (match_operator 3 ""
8573 [(match_operand:VI_256 4 "nonimmediate_operand")
8574 (match_operand:VI_256 5 "nonimmediate_operand")])
8575 (match_operand:V_256 1 "general_operand")
8576 (match_operand:V_256 2 "general_operand")))]
8577 "TARGET_AVX2
8578 && (GET_MODE_NUNITS (<V_256:MODE>mode)
8579 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
8580 {
8581 bool ok = ix86_expand_int_vcond (operands);
8582 gcc_assert (ok);
8583 DONE;
8584 })
8585
8586 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
8587 [(set (match_operand:V_128 0 "register_operand")
8588 (if_then_else:V_128
8589 (match_operator 3 ""
8590 [(match_operand:VI124_128 4 "nonimmediate_operand")
8591 (match_operand:VI124_128 5 "nonimmediate_operand")])
8592 (match_operand:V_128 1 "general_operand")
8593 (match_operand:V_128 2 "general_operand")))]
8594 "TARGET_SSE2
8595 && (GET_MODE_NUNITS (<V_128:MODE>mode)
8596 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
8597 {
8598 bool ok = ix86_expand_int_vcond (operands);
8599 gcc_assert (ok);
8600 DONE;
8601 })
8602
8603 (define_expand "vcondu<VI8F_128:mode>v2di"
8604 [(set (match_operand:VI8F_128 0 "register_operand")
8605 (if_then_else:VI8F_128
8606 (match_operator 3 ""
8607 [(match_operand:V2DI 4 "nonimmediate_operand")
8608 (match_operand:V2DI 5 "nonimmediate_operand")])
8609 (match_operand:VI8F_128 1 "general_operand")
8610 (match_operand:VI8F_128 2 "general_operand")))]
8611 "TARGET_SSE4_2"
8612 {
8613 bool ok = ix86_expand_int_vcond (operands);
8614 gcc_assert (ok);
8615 DONE;
8616 })
8617
8618 (define_mode_iterator VEC_PERM_AVX2
8619 [V16QI V8HI V4SI V2DI V4SF V2DF
8620 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
8621 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
8622 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
8623 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
8624 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
8625
8626 (define_expand "vec_perm<mode>"
8627 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
8628 (match_operand:VEC_PERM_AVX2 1 "register_operand")
8629 (match_operand:VEC_PERM_AVX2 2 "register_operand")
8630 (match_operand:<sseintvecmode> 3 "register_operand")]
8631 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
8632 {
8633 ix86_expand_vec_perm (operands);
8634 DONE;
8635 })
8636
8637 (define_mode_iterator VEC_PERM_CONST
8638 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
8639 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
8640 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
8641 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
8642 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
8643 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
8644 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
8645 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
8646
8647 (define_expand "vec_perm_const<mode>"
8648 [(match_operand:VEC_PERM_CONST 0 "register_operand")
8649 (match_operand:VEC_PERM_CONST 1 "register_operand")
8650 (match_operand:VEC_PERM_CONST 2 "register_operand")
8651 (match_operand:<sseintvecmode> 3)]
8652 ""
8653 {
8654 if (ix86_expand_vec_perm_const (operands))
8655 DONE;
8656 else
8657 FAIL;
8658 })
8659
8660 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8661 ;;
8662 ;; Parallel bitwise logical operations
8663 ;;
8664 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8665
8666 (define_expand "one_cmpl<mode>2"
8667 [(set (match_operand:VI 0 "register_operand")
8668 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
8669 (match_dup 2)))]
8670 "TARGET_SSE"
8671 {
8672 int i, n = GET_MODE_NUNITS (<MODE>mode);
8673 rtvec v = rtvec_alloc (n);
8674
8675 for (i = 0; i < n; ++i)
8676 RTVEC_ELT (v, i) = constm1_rtx;
8677
8678 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
8679 })
8680
8681 (define_expand "<sse2_avx2>_andnot<mode>3<mask_name>"
8682 [(set (match_operand:VI_AVX2 0 "register_operand")
8683 (and:VI_AVX2
8684 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
8685 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
8686 "TARGET_SSE2 && <mask_mode512bit_condition>")
8687
8688 (define_insn "*andnot<mode>3<mask_name>"
8689 [(set (match_operand:VI 0 "register_operand" "=x,v")
8690 (and:VI
8691 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
8692 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
8693 "TARGET_SSE && <mask_mode512bit_condition>"
8694 {
8695 static char buf[64];
8696 const char *ops;
8697 const char *tmp;
8698
8699 switch (get_attr_mode (insn))
8700 {
8701 case MODE_XI:
8702 gcc_assert (TARGET_AVX512F);
8703
8704 tmp = "pandn<ssemodesuffix>";
8705 break;
8706
8707 case MODE_OI:
8708 gcc_assert (TARGET_AVX2);
8709 case MODE_TI:
8710 gcc_assert (TARGET_SSE2);
8711
8712 tmp = "pandn";
8713 break;
8714
8715 case MODE_V8SF:
8716 gcc_assert (TARGET_AVX);
8717 case MODE_V4SF:
8718 gcc_assert (TARGET_SSE);
8719
8720 tmp = "andnps";
8721 break;
8722
8723 default:
8724 gcc_unreachable ();
8725 }
8726
8727 switch (which_alternative)
8728 {
8729 case 0:
8730 ops = "%s\t{%%2, %%0|%%0, %%2}";
8731 break;
8732 case 1:
8733 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
8734 break;
8735 default:
8736 gcc_unreachable ();
8737 }
8738
8739 snprintf (buf, sizeof (buf), ops, tmp);
8740 return buf;
8741 }
8742 [(set_attr "isa" "noavx,avx")
8743 (set_attr "type" "sselog")
8744 (set (attr "prefix_data16")
8745 (if_then_else
8746 (and (eq_attr "alternative" "0")
8747 (eq_attr "mode" "TI"))
8748 (const_string "1")
8749 (const_string "*")))
8750 (set_attr "prefix" "<mask_prefix3>")
8751 (set (attr "mode")
8752 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
8753 (const_string "<ssePSmode>")
8754 (match_test "TARGET_AVX2")
8755 (const_string "<sseinsnmode>")
8756 (match_test "TARGET_AVX")
8757 (if_then_else
8758 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
8759 (const_string "V8SF")
8760 (const_string "<sseinsnmode>"))
8761 (ior (not (match_test "TARGET_SSE2"))
8762 (match_test "optimize_function_for_size_p (cfun)"))
8763 (const_string "V4SF")
8764 ]
8765 (const_string "<sseinsnmode>")))])
8766
8767 (define_expand "<code><mode>3"
8768 [(set (match_operand:VI 0 "register_operand")
8769 (any_logic:VI
8770 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
8771 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
8772 "TARGET_SSE"
8773 {
8774 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
8775 DONE;
8776 })
8777
8778 (define_insn "<mask_codefor><code><mode>3<mask_name>"
8779 [(set (match_operand:VI 0 "register_operand" "=x,v")
8780 (any_logic:VI
8781 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
8782 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
8783 "TARGET_SSE && <mask_mode512bit_condition>
8784 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8785 {
8786 static char buf[64];
8787 const char *ops;
8788 const char *tmp;
8789
8790 switch (get_attr_mode (insn))
8791 {
8792 case MODE_XI:
8793 gcc_assert (TARGET_AVX512F);
8794 tmp = "p<logic><ssemodesuffix>";
8795 break;
8796
8797 case MODE_OI:
8798 gcc_assert (TARGET_AVX2);
8799 case MODE_TI:
8800 gcc_assert (TARGET_SSE2);
8801
8802 tmp = "p<logic>";
8803 break;
8804
8805 case MODE_V16SF:
8806 gcc_assert (TARGET_AVX512F);
8807 case MODE_V8SF:
8808 gcc_assert (TARGET_AVX);
8809 case MODE_V4SF:
8810 gcc_assert (TARGET_SSE);
8811
8812 tmp = "<logic>ps";
8813 break;
8814
8815 default:
8816 gcc_unreachable ();
8817 }
8818
8819 switch (which_alternative)
8820 {
8821 case 0:
8822 ops = "%s\t{%%2, %%0|%%0, %%2}";
8823 break;
8824 case 1:
8825 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
8826 break;
8827 default:
8828 gcc_unreachable ();
8829 }
8830
8831 snprintf (buf, sizeof (buf), ops, tmp);
8832 return buf;
8833 }
8834 [(set_attr "isa" "noavx,avx")
8835 (set_attr "type" "sselog")
8836 (set (attr "prefix_data16")
8837 (if_then_else
8838 (and (eq_attr "alternative" "0")
8839 (eq_attr "mode" "TI"))
8840 (const_string "1")
8841 (const_string "*")))
8842 (set_attr "prefix" "<mask_prefix3>")
8843 (set (attr "mode")
8844 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
8845 (const_string "<ssePSmode>")
8846 (match_test "TARGET_AVX2")
8847 (const_string "<sseinsnmode>")
8848 (match_test "TARGET_AVX")
8849 (if_then_else
8850 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
8851 (const_string "V8SF")
8852 (const_string "<sseinsnmode>"))
8853 (ior (not (match_test "TARGET_SSE2"))
8854 (match_test "optimize_function_for_size_p (cfun)"))
8855 (const_string "V4SF")
8856 ]
8857 (const_string "<sseinsnmode>")))])
8858
8859 (define_insn "avx512f_testm<mode>3"
8860 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
8861 (unspec:<avx512fmaskmode>
8862 [(match_operand:VI48_512 1 "register_operand" "v")
8863 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
8864 UNSPEC_TESTM))]
8865 "TARGET_AVX512F"
8866 "vptestm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8867 [(set_attr "prefix" "evex")
8868 (set_attr "mode" "<sseinsnmode>")])
8869
8870 (define_insn "avx512f_testnm<mode>3"
8871 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
8872 (unspec:<avx512fmaskmode>
8873 [(match_operand:VI48_512 1 "register_operand" "v")
8874 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
8875 UNSPEC_TESTNM))]
8876 "TARGET_AVX512CD"
8877 "%vptestnm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8878 [(set_attr "prefix" "evex")
8879 (set_attr "mode" "<sseinsnmode>")])
8880
8881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8882 ;;
8883 ;; Parallel integral element swizzling
8884 ;;
8885 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8886
8887 (define_expand "vec_pack_trunc_<mode>"
8888 [(match_operand:<ssepackmode> 0 "register_operand")
8889 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
8890 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
8891 "TARGET_SSE2"
8892 {
8893 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
8894 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
8895 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
8896 DONE;
8897 })
8898
8899 (define_insn "<sse2_avx2>_packsswb"
8900 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8901 (vec_concat:VI1_AVX2
8902 (ss_truncate:<ssehalfvecmode>
8903 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
8904 (ss_truncate:<ssehalfvecmode>
8905 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
8906 "TARGET_SSE2"
8907 "@
8908 packsswb\t{%2, %0|%0, %2}
8909 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
8910 [(set_attr "isa" "noavx,avx")
8911 (set_attr "type" "sselog")
8912 (set_attr "prefix_data16" "1,*")
8913 (set_attr "prefix" "orig,vex")
8914 (set_attr "mode" "<sseinsnmode>")])
8915
8916 (define_insn "<sse2_avx2>_packssdw"
8917 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
8918 (vec_concat:VI2_AVX2
8919 (ss_truncate:<ssehalfvecmode>
8920 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
8921 (ss_truncate:<ssehalfvecmode>
8922 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
8923 "TARGET_SSE2"
8924 "@
8925 packssdw\t{%2, %0|%0, %2}
8926 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
8927 [(set_attr "isa" "noavx,avx")
8928 (set_attr "type" "sselog")
8929 (set_attr "prefix_data16" "1,*")
8930 (set_attr "prefix" "orig,vex")
8931 (set_attr "mode" "<sseinsnmode>")])
8932
8933 (define_insn "<sse2_avx2>_packuswb"
8934 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8935 (vec_concat:VI1_AVX2
8936 (us_truncate:<ssehalfvecmode>
8937 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
8938 (us_truncate:<ssehalfvecmode>
8939 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
8940 "TARGET_SSE2"
8941 "@
8942 packuswb\t{%2, %0|%0, %2}
8943 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
8944 [(set_attr "isa" "noavx,avx")
8945 (set_attr "type" "sselog")
8946 (set_attr "prefix_data16" "1,*")
8947 (set_attr "prefix" "orig,vex")
8948 (set_attr "mode" "<sseinsnmode>")])
8949
8950 (define_insn "avx2_interleave_highv32qi"
8951 [(set (match_operand:V32QI 0 "register_operand" "=x")
8952 (vec_select:V32QI
8953 (vec_concat:V64QI
8954 (match_operand:V32QI 1 "register_operand" "x")
8955 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
8956 (parallel [(const_int 8) (const_int 40)
8957 (const_int 9) (const_int 41)
8958 (const_int 10) (const_int 42)
8959 (const_int 11) (const_int 43)
8960 (const_int 12) (const_int 44)
8961 (const_int 13) (const_int 45)
8962 (const_int 14) (const_int 46)
8963 (const_int 15) (const_int 47)
8964 (const_int 24) (const_int 56)
8965 (const_int 25) (const_int 57)
8966 (const_int 26) (const_int 58)
8967 (const_int 27) (const_int 59)
8968 (const_int 28) (const_int 60)
8969 (const_int 29) (const_int 61)
8970 (const_int 30) (const_int 62)
8971 (const_int 31) (const_int 63)])))]
8972 "TARGET_AVX2"
8973 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
8974 [(set_attr "type" "sselog")
8975 (set_attr "prefix" "vex")
8976 (set_attr "mode" "OI")])
8977
8978 (define_insn "vec_interleave_highv16qi"
8979 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
8980 (vec_select:V16QI
8981 (vec_concat:V32QI
8982 (match_operand:V16QI 1 "register_operand" "0,x")
8983 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
8984 (parallel [(const_int 8) (const_int 24)
8985 (const_int 9) (const_int 25)
8986 (const_int 10) (const_int 26)
8987 (const_int 11) (const_int 27)
8988 (const_int 12) (const_int 28)
8989 (const_int 13) (const_int 29)
8990 (const_int 14) (const_int 30)
8991 (const_int 15) (const_int 31)])))]
8992 "TARGET_SSE2"
8993 "@
8994 punpckhbw\t{%2, %0|%0, %2}
8995 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
8996 [(set_attr "isa" "noavx,avx")
8997 (set_attr "type" "sselog")
8998 (set_attr "prefix_data16" "1,*")
8999 (set_attr "prefix" "orig,vex")
9000 (set_attr "mode" "TI")])
9001
9002 (define_insn "avx2_interleave_lowv32qi"
9003 [(set (match_operand:V32QI 0 "register_operand" "=x")
9004 (vec_select:V32QI
9005 (vec_concat:V64QI
9006 (match_operand:V32QI 1 "register_operand" "x")
9007 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
9008 (parallel [(const_int 0) (const_int 32)
9009 (const_int 1) (const_int 33)
9010 (const_int 2) (const_int 34)
9011 (const_int 3) (const_int 35)
9012 (const_int 4) (const_int 36)
9013 (const_int 5) (const_int 37)
9014 (const_int 6) (const_int 38)
9015 (const_int 7) (const_int 39)
9016 (const_int 16) (const_int 48)
9017 (const_int 17) (const_int 49)
9018 (const_int 18) (const_int 50)
9019 (const_int 19) (const_int 51)
9020 (const_int 20) (const_int 52)
9021 (const_int 21) (const_int 53)
9022 (const_int 22) (const_int 54)
9023 (const_int 23) (const_int 55)])))]
9024 "TARGET_AVX2"
9025 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9026 [(set_attr "type" "sselog")
9027 (set_attr "prefix" "vex")
9028 (set_attr "mode" "OI")])
9029
9030 (define_insn "vec_interleave_lowv16qi"
9031 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9032 (vec_select:V16QI
9033 (vec_concat:V32QI
9034 (match_operand:V16QI 1 "register_operand" "0,x")
9035 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
9036 (parallel [(const_int 0) (const_int 16)
9037 (const_int 1) (const_int 17)
9038 (const_int 2) (const_int 18)
9039 (const_int 3) (const_int 19)
9040 (const_int 4) (const_int 20)
9041 (const_int 5) (const_int 21)
9042 (const_int 6) (const_int 22)
9043 (const_int 7) (const_int 23)])))]
9044 "TARGET_SSE2"
9045 "@
9046 punpcklbw\t{%2, %0|%0, %2}
9047 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9048 [(set_attr "isa" "noavx,avx")
9049 (set_attr "type" "sselog")
9050 (set_attr "prefix_data16" "1,*")
9051 (set_attr "prefix" "orig,vex")
9052 (set_attr "mode" "TI")])
9053
9054 (define_insn "avx2_interleave_highv16hi"
9055 [(set (match_operand:V16HI 0 "register_operand" "=x")
9056 (vec_select:V16HI
9057 (vec_concat:V32HI
9058 (match_operand:V16HI 1 "register_operand" "x")
9059 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9060 (parallel [(const_int 4) (const_int 20)
9061 (const_int 5) (const_int 21)
9062 (const_int 6) (const_int 22)
9063 (const_int 7) (const_int 23)
9064 (const_int 12) (const_int 28)
9065 (const_int 13) (const_int 29)
9066 (const_int 14) (const_int 30)
9067 (const_int 15) (const_int 31)])))]
9068 "TARGET_AVX2"
9069 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9070 [(set_attr "type" "sselog")
9071 (set_attr "prefix" "vex")
9072 (set_attr "mode" "OI")])
9073
9074 (define_insn "vec_interleave_highv8hi"
9075 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9076 (vec_select:V8HI
9077 (vec_concat:V16HI
9078 (match_operand:V8HI 1 "register_operand" "0,x")
9079 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9080 (parallel [(const_int 4) (const_int 12)
9081 (const_int 5) (const_int 13)
9082 (const_int 6) (const_int 14)
9083 (const_int 7) (const_int 15)])))]
9084 "TARGET_SSE2"
9085 "@
9086 punpckhwd\t{%2, %0|%0, %2}
9087 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9088 [(set_attr "isa" "noavx,avx")
9089 (set_attr "type" "sselog")
9090 (set_attr "prefix_data16" "1,*")
9091 (set_attr "prefix" "orig,vex")
9092 (set_attr "mode" "TI")])
9093
9094 (define_insn "avx2_interleave_lowv16hi"
9095 [(set (match_operand:V16HI 0 "register_operand" "=x")
9096 (vec_select:V16HI
9097 (vec_concat:V32HI
9098 (match_operand:V16HI 1 "register_operand" "x")
9099 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9100 (parallel [(const_int 0) (const_int 16)
9101 (const_int 1) (const_int 17)
9102 (const_int 2) (const_int 18)
9103 (const_int 3) (const_int 19)
9104 (const_int 8) (const_int 24)
9105 (const_int 9) (const_int 25)
9106 (const_int 10) (const_int 26)
9107 (const_int 11) (const_int 27)])))]
9108 "TARGET_AVX2"
9109 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9110 [(set_attr "type" "sselog")
9111 (set_attr "prefix" "vex")
9112 (set_attr "mode" "OI")])
9113
9114 (define_insn "vec_interleave_lowv8hi"
9115 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9116 (vec_select:V8HI
9117 (vec_concat:V16HI
9118 (match_operand:V8HI 1 "register_operand" "0,x")
9119 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9120 (parallel [(const_int 0) (const_int 8)
9121 (const_int 1) (const_int 9)
9122 (const_int 2) (const_int 10)
9123 (const_int 3) (const_int 11)])))]
9124 "TARGET_SSE2"
9125 "@
9126 punpcklwd\t{%2, %0|%0, %2}
9127 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9128 [(set_attr "isa" "noavx,avx")
9129 (set_attr "type" "sselog")
9130 (set_attr "prefix_data16" "1,*")
9131 (set_attr "prefix" "orig,vex")
9132 (set_attr "mode" "TI")])
9133
9134 (define_insn "avx2_interleave_highv8si"
9135 [(set (match_operand:V8SI 0 "register_operand" "=x")
9136 (vec_select:V8SI
9137 (vec_concat:V16SI
9138 (match_operand:V8SI 1 "register_operand" "x")
9139 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9140 (parallel [(const_int 2) (const_int 10)
9141 (const_int 3) (const_int 11)
9142 (const_int 6) (const_int 14)
9143 (const_int 7) (const_int 15)])))]
9144 "TARGET_AVX2"
9145 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9146 [(set_attr "type" "sselog")
9147 (set_attr "prefix" "vex")
9148 (set_attr "mode" "OI")])
9149
9150 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
9151 [(set (match_operand:V16SI 0 "register_operand" "=v")
9152 (vec_select:V16SI
9153 (vec_concat:V32SI
9154 (match_operand:V16SI 1 "register_operand" "v")
9155 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9156 (parallel [(const_int 2) (const_int 18)
9157 (const_int 3) (const_int 19)
9158 (const_int 6) (const_int 22)
9159 (const_int 7) (const_int 23)
9160 (const_int 10) (const_int 26)
9161 (const_int 11) (const_int 27)
9162 (const_int 14) (const_int 30)
9163 (const_int 15) (const_int 31)])))]
9164 "TARGET_AVX512F"
9165 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9166 [(set_attr "type" "sselog")
9167 (set_attr "prefix" "evex")
9168 (set_attr "mode" "XI")])
9169
9170
9171 (define_insn "vec_interleave_highv4si"
9172 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9173 (vec_select:V4SI
9174 (vec_concat:V8SI
9175 (match_operand:V4SI 1 "register_operand" "0,x")
9176 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9177 (parallel [(const_int 2) (const_int 6)
9178 (const_int 3) (const_int 7)])))]
9179 "TARGET_SSE2"
9180 "@
9181 punpckhdq\t{%2, %0|%0, %2}
9182 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9183 [(set_attr "isa" "noavx,avx")
9184 (set_attr "type" "sselog")
9185 (set_attr "prefix_data16" "1,*")
9186 (set_attr "prefix" "orig,vex")
9187 (set_attr "mode" "TI")])
9188
9189 (define_insn "avx2_interleave_lowv8si"
9190 [(set (match_operand:V8SI 0 "register_operand" "=x")
9191 (vec_select:V8SI
9192 (vec_concat:V16SI
9193 (match_operand:V8SI 1 "register_operand" "x")
9194 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9195 (parallel [(const_int 0) (const_int 8)
9196 (const_int 1) (const_int 9)
9197 (const_int 4) (const_int 12)
9198 (const_int 5) (const_int 13)])))]
9199 "TARGET_AVX2"
9200 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9201 [(set_attr "type" "sselog")
9202 (set_attr "prefix" "vex")
9203 (set_attr "mode" "OI")])
9204
9205 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
9206 [(set (match_operand:V16SI 0 "register_operand" "=v")
9207 (vec_select:V16SI
9208 (vec_concat:V32SI
9209 (match_operand:V16SI 1 "register_operand" "v")
9210 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9211 (parallel [(const_int 0) (const_int 16)
9212 (const_int 1) (const_int 17)
9213 (const_int 4) (const_int 20)
9214 (const_int 5) (const_int 21)
9215 (const_int 8) (const_int 24)
9216 (const_int 9) (const_int 25)
9217 (const_int 12) (const_int 28)
9218 (const_int 13) (const_int 29)])))]
9219 "TARGET_AVX512F"
9220 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9221 [(set_attr "type" "sselog")
9222 (set_attr "prefix" "evex")
9223 (set_attr "mode" "XI")])
9224
9225 (define_insn "vec_interleave_lowv4si"
9226 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9227 (vec_select:V4SI
9228 (vec_concat:V8SI
9229 (match_operand:V4SI 1 "register_operand" "0,x")
9230 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9231 (parallel [(const_int 0) (const_int 4)
9232 (const_int 1) (const_int 5)])))]
9233 "TARGET_SSE2"
9234 "@
9235 punpckldq\t{%2, %0|%0, %2}
9236 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9237 [(set_attr "isa" "noavx,avx")
9238 (set_attr "type" "sselog")
9239 (set_attr "prefix_data16" "1,*")
9240 (set_attr "prefix" "orig,vex")
9241 (set_attr "mode" "TI")])
9242
9243 (define_expand "vec_interleave_high<mode>"
9244 [(match_operand:VI_256 0 "register_operand" "=x")
9245 (match_operand:VI_256 1 "register_operand" "x")
9246 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
9247 "TARGET_AVX2"
9248 {
9249 rtx t1 = gen_reg_rtx (<MODE>mode);
9250 rtx t2 = gen_reg_rtx (<MODE>mode);
9251 rtx t3 = gen_reg_rtx (V4DImode);
9252 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
9253 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
9254 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
9255 gen_lowpart (V4DImode, t2),
9256 GEN_INT (1 + (3 << 4))));
9257 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
9258 DONE;
9259 })
9260
9261 (define_expand "vec_interleave_low<mode>"
9262 [(match_operand:VI_256 0 "register_operand" "=x")
9263 (match_operand:VI_256 1 "register_operand" "x")
9264 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
9265 "TARGET_AVX2"
9266 {
9267 rtx t1 = gen_reg_rtx (<MODE>mode);
9268 rtx t2 = gen_reg_rtx (<MODE>mode);
9269 rtx t3 = gen_reg_rtx (V4DImode);
9270 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
9271 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
9272 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
9273 gen_lowpart (V4DImode, t2),
9274 GEN_INT (0 + (2 << 4))));
9275 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
9276 DONE;
9277 })
9278
9279 ;; Modes handled by pinsr patterns.
9280 (define_mode_iterator PINSR_MODE
9281 [(V16QI "TARGET_SSE4_1") V8HI
9282 (V4SI "TARGET_SSE4_1")
9283 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
9284
9285 (define_mode_attr sse2p4_1
9286 [(V16QI "sse4_1") (V8HI "sse2")
9287 (V4SI "sse4_1") (V2DI "sse4_1")])
9288
9289 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
9290 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
9291 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
9292 (vec_merge:PINSR_MODE
9293 (vec_duplicate:PINSR_MODE
9294 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
9295 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
9296 (match_operand:SI 3 "const_int_operand")))]
9297 "TARGET_SSE2
9298 && ((unsigned) exact_log2 (INTVAL (operands[3]))
9299 < GET_MODE_NUNITS (<MODE>mode))"
9300 {
9301 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
9302
9303 switch (which_alternative)
9304 {
9305 case 0:
9306 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
9307 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
9308 /* FALLTHRU */
9309 case 1:
9310 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
9311 case 2:
9312 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
9313 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
9314 /* FALLTHRU */
9315 case 3:
9316 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9317 default:
9318 gcc_unreachable ();
9319 }
9320 }
9321 [(set_attr "isa" "noavx,noavx,avx,avx")
9322 (set_attr "type" "sselog")
9323 (set (attr "prefix_rex")
9324 (if_then_else
9325 (and (not (match_test "TARGET_AVX"))
9326 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
9327 (const_string "1")
9328 (const_string "*")))
9329 (set (attr "prefix_data16")
9330 (if_then_else
9331 (and (not (match_test "TARGET_AVX"))
9332 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
9333 (const_string "1")
9334 (const_string "*")))
9335 (set (attr "prefix_extra")
9336 (if_then_else
9337 (and (not (match_test "TARGET_AVX"))
9338 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
9339 (const_string "*")
9340 (const_string "1")))
9341 (set_attr "length_immediate" "1")
9342 (set_attr "prefix" "orig,orig,vex,vex")
9343 (set_attr "mode" "TI")])
9344
9345 (define_expand "avx512f_vinsert<shuffletype>32x4_mask"
9346 [(match_operand:V16FI 0 "register_operand")
9347 (match_operand:V16FI 1 "register_operand")
9348 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
9349 (match_operand:SI 3 "const_0_to_3_operand")
9350 (match_operand:V16FI 4 "register_operand")
9351 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9352 "TARGET_AVX512F"
9353 {
9354 switch (INTVAL (operands[3]))
9355 {
9356 case 0:
9357 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9358 operands[1], operands[2], GEN_INT (0xFFF), operands[4],
9359 operands[5]));
9360 break;
9361 case 1:
9362 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9363 operands[1], operands[2], GEN_INT (0xF0FF), operands[4],
9364 operands[5]));
9365 break;
9366 case 2:
9367 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9368 operands[1], operands[2], GEN_INT (0xFF0F), operands[4],
9369 operands[5]));
9370 break;
9371 case 3:
9372 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9373 operands[1], operands[2], GEN_INT (0xFFF0), operands[4],
9374 operands[5]));
9375 break;
9376 default:
9377 gcc_unreachable ();
9378 }
9379 DONE;
9380
9381 })
9382
9383 (define_insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>"
9384 [(set (match_operand:V16FI 0 "register_operand" "=v")
9385 (vec_merge:V16FI
9386 (match_operand:V16FI 1 "register_operand" "v")
9387 (vec_duplicate:V16FI
9388 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
9389 (match_operand:SI 3 "const_int_operand" "n")))]
9390 "TARGET_AVX512F"
9391 {
9392 int mask;
9393 if (INTVAL (operands[3]) == 0xFFF)
9394 mask = 0;
9395 else if ( INTVAL (operands[3]) == 0xF0FF)
9396 mask = 1;
9397 else if ( INTVAL (operands[3]) == 0xFF0F)
9398 mask = 2;
9399 else if ( INTVAL (operands[3]) == 0xFFF0)
9400 mask = 3;
9401 else
9402 gcc_unreachable ();
9403
9404 operands[3] = GEN_INT (mask);
9405
9406 return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9407 }
9408 [(set_attr "type" "sselog")
9409 (set_attr "length_immediate" "1")
9410 (set_attr "prefix" "evex")
9411 (set_attr "mode" "<sseinsnmode>")])
9412
9413 (define_expand "avx512f_vinsert<shuffletype>64x4_mask"
9414 [(match_operand:V8FI 0 "register_operand")
9415 (match_operand:V8FI 1 "register_operand")
9416 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
9417 (match_operand:SI 3 "const_0_to_1_operand")
9418 (match_operand:V8FI 4 "register_operand")
9419 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9420 "TARGET_AVX512F"
9421 {
9422 int mask = INTVAL (operands[3]);
9423 if (mask == 0)
9424 emit_insn (gen_vec_set_lo_<mode>_mask
9425 (operands[0], operands[1], operands[2],
9426 operands[4], operands[5]));
9427 else
9428 emit_insn (gen_vec_set_hi_<mode>_mask
9429 (operands[0], operands[1], operands[2],
9430 operands[4], operands[5]));
9431 DONE;
9432 })
9433
9434 (define_insn "vec_set_lo_<mode><mask_name>"
9435 [(set (match_operand:V8FI 0 "register_operand" "=v")
9436 (vec_concat:V8FI
9437 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
9438 (vec_select:<ssehalfvecmode>
9439 (match_operand:V8FI 1 "register_operand" "v")
9440 (parallel [(const_int 4) (const_int 5)
9441 (const_int 6) (const_int 7)]))))]
9442 "TARGET_AVX512F"
9443 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
9444 [(set_attr "type" "sselog")
9445 (set_attr "length_immediate" "1")
9446 (set_attr "prefix" "evex")
9447 (set_attr "mode" "XI")])
9448
9449 (define_insn "vec_set_hi_<mode><mask_name>"
9450 [(set (match_operand:V8FI 0 "register_operand" "=v")
9451 (vec_concat:V8FI
9452 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
9453 (vec_select:<ssehalfvecmode>
9454 (match_operand:V8FI 1 "register_operand" "v")
9455 (parallel [(const_int 0) (const_int 1)
9456 (const_int 2) (const_int 3)]))))]
9457 "TARGET_AVX512F"
9458 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
9459 [(set_attr "type" "sselog")
9460 (set_attr "length_immediate" "1")
9461 (set_attr "prefix" "evex")
9462 (set_attr "mode" "XI")])
9463
9464 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
9465 [(match_operand:V8FI 0 "register_operand")
9466 (match_operand:V8FI 1 "register_operand")
9467 (match_operand:V8FI 2 "nonimmediate_operand")
9468 (match_operand:SI 3 "const_0_to_255_operand")
9469 (match_operand:V8FI 4 "register_operand")
9470 (match_operand:QI 5 "register_operand")]
9471 "TARGET_AVX512F"
9472 {
9473 int mask = INTVAL (operands[3]);
9474 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
9475 (operands[0], operands[1], operands[2],
9476 GEN_INT (((mask >> 0) & 3) * 2),
9477 GEN_INT (((mask >> 0) & 3) * 2 + 1),
9478 GEN_INT (((mask >> 2) & 3) * 2),
9479 GEN_INT (((mask >> 2) & 3) * 2 + 1),
9480 GEN_INT (((mask >> 4) & 3) * 2 + 8),
9481 GEN_INT (((mask >> 4) & 3) * 2 + 9),
9482 GEN_INT (((mask >> 6) & 3) * 2 + 8),
9483 GEN_INT (((mask >> 6) & 3) * 2 + 9),
9484 operands[4], operands[5]));
9485 DONE;
9486 })
9487
9488 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
9489 [(set (match_operand:V8FI 0 "register_operand" "=v")
9490 (vec_select:V8FI
9491 (vec_concat:<ssedoublemode>
9492 (match_operand:V8FI 1 "register_operand" "v")
9493 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
9494 (parallel [(match_operand 3 "const_0_to_7_operand")
9495 (match_operand 4 "const_0_to_7_operand")
9496 (match_operand 5 "const_0_to_7_operand")
9497 (match_operand 6 "const_0_to_7_operand")
9498 (match_operand 7 "const_8_to_15_operand")
9499 (match_operand 8 "const_8_to_15_operand")
9500 (match_operand 9 "const_8_to_15_operand")
9501 (match_operand 10 "const_8_to_15_operand")])))]
9502 "TARGET_AVX512F
9503 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
9504 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
9505 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
9506 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
9507 {
9508 int mask;
9509 mask = INTVAL (operands[3]) / 2;
9510 mask |= INTVAL (operands[5]) / 2 << 2;
9511 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
9512 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
9513 operands[3] = GEN_INT (mask);
9514
9515 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9516 }
9517 [(set_attr "type" "sselog")
9518 (set_attr "length_immediate" "1")
9519 (set_attr "prefix" "evex")
9520 (set_attr "mode" "<sseinsnmode>")])
9521
9522 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
9523 [(match_operand:V16FI 0 "register_operand")
9524 (match_operand:V16FI 1 "register_operand")
9525 (match_operand:V16FI 2 "nonimmediate_operand")
9526 (match_operand:SI 3 "const_0_to_255_operand")
9527 (match_operand:V16FI 4 "register_operand")
9528 (match_operand:HI 5 "register_operand")]
9529 "TARGET_AVX512F"
9530 {
9531 int mask = INTVAL (operands[3]);
9532 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
9533 (operands[0], operands[1], operands[2],
9534 GEN_INT (((mask >> 0) & 3) * 4),
9535 GEN_INT (((mask >> 0) & 3) * 4 + 1),
9536 GEN_INT (((mask >> 0) & 3) * 4 + 2),
9537 GEN_INT (((mask >> 0) & 3) * 4 + 3),
9538 GEN_INT (((mask >> 2) & 3) * 4),
9539 GEN_INT (((mask >> 2) & 3) * 4 + 1),
9540 GEN_INT (((mask >> 2) & 3) * 4 + 2),
9541 GEN_INT (((mask >> 2) & 3) * 4 + 3),
9542 GEN_INT (((mask >> 4) & 3) * 4 + 16),
9543 GEN_INT (((mask >> 4) & 3) * 4 + 17),
9544 GEN_INT (((mask >> 4) & 3) * 4 + 18),
9545 GEN_INT (((mask >> 4) & 3) * 4 + 19),
9546 GEN_INT (((mask >> 6) & 3) * 4 + 16),
9547 GEN_INT (((mask >> 6) & 3) * 4 + 17),
9548 GEN_INT (((mask >> 6) & 3) * 4 + 18),
9549 GEN_INT (((mask >> 6) & 3) * 4 + 19),
9550 operands[4], operands[5]));
9551 DONE;
9552 })
9553
9554 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
9555 [(set (match_operand:V16FI 0 "register_operand" "=v")
9556 (vec_select:V16FI
9557 (vec_concat:<ssedoublemode>
9558 (match_operand:V16FI 1 "register_operand" "v")
9559 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
9560 (parallel [(match_operand 3 "const_0_to_15_operand")
9561 (match_operand 4 "const_0_to_15_operand")
9562 (match_operand 5 "const_0_to_15_operand")
9563 (match_operand 6 "const_0_to_15_operand")
9564 (match_operand 7 "const_0_to_15_operand")
9565 (match_operand 8 "const_0_to_15_operand")
9566 (match_operand 9 "const_0_to_15_operand")
9567 (match_operand 10 "const_0_to_15_operand")
9568 (match_operand 11 "const_16_to_31_operand")
9569 (match_operand 12 "const_16_to_31_operand")
9570 (match_operand 13 "const_16_to_31_operand")
9571 (match_operand 14 "const_16_to_31_operand")
9572 (match_operand 15 "const_16_to_31_operand")
9573 (match_operand 16 "const_16_to_31_operand")
9574 (match_operand 17 "const_16_to_31_operand")
9575 (match_operand 18 "const_16_to_31_operand")])))]
9576 "TARGET_AVX512F
9577 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
9578 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
9579 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
9580 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
9581 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
9582 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
9583 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
9584 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
9585 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
9586 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
9587 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
9588 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
9589 {
9590 int mask;
9591 mask = INTVAL (operands[3]) / 4;
9592 mask |= INTVAL (operands[7]) / 4 << 2;
9593 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
9594 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
9595 operands[3] = GEN_INT (mask);
9596
9597 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
9598 }
9599 [(set_attr "type" "sselog")
9600 (set_attr "length_immediate" "1")
9601 (set_attr "prefix" "evex")
9602 (set_attr "mode" "<sseinsnmode>")])
9603
9604 (define_expand "avx512f_pshufdv3_mask"
9605 [(match_operand:V16SI 0 "register_operand")
9606 (match_operand:V16SI 1 "nonimmediate_operand")
9607 (match_operand:SI 2 "const_0_to_255_operand")
9608 (match_operand:V16SI 3 "register_operand")
9609 (match_operand:HI 4 "register_operand")]
9610 "TARGET_AVX512F"
9611 {
9612 int mask = INTVAL (operands[2]);
9613 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
9614 GEN_INT ((mask >> 0) & 3),
9615 GEN_INT ((mask >> 2) & 3),
9616 GEN_INT ((mask >> 4) & 3),
9617 GEN_INT ((mask >> 6) & 3),
9618 GEN_INT (((mask >> 0) & 3) + 4),
9619 GEN_INT (((mask >> 2) & 3) + 4),
9620 GEN_INT (((mask >> 4) & 3) + 4),
9621 GEN_INT (((mask >> 6) & 3) + 4),
9622 GEN_INT (((mask >> 0) & 3) + 8),
9623 GEN_INT (((mask >> 2) & 3) + 8),
9624 GEN_INT (((mask >> 4) & 3) + 8),
9625 GEN_INT (((mask >> 6) & 3) + 8),
9626 GEN_INT (((mask >> 0) & 3) + 12),
9627 GEN_INT (((mask >> 2) & 3) + 12),
9628 GEN_INT (((mask >> 4) & 3) + 12),
9629 GEN_INT (((mask >> 6) & 3) + 12),
9630 operands[3], operands[4]));
9631 DONE;
9632 })
9633
9634 (define_insn "avx512f_pshufd_1<mask_name>"
9635 [(set (match_operand:V16SI 0 "register_operand" "=v")
9636 (vec_select:V16SI
9637 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
9638 (parallel [(match_operand 2 "const_0_to_3_operand")
9639 (match_operand 3 "const_0_to_3_operand")
9640 (match_operand 4 "const_0_to_3_operand")
9641 (match_operand 5 "const_0_to_3_operand")
9642 (match_operand 6 "const_4_to_7_operand")
9643 (match_operand 7 "const_4_to_7_operand")
9644 (match_operand 8 "const_4_to_7_operand")
9645 (match_operand 9 "const_4_to_7_operand")
9646 (match_operand 10 "const_8_to_11_operand")
9647 (match_operand 11 "const_8_to_11_operand")
9648 (match_operand 12 "const_8_to_11_operand")
9649 (match_operand 13 "const_8_to_11_operand")
9650 (match_operand 14 "const_12_to_15_operand")
9651 (match_operand 15 "const_12_to_15_operand")
9652 (match_operand 16 "const_12_to_15_operand")
9653 (match_operand 17 "const_12_to_15_operand")])))]
9654 "TARGET_AVX512F
9655 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
9656 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
9657 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
9658 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
9659 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
9660 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
9661 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
9662 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
9663 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
9664 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
9665 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
9666 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
9667 {
9668 int mask = 0;
9669 mask |= INTVAL (operands[2]) << 0;
9670 mask |= INTVAL (operands[3]) << 2;
9671 mask |= INTVAL (operands[4]) << 4;
9672 mask |= INTVAL (operands[5]) << 6;
9673 operands[2] = GEN_INT (mask);
9674
9675 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
9676 }
9677 [(set_attr "type" "sselog1")
9678 (set_attr "prefix" "evex")
9679 (set_attr "length_immediate" "1")
9680 (set_attr "mode" "XI")])
9681
9682 (define_expand "avx2_pshufdv3"
9683 [(match_operand:V8SI 0 "register_operand")
9684 (match_operand:V8SI 1 "nonimmediate_operand")
9685 (match_operand:SI 2 "const_0_to_255_operand")]
9686 "TARGET_AVX2"
9687 {
9688 int mask = INTVAL (operands[2]);
9689 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
9690 GEN_INT ((mask >> 0) & 3),
9691 GEN_INT ((mask >> 2) & 3),
9692 GEN_INT ((mask >> 4) & 3),
9693 GEN_INT ((mask >> 6) & 3),
9694 GEN_INT (((mask >> 0) & 3) + 4),
9695 GEN_INT (((mask >> 2) & 3) + 4),
9696 GEN_INT (((mask >> 4) & 3) + 4),
9697 GEN_INT (((mask >> 6) & 3) + 4)));
9698 DONE;
9699 })
9700
9701 (define_insn "avx2_pshufd_1"
9702 [(set (match_operand:V8SI 0 "register_operand" "=x")
9703 (vec_select:V8SI
9704 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
9705 (parallel [(match_operand 2 "const_0_to_3_operand")
9706 (match_operand 3 "const_0_to_3_operand")
9707 (match_operand 4 "const_0_to_3_operand")
9708 (match_operand 5 "const_0_to_3_operand")
9709 (match_operand 6 "const_4_to_7_operand")
9710 (match_operand 7 "const_4_to_7_operand")
9711 (match_operand 8 "const_4_to_7_operand")
9712 (match_operand 9 "const_4_to_7_operand")])))]
9713 "TARGET_AVX2
9714 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
9715 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
9716 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
9717 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
9718 {
9719 int mask = 0;
9720 mask |= INTVAL (operands[2]) << 0;
9721 mask |= INTVAL (operands[3]) << 2;
9722 mask |= INTVAL (operands[4]) << 4;
9723 mask |= INTVAL (operands[5]) << 6;
9724 operands[2] = GEN_INT (mask);
9725
9726 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
9727 }
9728 [(set_attr "type" "sselog1")
9729 (set_attr "prefix" "vex")
9730 (set_attr "length_immediate" "1")
9731 (set_attr "mode" "OI")])
9732
9733 (define_expand "sse2_pshufd"
9734 [(match_operand:V4SI 0 "register_operand")
9735 (match_operand:V4SI 1 "nonimmediate_operand")
9736 (match_operand:SI 2 "const_int_operand")]
9737 "TARGET_SSE2"
9738 {
9739 int mask = INTVAL (operands[2]);
9740 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
9741 GEN_INT ((mask >> 0) & 3),
9742 GEN_INT ((mask >> 2) & 3),
9743 GEN_INT ((mask >> 4) & 3),
9744 GEN_INT ((mask >> 6) & 3)));
9745 DONE;
9746 })
9747
9748 (define_insn "sse2_pshufd_1"
9749 [(set (match_operand:V4SI 0 "register_operand" "=x")
9750 (vec_select:V4SI
9751 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9752 (parallel [(match_operand 2 "const_0_to_3_operand")
9753 (match_operand 3 "const_0_to_3_operand")
9754 (match_operand 4 "const_0_to_3_operand")
9755 (match_operand 5 "const_0_to_3_operand")])))]
9756 "TARGET_SSE2"
9757 {
9758 int mask = 0;
9759 mask |= INTVAL (operands[2]) << 0;
9760 mask |= INTVAL (operands[3]) << 2;
9761 mask |= INTVAL (operands[4]) << 4;
9762 mask |= INTVAL (operands[5]) << 6;
9763 operands[2] = GEN_INT (mask);
9764
9765 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
9766 }
9767 [(set_attr "type" "sselog1")
9768 (set_attr "prefix_data16" "1")
9769 (set_attr "prefix" "maybe_vex")
9770 (set_attr "length_immediate" "1")
9771 (set_attr "mode" "TI")])
9772
9773 (define_expand "avx2_pshuflwv3"
9774 [(match_operand:V16HI 0 "register_operand")
9775 (match_operand:V16HI 1 "nonimmediate_operand")
9776 (match_operand:SI 2 "const_0_to_255_operand")]
9777 "TARGET_AVX2"
9778 {
9779 int mask = INTVAL (operands[2]);
9780 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
9781 GEN_INT ((mask >> 0) & 3),
9782 GEN_INT ((mask >> 2) & 3),
9783 GEN_INT ((mask >> 4) & 3),
9784 GEN_INT ((mask >> 6) & 3),
9785 GEN_INT (((mask >> 0) & 3) + 8),
9786 GEN_INT (((mask >> 2) & 3) + 8),
9787 GEN_INT (((mask >> 4) & 3) + 8),
9788 GEN_INT (((mask >> 6) & 3) + 8)));
9789 DONE;
9790 })
9791
9792 (define_insn "avx2_pshuflw_1"
9793 [(set (match_operand:V16HI 0 "register_operand" "=x")
9794 (vec_select:V16HI
9795 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
9796 (parallel [(match_operand 2 "const_0_to_3_operand")
9797 (match_operand 3 "const_0_to_3_operand")
9798 (match_operand 4 "const_0_to_3_operand")
9799 (match_operand 5 "const_0_to_3_operand")
9800 (const_int 4)
9801 (const_int 5)
9802 (const_int 6)
9803 (const_int 7)
9804 (match_operand 6 "const_8_to_11_operand")
9805 (match_operand 7 "const_8_to_11_operand")
9806 (match_operand 8 "const_8_to_11_operand")
9807 (match_operand 9 "const_8_to_11_operand")
9808 (const_int 12)
9809 (const_int 13)
9810 (const_int 14)
9811 (const_int 15)])))]
9812 "TARGET_AVX2
9813 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
9814 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
9815 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
9816 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
9817 {
9818 int mask = 0;
9819 mask |= INTVAL (operands[2]) << 0;
9820 mask |= INTVAL (operands[3]) << 2;
9821 mask |= INTVAL (operands[4]) << 4;
9822 mask |= INTVAL (operands[5]) << 6;
9823 operands[2] = GEN_INT (mask);
9824
9825 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
9826 }
9827 [(set_attr "type" "sselog")
9828 (set_attr "prefix" "vex")
9829 (set_attr "length_immediate" "1")
9830 (set_attr "mode" "OI")])
9831
9832 (define_expand "sse2_pshuflw"
9833 [(match_operand:V8HI 0 "register_operand")
9834 (match_operand:V8HI 1 "nonimmediate_operand")
9835 (match_operand:SI 2 "const_int_operand")]
9836 "TARGET_SSE2"
9837 {
9838 int mask = INTVAL (operands[2]);
9839 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
9840 GEN_INT ((mask >> 0) & 3),
9841 GEN_INT ((mask >> 2) & 3),
9842 GEN_INT ((mask >> 4) & 3),
9843 GEN_INT ((mask >> 6) & 3)));
9844 DONE;
9845 })
9846
9847 (define_insn "sse2_pshuflw_1"
9848 [(set (match_operand:V8HI 0 "register_operand" "=x")
9849 (vec_select:V8HI
9850 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9851 (parallel [(match_operand 2 "const_0_to_3_operand")
9852 (match_operand 3 "const_0_to_3_operand")
9853 (match_operand 4 "const_0_to_3_operand")
9854 (match_operand 5 "const_0_to_3_operand")
9855 (const_int 4)
9856 (const_int 5)
9857 (const_int 6)
9858 (const_int 7)])))]
9859 "TARGET_SSE2"
9860 {
9861 int mask = 0;
9862 mask |= INTVAL (operands[2]) << 0;
9863 mask |= INTVAL (operands[3]) << 2;
9864 mask |= INTVAL (operands[4]) << 4;
9865 mask |= INTVAL (operands[5]) << 6;
9866 operands[2] = GEN_INT (mask);
9867
9868 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
9869 }
9870 [(set_attr "type" "sselog")
9871 (set_attr "prefix_data16" "0")
9872 (set_attr "prefix_rep" "1")
9873 (set_attr "prefix" "maybe_vex")
9874 (set_attr "length_immediate" "1")
9875 (set_attr "mode" "TI")])
9876
9877 (define_expand "avx2_pshufhwv3"
9878 [(match_operand:V16HI 0 "register_operand")
9879 (match_operand:V16HI 1 "nonimmediate_operand")
9880 (match_operand:SI 2 "const_0_to_255_operand")]
9881 "TARGET_AVX2"
9882 {
9883 int mask = INTVAL (operands[2]);
9884 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
9885 GEN_INT (((mask >> 0) & 3) + 4),
9886 GEN_INT (((mask >> 2) & 3) + 4),
9887 GEN_INT (((mask >> 4) & 3) + 4),
9888 GEN_INT (((mask >> 6) & 3) + 4),
9889 GEN_INT (((mask >> 0) & 3) + 12),
9890 GEN_INT (((mask >> 2) & 3) + 12),
9891 GEN_INT (((mask >> 4) & 3) + 12),
9892 GEN_INT (((mask >> 6) & 3) + 12)));
9893 DONE;
9894 })
9895
9896 (define_insn "avx2_pshufhw_1"
9897 [(set (match_operand:V16HI 0 "register_operand" "=x")
9898 (vec_select:V16HI
9899 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
9900 (parallel [(const_int 0)
9901 (const_int 1)
9902 (const_int 2)
9903 (const_int 3)
9904 (match_operand 2 "const_4_to_7_operand")
9905 (match_operand 3 "const_4_to_7_operand")
9906 (match_operand 4 "const_4_to_7_operand")
9907 (match_operand 5 "const_4_to_7_operand")
9908 (const_int 8)
9909 (const_int 9)
9910 (const_int 10)
9911 (const_int 11)
9912 (match_operand 6 "const_12_to_15_operand")
9913 (match_operand 7 "const_12_to_15_operand")
9914 (match_operand 8 "const_12_to_15_operand")
9915 (match_operand 9 "const_12_to_15_operand")])))]
9916 "TARGET_AVX2
9917 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
9918 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
9919 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
9920 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
9921 {
9922 int mask = 0;
9923 mask |= (INTVAL (operands[2]) - 4) << 0;
9924 mask |= (INTVAL (operands[3]) - 4) << 2;
9925 mask |= (INTVAL (operands[4]) - 4) << 4;
9926 mask |= (INTVAL (operands[5]) - 4) << 6;
9927 operands[2] = GEN_INT (mask);
9928
9929 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
9930 }
9931 [(set_attr "type" "sselog")
9932 (set_attr "prefix" "vex")
9933 (set_attr "length_immediate" "1")
9934 (set_attr "mode" "OI")])
9935
9936 (define_expand "sse2_pshufhw"
9937 [(match_operand:V8HI 0 "register_operand")
9938 (match_operand:V8HI 1 "nonimmediate_operand")
9939 (match_operand:SI 2 "const_int_operand")]
9940 "TARGET_SSE2"
9941 {
9942 int mask = INTVAL (operands[2]);
9943 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
9944 GEN_INT (((mask >> 0) & 3) + 4),
9945 GEN_INT (((mask >> 2) & 3) + 4),
9946 GEN_INT (((mask >> 4) & 3) + 4),
9947 GEN_INT (((mask >> 6) & 3) + 4)));
9948 DONE;
9949 })
9950
9951 (define_insn "sse2_pshufhw_1"
9952 [(set (match_operand:V8HI 0 "register_operand" "=x")
9953 (vec_select:V8HI
9954 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9955 (parallel [(const_int 0)
9956 (const_int 1)
9957 (const_int 2)
9958 (const_int 3)
9959 (match_operand 2 "const_4_to_7_operand")
9960 (match_operand 3 "const_4_to_7_operand")
9961 (match_operand 4 "const_4_to_7_operand")
9962 (match_operand 5 "const_4_to_7_operand")])))]
9963 "TARGET_SSE2"
9964 {
9965 int mask = 0;
9966 mask |= (INTVAL (operands[2]) - 4) << 0;
9967 mask |= (INTVAL (operands[3]) - 4) << 2;
9968 mask |= (INTVAL (operands[4]) - 4) << 4;
9969 mask |= (INTVAL (operands[5]) - 4) << 6;
9970 operands[2] = GEN_INT (mask);
9971
9972 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
9973 }
9974 [(set_attr "type" "sselog")
9975 (set_attr "prefix_rep" "1")
9976 (set_attr "prefix_data16" "0")
9977 (set_attr "prefix" "maybe_vex")
9978 (set_attr "length_immediate" "1")
9979 (set_attr "mode" "TI")])
9980
9981 (define_expand "sse2_loadd"
9982 [(set (match_operand:V4SI 0 "register_operand")
9983 (vec_merge:V4SI
9984 (vec_duplicate:V4SI
9985 (match_operand:SI 1 "nonimmediate_operand"))
9986 (match_dup 2)
9987 (const_int 1)))]
9988 "TARGET_SSE"
9989 "operands[2] = CONST0_RTX (V4SImode);")
9990
9991 (define_insn "sse2_loadld"
9992 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
9993 (vec_merge:V4SI
9994 (vec_duplicate:V4SI
9995 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
9996 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
9997 (const_int 1)))]
9998 "TARGET_SSE"
9999 "@
10000 %vmovd\t{%2, %0|%0, %2}
10001 %vmovd\t{%2, %0|%0, %2}
10002 movss\t{%2, %0|%0, %2}
10003 movss\t{%2, %0|%0, %2}
10004 vmovss\t{%2, %1, %0|%0, %1, %2}"
10005 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
10006 (set_attr "type" "ssemov")
10007 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
10008 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
10009
10010 (define_insn "*vec_extract<mode>"
10011 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
10012 (vec_select:<ssescalarmode>
10013 (match_operand:VI12_128 1 "register_operand" "x,x")
10014 (parallel
10015 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
10016 "TARGET_SSE4_1"
10017 "@
10018 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
10019 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10020 [(set_attr "type" "sselog1")
10021 (set (attr "prefix_data16")
10022 (if_then_else
10023 (and (eq_attr "alternative" "0")
10024 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10025 (const_string "1")
10026 (const_string "*")))
10027 (set (attr "prefix_extra")
10028 (if_then_else
10029 (and (eq_attr "alternative" "0")
10030 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10031 (const_string "*")
10032 (const_string "1")))
10033 (set_attr "length_immediate" "1")
10034 (set_attr "prefix" "maybe_vex")
10035 (set_attr "mode" "TI")])
10036
10037 (define_insn "*vec_extractv8hi_sse2"
10038 [(set (match_operand:HI 0 "register_operand" "=r")
10039 (vec_select:HI
10040 (match_operand:V8HI 1 "register_operand" "x")
10041 (parallel
10042 [(match_operand:SI 2 "const_0_to_7_operand")])))]
10043 "TARGET_SSE2 && !TARGET_SSE4_1"
10044 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
10045 [(set_attr "type" "sselog1")
10046 (set_attr "prefix_data16" "1")
10047 (set_attr "length_immediate" "1")
10048 (set_attr "mode" "TI")])
10049
10050 (define_insn "*vec_extractv16qi_zext"
10051 [(set (match_operand:SWI48 0 "register_operand" "=r")
10052 (zero_extend:SWI48
10053 (vec_select:QI
10054 (match_operand:V16QI 1 "register_operand" "x")
10055 (parallel
10056 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
10057 "TARGET_SSE4_1"
10058 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
10059 [(set_attr "type" "sselog1")
10060 (set_attr "prefix_extra" "1")
10061 (set_attr "length_immediate" "1")
10062 (set_attr "prefix" "maybe_vex")
10063 (set_attr "mode" "TI")])
10064
10065 (define_insn "*vec_extractv8hi_zext"
10066 [(set (match_operand:SWI48 0 "register_operand" "=r")
10067 (zero_extend:SWI48
10068 (vec_select:HI
10069 (match_operand:V8HI 1 "register_operand" "x")
10070 (parallel
10071 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
10072 "TARGET_SSE2"
10073 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
10074 [(set_attr "type" "sselog1")
10075 (set_attr "prefix_data16" "1")
10076 (set_attr "length_immediate" "1")
10077 (set_attr "prefix" "maybe_vex")
10078 (set_attr "mode" "TI")])
10079
10080 (define_insn "*vec_extract<mode>_mem"
10081 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
10082 (vec_select:<ssescalarmode>
10083 (match_operand:VI12_128 1 "memory_operand" "o")
10084 (parallel
10085 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
10086 "TARGET_SSE"
10087 "#")
10088
10089 (define_insn "*vec_extract<ssevecmodelower>_0"
10090 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
10091 (vec_select:SWI48
10092 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
10093 (parallel [(const_int 0)])))]
10094 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10095 "#"
10096 [(set_attr "isa" "*,sse4,*,*")])
10097
10098 (define_insn_and_split "*vec_extractv4si_0_zext"
10099 [(set (match_operand:DI 0 "register_operand" "=r")
10100 (zero_extend:DI
10101 (vec_select:SI
10102 (match_operand:V4SI 1 "register_operand" "x")
10103 (parallel [(const_int 0)]))))]
10104 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
10105 "#"
10106 "&& reload_completed"
10107 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10108 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
10109
10110 (define_insn "*vec_extractv2di_0_sse"
10111 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
10112 (vec_select:DI
10113 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
10114 (parallel [(const_int 0)])))]
10115 "TARGET_SSE && !TARGET_64BIT
10116 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10117 "#")
10118
10119 (define_split
10120 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
10121 (vec_select:SWI48x
10122 (match_operand:<ssevecmode> 1 "register_operand")
10123 (parallel [(const_int 0)])))]
10124 "TARGET_SSE && reload_completed"
10125 [(set (match_dup 0) (match_dup 1))]
10126 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
10127
10128 (define_insn "*vec_extractv4si"
10129 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
10130 (vec_select:SI
10131 (match_operand:V4SI 1 "register_operand" "x,0,x")
10132 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
10133 "TARGET_SSE4_1"
10134 {
10135 switch (which_alternative)
10136 {
10137 case 0:
10138 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
10139
10140 case 1:
10141 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10142 return "psrldq\t{%2, %0|%0, %2}";
10143
10144 case 2:
10145 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10146 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10147
10148 default:
10149 gcc_unreachable ();
10150 }
10151 }
10152 [(set_attr "isa" "*,noavx,avx")
10153 (set_attr "type" "sselog1,sseishft1,sseishft1")
10154 (set_attr "prefix_extra" "1,*,*")
10155 (set_attr "length_immediate" "1")
10156 (set_attr "prefix" "maybe_vex,orig,vex")
10157 (set_attr "mode" "TI")])
10158
10159 (define_insn "*vec_extractv4si_zext"
10160 [(set (match_operand:DI 0 "register_operand" "=r")
10161 (zero_extend:DI
10162 (vec_select:SI
10163 (match_operand:V4SI 1 "register_operand" "x")
10164 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10165 "TARGET_64BIT && TARGET_SSE4_1"
10166 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
10167 [(set_attr "type" "sselog1")
10168 (set_attr "prefix_extra" "1")
10169 (set_attr "length_immediate" "1")
10170 (set_attr "prefix" "maybe_vex")
10171 (set_attr "mode" "TI")])
10172
10173 (define_insn "*vec_extractv4si_mem"
10174 [(set (match_operand:SI 0 "register_operand" "=x,r")
10175 (vec_select:SI
10176 (match_operand:V4SI 1 "memory_operand" "o,o")
10177 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
10178 "TARGET_SSE"
10179 "#")
10180
10181 (define_insn_and_split "*vec_extractv4si_zext_mem"
10182 [(set (match_operand:DI 0 "register_operand" "=x,r")
10183 (zero_extend:DI
10184 (vec_select:SI
10185 (match_operand:V4SI 1 "memory_operand" "o,o")
10186 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10187 "TARGET_64BIT && TARGET_SSE"
10188 "#"
10189 "&& reload_completed"
10190 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10191 {
10192 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
10193 })
10194
10195 (define_insn "*vec_extractv2di_1"
10196 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
10197 (vec_select:DI
10198 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
10199 (parallel [(const_int 1)])))]
10200 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10201 "@
10202 %vpextrq\t{$1, %1, %0|%0, %1, 1}
10203 %vmovhps\t{%1, %0|%0, %1}
10204 psrldq\t{$8, %0|%0, 8}
10205 vpsrldq\t{$8, %1, %0|%0, %1, 8}
10206 movhlps\t{%1, %0|%0, %1}
10207 #
10208 #"
10209 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
10210 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
10211 (set_attr "length_immediate" "1,*,1,1,*,*,*")
10212 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
10213 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
10214 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
10215 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
10216
10217 (define_split
10218 [(set (match_operand:<ssescalarmode> 0 "register_operand")
10219 (vec_select:<ssescalarmode>
10220 (match_operand:VI_128 1 "memory_operand")
10221 (parallel
10222 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
10223 "TARGET_SSE && reload_completed"
10224 [(set (match_dup 0) (match_dup 1))]
10225 {
10226 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
10227
10228 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
10229 })
10230
10231 (define_insn "*vec_dupv4si"
10232 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10233 (vec_duplicate:V4SI
10234 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
10235 "TARGET_SSE"
10236 "@
10237 %vpshufd\t{$0, %1, %0|%0, %1, 0}
10238 vbroadcastss\t{%1, %0|%0, %1}
10239 shufps\t{$0, %0, %0|%0, %0, 0}"
10240 [(set_attr "isa" "sse2,avx,noavx")
10241 (set_attr "type" "sselog1,ssemov,sselog1")
10242 (set_attr "length_immediate" "1,0,1")
10243 (set_attr "prefix_extra" "0,1,*")
10244 (set_attr "prefix" "maybe_vex,vex,orig")
10245 (set_attr "mode" "TI,V4SF,V4SF")])
10246
10247 (define_insn "*vec_dupv2di"
10248 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
10249 (vec_duplicate:V2DI
10250 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
10251 "TARGET_SSE"
10252 "@
10253 punpcklqdq\t%0, %0
10254 vpunpcklqdq\t{%d1, %0|%0, %d1}
10255 %vmovddup\t{%1, %0|%0, %1}
10256 movlhps\t%0, %0"
10257 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
10258 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
10259 (set_attr "prefix" "orig,vex,maybe_vex,orig")
10260 (set_attr "mode" "TI,TI,DF,V4SF")])
10261
10262 (define_insn "*vec_concatv2si_sse4_1"
10263 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
10264 (vec_concat:V2SI
10265 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
10266 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
10267 "TARGET_SSE4_1"
10268 "@
10269 pinsrd\t{$1, %2, %0|%0, %2, 1}
10270 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
10271 punpckldq\t{%2, %0|%0, %2}
10272 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
10273 %vmovd\t{%1, %0|%0, %1}
10274 punpckldq\t{%2, %0|%0, %2}
10275 movd\t{%1, %0|%0, %1}"
10276 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10277 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
10278 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
10279 (set_attr "length_immediate" "1,1,*,*,*,*,*")
10280 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
10281 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
10282
10283 ;; ??? In theory we can match memory for the MMX alternative, but allowing
10284 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
10285 ;; alternatives pretty much forces the MMX alternative to be chosen.
10286 (define_insn "*vec_concatv2si"
10287 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
10288 (vec_concat:V2SI
10289 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
10290 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
10291 "TARGET_SSE && !TARGET_SSE4_1"
10292 "@
10293 punpckldq\t{%2, %0|%0, %2}
10294 movd\t{%1, %0|%0, %1}
10295 movd\t{%1, %0|%0, %1}
10296 unpcklps\t{%2, %0|%0, %2}
10297 movss\t{%1, %0|%0, %1}
10298 punpckldq\t{%2, %0|%0, %2}
10299 movd\t{%1, %0|%0, %1}"
10300 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
10301 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
10302 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
10303
10304 (define_insn "*vec_concatv4si"
10305 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
10306 (vec_concat:V4SI
10307 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
10308 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
10309 "TARGET_SSE"
10310 "@
10311 punpcklqdq\t{%2, %0|%0, %2}
10312 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
10313 movlhps\t{%2, %0|%0, %2}
10314 movhps\t{%2, %0|%0, %q2}
10315 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
10316 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
10317 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
10318 (set_attr "prefix" "orig,vex,orig,orig,vex")
10319 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
10320
10321 ;; movd instead of movq is required to handle broken assemblers.
10322 (define_insn "vec_concatv2di"
10323 [(set (match_operand:V2DI 0 "register_operand"
10324 "=x,x ,Yi,x ,!x,x,x,x,x,x")
10325 (vec_concat:V2DI
10326 (match_operand:DI 1 "nonimmediate_operand"
10327 " 0,x ,r ,xm,*y,0,x,0,0,x")
10328 (match_operand:DI 2 "vector_move_operand"
10329 "rm,rm,C ,C ,C ,x,x,x,m,m")))]
10330 "TARGET_SSE"
10331 "@
10332 pinsrq\t{$1, %2, %0|%0, %2, 1}
10333 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
10334 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
10335 %vmovq\t{%1, %0|%0, %1}
10336 movq2dq\t{%1, %0|%0, %1}
10337 punpcklqdq\t{%2, %0|%0, %2}
10338 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
10339 movlhps\t{%2, %0|%0, %2}
10340 movhps\t{%2, %0|%0, %2}
10341 vmovhps\t{%2, %1, %0|%0, %1, %2}"
10342 [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
10343 (set (attr "type")
10344 (if_then_else
10345 (eq_attr "alternative" "0,1,5,6")
10346 (const_string "sselog")
10347 (const_string "ssemov")))
10348 (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
10349 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
10350 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
10351 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
10352 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
10353
10354 (define_expand "vec_unpacks_lo_<mode>"
10355 [(match_operand:<sseunpackmode> 0 "register_operand")
10356 (match_operand:VI124_AVX512F 1 "register_operand")]
10357 "TARGET_SSE2"
10358 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
10359
10360 (define_expand "vec_unpacks_hi_<mode>"
10361 [(match_operand:<sseunpackmode> 0 "register_operand")
10362 (match_operand:VI124_AVX512F 1 "register_operand")]
10363 "TARGET_SSE2"
10364 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
10365
10366 (define_expand "vec_unpacku_lo_<mode>"
10367 [(match_operand:<sseunpackmode> 0 "register_operand")
10368 (match_operand:VI124_AVX512F 1 "register_operand")]
10369 "TARGET_SSE2"
10370 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
10371
10372 (define_expand "vec_unpacku_hi_<mode>"
10373 [(match_operand:<sseunpackmode> 0 "register_operand")
10374 (match_operand:VI124_AVX512F 1 "register_operand")]
10375 "TARGET_SSE2"
10376 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
10377
10378 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10379 ;;
10380 ;; Miscellaneous
10381 ;;
10382 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10383
10384 (define_expand "<sse2_avx2>_uavg<mode>3"
10385 [(set (match_operand:VI12_AVX2 0 "register_operand")
10386 (truncate:VI12_AVX2
10387 (lshiftrt:<ssedoublemode>
10388 (plus:<ssedoublemode>
10389 (plus:<ssedoublemode>
10390 (zero_extend:<ssedoublemode>
10391 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
10392 (zero_extend:<ssedoublemode>
10393 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
10394 (match_dup 3))
10395 (const_int 1))))]
10396 "TARGET_SSE2"
10397 {
10398 operands[3] = CONST1_RTX(<MODE>mode);
10399 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
10400 })
10401
10402 (define_insn "*<sse2_avx2>_uavg<mode>3"
10403 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
10404 (truncate:VI12_AVX2
10405 (lshiftrt:<ssedoublemode>
10406 (plus:<ssedoublemode>
10407 (plus:<ssedoublemode>
10408 (zero_extend:<ssedoublemode>
10409 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
10410 (zero_extend:<ssedoublemode>
10411 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
10412 (match_operand:VI12_AVX2 3 "const1_operand"))
10413 (const_int 1))))]
10414 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
10415 "@
10416 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
10417 vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10418 [(set_attr "isa" "noavx,avx")
10419 (set_attr "type" "sseiadd")
10420 (set_attr "prefix_data16" "1,*")
10421 (set_attr "prefix" "orig,vex")
10422 (set_attr "mode" "<sseinsnmode>")])
10423
10424 ;; The correct representation for this is absolutely enormous, and
10425 ;; surely not generally useful.
10426 (define_insn "<sse2_avx2>_psadbw"
10427 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
10428 (unspec:VI8_AVX2
10429 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
10430 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
10431 UNSPEC_PSADBW))]
10432 "TARGET_SSE2"
10433 "@
10434 psadbw\t{%2, %0|%0, %2}
10435 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
10436 [(set_attr "isa" "noavx,avx")
10437 (set_attr "type" "sseiadd")
10438 (set_attr "atom_unit" "simul")
10439 (set_attr "prefix_data16" "1,*")
10440 (set_attr "prefix" "orig,vex")
10441 (set_attr "mode" "<sseinsnmode>")])
10442
10443 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
10444 [(set (match_operand:SI 0 "register_operand" "=r")
10445 (unspec:SI
10446 [(match_operand:VF_128_256 1 "register_operand" "x")]
10447 UNSPEC_MOVMSK))]
10448 "TARGET_SSE"
10449 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
10450 [(set_attr "type" "ssemov")
10451 (set_attr "prefix" "maybe_vex")
10452 (set_attr "mode" "<MODE>")])
10453
10454 (define_insn "avx2_pmovmskb"
10455 [(set (match_operand:SI 0 "register_operand" "=r")
10456 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
10457 UNSPEC_MOVMSK))]
10458 "TARGET_AVX2"
10459 "vpmovmskb\t{%1, %0|%0, %1}"
10460 [(set_attr "type" "ssemov")
10461 (set_attr "prefix" "vex")
10462 (set_attr "mode" "DI")])
10463
10464 (define_insn "sse2_pmovmskb"
10465 [(set (match_operand:SI 0 "register_operand" "=r")
10466 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
10467 UNSPEC_MOVMSK))]
10468 "TARGET_SSE2"
10469 "%vpmovmskb\t{%1, %0|%0, %1}"
10470 [(set_attr "type" "ssemov")
10471 (set_attr "prefix_data16" "1")
10472 (set_attr "prefix" "maybe_vex")
10473 (set_attr "mode" "SI")])
10474
10475 (define_expand "sse2_maskmovdqu"
10476 [(set (match_operand:V16QI 0 "memory_operand")
10477 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
10478 (match_operand:V16QI 2 "register_operand")
10479 (match_dup 0)]
10480 UNSPEC_MASKMOV))]
10481 "TARGET_SSE2")
10482
10483 (define_insn "*sse2_maskmovdqu"
10484 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
10485 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
10486 (match_operand:V16QI 2 "register_operand" "x")
10487 (mem:V16QI (match_dup 0))]
10488 UNSPEC_MASKMOV))]
10489 "TARGET_SSE2"
10490 {
10491 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
10492 that requires %v to be at the beginning of the opcode name. */
10493 if (Pmode != word_mode)
10494 fputs ("\taddr32", asm_out_file);
10495 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
10496 }
10497 [(set_attr "type" "ssemov")
10498 (set_attr "prefix_data16" "1")
10499 (set (attr "length_address")
10500 (symbol_ref ("Pmode != word_mode")))
10501 ;; The implicit %rdi operand confuses default length_vex computation.
10502 (set (attr "length_vex")
10503 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
10504 (set_attr "prefix" "maybe_vex")
10505 (set_attr "mode" "TI")])
10506
10507 (define_insn "sse_ldmxcsr"
10508 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
10509 UNSPECV_LDMXCSR)]
10510 "TARGET_SSE"
10511 "%vldmxcsr\t%0"
10512 [(set_attr "type" "sse")
10513 (set_attr "atom_sse_attr" "mxcsr")
10514 (set_attr "prefix" "maybe_vex")
10515 (set_attr "memory" "load")])
10516
10517 (define_insn "sse_stmxcsr"
10518 [(set (match_operand:SI 0 "memory_operand" "=m")
10519 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
10520 "TARGET_SSE"
10521 "%vstmxcsr\t%0"
10522 [(set_attr "type" "sse")
10523 (set_attr "atom_sse_attr" "mxcsr")
10524 (set_attr "prefix" "maybe_vex")
10525 (set_attr "memory" "store")])
10526
10527 (define_insn "sse2_clflush"
10528 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
10529 UNSPECV_CLFLUSH)]
10530 "TARGET_SSE2"
10531 "clflush\t%a0"
10532 [(set_attr "type" "sse")
10533 (set_attr "atom_sse_attr" "fence")
10534 (set_attr "memory" "unknown")])
10535
10536
10537 (define_insn "sse3_mwait"
10538 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
10539 (match_operand:SI 1 "register_operand" "c")]
10540 UNSPECV_MWAIT)]
10541 "TARGET_SSE3"
10542 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
10543 ;; Since 32bit register operands are implicitly zero extended to 64bit,
10544 ;; we only need to set up 32bit registers.
10545 "mwait"
10546 [(set_attr "length" "3")])
10547
10548 (define_insn "sse3_monitor_<mode>"
10549 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
10550 (match_operand:SI 1 "register_operand" "c")
10551 (match_operand:SI 2 "register_operand" "d")]
10552 UNSPECV_MONITOR)]
10553 "TARGET_SSE3"
10554 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
10555 ;; RCX and RDX are used. Since 32bit register operands are implicitly
10556 ;; zero extended to 64bit, we only need to set up 32bit registers.
10557 "%^monitor"
10558 [(set (attr "length")
10559 (symbol_ref ("(Pmode != word_mode) + 3")))])
10560
10561 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10562 ;;
10563 ;; SSSE3 instructions
10564 ;;
10565 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10566
10567 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
10568
10569 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
10570 [(set (match_operand:V16HI 0 "register_operand" "=x")
10571 (vec_concat:V16HI
10572 (vec_concat:V8HI
10573 (vec_concat:V4HI
10574 (vec_concat:V2HI
10575 (ssse3_plusminus:HI
10576 (vec_select:HI
10577 (match_operand:V16HI 1 "register_operand" "x")
10578 (parallel [(const_int 0)]))
10579 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10580 (ssse3_plusminus:HI
10581 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10582 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10583 (vec_concat:V2HI
10584 (ssse3_plusminus:HI
10585 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
10586 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
10587 (ssse3_plusminus:HI
10588 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
10589 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
10590 (vec_concat:V4HI
10591 (vec_concat:V2HI
10592 (ssse3_plusminus:HI
10593 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
10594 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
10595 (ssse3_plusminus:HI
10596 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
10597 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
10598 (vec_concat:V2HI
10599 (ssse3_plusminus:HI
10600 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
10601 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
10602 (ssse3_plusminus:HI
10603 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
10604 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
10605 (vec_concat:V8HI
10606 (vec_concat:V4HI
10607 (vec_concat:V2HI
10608 (ssse3_plusminus:HI
10609 (vec_select:HI
10610 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
10611 (parallel [(const_int 0)]))
10612 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10613 (ssse3_plusminus:HI
10614 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10615 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
10616 (vec_concat:V2HI
10617 (ssse3_plusminus:HI
10618 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
10619 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
10620 (ssse3_plusminus:HI
10621 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
10622 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
10623 (vec_concat:V4HI
10624 (vec_concat:V2HI
10625 (ssse3_plusminus:HI
10626 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
10627 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
10628 (ssse3_plusminus:HI
10629 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
10630 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
10631 (vec_concat:V2HI
10632 (ssse3_plusminus:HI
10633 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
10634 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
10635 (ssse3_plusminus:HI
10636 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
10637 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
10638 "TARGET_AVX2"
10639 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
10640 [(set_attr "type" "sseiadd")
10641 (set_attr "prefix_extra" "1")
10642 (set_attr "prefix" "vex")
10643 (set_attr "mode" "OI")])
10644
10645 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
10646 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10647 (vec_concat:V8HI
10648 (vec_concat:V4HI
10649 (vec_concat:V2HI
10650 (ssse3_plusminus:HI
10651 (vec_select:HI
10652 (match_operand:V8HI 1 "register_operand" "0,x")
10653 (parallel [(const_int 0)]))
10654 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10655 (ssse3_plusminus:HI
10656 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10657 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10658 (vec_concat:V2HI
10659 (ssse3_plusminus:HI
10660 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
10661 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
10662 (ssse3_plusminus:HI
10663 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
10664 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
10665 (vec_concat:V4HI
10666 (vec_concat:V2HI
10667 (ssse3_plusminus:HI
10668 (vec_select:HI
10669 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
10670 (parallel [(const_int 0)]))
10671 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10672 (ssse3_plusminus:HI
10673 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10674 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
10675 (vec_concat:V2HI
10676 (ssse3_plusminus:HI
10677 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
10678 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
10679 (ssse3_plusminus:HI
10680 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
10681 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
10682 "TARGET_SSSE3"
10683 "@
10684 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
10685 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
10686 [(set_attr "isa" "noavx,avx")
10687 (set_attr "type" "sseiadd")
10688 (set_attr "atom_unit" "complex")
10689 (set_attr "prefix_data16" "1,*")
10690 (set_attr "prefix_extra" "1")
10691 (set_attr "prefix" "orig,vex")
10692 (set_attr "mode" "TI")])
10693
10694 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
10695 [(set (match_operand:V4HI 0 "register_operand" "=y")
10696 (vec_concat:V4HI
10697 (vec_concat:V2HI
10698 (ssse3_plusminus:HI
10699 (vec_select:HI
10700 (match_operand:V4HI 1 "register_operand" "0")
10701 (parallel [(const_int 0)]))
10702 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10703 (ssse3_plusminus:HI
10704 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10705 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10706 (vec_concat:V2HI
10707 (ssse3_plusminus:HI
10708 (vec_select:HI
10709 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
10710 (parallel [(const_int 0)]))
10711 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10712 (ssse3_plusminus:HI
10713 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10714 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
10715 "TARGET_SSSE3"
10716 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
10717 [(set_attr "type" "sseiadd")
10718 (set_attr "atom_unit" "complex")
10719 (set_attr "prefix_extra" "1")
10720 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
10721 (set_attr "mode" "DI")])
10722
10723 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
10724 [(set (match_operand:V8SI 0 "register_operand" "=x")
10725 (vec_concat:V8SI
10726 (vec_concat:V4SI
10727 (vec_concat:V2SI
10728 (plusminus:SI
10729 (vec_select:SI
10730 (match_operand:V8SI 1 "register_operand" "x")
10731 (parallel [(const_int 0)]))
10732 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
10733 (plusminus:SI
10734 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
10735 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
10736 (vec_concat:V2SI
10737 (plusminus:SI
10738 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
10739 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
10740 (plusminus:SI
10741 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
10742 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
10743 (vec_concat:V4SI
10744 (vec_concat:V2SI
10745 (plusminus:SI
10746 (vec_select:SI
10747 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
10748 (parallel [(const_int 0)]))
10749 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
10750 (plusminus:SI
10751 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
10752 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
10753 (vec_concat:V2SI
10754 (plusminus:SI
10755 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
10756 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
10757 (plusminus:SI
10758 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
10759 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
10760 "TARGET_AVX2"
10761 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
10762 [(set_attr "type" "sseiadd")
10763 (set_attr "prefix_extra" "1")
10764 (set_attr "prefix" "vex")
10765 (set_attr "mode" "OI")])
10766
10767 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
10768 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10769 (vec_concat:V4SI
10770 (vec_concat:V2SI
10771 (plusminus:SI
10772 (vec_select:SI
10773 (match_operand:V4SI 1 "register_operand" "0,x")
10774 (parallel [(const_int 0)]))
10775 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
10776 (plusminus:SI
10777 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
10778 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
10779 (vec_concat:V2SI
10780 (plusminus:SI
10781 (vec_select:SI
10782 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
10783 (parallel [(const_int 0)]))
10784 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
10785 (plusminus:SI
10786 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
10787 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
10788 "TARGET_SSSE3"
10789 "@
10790 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
10791 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
10792 [(set_attr "isa" "noavx,avx")
10793 (set_attr "type" "sseiadd")
10794 (set_attr "atom_unit" "complex")
10795 (set_attr "prefix_data16" "1,*")
10796 (set_attr "prefix_extra" "1")
10797 (set_attr "prefix" "orig,vex")
10798 (set_attr "mode" "TI")])
10799
10800 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
10801 [(set (match_operand:V2SI 0 "register_operand" "=y")
10802 (vec_concat:V2SI
10803 (plusminus:SI
10804 (vec_select:SI
10805 (match_operand:V2SI 1 "register_operand" "0")
10806 (parallel [(const_int 0)]))
10807 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
10808 (plusminus:SI
10809 (vec_select:SI
10810 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
10811 (parallel [(const_int 0)]))
10812 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
10813 "TARGET_SSSE3"
10814 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
10815 [(set_attr "type" "sseiadd")
10816 (set_attr "atom_unit" "complex")
10817 (set_attr "prefix_extra" "1")
10818 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
10819 (set_attr "mode" "DI")])
10820
10821 (define_insn "avx2_pmaddubsw256"
10822 [(set (match_operand:V16HI 0 "register_operand" "=x")
10823 (ss_plus:V16HI
10824 (mult:V16HI
10825 (zero_extend:V16HI
10826 (vec_select:V16QI
10827 (match_operand:V32QI 1 "register_operand" "x")
10828 (parallel [(const_int 0) (const_int 2)
10829 (const_int 4) (const_int 6)
10830 (const_int 8) (const_int 10)
10831 (const_int 12) (const_int 14)
10832 (const_int 16) (const_int 18)
10833 (const_int 20) (const_int 22)
10834 (const_int 24) (const_int 26)
10835 (const_int 28) (const_int 30)])))
10836 (sign_extend:V16HI
10837 (vec_select:V16QI
10838 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
10839 (parallel [(const_int 0) (const_int 2)
10840 (const_int 4) (const_int 6)
10841 (const_int 8) (const_int 10)
10842 (const_int 12) (const_int 14)
10843 (const_int 16) (const_int 18)
10844 (const_int 20) (const_int 22)
10845 (const_int 24) (const_int 26)
10846 (const_int 28) (const_int 30)]))))
10847 (mult:V16HI
10848 (zero_extend:V16HI
10849 (vec_select:V16QI (match_dup 1)
10850 (parallel [(const_int 1) (const_int 3)
10851 (const_int 5) (const_int 7)
10852 (const_int 9) (const_int 11)
10853 (const_int 13) (const_int 15)
10854 (const_int 17) (const_int 19)
10855 (const_int 21) (const_int 23)
10856 (const_int 25) (const_int 27)
10857 (const_int 29) (const_int 31)])))
10858 (sign_extend:V16HI
10859 (vec_select:V16QI (match_dup 2)
10860 (parallel [(const_int 1) (const_int 3)
10861 (const_int 5) (const_int 7)
10862 (const_int 9) (const_int 11)
10863 (const_int 13) (const_int 15)
10864 (const_int 17) (const_int 19)
10865 (const_int 21) (const_int 23)
10866 (const_int 25) (const_int 27)
10867 (const_int 29) (const_int 31)]))))))]
10868 "TARGET_AVX2"
10869 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
10870 [(set_attr "type" "sseiadd")
10871 (set_attr "prefix_extra" "1")
10872 (set_attr "prefix" "vex")
10873 (set_attr "mode" "OI")])
10874
10875 (define_insn "ssse3_pmaddubsw128"
10876 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10877 (ss_plus:V8HI
10878 (mult:V8HI
10879 (zero_extend:V8HI
10880 (vec_select:V8QI
10881 (match_operand:V16QI 1 "register_operand" "0,x")
10882 (parallel [(const_int 0) (const_int 2)
10883 (const_int 4) (const_int 6)
10884 (const_int 8) (const_int 10)
10885 (const_int 12) (const_int 14)])))
10886 (sign_extend:V8HI
10887 (vec_select:V8QI
10888 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
10889 (parallel [(const_int 0) (const_int 2)
10890 (const_int 4) (const_int 6)
10891 (const_int 8) (const_int 10)
10892 (const_int 12) (const_int 14)]))))
10893 (mult:V8HI
10894 (zero_extend:V8HI
10895 (vec_select:V8QI (match_dup 1)
10896 (parallel [(const_int 1) (const_int 3)
10897 (const_int 5) (const_int 7)
10898 (const_int 9) (const_int 11)
10899 (const_int 13) (const_int 15)])))
10900 (sign_extend:V8HI
10901 (vec_select:V8QI (match_dup 2)
10902 (parallel [(const_int 1) (const_int 3)
10903 (const_int 5) (const_int 7)
10904 (const_int 9) (const_int 11)
10905 (const_int 13) (const_int 15)]))))))]
10906 "TARGET_SSSE3"
10907 "@
10908 pmaddubsw\t{%2, %0|%0, %2}
10909 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
10910 [(set_attr "isa" "noavx,avx")
10911 (set_attr "type" "sseiadd")
10912 (set_attr "atom_unit" "simul")
10913 (set_attr "prefix_data16" "1,*")
10914 (set_attr "prefix_extra" "1")
10915 (set_attr "prefix" "orig,vex")
10916 (set_attr "mode" "TI")])
10917
10918 (define_insn "ssse3_pmaddubsw"
10919 [(set (match_operand:V4HI 0 "register_operand" "=y")
10920 (ss_plus:V4HI
10921 (mult:V4HI
10922 (zero_extend:V4HI
10923 (vec_select:V4QI
10924 (match_operand:V8QI 1 "register_operand" "0")
10925 (parallel [(const_int 0) (const_int 2)
10926 (const_int 4) (const_int 6)])))
10927 (sign_extend:V4HI
10928 (vec_select:V4QI
10929 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
10930 (parallel [(const_int 0) (const_int 2)
10931 (const_int 4) (const_int 6)]))))
10932 (mult:V4HI
10933 (zero_extend:V4HI
10934 (vec_select:V4QI (match_dup 1)
10935 (parallel [(const_int 1) (const_int 3)
10936 (const_int 5) (const_int 7)])))
10937 (sign_extend:V4HI
10938 (vec_select:V4QI (match_dup 2)
10939 (parallel [(const_int 1) (const_int 3)
10940 (const_int 5) (const_int 7)]))))))]
10941 "TARGET_SSSE3"
10942 "pmaddubsw\t{%2, %0|%0, %2}"
10943 [(set_attr "type" "sseiadd")
10944 (set_attr "atom_unit" "simul")
10945 (set_attr "prefix_extra" "1")
10946 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
10947 (set_attr "mode" "DI")])
10948
10949 (define_mode_iterator PMULHRSW
10950 [V4HI V8HI (V16HI "TARGET_AVX2")])
10951
10952 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
10953 [(set (match_operand:PMULHRSW 0 "register_operand")
10954 (truncate:PMULHRSW
10955 (lshiftrt:<ssedoublemode>
10956 (plus:<ssedoublemode>
10957 (lshiftrt:<ssedoublemode>
10958 (mult:<ssedoublemode>
10959 (sign_extend:<ssedoublemode>
10960 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
10961 (sign_extend:<ssedoublemode>
10962 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
10963 (const_int 14))
10964 (match_dup 3))
10965 (const_int 1))))]
10966 "TARGET_AVX2"
10967 {
10968 operands[3] = CONST1_RTX(<MODE>mode);
10969 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
10970 })
10971
10972 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
10973 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
10974 (truncate:VI2_AVX2
10975 (lshiftrt:<ssedoublemode>
10976 (plus:<ssedoublemode>
10977 (lshiftrt:<ssedoublemode>
10978 (mult:<ssedoublemode>
10979 (sign_extend:<ssedoublemode>
10980 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
10981 (sign_extend:<ssedoublemode>
10982 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
10983 (const_int 14))
10984 (match_operand:VI2_AVX2 3 "const1_operand"))
10985 (const_int 1))))]
10986 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
10987 "@
10988 pmulhrsw\t{%2, %0|%0, %2}
10989 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
10990 [(set_attr "isa" "noavx,avx")
10991 (set_attr "type" "sseimul")
10992 (set_attr "prefix_data16" "1,*")
10993 (set_attr "prefix_extra" "1")
10994 (set_attr "prefix" "orig,vex")
10995 (set_attr "mode" "<sseinsnmode>")])
10996
10997 (define_insn "*ssse3_pmulhrswv4hi3"
10998 [(set (match_operand:V4HI 0 "register_operand" "=y")
10999 (truncate:V4HI
11000 (lshiftrt:V4SI
11001 (plus:V4SI
11002 (lshiftrt:V4SI
11003 (mult:V4SI
11004 (sign_extend:V4SI
11005 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
11006 (sign_extend:V4SI
11007 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
11008 (const_int 14))
11009 (match_operand:V4HI 3 "const1_operand"))
11010 (const_int 1))))]
11011 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
11012 "pmulhrsw\t{%2, %0|%0, %2}"
11013 [(set_attr "type" "sseimul")
11014 (set_attr "prefix_extra" "1")
11015 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11016 (set_attr "mode" "DI")])
11017
11018 (define_insn "<ssse3_avx2>_pshufb<mode>3"
11019 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11020 (unspec:VI1_AVX2
11021 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11022 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
11023 UNSPEC_PSHUFB))]
11024 "TARGET_SSSE3"
11025 "@
11026 pshufb\t{%2, %0|%0, %2}
11027 vpshufb\t{%2, %1, %0|%0, %1, %2}"
11028 [(set_attr "isa" "noavx,avx")
11029 (set_attr "type" "sselog1")
11030 (set_attr "prefix_data16" "1,*")
11031 (set_attr "prefix_extra" "1")
11032 (set_attr "prefix" "orig,vex")
11033 (set_attr "btver2_decode" "vector,vector")
11034 (set_attr "mode" "<sseinsnmode>")])
11035
11036 (define_insn "ssse3_pshufbv8qi3"
11037 [(set (match_operand:V8QI 0 "register_operand" "=y")
11038 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
11039 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
11040 UNSPEC_PSHUFB))]
11041 "TARGET_SSSE3"
11042 "pshufb\t{%2, %0|%0, %2}";
11043 [(set_attr "type" "sselog1")
11044 (set_attr "prefix_extra" "1")
11045 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11046 (set_attr "mode" "DI")])
11047
11048 (define_insn "<ssse3_avx2>_psign<mode>3"
11049 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
11050 (unspec:VI124_AVX2
11051 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
11052 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
11053 UNSPEC_PSIGN))]
11054 "TARGET_SSSE3"
11055 "@
11056 psign<ssemodesuffix>\t{%2, %0|%0, %2}
11057 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11058 [(set_attr "isa" "noavx,avx")
11059 (set_attr "type" "sselog1")
11060 (set_attr "prefix_data16" "1,*")
11061 (set_attr "prefix_extra" "1")
11062 (set_attr "prefix" "orig,vex")
11063 (set_attr "mode" "<sseinsnmode>")])
11064
11065 (define_insn "ssse3_psign<mode>3"
11066 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11067 (unspec:MMXMODEI
11068 [(match_operand:MMXMODEI 1 "register_operand" "0")
11069 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
11070 UNSPEC_PSIGN))]
11071 "TARGET_SSSE3"
11072 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
11073 [(set_attr "type" "sselog1")
11074 (set_attr "prefix_extra" "1")
11075 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11076 (set_attr "mode" "DI")])
11077
11078 (define_insn "<ssse3_avx2>_palignr<mode>"
11079 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
11080 (unspec:SSESCALARMODE
11081 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
11082 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
11083 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
11084 UNSPEC_PALIGNR))]
11085 "TARGET_SSSE3"
11086 {
11087 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11088
11089 switch (which_alternative)
11090 {
11091 case 0:
11092 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11093 case 1:
11094 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11095 default:
11096 gcc_unreachable ();
11097 }
11098 }
11099 [(set_attr "isa" "noavx,avx")
11100 (set_attr "type" "sseishft")
11101 (set_attr "atom_unit" "sishuf")
11102 (set_attr "prefix_data16" "1,*")
11103 (set_attr "prefix_extra" "1")
11104 (set_attr "length_immediate" "1")
11105 (set_attr "prefix" "orig,vex")
11106 (set_attr "mode" "<sseinsnmode>")])
11107
11108 (define_insn "ssse3_palignrdi"
11109 [(set (match_operand:DI 0 "register_operand" "=y")
11110 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
11111 (match_operand:DI 2 "nonimmediate_operand" "ym")
11112 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
11113 UNSPEC_PALIGNR))]
11114 "TARGET_SSSE3"
11115 {
11116 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11117 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11118 }
11119 [(set_attr "type" "sseishft")
11120 (set_attr "atom_unit" "sishuf")
11121 (set_attr "prefix_extra" "1")
11122 (set_attr "length_immediate" "1")
11123 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11124 (set_attr "mode" "DI")])
11125
11126 (define_insn "<mask_codefor>abs<mode>2<mask_name>"
11127 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
11128 (abs:VI124_AVX2_48_AVX512F
11129 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
11130 "TARGET_SSSE3 && <mask_mode512bit_condition>"
11131 "%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11132 [(set_attr "type" "sselog1")
11133 (set_attr "prefix_data16" "1")
11134 (set_attr "prefix_extra" "1")
11135 (set_attr "prefix" "maybe_vex")
11136 (set_attr "mode" "<sseinsnmode>")])
11137
11138 (define_expand "abs<mode>2"
11139 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
11140 (abs:VI124_AVX2_48_AVX512F
11141 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand")))]
11142 "TARGET_SSE2"
11143 {
11144 if (!TARGET_SSSE3)
11145 {
11146 ix86_expand_sse2_abs (operands[0], operands[1]);
11147 DONE;
11148 }
11149 })
11150
11151 (define_insn "abs<mode>2"
11152 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11153 (abs:MMXMODEI
11154 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
11155 "TARGET_SSSE3"
11156 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
11157 [(set_attr "type" "sselog1")
11158 (set_attr "prefix_rep" "0")
11159 (set_attr "prefix_extra" "1")
11160 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11161 (set_attr "mode" "DI")])
11162
11163 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11164 ;;
11165 ;; AMD SSE4A instructions
11166 ;;
11167 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11168
11169 (define_insn "sse4a_movnt<mode>"
11170 [(set (match_operand:MODEF 0 "memory_operand" "=m")
11171 (unspec:MODEF
11172 [(match_operand:MODEF 1 "register_operand" "x")]
11173 UNSPEC_MOVNT))]
11174 "TARGET_SSE4A"
11175 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
11176 [(set_attr "type" "ssemov")
11177 (set_attr "mode" "<MODE>")])
11178
11179 (define_insn "sse4a_vmmovnt<mode>"
11180 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
11181 (unspec:<ssescalarmode>
11182 [(vec_select:<ssescalarmode>
11183 (match_operand:VF_128 1 "register_operand" "x")
11184 (parallel [(const_int 0)]))]
11185 UNSPEC_MOVNT))]
11186 "TARGET_SSE4A"
11187 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11188 [(set_attr "type" "ssemov")
11189 (set_attr "mode" "<ssescalarmode>")])
11190
11191 (define_insn "sse4a_extrqi"
11192 [(set (match_operand:V2DI 0 "register_operand" "=x")
11193 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11194 (match_operand 2 "const_0_to_255_operand")
11195 (match_operand 3 "const_0_to_255_operand")]
11196 UNSPEC_EXTRQI))]
11197 "TARGET_SSE4A"
11198 "extrq\t{%3, %2, %0|%0, %2, %3}"
11199 [(set_attr "type" "sse")
11200 (set_attr "prefix_data16" "1")
11201 (set_attr "length_immediate" "2")
11202 (set_attr "mode" "TI")])
11203
11204 (define_insn "sse4a_extrq"
11205 [(set (match_operand:V2DI 0 "register_operand" "=x")
11206 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11207 (match_operand:V16QI 2 "register_operand" "x")]
11208 UNSPEC_EXTRQ))]
11209 "TARGET_SSE4A"
11210 "extrq\t{%2, %0|%0, %2}"
11211 [(set_attr "type" "sse")
11212 (set_attr "prefix_data16" "1")
11213 (set_attr "mode" "TI")])
11214
11215 (define_insn "sse4a_insertqi"
11216 [(set (match_operand:V2DI 0 "register_operand" "=x")
11217 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11218 (match_operand:V2DI 2 "register_operand" "x")
11219 (match_operand 3 "const_0_to_255_operand")
11220 (match_operand 4 "const_0_to_255_operand")]
11221 UNSPEC_INSERTQI))]
11222 "TARGET_SSE4A"
11223 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
11224 [(set_attr "type" "sseins")
11225 (set_attr "prefix_data16" "0")
11226 (set_attr "prefix_rep" "1")
11227 (set_attr "length_immediate" "2")
11228 (set_attr "mode" "TI")])
11229
11230 (define_insn "sse4a_insertq"
11231 [(set (match_operand:V2DI 0 "register_operand" "=x")
11232 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11233 (match_operand:V2DI 2 "register_operand" "x")]
11234 UNSPEC_INSERTQ))]
11235 "TARGET_SSE4A"
11236 "insertq\t{%2, %0|%0, %2}"
11237 [(set_attr "type" "sseins")
11238 (set_attr "prefix_data16" "0")
11239 (set_attr "prefix_rep" "1")
11240 (set_attr "mode" "TI")])
11241
11242 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11243 ;;
11244 ;; Intel SSE4.1 instructions
11245 ;;
11246 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11247
11248 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
11249 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11250 (vec_merge:VF_128_256
11251 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11252 (match_operand:VF_128_256 1 "register_operand" "0,x")
11253 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
11254 "TARGET_SSE4_1"
11255 "@
11256 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11257 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11258 [(set_attr "isa" "noavx,avx")
11259 (set_attr "type" "ssemov")
11260 (set_attr "length_immediate" "1")
11261 (set_attr "prefix_data16" "1,*")
11262 (set_attr "prefix_extra" "1")
11263 (set_attr "prefix" "orig,vex")
11264 (set_attr "mode" "<MODE>")])
11265
11266 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
11267 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11268 (unspec:VF_128_256
11269 [(match_operand:VF_128_256 1 "register_operand" "0,x")
11270 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11271 (match_operand:VF_128_256 3 "register_operand" "Yz,x")]
11272 UNSPEC_BLENDV))]
11273 "TARGET_SSE4_1"
11274 "@
11275 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11276 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11277 [(set_attr "isa" "noavx,avx")
11278 (set_attr "type" "ssemov")
11279 (set_attr "length_immediate" "1")
11280 (set_attr "prefix_data16" "1,*")
11281 (set_attr "prefix_extra" "1")
11282 (set_attr "prefix" "orig,vex")
11283 (set_attr "btver2_decode" "vector,vector")
11284 (set_attr "mode" "<MODE>")])
11285
11286 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
11287 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11288 (unspec:VF_128_256
11289 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x")
11290 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11291 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11292 UNSPEC_DP))]
11293 "TARGET_SSE4_1"
11294 "@
11295 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11296 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11297 [(set_attr "isa" "noavx,avx")
11298 (set_attr "type" "ssemul")
11299 (set_attr "length_immediate" "1")
11300 (set_attr "prefix_data16" "1,*")
11301 (set_attr "prefix_extra" "1")
11302 (set_attr "prefix" "orig,vex")
11303 (set_attr "btver2_decode" "vector,vector")
11304 (set_attr "mode" "<MODE>")])
11305
11306 (define_insn "<sse4_1_avx2>_movntdqa"
11307 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
11308 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
11309 UNSPEC_MOVNTDQA))]
11310 "TARGET_SSE4_1"
11311 "%vmovntdqa\t{%1, %0|%0, %1}"
11312 [(set_attr "type" "ssemov")
11313 (set_attr "prefix_extra" "1")
11314 (set_attr "prefix" "maybe_vex")
11315 (set_attr "mode" "<sseinsnmode>")])
11316
11317 (define_insn "<sse4_1_avx2>_mpsadbw"
11318 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11319 (unspec:VI1_AVX2
11320 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11321 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
11322 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11323 UNSPEC_MPSADBW))]
11324 "TARGET_SSE4_1"
11325 "@
11326 mpsadbw\t{%3, %2, %0|%0, %2, %3}
11327 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11328 [(set_attr "isa" "noavx,avx")
11329 (set_attr "type" "sselog1")
11330 (set_attr "length_immediate" "1")
11331 (set_attr "prefix_extra" "1")
11332 (set_attr "prefix" "orig,vex")
11333 (set_attr "btver2_decode" "vector,vector")
11334 (set_attr "mode" "<sseinsnmode>")])
11335
11336 (define_insn "avx2_packusdw"
11337 [(set (match_operand:V16HI 0 "register_operand" "=x")
11338 (vec_concat:V16HI
11339 (us_truncate:V8HI
11340 (match_operand:V8SI 1 "register_operand" "x"))
11341 (us_truncate:V8HI
11342 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
11343 "TARGET_AVX2"
11344 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
11345 [(set_attr "type" "sselog")
11346 (set_attr "prefix_extra" "1")
11347 (set_attr "prefix" "vex")
11348 (set_attr "mode" "OI")])
11349
11350 (define_insn "sse4_1_packusdw"
11351 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11352 (vec_concat:V8HI
11353 (us_truncate:V4HI
11354 (match_operand:V4SI 1 "register_operand" "0,x"))
11355 (us_truncate:V4HI
11356 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
11357 "TARGET_SSE4_1"
11358 "@
11359 packusdw\t{%2, %0|%0, %2}
11360 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
11361 [(set_attr "isa" "noavx,avx")
11362 (set_attr "type" "sselog")
11363 (set_attr "prefix_extra" "1")
11364 (set_attr "prefix" "orig,vex")
11365 (set_attr "mode" "TI")])
11366
11367 (define_insn "<sse4_1_avx2>_pblendvb"
11368 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11369 (unspec:VI1_AVX2
11370 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11371 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
11372 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
11373 UNSPEC_BLENDV))]
11374 "TARGET_SSE4_1"
11375 "@
11376 pblendvb\t{%3, %2, %0|%0, %2, %3}
11377 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11378 [(set_attr "isa" "noavx,avx")
11379 (set_attr "type" "ssemov")
11380 (set_attr "prefix_extra" "1")
11381 (set_attr "length_immediate" "*,1")
11382 (set_attr "prefix" "orig,vex")
11383 (set_attr "btver2_decode" "vector,vector")
11384 (set_attr "mode" "<sseinsnmode>")])
11385
11386 (define_insn "sse4_1_pblendw"
11387 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11388 (vec_merge:V8HI
11389 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
11390 (match_operand:V8HI 1 "register_operand" "0,x")
11391 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
11392 "TARGET_SSE4_1"
11393 "@
11394 pblendw\t{%3, %2, %0|%0, %2, %3}
11395 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11396 [(set_attr "isa" "noavx,avx")
11397 (set_attr "type" "ssemov")
11398 (set_attr "prefix_extra" "1")
11399 (set_attr "length_immediate" "1")
11400 (set_attr "prefix" "orig,vex")
11401 (set_attr "mode" "TI")])
11402
11403 ;; The builtin uses an 8-bit immediate. Expand that.
11404 (define_expand "avx2_pblendw"
11405 [(set (match_operand:V16HI 0 "register_operand")
11406 (vec_merge:V16HI
11407 (match_operand:V16HI 2 "nonimmediate_operand")
11408 (match_operand:V16HI 1 "register_operand")
11409 (match_operand:SI 3 "const_0_to_255_operand")))]
11410 "TARGET_AVX2"
11411 {
11412 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
11413 operands[3] = GEN_INT (val << 8 | val);
11414 })
11415
11416 (define_insn "*avx2_pblendw"
11417 [(set (match_operand:V16HI 0 "register_operand" "=x")
11418 (vec_merge:V16HI
11419 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
11420 (match_operand:V16HI 1 "register_operand" "x")
11421 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
11422 "TARGET_AVX2"
11423 {
11424 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
11425 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11426 }
11427 [(set_attr "type" "ssemov")
11428 (set_attr "prefix_extra" "1")
11429 (set_attr "length_immediate" "1")
11430 (set_attr "prefix" "vex")
11431 (set_attr "mode" "OI")])
11432
11433 (define_insn "avx2_pblendd<mode>"
11434 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
11435 (vec_merge:VI4_AVX2
11436 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
11437 (match_operand:VI4_AVX2 1 "register_operand" "x")
11438 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
11439 "TARGET_AVX2"
11440 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11441 [(set_attr "type" "ssemov")
11442 (set_attr "prefix_extra" "1")
11443 (set_attr "length_immediate" "1")
11444 (set_attr "prefix" "vex")
11445 (set_attr "mode" "<sseinsnmode>")])
11446
11447 (define_insn "sse4_1_phminposuw"
11448 [(set (match_operand:V8HI 0 "register_operand" "=x")
11449 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
11450 UNSPEC_PHMINPOSUW))]
11451 "TARGET_SSE4_1"
11452 "%vphminposuw\t{%1, %0|%0, %1}"
11453 [(set_attr "type" "sselog1")
11454 (set_attr "prefix_extra" "1")
11455 (set_attr "prefix" "maybe_vex")
11456 (set_attr "mode" "TI")])
11457
11458 (define_insn "avx2_<code>v16qiv16hi2"
11459 [(set (match_operand:V16HI 0 "register_operand" "=x")
11460 (any_extend:V16HI
11461 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
11462 "TARGET_AVX2"
11463 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
11464 [(set_attr "type" "ssemov")
11465 (set_attr "prefix_extra" "1")
11466 (set_attr "prefix" "vex")
11467 (set_attr "mode" "OI")])
11468
11469 (define_insn "sse4_1_<code>v8qiv8hi2"
11470 [(set (match_operand:V8HI 0 "register_operand" "=x")
11471 (any_extend:V8HI
11472 (vec_select:V8QI
11473 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11474 (parallel [(const_int 0) (const_int 1)
11475 (const_int 2) (const_int 3)
11476 (const_int 4) (const_int 5)
11477 (const_int 6) (const_int 7)]))))]
11478 "TARGET_SSE4_1"
11479 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
11480 [(set_attr "type" "ssemov")
11481 (set_attr "ssememalign" "64")
11482 (set_attr "prefix_extra" "1")
11483 (set_attr "prefix" "maybe_vex")
11484 (set_attr "mode" "TI")])
11485
11486 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
11487 [(set (match_operand:V16SI 0 "register_operand" "=v")
11488 (any_extend:V16SI
11489 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
11490 "TARGET_AVX512F"
11491 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
11492 [(set_attr "type" "ssemov")
11493 (set_attr "prefix" "evex")
11494 (set_attr "mode" "XI")])
11495
11496 (define_insn "avx2_<code>v8qiv8si2"
11497 [(set (match_operand:V8SI 0 "register_operand" "=x")
11498 (any_extend:V8SI
11499 (vec_select:V8QI
11500 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11501 (parallel [(const_int 0) (const_int 1)
11502 (const_int 2) (const_int 3)
11503 (const_int 4) (const_int 5)
11504 (const_int 6) (const_int 7)]))))]
11505 "TARGET_AVX2"
11506 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
11507 [(set_attr "type" "ssemov")
11508 (set_attr "prefix_extra" "1")
11509 (set_attr "prefix" "vex")
11510 (set_attr "mode" "OI")])
11511
11512 (define_insn "sse4_1_<code>v4qiv4si2"
11513 [(set (match_operand:V4SI 0 "register_operand" "=x")
11514 (any_extend:V4SI
11515 (vec_select:V4QI
11516 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11517 (parallel [(const_int 0) (const_int 1)
11518 (const_int 2) (const_int 3)]))))]
11519 "TARGET_SSE4_1"
11520 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
11521 [(set_attr "type" "ssemov")
11522 (set_attr "ssememalign" "32")
11523 (set_attr "prefix_extra" "1")
11524 (set_attr "prefix" "maybe_vex")
11525 (set_attr "mode" "TI")])
11526
11527 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
11528 [(set (match_operand:V16SI 0 "register_operand" "=v")
11529 (any_extend:V16SI
11530 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
11531 "TARGET_AVX512F"
11532 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11533 [(set_attr "type" "ssemov")
11534 (set_attr "prefix" "evex")
11535 (set_attr "mode" "XI")])
11536
11537 (define_insn "avx2_<code>v8hiv8si2"
11538 [(set (match_operand:V8SI 0 "register_operand" "=x")
11539 (any_extend:V8SI
11540 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
11541 "TARGET_AVX2"
11542 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
11543 [(set_attr "type" "ssemov")
11544 (set_attr "prefix_extra" "1")
11545 (set_attr "prefix" "vex")
11546 (set_attr "mode" "OI")])
11547
11548 (define_insn "sse4_1_<code>v4hiv4si2"
11549 [(set (match_operand:V4SI 0 "register_operand" "=x")
11550 (any_extend:V4SI
11551 (vec_select:V4HI
11552 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11553 (parallel [(const_int 0) (const_int 1)
11554 (const_int 2) (const_int 3)]))))]
11555 "TARGET_SSE4_1"
11556 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
11557 [(set_attr "type" "ssemov")
11558 (set_attr "ssememalign" "64")
11559 (set_attr "prefix_extra" "1")
11560 (set_attr "prefix" "maybe_vex")
11561 (set_attr "mode" "TI")])
11562
11563 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
11564 [(set (match_operand:V8DI 0 "register_operand" "=v")
11565 (any_extend:V8DI
11566 (vec_select:V8QI
11567 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
11568 (parallel [(const_int 0) (const_int 1)
11569 (const_int 2) (const_int 3)
11570 (const_int 4) (const_int 5)
11571 (const_int 6) (const_int 7)]))))]
11572 "TARGET_AVX512F"
11573 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
11574 [(set_attr "type" "ssemov")
11575 (set_attr "prefix" "evex")
11576 (set_attr "mode" "XI")])
11577
11578 (define_insn "avx2_<code>v4qiv4di2"
11579 [(set (match_operand:V4DI 0 "register_operand" "=x")
11580 (any_extend:V4DI
11581 (vec_select:V4QI
11582 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11583 (parallel [(const_int 0) (const_int 1)
11584 (const_int 2) (const_int 3)]))))]
11585 "TARGET_AVX2"
11586 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
11587 [(set_attr "type" "ssemov")
11588 (set_attr "prefix_extra" "1")
11589 (set_attr "prefix" "vex")
11590 (set_attr "mode" "OI")])
11591
11592 (define_insn "sse4_1_<code>v2qiv2di2"
11593 [(set (match_operand:V2DI 0 "register_operand" "=x")
11594 (any_extend:V2DI
11595 (vec_select:V2QI
11596 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11597 (parallel [(const_int 0) (const_int 1)]))))]
11598 "TARGET_SSE4_1"
11599 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
11600 [(set_attr "type" "ssemov")
11601 (set_attr "ssememalign" "16")
11602 (set_attr "prefix_extra" "1")
11603 (set_attr "prefix" "maybe_vex")
11604 (set_attr "mode" "TI")])
11605
11606 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
11607 [(set (match_operand:V8DI 0 "register_operand" "=v")
11608 (any_extend:V8DI
11609 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
11610 "TARGET_AVX512F"
11611 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
11612 [(set_attr "type" "ssemov")
11613 (set_attr "prefix" "evex")
11614 (set_attr "mode" "XI")])
11615
11616 (define_insn "avx2_<code>v4hiv4di2"
11617 [(set (match_operand:V4DI 0 "register_operand" "=x")
11618 (any_extend:V4DI
11619 (vec_select:V4HI
11620 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11621 (parallel [(const_int 0) (const_int 1)
11622 (const_int 2) (const_int 3)]))))]
11623 "TARGET_AVX2"
11624 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
11625 [(set_attr "type" "ssemov")
11626 (set_attr "prefix_extra" "1")
11627 (set_attr "prefix" "vex")
11628 (set_attr "mode" "OI")])
11629
11630 (define_insn "sse4_1_<code>v2hiv2di2"
11631 [(set (match_operand:V2DI 0 "register_operand" "=x")
11632 (any_extend:V2DI
11633 (vec_select:V2HI
11634 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11635 (parallel [(const_int 0) (const_int 1)]))))]
11636 "TARGET_SSE4_1"
11637 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
11638 [(set_attr "type" "ssemov")
11639 (set_attr "ssememalign" "32")
11640 (set_attr "prefix_extra" "1")
11641 (set_attr "prefix" "maybe_vex")
11642 (set_attr "mode" "TI")])
11643
11644 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
11645 [(set (match_operand:V8DI 0 "register_operand" "=v")
11646 (any_extend:V8DI
11647 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
11648 "TARGET_AVX512F"
11649 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11650 [(set_attr "type" "ssemov")
11651 (set_attr "prefix" "evex")
11652 (set_attr "mode" "XI")])
11653
11654 (define_insn "avx2_<code>v4siv4di2"
11655 [(set (match_operand:V4DI 0 "register_operand" "=x")
11656 (any_extend:V4DI
11657 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
11658 "TARGET_AVX2"
11659 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
11660 [(set_attr "type" "ssemov")
11661 (set_attr "prefix_extra" "1")
11662 (set_attr "mode" "OI")])
11663
11664 (define_insn "sse4_1_<code>v2siv2di2"
11665 [(set (match_operand:V2DI 0 "register_operand" "=x")
11666 (any_extend:V2DI
11667 (vec_select:V2SI
11668 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11669 (parallel [(const_int 0) (const_int 1)]))))]
11670 "TARGET_SSE4_1"
11671 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
11672 [(set_attr "type" "ssemov")
11673 (set_attr "ssememalign" "64")
11674 (set_attr "prefix_extra" "1")
11675 (set_attr "prefix" "maybe_vex")
11676 (set_attr "mode" "TI")])
11677
11678 ;; ptestps/ptestpd are very similar to comiss and ucomiss when
11679 ;; setting FLAGS_REG. But it is not a really compare instruction.
11680 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
11681 [(set (reg:CC FLAGS_REG)
11682 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
11683 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
11684 UNSPEC_VTESTP))]
11685 "TARGET_AVX"
11686 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
11687 [(set_attr "type" "ssecomi")
11688 (set_attr "prefix_extra" "1")
11689 (set_attr "prefix" "vex")
11690 (set_attr "mode" "<MODE>")])
11691
11692 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
11693 ;; But it is not a really compare instruction.
11694 (define_insn "avx_ptest256"
11695 [(set (reg:CC FLAGS_REG)
11696 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
11697 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
11698 UNSPEC_PTEST))]
11699 "TARGET_AVX"
11700 "vptest\t{%1, %0|%0, %1}"
11701 [(set_attr "type" "ssecomi")
11702 (set_attr "prefix_extra" "1")
11703 (set_attr "prefix" "vex")
11704 (set_attr "btver2_decode" "vector")
11705 (set_attr "mode" "OI")])
11706
11707 (define_insn "sse4_1_ptest"
11708 [(set (reg:CC FLAGS_REG)
11709 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
11710 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11711 UNSPEC_PTEST))]
11712 "TARGET_SSE4_1"
11713 "%vptest\t{%1, %0|%0, %1}"
11714 [(set_attr "type" "ssecomi")
11715 (set_attr "prefix_extra" "1")
11716 (set_attr "prefix" "maybe_vex")
11717 (set_attr "mode" "TI")])
11718
11719 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
11720 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
11721 (unspec:VF_128_256
11722 [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
11723 (match_operand:SI 2 "const_0_to_15_operand" "n")]
11724 UNSPEC_ROUND))]
11725 "TARGET_ROUND"
11726 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11727 [(set_attr "type" "ssecvt")
11728 (set (attr "prefix_data16")
11729 (if_then_else
11730 (match_test "TARGET_AVX")
11731 (const_string "*")
11732 (const_string "1")))
11733 (set_attr "prefix_extra" "1")
11734 (set_attr "length_immediate" "1")
11735 (set_attr "prefix" "maybe_vex")
11736 (set_attr "mode" "<MODE>")])
11737
11738 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
11739 [(match_operand:<sseintvecmode> 0 "register_operand")
11740 (match_operand:VF1_128_256 1 "nonimmediate_operand")
11741 (match_operand:SI 2 "const_0_to_15_operand")]
11742 "TARGET_ROUND"
11743 {
11744 rtx tmp = gen_reg_rtx (<MODE>mode);
11745
11746 emit_insn
11747 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
11748 operands[2]));
11749 emit_insn
11750 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
11751 DONE;
11752 })
11753
11754 (define_expand "avx512f_roundpd512"
11755 [(match_operand:V8DF 0 "register_operand")
11756 (match_operand:V8DF 1 "nonimmediate_operand")
11757 (match_operand:SI 2 "const_0_to_15_operand")]
11758 "TARGET_AVX512F"
11759 {
11760 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
11761 DONE;
11762 })
11763
11764 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
11765 [(match_operand:<ssepackfltmode> 0 "register_operand")
11766 (match_operand:VF2 1 "nonimmediate_operand")
11767 (match_operand:VF2 2 "nonimmediate_operand")
11768 (match_operand:SI 3 "const_0_to_15_operand")]
11769 "TARGET_ROUND"
11770 {
11771 rtx tmp0, tmp1;
11772
11773 if (<MODE>mode == V2DFmode
11774 && TARGET_AVX && !TARGET_PREFER_AVX128)
11775 {
11776 rtx tmp2 = gen_reg_rtx (V4DFmode);
11777
11778 tmp0 = gen_reg_rtx (V4DFmode);
11779 tmp1 = force_reg (V2DFmode, operands[1]);
11780
11781 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
11782 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
11783 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
11784 }
11785 else
11786 {
11787 tmp0 = gen_reg_rtx (<MODE>mode);
11788 tmp1 = gen_reg_rtx (<MODE>mode);
11789
11790 emit_insn
11791 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
11792 operands[3]));
11793 emit_insn
11794 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
11795 operands[3]));
11796 emit_insn
11797 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
11798 }
11799 DONE;
11800 })
11801
11802 (define_insn "sse4_1_round<ssescalarmodesuffix>"
11803 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
11804 (vec_merge:VF_128
11805 (unspec:VF_128
11806 [(match_operand:VF_128 2 "register_operand" "x,x")
11807 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
11808 UNSPEC_ROUND)
11809 (match_operand:VF_128 1 "register_operand" "0,x")
11810 (const_int 1)))]
11811 "TARGET_ROUND"
11812 "@
11813 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
11814 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11815 [(set_attr "isa" "noavx,avx")
11816 (set_attr "type" "ssecvt")
11817 (set_attr "length_immediate" "1")
11818 (set_attr "prefix_data16" "1,*")
11819 (set_attr "prefix_extra" "1")
11820 (set_attr "prefix" "orig,vex")
11821 (set_attr "mode" "<MODE>")])
11822
11823 (define_expand "round<mode>2"
11824 [(set (match_dup 4)
11825 (plus:VF
11826 (match_operand:VF 1 "register_operand")
11827 (match_dup 3)))
11828 (set (match_operand:VF 0 "register_operand")
11829 (unspec:VF
11830 [(match_dup 4) (match_dup 5)]
11831 UNSPEC_ROUND))]
11832 "TARGET_ROUND && !flag_trapping_math"
11833 {
11834 enum machine_mode scalar_mode;
11835 const struct real_format *fmt;
11836 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
11837 rtx half, vec_half;
11838
11839 scalar_mode = GET_MODE_INNER (<MODE>mode);
11840
11841 /* load nextafter (0.5, 0.0) */
11842 fmt = REAL_MODE_FORMAT (scalar_mode);
11843 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
11844 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
11845 half = const_double_from_real_value (pred_half, scalar_mode);
11846
11847 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
11848 vec_half = force_reg (<MODE>mode, vec_half);
11849
11850 operands[3] = gen_reg_rtx (<MODE>mode);
11851 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
11852
11853 operands[4] = gen_reg_rtx (<MODE>mode);
11854 operands[5] = GEN_INT (ROUND_TRUNC);
11855 })
11856
11857 (define_expand "round<mode>2_sfix"
11858 [(match_operand:<sseintvecmode> 0 "register_operand")
11859 (match_operand:VF1_128_256 1 "register_operand")]
11860 "TARGET_ROUND && !flag_trapping_math"
11861 {
11862 rtx tmp = gen_reg_rtx (<MODE>mode);
11863
11864 emit_insn (gen_round<mode>2 (tmp, operands[1]));
11865
11866 emit_insn
11867 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
11868 DONE;
11869 })
11870
11871 (define_expand "round<mode>2_vec_pack_sfix"
11872 [(match_operand:<ssepackfltmode> 0 "register_operand")
11873 (match_operand:VF2 1 "register_operand")
11874 (match_operand:VF2 2 "register_operand")]
11875 "TARGET_ROUND && !flag_trapping_math"
11876 {
11877 rtx tmp0, tmp1;
11878
11879 if (<MODE>mode == V2DFmode
11880 && TARGET_AVX && !TARGET_PREFER_AVX128)
11881 {
11882 rtx tmp2 = gen_reg_rtx (V4DFmode);
11883
11884 tmp0 = gen_reg_rtx (V4DFmode);
11885 tmp1 = force_reg (V2DFmode, operands[1]);
11886
11887 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
11888 emit_insn (gen_roundv4df2 (tmp2, tmp0));
11889 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
11890 }
11891 else
11892 {
11893 tmp0 = gen_reg_rtx (<MODE>mode);
11894 tmp1 = gen_reg_rtx (<MODE>mode);
11895
11896 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
11897 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
11898
11899 emit_insn
11900 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
11901 }
11902 DONE;
11903 })
11904
11905 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11906 ;;
11907 ;; Intel SSE4.2 string/text processing instructions
11908 ;;
11909 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11910
11911 (define_insn_and_split "sse4_2_pcmpestr"
11912 [(set (match_operand:SI 0 "register_operand" "=c,c")
11913 (unspec:SI
11914 [(match_operand:V16QI 2 "register_operand" "x,x")
11915 (match_operand:SI 3 "register_operand" "a,a")
11916 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
11917 (match_operand:SI 5 "register_operand" "d,d")
11918 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
11919 UNSPEC_PCMPESTR))
11920 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
11921 (unspec:V16QI
11922 [(match_dup 2)
11923 (match_dup 3)
11924 (match_dup 4)
11925 (match_dup 5)
11926 (match_dup 6)]
11927 UNSPEC_PCMPESTR))
11928 (set (reg:CC FLAGS_REG)
11929 (unspec:CC
11930 [(match_dup 2)
11931 (match_dup 3)
11932 (match_dup 4)
11933 (match_dup 5)
11934 (match_dup 6)]
11935 UNSPEC_PCMPESTR))]
11936 "TARGET_SSE4_2
11937 && can_create_pseudo_p ()"
11938 "#"
11939 "&& 1"
11940 [(const_int 0)]
11941 {
11942 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
11943 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
11944 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
11945
11946 if (ecx)
11947 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
11948 operands[3], operands[4],
11949 operands[5], operands[6]));
11950 if (xmm0)
11951 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
11952 operands[3], operands[4],
11953 operands[5], operands[6]));
11954 if (flags && !(ecx || xmm0))
11955 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
11956 operands[2], operands[3],
11957 operands[4], operands[5],
11958 operands[6]));
11959 if (!(flags || ecx || xmm0))
11960 emit_note (NOTE_INSN_DELETED);
11961
11962 DONE;
11963 }
11964 [(set_attr "type" "sselog")
11965 (set_attr "prefix_data16" "1")
11966 (set_attr "prefix_extra" "1")
11967 (set_attr "ssememalign" "8")
11968 (set_attr "length_immediate" "1")
11969 (set_attr "memory" "none,load")
11970 (set_attr "mode" "TI")])
11971
11972 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
11973 [(set (match_operand:SI 0 "register_operand" "=c")
11974 (unspec:SI
11975 [(match_operand:V16QI 2 "register_operand" "x")
11976 (match_operand:SI 3 "register_operand" "a")
11977 (unspec:V16QI
11978 [(match_operand:V16QI 4 "memory_operand" "m")]
11979 UNSPEC_LOADU)
11980 (match_operand:SI 5 "register_operand" "d")
11981 (match_operand:SI 6 "const_0_to_255_operand" "n")]
11982 UNSPEC_PCMPESTR))
11983 (set (match_operand:V16QI 1 "register_operand" "=Yz")
11984 (unspec:V16QI
11985 [(match_dup 2)
11986 (match_dup 3)
11987 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
11988 (match_dup 5)
11989 (match_dup 6)]
11990 UNSPEC_PCMPESTR))
11991 (set (reg:CC FLAGS_REG)
11992 (unspec:CC
11993 [(match_dup 2)
11994 (match_dup 3)
11995 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
11996 (match_dup 5)
11997 (match_dup 6)]
11998 UNSPEC_PCMPESTR))]
11999 "TARGET_SSE4_2
12000 && can_create_pseudo_p ()"
12001 "#"
12002 "&& 1"
12003 [(const_int 0)]
12004 {
12005 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12006 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12007 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12008
12009 if (ecx)
12010 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
12011 operands[3], operands[4],
12012 operands[5], operands[6]));
12013 if (xmm0)
12014 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
12015 operands[3], operands[4],
12016 operands[5], operands[6]));
12017 if (flags && !(ecx || xmm0))
12018 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
12019 operands[2], operands[3],
12020 operands[4], operands[5],
12021 operands[6]));
12022 if (!(flags || ecx || xmm0))
12023 emit_note (NOTE_INSN_DELETED);
12024
12025 DONE;
12026 }
12027 [(set_attr "type" "sselog")
12028 (set_attr "prefix_data16" "1")
12029 (set_attr "prefix_extra" "1")
12030 (set_attr "ssememalign" "8")
12031 (set_attr "length_immediate" "1")
12032 (set_attr "memory" "load")
12033 (set_attr "mode" "TI")])
12034
12035 (define_insn "sse4_2_pcmpestri"
12036 [(set (match_operand:SI 0 "register_operand" "=c,c")
12037 (unspec:SI
12038 [(match_operand:V16QI 1 "register_operand" "x,x")
12039 (match_operand:SI 2 "register_operand" "a,a")
12040 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12041 (match_operand:SI 4 "register_operand" "d,d")
12042 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12043 UNSPEC_PCMPESTR))
12044 (set (reg:CC FLAGS_REG)
12045 (unspec:CC
12046 [(match_dup 1)
12047 (match_dup 2)
12048 (match_dup 3)
12049 (match_dup 4)
12050 (match_dup 5)]
12051 UNSPEC_PCMPESTR))]
12052 "TARGET_SSE4_2"
12053 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
12054 [(set_attr "type" "sselog")
12055 (set_attr "prefix_data16" "1")
12056 (set_attr "prefix_extra" "1")
12057 (set_attr "prefix" "maybe_vex")
12058 (set_attr "ssememalign" "8")
12059 (set_attr "length_immediate" "1")
12060 (set_attr "btver2_decode" "vector")
12061 (set_attr "memory" "none,load")
12062 (set_attr "mode" "TI")])
12063
12064 (define_insn "sse4_2_pcmpestrm"
12065 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12066 (unspec:V16QI
12067 [(match_operand:V16QI 1 "register_operand" "x,x")
12068 (match_operand:SI 2 "register_operand" "a,a")
12069 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12070 (match_operand:SI 4 "register_operand" "d,d")
12071 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12072 UNSPEC_PCMPESTR))
12073 (set (reg:CC FLAGS_REG)
12074 (unspec:CC
12075 [(match_dup 1)
12076 (match_dup 2)
12077 (match_dup 3)
12078 (match_dup 4)
12079 (match_dup 5)]
12080 UNSPEC_PCMPESTR))]
12081 "TARGET_SSE4_2"
12082 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
12083 [(set_attr "type" "sselog")
12084 (set_attr "prefix_data16" "1")
12085 (set_attr "prefix_extra" "1")
12086 (set_attr "ssememalign" "8")
12087 (set_attr "length_immediate" "1")
12088 (set_attr "prefix" "maybe_vex")
12089 (set_attr "btver2_decode" "vector")
12090 (set_attr "memory" "none,load")
12091 (set_attr "mode" "TI")])
12092
12093 (define_insn "sse4_2_pcmpestr_cconly"
12094 [(set (reg:CC FLAGS_REG)
12095 (unspec:CC
12096 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12097 (match_operand:SI 3 "register_operand" "a,a,a,a")
12098 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
12099 (match_operand:SI 5 "register_operand" "d,d,d,d")
12100 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
12101 UNSPEC_PCMPESTR))
12102 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12103 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12104 "TARGET_SSE4_2"
12105 "@
12106 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12107 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12108 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
12109 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
12110 [(set_attr "type" "sselog")
12111 (set_attr "prefix_data16" "1")
12112 (set_attr "prefix_extra" "1")
12113 (set_attr "ssememalign" "8")
12114 (set_attr "length_immediate" "1")
12115 (set_attr "memory" "none,load,none,load")
12116 (set_attr "btver2_decode" "vector,vector,vector,vector")
12117 (set_attr "prefix" "maybe_vex")
12118 (set_attr "mode" "TI")])
12119
12120 (define_insn_and_split "sse4_2_pcmpistr"
12121 [(set (match_operand:SI 0 "register_operand" "=c,c")
12122 (unspec:SI
12123 [(match_operand:V16QI 2 "register_operand" "x,x")
12124 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12125 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
12126 UNSPEC_PCMPISTR))
12127 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
12128 (unspec:V16QI
12129 [(match_dup 2)
12130 (match_dup 3)
12131 (match_dup 4)]
12132 UNSPEC_PCMPISTR))
12133 (set (reg:CC FLAGS_REG)
12134 (unspec:CC
12135 [(match_dup 2)
12136 (match_dup 3)
12137 (match_dup 4)]
12138 UNSPEC_PCMPISTR))]
12139 "TARGET_SSE4_2
12140 && can_create_pseudo_p ()"
12141 "#"
12142 "&& 1"
12143 [(const_int 0)]
12144 {
12145 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12146 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12147 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12148
12149 if (ecx)
12150 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12151 operands[3], operands[4]));
12152 if (xmm0)
12153 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12154 operands[3], operands[4]));
12155 if (flags && !(ecx || xmm0))
12156 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12157 operands[2], operands[3],
12158 operands[4]));
12159 if (!(flags || ecx || xmm0))
12160 emit_note (NOTE_INSN_DELETED);
12161
12162 DONE;
12163 }
12164 [(set_attr "type" "sselog")
12165 (set_attr "prefix_data16" "1")
12166 (set_attr "prefix_extra" "1")
12167 (set_attr "ssememalign" "8")
12168 (set_attr "length_immediate" "1")
12169 (set_attr "memory" "none,load")
12170 (set_attr "mode" "TI")])
12171
12172 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
12173 [(set (match_operand:SI 0 "register_operand" "=c")
12174 (unspec:SI
12175 [(match_operand:V16QI 2 "register_operand" "x")
12176 (unspec:V16QI
12177 [(match_operand:V16QI 3 "memory_operand" "m")]
12178 UNSPEC_LOADU)
12179 (match_operand:SI 4 "const_0_to_255_operand" "n")]
12180 UNSPEC_PCMPISTR))
12181 (set (match_operand:V16QI 1 "register_operand" "=Yz")
12182 (unspec:V16QI
12183 [(match_dup 2)
12184 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12185 (match_dup 4)]
12186 UNSPEC_PCMPISTR))
12187 (set (reg:CC FLAGS_REG)
12188 (unspec:CC
12189 [(match_dup 2)
12190 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12191 (match_dup 4)]
12192 UNSPEC_PCMPISTR))]
12193 "TARGET_SSE4_2
12194 && can_create_pseudo_p ()"
12195 "#"
12196 "&& 1"
12197 [(const_int 0)]
12198 {
12199 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12200 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12201 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12202
12203 if (ecx)
12204 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12205 operands[3], operands[4]));
12206 if (xmm0)
12207 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12208 operands[3], operands[4]));
12209 if (flags && !(ecx || xmm0))
12210 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12211 operands[2], operands[3],
12212 operands[4]));
12213 if (!(flags || ecx || xmm0))
12214 emit_note (NOTE_INSN_DELETED);
12215
12216 DONE;
12217 }
12218 [(set_attr "type" "sselog")
12219 (set_attr "prefix_data16" "1")
12220 (set_attr "prefix_extra" "1")
12221 (set_attr "ssememalign" "8")
12222 (set_attr "length_immediate" "1")
12223 (set_attr "memory" "load")
12224 (set_attr "mode" "TI")])
12225
12226 (define_insn "sse4_2_pcmpistri"
12227 [(set (match_operand:SI 0 "register_operand" "=c,c")
12228 (unspec:SI
12229 [(match_operand:V16QI 1 "register_operand" "x,x")
12230 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12231 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12232 UNSPEC_PCMPISTR))
12233 (set (reg:CC FLAGS_REG)
12234 (unspec:CC
12235 [(match_dup 1)
12236 (match_dup 2)
12237 (match_dup 3)]
12238 UNSPEC_PCMPISTR))]
12239 "TARGET_SSE4_2"
12240 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
12241 [(set_attr "type" "sselog")
12242 (set_attr "prefix_data16" "1")
12243 (set_attr "prefix_extra" "1")
12244 (set_attr "ssememalign" "8")
12245 (set_attr "length_immediate" "1")
12246 (set_attr "prefix" "maybe_vex")
12247 (set_attr "memory" "none,load")
12248 (set_attr "btver2_decode" "vector")
12249 (set_attr "mode" "TI")])
12250
12251 (define_insn "sse4_2_pcmpistrm"
12252 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12253 (unspec:V16QI
12254 [(match_operand:V16QI 1 "register_operand" "x,x")
12255 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12256 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12257 UNSPEC_PCMPISTR))
12258 (set (reg:CC FLAGS_REG)
12259 (unspec:CC
12260 [(match_dup 1)
12261 (match_dup 2)
12262 (match_dup 3)]
12263 UNSPEC_PCMPISTR))]
12264 "TARGET_SSE4_2"
12265 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
12266 [(set_attr "type" "sselog")
12267 (set_attr "prefix_data16" "1")
12268 (set_attr "prefix_extra" "1")
12269 (set_attr "ssememalign" "8")
12270 (set_attr "length_immediate" "1")
12271 (set_attr "prefix" "maybe_vex")
12272 (set_attr "memory" "none,load")
12273 (set_attr "btver2_decode" "vector")
12274 (set_attr "mode" "TI")])
12275
12276 (define_insn "sse4_2_pcmpistr_cconly"
12277 [(set (reg:CC FLAGS_REG)
12278 (unspec:CC
12279 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12280 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
12281 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
12282 UNSPEC_PCMPISTR))
12283 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12284 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12285 "TARGET_SSE4_2"
12286 "@
12287 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
12288 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
12289 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
12290 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
12291 [(set_attr "type" "sselog")
12292 (set_attr "prefix_data16" "1")
12293 (set_attr "prefix_extra" "1")
12294 (set_attr "ssememalign" "8")
12295 (set_attr "length_immediate" "1")
12296 (set_attr "memory" "none,load,none,load")
12297 (set_attr "prefix" "maybe_vex")
12298 (set_attr "btver2_decode" "vector,vector,vector,vector")
12299 (set_attr "mode" "TI")])
12300
12301 (define_expand "avx512pf_gatherpf<mode>"
12302 [(unspec
12303 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12304 (mem:<ssescalarmode>
12305 (match_par_dup 5
12306 [(match_operand 2 "vsib_address_operand")
12307 (match_operand:VI48_512 1 "register_operand")
12308 (match_operand:SI 3 "const1248_operand")]))
12309 (match_operand:SI 4 "const_0_to_1_operand")]
12310 UNSPEC_GATHER_PREFETCH)]
12311 "TARGET_AVX512PF"
12312 {
12313 operands[5]
12314 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12315 operands[3]), UNSPEC_VSIBADDR);
12316 })
12317
12318 (define_insn "*avx512pf_gatherpf<mode>_mask"
12319 [(unspec
12320 [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
12321 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
12322 [(unspec:P
12323 [(match_operand:P 2 "vsib_address_operand" "Tv")
12324 (match_operand:VI48_512 1 "register_operand" "v")
12325 (match_operand:SI 3 "const1248_operand" "n")]
12326 UNSPEC_VSIBADDR)])
12327 (match_operand:SI 4 "const_0_to_1_operand" "n")]
12328 UNSPEC_GATHER_PREFETCH)]
12329 "TARGET_AVX512PF"
12330 {
12331 switch (INTVAL (operands[4]))
12332 {
12333 case 0:
12334 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12335 case 1:
12336 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12337 default:
12338 gcc_unreachable ();
12339 }
12340 }
12341 [(set_attr "type" "sse")
12342 (set_attr "prefix" "evex")
12343 (set_attr "mode" "XI")])
12344
12345 (define_insn "*avx512pf_gatherpf<mode>"
12346 [(unspec
12347 [(const_int -1)
12348 (match_operator:<ssescalarmode> 4 "vsib_mem_operator"
12349 [(unspec:P
12350 [(match_operand:P 1 "vsib_address_operand" "Tv")
12351 (match_operand:VI48_512 0 "register_operand" "v")
12352 (match_operand:SI 2 "const1248_operand" "n")]
12353 UNSPEC_VSIBADDR)])
12354 (match_operand:SI 3 "const_0_to_1_operand" "n")]
12355 UNSPEC_GATHER_PREFETCH)]
12356 "TARGET_AVX512PF"
12357 {
12358 switch (INTVAL (operands[3]))
12359 {
12360 case 0:
12361 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
12362 case 1:
12363 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
12364 default:
12365 gcc_unreachable ();
12366 }
12367 }
12368 [(set_attr "type" "sse")
12369 (set_attr "prefix" "evex")
12370 (set_attr "mode" "XI")])
12371
12372 (define_expand "avx512pf_scatterpf<mode>"
12373 [(unspec
12374 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12375 (mem:<ssescalarmode>
12376 (match_par_dup 5
12377 [(match_operand 2 "vsib_address_operand")
12378 (match_operand:VI48_512 1 "register_operand")
12379 (match_operand:SI 3 "const1248_operand")]))
12380 (match_operand:SI 4 "const_0_to_1_operand")]
12381 UNSPEC_SCATTER_PREFETCH)]
12382 "TARGET_AVX512PF"
12383 {
12384 operands[5]
12385 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12386 operands[3]), UNSPEC_VSIBADDR);
12387 })
12388
12389 (define_insn "*avx512pf_scatterpf<mode>_mask"
12390 [(unspec
12391 [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
12392 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
12393 [(unspec:P
12394 [(match_operand:P 2 "vsib_address_operand" "Tv")
12395 (match_operand:VI48_512 1 "register_operand" "v")
12396 (match_operand:SI 3 "const1248_operand" "n")]
12397 UNSPEC_VSIBADDR)])
12398 (match_operand:SI 4 "const_0_to_1_operand" "n")]
12399 UNSPEC_SCATTER_PREFETCH)]
12400 "TARGET_AVX512PF"
12401 {
12402 switch (INTVAL (operands[4]))
12403 {
12404 case 0:
12405 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12406 case 1:
12407 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12408 default:
12409 gcc_unreachable ();
12410 }
12411 }
12412 [(set_attr "type" "sse")
12413 (set_attr "prefix" "evex")
12414 (set_attr "mode" "XI")])
12415
12416 (define_insn "*avx512pf_scatterpf<mode>"
12417 [(unspec
12418 [(const_int -1)
12419 (match_operator:<ssescalarmode> 4 "vsib_mem_operator"
12420 [(unspec:P
12421 [(match_operand:P 1 "vsib_address_operand" "Tv")
12422 (match_operand:VI48_512 0 "register_operand" "v")
12423 (match_operand:SI 2 "const1248_operand" "n")]
12424 UNSPEC_VSIBADDR)])
12425 (match_operand:SI 3 "const_0_to_1_operand" "n")]
12426 UNSPEC_SCATTER_PREFETCH)]
12427 "TARGET_AVX512PF"
12428 {
12429 switch (INTVAL (operands[3]))
12430 {
12431 case 0:
12432 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
12433 case 1:
12434 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
12435 default:
12436 gcc_unreachable ();
12437 }
12438 }
12439 [(set_attr "type" "sse")
12440 (set_attr "prefix" "evex")
12441 (set_attr "mode" "XI")])
12442
12443 (define_insn "avx512er_exp2<mode><mask_name>"
12444 [(set (match_operand:VF_512 0 "register_operand" "=v")
12445 (unspec:VF_512
12446 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
12447 UNSPEC_EXP2))]
12448 "TARGET_AVX512ER"
12449 "vexp2<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12450 [(set_attr "prefix" "evex")
12451 (set_attr "mode" "<MODE>")])
12452
12453 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name>"
12454 [(set (match_operand:VF_512 0 "register_operand" "=v")
12455 (unspec:VF_512
12456 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
12457 UNSPEC_RCP28))]
12458 "TARGET_AVX512ER"
12459 "vrcp28<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12460 [(set_attr "prefix" "evex")
12461 (set_attr "mode" "<MODE>")])
12462
12463 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name>"
12464 [(set (match_operand:VF_512 0 "register_operand" "=v")
12465 (unspec:VF_512
12466 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
12467 UNSPEC_RSQRT28))]
12468 "TARGET_AVX512ER"
12469 "vrsqrt28<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12470 [(set_attr "prefix" "evex")
12471 (set_attr "mode" "<MODE>")])
12472
12473 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12474 ;;
12475 ;; XOP instructions
12476 ;;
12477 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12478
12479 (define_code_iterator xop_plus [plus ss_plus])
12480
12481 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
12482 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
12483
12484 ;; XOP parallel integer multiply/add instructions.
12485
12486 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
12487 [(set (match_operand:VI24_128 0 "register_operand" "=x")
12488 (xop_plus:VI24_128
12489 (mult:VI24_128
12490 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
12491 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
12492 (match_operand:VI24_128 3 "register_operand" "x")))]
12493 "TARGET_XOP"
12494 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12495 [(set_attr "type" "ssemuladd")
12496 (set_attr "mode" "TI")])
12497
12498 (define_insn "xop_p<macs>dql"
12499 [(set (match_operand:V2DI 0 "register_operand" "=x")
12500 (xop_plus:V2DI
12501 (mult:V2DI
12502 (sign_extend:V2DI
12503 (vec_select:V2SI
12504 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
12505 (parallel [(const_int 0) (const_int 2)])))
12506 (sign_extend:V2DI
12507 (vec_select:V2SI
12508 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12509 (parallel [(const_int 0) (const_int 2)]))))
12510 (match_operand:V2DI 3 "register_operand" "x")))]
12511 "TARGET_XOP"
12512 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12513 [(set_attr "type" "ssemuladd")
12514 (set_attr "mode" "TI")])
12515
12516 (define_insn "xop_p<macs>dqh"
12517 [(set (match_operand:V2DI 0 "register_operand" "=x")
12518 (xop_plus:V2DI
12519 (mult:V2DI
12520 (sign_extend:V2DI
12521 (vec_select:V2SI
12522 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
12523 (parallel [(const_int 1) (const_int 3)])))
12524 (sign_extend:V2DI
12525 (vec_select:V2SI
12526 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12527 (parallel [(const_int 1) (const_int 3)]))))
12528 (match_operand:V2DI 3 "register_operand" "x")))]
12529 "TARGET_XOP"
12530 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12531 [(set_attr "type" "ssemuladd")
12532 (set_attr "mode" "TI")])
12533
12534 ;; XOP parallel integer multiply/add instructions for the intrinisics
12535 (define_insn "xop_p<macs>wd"
12536 [(set (match_operand:V4SI 0 "register_operand" "=x")
12537 (xop_plus:V4SI
12538 (mult:V4SI
12539 (sign_extend:V4SI
12540 (vec_select:V4HI
12541 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
12542 (parallel [(const_int 1) (const_int 3)
12543 (const_int 5) (const_int 7)])))
12544 (sign_extend:V4SI
12545 (vec_select:V4HI
12546 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12547 (parallel [(const_int 1) (const_int 3)
12548 (const_int 5) (const_int 7)]))))
12549 (match_operand:V4SI 3 "register_operand" "x")))]
12550 "TARGET_XOP"
12551 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12552 [(set_attr "type" "ssemuladd")
12553 (set_attr "mode" "TI")])
12554
12555 (define_insn "xop_p<madcs>wd"
12556 [(set (match_operand:V4SI 0 "register_operand" "=x")
12557 (xop_plus:V4SI
12558 (plus:V4SI
12559 (mult:V4SI
12560 (sign_extend:V4SI
12561 (vec_select:V4HI
12562 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
12563 (parallel [(const_int 0) (const_int 2)
12564 (const_int 4) (const_int 6)])))
12565 (sign_extend:V4SI
12566 (vec_select:V4HI
12567 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12568 (parallel [(const_int 0) (const_int 2)
12569 (const_int 4) (const_int 6)]))))
12570 (mult:V4SI
12571 (sign_extend:V4SI
12572 (vec_select:V4HI
12573 (match_dup 1)
12574 (parallel [(const_int 1) (const_int 3)
12575 (const_int 5) (const_int 7)])))
12576 (sign_extend:V4SI
12577 (vec_select:V4HI
12578 (match_dup 2)
12579 (parallel [(const_int 1) (const_int 3)
12580 (const_int 5) (const_int 7)])))))
12581 (match_operand:V4SI 3 "register_operand" "x")))]
12582 "TARGET_XOP"
12583 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12584 [(set_attr "type" "ssemuladd")
12585 (set_attr "mode" "TI")])
12586
12587 ;; XOP parallel XMM conditional moves
12588 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
12589 [(set (match_operand:V 0 "register_operand" "=x,x")
12590 (if_then_else:V
12591 (match_operand:V 3 "nonimmediate_operand" "x,m")
12592 (match_operand:V 1 "register_operand" "x,x")
12593 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
12594 "TARGET_XOP"
12595 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12596 [(set_attr "type" "sse4arg")])
12597
12598 ;; XOP horizontal add/subtract instructions
12599 (define_insn "xop_phadd<u>bw"
12600 [(set (match_operand:V8HI 0 "register_operand" "=x")
12601 (plus:V8HI
12602 (any_extend:V8HI
12603 (vec_select:V8QI
12604 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12605 (parallel [(const_int 0) (const_int 2)
12606 (const_int 4) (const_int 6)
12607 (const_int 8) (const_int 10)
12608 (const_int 12) (const_int 14)])))
12609 (any_extend:V8HI
12610 (vec_select:V8QI
12611 (match_dup 1)
12612 (parallel [(const_int 1) (const_int 3)
12613 (const_int 5) (const_int 7)
12614 (const_int 9) (const_int 11)
12615 (const_int 13) (const_int 15)])))))]
12616 "TARGET_XOP"
12617 "vphadd<u>bw\t{%1, %0|%0, %1}"
12618 [(set_attr "type" "sseiadd1")])
12619
12620 (define_insn "xop_phadd<u>bd"
12621 [(set (match_operand:V4SI 0 "register_operand" "=x")
12622 (plus:V4SI
12623 (plus:V4SI
12624 (any_extend:V4SI
12625 (vec_select:V4QI
12626 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12627 (parallel [(const_int 0) (const_int 4)
12628 (const_int 8) (const_int 12)])))
12629 (any_extend:V4SI
12630 (vec_select:V4QI
12631 (match_dup 1)
12632 (parallel [(const_int 1) (const_int 5)
12633 (const_int 9) (const_int 13)]))))
12634 (plus:V4SI
12635 (any_extend:V4SI
12636 (vec_select:V4QI
12637 (match_dup 1)
12638 (parallel [(const_int 2) (const_int 6)
12639 (const_int 10) (const_int 14)])))
12640 (any_extend:V4SI
12641 (vec_select:V4QI
12642 (match_dup 1)
12643 (parallel [(const_int 3) (const_int 7)
12644 (const_int 11) (const_int 15)]))))))]
12645 "TARGET_XOP"
12646 "vphadd<u>bd\t{%1, %0|%0, %1}"
12647 [(set_attr "type" "sseiadd1")])
12648
12649 (define_insn "xop_phadd<u>bq"
12650 [(set (match_operand:V2DI 0 "register_operand" "=x")
12651 (plus:V2DI
12652 (plus:V2DI
12653 (plus:V2DI
12654 (any_extend:V2DI
12655 (vec_select:V2QI
12656 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12657 (parallel [(const_int 0) (const_int 8)])))
12658 (any_extend:V2DI
12659 (vec_select:V2QI
12660 (match_dup 1)
12661 (parallel [(const_int 1) (const_int 9)]))))
12662 (plus:V2DI
12663 (any_extend:V2DI
12664 (vec_select:V2QI
12665 (match_dup 1)
12666 (parallel [(const_int 2) (const_int 10)])))
12667 (any_extend:V2DI
12668 (vec_select:V2QI
12669 (match_dup 1)
12670 (parallel [(const_int 3) (const_int 11)])))))
12671 (plus:V2DI
12672 (plus:V2DI
12673 (any_extend:V2DI
12674 (vec_select:V2QI
12675 (match_dup 1)
12676 (parallel [(const_int 4) (const_int 12)])))
12677 (any_extend:V2DI
12678 (vec_select:V2QI
12679 (match_dup 1)
12680 (parallel [(const_int 5) (const_int 13)]))))
12681 (plus:V2DI
12682 (any_extend:V2DI
12683 (vec_select:V2QI
12684 (match_dup 1)
12685 (parallel [(const_int 6) (const_int 14)])))
12686 (any_extend:V2DI
12687 (vec_select:V2QI
12688 (match_dup 1)
12689 (parallel [(const_int 7) (const_int 15)])))))))]
12690 "TARGET_XOP"
12691 "vphadd<u>bq\t{%1, %0|%0, %1}"
12692 [(set_attr "type" "sseiadd1")])
12693
12694 (define_insn "xop_phadd<u>wd"
12695 [(set (match_operand:V4SI 0 "register_operand" "=x")
12696 (plus:V4SI
12697 (any_extend:V4SI
12698 (vec_select:V4HI
12699 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
12700 (parallel [(const_int 0) (const_int 2)
12701 (const_int 4) (const_int 6)])))
12702 (any_extend:V4SI
12703 (vec_select:V4HI
12704 (match_dup 1)
12705 (parallel [(const_int 1) (const_int 3)
12706 (const_int 5) (const_int 7)])))))]
12707 "TARGET_XOP"
12708 "vphadd<u>wd\t{%1, %0|%0, %1}"
12709 [(set_attr "type" "sseiadd1")])
12710
12711 (define_insn "xop_phadd<u>wq"
12712 [(set (match_operand:V2DI 0 "register_operand" "=x")
12713 (plus:V2DI
12714 (plus:V2DI
12715 (any_extend:V2DI
12716 (vec_select:V2HI
12717 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
12718 (parallel [(const_int 0) (const_int 4)])))
12719 (any_extend:V2DI
12720 (vec_select:V2HI
12721 (match_dup 1)
12722 (parallel [(const_int 1) (const_int 5)]))))
12723 (plus:V2DI
12724 (any_extend:V2DI
12725 (vec_select:V2HI
12726 (match_dup 1)
12727 (parallel [(const_int 2) (const_int 6)])))
12728 (any_extend:V2DI
12729 (vec_select:V2HI
12730 (match_dup 1)
12731 (parallel [(const_int 3) (const_int 7)]))))))]
12732 "TARGET_XOP"
12733 "vphadd<u>wq\t{%1, %0|%0, %1}"
12734 [(set_attr "type" "sseiadd1")])
12735
12736 (define_insn "xop_phadd<u>dq"
12737 [(set (match_operand:V2DI 0 "register_operand" "=x")
12738 (plus:V2DI
12739 (any_extend:V2DI
12740 (vec_select:V2SI
12741 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
12742 (parallel [(const_int 0) (const_int 2)])))
12743 (any_extend:V2DI
12744 (vec_select:V2SI
12745 (match_dup 1)
12746 (parallel [(const_int 1) (const_int 3)])))))]
12747 "TARGET_XOP"
12748 "vphadd<u>dq\t{%1, %0|%0, %1}"
12749 [(set_attr "type" "sseiadd1")])
12750
12751 (define_insn "xop_phsubbw"
12752 [(set (match_operand:V8HI 0 "register_operand" "=x")
12753 (minus:V8HI
12754 (sign_extend:V8HI
12755 (vec_select:V8QI
12756 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12757 (parallel [(const_int 0) (const_int 2)
12758 (const_int 4) (const_int 6)
12759 (const_int 8) (const_int 10)
12760 (const_int 12) (const_int 14)])))
12761 (sign_extend:V8HI
12762 (vec_select:V8QI
12763 (match_dup 1)
12764 (parallel [(const_int 1) (const_int 3)
12765 (const_int 5) (const_int 7)
12766 (const_int 9) (const_int 11)
12767 (const_int 13) (const_int 15)])))))]
12768 "TARGET_XOP"
12769 "vphsubbw\t{%1, %0|%0, %1}"
12770 [(set_attr "type" "sseiadd1")])
12771
12772 (define_insn "xop_phsubwd"
12773 [(set (match_operand:V4SI 0 "register_operand" "=x")
12774 (minus:V4SI
12775 (sign_extend:V4SI
12776 (vec_select:V4HI
12777 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
12778 (parallel [(const_int 0) (const_int 2)
12779 (const_int 4) (const_int 6)])))
12780 (sign_extend:V4SI
12781 (vec_select:V4HI
12782 (match_dup 1)
12783 (parallel [(const_int 1) (const_int 3)
12784 (const_int 5) (const_int 7)])))))]
12785 "TARGET_XOP"
12786 "vphsubwd\t{%1, %0|%0, %1}"
12787 [(set_attr "type" "sseiadd1")])
12788
12789 (define_insn "xop_phsubdq"
12790 [(set (match_operand:V2DI 0 "register_operand" "=x")
12791 (minus:V2DI
12792 (sign_extend:V2DI
12793 (vec_select:V2SI
12794 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
12795 (parallel [(const_int 0) (const_int 2)])))
12796 (sign_extend:V2DI
12797 (vec_select:V2SI
12798 (match_dup 1)
12799 (parallel [(const_int 1) (const_int 3)])))))]
12800 "TARGET_XOP"
12801 "vphsubdq\t{%1, %0|%0, %1}"
12802 [(set_attr "type" "sseiadd1")])
12803
12804 ;; XOP permute instructions
12805 (define_insn "xop_pperm"
12806 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
12807 (unspec:V16QI
12808 [(match_operand:V16QI 1 "register_operand" "x,x")
12809 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12810 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
12811 UNSPEC_XOP_PERMUTE))]
12812 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
12813 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12814 [(set_attr "type" "sse4arg")
12815 (set_attr "mode" "TI")])
12816
12817 ;; XOP pack instructions that combine two vectors into a smaller vector
12818 (define_insn "xop_pperm_pack_v2di_v4si"
12819 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
12820 (vec_concat:V4SI
12821 (truncate:V2SI
12822 (match_operand:V2DI 1 "register_operand" "x,x"))
12823 (truncate:V2SI
12824 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
12825 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
12826 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
12827 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12828 [(set_attr "type" "sse4arg")
12829 (set_attr "mode" "TI")])
12830
12831 (define_insn "xop_pperm_pack_v4si_v8hi"
12832 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
12833 (vec_concat:V8HI
12834 (truncate:V4HI
12835 (match_operand:V4SI 1 "register_operand" "x,x"))
12836 (truncate:V4HI
12837 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
12838 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
12839 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
12840 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12841 [(set_attr "type" "sse4arg")
12842 (set_attr "mode" "TI")])
12843
12844 (define_insn "xop_pperm_pack_v8hi_v16qi"
12845 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
12846 (vec_concat:V16QI
12847 (truncate:V8QI
12848 (match_operand:V8HI 1 "register_operand" "x,x"))
12849 (truncate:V8QI
12850 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
12851 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
12852 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
12853 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12854 [(set_attr "type" "sse4arg")
12855 (set_attr "mode" "TI")])
12856
12857 ;; XOP packed rotate instructions
12858 (define_expand "rotl<mode>3"
12859 [(set (match_operand:VI_128 0 "register_operand")
12860 (rotate:VI_128
12861 (match_operand:VI_128 1 "nonimmediate_operand")
12862 (match_operand:SI 2 "general_operand")))]
12863 "TARGET_XOP"
12864 {
12865 /* If we were given a scalar, convert it to parallel */
12866 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
12867 {
12868 rtvec vs = rtvec_alloc (<ssescalarnum>);
12869 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
12870 rtx reg = gen_reg_rtx (<MODE>mode);
12871 rtx op2 = operands[2];
12872 int i;
12873
12874 if (GET_MODE (op2) != <ssescalarmode>mode)
12875 {
12876 op2 = gen_reg_rtx (<ssescalarmode>mode);
12877 convert_move (op2, operands[2], false);
12878 }
12879
12880 for (i = 0; i < <ssescalarnum>; i++)
12881 RTVEC_ELT (vs, i) = op2;
12882
12883 emit_insn (gen_vec_init<mode> (reg, par));
12884 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
12885 DONE;
12886 }
12887 })
12888
12889 (define_expand "rotr<mode>3"
12890 [(set (match_operand:VI_128 0 "register_operand")
12891 (rotatert:VI_128
12892 (match_operand:VI_128 1 "nonimmediate_operand")
12893 (match_operand:SI 2 "general_operand")))]
12894 "TARGET_XOP"
12895 {
12896 /* If we were given a scalar, convert it to parallel */
12897 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
12898 {
12899 rtvec vs = rtvec_alloc (<ssescalarnum>);
12900 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
12901 rtx neg = gen_reg_rtx (<MODE>mode);
12902 rtx reg = gen_reg_rtx (<MODE>mode);
12903 rtx op2 = operands[2];
12904 int i;
12905
12906 if (GET_MODE (op2) != <ssescalarmode>mode)
12907 {
12908 op2 = gen_reg_rtx (<ssescalarmode>mode);
12909 convert_move (op2, operands[2], false);
12910 }
12911
12912 for (i = 0; i < <ssescalarnum>; i++)
12913 RTVEC_ELT (vs, i) = op2;
12914
12915 emit_insn (gen_vec_init<mode> (reg, par));
12916 emit_insn (gen_neg<mode>2 (neg, reg));
12917 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
12918 DONE;
12919 }
12920 })
12921
12922 (define_insn "xop_rotl<mode>3"
12923 [(set (match_operand:VI_128 0 "register_operand" "=x")
12924 (rotate:VI_128
12925 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
12926 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
12927 "TARGET_XOP"
12928 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12929 [(set_attr "type" "sseishft")
12930 (set_attr "length_immediate" "1")
12931 (set_attr "mode" "TI")])
12932
12933 (define_insn "xop_rotr<mode>3"
12934 [(set (match_operand:VI_128 0 "register_operand" "=x")
12935 (rotatert:VI_128
12936 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
12937 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
12938 "TARGET_XOP"
12939 {
12940 operands[3]
12941 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
12942 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
12943 }
12944 [(set_attr "type" "sseishft")
12945 (set_attr "length_immediate" "1")
12946 (set_attr "mode" "TI")])
12947
12948 (define_expand "vrotr<mode>3"
12949 [(match_operand:VI_128 0 "register_operand")
12950 (match_operand:VI_128 1 "register_operand")
12951 (match_operand:VI_128 2 "register_operand")]
12952 "TARGET_XOP"
12953 {
12954 rtx reg = gen_reg_rtx (<MODE>mode);
12955 emit_insn (gen_neg<mode>2 (reg, operands[2]));
12956 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
12957 DONE;
12958 })
12959
12960 (define_expand "vrotl<mode>3"
12961 [(match_operand:VI_128 0 "register_operand")
12962 (match_operand:VI_128 1 "register_operand")
12963 (match_operand:VI_128 2 "register_operand")]
12964 "TARGET_XOP"
12965 {
12966 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
12967 DONE;
12968 })
12969
12970 (define_insn "xop_vrotl<mode>3"
12971 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
12972 (if_then_else:VI_128
12973 (ge:VI_128
12974 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
12975 (const_int 0))
12976 (rotate:VI_128
12977 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
12978 (match_dup 2))
12979 (rotatert:VI_128
12980 (match_dup 1)
12981 (neg:VI_128 (match_dup 2)))))]
12982 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12983 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12984 [(set_attr "type" "sseishft")
12985 (set_attr "prefix_data16" "0")
12986 (set_attr "prefix_extra" "2")
12987 (set_attr "mode" "TI")])
12988
12989 ;; XOP packed shift instructions.
12990 (define_expand "vlshr<mode>3"
12991 [(set (match_operand:VI12_128 0 "register_operand")
12992 (lshiftrt:VI12_128
12993 (match_operand:VI12_128 1 "register_operand")
12994 (match_operand:VI12_128 2 "nonimmediate_operand")))]
12995 "TARGET_XOP"
12996 {
12997 rtx neg = gen_reg_rtx (<MODE>mode);
12998 emit_insn (gen_neg<mode>2 (neg, operands[2]));
12999 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
13000 DONE;
13001 })
13002
13003 (define_expand "vlshr<mode>3"
13004 [(set (match_operand:VI48_128 0 "register_operand")
13005 (lshiftrt:VI48_128
13006 (match_operand:VI48_128 1 "register_operand")
13007 (match_operand:VI48_128 2 "nonimmediate_operand")))]
13008 "TARGET_AVX2 || TARGET_XOP"
13009 {
13010 if (!TARGET_AVX2)
13011 {
13012 rtx neg = gen_reg_rtx (<MODE>mode);
13013 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13014 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
13015 DONE;
13016 }
13017 })
13018
13019 (define_expand "vlshr<mode>3"
13020 [(set (match_operand:VI48_512 0 "register_operand")
13021 (lshiftrt:VI48_512
13022 (match_operand:VI48_512 1 "register_operand")
13023 (match_operand:VI48_512 2 "nonimmediate_operand")))]
13024 "TARGET_AVX512F")
13025
13026 (define_expand "vlshr<mode>3"
13027 [(set (match_operand:VI48_256 0 "register_operand")
13028 (lshiftrt:VI48_256
13029 (match_operand:VI48_256 1 "register_operand")
13030 (match_operand:VI48_256 2 "nonimmediate_operand")))]
13031 "TARGET_AVX2")
13032
13033 (define_expand "vashr<mode>3"
13034 [(set (match_operand:VI128_128 0 "register_operand")
13035 (ashiftrt:VI128_128
13036 (match_operand:VI128_128 1 "register_operand")
13037 (match_operand:VI128_128 2 "nonimmediate_operand")))]
13038 "TARGET_XOP"
13039 {
13040 rtx neg = gen_reg_rtx (<MODE>mode);
13041 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13042 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
13043 DONE;
13044 })
13045
13046 (define_expand "vashrv4si3"
13047 [(set (match_operand:V4SI 0 "register_operand")
13048 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
13049 (match_operand:V4SI 2 "nonimmediate_operand")))]
13050 "TARGET_AVX2 || TARGET_XOP"
13051 {
13052 if (!TARGET_AVX2)
13053 {
13054 rtx neg = gen_reg_rtx (V4SImode);
13055 emit_insn (gen_negv4si2 (neg, operands[2]));
13056 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
13057 DONE;
13058 }
13059 })
13060
13061 (define_expand "vashrv16si3"
13062 [(set (match_operand:V16SI 0 "register_operand")
13063 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
13064 (match_operand:V16SI 2 "nonimmediate_operand")))]
13065 "TARGET_AVX512F")
13066
13067 (define_expand "vashrv8si3"
13068 [(set (match_operand:V8SI 0 "register_operand")
13069 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
13070 (match_operand:V8SI 2 "nonimmediate_operand")))]
13071 "TARGET_AVX2")
13072
13073 (define_expand "vashl<mode>3"
13074 [(set (match_operand:VI12_128 0 "register_operand")
13075 (ashift:VI12_128
13076 (match_operand:VI12_128 1 "register_operand")
13077 (match_operand:VI12_128 2 "nonimmediate_operand")))]
13078 "TARGET_XOP"
13079 {
13080 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
13081 DONE;
13082 })
13083
13084 (define_expand "vashl<mode>3"
13085 [(set (match_operand:VI48_128 0 "register_operand")
13086 (ashift:VI48_128
13087 (match_operand:VI48_128 1 "register_operand")
13088 (match_operand:VI48_128 2 "nonimmediate_operand")))]
13089 "TARGET_AVX2 || TARGET_XOP"
13090 {
13091 if (!TARGET_AVX2)
13092 {
13093 operands[2] = force_reg (<MODE>mode, operands[2]);
13094 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
13095 DONE;
13096 }
13097 })
13098
13099 (define_expand "vashl<mode>3"
13100 [(set (match_operand:VI48_512 0 "register_operand")
13101 (ashift:VI48_512
13102 (match_operand:VI48_512 1 "register_operand")
13103 (match_operand:VI48_512 2 "nonimmediate_operand")))]
13104 "TARGET_AVX512F")
13105
13106 (define_expand "vashl<mode>3"
13107 [(set (match_operand:VI48_256 0 "register_operand")
13108 (ashift:VI48_256
13109 (match_operand:VI48_256 1 "register_operand")
13110 (match_operand:VI48_256 2 "nonimmediate_operand")))]
13111 "TARGET_AVX2")
13112
13113 (define_insn "xop_sha<mode>3"
13114 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13115 (if_then_else:VI_128
13116 (ge:VI_128
13117 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13118 (const_int 0))
13119 (ashift:VI_128
13120 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13121 (match_dup 2))
13122 (ashiftrt:VI_128
13123 (match_dup 1)
13124 (neg:VI_128 (match_dup 2)))))]
13125 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13126 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13127 [(set_attr "type" "sseishft")
13128 (set_attr "prefix_data16" "0")
13129 (set_attr "prefix_extra" "2")
13130 (set_attr "mode" "TI")])
13131
13132 (define_insn "xop_shl<mode>3"
13133 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13134 (if_then_else:VI_128
13135 (ge:VI_128
13136 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13137 (const_int 0))
13138 (ashift:VI_128
13139 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13140 (match_dup 2))
13141 (lshiftrt:VI_128
13142 (match_dup 1)
13143 (neg:VI_128 (match_dup 2)))))]
13144 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13145 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13146 [(set_attr "type" "sseishft")
13147 (set_attr "prefix_data16" "0")
13148 (set_attr "prefix_extra" "2")
13149 (set_attr "mode" "TI")])
13150
13151 (define_expand "<shift_insn><mode>3"
13152 [(set (match_operand:VI1_AVX2 0 "register_operand")
13153 (any_shift:VI1_AVX2
13154 (match_operand:VI1_AVX2 1 "register_operand")
13155 (match_operand:SI 2 "nonmemory_operand")))]
13156 "TARGET_SSE2"
13157 {
13158 if (TARGET_XOP && <MODE>mode == V16QImode)
13159 {
13160 bool negate = false;
13161 rtx (*gen) (rtx, rtx, rtx);
13162 rtx tmp, par;
13163 int i;
13164
13165 if (<CODE> != ASHIFT)
13166 {
13167 if (CONST_INT_P (operands[2]))
13168 operands[2] = GEN_INT (-INTVAL (operands[2]));
13169 else
13170 negate = true;
13171 }
13172 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
13173 for (i = 0; i < 16; i++)
13174 XVECEXP (par, 0, i) = operands[2];
13175
13176 tmp = gen_reg_rtx (V16QImode);
13177 emit_insn (gen_vec_initv16qi (tmp, par));
13178
13179 if (negate)
13180 emit_insn (gen_negv16qi2 (tmp, tmp));
13181
13182 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
13183 emit_insn (gen (operands[0], operands[1], tmp));
13184 }
13185 else
13186 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
13187 DONE;
13188 })
13189
13190 (define_expand "ashrv2di3"
13191 [(set (match_operand:V2DI 0 "register_operand")
13192 (ashiftrt:V2DI
13193 (match_operand:V2DI 1 "register_operand")
13194 (match_operand:DI 2 "nonmemory_operand")))]
13195 "TARGET_XOP"
13196 {
13197 rtx reg = gen_reg_rtx (V2DImode);
13198 rtx par;
13199 bool negate = false;
13200 int i;
13201
13202 if (CONST_INT_P (operands[2]))
13203 operands[2] = GEN_INT (-INTVAL (operands[2]));
13204 else
13205 negate = true;
13206
13207 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
13208 for (i = 0; i < 2; i++)
13209 XVECEXP (par, 0, i) = operands[2];
13210
13211 emit_insn (gen_vec_initv2di (reg, par));
13212
13213 if (negate)
13214 emit_insn (gen_negv2di2 (reg, reg));
13215
13216 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
13217 DONE;
13218 })
13219
13220 ;; XOP FRCZ support
13221 (define_insn "xop_frcz<mode>2"
13222 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
13223 (unspec:FMAMODE
13224 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
13225 UNSPEC_FRCZ))]
13226 "TARGET_XOP"
13227 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
13228 [(set_attr "type" "ssecvt1")
13229 (set_attr "mode" "<MODE>")])
13230
13231 (define_expand "xop_vmfrcz<mode>2"
13232 [(set (match_operand:VF_128 0 "register_operand")
13233 (vec_merge:VF_128
13234 (unspec:VF_128
13235 [(match_operand:VF_128 1 "nonimmediate_operand")]
13236 UNSPEC_FRCZ)
13237 (match_dup 3)
13238 (const_int 1)))]
13239 "TARGET_XOP"
13240 "operands[3] = CONST0_RTX (<MODE>mode);")
13241
13242 (define_insn "*xop_vmfrcz<mode>2"
13243 [(set (match_operand:VF_128 0 "register_operand" "=x")
13244 (vec_merge:VF_128
13245 (unspec:VF_128
13246 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
13247 UNSPEC_FRCZ)
13248 (match_operand:VF_128 2 "const0_operand")
13249 (const_int 1)))]
13250 "TARGET_XOP"
13251 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
13252 [(set_attr "type" "ssecvt1")
13253 (set_attr "mode" "<MODE>")])
13254
13255 (define_insn "xop_maskcmp<mode>3"
13256 [(set (match_operand:VI_128 0 "register_operand" "=x")
13257 (match_operator:VI_128 1 "ix86_comparison_int_operator"
13258 [(match_operand:VI_128 2 "register_operand" "x")
13259 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
13260 "TARGET_XOP"
13261 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13262 [(set_attr "type" "sse4arg")
13263 (set_attr "prefix_data16" "0")
13264 (set_attr "prefix_rep" "0")
13265 (set_attr "prefix_extra" "2")
13266 (set_attr "length_immediate" "1")
13267 (set_attr "mode" "TI")])
13268
13269 (define_insn "xop_maskcmp_uns<mode>3"
13270 [(set (match_operand:VI_128 0 "register_operand" "=x")
13271 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
13272 [(match_operand:VI_128 2 "register_operand" "x")
13273 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
13274 "TARGET_XOP"
13275 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13276 [(set_attr "type" "ssecmp")
13277 (set_attr "prefix_data16" "0")
13278 (set_attr "prefix_rep" "0")
13279 (set_attr "prefix_extra" "2")
13280 (set_attr "length_immediate" "1")
13281 (set_attr "mode" "TI")])
13282
13283 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
13284 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
13285 ;; the exact instruction generated for the intrinsic.
13286 (define_insn "xop_maskcmp_uns2<mode>3"
13287 [(set (match_operand:VI_128 0 "register_operand" "=x")
13288 (unspec:VI_128
13289 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
13290 [(match_operand:VI_128 2 "register_operand" "x")
13291 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
13292 UNSPEC_XOP_UNSIGNED_CMP))]
13293 "TARGET_XOP"
13294 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13295 [(set_attr "type" "ssecmp")
13296 (set_attr "prefix_data16" "0")
13297 (set_attr "prefix_extra" "2")
13298 (set_attr "length_immediate" "1")
13299 (set_attr "mode" "TI")])
13300
13301 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
13302 ;; being added here to be complete.
13303 (define_insn "xop_pcom_tf<mode>3"
13304 [(set (match_operand:VI_128 0 "register_operand" "=x")
13305 (unspec:VI_128
13306 [(match_operand:VI_128 1 "register_operand" "x")
13307 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
13308 (match_operand:SI 3 "const_int_operand" "n")]
13309 UNSPEC_XOP_TRUEFALSE))]
13310 "TARGET_XOP"
13311 {
13312 return ((INTVAL (operands[3]) != 0)
13313 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13314 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
13315 }
13316 [(set_attr "type" "ssecmp")
13317 (set_attr "prefix_data16" "0")
13318 (set_attr "prefix_extra" "2")
13319 (set_attr "length_immediate" "1")
13320 (set_attr "mode" "TI")])
13321
13322 (define_insn "xop_vpermil2<mode>3"
13323 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
13324 (unspec:VF_128_256
13325 [(match_operand:VF_128_256 1 "register_operand" "x")
13326 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
13327 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
13328 (match_operand:SI 4 "const_0_to_3_operand" "n")]
13329 UNSPEC_VPERMIL2))]
13330 "TARGET_XOP"
13331 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
13332 [(set_attr "type" "sse4arg")
13333 (set_attr "length_immediate" "1")
13334 (set_attr "mode" "<MODE>")])
13335
13336 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13337
13338 (define_insn "aesenc"
13339 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13340 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13341 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13342 UNSPEC_AESENC))]
13343 "TARGET_AES"
13344 "@
13345 aesenc\t{%2, %0|%0, %2}
13346 vaesenc\t{%2, %1, %0|%0, %1, %2}"
13347 [(set_attr "isa" "noavx,avx")
13348 (set_attr "type" "sselog1")
13349 (set_attr "prefix_extra" "1")
13350 (set_attr "prefix" "orig,vex")
13351 (set_attr "btver2_decode" "double,double")
13352 (set_attr "mode" "TI")])
13353
13354 (define_insn "aesenclast"
13355 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13356 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13357 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13358 UNSPEC_AESENCLAST))]
13359 "TARGET_AES"
13360 "@
13361 aesenclast\t{%2, %0|%0, %2}
13362 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
13363 [(set_attr "isa" "noavx,avx")
13364 (set_attr "type" "sselog1")
13365 (set_attr "prefix_extra" "1")
13366 (set_attr "prefix" "orig,vex")
13367 (set_attr "btver2_decode" "double,double")
13368 (set_attr "mode" "TI")])
13369
13370 (define_insn "aesdec"
13371 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13372 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13373 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13374 UNSPEC_AESDEC))]
13375 "TARGET_AES"
13376 "@
13377 aesdec\t{%2, %0|%0, %2}
13378 vaesdec\t{%2, %1, %0|%0, %1, %2}"
13379 [(set_attr "isa" "noavx,avx")
13380 (set_attr "type" "sselog1")
13381 (set_attr "prefix_extra" "1")
13382 (set_attr "prefix" "orig,vex")
13383 (set_attr "btver2_decode" "double,double")
13384 (set_attr "mode" "TI")])
13385
13386 (define_insn "aesdeclast"
13387 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13388 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13389 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13390 UNSPEC_AESDECLAST))]
13391 "TARGET_AES"
13392 "@
13393 aesdeclast\t{%2, %0|%0, %2}
13394 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
13395 [(set_attr "isa" "noavx,avx")
13396 (set_attr "type" "sselog1")
13397 (set_attr "prefix_extra" "1")
13398 (set_attr "prefix" "orig,vex")
13399 (set_attr "btver2_decode" "double,double")
13400 (set_attr "mode" "TI")])
13401
13402 (define_insn "aesimc"
13403 [(set (match_operand:V2DI 0 "register_operand" "=x")
13404 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
13405 UNSPEC_AESIMC))]
13406 "TARGET_AES"
13407 "%vaesimc\t{%1, %0|%0, %1}"
13408 [(set_attr "type" "sselog1")
13409 (set_attr "prefix_extra" "1")
13410 (set_attr "prefix" "maybe_vex")
13411 (set_attr "mode" "TI")])
13412
13413 (define_insn "aeskeygenassist"
13414 [(set (match_operand:V2DI 0 "register_operand" "=x")
13415 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
13416 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13417 UNSPEC_AESKEYGENASSIST))]
13418 "TARGET_AES"
13419 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
13420 [(set_attr "type" "sselog1")
13421 (set_attr "prefix_extra" "1")
13422 (set_attr "length_immediate" "1")
13423 (set_attr "prefix" "maybe_vex")
13424 (set_attr "mode" "TI")])
13425
13426 (define_insn "pclmulqdq"
13427 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13428 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13429 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
13430 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
13431 UNSPEC_PCLMUL))]
13432 "TARGET_PCLMUL"
13433 "@
13434 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
13435 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13436 [(set_attr "isa" "noavx,avx")
13437 (set_attr "type" "sselog1")
13438 (set_attr "prefix_extra" "1")
13439 (set_attr "length_immediate" "1")
13440 (set_attr "prefix" "orig,vex")
13441 (set_attr "mode" "TI")])
13442
13443 (define_expand "avx_vzeroall"
13444 [(match_par_dup 0 [(const_int 0)])]
13445 "TARGET_AVX"
13446 {
13447 int nregs = TARGET_64BIT ? 16 : 8;
13448 int regno;
13449
13450 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
13451
13452 XVECEXP (operands[0], 0, 0)
13453 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
13454 UNSPECV_VZEROALL);
13455
13456 for (regno = 0; regno < nregs; regno++)
13457 XVECEXP (operands[0], 0, regno + 1)
13458 = gen_rtx_SET (VOIDmode,
13459 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
13460 CONST0_RTX (V8SImode));
13461 })
13462
13463 (define_insn "*avx_vzeroall"
13464 [(match_parallel 0 "vzeroall_operation"
13465 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
13466 "TARGET_AVX"
13467 "vzeroall"
13468 [(set_attr "type" "sse")
13469 (set_attr "modrm" "0")
13470 (set_attr "memory" "none")
13471 (set_attr "prefix" "vex")
13472 (set_attr "btver2_decode" "vector")
13473 (set_attr "mode" "OI")])
13474
13475 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
13476 ;; if the upper 128bits are unused.
13477 (define_insn "avx_vzeroupper"
13478 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
13479 "TARGET_AVX"
13480 "vzeroupper"
13481 [(set_attr "type" "sse")
13482 (set_attr "modrm" "0")
13483 (set_attr "memory" "none")
13484 (set_attr "prefix" "vex")
13485 (set_attr "btver2_decode" "vector")
13486 (set_attr "mode" "OI")])
13487
13488 (define_insn "avx2_pbroadcast<mode>"
13489 [(set (match_operand:VI 0 "register_operand" "=x")
13490 (vec_duplicate:VI
13491 (vec_select:<ssescalarmode>
13492 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
13493 (parallel [(const_int 0)]))))]
13494 "TARGET_AVX2"
13495 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
13496 [(set_attr "type" "ssemov")
13497 (set_attr "prefix_extra" "1")
13498 (set_attr "prefix" "vex")
13499 (set_attr "mode" "<sseinsnmode>")])
13500
13501 (define_insn "avx2_pbroadcast<mode>_1"
13502 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
13503 (vec_duplicate:VI_256
13504 (vec_select:<ssescalarmode>
13505 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
13506 (parallel [(const_int 0)]))))]
13507 "TARGET_AVX2"
13508 "@
13509 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
13510 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
13511 [(set_attr "type" "ssemov")
13512 (set_attr "prefix_extra" "1")
13513 (set_attr "prefix" "vex")
13514 (set_attr "mode" "<sseinsnmode>")])
13515
13516 (define_insn "<avx2_avx512f>_permvar<mode><mask_name>"
13517 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
13518 (unspec:VI48F_256_512
13519 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
13520 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
13521 UNSPEC_VPERMVAR))]
13522 "TARGET_AVX2 && <mask_mode512bit_condition>"
13523 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
13524 [(set_attr "type" "sselog")
13525 (set_attr "prefix" "<mask_prefix2>")
13526 (set_attr "mode" "<sseinsnmode>")])
13527
13528 (define_expand "<avx2_avx512f>_perm<mode>"
13529 [(match_operand:VI8F_256_512 0 "register_operand")
13530 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
13531 (match_operand:SI 2 "const_0_to_255_operand")]
13532 "TARGET_AVX2"
13533 {
13534 int mask = INTVAL (operands[2]);
13535 emit_insn (gen_<avx2_avx512f>_perm<mode>_1 (operands[0], operands[1],
13536 GEN_INT ((mask >> 0) & 3),
13537 GEN_INT ((mask >> 2) & 3),
13538 GEN_INT ((mask >> 4) & 3),
13539 GEN_INT ((mask >> 6) & 3)));
13540 DONE;
13541 })
13542
13543 (define_expand "avx512f_perm<mode>_mask"
13544 [(match_operand:V8FI 0 "register_operand")
13545 (match_operand:V8FI 1 "nonimmediate_operand")
13546 (match_operand:SI 2 "const_0_to_255_operand")
13547 (match_operand:V8FI 3 "vector_move_operand")
13548 (match_operand:<avx512fmaskmode> 4 "register_operand")]
13549 "TARGET_AVX512F"
13550 {
13551 int mask = INTVAL (operands[2]);
13552 emit_insn (gen_<avx2_avx512f>_perm<mode>_1_mask (operands[0], operands[1],
13553 GEN_INT ((mask >> 0) & 3),
13554 GEN_INT ((mask >> 2) & 3),
13555 GEN_INT ((mask >> 4) & 3),
13556 GEN_INT ((mask >> 6) & 3),
13557 operands[3], operands[4]));
13558 DONE;
13559 })
13560
13561 (define_insn "<avx2_avx512f>_perm<mode>_1<mask_name>"
13562 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
13563 (vec_select:VI8F_256_512
13564 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
13565 (parallel [(match_operand 2 "const_0_to_3_operand")
13566 (match_operand 3 "const_0_to_3_operand")
13567 (match_operand 4 "const_0_to_3_operand")
13568 (match_operand 5 "const_0_to_3_operand")])))]
13569 "TARGET_AVX2 && <mask_mode512bit_condition>"
13570 {
13571 int mask = 0;
13572 mask |= INTVAL (operands[2]) << 0;
13573 mask |= INTVAL (operands[3]) << 2;
13574 mask |= INTVAL (operands[4]) << 4;
13575 mask |= INTVAL (operands[5]) << 6;
13576 operands[2] = GEN_INT (mask);
13577 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13578 }
13579 [(set_attr "type" "sselog")
13580 (set_attr "prefix" "<mask_prefix2>")
13581 (set_attr "mode" "<sseinsnmode>")])
13582
13583 (define_insn "avx2_permv2ti"
13584 [(set (match_operand:V4DI 0 "register_operand" "=x")
13585 (unspec:V4DI
13586 [(match_operand:V4DI 1 "register_operand" "x")
13587 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
13588 (match_operand:SI 3 "const_0_to_255_operand" "n")]
13589 UNSPEC_VPERMTI))]
13590 "TARGET_AVX2"
13591 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13592 [(set_attr "type" "sselog")
13593 (set_attr "prefix" "vex")
13594 (set_attr "mode" "OI")])
13595
13596 (define_insn "avx2_vec_dupv4df"
13597 [(set (match_operand:V4DF 0 "register_operand" "=x")
13598 (vec_duplicate:V4DF
13599 (vec_select:DF
13600 (match_operand:V2DF 1 "register_operand" "x")
13601 (parallel [(const_int 0)]))))]
13602 "TARGET_AVX2"
13603 "vbroadcastsd\t{%1, %0|%0, %1}"
13604 [(set_attr "type" "sselog1")
13605 (set_attr "prefix" "vex")
13606 (set_attr "mode" "V4DF")])
13607
13608 ;; Modes handled by AVX vec_dup patterns.
13609 (define_mode_iterator AVX_VEC_DUP_MODE
13610 [V8SI V8SF V4DI V4DF])
13611
13612 (define_insn "vec_dup<mode>"
13613 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
13614 (vec_duplicate:AVX_VEC_DUP_MODE
13615 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
13616 "TARGET_AVX"
13617 "@
13618 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
13619 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
13620 #"
13621 [(set_attr "type" "ssemov")
13622 (set_attr "prefix_extra" "1")
13623 (set_attr "prefix" "vex")
13624 (set_attr "isa" "*,avx2,noavx2")
13625 (set_attr "mode" "V8SF")])
13626
13627 (define_insn "<mask_codefor>avx512f_vec_dup<mode><mask_name>"
13628 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13629 (vec_duplicate:VI48F_512
13630 (vec_select:<ssescalarmode>
13631 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
13632 (parallel [(const_int 0)]))))]
13633 "TARGET_AVX512F"
13634 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13635 [(set_attr "type" "ssemov")
13636 (set_attr "prefix" "evex")
13637 (set_attr "mode" "<sseinsnmode>")])
13638
13639 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
13640 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
13641 (vec_duplicate:V16FI
13642 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
13643 "TARGET_AVX512F"
13644 "@
13645 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
13646 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13647 [(set_attr "type" "ssemov")
13648 (set_attr "prefix" "evex")
13649 (set_attr "mode" "<sseinsnmode>")])
13650
13651 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
13652 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
13653 (vec_duplicate:V8FI
13654 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
13655 "TARGET_AVX512F"
13656 "@
13657 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
13658 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13659 [(set_attr "type" "ssemov")
13660 (set_attr "prefix" "evex")
13661 (set_attr "mode" "<sseinsnmode>")])
13662
13663 (define_insn "<mask_codefor>avx512f_vec_dup_gpr<mode><mask_name>"
13664 [(set (match_operand:VI48_512 0 "register_operand" "=v")
13665 (vec_duplicate:VI48_512
13666 (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
13667 "TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)"
13668 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13669 [(set_attr "type" "ssemov")
13670 (set_attr "prefix" "evex")
13671 (set_attr "mode" "<sseinsnmode>")])
13672
13673 (define_insn "<mask_codefor>avx512f_vec_dup_mem<mode><mask_name>"
13674 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13675 (vec_duplicate:VI48F_512
13676 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
13677 "TARGET_AVX512F"
13678 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13679 [(set_attr "type" "ssemov")
13680 (set_attr "prefix" "evex")
13681 (set_attr "mode" "<sseinsnmode>")])
13682
13683 (define_insn "avx2_vbroadcasti128_<mode>"
13684 [(set (match_operand:VI_256 0 "register_operand" "=x")
13685 (vec_concat:VI_256
13686 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
13687 (match_dup 1)))]
13688 "TARGET_AVX2"
13689 "vbroadcasti128\t{%1, %0|%0, %1}"
13690 [(set_attr "type" "ssemov")
13691 (set_attr "prefix_extra" "1")
13692 (set_attr "prefix" "vex")
13693 (set_attr "mode" "OI")])
13694
13695 (define_split
13696 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
13697 (vec_duplicate:AVX_VEC_DUP_MODE
13698 (match_operand:<ssescalarmode> 1 "register_operand")))]
13699 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
13700 [(set (match_dup 2)
13701 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
13702 (set (match_dup 0)
13703 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
13704 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
13705
13706 (define_insn "avx_vbroadcastf128_<mode>"
13707 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
13708 (vec_concat:V_256
13709 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
13710 (match_dup 1)))]
13711 "TARGET_AVX"
13712 "@
13713 vbroadcast<i128>\t{%1, %0|%0, %1}
13714 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
13715 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
13716 [(set_attr "type" "ssemov,sselog1,sselog1")
13717 (set_attr "prefix_extra" "1")
13718 (set_attr "length_immediate" "0,1,1")
13719 (set_attr "prefix" "vex")
13720 (set_attr "mode" "<sseinsnmode>")])
13721
13722 (define_insn "avx512cd_maskb_vec_dupv8di"
13723 [(set (match_operand:V8DI 0 "register_operand" "=v")
13724 (vec_duplicate:V8DI
13725 (zero_extend:DI
13726 (match_operand:QI 1 "register_operand" "k"))))]
13727 "TARGET_AVX512CD"
13728 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
13729 [(set_attr "type" "mskmov")
13730 (set_attr "prefix" "evex")
13731 (set_attr "mode" "XI")])
13732
13733 (define_insn "avx512cd_maskw_vec_dupv16si"
13734 [(set (match_operand:V16SI 0 "register_operand" "=v")
13735 (vec_duplicate:V16SI
13736 (zero_extend:SI
13737 (match_operand:HI 1 "register_operand" "k"))))]
13738 "TARGET_AVX512CD"
13739 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
13740 [(set_attr "type" "mskmov")
13741 (set_attr "prefix" "evex")
13742 (set_attr "mode" "XI")])
13743
13744 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
13745 ;; If it so happens that the input is in memory, use vbroadcast.
13746 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
13747 (define_insn "*avx_vperm_broadcast_v4sf"
13748 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
13749 (vec_select:V4SF
13750 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
13751 (match_parallel 2 "avx_vbroadcast_operand"
13752 [(match_operand 3 "const_int_operand" "C,n,n")])))]
13753 "TARGET_AVX"
13754 {
13755 int elt = INTVAL (operands[3]);
13756 switch (which_alternative)
13757 {
13758 case 0:
13759 case 1:
13760 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
13761 return "vbroadcastss\t{%1, %0|%0, %k1}";
13762 case 2:
13763 operands[2] = GEN_INT (elt * 0x55);
13764 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
13765 default:
13766 gcc_unreachable ();
13767 }
13768 }
13769 [(set_attr "type" "ssemov,ssemov,sselog1")
13770 (set_attr "prefix_extra" "1")
13771 (set_attr "length_immediate" "0,0,1")
13772 (set_attr "prefix" "vex")
13773 (set_attr "mode" "SF,SF,V4SF")])
13774
13775 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
13776 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
13777 (vec_select:VF_256
13778 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
13779 (match_parallel 2 "avx_vbroadcast_operand"
13780 [(match_operand 3 "const_int_operand" "C,n,n")])))]
13781 "TARGET_AVX"
13782 "#"
13783 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
13784 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
13785 {
13786 rtx op0 = operands[0], op1 = operands[1];
13787 int elt = INTVAL (operands[3]);
13788
13789 if (REG_P (op1))
13790 {
13791 int mask;
13792
13793 if (TARGET_AVX2 && elt == 0)
13794 {
13795 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
13796 op1)));
13797 DONE;
13798 }
13799
13800 /* Shuffle element we care about into all elements of the 128-bit lane.
13801 The other lane gets shuffled too, but we don't care. */
13802 if (<MODE>mode == V4DFmode)
13803 mask = (elt & 1 ? 15 : 0);
13804 else
13805 mask = (elt & 3) * 0x55;
13806 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
13807
13808 /* Shuffle the lane we care about into both lanes of the dest. */
13809 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
13810 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
13811 DONE;
13812 }
13813
13814 operands[1] = adjust_address (op1, <ssescalarmode>mode,
13815 elt * GET_MODE_SIZE (<ssescalarmode>mode));
13816 })
13817
13818 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
13819 [(set (match_operand:VF2 0 "register_operand")
13820 (vec_select:VF2
13821 (match_operand:VF2 1 "nonimmediate_operand")
13822 (match_operand:SI 2 "const_0_to_255_operand")))]
13823 "TARGET_AVX && <mask_mode512bit_condition>"
13824 {
13825 int mask = INTVAL (operands[2]);
13826 rtx perm[<ssescalarnum>];
13827
13828 int i;
13829 for (i = 0; i < <ssescalarnum>; i = i + 2)
13830 {
13831 perm[i] = GEN_INT (((mask >> i) & 1) + i);
13832 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
13833 }
13834
13835 operands[2]
13836 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
13837 })
13838
13839 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
13840 [(set (match_operand:VF1 0 "register_operand")
13841 (vec_select:VF1
13842 (match_operand:VF1 1 "nonimmediate_operand")
13843 (match_operand:SI 2 "const_0_to_255_operand")))]
13844 "TARGET_AVX && <mask_mode512bit_condition>"
13845 {
13846 int mask = INTVAL (operands[2]);
13847 rtx perm[<ssescalarnum>];
13848
13849 int i;
13850 for (i = 0; i < <ssescalarnum>; i = i + 4)
13851 {
13852 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
13853 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
13854 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
13855 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
13856 }
13857
13858 operands[2]
13859 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
13860 })
13861
13862 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
13863 [(set (match_operand:VF 0 "register_operand" "=v")
13864 (vec_select:VF
13865 (match_operand:VF 1 "nonimmediate_operand" "vm")
13866 (match_parallel 2 ""
13867 [(match_operand 3 "const_int_operand")])))]
13868 "TARGET_AVX && <mask_mode512bit_condition>
13869 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
13870 {
13871 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
13872 operands[2] = GEN_INT (mask);
13873 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
13874 }
13875 [(set_attr "type" "sselog")
13876 (set_attr "prefix_extra" "1")
13877 (set_attr "length_immediate" "1")
13878 (set_attr "prefix" "<mask_prefix>")
13879 (set_attr "mode" "<sseinsnmode>")])
13880
13881 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
13882 [(set (match_operand:VF 0 "register_operand" "=v")
13883 (unspec:VF
13884 [(match_operand:VF 1 "register_operand" "v")
13885 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
13886 UNSPEC_VPERMIL))]
13887 "TARGET_AVX && <mask_mode512bit_condition>"
13888 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13889 [(set_attr "type" "sselog")
13890 (set_attr "prefix_extra" "1")
13891 (set_attr "btver2_decode" "vector")
13892 (set_attr "prefix" "<mask_prefix>")
13893 (set_attr "mode" "<sseinsnmode>")])
13894
13895 (define_insn "avx512f_vpermi2var<mode>3"
13896 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13897 (unspec:VI48F_512
13898 [(match_operand:VI48F_512 1 "register_operand" "v")
13899 (match_operand:<sseintvecmode> 2 "register_operand" "0")
13900 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
13901 UNSPEC_VPERMI2))]
13902 "TARGET_AVX512F"
13903 "vpermi2<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}"
13904 [(set_attr "type" "sselog")
13905 (set_attr "prefix" "evex")
13906 (set_attr "mode" "<sseinsnmode>")])
13907
13908 (define_insn "avx512f_vpermi2var<mode>3_mask"
13909 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13910 (vec_merge:VI48F_512
13911 (unspec:VI48F_512
13912 [(match_operand:VI48F_512 1 "register_operand" "v")
13913 (match_operand:<sseintvecmode> 2 "register_operand" "0")
13914 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
13915 UNSPEC_VPERMI2_MASK)
13916 (match_dup 0)
13917 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
13918 "TARGET_AVX512F"
13919 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
13920 [(set_attr "type" "sselog")
13921 (set_attr "prefix" "evex")
13922 (set_attr "mode" "<sseinsnmode>")])
13923
13924 (define_insn "avx512f_vpermt2var<mode>3"
13925 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13926 (unspec:VI48F_512
13927 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
13928 (match_operand:VI48F_512 2 "register_operand" "0")
13929 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
13930 UNSPEC_VPERMT2))]
13931 "TARGET_AVX512F"
13932 "vpermt2<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}"
13933 [(set_attr "type" "sselog")
13934 (set_attr "prefix" "evex")
13935 (set_attr "mode" "<sseinsnmode>")])
13936
13937 (define_insn "avx512f_vpermt2var<mode>3_mask"
13938 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13939 (vec_merge:VI48F_512
13940 (unspec:VI48F_512
13941 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
13942 (match_operand:VI48F_512 2 "register_operand" "0")
13943 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
13944 UNSPEC_VPERMT2)
13945 (match_dup 2)
13946 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
13947 "TARGET_AVX512F"
13948 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
13949 [(set_attr "type" "sselog")
13950 (set_attr "prefix" "evex")
13951 (set_attr "mode" "<sseinsnmode>")])
13952
13953 (define_expand "avx_vperm2f128<mode>3"
13954 [(set (match_operand:AVX256MODE2P 0 "register_operand")
13955 (unspec:AVX256MODE2P
13956 [(match_operand:AVX256MODE2P 1 "register_operand")
13957 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
13958 (match_operand:SI 3 "const_0_to_255_operand")]
13959 UNSPEC_VPERMIL2F128))]
13960 "TARGET_AVX"
13961 {
13962 int mask = INTVAL (operands[3]);
13963 if ((mask & 0x88) == 0)
13964 {
13965 rtx perm[<ssescalarnum>], t1, t2;
13966 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
13967
13968 base = (mask & 3) * nelt2;
13969 for (i = 0; i < nelt2; ++i)
13970 perm[i] = GEN_INT (base + i);
13971
13972 base = ((mask >> 4) & 3) * nelt2;
13973 for (i = 0; i < nelt2; ++i)
13974 perm[i + nelt2] = GEN_INT (base + i);
13975
13976 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
13977 operands[1], operands[2]);
13978 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
13979 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
13980 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
13981 emit_insn (t2);
13982 DONE;
13983 }
13984 })
13985
13986 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
13987 ;; means that in order to represent this properly in rtl we'd have to
13988 ;; nest *another* vec_concat with a zero operand and do the select from
13989 ;; a 4x wide vector. That doesn't seem very nice.
13990 (define_insn "*avx_vperm2f128<mode>_full"
13991 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
13992 (unspec:AVX256MODE2P
13993 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
13994 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
13995 (match_operand:SI 3 "const_0_to_255_operand" "n")]
13996 UNSPEC_VPERMIL2F128))]
13997 "TARGET_AVX"
13998 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13999 [(set_attr "type" "sselog")
14000 (set_attr "prefix_extra" "1")
14001 (set_attr "length_immediate" "1")
14002 (set_attr "prefix" "vex")
14003 (set_attr "mode" "<sseinsnmode>")])
14004
14005 (define_insn "*avx_vperm2f128<mode>_nozero"
14006 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
14007 (vec_select:AVX256MODE2P
14008 (vec_concat:<ssedoublevecmode>
14009 (match_operand:AVX256MODE2P 1 "register_operand" "x")
14010 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
14011 (match_parallel 3 ""
14012 [(match_operand 4 "const_int_operand")])))]
14013 "TARGET_AVX
14014 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
14015 {
14016 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
14017 if (mask == 0x12)
14018 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
14019 if (mask == 0x20)
14020 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
14021 operands[3] = GEN_INT (mask);
14022 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14023 }
14024 [(set_attr "type" "sselog")
14025 (set_attr "prefix_extra" "1")
14026 (set_attr "length_immediate" "1")
14027 (set_attr "prefix" "vex")
14028 (set_attr "mode" "<sseinsnmode>")])
14029
14030 (define_expand "avx_vinsertf128<mode>"
14031 [(match_operand:V_256 0 "register_operand")
14032 (match_operand:V_256 1 "register_operand")
14033 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
14034 (match_operand:SI 3 "const_0_to_1_operand")]
14035 "TARGET_AVX"
14036 {
14037 rtx (*insn)(rtx, rtx, rtx);
14038
14039 switch (INTVAL (operands[3]))
14040 {
14041 case 0:
14042 insn = gen_vec_set_lo_<mode>;
14043 break;
14044 case 1:
14045 insn = gen_vec_set_hi_<mode>;
14046 break;
14047 default:
14048 gcc_unreachable ();
14049 }
14050
14051 emit_insn (insn (operands[0], operands[1], operands[2]));
14052 DONE;
14053 })
14054
14055 (define_insn "avx2_vec_set_lo_v4di"
14056 [(set (match_operand:V4DI 0 "register_operand" "=x")
14057 (vec_concat:V4DI
14058 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
14059 (vec_select:V2DI
14060 (match_operand:V4DI 1 "register_operand" "x")
14061 (parallel [(const_int 2) (const_int 3)]))))]
14062 "TARGET_AVX2"
14063 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14064 [(set_attr "type" "sselog")
14065 (set_attr "prefix_extra" "1")
14066 (set_attr "length_immediate" "1")
14067 (set_attr "prefix" "vex")
14068 (set_attr "mode" "OI")])
14069
14070 (define_insn "avx2_vec_set_hi_v4di"
14071 [(set (match_operand:V4DI 0 "register_operand" "=x")
14072 (vec_concat:V4DI
14073 (vec_select:V2DI
14074 (match_operand:V4DI 1 "register_operand" "x")
14075 (parallel [(const_int 0) (const_int 1)]))
14076 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
14077 "TARGET_AVX2"
14078 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14079 [(set_attr "type" "sselog")
14080 (set_attr "prefix_extra" "1")
14081 (set_attr "length_immediate" "1")
14082 (set_attr "prefix" "vex")
14083 (set_attr "mode" "OI")])
14084
14085 (define_insn "vec_set_lo_<mode>"
14086 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
14087 (vec_concat:VI8F_256
14088 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
14089 (vec_select:<ssehalfvecmode>
14090 (match_operand:VI8F_256 1 "register_operand" "x")
14091 (parallel [(const_int 2) (const_int 3)]))))]
14092 "TARGET_AVX"
14093 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14094 [(set_attr "type" "sselog")
14095 (set_attr "prefix_extra" "1")
14096 (set_attr "length_immediate" "1")
14097 (set_attr "prefix" "vex")
14098 (set_attr "mode" "<sseinsnmode>")])
14099
14100 (define_insn "vec_set_hi_<mode>"
14101 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
14102 (vec_concat:VI8F_256
14103 (vec_select:<ssehalfvecmode>
14104 (match_operand:VI8F_256 1 "register_operand" "x")
14105 (parallel [(const_int 0) (const_int 1)]))
14106 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
14107 "TARGET_AVX"
14108 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14109 [(set_attr "type" "sselog")
14110 (set_attr "prefix_extra" "1")
14111 (set_attr "length_immediate" "1")
14112 (set_attr "prefix" "vex")
14113 (set_attr "mode" "<sseinsnmode>")])
14114
14115 (define_insn "vec_set_lo_<mode>"
14116 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
14117 (vec_concat:VI4F_256
14118 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
14119 (vec_select:<ssehalfvecmode>
14120 (match_operand:VI4F_256 1 "register_operand" "x")
14121 (parallel [(const_int 4) (const_int 5)
14122 (const_int 6) (const_int 7)]))))]
14123 "TARGET_AVX"
14124 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14125 [(set_attr "type" "sselog")
14126 (set_attr "prefix_extra" "1")
14127 (set_attr "length_immediate" "1")
14128 (set_attr "prefix" "vex")
14129 (set_attr "mode" "<sseinsnmode>")])
14130
14131 (define_insn "vec_set_hi_<mode>"
14132 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
14133 (vec_concat:VI4F_256
14134 (vec_select:<ssehalfvecmode>
14135 (match_operand:VI4F_256 1 "register_operand" "x")
14136 (parallel [(const_int 0) (const_int 1)
14137 (const_int 2) (const_int 3)]))
14138 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
14139 "TARGET_AVX"
14140 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14141 [(set_attr "type" "sselog")
14142 (set_attr "prefix_extra" "1")
14143 (set_attr "length_immediate" "1")
14144 (set_attr "prefix" "vex")
14145 (set_attr "mode" "<sseinsnmode>")])
14146
14147 (define_insn "vec_set_lo_v16hi"
14148 [(set (match_operand:V16HI 0 "register_operand" "=x")
14149 (vec_concat:V16HI
14150 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
14151 (vec_select:V8HI
14152 (match_operand:V16HI 1 "register_operand" "x")
14153 (parallel [(const_int 8) (const_int 9)
14154 (const_int 10) (const_int 11)
14155 (const_int 12) (const_int 13)
14156 (const_int 14) (const_int 15)]))))]
14157 "TARGET_AVX"
14158 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14159 [(set_attr "type" "sselog")
14160 (set_attr "prefix_extra" "1")
14161 (set_attr "length_immediate" "1")
14162 (set_attr "prefix" "vex")
14163 (set_attr "mode" "OI")])
14164
14165 (define_insn "vec_set_hi_v16hi"
14166 [(set (match_operand:V16HI 0 "register_operand" "=x")
14167 (vec_concat:V16HI
14168 (vec_select:V8HI
14169 (match_operand:V16HI 1 "register_operand" "x")
14170 (parallel [(const_int 0) (const_int 1)
14171 (const_int 2) (const_int 3)
14172 (const_int 4) (const_int 5)
14173 (const_int 6) (const_int 7)]))
14174 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
14175 "TARGET_AVX"
14176 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14177 [(set_attr "type" "sselog")
14178 (set_attr "prefix_extra" "1")
14179 (set_attr "length_immediate" "1")
14180 (set_attr "prefix" "vex")
14181 (set_attr "mode" "OI")])
14182
14183 (define_insn "vec_set_lo_v32qi"
14184 [(set (match_operand:V32QI 0 "register_operand" "=x")
14185 (vec_concat:V32QI
14186 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
14187 (vec_select:V16QI
14188 (match_operand:V32QI 1 "register_operand" "x")
14189 (parallel [(const_int 16) (const_int 17)
14190 (const_int 18) (const_int 19)
14191 (const_int 20) (const_int 21)
14192 (const_int 22) (const_int 23)
14193 (const_int 24) (const_int 25)
14194 (const_int 26) (const_int 27)
14195 (const_int 28) (const_int 29)
14196 (const_int 30) (const_int 31)]))))]
14197 "TARGET_AVX"
14198 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14199 [(set_attr "type" "sselog")
14200 (set_attr "prefix_extra" "1")
14201 (set_attr "length_immediate" "1")
14202 (set_attr "prefix" "vex")
14203 (set_attr "mode" "OI")])
14204
14205 (define_insn "vec_set_hi_v32qi"
14206 [(set (match_operand:V32QI 0 "register_operand" "=x")
14207 (vec_concat:V32QI
14208 (vec_select:V16QI
14209 (match_operand:V32QI 1 "register_operand" "x")
14210 (parallel [(const_int 0) (const_int 1)
14211 (const_int 2) (const_int 3)
14212 (const_int 4) (const_int 5)
14213 (const_int 6) (const_int 7)
14214 (const_int 8) (const_int 9)
14215 (const_int 10) (const_int 11)
14216 (const_int 12) (const_int 13)
14217 (const_int 14) (const_int 15)]))
14218 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
14219 "TARGET_AVX"
14220 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14221 [(set_attr "type" "sselog")
14222 (set_attr "prefix_extra" "1")
14223 (set_attr "length_immediate" "1")
14224 (set_attr "prefix" "vex")
14225 (set_attr "mode" "OI")])
14226
14227 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
14228 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
14229 (unspec:V48_AVX2
14230 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
14231 (match_operand:V48_AVX2 1 "memory_operand" "m")]
14232 UNSPEC_MASKMOV))]
14233 "TARGET_AVX"
14234 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
14235 [(set_attr "type" "sselog1")
14236 (set_attr "prefix_extra" "1")
14237 (set_attr "prefix" "vex")
14238 (set_attr "btver2_decode" "vector")
14239 (set_attr "mode" "<sseinsnmode>")])
14240
14241 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
14242 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
14243 (unspec:V48_AVX2
14244 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
14245 (match_operand:V48_AVX2 2 "register_operand" "x")
14246 (match_dup 0)]
14247 UNSPEC_MASKMOV))]
14248 "TARGET_AVX"
14249 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14250 [(set_attr "type" "sselog1")
14251 (set_attr "prefix_extra" "1")
14252 (set_attr "prefix" "vex")
14253 (set_attr "btver2_decode" "vector")
14254 (set_attr "mode" "<sseinsnmode>")])
14255
14256 (define_expand "maskload<mode>"
14257 [(set (match_operand:V48_AVX2 0 "register_operand")
14258 (unspec:V48_AVX2
14259 [(match_operand:<sseintvecmode> 2 "register_operand")
14260 (match_operand:V48_AVX2 1 "memory_operand")]
14261 UNSPEC_MASKMOV))]
14262 "TARGET_AVX")
14263
14264 (define_expand "maskstore<mode>"
14265 [(set (match_operand:V48_AVX2 0 "memory_operand")
14266 (unspec:V48_AVX2
14267 [(match_operand:<sseintvecmode> 2 "register_operand")
14268 (match_operand:V48_AVX2 1 "register_operand")
14269 (match_dup 0)]
14270 UNSPEC_MASKMOV))]
14271 "TARGET_AVX")
14272
14273 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
14274 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
14275 (unspec:AVX256MODE2P
14276 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
14277 UNSPEC_CAST))]
14278 "TARGET_AVX"
14279 "#"
14280 "&& reload_completed"
14281 [(const_int 0)]
14282 {
14283 rtx op0 = operands[0];
14284 rtx op1 = operands[1];
14285 if (REG_P (op0))
14286 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
14287 else
14288 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
14289 emit_move_insn (op0, op1);
14290 DONE;
14291 })
14292
14293 (define_expand "vec_init<mode>"
14294 [(match_operand:V_256 0 "register_operand")
14295 (match_operand 1)]
14296 "TARGET_AVX"
14297 {
14298 ix86_expand_vector_init (false, operands[0], operands[1]);
14299 DONE;
14300 })
14301
14302 (define_expand "vec_init<mode>"
14303 [(match_operand:VI48F_512 0 "register_operand")
14304 (match_operand 1)]
14305 "TARGET_AVX512F"
14306 {
14307 ix86_expand_vector_init (false, operands[0], operands[1]);
14308 DONE;
14309 })
14310
14311 (define_expand "avx2_extracti128"
14312 [(match_operand:V2DI 0 "nonimmediate_operand")
14313 (match_operand:V4DI 1 "register_operand")
14314 (match_operand:SI 2 "const_0_to_1_operand")]
14315 "TARGET_AVX2"
14316 {
14317 rtx (*insn)(rtx, rtx);
14318
14319 switch (INTVAL (operands[2]))
14320 {
14321 case 0:
14322 insn = gen_vec_extract_lo_v4di;
14323 break;
14324 case 1:
14325 insn = gen_vec_extract_hi_v4di;
14326 break;
14327 default:
14328 gcc_unreachable ();
14329 }
14330
14331 emit_insn (insn (operands[0], operands[1]));
14332 DONE;
14333 })
14334
14335 (define_expand "avx2_inserti128"
14336 [(match_operand:V4DI 0 "register_operand")
14337 (match_operand:V4DI 1 "register_operand")
14338 (match_operand:V2DI 2 "nonimmediate_operand")
14339 (match_operand:SI 3 "const_0_to_1_operand")]
14340 "TARGET_AVX2"
14341 {
14342 rtx (*insn)(rtx, rtx, rtx);
14343
14344 switch (INTVAL (operands[3]))
14345 {
14346 case 0:
14347 insn = gen_avx2_vec_set_lo_v4di;
14348 break;
14349 case 1:
14350 insn = gen_avx2_vec_set_hi_v4di;
14351 break;
14352 default:
14353 gcc_unreachable ();
14354 }
14355
14356 emit_insn (insn (operands[0], operands[1], operands[2]));
14357 DONE;
14358 })
14359
14360 (define_insn "<avx2_avx512f>_ashrv<mode><mask_name>"
14361 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
14362 (ashiftrt:VI48_AVX512F
14363 (match_operand:VI48_AVX512F 1 "register_operand" "v")
14364 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
14365 "TARGET_AVX2 && <mask_mode512bit_condition>"
14366 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14367 [(set_attr "type" "sseishft")
14368 (set_attr "prefix" "maybe_evex")
14369 (set_attr "mode" "<sseinsnmode>")])
14370
14371 (define_insn "<avx2_avx512f>_<shift_insn>v<mode><mask_name>"
14372 [(set (match_operand:VI48_AVX2_48_AVX512F 0 "register_operand" "=v")
14373 (any_lshift:VI48_AVX2_48_AVX512F
14374 (match_operand:VI48_AVX2_48_AVX512F 1 "register_operand" "v")
14375 (match_operand:VI48_AVX2_48_AVX512F 2 "nonimmediate_operand" "vm")))]
14376 "TARGET_AVX2 && <mask_mode512bit_condition>"
14377 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14378 [(set_attr "type" "sseishft")
14379 (set_attr "prefix" "maybe_evex")
14380 (set_attr "mode" "<sseinsnmode>")])
14381
14382 ;; For avx_vec_concat<mode> insn pattern
14383 (define_mode_attr concat_tg_mode
14384 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
14385 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
14386
14387 (define_insn "avx_vec_concat<mode>"
14388 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
14389 (vec_concat:V_256_512
14390 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
14391 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
14392 "TARGET_AVX"
14393 {
14394 switch (which_alternative)
14395 {
14396 case 0:
14397 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
14398 case 1:
14399 switch (get_attr_mode (insn))
14400 {
14401 case MODE_V16SF:
14402 return "vmovaps\t{%1, %t0|%t0, %1}";
14403 case MODE_V8DF:
14404 return "vmovapd\t{%1, %t0|%t0, %1}";
14405 case MODE_V8SF:
14406 return "vmovaps\t{%1, %x0|%x0, %1}";
14407 case MODE_V4DF:
14408 return "vmovapd\t{%1, %x0|%x0, %1}";
14409 case MODE_XI:
14410 return "vmovdqa\t{%1, %t0|%t0, %1}";
14411 case MODE_OI:
14412 return "vmovdqa\t{%1, %x0|%x0, %1}";
14413 default:
14414 gcc_unreachable ();
14415 }
14416 default:
14417 gcc_unreachable ();
14418 }
14419 }
14420 [(set_attr "type" "sselog,ssemov")
14421 (set_attr "prefix_extra" "1,*")
14422 (set_attr "length_immediate" "1,*")
14423 (set_attr "prefix" "maybe_evex")
14424 (set_attr "mode" "<sseinsnmode>")])
14425
14426 (define_insn "vcvtph2ps"
14427 [(set (match_operand:V4SF 0 "register_operand" "=x")
14428 (vec_select:V4SF
14429 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
14430 UNSPEC_VCVTPH2PS)
14431 (parallel [(const_int 0) (const_int 1)
14432 (const_int 2) (const_int 3)])))]
14433 "TARGET_F16C"
14434 "vcvtph2ps\t{%1, %0|%0, %1}"
14435 [(set_attr "type" "ssecvt")
14436 (set_attr "prefix" "vex")
14437 (set_attr "mode" "V4SF")])
14438
14439 (define_insn "*vcvtph2ps_load"
14440 [(set (match_operand:V4SF 0 "register_operand" "=x")
14441 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
14442 UNSPEC_VCVTPH2PS))]
14443 "TARGET_F16C"
14444 "vcvtph2ps\t{%1, %0|%0, %1}"
14445 [(set_attr "type" "ssecvt")
14446 (set_attr "prefix" "vex")
14447 (set_attr "mode" "V8SF")])
14448
14449 (define_insn "vcvtph2ps256"
14450 [(set (match_operand:V8SF 0 "register_operand" "=x")
14451 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
14452 UNSPEC_VCVTPH2PS))]
14453 "TARGET_F16C"
14454 "vcvtph2ps\t{%1, %0|%0, %1}"
14455 [(set_attr "type" "ssecvt")
14456 (set_attr "prefix" "vex")
14457 (set_attr "btver2_decode" "double")
14458 (set_attr "mode" "V8SF")])
14459
14460 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name>"
14461 [(set (match_operand:V16SF 0 "register_operand" "=v")
14462 (unspec:V16SF
14463 [(match_operand:V16HI 1 "nonimmediate_operand" "vm")]
14464 UNSPEC_VCVTPH2PS))]
14465 "TARGET_AVX512F"
14466 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14467 [(set_attr "type" "ssecvt")
14468 (set_attr "prefix" "evex")
14469 (set_attr "mode" "V16SF")])
14470
14471 (define_expand "vcvtps2ph"
14472 [(set (match_operand:V8HI 0 "register_operand")
14473 (vec_concat:V8HI
14474 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
14475 (match_operand:SI 2 "const_0_to_255_operand")]
14476 UNSPEC_VCVTPS2PH)
14477 (match_dup 3)))]
14478 "TARGET_F16C"
14479 "operands[3] = CONST0_RTX (V4HImode);")
14480
14481 (define_insn "*vcvtps2ph"
14482 [(set (match_operand:V8HI 0 "register_operand" "=x")
14483 (vec_concat:V8HI
14484 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
14485 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14486 UNSPEC_VCVTPS2PH)
14487 (match_operand:V4HI 3 "const0_operand")))]
14488 "TARGET_F16C"
14489 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
14490 [(set_attr "type" "ssecvt")
14491 (set_attr "prefix" "vex")
14492 (set_attr "mode" "V4SF")])
14493
14494 (define_insn "*vcvtps2ph_store"
14495 [(set (match_operand:V4HI 0 "memory_operand" "=m")
14496 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
14497 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14498 UNSPEC_VCVTPS2PH))]
14499 "TARGET_F16C"
14500 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
14501 [(set_attr "type" "ssecvt")
14502 (set_attr "prefix" "vex")
14503 (set_attr "mode" "V4SF")])
14504
14505 (define_insn "vcvtps2ph256"
14506 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
14507 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
14508 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14509 UNSPEC_VCVTPS2PH))]
14510 "TARGET_F16C"
14511 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
14512 [(set_attr "type" "ssecvt")
14513 (set_attr "prefix" "vex")
14514 (set_attr "btver2_decode" "vector")
14515 (set_attr "mode" "V8SF")])
14516
14517 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
14518 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
14519 (unspec:V16HI
14520 [(match_operand:V16SF 1 "register_operand" "v")
14521 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14522 UNSPEC_VCVTPS2PH))]
14523 "TARGET_AVX512F"
14524 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14525 [(set_attr "type" "ssecvt")
14526 (set_attr "prefix" "evex")
14527 (set_attr "mode" "V16SF")])
14528
14529 ;; For gather* insn patterns
14530 (define_mode_iterator VEC_GATHER_MODE
14531 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
14532 (define_mode_attr VEC_GATHER_IDXSI
14533 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
14534 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
14535 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
14536 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
14537
14538 (define_mode_attr VEC_GATHER_IDXDI
14539 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
14540 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
14541 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
14542 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
14543
14544 (define_mode_attr VEC_GATHER_SRCDI
14545 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
14546 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
14547 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
14548 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
14549
14550 (define_expand "avx2_gathersi<mode>"
14551 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
14552 (unspec:VEC_GATHER_MODE
14553 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
14554 (mem:<ssescalarmode>
14555 (match_par_dup 7
14556 [(match_operand 2 "vsib_address_operand")
14557 (match_operand:<VEC_GATHER_IDXSI>
14558 3 "register_operand")
14559 (match_operand:SI 5 "const1248_operand ")]))
14560 (mem:BLK (scratch))
14561 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
14562 UNSPEC_GATHER))
14563 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
14564 "TARGET_AVX2"
14565 {
14566 operands[7]
14567 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
14568 operands[5]), UNSPEC_VSIBADDR);
14569 })
14570
14571 (define_insn "*avx2_gathersi<mode>"
14572 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
14573 (unspec:VEC_GATHER_MODE
14574 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
14575 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
14576 [(unspec:P
14577 [(match_operand:P 3 "vsib_address_operand" "Tv")
14578 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
14579 (match_operand:SI 6 "const1248_operand" "n")]
14580 UNSPEC_VSIBADDR)])
14581 (mem:BLK (scratch))
14582 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
14583 UNSPEC_GATHER))
14584 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
14585 "TARGET_AVX2"
14586 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
14587 [(set_attr "type" "ssemov")
14588 (set_attr "prefix" "vex")
14589 (set_attr "mode" "<sseinsnmode>")])
14590
14591 (define_insn "*avx2_gathersi<mode>_2"
14592 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
14593 (unspec:VEC_GATHER_MODE
14594 [(pc)
14595 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14596 [(unspec:P
14597 [(match_operand:P 2 "vsib_address_operand" "Tv")
14598 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
14599 (match_operand:SI 5 "const1248_operand" "n")]
14600 UNSPEC_VSIBADDR)])
14601 (mem:BLK (scratch))
14602 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
14603 UNSPEC_GATHER))
14604 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
14605 "TARGET_AVX2"
14606 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
14607 [(set_attr "type" "ssemov")
14608 (set_attr "prefix" "vex")
14609 (set_attr "mode" "<sseinsnmode>")])
14610
14611 (define_expand "avx2_gatherdi<mode>"
14612 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
14613 (unspec:VEC_GATHER_MODE
14614 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
14615 (mem:<ssescalarmode>
14616 (match_par_dup 7
14617 [(match_operand 2 "vsib_address_operand")
14618 (match_operand:<VEC_GATHER_IDXDI>
14619 3 "register_operand")
14620 (match_operand:SI 5 "const1248_operand ")]))
14621 (mem:BLK (scratch))
14622 (match_operand:<VEC_GATHER_SRCDI>
14623 4 "register_operand")]
14624 UNSPEC_GATHER))
14625 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
14626 "TARGET_AVX2"
14627 {
14628 operands[7]
14629 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
14630 operands[5]), UNSPEC_VSIBADDR);
14631 })
14632
14633 (define_insn "*avx2_gatherdi<mode>"
14634 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
14635 (unspec:VEC_GATHER_MODE
14636 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
14637 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
14638 [(unspec:P
14639 [(match_operand:P 3 "vsib_address_operand" "Tv")
14640 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
14641 (match_operand:SI 6 "const1248_operand" "n")]
14642 UNSPEC_VSIBADDR)])
14643 (mem:BLK (scratch))
14644 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
14645 UNSPEC_GATHER))
14646 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
14647 "TARGET_AVX2"
14648 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
14649 [(set_attr "type" "ssemov")
14650 (set_attr "prefix" "vex")
14651 (set_attr "mode" "<sseinsnmode>")])
14652
14653 (define_insn "*avx2_gatherdi<mode>_2"
14654 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
14655 (unspec:VEC_GATHER_MODE
14656 [(pc)
14657 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14658 [(unspec:P
14659 [(match_operand:P 2 "vsib_address_operand" "Tv")
14660 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
14661 (match_operand:SI 5 "const1248_operand" "n")]
14662 UNSPEC_VSIBADDR)])
14663 (mem:BLK (scratch))
14664 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
14665 UNSPEC_GATHER))
14666 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
14667 "TARGET_AVX2"
14668 {
14669 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
14670 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
14671 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
14672 }
14673 [(set_attr "type" "ssemov")
14674 (set_attr "prefix" "vex")
14675 (set_attr "mode" "<sseinsnmode>")])
14676
14677 (define_insn "*avx2_gatherdi<mode>_3"
14678 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
14679 (vec_select:<VEC_GATHER_SRCDI>
14680 (unspec:VI4F_256
14681 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
14682 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
14683 [(unspec:P
14684 [(match_operand:P 3 "vsib_address_operand" "Tv")
14685 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
14686 (match_operand:SI 6 "const1248_operand" "n")]
14687 UNSPEC_VSIBADDR)])
14688 (mem:BLK (scratch))
14689 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
14690 UNSPEC_GATHER)
14691 (parallel [(const_int 0) (const_int 1)
14692 (const_int 2) (const_int 3)])))
14693 (clobber (match_scratch:VI4F_256 1 "=&x"))]
14694 "TARGET_AVX2"
14695 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
14696 [(set_attr "type" "ssemov")
14697 (set_attr "prefix" "vex")
14698 (set_attr "mode" "<sseinsnmode>")])
14699
14700 (define_insn "*avx2_gatherdi<mode>_4"
14701 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
14702 (vec_select:<VEC_GATHER_SRCDI>
14703 (unspec:VI4F_256
14704 [(pc)
14705 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14706 [(unspec:P
14707 [(match_operand:P 2 "vsib_address_operand" "Tv")
14708 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
14709 (match_operand:SI 5 "const1248_operand" "n")]
14710 UNSPEC_VSIBADDR)])
14711 (mem:BLK (scratch))
14712 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
14713 UNSPEC_GATHER)
14714 (parallel [(const_int 0) (const_int 1)
14715 (const_int 2) (const_int 3)])))
14716 (clobber (match_scratch:VI4F_256 1 "=&x"))]
14717 "TARGET_AVX2"
14718 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
14719 [(set_attr "type" "ssemov")
14720 (set_attr "prefix" "vex")
14721 (set_attr "mode" "<sseinsnmode>")])
14722
14723 (define_expand "avx512f_gathersi<mode>"
14724 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
14725 (unspec:VI48F_512
14726 [(match_operand:VI48F_512 1 "register_operand")
14727 (match_operand:<avx512fmaskmode> 4 "register_operand")
14728 (mem:<ssescalarmode>
14729 (match_par_dup 6
14730 [(match_operand 2 "vsib_address_operand")
14731 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
14732 (match_operand:SI 5 "const1248_operand")]))]
14733 UNSPEC_GATHER))
14734 (clobber (match_scratch:<avx512fmaskmode> 7))])]
14735 "TARGET_AVX512F"
14736 {
14737 operands[6]
14738 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
14739 operands[5]), UNSPEC_VSIBADDR);
14740 })
14741
14742 (define_insn "*avx512f_gathersi<mode>"
14743 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
14744 (unspec:VI48F_512
14745 [(match_operand:VI48F_512 1 "register_operand" "0")
14746 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
14747 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14748 [(unspec:P
14749 [(match_operand:P 4 "vsib_address_operand" "Tv")
14750 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
14751 (match_operand:SI 5 "const1248_operand" "n")]
14752 UNSPEC_VSIBADDR)])]
14753 UNSPEC_GATHER))
14754 (clobber (match_scratch:<avx512fmaskmode> 2 "=&k"))]
14755 "TARGET_AVX512F"
14756 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
14757 [(set_attr "type" "ssemov")
14758 (set_attr "prefix" "evex")
14759 (set_attr "mode" "<sseinsnmode>")])
14760
14761 (define_insn "*avx512f_gathersi<mode>_2"
14762 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
14763 (unspec:VI48F_512
14764 [(pc)
14765 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
14766 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
14767 [(unspec:P
14768 [(match_operand:P 3 "vsib_address_operand" "Tv")
14769 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
14770 (match_operand:SI 4 "const1248_operand" "n")]
14771 UNSPEC_VSIBADDR)])]
14772 UNSPEC_GATHER))
14773 (clobber (match_scratch:<avx512fmaskmode> 1 "=&k"))]
14774 "TARGET_AVX512F"
14775 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
14776 [(set_attr "type" "ssemov")
14777 (set_attr "prefix" "evex")
14778 (set_attr "mode" "<sseinsnmode>")])
14779
14780
14781 (define_expand "avx512f_gatherdi<mode>"
14782 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
14783 (unspec:VI48F_512
14784 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
14785 (match_operand:QI 4 "register_operand")
14786 (mem:<ssescalarmode>
14787 (match_par_dup 6
14788 [(match_operand 2 "vsib_address_operand")
14789 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
14790 (match_operand:SI 5 "const1248_operand")]))]
14791 UNSPEC_GATHER))
14792 (clobber (match_scratch:QI 7))])]
14793 "TARGET_AVX512F"
14794 {
14795 operands[6]
14796 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
14797 operands[5]), UNSPEC_VSIBADDR);
14798 })
14799
14800 (define_insn "*avx512f_gatherdi<mode>"
14801 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
14802 (unspec:VI48F_512
14803 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
14804 (match_operand:QI 7 "register_operand" "2")
14805 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14806 [(unspec:P
14807 [(match_operand:P 4 "vsib_address_operand" "Tv")
14808 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
14809 (match_operand:SI 5 "const1248_operand" "n")]
14810 UNSPEC_VSIBADDR)])]
14811 UNSPEC_GATHER))
14812 (clobber (match_scratch:QI 2 "=&k"))]
14813 "TARGET_AVX512F"
14814 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
14815 [(set_attr "type" "ssemov")
14816 (set_attr "prefix" "evex")
14817 (set_attr "mode" "<sseinsnmode>")])
14818
14819 (define_insn "*avx512f_gatherdi<mode>_2"
14820 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
14821 (unspec:VI48F_512
14822 [(pc)
14823 (match_operand:QI 6 "register_operand" "1")
14824 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
14825 [(unspec:P
14826 [(match_operand:P 3 "vsib_address_operand" "Tv")
14827 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
14828 (match_operand:SI 4 "const1248_operand" "n")]
14829 UNSPEC_VSIBADDR)])]
14830 UNSPEC_GATHER))
14831 (clobber (match_scratch:QI 1 "=&k"))]
14832 "TARGET_AVX512F"
14833 {
14834 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
14835 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
14836 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
14837 }
14838 [(set_attr "type" "ssemov")
14839 (set_attr "prefix" "evex")
14840 (set_attr "mode" "<sseinsnmode>")])
14841
14842 (define_expand "avx512f_scattersi<mode>"
14843 [(parallel [(set (mem:VI48F_512
14844 (match_par_dup 5
14845 [(match_operand 0 "vsib_address_operand")
14846 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
14847 (match_operand:SI 4 "const1248_operand")]))
14848 (unspec:VI48F_512
14849 [(match_operand:<avx512fmaskmode> 1 "register_operand")
14850 (match_operand:VI48F_512 3 "register_operand")]
14851 UNSPEC_SCATTER))
14852 (clobber (match_scratch:<avx512fmaskmode> 6))])]
14853 "TARGET_AVX512F"
14854 {
14855 operands[5]
14856 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
14857 operands[4]), UNSPEC_VSIBADDR);
14858 })
14859
14860 (define_insn "*avx512f_scattersi<mode>"
14861 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
14862 [(unspec:P
14863 [(match_operand:P 0 "vsib_address_operand" "Tv")
14864 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
14865 (match_operand:SI 4 "const1248_operand" "n")]
14866 UNSPEC_VSIBADDR)])
14867 (unspec:VI48F_512
14868 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
14869 (match_operand:VI48F_512 3 "register_operand" "v")]
14870 UNSPEC_SCATTER))
14871 (clobber (match_scratch:<avx512fmaskmode> 1 "=&k"))]
14872 "TARGET_AVX512F"
14873 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
14874 [(set_attr "type" "ssemov")
14875 (set_attr "prefix" "evex")
14876 (set_attr "mode" "<sseinsnmode>")])
14877
14878 (define_expand "avx512f_scatterdi<mode>"
14879 [(parallel [(set (mem:VI48F_512
14880 (match_par_dup 5
14881 [(match_operand 0 "vsib_address_operand")
14882 (match_operand:V8DI 2 "register_operand")
14883 (match_operand:SI 4 "const1248_operand")]))
14884 (unspec:VI48F_512
14885 [(match_operand:QI 1 "register_operand")
14886 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
14887 UNSPEC_SCATTER))
14888 (clobber (match_scratch:QI 6))])]
14889 "TARGET_AVX512F"
14890 {
14891 operands[5]
14892 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
14893 operands[4]), UNSPEC_VSIBADDR);
14894 })
14895
14896 (define_insn "*avx512f_scatterdi<mode>"
14897 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
14898 [(unspec:P
14899 [(match_operand:P 0 "vsib_address_operand" "Tv")
14900 (match_operand:V8DI 2 "register_operand" "v")
14901 (match_operand:SI 4 "const1248_operand" "n")]
14902 UNSPEC_VSIBADDR)])
14903 (unspec:VI48F_512
14904 [(match_operand:QI 6 "register_operand" "1")
14905 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
14906 UNSPEC_SCATTER))
14907 (clobber (match_scratch:QI 1 "=&k"))]
14908 "TARGET_AVX512F"
14909 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
14910 [(set_attr "type" "ssemov")
14911 (set_attr "prefix" "evex")
14912 (set_attr "mode" "<sseinsnmode>")])
14913
14914 (define_insn "avx512f_compress<mode>_mask"
14915 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14916 (unspec:VI48F_512
14917 [(match_operand:VI48F_512 1 "register_operand" "v")
14918 (match_operand:VI48F_512 2 "vector_move_operand" "0C")
14919 (match_operand:<avx512fmaskmode> 3 "register_operand" "k")]
14920 UNSPEC_COMPRESS))]
14921 "TARGET_AVX512F"
14922 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14923 [(set_attr "type" "ssemov")
14924 (set_attr "prefix" "evex")
14925 (set_attr "mode" "<sseinsnmode>")])
14926
14927 (define_insn "avx512f_compressstore<mode>_mask"
14928 [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
14929 (unspec:VI48F_512
14930 [(match_operand:VI48F_512 1 "register_operand" "x")
14931 (match_dup 0)
14932 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")]
14933 UNSPEC_COMPRESS_STORE))]
14934 "TARGET_AVX512F"
14935 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
14936 [(set_attr "type" "ssemov")
14937 (set_attr "prefix" "evex")
14938 (set_attr "memory" "store")
14939 (set_attr "mode" "<sseinsnmode>")])
14940
14941 (define_insn "avx512f_expand<mode>_mask"
14942 [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
14943 (unspec:VI48F_512
14944 [(match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
14945 (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
14946 (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")]
14947 UNSPEC_EXPAND))]
14948 "TARGET_AVX512F"
14949 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14950 [(set_attr "type" "ssemov")
14951 (set_attr "prefix" "evex")
14952 (set_attr "memory" "none,load")
14953 (set_attr "mode" "<sseinsnmode>")])
14954
14955 (define_insn "avx512f_getmant<mode><mask_name>"
14956 [(set (match_operand:VF_512 0 "register_operand" "=v")
14957 (unspec:VF_512
14958 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")
14959 (match_operand:SI 2 "const_0_to_15_operand")]
14960 UNSPEC_GETMANT))]
14961 "TARGET_AVX512F"
14962 "vgetmant<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
14963 [(set_attr "prefix" "evex")
14964 (set_attr "mode" "<MODE>")])
14965
14966 (define_insn "avx512f_getmant<mode>"
14967 [(set (match_operand:VF_128 0 "register_operand" "=v")
14968 (vec_merge:VF_128
14969 (unspec:VF_128
14970 [(match_operand:VF_128 1 "register_operand" "v")
14971 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
14972 (match_operand:SI 3 "const_0_to_15_operand")]
14973 UNSPEC_GETMANT)
14974 (match_dup 1)
14975 (const_int 1)))]
14976 "TARGET_AVX512F"
14977 "vgetmant<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14978 [(set_attr "prefix" "evex")
14979 (set_attr "mode" "<ssescalarmode>")])
14980
14981 (define_insn "clz<mode>2<mask_name>"
14982 [(set (match_operand:VI48_512 0 "register_operand" "=v")
14983 (clz:VI48_512
14984 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
14985 "TARGET_AVX512CD"
14986 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14987 [(set_attr "type" "sse")
14988 (set_attr "prefix" "evex")
14989 (set_attr "mode" "<sseinsnmode>")])
14990
14991 (define_insn "<mask_codefor>conflict<mode><mask_name>"
14992 [(set (match_operand:VI48_512 0 "register_operand" "=v")
14993 (unspec:VI48_512
14994 [(match_operand:VI48_512 1 "nonimmediate_operand" "vm")]
14995 UNSPEC_CONFLICT))]
14996 "TARGET_AVX512CD"
14997 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14998 [(set_attr "type" "sse")
14999 (set_attr "prefix" "evex")
15000 (set_attr "mode" "<sseinsnmode>")])