4afc4b3e249d62e2d86b16495211e71d1fec0d2f
[gcc.git] / gcc / config / i386 / sse.md
1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 ;; Free Software Foundation, Inc.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
;; Operation codes for the (unspec ...) expressions used by the patterns
;; in this file.  The codes are grouped under a comment naming the ISA
;; extension they belong to.
21 (define_c_enum "unspec" [
22 ;; SSE
23 UNSPEC_MOVNT
24 UNSPEC_MOVU
25
26 ;; SSE3
27 UNSPEC_LDDQU
28
29 ;; SSSE3
30 UNSPEC_PSHUFB
31 UNSPEC_PSIGN
32 UNSPEC_PALIGNR
33
34 ;; For SSE4A support
35 UNSPEC_EXTRQI
36 UNSPEC_EXTRQ
37 UNSPEC_INSERTQI
38 UNSPEC_INSERTQ
39
40 ;; For SSE4.1 support
41 UNSPEC_BLENDV
42 UNSPEC_INSERTPS
43 UNSPEC_DP
44 UNSPEC_MOVNTDQA
45 UNSPEC_MPSADBW
46 UNSPEC_PHMINPOSUW
47 UNSPEC_PTEST
48
49 ;; For SSE4.2 support
50 UNSPEC_PCMPESTR
51 UNSPEC_PCMPISTR
52
53 ;; For FMA4 support
;; (this group also holds the UNSPEC_XOP_* codes)
54 UNSPEC_FMADDSUB
55 UNSPEC_XOP_UNSIGNED_CMP
56 UNSPEC_XOP_TRUEFALSE
57 UNSPEC_XOP_PERMUTE
58 UNSPEC_FRCZ
59
60 ;; For AES support
61 UNSPEC_AESENC
62 UNSPEC_AESENCLAST
63 UNSPEC_AESDEC
64 UNSPEC_AESDECLAST
65 UNSPEC_AESIMC
66 UNSPEC_AESKEYGENASSIST
67
68 ;; For PCLMUL support
69 UNSPEC_PCLMUL
70
71 ;; For AVX support
72 UNSPEC_PCMP
73 UNSPEC_VPERMIL
74 UNSPEC_VPERMIL2
75 UNSPEC_VPERMIL2F128
76 UNSPEC_CAST
77 UNSPEC_VTESTP
78 UNSPEC_VCVTPH2PS
79 UNSPEC_VCVTPS2PH
80
81 ;; For AVX2 support
82 UNSPEC_VPERMSI
83 UNSPEC_VPERMDF
84 UNSPEC_VPERMSF
85 UNSPEC_VPERMTI
86 UNSPEC_GATHER
87 UNSPEC_VSIBADDR
88 ])
89
;; Second enumeration of operation codes, named "unspecv".
;; NOTE(review): presumably these are the codes for (unspec_volatile ...)
;; expressions; none of them is used in the part of the file reviewed
;; here, so confirm against the patterns that reference them.
90 (define_c_enum "unspecv" [
91 UNSPECV_LDMXCSR
92 UNSPECV_STMXCSR
93 UNSPECV_CLFLUSH
94 UNSPECV_MONITOR
95 UNSPECV_MWAIT
96 UNSPECV_VZEROALL
97 UNSPECV_VZEROUPPER
98 ])
99
100 ;; All vector modes including V?TImode, used in move patterns.
;; An entry written (MODE "COND") takes part in the iteration only when
;; COND holds; a bare mode is unconditional.  Every 256-bit mode here is
;; guarded by TARGET_AVX; the 128-bit modes are unguarded.
;; Besides the element vectors this list contains V2TI and V1TI.
101 (define_mode_iterator V16
102 [(V32QI "TARGET_AVX") V16QI
103 (V16HI "TARGET_AVX") V8HI
104 (V8SI "TARGET_AVX") V4SI
105 (V4DI "TARGET_AVX") V2DI
106 (V2TI "TARGET_AVX") V1TI
107 (V8SF "TARGET_AVX") V4SF
108 (V4DF "TARGET_AVX") V2DF])
109
110 ;; All vector modes
;; Integer and float vector modes, without the V?TI modes.  256-bit
;; entries are guarded by TARGET_AVX and V2DF by TARGET_SSE2.
111 (define_mode_iterator V
112 [(V32QI "TARGET_AVX") V16QI
113 (V16HI "TARGET_AVX") V8HI
114 (V8SI "TARGET_AVX") V4SI
115 (V4DI "TARGET_AVX") V2DI
116 (V8SF "TARGET_AVX") V4SF
117 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
118
119 ;; All 128bit vector modes
;; Only V2DF carries a guard (TARGET_SSE2).
120 (define_mode_iterator V_128
121 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
122
123 ;; All 256bit vector modes
;; No per-mode guards here; patterns using this iterator must supply
;; their own enabling condition.
124 (define_mode_iterator V_256
125 [V32QI V16HI V8SI V4DI V8SF V4DF])
126
127 ;; All vector float modes
;; Single- and double-precision float vectors of both widths.  The
;; 256-bit modes need TARGET_AVX and V2DF needs TARGET_SSE2; V4SF is
;; unguarded.
128 (define_mode_iterator VF
129 [(V8SF "TARGET_AVX") V4SF
130 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
131
132 ;; All SFmode vector float modes
133 (define_mode_iterator VF1
134 [(V8SF "TARGET_AVX") V4SF])
135
136 ;; All DFmode vector float modes
;; Note that V2DF is listed without a TARGET_SSE2 guard here, unlike
;; in VF above.
137 (define_mode_iterator VF2
138 [(V4DF "TARGET_AVX") V2DF])
139
140 ;; All 128bit vector float modes
141 (define_mode_iterator VF_128
142 [V4SF (V2DF "TARGET_SSE2")])
143
144 ;; All 256bit vector float modes
;; No per-mode guards.
145 (define_mode_iterator VF_256
146 [V8SF V4DF])
147
148 ;; All vector integer modes
;; Integer vectors of both widths; the 256-bit modes are enabled by
;; TARGET_AVX.
149 (define_mode_iterator VI
150 [(V32QI "TARGET_AVX") V16QI
151 (V16HI "TARGET_AVX") V8HI
152 (V8SI "TARGET_AVX") V4SI
153 (V4DI "TARGET_AVX") V2DI])
154
;; Same mode list as VI, but the 256-bit modes are enabled by
;; TARGET_AVX2 instead of TARGET_AVX.
155 (define_mode_iterator VI_AVX2
156 [(V32QI "TARGET_AVX2") V16QI
157 (V16HI "TARGET_AVX2") V8HI
158 (V8SI "TARGET_AVX2") V4SI
159 (V4DI "TARGET_AVX2") V2DI])
160
161 ;; All QImode vector integer modes
162 (define_mode_iterator VI1
163 [(V32QI "TARGET_AVX") V16QI])
164
165 ;; All DImode vector integer modes
166 (define_mode_iterator VI8
167 [(V4DI "TARGET_AVX") V2DI])
168
;; Per-element-size integer iterators.  In the names, the digit is the
;; element size in bytes (1 = QI, 2 = HI, 4 = SI, 8 = DI); the 256-bit
;; member of each pair is enabled by TARGET_AVX2.
169 (define_mode_iterator VI1_AVX2
170 [(V32QI "TARGET_AVX2") V16QI])
171
172 (define_mode_iterator VI2_AVX2
173 [(V16HI "TARGET_AVX2") V8HI])
174
175 (define_mode_iterator VI4_AVX2
176 [(V8SI "TARGET_AVX2") V4SI])
177
178 (define_mode_iterator VI8_AVX2
179 [(V4DI "TARGET_AVX2") V2DI])
180
181 ;; ??? We should probably use TImode instead.
;; Whole-register integer modes: V1TI, plus V2TI under TARGET_AVX2.
182 (define_mode_iterator VIMAX_AVX2
183 [(V2TI "TARGET_AVX2") V1TI])
184
185 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; Differs from VIMAX_AVX2 only in using scalar TI where that iterator
;; uses V1TI.
186 (define_mode_iterator SSESCALARMODE
187 [(V2TI "TARGET_AVX2") TI])
188
;; Combinations of integer element sizes.  The digits in each name list
;; the element sizes in bytes that the iterator covers; 256-bit modes
;; are enabled by TARGET_AVX2.
189 (define_mode_iterator VI12_AVX2
190 [(V32QI "TARGET_AVX2") V16QI
191 (V16HI "TARGET_AVX2") V8HI])
192
193 (define_mode_iterator VI24_AVX2
194 [(V16HI "TARGET_AVX2") V8HI
195 (V8SI "TARGET_AVX2") V4SI])
196
197 (define_mode_iterator VI124_AVX2
198 [(V32QI "TARGET_AVX2") V16QI
199 (V16HI "TARGET_AVX2") V8HI
200 (V8SI "TARGET_AVX2") V4SI])
201
202 (define_mode_iterator VI248_AVX2
203 [(V16HI "TARGET_AVX2") V8HI
204 (V8SI "TARGET_AVX2") V4SI
205 (V4DI "TARGET_AVX2") V2DI])
206
207 (define_mode_iterator VI48_AVX2
208 [(V8SI "TARGET_AVX2") V4SI
209 (V4DI "TARGET_AVX2") V2DI])
210
;; Vectors with 4- or 8-byte elements, float and integer.  The float
;; modes are unguarded; every integer mode, including the 128-bit ones,
;; requires TARGET_AVX2.
211 (define_mode_iterator V48_AVX2
212 [V4SF V2DF
213 V8SF V4DF
214 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
215 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
216
;; The attributes below map a vector mode to a short ISA-name string.
;; In each table the 128-bit modes map to the first name and the
;; 256-bit modes to "avx2" (avx_avx2 is the exception, see below).

;; "sse2" for 128-bit integer modes (including V1TI), "avx2" for 256-bit.
217 (define_mode_attr sse2_avx2
218 [(V16QI "sse2") (V32QI "avx2")
219 (V8HI "sse2") (V16HI "avx2")
220 (V4SI "sse2") (V8SI "avx2")
221 (V2DI "sse2") (V4DI "avx2")
222 (V1TI "sse2") (V2TI "avx2")])
223
;; "ssse3" for 128-bit integer modes, "avx2" for 256-bit.  The
;; whole-register 128-bit entry is keyed on scalar TI, not V1TI.
224 (define_mode_attr ssse3_avx2
225 [(V16QI "ssse3") (V32QI "avx2")
226 (V8HI "ssse3") (V16HI "avx2")
227 (V4SI "ssse3") (V8SI "avx2")
228 (V2DI "ssse3") (V4DI "avx2")
229 (TI "ssse3") (V2TI "avx2")])
230
;; "sse4_1" for 128-bit integer modes, "avx2" for 256-bit.
231 (define_mode_attr sse4_1_avx2
232 [(V16QI "sse4_1") (V32QI "avx2")
233 (V8HI "sse4_1") (V16HI "avx2")
234 (V4SI "sse4_1") (V8SI "avx2")
235 (V2DI "sse4_1") (V4DI "avx2")])
236
;; Keyed on element type rather than width: "avx" for every float mode,
;; "avx2" for every integer mode.
237 (define_mode_attr avx_avx2
238 [(V4SF "avx") (V2DF "avx")
239 (V8SF "avx") (V4DF "avx")
240 (V4SI "avx2") (V2DI "avx2")
241 (V8SI "avx2") (V4DI "avx2")])
242
;; "vec" for 128-bit integer modes, "avx2" for 256-bit.
243 (define_mode_attr vec_avx2
244 [(V16QI "vec") (V32QI "avx2")
245 (V8HI "vec") (V16HI "avx2")
246 (V4SI "vec") (V8SI "avx2")
247 (V2DI "vec") (V4DI "avx2")])
248
;; For HImode-element vectors: the mode with the same number of
;; elements, each element SImode (twice as wide).
249 (define_mode_attr ssedoublemode
250 [(V16HI "V16SI") (V8HI "V8SI")])
251
;; For DImode-element vectors: the QImode-element vector mode of the
;; same total size.
252 (define_mode_attr ssebytemode
253 [(V4DI "V32QI") (V2DI "V16QI")])
254
255 ;; All 128bit vector integer modes
;; None of the iterators below carries a per-mode guard, so a pattern
;; using them has to state the required ISA in its own condition.
256 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
257
258 ;; All 256bit vector integer modes
259 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
260
261 ;; Random 128bit vector integer mode combinations
;; The digits before the underscore list the element sizes in bytes.
262 (define_mode_iterator VI12_128 [V16QI V8HI])
263 (define_mode_iterator VI14_128 [V16QI V4SI])
264 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
265 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
266 (define_mode_iterator VI24_128 [V8HI V4SI])
267 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
268 (define_mode_iterator VI48_128 [V4SI V2DI])
269
270 ;; Random 256bit vector integer mode combinations
271 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
272 (define_mode_iterator VI48_256 [V8SI V4DI])
273
274 ;; Int-float size matches
;; Each iterator pairs the integer and the float vector mode that have
;; the same element size and the same vector width.
275 (define_mode_iterator VI4F_128 [V4SI V4SF])
276 (define_mode_iterator VI8F_128 [V2DI V2DF])
277 (define_mode_iterator VI4F_256 [V8SI V8SF])
278 (define_mode_iterator VI8F_256 [V4DI V4DF])
279
280 ;; Mapping from float mode to required SSE level
;; Used as the <sse> part of pattern names: "sse" for single precision,
;; "sse2" for double precision, "avx" for the 256-bit float modes.
281 (define_mode_attr sse
282 [(SF "sse") (DF "sse2")
283 (V4SF "sse") (V2DF "sse2")
284 (V8SF "avx") (V4DF "avx")])
285
;; Name prefix for QImode/DImode-element integer vectors: "sse2" for the
;; 128-bit modes, "avx" for the 256-bit modes.
286 (define_mode_attr sse2
287 [(V16QI "sse2") (V32QI "avx")
288 (V2DI "sse2") (V4DI "avx")])
289
;; Name prefix for byte vectors: "sse3" for V16QI, "avx" for V32QI.
290 (define_mode_attr sse3
291 [(V16QI "sse3") (V32QI "avx")])
292
;; Name prefix for float vectors: "sse4_1" for 128-bit, "avx" for 256-bit.
293 (define_mode_attr sse4_1
294 [(V4SF "sse4_1") (V2DF "sse4_1")
295 (V8SF "avx") (V4DF "avx")])
296
;; Suffix appended to pattern names: "256" for 256-bit modes and the
;; empty string for 128-bit modes.
297 (define_mode_attr avxsizesuffix
298 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
299 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
300 (V8SF "256") (V4DF "256")
301 (V4SF "") (V2DF "")])
302
303 ;; SSE instruction mode
;; Value for an insn's "mode" attribute: every 256-bit integer mode maps
;; to OI and every 128-bit integer mode (and scalar TI) to TI, while
;; float vector modes map to themselves.
304 (define_mode_attr sseinsnmode
305 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
306 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
307 (V8SF "V8SF") (V4DF "V4DF")
308 (V4SF "V4SF") (V2DF "V2DF")
309 (TI "TI")])
310
311 ;; Mapping of vector float modes to an integer mode of the same size
;; Float modes map to the integer vector mode with the same element
;; size and count; integer modes map to themselves.
312 (define_mode_attr sseintvecmode
313 [(V8SF "V8SI") (V4DF "V4DI")
314 (V4SF "V4SI") (V2DF "V2DI")
315 (V8SI "V8SI") (V4DI "V4DI")
316 (V4SI "V4SI") (V2DI "V2DI")
317 (V16HI "V16HI") (V8HI "V8HI")
318 (V32QI "V32QI") (V16QI "V16QI")])
319
;; Same mapping as sseintvecmode, with the result spelled in lower case.
320 (define_mode_attr sseintvecmodelower
321 [(V8SF "v8si") (V4DF "v4di")
322 (V4SF "v4si") (V2DF "v2di")
323 (V8SI "v8si") (V4DI "v4di")
324 (V4SI "v4si") (V2DI "v2di")
325 (V16HI "v16hi") (V8HI "v8hi")
326 (V32QI "v32qi") (V16QI "v16qi")])
327
328 ;; Mapping of vector modes to a vector mode of double size
;; Same element type, twice as many elements.
329 (define_mode_attr ssedoublevecmode
330 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
331 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
332 (V8SF "V16SF") (V4DF "V8DF")
333 (V4SF "V8SF") (V2DF "V4DF")])
334
335 ;; Mapping of vector modes to a vector mode of half size
;; Same element type, half as many elements.  There is no entry for
;; V2DI or V2DF, whose halves would be single elements.
336 (define_mode_attr ssehalfvecmode
337 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
338 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
339 (V8SF "V4SF") (V4DF "V2DF")
340 (V4SF "V2SF")])
341
342 ;; Mapping of vector modes back to the scalar modes
;; The mode of a single element of the vector.
343 (define_mode_attr ssescalarmode
344 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
345 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
346 (V8SF "SF") (V4DF "DF")
347 (V4SF "SF") (V2DF "DF")])
348
349 ;; Number of scalar elements in each vector type
;; The count is given as a decimal string.
350 (define_mode_attr ssescalarnum
351 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
352 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
353 (V8SF "8") (V4DF "4")
354 (V4SF "4") (V2DF "2")])
355
356 ;; SSE prefix for integer vector modes
;; "p" for integer vector modes and the empty string for float vector
;; modes; only modes with 4- or 8-byte elements are listed.
357 (define_mode_attr sseintprefix
358 [(V2DI "p") (V2DF "")
359 (V4DI "p") (V4DF "")
360 (V4SI "p") (V4SF "")
361 (V8SI "p") (V8SF "")])
362
363 ;; SSE scalar suffix for vector modes
;; "ss" / "sd" for single / double precision.  V8SI and V4DI also map
;; to "ss" / "sd", whereas V4SI maps to "d"; V2DI has no entry.
364 (define_mode_attr ssescalarmodesuffix
365 [(SF "ss") (DF "sd")
366 (V8SF "ss") (V4DF "sd")
367 (V4SF "ss") (V2DF "sd")
368 (V8SI "ss") (V4DI "sd")
369 (V4SI "d")])
370
371 ;; Pack/unpack vector modes
;; Unpack: same total size, half as many elements, each twice as wide.
372 (define_mode_attr sseunpackmode
373 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
374 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
375
;; Pack: the inverse mapping -- same total size, twice as many
;; elements, each half as wide.
376 (define_mode_attr ssepackmode
377 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
378 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
379
380 ;; Mapping of the max integer size for xop rotate immediate constraint
;; The value is the element width in bits minus one, for 128-bit
;; integer modes only.
381 (define_mode_attr sserotatemax
382 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
383
384 ;; Mapping of mode to cast intrinsic name
;; "si" for V8SI, "ps" for V8SF, "pd" for V4DF.
385 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
386
387 ;; Instruction suffix for sign and zero extensions.
;; This is a code attribute (keyed on the rtx code), not a mode attribute.
388 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
389
390 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; The integer entries expand to the output-template escape "%~128";
;; NOTE(review): presumably "%~" prints "i" or "f" depending on
;; TARGET_AVX2, as the comment above implies -- confirm in the
;; operand-printing code, which is outside this file.
391 (define_mode_attr i128
392 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
393 (V8SI "%~128") (V4DI "%~128")])
394
395 ;; Mix-n-match
;; Three 256-bit modes: V8SI, V8SF and V4DF.  These are the same modes
;; that have an entry in the castmode attribute.
396 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
397
;; Scalar and vector float modes, unguarded.
398 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
399
400 ;; Mapping of immediate bits for blend instructions
;; Each value equals 2^N - 1 for an N-element vector, i.e. one
;; immediate bit per element.
401 (define_mode_attr blendbits
402 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
403
404 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
405
406 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
407 ;;
408 ;; Move patterns
409 ;;
410 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
411
412 ;; All of these patterns are enabled for SSE1 as well as SSE2.
413 ;; This is essential for maintaining stable calling conventions.
414
;; Move expander for every mode in V16.  The RTL template is never
;; emitted: the preparation code hands the operands to
;; ix86_expand_vector_move and finishes with DONE.
415 (define_expand "mov<mode>"
416 [(set (match_operand:V16 0 "nonimmediate_operand" "")
417 (match_operand:V16 1 "nonimmediate_operand" ""))]
418 "TARGET_SSE"
419 {
420 ix86_expand_vector_move (<MODE>mode, operands);
421 DONE;
422 })
423
;; The move instruction proper, for every mode in V16.
;;
;; Alternatives:
;;   0: constant (constraint "C")  -> register
;;   1: register or memory         -> register
;;   2: register                   -> memory
;; The insn condition additionally requires at least one of the two
;; operands to be a register, so memory-to-memory moves never match.
;;
;; The mnemonic is chosen from the insn's "mode" attribute, which is
;; computed at the bottom of the pattern.
424 (define_insn "*mov<mode>_internal"
425 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
426 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
427 "TARGET_SSE
428 && (register_operand (operands[0], <MODE>mode)
429 || register_operand (operands[1], <MODE>mode))"
430 {
431 switch (which_alternative)
432 {
433 case 0:
/* Constant load: the helper picks the instruction.  */
434 return standard_sse_constant_opcode (insn, operands[1]);
435 case 1:
436 case 2:
/* Load, store or register copy.  In each mode group the unaligned
   form is used only when TARGET_AVX is set and one of the operands
   is misaligned; otherwise an aligned form is returned.  */
437 switch (get_attr_mode (insn))
438 {
439 case MODE_V8SF:
440 case MODE_V4SF:
441 if (TARGET_AVX
442 && (misaligned_operand (operands[0], <MODE>mode)
443 || misaligned_operand (operands[1], <MODE>mode)))
444 return "vmovups\t{%1, %0|%0, %1}";
445 else
446 return "%vmovaps\t{%1, %0|%0, %1}";
447
448 case MODE_V4DF:
449 case MODE_V2DF:
450 if (TARGET_AVX
451 && (misaligned_operand (operands[0], <MODE>mode)
452 || misaligned_operand (operands[1], <MODE>mode)))
453 return "vmovupd\t{%1, %0|%0, %1}";
/* With TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL the packed-single move is
   used for the aligned case instead of movapd.  */
454 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
455 return "%vmovaps\t{%1, %0|%0, %1}";
456 else
457 return "%vmovapd\t{%1, %0|%0, %1}";
458
459 case MODE_OI:
460 case MODE_TI:
461 if (TARGET_AVX
462 && (misaligned_operand (operands[0], <MODE>mode)
463 || misaligned_operand (operands[1], <MODE>mode)))
464 return "vmovdqu\t{%1, %0|%0, %1}";
/* Likewise, movaps replaces movdqa for the aligned integer case.  */
465 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
466 return "%vmovaps\t{%1, %0|%0, %1}";
467 else
468 return "%vmovdqa\t{%1, %0|%0, %1}";
469
470 default:
471 gcc_unreachable ();
472 }
473 default:
474 gcc_unreachable ();
475 }
476 }
477 [(set_attr "type" "sselog1,ssemov,ssemov")
478 (set_attr "prefix" "maybe_vex")
;; "mode" attribute, first matching test wins:
;;   - TARGET_AVX: the per-mode <sseinsnmode> value;
;;   - optimizing the function for size, or no SSE2, or a store
;;     (alternative 2) with TARGET_SSE_TYPELESS_STORES: V4SF;
;;   - V4SFmode: V4SF;  V2DFmode: V2DF;
;;   - anything else: TI.
479 (set (attr "mode")
480 (cond [(match_test "TARGET_AVX")
481 (const_string "<sseinsnmode>")
482 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
483 (not (match_test "TARGET_SSE2")))
484 (and (eq_attr "alternative" "2")
485 (match_test "TARGET_SSE_TYPELESS_STORES")))
486 (const_string "V4SF")
487 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
488 (const_string "V4SF")
489 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
490 (const_string "V2DF")
491 ]
492 (const_string "TI")))])
493
;; Builds a V2DI whose first element is element 0 of operand 1 and whose
;; second element is zero.  Emitted as a single movq; the source may be
;; a register or memory.
494 (define_insn "sse2_movq128"
495 [(set (match_operand:V2DI 0 "register_operand" "=x")
496 (vec_concat:V2DI
497 (vec_select:DI
498 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
499 (parallel [(const_int 0)]))
500 (const_int 0)))]
501 "TARGET_SSE2"
502 "%vmovq\t{%1, %0|%0, %1}"
503 [(set_attr "type" "ssemov")
504 (set_attr "prefix" "maybe_vex")
505 (set_attr "mode" "TI")])
506
507 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
508 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
509 ;; from memory, we'd prefer to load the memory directly into the %xmm
510 ;; register. To facilitate this happy circumstance, this pattern won't
511 ;; split until after register allocation. If the 64-bit value didn't
512 ;; come from memory, this is the best we can do. This is much better
513 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
514 ;; from there.
515
;; Operands:
;;   0  destination xmm register, viewed as V4SI
;;   1  DImode source: alternative 0 takes it in integer registers ("r"),
;;      alternative 1 from memory ("m")
;;   2  V4SI scratch; an early-clobber xmm register in alternative 0 and
;;      unused ("X") in alternative 1
;; The insn has no assembler output of its own ("#"); it is always split
;; once reload has completed.
;;
;; The replacement template below is only the [(const_int 0)]
;; placeholder: the preparation code emits the real instructions itself
;; and therefore has to finish with DONE.  Falling off the end of the
;; block would emit the placeholder as an additional insn after them.
516 (define_insn_and_split "movdi_to_sse"
517 [(parallel
518 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
519 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
520 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
521 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
522 "#"
523 "&& reload_completed"
524 [(const_int 0)]
525 {
526 if (register_operand (operands[1], DImode))
527 {
528 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
529 Assemble the 64-bit DImode value in an xmm register. */
/* The SImode word at byte offset 0 goes into operand 0 and the word at
   byte offset 4 into the scratch; the two are then interleaved into
   operand 0.  */
530 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
531 gen_rtx_SUBREG (SImode, operands[1], 0)));
532 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
533 gen_rtx_SUBREG (SImode, operands[1], 4)));
534 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
535 operands[2]));
536 }
537 else if (memory_operand (operands[1], DImode))
/* Memory source: concatenate the 64-bit value with zero, addressing
   the destination as V2DI.  */
538 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
539 operands[1], const0_rtx));
540 else
541 gcc_unreachable ();
/* Everything has been emitted above; do not emit the placeholder
   template.  */
DONE;
542 })
543
;; After reload, rewrite a V4SF load whose source satisfies
;; zero_extended_scalar_load_operand: operand 1 is narrowed to its
;; SFmode part at offset 0, duplicated, and merged under mask 1 with an
;; all-zero V4SF vector (operand 2), so only element 0 comes from the
;; loaded scalar.
544 (define_split
545 [(set (match_operand:V4SF 0 "register_operand" "")
546 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
547 "TARGET_SSE && reload_completed"
548 [(set (match_dup 0)
549 (vec_merge:V4SF
550 (vec_duplicate:V4SF (match_dup 1))
551 (match_dup 2)
552 (const_int 1)))]
553 {
554 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
555 operands[2] = CONST0_RTX (V4SFmode);
556 })
557
;; V2DF counterpart: operand 1 is narrowed to its DFmode part at offset
;; 0 and concatenated with a DFmode zero (operand 2).
558 (define_split
559 [(set (match_operand:V2DF 0 "register_operand" "")
560 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
561 "TARGET_SSE2 && reload_completed"
562 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
563 {
564 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
565 operands[2] = CONST0_RTX (DFmode);
566 })
567
;; Push of a vector register, for every mode in V16.  All of the work
;; is delegated to ix86_expand_push; nothing from the template is
;; emitted.
568 (define_expand "push<mode>1"
569 [(match_operand:V16 0 "register_operand" "")]
570 "TARGET_SSE"
571 {
572 ix86_expand_push (<MODE>mode, operands[0]);
573 DONE;
574 })
575
;; Misaligned move expander, for every mode in V16.  Delegates to
;; ix86_expand_vector_move_misalign; nothing from the template is
;; emitted.
576 (define_expand "movmisalign<mode>"
577 [(set (match_operand:V16 0 "nonimmediate_operand" "")
578 (match_operand:V16 1 "nonimmediate_operand" ""))]
579 "TARGET_SSE"
580 {
581 ix86_expand_vector_move_misalign (<MODE>mode, operands);
582 DONE;
583 })
584
;; Unaligned move of a float vector, represented as UNSPEC_MOVU.
;; The expander only has to break up memory-to-memory moves: when both
;; operands are MEMs the source is first forced into a register.  It
;; does not invoke DONE, so the template itself is emitted.
585 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
586 [(set (match_operand:VF 0 "nonimmediate_operand" "")
587 (unspec:VF
588 [(match_operand:VF 1 "nonimmediate_operand" "")]
589 UNSPEC_MOVU))]
590 "TARGET_SSE"
591 {
592 if (MEM_P (operands[0]) && MEM_P (operands[1]))
593 operands[1] = force_reg (<MODE>mode, operands[1]);
594 })
595
;; The matching insn.  Alternative 0 is a load or register copy,
;; alternative 1 a store; the condition rejects memory-to-memory.
596 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
597 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
598 (unspec:VF
599 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
600 UNSPEC_MOVU))]
601 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
602 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
603 [(set_attr "type" "ssemov")
604 (set_attr "movu" "1")
605 (set_attr "prefix" "maybe_vex")
606 (set_attr "mode" "<MODE>")])
607
;; Unaligned move of a byte vector (V16QI, or V32QI under TARGET_AVX),
;; represented as UNSPEC_MOVU.  As with the float variant, the expander
;; only forces the source into a register when both operands are MEMs.
608 (define_expand "<sse2>_movdqu<avxsizesuffix>"
609 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
610 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
611 UNSPEC_MOVU))]
612 "TARGET_SSE2"
613 {
614 if (MEM_P (operands[0]) && MEM_P (operands[1]))
615 operands[1] = force_reg (<MODE>mode, operands[1]);
616 })
617
;; The matching insn: alternative 0 loads or copies, alternative 1
;; stores; memory-to-memory is rejected by the condition.
618 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
619 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
620 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
621 UNSPEC_MOVU))]
622 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
623 "%vmovdqu\t{%1, %0|%0, %1}"
624 [(set_attr "type" "ssemov")
625 (set_attr "movu" "1")
;; prefix_data16 is "1" without AVX and "*" with TARGET_AVX.
626 (set (attr "prefix_data16")
627 (if_then_else
628 (match_test "TARGET_AVX")
629 (const_string "*")
630 (const_string "1")))
631 (set_attr "prefix" "maybe_vex")
632 (set_attr "mode" "<sseinsnmode>")])
633
;; lddqu: load of a byte vector, represented as UNSPEC_LDDQU.  It is a
;; load only -- the source must be memory and the destination a register.
634 (define_insn "<sse3>_lddqu<avxsizesuffix>"
635 [(set (match_operand:VI1 0 "register_operand" "=x")
636 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
637 UNSPEC_LDDQU))]
638 "TARGET_SSE3"
639 "%vlddqu\t{%1, %0|%0, %1}"
640 [(set_attr "type" "ssemov")
641 (set_attr "movu" "1")
;; Without AVX: prefix_data16 is "0" and prefix_rep is "1".  With
;; TARGET_AVX both are "*".
642 (set (attr "prefix_data16")
643 (if_then_else
644 (match_test "TARGET_AVX")
645 (const_string "*")
646 (const_string "0")))
647 (set (attr "prefix_rep")
648 (if_then_else
649 (match_test "TARGET_AVX")
650 (const_string "*")
651 (const_string "1")))
652 (set_attr "prefix" "maybe_vex")
653 (set_attr "mode" "<sseinsnmode>")])
654
;; movnti: UNSPEC_MOVNT store of a general-purpose register ("r") to
;; memory, for the modes of the SWI48 iterator.
;; NOTE(review): SWI48 is not defined in this file; presumably it covers
;; SImode and DImode -- confirm where it is defined.
;; The template has no %v prefix and no "prefix" attribute is set, so
;; the same mnemonic is emitted regardless of TARGET_AVX.
655 (define_insn "sse2_movnti<mode>"
656 [(set (match_operand:SWI48 0 "memory_operand" "=m")
657 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
658 UNSPEC_MOVNT))]
659 "TARGET_SSE2"
660 "movnti\t{%1, %0|%0, %1}"
661 [(set_attr "type" "ssemov")
662 (set_attr "prefix_data16" "0")
663 (set_attr "mode" "<MODE>")])
664
;; UNSPEC_MOVNT store of a float vector register to memory.  The
;; mnemonic is "movnt" followed by the mode's <ssemodesuffix>.
665 (define_insn "<sse>_movnt<mode>"
666 [(set (match_operand:VF 0 "memory_operand" "=m")
667 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
668 UNSPEC_MOVNT))]
669 "TARGET_SSE"
670 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
671 [(set_attr "type" "ssemov")
672 (set_attr "prefix" "maybe_vex")
673 (set_attr "mode" "<MODE>")])
674
;; movntdq: UNSPEC_MOVNT store of a DImode-element integer vector
;; register (V2DI, or V4DI under TARGET_AVX) to memory.
;; NOTE(review): "type" is "ssecvt" here, whereas the other two
;; non-temporal store insns in this file (sse2_movnti<mode> and
;; <sse>_movnt<mode>) use "ssemov" -- confirm this is intended.
675 (define_insn "<sse2>_movnt<mode>"
676 [(set (match_operand:VI8 0 "memory_operand" "=m")
677 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
678 UNSPEC_MOVNT))]
679 "TARGET_SSE2"
680 "%vmovntdq\t{%1, %0|%0, %1}"
681 [(set_attr "type" "ssecvt")
;; prefix_data16 is "1" without AVX and "*" with TARGET_AVX.
682 (set (attr "prefix_data16")
683 (if_then_else
684 (match_test "TARGET_AVX")
685 (const_string "*")
686 (const_string "1")))
687 (set_attr "prefix" "maybe_vex")
688 (set_attr "mode" "<sseinsnmode>")])
689
690 ; Expand patterns for non-temporal stores. At the moment, only those
691 ; that directly map to insns are defined; it would be possible to
692 ; define patterns for other modes that would expand to several insns.
693
694 ;; Modes handled by storent patterns.
;; Scalar entries: DI needs SSE2 in 64-bit mode, SI needs SSE2, SF and
;; DF need SSE4A.  Vector entries: the 256-bit modes need AVX, V2DI and
;; V2DF need SSE2, V4SF is unguarded.
695 (define_mode_iterator STORENT_MODE
696 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
697 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
698 (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
699 (V8SF "TARGET_AVX") V4SF
700 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
701
;; The expander has no preparation code, so it simply emits the
;; UNSPEC_MOVNT store from its template, to be recognized by an insn
;; with that shape.
;; NOTE(review): the movnt insns defined above this point cover the
;; SWI48, VF and VI8 modes; insns matching the SF/DF entries are
;; presumably defined elsewhere -- confirm.
702 (define_expand "storent<mode>"
703 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
704 (unspec:STORENT_MODE
705 [(match_operand:STORENT_MODE 1 "register_operand" "")]
706 UNSPEC_MOVNT))]
707 "TARGET_SSE")
708
709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
710 ;;
711 ;; Parallel floating point arithmetic
712 ;;
713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
714
;; Expander for the rtx codes of the absneg code iterator on float
;; vectors.  The template is not emitted: the work is delegated to
;; ix86_expand_fp_absneg_operator, followed by DONE.
715 (define_expand "<code><mode>2"
716 [(set (match_operand:VF 0 "register_operand" "")
717 (absneg:VF
718 (match_operand:VF 1 "register_operand" "")))]
719 "TARGET_SSE"
720 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
721
;; Vector abs/neg, kept as a single insn until reload has completed and
;; then split into one logical operation.
;;
;; Operands:
;;   0  destination register
;;   1  the value being negated / made absolute
;;   2  an extra operand referenced through (use ...).
;;      NOTE(review): presumably the sign-bit mask prepared by
;;      ix86_expand_fp_absneg_operator -- confirm in that function.
;;   3  the abs/neg operator itself
;; Alternatives 0 and 1 (isa "noavx") tie the destination to operand 1
;; or operand 2 respectively; alternatives 2 and 3 (isa "avx") do not
;; tie it to either.
722 (define_insn_and_split "*absneg<mode>2"
723 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
724 (match_operator:VF 3 "absneg_operator"
725 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
726 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
727 "TARGET_SSE"
728 "#"
729 "&& reload_completed"
730 [(const_int 0)]
731 {
732 enum rtx_code absneg_op;
733 rtx op1, op2;
734 rtx t;
735
736 if (TARGET_AVX)
737 {
/* Put a memory operand, if any, in the second source position.  */
738 if (MEM_P (operands[1]))
739 op1 = operands[2], op2 = operands[1];
740 else
741 op1 = operands[1], op2 = operands[2];
742 }
743 else
744 {
/* The first source is the destination itself; the second source is
   whichever of operands 1 and 2 is not the destination.  */
745 op1 = operands[0];
746 if (rtx_equal_p (operands[0], operands[1]))
747 op2 = operands[2];
748 else
749 op2 = operands[1];
750 }
751
/* NEG becomes XOR of the two sources; the other code accepted by the
   operator predicate becomes AND.  */
752 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
753 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
754 t = gen_rtx_SET (VOIDmode, operands[0], t);
755 emit_insn (t);
756 DONE;
757 }
758 [(set_attr "isa" "noavx,noavx,avx,avx")])
759
;; Vector float addition/subtraction (the codes of the plusminus
;; iterator).  The expander calls ix86_fixup_binary_operands_no_copy on
;; the operands and then lets the template be emitted (there is no DONE).
760 (define_expand "<plusminus_insn><mode>3"
761 [(set (match_operand:VF 0 "register_operand" "")
762 (plusminus:VF
763 (match_operand:VF 1 "nonimmediate_operand" "")
764 (match_operand:VF 2 "nonimmediate_operand" "")))]
765 "TARGET_SSE"
766 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
767
;; Alternative 0 (isa "noavx") is the two-operand form with the
;; destination tied to operand 1; alternative 1 (isa "avx") is the
;; three-operand v-prefixed form.
767 (define_insn "*<plusminus_insn><mode>3"
768 [(set (match_operand:VF 0 "register_operand" "=x,x")
769 (plusminus:VF
770 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
771 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
772 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
773 "@
774 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
775 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
776 [(set_attr "isa" "noavx,avx")
777 (set_attr "type" "sseadd")
778 (set_attr "prefix" "orig,vex")
779 (set_attr "mode" "<MODE>")])
781
;; Scalar add/sub on a 128-bit float vector: the vec_merge with mask 1
;; takes element 0 from the arithmetic result and the remaining elements
;; from operand 1 (match_dup 1).  Operand 1 must be a register.
782 (define_insn "<sse>_vm<plusminus_insn><mode>3"
783 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
784 (vec_merge:VF_128
785 (plusminus:VF_128
786 (match_operand:VF_128 1 "register_operand" "0,x")
787 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
788 (match_dup 1)
789 (const_int 1)))]
790 "TARGET_SSE"
791 "@
792 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
793 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
794 [(set_attr "isa" "noavx,avx")
795 (set_attr "type" "sseadd")
796 (set_attr "prefix" "orig,vex")
797 (set_attr "mode" "<ssescalarmode>")])
798
;; Vector float multiplication.  The expander calls
;; ix86_fixup_binary_operands_no_copy and then emits the template.
799 (define_expand "mul<mode>3"
800 [(set (match_operand:VF 0 "register_operand" "")
801 (mult:VF
802 (match_operand:VF 1 "nonimmediate_operand" "")
803 (match_operand:VF 2 "nonimmediate_operand" "")))]
804 "TARGET_SSE"
805 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
806
;; The "%" in operand 1's constraint marks operands 1 and 2 as
;; commutative.  Alternative 0 is the two-operand non-AVX form,
;; alternative 1 the three-operand AVX form.
807 (define_insn "*mul<mode>3"
808 [(set (match_operand:VF 0 "register_operand" "=x,x")
809 (mult:VF
810 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
811 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
812 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
813 "@
814 mul<ssemodesuffix>\t{%2, %0|%0, %2}
815 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
816 [(set_attr "isa" "noavx,avx")
817 (set_attr "type" "ssemul")
818 (set_attr "prefix" "orig,vex")
819 (set_attr "mode" "<MODE>")])
820
;; Scalar multiply on a 128-bit float vector: element 0 is the product,
;; the remaining elements are copied from operand 1 (vec_merge with
;; mask 1 and match_dup 1).
821 (define_insn "<sse>_vmmul<mode>3"
822 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
823 (vec_merge:VF_128
824 (mult:VF_128
825 (match_operand:VF_128 1 "register_operand" "0,x")
826 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
827 (match_dup 1)
828 (const_int 1)))]
829 "TARGET_SSE"
830 "@
831 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
832 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
833 [(set_attr "isa" "noavx,avx")
834 (set_attr "type" "ssemul")
835 (set_attr "prefix" "orig,vex")
836 (set_attr "mode" "<ssescalarmode>")])
837
;; Two expanders share the name template "div<mode>3": this one covers
;; the double-precision modes (VF2), the next one the single-precision
;; modes (VF1).  The double-precision version only fixes up the
;; operands and emits the div template.
838 (define_expand "div<mode>3"
839 [(set (match_operand:VF2 0 "register_operand" "")
840 (div:VF2 (match_operand:VF2 1 "register_operand" "")
841 (match_operand:VF2 2 "nonimmediate_operand" "")))]
842 "TARGET_SSE2"
843 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
844
;; Single-precision version.  When every condition in the "if" below
;; holds, the division is emitted by ix86_emit_swdivsf instead of as a
;; div rtx; otherwise the template is emitted as usual.
845 (define_expand "div<mode>3"
846 [(set (match_operand:VF1 0 "register_operand" "")
847 (div:VF1 (match_operand:VF1 1 "register_operand" "")
848 (match_operand:VF1 2 "nonimmediate_operand" "")))]
849 "TARGET_SSE"
850 {
851 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
852
853 if (TARGET_SSE_MATH
854 && TARGET_RECIP_VEC_DIV
855 && !optimize_insn_for_size_p ()
856 && flag_finite_math_only && !flag_trapping_math
857 && flag_unsafe_math_optimizations)
858 {
859 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
860 DONE;
861 }
862 })
863
;; Vector float division.  Operand 1 (the dividend) must be a register
;; and carries no commutative marker.  Alternative 0 is the two-operand
;; non-AVX form, alternative 1 the three-operand AVX form.
864 (define_insn "<sse>_div<mode>3"
865 [(set (match_operand:VF 0 "register_operand" "=x,x")
866 (div:VF
867 (match_operand:VF 1 "register_operand" "0,x")
868 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
869 "TARGET_SSE"
870 "@
871 div<ssemodesuffix>\t{%2, %0|%0, %2}
872 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
873 [(set_attr "isa" "noavx,avx")
874 (set_attr "type" "ssediv")
875 (set_attr "prefix" "orig,vex")
876 (set_attr "mode" "<MODE>")])
877
;; Scalar divide on a 128-bit float vector: element 0 is the quotient,
;; the remaining elements are copied from operand 1 (vec_merge with
;; mask 1 and match_dup 1).
878 (define_insn "<sse>_vmdiv<mode>3"
879 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
880 (vec_merge:VF_128
881 (div:VF_128
882 (match_operand:VF_128 1 "register_operand" "0,x")
883 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
884 (match_dup 1)
885 (const_int 1)))]
886 "TARGET_SSE"
887 "@
888 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
889 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
890 [(set_attr "isa" "noavx,avx")
891 (set_attr "type" "ssediv")
892 (set_attr "prefix" "orig,vex")
893 (set_attr "mode" "<ssescalarmode>")])
894
;; rcpps on single-precision vectors, represented as UNSPEC_RCP.  The
;; mnemonic is hard-coded as "rcpps" because VF1 only contains
;; single-precision modes.
;; NOTE(review): UNSPEC_RCP is not among the codes listed in this
;; file's "unspec" enum; presumably it is declared elsewhere -- confirm.
895 (define_insn "<sse>_rcp<mode>2"
896 [(set (match_operand:VF1 0 "register_operand" "=x")
897 (unspec:VF1
898 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
899 "TARGET_SSE"
900 "%vrcpps\t{%1, %0|%0, %1}"
901 [(set_attr "type" "sse")
902 (set_attr "atom_sse_attr" "rcp")
903 (set_attr "prefix" "maybe_vex")
904 (set_attr "mode" "<MODE>")])
905
;; rcpss: element 0 of the result is the UNSPEC_RCP of operand 1; the
;; remaining elements come from operand 2 (vec_merge with mask 1).  In
;; the non-AVX alternative operand 2 is tied to the destination; in the
;; AVX alternative it is printed as a separate source.
906 (define_insn "sse_vmrcpv4sf2"
907 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
908 (vec_merge:V4SF
909 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
910 UNSPEC_RCP)
911 (match_operand:V4SF 2 "register_operand" "0,x")
912 (const_int 1)))]
913 "TARGET_SSE"
914 "@
915 rcpss\t{%1, %0|%0, %1}
916 vrcpss\t{%1, %2, %0|%0, %2, %1}"
917 [(set_attr "isa" "noavx,avx")
918 (set_attr "type" "sse")
919 (set_attr "atom_sse_attr" "rcp")
920 (set_attr "prefix" "orig,vex")
921 (set_attr "mode" "SF")])
922
;; Two expanders share the name template "sqrt<mode>2".  The
;; double-precision one (VF2) has no preparation code and just emits
;; the sqrt template.
923 (define_expand "sqrt<mode>2"
924 [(set (match_operand:VF2 0 "register_operand" "")
925 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
926 "TARGET_SSE2")
927
;; Single-precision version (VF1).  When every condition in the "if"
;; below holds, the square root is emitted by ix86_emit_swsqrtsf (last
;; argument false); otherwise the sqrt template is emitted.
928 (define_expand "sqrt<mode>2"
929 [(set (match_operand:VF1 0 "register_operand" "")
930 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
931 "TARGET_SSE"
932 {
933 if (TARGET_SSE_MATH
934 && TARGET_RECIP_VEC_SQRT
935 && !optimize_insn_for_size_p ()
936 && flag_finite_math_only && !flag_trapping_math
937 && flag_unsafe_math_optimizations)
938 {
939 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
940 DONE;
941 }
942 })
943
;; Vector float square root; a single alternative whose source may be a
;; register or memory.
944 (define_insn "<sse>_sqrt<mode>2"
945 [(set (match_operand:VF 0 "register_operand" "=x")
946 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
947 "TARGET_SSE"
948 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
949 [(set_attr "type" "sse")
950 (set_attr "atom_sse_attr" "sqrt")
951 (set_attr "prefix" "maybe_vex")
952 (set_attr "mode" "<MODE>")])
953
;; Scalar square root on a 128-bit float vector: element 0 is the sqrt
;; of operand 1's element 0; the remaining elements come from operand 2
;; (vec_merge with mask 1).  Operand 2 is tied to the destination in the
;; non-AVX alternative and is a separate source in the AVX alternative.
954 (define_insn "<sse>_vmsqrt<mode>2"
955 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
956 (vec_merge:VF_128
957 (sqrt:VF_128
958 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
959 (match_operand:VF_128 2 "register_operand" "0,x")
960 (const_int 1)))]
961 "TARGET_SSE"
962 "@
963 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
964 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
965 [(set_attr "isa" "noavx,avx")
966 (set_attr "type" "sse")
967 (set_attr "atom_sse_attr" "sqrt")
968 (set_attr "prefix" "orig,vex")
969 (set_attr "mode" "<ssescalarmode>")])
970
;; Reciprocal square root expander for single-precision vectors,
;; enabled under TARGET_SSE_MATH.  The template is never emitted: the
;; expansion is always produced by ix86_emit_swsqrtsf, called with a
;; last argument of true, and followed by DONE.
971 (define_expand "rsqrt<mode>2"
972 [(set (match_operand:VF1 0 "register_operand" "")
973 (unspec:VF1
974 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
975 "TARGET_SSE_MATH"
976 {
977 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
978 DONE;
979 })
980
;; rsqrtps on single-precision vectors, represented as UNSPEC_RSQRT.
;; NOTE(review): UNSPEC_RSQRT is not among the codes listed in this
;; file's "unspec" enum; presumably it is declared elsewhere -- confirm.
981 (define_insn "<sse>_rsqrt<mode>2"
982 [(set (match_operand:VF1 0 "register_operand" "=x")
983 (unspec:VF1
984 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
985 "TARGET_SSE"
986 "%vrsqrtps\t{%1, %0|%0, %1}"
987 [(set_attr "type" "sse")
988 (set_attr "prefix" "maybe_vex")
989 (set_attr "mode" "<MODE>")])
990
;; rsqrtss: element 0 of the result is the UNSPEC_RSQRT of operand 1;
;; the remaining elements come from operand 2 (vec_merge with mask 1).
;; Operand 2 is tied to the destination in the non-AVX alternative.
991 (define_insn "sse_vmrsqrtv4sf2"
992 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
993 (vec_merge:V4SF
994 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
995 UNSPEC_RSQRT)
996 (match_operand:V4SF 2 "register_operand" "0,x")
997 (const_int 1)))]
998 "TARGET_SSE"
999 "@
1000 rsqrtss\t{%1, %0|%0, %1}
1001 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
1002 [(set_attr "isa" "noavx,avx")
1003 (set_attr "type" "sse")
1004 (set_attr "prefix" "orig,vex")
1005 (set_attr "mode" "SF")])
1006
1007 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1008 ;; isn't really correct, as those rtl operators aren't defined when
1009 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1010
;; Expander for the codes of the smaxmin iterator on float vectors.
;; Unless -ffinite-math-only is in effect, operand 1 is forced into a
;; register first; this matches the non-finite insn below, which
;; requires operand 1 to be a register and has no commutative marker.
1011 (define_expand "<code><mode>3"
1012 [(set (match_operand:VF 0 "register_operand" "")
1013 (smaxmin:VF
1014 (match_operand:VF 1 "nonimmediate_operand" "")
1015 (match_operand:VF 2 "nonimmediate_operand" "")))]
1016 "TARGET_SSE"
1017 {
1018 if (!flag_finite_math_only)
1019 operands[1] = force_reg (<MODE>mode, operands[1]);
1020 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1021 })
1022
;; Vector min/max insn used only when flag_finite_math_only is set.
;; Operand 1 carries the '%' commutative marker, so operands 1 and 2 may
;; be exchanged; the operand combination must also be accepted by
;; ix86_binary_operator_ok.
1023 (define_insn "*<code><mode>3_finite"
1024 [(set (match_operand:VF 0 "register_operand" "=x,x")
1025 (smaxmin:VF
1026 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1027 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1028 "TARGET_SSE && flag_finite_math_only
1029 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1030 "@
1031 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1032 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1033 [(set_attr "isa" "noavx,avx")
1034 (set_attr "type" "sseadd")
1035 (set_attr "prefix" "orig,vex")
1036 (set_attr "mode" "<MODE>")])
1037
;; Vector min/max insn used when flag_finite_math_only is clear.  Unlike
;; the _finite variant there is no '%' marker and operand 1 must be a
;; register, so the operands are emitted in the order the RTL gives them.
1038 (define_insn "*<code><mode>3"
1039 [(set (match_operand:VF 0 "register_operand" "=x,x")
1040 (smaxmin:VF
1041 (match_operand:VF 1 "register_operand" "0,x")
1042 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1043 "TARGET_SSE && !flag_finite_math_only"
1044 "@
1045 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1046 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1047 [(set_attr "isa" "noavx,avx")
1048 (set_attr "type" "sseadd")
1049 (set_attr "prefix" "orig,vex")
1050 (set_attr "mode" "<MODE>")])
1051
;; Scalar min/max.  Element 0 is the min/max of operands 1 and 2; the
;; remaining elements are copied from operand 1 (match_dup 1 under
;; vec_merge mask 1).
1052 (define_insn "<sse>_vm<code><mode>3"
1053 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1054 (vec_merge:VF_128
1055 (smaxmin:VF_128
1056 (match_operand:VF_128 1 "register_operand" "0,x")
1057 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
1058 (match_dup 1)
1059 (const_int 1)))]
1060 "TARGET_SSE"
1061 "@
1062 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1063 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1064 [(set_attr "isa" "noavx,avx")
1065 (set_attr "type" "sse")
1066 (set_attr "prefix" "orig,vex")
1067 (set_attr "mode" "<ssescalarmode>")])
1068
1069 ;; These versions of the min/max patterns implement exactly the operations
1070 ;; min = (op1 < op2 ? op1 : op2)
1071 ;; max = (!(op1 < op2) ? op1 : op2)
1072 ;; Their operands are not commutative, and thus they may be used in the
1073 ;; presence of -0.0 and NaN.
1074
;; Order-sensitive vector minimum, represented as UNSPEC_IEEE_MIN rather
;; than smin.  No commutative marker is present and operand 1 must be a
;; register, so operands 1 and 2 reach the min instruction in RTL order.
1075 (define_insn "*ieee_smin<mode>3"
1076 [(set (match_operand:VF 0 "register_operand" "=x,x")
1077 (unspec:VF
1078 [(match_operand:VF 1 "register_operand" "0,x")
1079 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1080 UNSPEC_IEEE_MIN))]
1081 "TARGET_SSE"
1082 "@
1083 min<ssemodesuffix>\t{%2, %0|%0, %2}
1084 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1085 [(set_attr "isa" "noavx,avx")
1086 (set_attr "type" "sseadd")
1087 (set_attr "prefix" "orig,vex")
1088 (set_attr "mode" "<MODE>")])
1089
;; Order-sensitive vector maximum, represented as UNSPEC_IEEE_MAX rather
;; than smax.  No commutative marker is present and operand 1 must be a
;; register, so operands 1 and 2 reach the max instruction in RTL order.
1090 (define_insn "*ieee_smax<mode>3"
1091 [(set (match_operand:VF 0 "register_operand" "=x,x")
1092 (unspec:VF
1093 [(match_operand:VF 1 "register_operand" "0,x")
1094 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1095 UNSPEC_IEEE_MAX))]
1096 "TARGET_SSE"
1097 "@
1098 max<ssemodesuffix>\t{%2, %0|%0, %2}
1099 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1100 [(set_attr "isa" "noavx,avx")
1101 (set_attr "type" "sseadd")
1102 (set_attr "prefix" "orig,vex")
1103 (set_attr "mode" "<MODE>")])
1104
;; 256-bit double-precision add/subtract.  The vec_merge mask 10 (binary
;; 1010) takes elements 1 and 3 from op1 + op2 and elements 0 and 2 from
;; op1 - op2.
1105 (define_insn "avx_addsubv4df3"
1106 [(set (match_operand:V4DF 0 "register_operand" "=x")
1107 (vec_merge:V4DF
1108 (plus:V4DF
1109 (match_operand:V4DF 1 "register_operand" "x")
1110 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1111 (minus:V4DF (match_dup 1) (match_dup 2))
1112 (const_int 10)))]
1113 "TARGET_AVX"
1114 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1115 [(set_attr "type" "sseadd")
1116 (set_attr "prefix" "vex")
1117 (set_attr "mode" "V4DF")])
1118
;; 128-bit double-precision add/subtract.  The vec_merge mask 2 takes
;; element 1 from op1 + op2 and element 0 from op1 - op2.
1119 (define_insn "sse3_addsubv2df3"
1120 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1121 (vec_merge:V2DF
1122 (plus:V2DF
1123 (match_operand:V2DF 1 "register_operand" "0,x")
1124 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1125 (minus:V2DF (match_dup 1) (match_dup 2))
1126 (const_int 2)))]
1127 "TARGET_SSE3"
1128 "@
1129 addsubpd\t{%2, %0|%0, %2}
1130 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1131 [(set_attr "isa" "noavx,avx")
1132 (set_attr "type" "sseadd")
1133 (set_attr "atom_unit" "complex")
1134 (set_attr "prefix" "orig,vex")
1135 (set_attr "mode" "V2DF")])
1136
;; 256-bit single-precision add/subtract.  The vec_merge mask 170 (binary
;; 10101010) takes the odd-numbered elements from op1 + op2 and the
;; even-numbered elements from op1 - op2.
1137 (define_insn "avx_addsubv8sf3"
1138 [(set (match_operand:V8SF 0 "register_operand" "=x")
1139 (vec_merge:V8SF
1140 (plus:V8SF
1141 (match_operand:V8SF 1 "register_operand" "x")
1142 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1143 (minus:V8SF (match_dup 1) (match_dup 2))
1144 (const_int 170)))]
1145 "TARGET_AVX"
1146 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1147 [(set_attr "type" "sseadd")
1148 (set_attr "prefix" "vex")
1149 (set_attr "mode" "V8SF")])
1150
;; 128-bit single-precision add/subtract.  The vec_merge mask 10 (binary
;; 1010) takes elements 1 and 3 from op1 + op2 and elements 0 and 2 from
;; op1 - op2.  prefix_rep is set to 1 for the non-AVX alternative only.
1151 (define_insn "sse3_addsubv4sf3"
1152 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1153 (vec_merge:V4SF
1154 (plus:V4SF
1155 (match_operand:V4SF 1 "register_operand" "0,x")
1156 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1157 (minus:V4SF (match_dup 1) (match_dup 2))
1158 (const_int 10)))]
1159 "TARGET_SSE3"
1160 "@
1161 addsubps\t{%2, %0|%0, %2}
1162 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1163 [(set_attr "isa" "noavx,avx")
1164 (set_attr "type" "sseadd")
1165 (set_attr "prefix" "orig,vex")
1166 (set_attr "prefix_rep" "1,*")
1167 (set_attr "mode" "V4SF")])
1168
;; 256-bit horizontal add/subtract of doubles.  VHADDPD/VHSUBPD operate on
;; each 128-bit lane separately, so with OP being plus or minus the result is
;;   { op1[0] OP op1[1], op2[0] OP op2[1], op1[2] OP op1[3], op2[2] OP op2[3] }
;; Results derived from operands 1 and 2 alternate and never cross the lane
;; boundary.  The RTL must spell out exactly this element order, otherwise
;; the RTL optimizers are given a wrong description of the instruction.
1169 (define_insn "avx_h<plusminus_insn>v4df3"
1170 [(set (match_operand:V4DF 0 "register_operand" "=x")
1171 (vec_concat:V4DF
1172 (vec_concat:V2DF
1173 (plusminus:DF
1174 (vec_select:DF
1175 (match_operand:V4DF 1 "register_operand" "x")
1176 (parallel [(const_int 0)]))
1177 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1178 (plusminus:DF
1179 (vec_select:DF
1180 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1181 (parallel [(const_int 0)]))
1182 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1183 (vec_concat:V2DF
1184 (plusminus:DF
1185 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1186 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1187 (plusminus:DF
1188 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1189 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1190 "TARGET_AVX"
1191 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1192 [(set_attr "type" "sseadd")
1193 (set_attr "prefix" "vex")
1194 (set_attr "mode" "V4DF")])
1195
;; 128-bit horizontal add/subtract of doubles.  With OP being plus or
;; minus the result is { op1[0] OP op1[1], op2[0] OP op2[1] }.
1196 (define_insn "sse3_h<plusminus_insn>v2df3"
1197 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1198 (vec_concat:V2DF
1199 (plusminus:DF
1200 (vec_select:DF
1201 (match_operand:V2DF 1 "register_operand" "0,x")
1202 (parallel [(const_int 0)]))
1203 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1204 (plusminus:DF
1205 (vec_select:DF
1206 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1207 (parallel [(const_int 0)]))
1208 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1209 "TARGET_SSE3"
1210 "@
1211 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1212 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1213 [(set_attr "isa" "noavx,avx")
1214 (set_attr "type" "sseadd")
1215 (set_attr "prefix" "orig,vex")
1216 (set_attr "mode" "V2DF")])
1217
;; 256-bit horizontal add/subtract of floats.  With OP being plus or minus,
;; the low V4SF half of the result is
;;   { op1[0] OP op1[1], op1[2] OP op1[3], op2[0] OP op2[1], op2[2] OP op2[3] }
;; and the high half is the same expression applied to elements 4-7, so
;; each half of the result only reads the matching half of the inputs.
1218 (define_insn "avx_h<plusminus_insn>v8sf3"
1219 [(set (match_operand:V8SF 0 "register_operand" "=x")
1220 (vec_concat:V8SF
1221 (vec_concat:V4SF
1222 (vec_concat:V2SF
1223 (plusminus:SF
1224 (vec_select:SF
1225 (match_operand:V8SF 1 "register_operand" "x")
1226 (parallel [(const_int 0)]))
1227 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1228 (plusminus:SF
1229 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1230 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1231 (vec_concat:V2SF
1232 (plusminus:SF
1233 (vec_select:SF
1234 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1235 (parallel [(const_int 0)]))
1236 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1237 (plusminus:SF
1238 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1239 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1240 (vec_concat:V4SF
1241 (vec_concat:V2SF
1242 (plusminus:SF
1243 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1244 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1245 (plusminus:SF
1246 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1247 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1248 (vec_concat:V2SF
1249 (plusminus:SF
1250 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1251 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1252 (plusminus:SF
1253 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1254 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1255 "TARGET_AVX"
1256 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1257 [(set_attr "type" "sseadd")
1258 (set_attr "prefix" "vex")
1259 (set_attr "mode" "V8SF")])
1260
;; 128-bit horizontal add/subtract of floats.  With OP being plus or minus
;; the result is
;;   { op1[0] OP op1[1], op1[2] OP op1[3], op2[0] OP op2[1], op2[2] OP op2[3] }
;; prefix_rep is set to 1 for the non-AVX alternative only.
1261 (define_insn "sse3_h<plusminus_insn>v4sf3"
1262 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1263 (vec_concat:V4SF
1264 (vec_concat:V2SF
1265 (plusminus:SF
1266 (vec_select:SF
1267 (match_operand:V4SF 1 "register_operand" "0,x")
1268 (parallel [(const_int 0)]))
1269 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1270 (plusminus:SF
1271 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1272 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1273 (vec_concat:V2SF
1274 (plusminus:SF
1275 (vec_select:SF
1276 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1277 (parallel [(const_int 0)]))
1278 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1279 (plusminus:SF
1280 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1281 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1282 "TARGET_SSE3"
1283 "@
1284 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1285 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1286 [(set_attr "isa" "noavx,avx")
1287 (set_attr "type" "sseadd")
1288 (set_attr "atom_unit" "complex")
1289 (set_attr "prefix" "orig,vex")
1290 (set_attr "prefix_rep" "1,*")
1291 (set_attr "mode" "V4SF")])
1292
;; Sum reduction of the V4DF vector in operand 1 into operand 0, built
;; from three insns: a horizontal add of operand 1 with itself (tmp), a
;; vperm2f128 of tmp with itself and selector 1 (tmp2; presumably this
;; exchanges the two 128-bit halves -- confirm against the vperm2f128
;; pattern), and a vertical add of tmp and tmp2.
1293 (define_expand "reduc_splus_v4df"
1294 [(match_operand:V4DF 0 "register_operand" "")
1295 (match_operand:V4DF 1 "register_operand" "")]
1296 "TARGET_AVX"
1297 {
1298 rtx tmp = gen_reg_rtx (V4DFmode);
1299 rtx tmp2 = gen_reg_rtx (V4DFmode);
1300 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1301 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1302 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
1303 DONE;
1304 })
1305
;; Sum reduction of the V2DF vector in operand 1 into operand 0: a single
;; SSE3 horizontal add of operand 1 with itself.
1306 (define_expand "reduc_splus_v2df"
1307 [(match_operand:V2DF 0 "register_operand" "")
1308 (match_operand:V2DF 1 "register_operand" "")]
1309 "TARGET_SSE3"
1310 {
1311 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1312 DONE;
1313 })
1314
;; Sum reduction of the V8SF vector in operand 1 into operand 0.  Two
;; chained horizontal adds (operand 1 with itself, then that result with
;; itself) produce tmp2; tmp is then reused to hold a vperm2f128 of tmp2
;; with selector 1 (presumably exchanging the 128-bit halves -- confirm),
;; and the final vertical add combines tmp and tmp2.
1315 (define_expand "reduc_splus_v8sf"
1316 [(match_operand:V8SF 0 "register_operand" "")
1317 (match_operand:V8SF 1 "register_operand" "")]
1318 "TARGET_AVX"
1319 {
1320 rtx tmp = gen_reg_rtx (V8SFmode);
1321 rtx tmp2 = gen_reg_rtx (V8SFmode);
1322 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1323 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1324 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1325 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
1326 DONE;
1327 })
1328
;; Sum reduction of the V4SF vector in operand 1 into operand 0.  With
;; SSE3 two horizontal adds are chained through a temporary; without it
;; the work is delegated to ix86_expand_reduc with gen_addv4sf3.
1329 (define_expand "reduc_splus_v4sf"
1330 [(match_operand:V4SF 0 "register_operand" "")
1331 (match_operand:V4SF 1 "register_operand" "")]
1332 "TARGET_SSE"
1333 {
1334 if (TARGET_SSE3)
1335 {
1336 rtx tmp = gen_reg_rtx (V4SFmode);
1337 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1338 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1339 }
1340 else
1341 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1342 DONE;
1343 })
1344
1345 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each mode is enabled only under the target condition listed beside it:
;; the 256-bit integer modes need AVX2, the 256-bit float modes need AVX,
;; and V4SF needs SSE.
1346 (define_mode_iterator REDUC_SMINMAX_MODE
1347 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1348 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1349 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1350 (V4SF "TARGET_SSE")])
1351
;; Signed min/max reduction of operand 1 into operand 0 for every mode in
;; REDUC_SMINMAX_MODE.  The whole expansion is delegated to
;; ix86_expand_reduc, passing the generator of the matching vector min/max
;; pattern.  The condition string is empty, so only the per-mode conditions
;; of the iterator restrict availability.
1352 (define_expand "reduc_<code>_<mode>"
1353 [(smaxmin:REDUC_SMINMAX_MODE
1354 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1355 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1356 ""
1357 {
1358 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1359 DONE;
1360 })
1361
;; Unsigned min/max reduction of operand 1 into operand 0 for the VI_256
;; modes, available with AVX2.  The expansion is delegated to
;; ix86_expand_reduc with the matching vector umin/umax generator.
1362 (define_expand "reduc_<code>_<mode>"
1363 [(umaxmin:VI_256
1364 (match_operand:VI_256 0 "register_operand" "")
1365 (match_operand:VI_256 1 "register_operand" ""))]
1366 "TARGET_AVX2"
1367 {
1368 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1369 DONE;
1370 })
1371
;; Unsigned minimum reduction of the V8HI vector in operand 1 into
;; operand 0, available with SSE4.1.  The expansion is delegated to
;; ix86_expand_reduc with gen_uminv8hi3.
1372 (define_expand "reduc_umin_v8hi"
1373 [(umin:V8HI
1374 (match_operand:V8HI 0 "register_operand" "")
1375 (match_operand:V8HI 1 "register_operand" ""))]
1376 "TARGET_SSE4_1"
1377 {
1378 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1379 DONE;
1380 })
1381
1382 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1383 ;;
1384 ;; Parallel floating point comparisons
1385 ;;
1386 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1387
;; AVX packed floating-point compare, represented as UNSPEC_PCMP.
;; Operand 3 is an immediate accepted by const_0_to_31_operand and is
;; printed as the extra operand of vcmp; length_immediate accounts for it.
1388 (define_insn "avx_cmp<mode>3"
1389 [(set (match_operand:VF 0 "register_operand" "=x")
1390 (unspec:VF
1391 [(match_operand:VF 1 "register_operand" "x")
1392 (match_operand:VF 2 "nonimmediate_operand" "xm")
1393 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1394 UNSPEC_PCMP))]
1395 "TARGET_AVX"
1396 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1397 [(set_attr "type" "ssecmp")
1398 (set_attr "length_immediate" "1")
1399 (set_attr "prefix" "vex")
1400 (set_attr "mode" "<MODE>")])
1401
;; AVX scalar floating-point compare.  Element 0 holds the UNSPEC_PCMP
;; result for operands 1 and 2 under the immediate in operand 3; the
;; remaining elements are copied from operand 1 (match_dup 1 under
;; vec_merge mask 1).
1402 (define_insn "avx_vmcmp<mode>3"
1403 [(set (match_operand:VF_128 0 "register_operand" "=x")
1404 (vec_merge:VF_128
1405 (unspec:VF_128
1406 [(match_operand:VF_128 1 "register_operand" "x")
1407 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1408 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1409 UNSPEC_PCMP)
1410 (match_dup 1)
1411 (const_int 1)))]
1412 "TARGET_AVX"
1413 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1414 [(set_attr "type" "ssecmp")
1415 (set_attr "length_immediate" "1")
1416 (set_attr "prefix" "vex")
1417 (set_attr "mode" "<ssescalarmode>")])
1418
;; Packed compare for commutative comparison codes.  The insn condition
;; only accepts operators whose RTX class is RTX_COMM_COMPARE, which is
;; what makes the '%' marker on operand 1 valid.  The comparison itself is
;; printed through %D3 (operand 3 is the comparison operator).
1419 (define_insn "*<sse>_maskcmp<mode>3_comm"
1420 [(set (match_operand:VF 0 "register_operand" "=x,x")
1421 (match_operator:VF 3 "sse_comparison_operator"
1422 [(match_operand:VF 1 "register_operand" "%0,x")
1423 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1424 "TARGET_SSE
1425 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1426 "@
1427 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1428 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1429 [(set_attr "isa" "noavx,avx")
1430 (set_attr "type" "ssecmp")
1431 (set_attr "length_immediate" "1")
1432 (set_attr "prefix" "orig,vex")
1433 (set_attr "mode" "<MODE>")])
1434
;; Packed compare for any operator accepted by sse_comparison_operator.
;; No commutative marker: operands 1 and 2 are emitted in RTL order, and
;; the comparison is printed through %D3.
1435 (define_insn "<sse>_maskcmp<mode>3"
1436 [(set (match_operand:VF 0 "register_operand" "=x,x")
1437 (match_operator:VF 3 "sse_comparison_operator"
1438 [(match_operand:VF 1 "register_operand" "0,x")
1439 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1440 "TARGET_SSE"
1441 "@
1442 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1443 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1444 [(set_attr "isa" "noavx,avx")
1445 (set_attr "type" "ssecmp")
1446 (set_attr "length_immediate" "1")
1447 (set_attr "prefix" "orig,vex")
1448 (set_attr "mode" "<MODE>")])
1449
;; Scalar compare.  Element 0 is the result of comparison operator 3
;; applied to operands 1 and 2; the remaining elements are copied from
;; operand 1 (match_dup 1 under vec_merge mask 1).  Note that
;; length_immediate is "1,*" here, i.e. it is left at its default for the
;; AVX alternative, unlike the packed compare patterns which use "1".
1450 (define_insn "<sse>_vmmaskcmp<mode>3"
1451 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1452 (vec_merge:VF_128
1453 (match_operator:VF_128 3 "sse_comparison_operator"
1454 [(match_operand:VF_128 1 "register_operand" "0,x")
1455 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1456 (match_dup 1)
1457 (const_int 1)))]
1458 "TARGET_SSE"
1459 "@
1460 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1461 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1462 [(set_attr "isa" "noavx,avx")
1463 (set_attr "type" "ssecmp")
1464 (set_attr "length_immediate" "1,*")
1465 (set_attr "prefix" "orig,vex")
1466 (set_attr "mode" "<ssescalarmode>")])
1467
;; Compares element 0 of operand 0 with element 0 of operand 1 and sets
;; FLAGS_REG in CCFP mode.  prefix_rep is forced to 0 and prefix_data16 is
;; 1 only when the insn mode is DF.
1468 (define_insn "<sse>_comi"
1469 [(set (reg:CCFP FLAGS_REG)
1470 (compare:CCFP
1471 (vec_select:MODEF
1472 (match_operand:<ssevecmode> 0 "register_operand" "x")
1473 (parallel [(const_int 0)]))
1474 (vec_select:MODEF
1475 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1476 (parallel [(const_int 0)]))))]
1477 "SSE_FLOAT_MODE_P (<MODE>mode)"
1478 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1479 [(set_attr "type" "ssecomi")
1480 (set_attr "prefix" "maybe_vex")
1481 (set_attr "prefix_rep" "0")
1482 (set (attr "prefix_data16")
1483 (if_then_else (eq_attr "mode" "DF")
1484 (const_string "1")
1485 (const_string "0")))
1486 (set_attr "mode" "<MODE>")])
1487
;; Same shape as the comi pattern, but the flags are set in CCFPU mode and
;; the ucomi mnemonic is emitted.  Compares element 0 of operand 0 with
;; element 0 of operand 1; prefix_data16 is 1 only when the insn mode is DF.
1488 (define_insn "<sse>_ucomi"
1489 [(set (reg:CCFPU FLAGS_REG)
1490 (compare:CCFPU
1491 (vec_select:MODEF
1492 (match_operand:<ssevecmode> 0 "register_operand" "x")
1493 (parallel [(const_int 0)]))
1494 (vec_select:MODEF
1495 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1496 (parallel [(const_int 0)]))))]
1497 "SSE_FLOAT_MODE_P (<MODE>mode)"
1498 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1499 [(set_attr "type" "ssecomi")
1500 (set_attr "prefix" "maybe_vex")
1501 (set_attr "prefix_rep" "0")
1502 (set (attr "prefix_data16")
1503 (if_then_else (eq_attr "mode" "DF")
1504 (const_string "1")
1505 (const_string "0")))
1506 (set_attr "mode" "<MODE>")])
1507
;; 256-bit vector conditional select driven by a floating-point compare:
;; operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; The data mode (V_256) and the comparison mode (VF_256) iterate
;; independently; the condition keeps only the combinations with the same
;; number of elements.  ix86_expand_fp_vcond does the expansion and is
;; asserted to succeed.
1508 (define_expand "vcond<V_256:mode><VF_256:mode>"
1509 [(set (match_operand:V_256 0 "register_operand" "")
1510 (if_then_else:V_256
1511 (match_operator 3 ""
1512 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1513 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1514 (match_operand:V_256 1 "general_operand" "")
1515 (match_operand:V_256 2 "general_operand" "")))]
1516 "TARGET_AVX
1517 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1518 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1519 {
1520 bool ok = ix86_expand_fp_vcond (operands);
1521 gcc_assert (ok);
1522 DONE;
1523 })
1524
;; 128-bit vector conditional select driven by a floating-point compare:
;; operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; The data mode (V_128) and the comparison mode (VF_128) iterate
;; independently; the condition keeps only the combinations with the same
;; number of elements.  ix86_expand_fp_vcond does the expansion and is
;; asserted to succeed.
1525 (define_expand "vcond<V_128:mode><VF_128:mode>"
1526 [(set (match_operand:V_128 0 "register_operand" "")
1527 (if_then_else:V_128
1528 (match_operator 3 ""
1529 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1530 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1531 (match_operand:V_128 1 "general_operand" "")
1532 (match_operand:V_128 2 "general_operand" "")))]
1533 "TARGET_SSE
1534 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1535 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1536 {
1537 bool ok = ix86_expand_fp_vcond (operands);
1538 gcc_assert (ok);
1539 DONE;
1540 })
1541
1542 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1543 ;;
1544 ;; Parallel floating point logical operations
1545 ;;
1546 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1547
;; Vector and-not: operand 0 = (~operand 1) & operand 2.  The output
;; template is built at output time so that the "ps" form can be used for
;; every mode when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set; otherwise
;; the mode's own suffix is used.  The returned string lives in a static
;; buffer.  With a two-character suffix the longer (AVX) template expands
;; to 31 characters, which together with the terminating NUL exactly fills
;; buf.
1548 (define_insn "<sse>_andnot<mode>3"
1549 [(set (match_operand:VF 0 "register_operand" "=x,x")
1550 (and:VF
1551 (not:VF
1552 (match_operand:VF 1 "register_operand" "0,x"))
1553 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1554 "TARGET_SSE"
1555 {
1556 static char buf[32];
1557 const char *insn;
1558 const char *suffix
1559 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1560
1561 switch (which_alternative)
1562 {
1563 case 0:
1564 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1565 break;
1566 case 1:
1567 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1568 break;
1569 default:
1570 gcc_unreachable ();
1571 }
1572
1573 snprintf (buf, sizeof (buf), insn, suffix);
1574 return buf;
1575 }
1576 [(set_attr "isa" "noavx,avx")
1577 (set_attr "type" "sselog")
1578 (set_attr "prefix" "orig,vex")
1579 (set_attr "mode" "<MODE>")])
1580
;; Expander for the vector floating-point logical operations iterated by
;; any_logic.  The only preparation step is
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
1581 (define_expand "<code><mode>3"
1582 [(set (match_operand:VF 0 "register_operand" "")
1583 (any_logic:VF
1584 (match_operand:VF 1 "nonimmediate_operand" "")
1585 (match_operand:VF 2 "nonimmediate_operand" "")))]
1586 "TARGET_SSE"
1587 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1588
;; Vector floating-point logical insn for the any_logic codes.  Operand 1
;; is marked commutative ('%') and the operands must be accepted by
;; ix86_binary_operator_ok.  The template is built at output time: the
;; suffix is "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and the
;; mode's own suffix otherwise.  The returned string lives in a static
;; buffer.
1589 (define_insn "*<code><mode>3"
1590 [(set (match_operand:VF 0 "register_operand" "=x,x")
1591 (any_logic:VF
1592 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1593 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1594 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1595 {
1596 static char buf[32];
1597 const char *insn;
1598 const char *suffix
1599 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1600
1601 switch (which_alternative)
1602 {
1603 case 0:
1604 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1605 break;
1606 case 1:
1607 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1608 break;
1609 default:
1610 gcc_unreachable ();
1611 }
1612
1613 snprintf (buf, sizeof (buf), insn, suffix);
1614 return buf;
1615 }
1616 [(set_attr "isa" "noavx,avx")
1617 (set_attr "type" "sselog")
1618 (set_attr "prefix" "orig,vex")
1619 (set_attr "mode" "<MODE>")])
1620
;; Vector copysign expanded into three logical operations:
;;   operand 4 = operand 1 & ~mask
;;   operand 5 = operand 2 & mask
;;   operand 0 = operand 4 | operand 5
;; The mask (operand 3) comes from ix86_build_signbit_mask (<MODE>mode, 1, 0)
;; -- presumably the sign bit set in every element; confirm against that
;; helper.  Operands 4 and 5 are fresh pseudo registers.
1621 (define_expand "copysign<mode>3"
1622 [(set (match_dup 4)
1623 (and:VF
1624 (not:VF (match_dup 3))
1625 (match_operand:VF 1 "nonimmediate_operand" "")))
1626 (set (match_dup 5)
1627 (and:VF (match_dup 3)
1628 (match_operand:VF 2 "nonimmediate_operand" "")))
1629 (set (match_operand:VF 0 "register_operand" "")
1630 (ior:VF (match_dup 4) (match_dup 5)))]
1631 "TARGET_SSE"
1632 {
1633 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1634
1635 operands[4] = gen_reg_rtx (<MODE>mode);
1636 operands[5] = gen_reg_rtx (<MODE>mode);
1637 })
1638
1639 ;; Also define scalar versions. These are used for abs, neg, and
1640 ;; conditional move. Using subregs into vector modes causes register
1641 ;; allocation lossage. These patterns do not allow memory operands
1642 ;; because the native instructions read the full 128-bits.
1643
;; Scalar (MODEF) and-not: operand 0 = (~operand 1) & operand 2, with all
;; operands restricted to registers.  The packed instruction is emitted;
;; its suffix is "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and
;; the suffix of the corresponding vector mode otherwise.  The returned
;; string lives in a static buffer.
1644 (define_insn "*andnot<mode>3"
1645 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1646 (and:MODEF
1647 (not:MODEF
1648 (match_operand:MODEF 1 "register_operand" "0,x"))
1649 (match_operand:MODEF 2 "register_operand" "x,x")))]
1650 "SSE_FLOAT_MODE_P (<MODE>mode)"
1651 {
1652 static char buf[32];
1653 const char *insn;
1654 const char *suffix
1655 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1656
1657 switch (which_alternative)
1658 {
1659 case 0:
1660 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1661 break;
1662 case 1:
1663 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1664 break;
1665 default:
1666 gcc_unreachable ();
1667 }
1668
1669 snprintf (buf, sizeof (buf), insn, suffix);
1670 return buf;
1671 }
1672 [(set_attr "isa" "noavx,avx")
1673 (set_attr "type" "sselog")
1674 (set_attr "prefix" "orig,vex")
1675 (set_attr "mode" "<ssevecmode>")])
1676
;; Scalar (MODEF) logical insn for the any_logic codes, with all operands
;; restricted to registers and operand 1 marked commutative ('%').  The
;; packed instruction is emitted; its suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and the suffix of the
;; corresponding vector mode otherwise.  The returned string lives in a
;; static buffer.
1677 (define_insn "*<code><mode>3"
1678 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1679 (any_logic:MODEF
1680 (match_operand:MODEF 1 "register_operand" "%0,x")
1681 (match_operand:MODEF 2 "register_operand" "x,x")))]
1682 "SSE_FLOAT_MODE_P (<MODE>mode)"
1683 {
1684 static char buf[32];
1685 const char *insn;
1686 const char *suffix
1687 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1688
1689 switch (which_alternative)
1690 {
1691 case 0:
1692 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1693 break;
1694 case 1:
1695 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1696 break;
1697 default:
1698 gcc_unreachable ();
1699 }
1700
1701 snprintf (buf, sizeof (buf), insn, suffix);
1702 return buf;
1703 }
1704 [(set_attr "isa" "noavx,avx")
1705 (set_attr "type" "sselog")
1706 (set_attr "prefix" "orig,vex")
1707 (set_attr "mode" "<ssevecmode>")])
1708
1709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1710 ;;
1711 ;; FMA4 floating point multiply/accumulate instructions. This
1712 ;; includes the scalar version of the instructions as well as the
1713 ;; vector.
1714 ;;
1715 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1716
1717 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1718 ;; combine to generate a multiply/add with two memory references. We then
1719 ;; split this insn, into loading up the destination register with one of the
1720 ;; memory operations. If we don't manage to split the insn, reload will
1721 ;; generate the appropriate moves. The reason this is needed, is that combine
1722 ;; has already folded one of the memory references into both the multiply and
1723 ;; add insns, and it can't generate a new pseudo. I.e.:
1724 ;; (set (reg1) (mem (addr1)))
1725 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1726 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1727 ;;
1728 ;; ??? This is historic, pre-dating the gimple fma transformation.
1729 ;; We could now properly represent that only one memory operand is
1730 ;; allowed and not be penalized during optimization.
1731
1732 ;; Intrinsic FMA operations.
1733
1734 ;; The standard names for fma are only available with SSE math enabled.
;; fma: operand 0 = operand 1 * operand 2 + operand 3.  There is no
;; preparation code, so the RTL template is emitted unchanged.  Requires
;; FMA or FMA4 together with TARGET_SSE_MATH.
1735 (define_expand "fma<mode>4"
1736 [(set (match_operand:FMAMODE 0 "register_operand")
1737 (fma:FMAMODE
1738 (match_operand:FMAMODE 1 "nonimmediate_operand")
1739 (match_operand:FMAMODE 2 "nonimmediate_operand")
1740 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1741 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1742
;; fms: operand 0 = operand 1 * operand 2 - operand 3, written as an fma
;; whose addend is negated.  Requires FMA or FMA4 together with
;; TARGET_SSE_MATH.
1743 (define_expand "fms<mode>4"
1744 [(set (match_operand:FMAMODE 0 "register_operand")
1745 (fma:FMAMODE
1746 (match_operand:FMAMODE 1 "nonimmediate_operand")
1747 (match_operand:FMAMODE 2 "nonimmediate_operand")
1748 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1749 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1750
;; fnma: operand 0 = -(operand 1) * operand 2 + operand 3, written as an
;; fma whose first multiplicand is negated.  Requires FMA or FMA4 together
;; with TARGET_SSE_MATH.
1751 (define_expand "fnma<mode>4"
1752 [(set (match_operand:FMAMODE 0 "register_operand")
1753 (fma:FMAMODE
1754 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1755 (match_operand:FMAMODE 2 "nonimmediate_operand")
1756 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1757 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1758
;; fnms: operand 0 = -(operand 1) * operand 2 - operand 3, written as an
;; fma with both the first multiplicand and the addend negated.  Requires
;; FMA or FMA4 together with TARGET_SSE_MATH.
1759 (define_expand "fnms<mode>4"
1760 [(set (match_operand:FMAMODE 0 "register_operand")
1761 (fma:FMAMODE
1762 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1763 (match_operand:FMAMODE 2 "nonimmediate_operand")
1764 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1765 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1766
1767 ;; The builtins for fma4intrin.h do not require SSE math to be enabled.
;; Same RTL as the fma<mode>4 expander (operand 1 * operand 2 + operand 3),
;; but the condition only asks for FMA or FMA4 and does not test
;; TARGET_SSE_MATH.
1768 (define_expand "fma4i_fmadd_<mode>"
1769 [(set (match_operand:FMAMODE 0 "register_operand")
1770 (fma:FMAMODE
1771 (match_operand:FMAMODE 1 "nonimmediate_operand")
1772 (match_operand:FMAMODE 2 "nonimmediate_operand")
1773 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1774 "TARGET_FMA || TARGET_FMA4")
1775
;; FMA4 four-operand multiply-add: operand 0 = operand 1 * operand 2
;; + operand 3.  The destination is not tied to any input.  The two
;; alternatives allow a memory operand in either operand 3 or operand 2,
;; never both; operand 1 is marked commutative with operand 2.
1776 (define_insn "*fma4i_fmadd_<mode>"
1777 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1778 (fma:FMAMODE
1779 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1780 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1781 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1782 "TARGET_FMA4"
1783 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1784 [(set_attr "type" "ssemuladd")
1785 (set_attr "mode" "<MODE>")])
1786
;; FMA4 four-operand multiply-subtract: operand 0 = operand 1 * operand 2
;; - operand 3 (the addend is wrapped in neg).  A memory operand is allowed
;; in either operand 3 or operand 2, never both.
1787 (define_insn "*fma4i_fmsub_<mode>"
1788 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1789 (fma:FMAMODE
1790 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1791 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1792 (neg:FMAMODE
1793 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1794 "TARGET_FMA4"
1795 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1796 [(set_attr "type" "ssemuladd")
1797 (set_attr "mode" "<MODE>")])
1798
;; FMA4 four-operand negated multiply-add: operand 0 = -(operand 1)
;; * operand 2 + operand 3.  A memory operand is allowed in either
;; operand 3 or operand 2, never both.
1799 (define_insn "*fma4i_fnmadd_<mode>"
1800 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1801 (fma:FMAMODE
1802 (neg:FMAMODE
1803 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1804 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1805 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1806 "TARGET_FMA4"
1807 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1808 [(set_attr "type" "ssemuladd")
1809 (set_attr "mode" "<MODE>")])
1810
;; FMA4 four-operand negated multiply-subtract: operand 0 = -(operand 1)
;; * operand 2 - operand 3.  A memory operand is allowed in either
;; operand 3 or operand 2, never both.
1811 (define_insn "*fma4i_fnmsub_<mode>"
1812 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1813 (fma:FMAMODE
1814 (neg:FMAMODE
1815 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1816 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1817 (neg:FMAMODE
1818 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1819 "TARGET_FMA4"
1820 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1821 [(set_attr "type" "ssemuladd")
1822 (set_attr "mode" "<MODE>")])
1823
1824 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1825 ;; entire destination register, with the high-order elements zeroed.
1826
;; FMA4 scalar multiply-add expander.  Element 0 of the result is the fma
;; of operands 1, 2 and 3; the remaining elements come from operand 4,
;; which the preparation code sets to the all-zero constant of the mode.
1827 (define_expand "fma4i_vmfmadd_<mode>"
1828 [(set (match_operand:VF_128 0 "register_operand")
1829 (vec_merge:VF_128
1830 (fma:VF_128
1831 (match_operand:VF_128 1 "nonimmediate_operand")
1832 (match_operand:VF_128 2 "nonimmediate_operand")
1833 (match_operand:VF_128 3 "nonimmediate_operand"))
1834 (match_dup 4)
1835 (const_int 1)))]
1836 "TARGET_FMA4"
1837 {
1838 operands[4] = CONST0_RTX (<MODE>mode);
1839 })
1840
;; FMA (three-operand) scalar multiply-add expander.  Element 0 of the
;; result is the fma of operands 1, 2 and 3; the remaining elements are
;; taken from operand 0 itself (match_dup 0), i.e. from the destination
;; register's previous contents.
1841 (define_expand "fmai_vmfmadd_<mode>"
1842 [(set (match_operand:VF_128 0 "register_operand")
1843 (vec_merge:VF_128
1844 (fma:VF_128
1845 (match_operand:VF_128 1 "nonimmediate_operand")
1846 (match_operand:VF_128 2 "nonimmediate_operand")
1847 (match_operand:VF_128 3 "nonimmediate_operand"))
1848 (match_dup 0)
1849 (const_int 1)))]
1850 "TARGET_FMA")
1851
;; FMA scalar multiply-add: element 0 = operand 1 * operand 2 + operand 3,
;; other elements kept from operand 0.  The three alternatives pick the
;; instruction form according to which input is tied to the destination:
;;   alt 0: operand 1 tied, operand 2 may be memory -> 132 form
;;   alt 1: operand 1 tied, operand 3 may be memory -> 213 form
;;   alt 2: operand 3 tied, operand 2 may be memory -> 231 form
1852 (define_insn "*fmai_fmadd_<mode>"
1853 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1854 (vec_merge:VF_128
1855 (fma:VF_128
1856 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1857 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1858 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1859 (match_dup 0)
1860 (const_int 1)))]
1861 "TARGET_FMA"
1862 "@
1863 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1864 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1865 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1866 [(set_attr "type" "ssemuladd")
1867 (set_attr "mode" "<MODE>")])
1868
;; FMA scalar multiply-subtract: element 0 = operand 1 * operand 2
;; - operand 3, other elements kept from operand 0.  The three alternatives
;; pick the 132, 213 or 231 form according to which input is tied to the
;; destination (operand 1 for the first two, operand 3 for the last).
1869 (define_insn "*fmai_fmsub_<mode>"
1870 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1871 (vec_merge:VF_128
1872 (fma:VF_128
1873 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1874 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1875 (neg:VF_128
1876 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1877 (match_dup 0)
1878 (const_int 1)))]
1879 "TARGET_FMA"
1880 "@
1881 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1882 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1883 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1884 [(set_attr "type" "ssemuladd")
1885 (set_attr "mode" "<MODE>")])
1886
;; FMA scalar negated multiply-add: element 0 = -(operand 1) * operand 2
;; + operand 3, other elements kept from operand 0.  The three alternatives
;; pick the 132, 213 or 231 form according to which input is tied to the
;; destination (operand 1 for the first two, operand 3 for the last).
1887 (define_insn "*fmai_fnmadd_<mode>"
1888 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1889 (vec_merge:VF_128
1890 (fma:VF_128
1891 (neg:VF_128
1892 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1893 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1894 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1895 (match_dup 0)
1896 (const_int 1)))]
1897 "TARGET_FMA"
1898 "@
1899 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1900 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1901 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1902 [(set_attr "type" "ssemuladd")
1903 (set_attr "mode" "<MODE>")])
1904
;; FMA scalar negated multiply-subtract: element 0 = -(operand 1)
;; * operand 2 - operand 3, other elements kept from operand 0.  The three
;; alternatives pick the 132, 213 or 231 form according to which input is
;; tied to the destination (operand 1 for the first two, operand 3 for the
;; last).
1905 (define_insn "*fmai_fnmsub_<mode>"
1906 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1907 (vec_merge:VF_128
1908 (fma:VF_128
1909 (neg:VF_128
1910 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1911 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1912 (neg:VF_128
1913 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1914 (match_dup 0)
1915 (const_int 1)))]
1916 "TARGET_FMA"
1917 "@
1918 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1919 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1920 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1921 [(set_attr "type" "ssemuladd")
1922 (set_attr "mode" "<MODE>")])
1923
;; FMA4 multiply-add for VF_128 modes: fma (op1, op2, op3) is merged under
;; mask (const_int 1) with operand 4, which must satisfy const0_operand.
;; Emits the four-operand vfmadd<ssescalarmodesuffix>; the two alternatives
;; allow a memory operand in either operand 3 or operand 2.
1924 (define_insn "*fma4i_vmfmadd_<mode>"
1925 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1926 (vec_merge:VF_128
1927 (fma:VF_128
1928 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1929 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1930 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1931 (match_operand:VF_128 4 "const0_operand" "")
1932 (const_int 1)))]
1933 "TARGET_FMA4"
1934 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1935 [(set_attr "type" "ssemuladd")
1936 (set_attr "mode" "<MODE>")])
1937
;; FMA4 multiply-subtract for VF_128 modes: fma (op1, op2, -op3) is merged
;; under mask (const_int 1) with operand 4, which must satisfy
;; const0_operand.  Emits the four-operand vfmsub<ssescalarmodesuffix>; the
;; two alternatives allow a memory operand in either operand 3 or operand 2.
1938 (define_insn "*fma4i_vmfmsub_<mode>"
1939 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1940 (vec_merge:VF_128
1941 (fma:VF_128
1942 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1943 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1944 (neg:VF_128
1945 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1946 (match_operand:VF_128 4 "const0_operand" "")
1947 (const_int 1)))]
1948 "TARGET_FMA4"
1949 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1950 [(set_attr "type" "ssemuladd")
1951 (set_attr "mode" "<MODE>")])
1952
;; FMA4 negated multiply-add for VF_128 modes: fma (-op1, op2, op3) is merged
;; under mask (const_int 1) with operand 4, which must satisfy
;; const0_operand.  Emits the four-operand vfnmadd<ssescalarmodesuffix>; the
;; two alternatives allow a memory operand in either operand 3 or operand 2.
1953 (define_insn "*fma4i_vmfnmadd_<mode>"
1954 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1955 (vec_merge:VF_128
1956 (fma:VF_128
1957 (neg:VF_128
1958 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1959 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1960 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1961 (match_operand:VF_128 4 "const0_operand" "")
1962 (const_int 1)))]
1963 "TARGET_FMA4"
1964 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1965 [(set_attr "type" "ssemuladd")
1966 (set_attr "mode" "<MODE>")])
1967
;; FMA4 negated multiply-subtract for VF_128 modes: fma (-op1, op2, -op3) is
;; merged under mask (const_int 1) with operand 4, which must satisfy
;; const0_operand.  Emits the four-operand vfnmsub<ssescalarmodesuffix>; the
;; two alternatives allow a memory operand in either operand 3 or operand 2.
1968 (define_insn "*fma4i_vmfnmsub_<mode>"
1969 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1970 (vec_merge:VF_128
1971 (fma:VF_128
1972 (neg:VF_128
1973 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1974 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1975 (neg:VF_128
1976 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1977 (match_operand:VF_128 4 "const0_operand" "")
1978 (const_int 1)))]
1979 "TARGET_FMA4"
1980 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1981 [(set_attr "type" "ssemuladd")
1982 (set_attr "mode" "<MODE>")])
1983
1984 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1985 ;;
1986 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1987 ;;
1988 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1989
1990 ;; It would be possible to represent these without the UNSPEC as
1991 ;;
1992 ;; (vec_merge
1993 ;; (fma op1 op2 op3)
1994 ;; (fma op1 op2 (neg op3))
1995 ;; (merge-const))
1996 ;;
1997 ;; But this doesn't seem useful in practice.
1998
;; Named expander for the UNSPEC_FMADDSUB operation on VF modes.  It has no
;; preparation statements, so it emits the pattern as written; it is enabled
;; when either TARGET_FMA or TARGET_FMA4 is available.
1999 (define_expand "fmaddsub_<mode>"
2000 [(set (match_operand:VF 0 "register_operand")
2001 (unspec:VF
2002 [(match_operand:VF 1 "nonimmediate_operand")
2003 (match_operand:VF 2 "nonimmediate_operand")
2004 (match_operand:VF 3 "nonimmediate_operand")]
2005 UNSPEC_FMADDSUB))]
2006 "TARGET_FMA || TARGET_FMA4")
2007
;; FMA4 form of UNSPEC_FMADDSUB: four-operand vfmaddsub<ssemodesuffix>.
;; The two alternatives allow a memory operand in either operand 3 or
;; operand 2.
2008 (define_insn "*fma4_fmaddsub_<mode>"
2009 [(set (match_operand:VF 0 "register_operand" "=x,x")
2010 (unspec:VF
2011 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
2012 (match_operand:VF 2 "nonimmediate_operand" " x,m")
2013 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
2014 UNSPEC_FMADDSUB))]
2015 "TARGET_FMA4"
2016 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2017 [(set_attr "type" "ssemuladd")
2018 (set_attr "mode" "<MODE>")])
2019
;; FMA4 vfmsubadd<ssemodesuffix>: represented as UNSPEC_FMADDSUB with
;; operand 3 wrapped in neg.  The two alternatives allow a memory operand in
;; either operand 3 or operand 2.
2020 (define_insn "*fma4_fmsubadd_<mode>"
2021 [(set (match_operand:VF 0 "register_operand" "=x,x")
2022 (unspec:VF
2023 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
2024 (match_operand:VF 2 "nonimmediate_operand" " x,m")
2025 (neg:VF
2026 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
2027 UNSPEC_FMADDSUB))]
2028 "TARGET_FMA4"
2029 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2030 [(set_attr "type" "ssemuladd")
2031 (set_attr "mode" "<MODE>")])
2032
2033 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2034 ;;
2035 ;; FMA3 floating point multiply/accumulate instructions.
2036 ;;
2037 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2038
;; FMA3 multiply-add over FMAMODE: operand 0 = fma (op1, op2, op3).
;; One input is always tied to the destination; the three alternatives pick
;; the 132/213/231 encodings according to which input is tied and which one
;; may be a memory operand.
2039 (define_insn "*fma_fmadd_<mode>"
2040 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2041 (fma:FMAMODE
2042 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2043 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2044 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2045 "TARGET_FMA"
2046 "@
2047 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2048 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2049 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2050 [(set_attr "type" "ssemuladd")
2051 (set_attr "mode" "<MODE>")])
2052
;; FMA3 multiply-subtract over FMAMODE: operand 0 = fma (op1, op2, -op3).
;; The three alternatives pick the 132/213/231 encodings according to which
;; input is tied to the destination and which one may be a memory operand.
2053 (define_insn "*fma_fmsub_<mode>"
2054 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2055 (fma:FMAMODE
2056 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2057 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2058 (neg:FMAMODE
2059 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2060 "TARGET_FMA"
2061 "@
2062 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2063 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2064 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2065 [(set_attr "type" "ssemuladd")
2066 (set_attr "mode" "<MODE>")])
2067
;; FMA3 negated multiply-add over FMAMODE: operand 0 = fma (-op1, op2, op3).
;; The three alternatives pick the 132/213/231 encodings according to which
;; input is tied to the destination and which one may be a memory operand.
2068 (define_insn "*fma_fnmadd_<mode>"
2069 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2070 (fma:FMAMODE
2071 (neg:FMAMODE
2072 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2073 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2074 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2075 "TARGET_FMA"
2076 "@
2077 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2078 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2079 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2080 [(set_attr "type" "ssemuladd")
2081 (set_attr "mode" "<MODE>")])
2082
;; FMA3 negated multiply-subtract over FMAMODE:
;; operand 0 = fma (-op1, op2, -op3).  The three alternatives pick the
;; 132/213/231 encodings according to which input is tied to the destination
;; and which one may be a memory operand.
2083 (define_insn "*fma_fnmsub_<mode>"
2084 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2085 (fma:FMAMODE
2086 (neg:FMAMODE
2087 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2088 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2089 (neg:FMAMODE
2090 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2091 "TARGET_FMA"
2092 "@
2093 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2094 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2095 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2096 [(set_attr "type" "ssemuladd")
2097 (set_attr "mode" "<MODE>")])
2098
;; FMA3 form of UNSPEC_FMADDSUB on VF modes (vfmaddsub132/213/231).
;; One input is tied to the destination in every alternative; the
;; alternatives differ in which input is tied and which may be memory.
2099 (define_insn "*fma_fmaddsub_<mode>"
2100 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2101 (unspec:VF
2102 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2103 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2104 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2105 UNSPEC_FMADDSUB))]
2106 "TARGET_FMA"
2107 "@
2108 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2109 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2110 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2111 [(set_attr "type" "ssemuladd")
2112 (set_attr "mode" "<MODE>")])
2113
;; FMA3 vfmsubadd132/213/231 on VF modes: represented as UNSPEC_FMADDSUB
;; with operand 3 wrapped in neg.  The alternatives differ in which input is
;; tied to the destination and which may be memory.
2114 (define_insn "*fma_fmsubadd_<mode>"
2115 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2116 (unspec:VF
2117 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2118 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2119 (neg:VF
2120 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2121 UNSPEC_FMADDSUB))]
2122 "TARGET_FMA"
2123 "@
2124 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2125 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2126 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2127 [(set_attr "type" "ssemuladd")
2128 (set_attr "mode" "<MODE>")])
2129
2130 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2131 ;;
2132 ;; Parallel single-precision floating point conversion operations
2133 ;;
2134 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2135
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") to V2SF and merges
;; the result into operand 1, which is tied to the destination, under
;; vec_merge mask (const_int 3).
2136 (define_insn "sse_cvtpi2ps"
2137 [(set (match_operand:V4SF 0 "register_operand" "=x")
2138 (vec_merge:V4SF
2139 (vec_duplicate:V4SF
2140 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2141 (match_operand:V4SF 1 "register_operand" "0")
2142 (const_int 3)))]
2143 "TARGET_SSE"
2144 "cvtpi2ps\t{%2, %0|%0, %2}"
2145 [(set_attr "type" "ssecvt")
2146 (set_attr "mode" "V4SF")])
2147
;; cvtps2pi: non-truncating (UNSPEC_FIX_NOTRUNC) conversion of V4SF operand 1;
;; elements 0 and 1 of the V4SI result are selected into the V2SI
;; destination (constraint "y").
2148 (define_insn "sse_cvtps2pi"
2149 [(set (match_operand:V2SI 0 "register_operand" "=y")
2150 (vec_select:V2SI
2151 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2152 UNSPEC_FIX_NOTRUNC)
2153 (parallel [(const_int 0) (const_int 1)])))]
2154 "TARGET_SSE"
2155 "cvtps2pi\t{%1, %0|%0, %1}"
2156 [(set_attr "type" "ssecvt")
2157 (set_attr "unit" "mmx")
2158 (set_attr "mode" "DI")])
2159
;; cvttps2pi: truncating (fix) conversion of V4SF operand 1; elements 0 and 1
;; of the V4SI result are selected into the V2SI destination (constraint "y").
;; NOTE(review): mode is "SF" here while sse_cvtps2pi uses "DI", and only this
;; pattern sets prefix_rep -- confirm the difference is intended.
2160 (define_insn "sse_cvttps2pi"
2161 [(set (match_operand:V2SI 0 "register_operand" "=y")
2162 (vec_select:V2SI
2163 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2164 (parallel [(const_int 0) (const_int 1)])))]
2165 "TARGET_SSE"
2166 "cvttps2pi\t{%1, %0|%0, %1}"
2167 [(set_attr "type" "ssecvt")
2168 (set_attr "unit" "mmx")
2169 (set_attr "prefix_rep" "0")
2170 (set_attr "mode" "SF")])
2171
;; cvtsi2ss: converts SImode operand 2 to SF and merges it into V4SF
;; operand 1 under vec_merge mask (const_int 1).  Alternatives 0 and 1 are
;; the non-AVX two-operand form (register and memory source, which carry
;; different decode attributes); alternative 2 is the AVX three-operand
;; vcvtsi2ss.
2172 (define_insn "sse_cvtsi2ss"
2173 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2174 (vec_merge:V4SF
2175 (vec_duplicate:V4SF
2176 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2177 (match_operand:V4SF 1 "register_operand" "0,0,x")
2178 (const_int 1)))]
2179 "TARGET_SSE"
2180 "@
2181 cvtsi2ss\t{%2, %0|%0, %2}
2182 cvtsi2ss\t{%2, %0|%0, %2}
2183 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2184 [(set_attr "isa" "noavx,noavx,avx")
2185 (set_attr "type" "sseicvt")
2186 (set_attr "athlon_decode" "vector,double,*")
2187 (set_attr "amdfam10_decode" "vector,double,*")
2188 (set_attr "bdver1_decode" "double,direct,*")
2189 (set_attr "prefix" "orig,orig,vex")
2190 (set_attr "mode" "SF")])
2191
;; cvtsi2ssq: DImode-source variant of sse_cvtsi2ss, additionally requiring
;; TARGET_64BIT.  Converts operand 2 to SF and merges it into V4SF operand 1
;; under vec_merge mask (const_int 1).  Alternatives 0 and 1 are the non-AVX
;; form (prefix_rex "1"); alternative 2 is the AVX form (length_vex "4").
2192 (define_insn "sse_cvtsi2ssq"
2193 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2194 (vec_merge:V4SF
2195 (vec_duplicate:V4SF
2196 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2197 (match_operand:V4SF 1 "register_operand" "0,0,x")
2198 (const_int 1)))]
2199 "TARGET_SSE && TARGET_64BIT"
2200 "@
2201 cvtsi2ssq\t{%2, %0|%0, %2}
2202 cvtsi2ssq\t{%2, %0|%0, %2}
2203 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2204 [(set_attr "isa" "noavx,noavx,avx")
2205 (set_attr "type" "sseicvt")
2206 (set_attr "athlon_decode" "vector,double,*")
2207 (set_attr "amdfam10_decode" "vector,double,*")
2208 (set_attr "bdver1_decode" "double,direct,*")
2209 (set_attr "length_vex" "*,*,4")
2210 (set_attr "prefix_rex" "1,1,*")
2211 (set_attr "prefix" "orig,orig,vex")
2212 (set_attr "mode" "SF")])
2213
;; cvtss2si: non-truncating (UNSPEC_FIX_NOTRUNC) conversion of element 0 of
;; V4SF operand 1 to SImode.  Alternatives are register and memory source.
;; NOTE(review): unlike sse_cvtss2si_2, no amdfam10_decode is set here --
;; confirm that is intended.
2214 (define_insn "sse_cvtss2si"
2215 [(set (match_operand:SI 0 "register_operand" "=r,r")
2216 (unspec:SI
2217 [(vec_select:SF
2218 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2219 (parallel [(const_int 0)]))]
2220 UNSPEC_FIX_NOTRUNC))]
2221 "TARGET_SSE"
2222 "%vcvtss2si\t{%1, %0|%0, %1}"
2223 [(set_attr "type" "sseicvt")
2224 (set_attr "athlon_decode" "double,vector")
2225 (set_attr "bdver1_decode" "double,double")
2226 (set_attr "prefix_rep" "1")
2227 (set_attr "prefix" "maybe_vex")
2228 (set_attr "mode" "SI")])
2229
;; cvtss2si taking a scalar SFmode operand directly (no vec_select):
;; non-truncating (UNSPEC_FIX_NOTRUNC) conversion to SImode.
2230 (define_insn "sse_cvtss2si_2"
2231 [(set (match_operand:SI 0 "register_operand" "=r,r")
2232 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2233 UNSPEC_FIX_NOTRUNC))]
2234 "TARGET_SSE"
2235 "%vcvtss2si\t{%1, %0|%0, %1}"
2236 [(set_attr "type" "sseicvt")
2237 (set_attr "athlon_decode" "double,vector")
2238 (set_attr "amdfam10_decode" "double,double")
2239 (set_attr "bdver1_decode" "double,double")
2240 (set_attr "prefix_rep" "1")
2241 (set_attr "prefix" "maybe_vex")
2242 (set_attr "mode" "SI")])
2243
;; 64-bit cvtss2si: non-truncating (UNSPEC_FIX_NOTRUNC) conversion of
;; element 0 of V4SF operand 1 to DImode; requires TARGET_64BIT.
;; NOTE(review): unlike sse_cvtss2siq_2, no amdfam10_decode is set here --
;; confirm that is intended.
2244 (define_insn "sse_cvtss2siq"
2245 [(set (match_operand:DI 0 "register_operand" "=r,r")
2246 (unspec:DI
2247 [(vec_select:SF
2248 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2249 (parallel [(const_int 0)]))]
2250 UNSPEC_FIX_NOTRUNC))]
2251 "TARGET_SSE && TARGET_64BIT"
2252 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2253 [(set_attr "type" "sseicvt")
2254 (set_attr "athlon_decode" "double,vector")
2255 (set_attr "bdver1_decode" "double,double")
2256 (set_attr "prefix_rep" "1")
2257 (set_attr "prefix" "maybe_vex")
2258 (set_attr "mode" "DI")])
2259
;; 64-bit cvtss2si taking a scalar SFmode operand directly (no vec_select):
;; non-truncating (UNSPEC_FIX_NOTRUNC) conversion to DImode; requires
;; TARGET_64BIT.
2260 (define_insn "sse_cvtss2siq_2"
2261 [(set (match_operand:DI 0 "register_operand" "=r,r")
2262 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2263 UNSPEC_FIX_NOTRUNC))]
2264 "TARGET_SSE && TARGET_64BIT"
2265 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2266 [(set_attr "type" "sseicvt")
2267 (set_attr "athlon_decode" "double,vector")
2268 (set_attr "amdfam10_decode" "double,double")
2269 (set_attr "bdver1_decode" "double,double")
2270 (set_attr "prefix_rep" "1")
2271 (set_attr "prefix" "maybe_vex")
2272 (set_attr "mode" "DI")])
2273
;; cvttss2si: truncating (fix) conversion of element 0 of V4SF operand 1 to
;; SImode.  Alternatives are register and memory source.
2274 (define_insn "sse_cvttss2si"
2275 [(set (match_operand:SI 0 "register_operand" "=r,r")
2276 (fix:SI
2277 (vec_select:SF
2278 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2279 (parallel [(const_int 0)]))))]
2280 "TARGET_SSE"
2281 "%vcvttss2si\t{%1, %0|%0, %1}"
2282 [(set_attr "type" "sseicvt")
2283 (set_attr "athlon_decode" "double,vector")
2284 (set_attr "amdfam10_decode" "double,double")
2285 (set_attr "bdver1_decode" "double,double")
2286 (set_attr "prefix_rep" "1")
2287 (set_attr "prefix" "maybe_vex")
2288 (set_attr "mode" "SI")])
2289
;; 64-bit cvttss2si: truncating (fix) conversion of element 0 of V4SF
;; operand 1 to DImode; requires TARGET_64BIT.
2290 (define_insn "sse_cvttss2siq"
2291 [(set (match_operand:DI 0 "register_operand" "=r,r")
2292 (fix:DI
2293 (vec_select:SF
2294 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2295 (parallel [(const_int 0)]))))]
2296 "TARGET_SSE && TARGET_64BIT"
2297 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2298 [(set_attr "type" "sseicvt")
2299 (set_attr "athlon_decode" "double,vector")
2300 (set_attr "amdfam10_decode" "double,double")
2301 (set_attr "bdver1_decode" "double,double")
2302 (set_attr "prefix_rep" "1")
2303 (set_attr "prefix" "maybe_vex")
2304 (set_attr "mode" "DI")])
2305
;; Signed integer vector -> float vector conversion for VF1 modes, emitted
;; as cvtdq2ps (the template's %v prefix selects the VEX mnemonic when
;; applicable).  The source has the matching <sseintvecmode>.
2306 (define_insn "float<sseintvecmodelower><mode>2"
2307 [(set (match_operand:VF1 0 "register_operand" "=x")
2308 (float:VF1
2309 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
2310 "TARGET_SSE2"
2311 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2312 [(set_attr "type" "ssecvt")
2313 (set_attr "prefix" "maybe_vex")
2314 (set_attr "mode" "<sseinsnmode>")])
2315
;; Unsigned integer vector -> float vector conversion for VF1 modes.
;; Enabled for V4SFmode under TARGET_SSE2, and for the other VF1 mode(s)
;; only with TARGET_AVX2.  All work is delegated to
;; ix86_expand_vector_convert_uns_vsivsf; the pattern itself emits nothing.
2316 (define_expand "floatuns<sseintvecmodelower><mode>2"
2317 [(match_operand:VF1 0 "register_operand" "")
2318 (match_operand:<sseintvecmode> 1 "register_operand" "")]
2319 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2320 {
2321 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
2322 DONE;
2323 })
2324
;; 256-bit vcvtps2dq: non-truncating (UNSPEC_FIX_NOTRUNC) conversion of V8SF
;; operand 1 to V8SI; AVX only.
2325 (define_insn "avx_cvtps2dq256"
2326 [(set (match_operand:V8SI 0 "register_operand" "=x")
2327 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2328 UNSPEC_FIX_NOTRUNC))]
2329 "TARGET_AVX"
2330 "vcvtps2dq\t{%1, %0|%0, %1}"
2331 [(set_attr "type" "ssecvt")
2332 (set_attr "prefix" "vex")
2333 (set_attr "mode" "OI")])
2334
;; 128-bit cvtps2dq: non-truncating (UNSPEC_FIX_NOTRUNC) conversion of V4SF
;; operand 1 to V4SI.  prefix_data16 is "1" for the legacy encoding and left
;; at "*" when TARGET_AVX is in effect.
2335 (define_insn "sse2_cvtps2dq"
2336 [(set (match_operand:V4SI 0 "register_operand" "=x")
2337 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2338 UNSPEC_FIX_NOTRUNC))]
2339 "TARGET_SSE2"
2340 "%vcvtps2dq\t{%1, %0|%0, %1}"
2341 [(set_attr "type" "ssecvt")
2342 (set (attr "prefix_data16")
2343 (if_then_else
2344 (match_test "TARGET_AVX")
2345 (const_string "*")
2346 (const_string "1")))
2347 (set_attr "prefix" "maybe_vex")
2348 (set_attr "mode" "TI")])
2349
;; 256-bit truncating conversion V8SF -> V8SI (vcvttps2dq); AVX only.
2350 (define_insn "fix_truncv8sfv8si2"
2351 [(set (match_operand:V8SI 0 "register_operand" "=x")
2352 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2353 "TARGET_AVX"
2354 "vcvttps2dq\t{%1, %0|%0, %1}"
2355 [(set_attr "type" "ssecvt")
2356 (set_attr "prefix" "vex")
2357 (set_attr "mode" "OI")])
2358
;; 128-bit truncating conversion V4SF -> V4SI (cvttps2dq).
;; For the legacy encoding prefix_rep is "1" and prefix_data16 is "0"; when
;; TARGET_AVX is in effect both are left at "*".
;; prefix_data16 used to be listed twice: the conditional form below was
;; followed by an unconditional (set_attr "prefix_data16" "0").  Only the
;; conditional form, which parallels prefix_rep, is kept so the attribute has
;; a single definition.
2359 (define_insn "fix_truncv4sfv4si2"
2360 [(set (match_operand:V4SI 0 "register_operand" "=x")
2361 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2362 "TARGET_SSE2"
2363 "%vcvttps2dq\t{%1, %0|%0, %1}"
2364 [(set_attr "type" "ssecvt")
2365 (set (attr "prefix_rep")
2366 (if_then_else
2367 (match_test "TARGET_AVX")
2368 (const_string "*")
2369 (const_string "1")))
2370 (set (attr "prefix_data16")
2371 (if_then_else
2372 (match_test "TARGET_AVX")
2373 (const_string "*")
2374 (const_string "0")))
2376 (set_attr "prefix" "maybe_vex")
2377 (set_attr "mode" "TI")])
2378
;; Unsigned truncating float -> integer vector conversion for VF1 modes.
;; Sequence: ix86_expand_adjust_ufix_to_sfix_si returns an adjusted input
;; (tmp[0]) and stores a second value through &tmp[2]; the signed fix_trunc
;; pattern converts tmp[0] into a fresh pseudo tmp[1]; the final result is
;; tmp[1] XOR tmp[2].
;; NOTE(review): tmp[2] is presumably a per-element correction mask for
;; inputs outside the signed range -- confirm against the helper in i386.c.
2379 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2380 [(match_operand:<sseintvecmode> 0 "register_operand" "")
2381 (match_operand:VF1 1 "register_operand" "")]
2382 "TARGET_SSE2"
2383 {
2384 rtx tmp[3];
2385 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2386 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2387 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2388 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2389 DONE;
2390 })
2391
2392 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2393 ;;
2394 ;; Parallel double-precision floating point conversion operations
2395 ;;
2396 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2397
;; cvtpi2pd: converts V2SI operand 1 to V2DF.  Alternative 0 takes the source
;; in a "y" register (unit "mmx", prefix_data16 "1"); alternative 1 takes it
;; from memory and leaves both attributes at "*".
2398 (define_insn "sse2_cvtpi2pd"
2399 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2400 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2401 "TARGET_SSE2"
2402 "cvtpi2pd\t{%1, %0|%0, %1}"
2403 [(set_attr "type" "ssecvt")
2404 (set_attr "unit" "mmx,*")
2405 (set_attr "prefix_data16" "1,*")
2406 (set_attr "mode" "V2DF")])
2407
;; cvtpd2pi: non-truncating (UNSPEC_FIX_NOTRUNC) conversion of V2DF operand 1
;; to a V2SI destination with constraint "y".
2408 (define_insn "sse2_cvtpd2pi"
2409 [(set (match_operand:V2SI 0 "register_operand" "=y")
2410 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2411 UNSPEC_FIX_NOTRUNC))]
2412 "TARGET_SSE2"
2413 "cvtpd2pi\t{%1, %0|%0, %1}"
2414 [(set_attr "type" "ssecvt")
2415 (set_attr "unit" "mmx")
2416 (set_attr "bdver1_decode" "double")
2417 (set_attr "prefix_data16" "1")
2418 (set_attr "mode" "DI")])
2419
;; cvttpd2pi: truncating (fix) conversion of V2DF operand 1 to a V2SI
;; destination with constraint "y".
;; NOTE(review): mode is "TI" here while sse2_cvtpd2pi uses "DI" -- confirm
;; the difference is intended.
2420 (define_insn "sse2_cvttpd2pi"
2421 [(set (match_operand:V2SI 0 "register_operand" "=y")
2422 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2423 "TARGET_SSE2"
2424 "cvttpd2pi\t{%1, %0|%0, %1}"
2425 [(set_attr "type" "ssecvt")
2426 (set_attr "unit" "mmx")
2427 (set_attr "bdver1_decode" "double")
2428 (set_attr "prefix_data16" "1")
2429 (set_attr "mode" "TI")])
2430
;; cvtsi2sd: converts SImode operand 2 to DF and merges it into V2DF
;; operand 1 under vec_merge mask (const_int 1).  Alternatives 0 and 1 are
;; the non-AVX two-operand form (register and memory source, which carry
;; different decode attributes); alternative 2 is the AVX three-operand
;; vcvtsi2sd.
2431 (define_insn "sse2_cvtsi2sd"
2432 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2433 (vec_merge:V2DF
2434 (vec_duplicate:V2DF
2435 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2436 (match_operand:V2DF 1 "register_operand" "0,0,x")
2437 (const_int 1)))]
2438 "TARGET_SSE2"
2439 "@
2440 cvtsi2sd\t{%2, %0|%0, %2}
2441 cvtsi2sd\t{%2, %0|%0, %2}
2442 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2443 [(set_attr "isa" "noavx,noavx,avx")
2444 (set_attr "type" "sseicvt")
2445 (set_attr "athlon_decode" "double,direct,*")
2446 (set_attr "amdfam10_decode" "vector,double,*")
2447 (set_attr "bdver1_decode" "double,direct,*")
2448 (set_attr "prefix" "orig,orig,vex")
2449 (set_attr "mode" "DF")])
2450
;; cvtsi2sdq: DImode-source variant of sse2_cvtsi2sd, additionally requiring
;; TARGET_64BIT.  Converts operand 2 to DF and merges it into V2DF operand 1
;; under vec_merge mask (const_int 1).  Alternatives 0 and 1 are the non-AVX
;; form (prefix_rex "1"); alternative 2 is the AVX form (length_vex "4").
2451 (define_insn "sse2_cvtsi2sdq"
2452 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2453 (vec_merge:V2DF
2454 (vec_duplicate:V2DF
2455 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2456 (match_operand:V2DF 1 "register_operand" "0,0,x")
2457 (const_int 1)))]
2458 "TARGET_SSE2 && TARGET_64BIT"
2459 "@
2460 cvtsi2sdq\t{%2, %0|%0, %2}
2461 cvtsi2sdq\t{%2, %0|%0, %2}
2462 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2463 [(set_attr "isa" "noavx,noavx,avx")
2464 (set_attr "type" "sseicvt")
2465 (set_attr "athlon_decode" "double,direct,*")
2466 (set_attr "amdfam10_decode" "vector,double,*")
2467 (set_attr "bdver1_decode" "double,direct,*")
2468 (set_attr "length_vex" "*,*,4")
2469 (set_attr "prefix_rex" "1,1,*")
2470 (set_attr "prefix" "orig,orig,vex")
2471 (set_attr "mode" "DF")])
2472
;; cvtsd2si: non-truncating (UNSPEC_FIX_NOTRUNC) conversion of element 0 of
;; V2DF operand 1 to SImode.  Alternatives are register and memory source.
;; NOTE(review): unlike sse2_cvtsd2si_2, no amdfam10_decode is set here --
;; confirm that is intended.
2473 (define_insn "sse2_cvtsd2si"
2474 [(set (match_operand:SI 0 "register_operand" "=r,r")
2475 (unspec:SI
2476 [(vec_select:DF
2477 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2478 (parallel [(const_int 0)]))]
2479 UNSPEC_FIX_NOTRUNC))]
2480 "TARGET_SSE2"
2481 "%vcvtsd2si\t{%1, %0|%0, %1}"
2482 [(set_attr "type" "sseicvt")
2483 (set_attr "athlon_decode" "double,vector")
2484 (set_attr "bdver1_decode" "double,double")
2485 (set_attr "prefix_rep" "1")
2486 (set_attr "prefix" "maybe_vex")
2487 (set_attr "mode" "SI")])
2488
;; cvtsd2si taking a scalar DFmode operand directly (no vec_select):
;; non-truncating (UNSPEC_FIX_NOTRUNC) conversion to SImode.
2489 (define_insn "sse2_cvtsd2si_2"
2490 [(set (match_operand:SI 0 "register_operand" "=r,r")
2491 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2492 UNSPEC_FIX_NOTRUNC))]
2493 "TARGET_SSE2"
2494 "%vcvtsd2si\t{%1, %0|%0, %1}"
2495 [(set_attr "type" "sseicvt")
2496 (set_attr "athlon_decode" "double,vector")
2497 (set_attr "amdfam10_decode" "double,double")
2498 (set_attr "bdver1_decode" "double,double")
2499 (set_attr "prefix_rep" "1")
2500 (set_attr "prefix" "maybe_vex")
2501 (set_attr "mode" "SI")])
2502
;; 64-bit cvtsd2si: non-truncating (UNSPEC_FIX_NOTRUNC) conversion of
;; element 0 of V2DF operand 1 to DImode; requires TARGET_64BIT.
;; NOTE(review): unlike sse2_cvtsd2siq_2, no amdfam10_decode is set here --
;; confirm that is intended.
2503 (define_insn "sse2_cvtsd2siq"
2504 [(set (match_operand:DI 0 "register_operand" "=r,r")
2505 (unspec:DI
2506 [(vec_select:DF
2507 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2508 (parallel [(const_int 0)]))]
2509 UNSPEC_FIX_NOTRUNC))]
2510 "TARGET_SSE2 && TARGET_64BIT"
2511 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2512 [(set_attr "type" "sseicvt")
2513 (set_attr "athlon_decode" "double,vector")
2514 (set_attr "bdver1_decode" "double,double")
2515 (set_attr "prefix_rep" "1")
2516 (set_attr "prefix" "maybe_vex")
2517 (set_attr "mode" "DI")])
2518
;; 64-bit cvtsd2si taking a scalar DFmode operand directly (no vec_select):
;; non-truncating (UNSPEC_FIX_NOTRUNC) conversion to DImode; requires
;; TARGET_64BIT.
2519 (define_insn "sse2_cvtsd2siq_2"
2520 [(set (match_operand:DI 0 "register_operand" "=r,r")
2521 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2522 UNSPEC_FIX_NOTRUNC))]
2523 "TARGET_SSE2 && TARGET_64BIT"
2524 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2525 [(set_attr "type" "sseicvt")
2526 (set_attr "athlon_decode" "double,vector")
2527 (set_attr "amdfam10_decode" "double,double")
2528 (set_attr "bdver1_decode" "double,double")
2529 (set_attr "prefix_rep" "1")
2530 (set_attr "prefix" "maybe_vex")
2531 (set_attr "mode" "DI")])
2532
;; cvttsd2si: truncating (fix) conversion of element 0 of V2DF operand 1 to
;; SImode.  Alternatives are register and memory source.
2533 (define_insn "sse2_cvttsd2si"
2534 [(set (match_operand:SI 0 "register_operand" "=r,r")
2535 (fix:SI
2536 (vec_select:DF
2537 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2538 (parallel [(const_int 0)]))))]
2539 "TARGET_SSE2"
2540 "%vcvttsd2si\t{%1, %0|%0, %1}"
2541 [(set_attr "type" "sseicvt")
2542 (set_attr "athlon_decode" "double,vector")
2543 (set_attr "amdfam10_decode" "double,double")
2544 (set_attr "bdver1_decode" "double,double")
2545 (set_attr "prefix_rep" "1")
2546 (set_attr "prefix" "maybe_vex")
2547 (set_attr "mode" "SI")])
2548
;; 64-bit cvttsd2si: truncating (fix) conversion of element 0 of V2DF
;; operand 1 to DImode; requires TARGET_64BIT.
2549 (define_insn "sse2_cvttsd2siq"
2550 [(set (match_operand:DI 0 "register_operand" "=r,r")
2551 (fix:DI
2552 (vec_select:DF
2553 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2554 (parallel [(const_int 0)]))))]
2555 "TARGET_SSE2 && TARGET_64BIT"
2556 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2557 [(set_attr "type" "sseicvt")
2558 (set_attr "athlon_decode" "double,vector")
2559 (set_attr "amdfam10_decode" "double,double")
2560 (set_attr "bdver1_decode" "double,double")
2561 (set_attr "prefix_rep" "1")
2562 (set_attr "prefix" "maybe_vex")
2563 (set_attr "mode" "DI")])
2564
;; Signed V4SI -> V4DF conversion (vcvtdq2pd); AVX only.
2565 (define_insn "floatv4siv4df2"
2566 [(set (match_operand:V4DF 0 "register_operand" "=x")
2567 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2568 "TARGET_AVX"
2569 "vcvtdq2pd\t{%1, %0|%0, %1}"
2570 [(set_attr "type" "ssecvt")
2571 (set_attr "prefix" "vex")
2572 (set_attr "mode" "V4DF")])
2573
;; vcvtdq2pd on the low half of a V8SI operand: elements 0..3 are selected
;; and converted to V4DF.  The template prints operand 1 with the %x
;; modifier.
2574 (define_insn "avx_cvtdq2pd256_2"
2575 [(set (match_operand:V4DF 0 "register_operand" "=x")
2576 (float:V4DF
2577 (vec_select:V4SI
2578 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2579 (parallel [(const_int 0) (const_int 1)
2580 (const_int 2) (const_int 3)]))))]
2581 "TARGET_AVX"
2582 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2583 [(set_attr "type" "ssecvt")
2584 (set_attr "prefix" "vex")
2585 (set_attr "mode" "V4DF")])
2586
;; cvtdq2pd: elements 0 and 1 of V4SI operand 1 are converted to V2DF.
;; The Intel-syntax half of the template prints operand 1 with the %q
;; modifier.
2587 (define_insn "sse2_cvtdq2pd"
2588 [(set (match_operand:V2DF 0 "register_operand" "=x")
2589 (float:V2DF
2590 (vec_select:V2SI
2591 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2592 (parallel [(const_int 0) (const_int 1)]))))]
2593 "TARGET_SSE2"
2594 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2595 [(set_attr "type" "ssecvt")
2596 (set_attr "prefix" "maybe_vex")
2597 (set_attr "mode" "V2DF")])
2598
;; 256-bit vcvtpd2dq: non-truncating (UNSPEC_FIX_NOTRUNC) conversion of V4DF
;; operand 1 to V4SI.  The template carries a {y} mnemonic suffix.
2599 (define_insn "avx_cvtpd2dq256"
2600 [(set (match_operand:V4SI 0 "register_operand" "=x")
2601 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2602 UNSPEC_FIX_NOTRUNC))]
2603 "TARGET_AVX"
2604 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2605 [(set_attr "type" "ssecvt")
2606 (set_attr "prefix" "vex")
2607 (set_attr "mode" "OI")])
2608
;; Expander producing a V8SI result: the non-truncating conversion of V4DF
;; operand 1 concatenated with operand 2, which the preparation statement
;; sets to the V4SI zero constant.
2609 (define_expand "avx_cvtpd2dq256_2"
2610 [(set (match_operand:V8SI 0 "register_operand" "")
2611 (vec_concat:V8SI
2612 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
2613 UNSPEC_FIX_NOTRUNC)
2614 (match_dup 2)))]
2615 "TARGET_AVX"
2616 "operands[2] = CONST0_RTX (V4SImode);")
2617
;; Insn for the V8SI form of vcvtpd2dq: the converted V4SI value is
;; concatenated with a const0_operand upper half.  The destination is
;; printed with the %x modifier.
2618 (define_insn "*avx_cvtpd2dq256_2"
2619 [(set (match_operand:V8SI 0 "register_operand" "=x")
2620 (vec_concat:V8SI
2621 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2622 UNSPEC_FIX_NOTRUNC)
2623 (match_operand:V4SI 2 "const0_operand" "")))]
2624 "TARGET_AVX"
2625 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2626 [(set_attr "type" "ssecvt")
2627 (set_attr "prefix" "vex")
2628 (set_attr "mode" "OI")])
2629
;; Expander producing a V4SI result: the non-truncating conversion of V2DF
;; operand 1 (a V2SI value) concatenated with operand 2, which the
;; preparation statement sets to the V2SI zero constant.
2630 (define_expand "sse2_cvtpd2dq"
2631 [(set (match_operand:V4SI 0 "register_operand" "")
2632 (vec_concat:V4SI
2633 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2634 UNSPEC_FIX_NOTRUNC)
2635 (match_dup 2)))]
2636 "TARGET_SSE2"
2637 "operands[2] = CONST0_RTX (V2SImode);")
2638
;; Insn for cvtpd2dq: the converted V2SI value is concatenated with a
;; const0_operand upper half.  The output code returns the vcvtpd2dq{x}
;; template when TARGET_AVX is set and the plain cvtpd2dq template otherwise.
2639 (define_insn "*sse2_cvtpd2dq"
2640 [(set (match_operand:V4SI 0 "register_operand" "=x")
2641 (vec_concat:V4SI
2642 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2643 UNSPEC_FIX_NOTRUNC)
2644 (match_operand:V2SI 2 "const0_operand" "")))]
2645 "TARGET_SSE2"
2646 {
2647 if (TARGET_AVX)
2648 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2649 else
2650 return "cvtpd2dq\t{%1, %0|%0, %1}";
2651 }
2652 [(set_attr "type" "ssecvt")
2653 (set_attr "prefix_rep" "1")
2654 (set_attr "prefix_data16" "0")
2655 (set_attr "prefix" "maybe_vex")
2656 (set_attr "mode" "TI")
2657 (set_attr "amdfam10_decode" "double")
2658 (set_attr "athlon_decode" "vector")
2659 (set_attr "bdver1_decode" "double")])
2660
;; 256-bit truncating conversion V4DF -> V4SI (vcvttpd2dq with a {y}
;; mnemonic suffix); AVX only.
2661 (define_insn "fix_truncv4dfv4si2"
2662 [(set (match_operand:V4SI 0 "register_operand" "=x")
2663 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2664 "TARGET_AVX"
2665 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2666 [(set_attr "type" "ssecvt")
2667 (set_attr "prefix" "vex")
2668 (set_attr "mode" "OI")])
2669
;; Expander producing a V8SI result: the truncating conversion of V4DF
;; operand 1 concatenated with operand 2, which the preparation statement
;; sets to the V4SI zero constant.
2670 (define_expand "avx_cvttpd2dq256_2"
2671 [(set (match_operand:V8SI 0 "register_operand" "")
2672 (vec_concat:V8SI
2673 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
2674 (match_dup 2)))]
2675 "TARGET_AVX"
2676 "operands[2] = CONST0_RTX (V4SImode);")
2677
;; Insn for the V8SI form of vcvttpd2dq: the truncated V4SI value is
;; concatenated with a const0_operand upper half.  The destination is
;; printed with the %x modifier.
2678 (define_insn "*avx_cvttpd2dq256_2"
2679 [(set (match_operand:V8SI 0 "register_operand" "=x")
2680 (vec_concat:V8SI
2681 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2682 (match_operand:V4SI 2 "const0_operand" "")))]
2683 "TARGET_AVX"
2684 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2685 [(set_attr "type" "ssecvt")
2686 (set_attr "prefix" "vex")
2687 (set_attr "mode" "OI")])
2688
;; Expander producing a V4SI result: the truncating conversion of V2DF
;; operand 1 (a V2SI value) concatenated with operand 2, which the
;; preparation statement sets to the V2SI zero constant.
2689 (define_expand "sse2_cvttpd2dq"
2690 [(set (match_operand:V4SI 0 "register_operand" "")
2691 (vec_concat:V4SI
2692 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2693 (match_dup 2)))]
2694 "TARGET_SSE2"
2695 "operands[2] = CONST0_RTX (V2SImode);")
2696
;; Insn for cvttpd2dq: the truncated V2SI value is concatenated with a
;; const0_operand upper half.  The output code returns the vcvttpd2dq{x}
;; template when TARGET_AVX is set and the plain cvttpd2dq template
;; otherwise.
2697 (define_insn "*sse2_cvttpd2dq"
2698 [(set (match_operand:V4SI 0 "register_operand" "=x")
2699 (vec_concat:V4SI
2700 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2701 (match_operand:V2SI 2 "const0_operand" "")))]
2702 "TARGET_SSE2"
2703 {
2704 if (TARGET_AVX)
2705 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2706 else
2707 return "cvttpd2dq\t{%1, %0|%0, %1}";
2708 }
2709 [(set_attr "type" "ssecvt")
2710 (set_attr "amdfam10_decode" "double")
2711 (set_attr "athlon_decode" "vector")
2712 (set_attr "bdver1_decode" "double")
2713 (set_attr "prefix" "maybe_vex")
2714 (set_attr "mode" "TI")])
2715
;; cvtsd2ss: the RTL models it as float_truncate of V2DF operand 2 to V2SF,
;; duplicated to V4SF and merged into operand 1 under vec_merge mask
;; (const_int 1).  Alternatives 0 and 1 are the non-AVX two-operand form
;; (register and memory source); alternative 2 is the AVX three-operand
;; vcvtsd2ss.
2716 (define_insn "sse2_cvtsd2ss"
2717 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2718 (vec_merge:V4SF
2719 (vec_duplicate:V4SF
2720 (float_truncate:V2SF
2721 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2722 (match_operand:V4SF 1 "register_operand" "0,0,x")
2723 (const_int 1)))]
2724 "TARGET_SSE2"
2725 "@
2726 cvtsd2ss\t{%2, %0|%0, %2}
2727 cvtsd2ss\t{%2, %0|%0, %2}
2728 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2729 [(set_attr "isa" "noavx,noavx,avx")
2730 (set_attr "type" "ssecvt")
2731 (set_attr "athlon_decode" "vector,double,*")
2732 (set_attr "amdfam10_decode" "vector,double,*")
2733 (set_attr "bdver1_decode" "direct,direct,*")
2734 (set_attr "prefix" "orig,orig,vex")
2735 (set_attr "mode" "SF")])
2736
;; cvtss2sd: the RTL models it as float_extend to V2DF of elements 0 and 1
;; of V4SF operand 2, merged into operand 1 under vec_merge mask
;; (const_int 1).  Alternatives 0 and 1 are the non-AVX two-operand form
;; (register and memory source); alternative 2 is the AVX three-operand
;; vcvtss2sd.
2737 (define_insn "sse2_cvtss2sd"
2738 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2739 (vec_merge:V2DF
2740 (float_extend:V2DF
2741 (vec_select:V2SF
2742 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2743 (parallel [(const_int 0) (const_int 1)])))
2744 (match_operand:V2DF 1 "register_operand" "0,0,x")
2745 (const_int 1)))]
2746 "TARGET_SSE2"
2747 "@
2748 cvtss2sd\t{%2, %0|%0, %2}
2749 cvtss2sd\t{%2, %0|%0, %2}
2750 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2751 [(set_attr "isa" "noavx,noavx,avx")
2752 (set_attr "type" "ssecvt")
2753 (set_attr "amdfam10_decode" "vector,double,*")
2754 (set_attr "athlon_decode" "direct,direct,*")
2755 (set_attr "bdver1_decode" "direct,direct,*")
2756 (set_attr "prefix" "orig,orig,vex")
2757 (set_attr "mode" "DF")])
2758
;; 256-bit vcvtpd2ps: float_truncate of V4DF operand 1 to V4SF.  The
;; template carries a {y} mnemonic suffix.
2759 (define_insn "avx_cvtpd2ps256"
2760 [(set (match_operand:V4SF 0 "register_operand" "=x")
2761 (float_truncate:V4SF
2762 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2763 "TARGET_AVX"
2764 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2765 [(set_attr "type" "ssecvt")
2766 (set_attr "prefix" "vex")
2767 (set_attr "mode" "V4SF")])
2768
;; Expander producing a V4SF result: float_truncate of V2DF operand 1 (a
;; V2SF value) concatenated with operand 2, which the preparation statement
;; sets to the V2SF zero constant.
2769 (define_expand "sse2_cvtpd2ps"
2770 [(set (match_operand:V4SF 0 "register_operand" "")
2771 (vec_concat:V4SF
2772 (float_truncate:V2SF
2773 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2774 (match_dup 2)))]
2775 "TARGET_SSE2"
2776 "operands[2] = CONST0_RTX (V2SFmode);")
2777
;; Insn for cvtpd2ps: the truncated V2SF value is concatenated with a
;; const0_operand upper half.  The output code returns the vcvtpd2ps{x}
;; template when TARGET_AVX is set and the plain cvtpd2ps template otherwise.
2778 (define_insn "*sse2_cvtpd2ps"
2779 [(set (match_operand:V4SF 0 "register_operand" "=x")
2780 (vec_concat:V4SF
2781 (float_truncate:V2SF
2782 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2783 (match_operand:V2SF 2 "const0_operand" "")))]
2784 "TARGET_SSE2"
2785 {
2786 if (TARGET_AVX)
2787 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2788 else
2789 return "cvtpd2ps\t{%1, %0|%0, %1}";
2790 }
2791 [(set_attr "type" "ssecvt")
2792 (set_attr "amdfam10_decode" "double")
2793 (set_attr "athlon_decode" "vector")
2794 (set_attr "bdver1_decode" "double")
2795 (set_attr "prefix_data16" "1")
2796 (set_attr "prefix" "maybe_vex")
2797 (set_attr "mode" "V4SF")])
2798
;; 256-bit vcvtps2pd: float_extend of V4SF operand 1 to V4DF; AVX only.
2799 (define_insn "avx_cvtps2pd256"
2800 [(set (match_operand:V4DF 0 "register_operand" "=x")
2801 (float_extend:V4DF
2802 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2803 "TARGET_AVX"
2804 "vcvtps2pd\t{%1, %0|%0, %1}"
2805 [(set_attr "type" "ssecvt")
2806 (set_attr "prefix" "vex")
2807 (set_attr "mode" "V4DF")])
2808
;; Widen the low four floats of a V8SF operand to V4DF.  The source is
;; printed with the %x modifier, i.e. as a 128-bit operand, since only
;; elements 0..3 are selected.
(define_insn "*avx_cvtps2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)
                       (const_int 1)
                       (const_int 2)
                       (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtps2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
2821
;; Widen the low two floats of a V4SF operand to two doubles (V2DF).
;; %v selects the VEX-prefixed mnemonic when AVX is enabled.
(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2"
  "%vcvtps2pd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "athlon_decode" "double")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")])
2837
;; Widen the high two floats of operand 1 to V2DF in two steps:
;;   1. a select with the same shape as sse_movhlps (indices 6 7 2 3)
;;      places elements 2 and 3 of operand 1 in the low half of a
;;      fresh V4SF scratch (operand 2);
;;   2. the low two elements of that scratch are float-extended.
(define_expand "vec_unpacks_hi_v4sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_dup 2)
            (match_operand:V4SF 1 "nonimmediate_operand" ""))
          (parallel [(const_int 6)
                     (const_int 7)
                     (const_int 2)
                     (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand" "")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_dup 2)
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2"
{
  operands[2] = gen_reg_rtx (V4SFmode);
})
2853
;; Widen the high four floats of a V8SF operand to V4DF: extract
;; elements 4..7 into a fresh V4SF scratch (operand 2), then
;; float-extend the scratch.
(define_expand "vec_unpacks_hi_v8sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (match_operand:V8SF 1 "nonimmediate_operand" "")
          (parallel [(const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (float_extend:V4DF
          (match_dup 2)))]
  "TARGET_AVX"
{
  operands[2] = gen_reg_rtx (V4SFmode);
})
2865
;; Widen the low two floats of a V4SF operand to V2DF.  The RTL has
;; the same shape as the sse2_cvtps2pd insn pattern.
(define_expand "vec_unpacks_lo_v4sf"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2")
2873
;; Widen the low four floats of a V8SF operand to V4DF.  The RTL has
;; the same shape as the *avx_cvtps2pd256_2 insn pattern.
(define_expand "vec_unpacks_lo_v8sf"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)
                       (const_int 2)
                       (const_int 3)]))))]
  "TARGET_AVX")
2882
;; For an integer vector mode, the floating-point vector mode that has
;; half as many elements, each twice as wide.
(define_mode_attr sseunpackfltmode
  [(V8HI  "V4SF")
   (V4SI  "V2DF")
   (V16HI "V8SF")
   (V8SI  "V4DF")])
2885
;; Signed high-half unpack to float: sign-widen the high half of
;; operand 1 into an integer temporary of mode <sseunpackmode>, then
;; convert that temporary to <sseunpackfltmode> with a FLOAT rtx.
(define_expand "vec_unpacks_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  emit_insn (gen_vec_unpacks_hi_<mode> (widened, operands[1]));

  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
2898
;; Signed low-half unpack to float: sign-widen the low half of
;; operand 1 into an integer temporary of mode <sseunpackmode>, then
;; convert that temporary to <sseunpackfltmode> with a FLOAT rtx.
(define_expand "vec_unpacks_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  emit_insn (gen_vec_unpacks_lo_<mode> (widened, operands[1]));

  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
2911
;; Unsigned high-half unpack to float: widen the high half of operand 1
;; with the unsigned unpack expander into an integer temporary of mode
;; <sseunpackmode>, then convert that temporary with a (signed) FLOAT
;; rtx.
(define_expand "vec_unpacku_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  emit_insn (gen_vec_unpacku_hi_<mode> (widened, operands[1]));

  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
2924
;; Unsigned low-half unpack to float: widen the low half of operand 1
;; with the unsigned unpack expander into an integer temporary of mode
;; <sseunpackmode>, then convert that temporary with a (signed) FLOAT
;; rtx.
(define_expand "vec_unpacku_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  emit_insn (gen_vec_unpacku_lo_<mode> (widened, operands[1]));

  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
2937
;; Convert the high two signed ints of a V4SI operand to V2DF:
;;   1. copy elements 2 and 3 into both halves of a fresh V4SI
;;      scratch (operand 2);
;;   2. convert the low two elements of the scratch to double.
(define_expand "vec_unpacks_float_hi_v4si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 2)
                     (const_int 3)
                     (const_int 2)
                     (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand" "")
        (float:V2DF
          (vec_select:V2SI
            (match_dup 2)
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2"
{
  operands[2] = gen_reg_rtx (V4SImode);
})
2951
;; Convert the low two signed ints of a V4SI operand to V2DF.
(define_expand "vec_unpacks_float_lo_v4si"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2")
2959
;; Convert the high four signed ints of a V8SI operand to V4DF:
;; extract elements 4..7 into a fresh V4SI scratch (operand 2), then
;; convert the scratch to double.
(define_expand "vec_unpacks_float_hi_v8si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V8SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (float:V4DF
          (match_dup 2)))]
  "TARGET_AVX"
{
  operands[2] = gen_reg_rtx (V4SImode);
})
2971
;; Convert the low four signed ints of a V8SI operand to V4DF.
(define_expand "vec_unpacks_float_lo_v8si"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (float:V4DF
          (vec_select:V4SI
            (match_operand:V8SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)
                       (const_int 2)
                       (const_int 3)]))))]
  "TARGET_AVX")
2980
;; Convert the high two unsigned ints of a V4SI operand to V2DF.
;; The ints are first converted as if they were signed; every result
;; that came out negative is then corrected by adding 2**32:
;;   op5 = { op1[2], op1[3], op1[2], op1[3] }
;;   op6 = (double) low two elements of op5      (signed conversion)
;;   op7 = op6 < 0                               (op3 is the zero vector)
;;   op8 = op7 & 2**32                           (op4 holds 2**32)
;;   op0 = op6 + op8
(define_expand "vec_unpacku_float_hi_v4si"
  [(set (match_dup 5)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 2)
                     (const_int 3)
                     (const_int 2)
                     (const_int 3)])))
   (set (match_dup 6)
        (float:V2DF
          (vec_select:V2SI
            (match_dup 5)
            (parallel [(const_int 0)
                       (const_int 1)]))))
   (set (match_dup 7)
        (lt:V2DF (match_dup 6) (match_dup 3)))
   (set (match_dup 8)
        (and:V2DF (match_dup 7) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_dup 6) (match_dup 8)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE two_pow_32;
  rtx bias;

  /* Build the DFmode constant 1.0 * 2**32.  */
  real_ldexp (&two_pow_32, &dconst1, 32);
  bias = const_double_from_real_value (two_pow_32, DFmode);

  /* Comparison reference (zero) and correction vector.  */
  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, bias));

  /* Scratch for the shuffled integer input.  */
  operands[5] = gen_reg_rtx (V4SImode);

  /* Scratches for the converted value, the mask and the correction.  */
  operands[6] = gen_reg_rtx (V2DFmode);
  operands[7] = gen_reg_rtx (V2DFmode);
  operands[8] = gen_reg_rtx (V2DFmode);
})
3016
;; Convert the low two unsigned ints of a V4SI operand to V2DF.
;; The ints are first converted as if they were signed; every result
;; that came out negative is then corrected by adding 2**32:
;;   op5 = (double) low two elements of op1      (signed conversion)
;;   op6 = op5 < 0                               (op3 is the zero vector)
;;   op7 = op6 & 2**32                           (op4 holds 2**32)
;;   op0 = op5 + op7
(define_expand "vec_unpacku_float_lo_v4si"
  [(set (match_dup 5)
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)]))))
   (set (match_dup 6)
        (lt:V2DF (match_dup 5) (match_dup 3)))
   (set (match_dup 7)
        (and:V2DF (match_dup 6) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_dup 5) (match_dup 7)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE two_pow_32;
  rtx bias;

  /* Build the DFmode constant 1.0 * 2**32.  */
  real_ldexp (&two_pow_32, &dconst1, 32);
  bias = const_double_from_real_value (two_pow_32, DFmode);

  /* Comparison reference (zero) and correction vector.  */
  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, bias));

  /* Scratches for the converted value, the mask and the correction.  */
  operands[5] = gen_reg_rtx (V2DFmode);
  operands[6] = gen_reg_rtx (V2DFmode);
  operands[7] = gen_reg_rtx (V2DFmode);
})
3045
;; Convert the high four unsigned ints of a V8SI register to V4DF.
;; The high half is extracted and converted as if it were signed;
;; every result that came out negative is then corrected by adding
;; 2**32 (compare against zero, AND the comparison result with the
;; 2**32 vector, add).
(define_expand "vec_unpacku_float_hi_v8si"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V8SI 1 "register_operand" "")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE two_pow_32;
  rtx bias_elt, zero, bias, hi_ints, signed_cvt, neg_mask, fixup;

  /* Build the DFmode constant 1.0 * 2**32.  */
  real_ldexp (&two_pow_32, &dconst1, 32);
  bias_elt = const_double_from_real_value (two_pow_32, DFmode);

  zero = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  bias = force_reg (V4DFmode,
                    ix86_build_const_vector (V4DFmode, 1, bias_elt));
  hi_ints = gen_reg_rtx (V4SImode);

  signed_cvt = gen_reg_rtx (V4DFmode);
  neg_mask = gen_reg_rtx (V4DFmode);
  fixup = gen_reg_rtx (V4DFmode);

  emit_insn (gen_vec_extract_hi_v8si (hi_ints, operands[1]));
  emit_insn (gen_floatv4siv4df2 (signed_cvt, hi_ints));
  emit_insn (gen_rtx_SET (VOIDmode, neg_mask,
                          gen_rtx_LT (V4DFmode, signed_cvt, zero)));
  emit_insn (gen_andv4df3 (fixup, neg_mask, bias));
  emit_insn (gen_addv4df3 (operands[0], signed_cvt, fixup));
  DONE;
})
3072
;; Convert the low four unsigned ints of a V8SI operand to V4DF.
;; The low half is converted as if it were signed (via
;; avx_cvtdq2pd256_2); every result that came out negative is then
;; corrected by adding 2**32 (compare against zero, AND the comparison
;; result with the 2**32 vector, add).
(define_expand "vec_unpacku_float_lo_v8si"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V8SI 1 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE two_pow_32;
  rtx bias_elt, zero, bias, signed_cvt, neg_mask, fixup;

  /* Build the DFmode constant 1.0 * 2**32.  */
  real_ldexp (&two_pow_32, &dconst1, 32);
  bias_elt = const_double_from_real_value (two_pow_32, DFmode);

  zero = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  bias = force_reg (V4DFmode,
                    ix86_build_const_vector (V4DFmode, 1, bias_elt));

  signed_cvt = gen_reg_rtx (V4DFmode);
  neg_mask = gen_reg_rtx (V4DFmode);
  fixup = gen_reg_rtx (V4DFmode);

  emit_insn (gen_avx_cvtdq2pd256_2 (signed_cvt, operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, neg_mask,
                          gen_rtx_LT (V4DFmode, signed_cvt, zero)));
  emit_insn (gen_andv4df3 (fixup, neg_mask, bias));
  emit_insn (gen_addv4df3 (operands[0], signed_cvt, fixup));
  DONE;
})
3097
;; Pack two V4DF operands into one V8SF: each input is truncated to a
;; V4SF scratch (operands 3 and 4) and the scratches are concatenated,
;; operand 1's floats first.
(define_expand "vec_pack_trunc_v4df"
  [(set (match_dup 3)
        (float_truncate:V4SF
          (match_operand:V4DF 1 "nonimmediate_operand" "")))
   (set (match_dup 4)
        (float_truncate:V4SF
          (match_operand:V4DF 2 "nonimmediate_operand" "")))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_concat:V8SF
          (match_dup 3)
          (match_dup 4)))]
  "TARGET_AVX"
{
  /* One V4SF scratch per truncated half.  */
  operands[3] = gen_reg_rtx (V4SFmode);
  operands[4] = gen_reg_rtx (V4SFmode);
})
3114
;; Pack two V2DF operands into one V4SF.
;;  - Without AVX, or when 128-bit AVX is preferred: truncate each
;;    input separately and join the two low halves with movlhps.
;;  - Otherwise: concatenate the inputs into a V4DF and truncate it
;;    with a single 256-bit conversion.
(define_expand "vec_pack_trunc_v2df"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_AVX || TARGET_PREFER_AVX128)
    {
      rtx lo_cvt = gen_reg_rtx (V4SFmode);
      rtx hi_cvt = gen_reg_rtx (V4SFmode);

      emit_insn (gen_sse2_cvtpd2ps (lo_cvt, operands[1]));
      emit_insn (gen_sse2_cvtpd2ps (hi_cvt, operands[2]));
      emit_insn (gen_sse_movlhps (operands[0], lo_cvt, hi_cvt));
    }
  else
    {
      rtx wide = gen_reg_rtx (V4DFmode);
      rtx lo_src = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide, lo_src, operands[2]));
      emit_insn (gen_avx_cvtpd2ps256 (operands[0], wide));
    }
  DONE;
})
3142
;; Pack two V4DF operands into one V8SI using truncating signed
;; conversion: convert each input to a V4SI temporary and concatenate
;; the temporaries, operand 1's ints first.
(define_expand "vec_pack_sfix_trunc_v4df"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V4DF 1 "nonimmediate_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  rtx lo_ints = gen_reg_rtx (V4SImode);
  rtx hi_ints = gen_reg_rtx (V4SImode);

  emit_insn (gen_fix_truncv4dfv4si2 (lo_ints, operands[1]));
  emit_insn (gen_fix_truncv4dfv4si2 (hi_ints, operands[2]));
  emit_insn (gen_avx_vec_concatv8si (operands[0], lo_ints, hi_ints));
  DONE;
})
3159
;; Pack two V2DF operands into one V4SI using truncating signed
;; conversion.
;;  - Without AVX, or when 128-bit AVX is preferred: convert each
;;    input with cvttpd2dq and interleave the low 64-bit halves of the
;;    two results (viewed as V2DI).
;;  - Otherwise: concatenate the inputs into a V4DF and convert it
;;    with a single 256-bit truncating conversion.
(define_expand "vec_pack_sfix_trunc_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_AVX || TARGET_PREFER_AVX128)
    {
      rtx lo_ints = gen_reg_rtx (V4SImode);
      rtx hi_ints = gen_reg_rtx (V4SImode);

      emit_insn (gen_sse2_cvttpd2dq (lo_ints, operands[1]));
      emit_insn (gen_sse2_cvttpd2dq (hi_ints, operands[2]));
      emit_insn
        (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
                                     gen_lowpart (V2DImode, lo_ints),
                                     gen_lowpart (V2DImode, hi_ints)));
    }
  else
    {
      rtx wide = gen_reg_rtx (V4DFmode);
      rtx lo_src = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide, lo_src, operands[2]));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], wide));
    }
  DONE;
})
3190
;; For a double-precision vector mode, the SImode vector mode with
;; twice as many elements (the result of packing two such vectors).
(define_mode_attr ssepackfltmode
  [(V4DF "V8SI")
   (V2DF "V4SI")])
3193
;; Pack two double vectors into one vector of unsigned ints.
;; ix86_expand_adjust_ufix_to_sfix_si returns, for each input, an
;; adjusted value suitable for the signed pack plus (through its
;; second argument) a companion vector.  The adjusted inputs go
;; through vec_pack_sfix_trunc_<mode>; the even elements of the two
;; companion vectors are gathered into one vector and XORed into the
;; packed result.
(define_expand "vec_pack_ufix_trunc_<mode>"
  [(match_operand:<ssepackfltmode> 0 "register_operand" "")
   (match_operand:VF2 1 "register_operand" "")
   (match_operand:VF2 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx adj_lo, adj_hi, xor_lo, xor_hi, packed, xor_mask, result;

  adj_lo = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &xor_lo);
  adj_hi = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &xor_hi);

  packed = gen_reg_rtx (<ssepackfltmode>mode);
  emit_insn (gen_vec_pack_sfix_trunc_<mode> (packed, adj_lo, adj_hi));

  if (<ssepackfltmode>mode != V4SImode && !TARGET_AVX2)
    {
      /* 256-bit result without AVX2: do the even-element extraction
         on V8SF views and view the outcome as V8SI again.  */
      xor_mask = gen_reg_rtx (V8SFmode);
      ix86_expand_vec_extract_even_odd (xor_mask,
                                        gen_lowpart (V8SFmode, xor_lo),
                                        gen_lowpart (V8SFmode, xor_hi), 0);
      xor_mask = gen_lowpart (V8SImode, xor_mask);
    }
  else
    {
      xor_mask = gen_reg_rtx (<ssepackfltmode>mode);
      ix86_expand_vec_extract_even_odd (xor_mask, xor_lo, xor_hi, 0);
    }

  result = expand_simple_binop (<ssepackfltmode>mode, XOR, packed, xor_mask,
                                operands[0], 0, OPTAB_DIRECT);
  /* The binop may have chosen a different target; copy if so.  */
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})
3223
;; Pack two V4DF operands into one V8SI using the non-truncating
;; cvtpd2dq conversion: convert each input to a V4SI temporary and
;; concatenate the temporaries, operand 1's ints first.
(define_expand "vec_pack_sfix_v4df"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V4DF 1 "nonimmediate_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  rtx lo_ints = gen_reg_rtx (V4SImode);
  rtx hi_ints = gen_reg_rtx (V4SImode);

  emit_insn (gen_avx_cvtpd2dq256 (lo_ints, operands[1]));
  emit_insn (gen_avx_cvtpd2dq256 (hi_ints, operands[2]));
  emit_insn (gen_avx_vec_concatv8si (operands[0], lo_ints, hi_ints));
  DONE;
})
3240
;; Pack two V2DF operands into one V4SI using the non-truncating
;; cvtpd2dq conversion.
;;  - Without AVX, or when 128-bit AVX is preferred: convert each
;;    input with cvtpd2dq and interleave the low 64-bit halves of the
;;    two results (viewed as V2DI).
;;  - Otherwise: concatenate the inputs into a V4DF and convert it
;;    with a single 256-bit conversion.
(define_expand "vec_pack_sfix_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_AVX || TARGET_PREFER_AVX128)
    {
      rtx lo_ints = gen_reg_rtx (V4SImode);
      rtx hi_ints = gen_reg_rtx (V4SImode);

      emit_insn (gen_sse2_cvtpd2dq (lo_ints, operands[1]));
      emit_insn (gen_sse2_cvtpd2dq (hi_ints, operands[2]));
      emit_insn
        (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
                                     gen_lowpart (V2DImode, lo_ints),
                                     gen_lowpart (V2DImode, hi_ints)));
    }
  else
    {
      rtx wide = gen_reg_rtx (V4DFmode);
      rtx lo_src = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide, lo_src, operands[2]));
      emit_insn (gen_avx_cvtpd2dq256 (operands[0], wide));
    }
  DONE;
})
3271
3272 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3273 ;;
3274 ;; Parallel single-precision floating point element swizzling
3275 ;;
3276 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3277
;; Expander front end for sse_movhlps.  The operands are first
;; legitimized with ix86_fixup_binary_operands, the insn is emitted on
;; the destination that helper returns, and the value is copied to the
;; requested destination when the two differ.
(define_expand "sse_movhlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (match_operand:V4SF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 6) (const_int 7)
                     (const_int 2) (const_int 3)])))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_movhlps (target, operands[1], operands[2]));

  /* The fixup may have substituted another destination.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
3300
;; Result = { op2[2], op2[3], op1[2], op1[3] }.
;; Alternatives:
;;   0/1  register form (movhlps / vmovhlps);
;;   2/3  operand 2 in offsettable memory: load its high 8 bytes (%H2)
;;        into the low half with movlps / vmovlps;
;;   4    destination in memory and tied to operand 1: store with
;;        movhps.
;; At most one of operands 1 and 2 may be in memory.
(define_insn "sse_movhlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,m")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
          (parallel [(const_int 6) (const_int 7)
                     (const_int 2) (const_int 3)])))]
  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhlps\t{%2, %0|%0, %2}
   vmovhlps\t{%2, %1, %0|%0, %1, %2}
   movlps\t{%H2, %0|%0, %H2}
   vmovlps\t{%H2, %1, %0|%0, %1, %H2}
   %vmovhps\t{%2, %0|%0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3322
;; Expander front end for sse_movlhps.  The operands are first
;; legitimized with ix86_fixup_binary_operands, the insn is emitted on
;; the destination that helper returns, and the value is copied to the
;; requested destination when the two differ.
(define_expand "sse_movlhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (match_operand:V4SF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 0) (const_int 1)
                     (const_int 4) (const_int 5)])))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_movlhps (target, operands[1], operands[2]));

  /* The fixup may have substituted another destination.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
3345
;; Result = { op1[0], op1[1], op2[0], op2[1] }.
;; Alternatives:
;;   0/1  register form (movlhps / vmovlhps);
;;   2/3  operand 2 in memory: load its low 8 bytes into the high half
;;        with movhps / vmovhps.  Both need a memory source, so both
;;        use the "m" constraint for operand 2 (alternative 3 used to
;;        say "x", which duplicated alternative 1 and paired a
;;        register with the memory-only vmovhps template);
;;   4    destination in offsettable memory and tied to operand 1:
;;        store the low half of operand 2 at offset 8 (%H0) with
;;        movlps.
(define_insn "sse_movlhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,o")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 4)
                     (const_int 5)])))]
  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
  "@
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3367
;; 256-bit vunpckhps.  The 256-bit unpck insns only shuffle within
;; their 128-bit lanes, hence the selector: elements 2 and 3 of each
;; source interleaved for the low lane, elements 6 and 7 for the high
;; lane.
(define_insn "avx_unpckhps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_AVX"
  "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3384
;; Full-width high interleave of two V8SF operands, built from three
;; steps:
;;   op3 = per-lane low interleave of op1 and op2;
;;   op4 = per-lane high interleave of op1 and op2;
;;   op0 = elements 4..7 of op3 followed by elements 4..7 of op4.
(define_expand "vec_interleave_highv8sf"
  [(set (match_dup 3)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
  "TARGET_AVX"
{
  /* Scratches for the two per-lane interleaves.  */
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})
3418
;; Interleave the high halves of two V4SF operands:
;; result = { op1[2], op2[2], op1[3], op2[3] }.
;; Alternative 0 is the two-operand SSE form (destination tied to
;; operand 1), alternative 1 the three-operand AVX form.
(define_insn "vec_interleave_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0,x")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 2)
                     (const_int 6)
                     (const_int 3)
                     (const_int 7)])))]
  "TARGET_SSE"
  "@
   unpckhps\t{%2, %0|%0, %2}
   vunpckhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
3435
;; 256-bit vunpcklps.  The 256-bit unpck insns only shuffle within
;; their 128-bit lanes, hence the selector: elements 0 and 1 of each
;; source interleaved for the low lane, elements 4 and 5 for the high
;; lane.
(define_insn "avx_unpcklps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))]
  "TARGET_AVX"
  "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3452
;; Full-width low interleave of two V8SF operands, built from three
;; steps:
;;   op3 = per-lane low interleave of op1 and op2;
;;   op4 = per-lane high interleave of op1 and op2;
;;   op0 = elements 0..3 of op3 followed by elements 0..3 of op4.
(define_expand "vec_interleave_lowv8sf"
  [(set (match_dup 3)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)])))]
  "TARGET_AVX"
{
  /* Scratches for the two per-lane interleaves.  */
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})
3486
;; Interleave the low halves of two V4SF operands:
;; result = { op1[0], op2[0], op1[1], op2[1] }.
;; Alternative 0 is the two-operand SSE form (destination tied to
;; operand 1), alternative 1 the three-operand AVX form.
(define_insn "vec_interleave_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0,x")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 0)
                     (const_int 4)
                     (const_int 1)
                     (const_int 5)])))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   vunpcklps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
3503
;; The movshdup/movsldup patterns are modeled with the same
;; vec_select-of-vec_concat shape as the other shuffles so that users
;; of shufps that can use these instructions are captured.
;;
;; 256-bit vmovshdup: duplicate each odd-indexed element,
;; result = { a1, a1, a3, a3, a5, a5, a7, a7 }.
(define_insn "avx_movshdup256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 1) (const_int 1)
                     (const_int 3) (const_int 3)
                     (const_int 5) (const_int 5)
                     (const_int 7) (const_int 7)])))]
  "TARGET_AVX"
  "vmovshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3521
;; 128-bit movshdup: result = { a1, a1, a3, a3 }.  Operand 1 is
;; concatenated with itself, so selector 7 names element 3 of the
;; second copy.
(define_insn "sse3_movshdup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 1) (const_int 1)
                     (const_int 7) (const_int 7)])))]
  "TARGET_SSE3"
  "%vmovshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])
3538
;; 256-bit vmovsldup: duplicate each even-indexed element,
;; result = { a0, a0, a2, a2, a4, a4, a6, a6 }.
(define_insn "avx_movsldup256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 0)
                     (const_int 2) (const_int 2)
                     (const_int 4) (const_int 4)
                     (const_int 6) (const_int 6)])))]
  "TARGET_AVX"
  "vmovsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3554
;; 128-bit movsldup: result = { a0, a0, a2, a2 }.  Operand 1 is
;; concatenated with itself, so selector 6 names element 2 of the
;; second copy.
(define_insn "sse3_movsldup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 0)
                     (const_int 6) (const_int 6)])))]
  "TARGET_SSE3"
  "%vmovsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])
3571
;; Builtin-level expander for the 256-bit vshufps.  The 8-bit
;; immediate (operand 3) is split into four 2-bit fields and turned
;; into the eight explicit element selectors avx_shufps256_1 expects:
;; fields 0 and 1 index operand 1, fields 2 and 3 index operand 2
;; (offset 8 in the concatenation), and the high lane repeats the same
;; fields four elements further on.
(define_expand "avx_shufps256"
  [(match_operand:V8SF 0 "register_operand" "")
   (match_operand:V8SF 1 "register_operand" "")
   (match_operand:V8SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  int imm = INTVAL (operands[3]);
  int f0 = (imm >> 0) & 3;
  int f1 = (imm >> 2) & 3;
  int f2 = (imm >> 4) & 3;
  int f3 = (imm >> 6) & 3;

  emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
                                  GEN_INT (f0),
                                  GEN_INT (f1),
                                  GEN_INT (f2 + 8),
                                  GEN_INT (f3 + 8),
                                  GEN_INT (f0 + 4),
                                  GEN_INT (f1 + 4),
                                  GEN_INT (f2 + 12),
                                  GEN_INT (f3 + 12)));
  DONE;
})
3591
;; 256-bit vshufps with explicit element selectors.  Each 2-bit field
;; of the immediate is shared by both 128-bit lanes, so the insn only
;; matches when every high-lane selector (operands 7..10) equals the
;; corresponding low-lane selector (operands 3..6) plus 4.  The
;; immediate is rebuilt from the low-lane selectors at output time.
(define_insn "avx_shufps256_1"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3  "const_0_to_3_operand"   "")
                     (match_operand 4  "const_0_to_3_operand"   "")
                     (match_operand 5  "const_8_to_11_operand"  "")
                     (match_operand 6  "const_8_to_11_operand"  "")
                     (match_operand 7  "const_4_to_7_operand"   "")
                     (match_operand 8  "const_4_to_7_operand"   "")
                     (match_operand 9  "const_12_to_15_operand" "")
                     (match_operand 10 "const_12_to_15_operand" "")])))]
  "TARGET_AVX
   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
{
  /* Fold the four low-lane selectors back into the 8-bit immediate;
     operand 3 is reused to carry it to the template.  */
  int imm = (INTVAL (operands[3])
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 8) << 4)
             | ((INTVAL (operands[6]) - 8) << 6));

  operands[3] = GEN_INT (imm);

  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3626
;; Builtin-level expander for the 128-bit shufps.  The 8-bit immediate
;; (operand 3) is split into four 2-bit fields: fields 0 and 1 index
;; operand 1, fields 2 and 3 index operand 2 (offset 4 in the
;; concatenation).
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  int imm = INTVAL (operands[3]);
  int f0 = (imm >> 0) & 3;
  int f1 = (imm >> 2) & 3;
  int f2 = (imm >> 4) & 3;
  int f3 = (imm >> 6) & 3;

  emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
                                  GEN_INT (f0),
                                  GEN_INT (f1),
                                  GEN_INT (f2 + 4),
                                  GEN_INT (f3 + 4)));
  DONE;
})
3642
;; 128-bit shufps with explicit element selectors, for every mode in
;; VI4F_128.  Selectors 3 and 4 pick from operand 1, selectors 5 and 6
;; from operand 2; the immediate is rebuilt from them at output time.
;; Alternative 0 is the two-operand SSE form, alternative 1 the
;; three-operand AVX form.
(define_insn "sse_shufps_<mode>"
  [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
        (vec_select:VI4F_128
          (vec_concat:<ssedoublevecmode>
            (match_operand:VI4F_128 1 "register_operand" "0,x")
            (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_SSE"
{
  /* Fold the four selectors back into the 8-bit immediate; operand 3
     is reused to carry it to the template.  */
  int imm = ((INTVAL (operands[3]) << 0)
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 4) << 4)
             | ((INTVAL (operands[6]) - 4) << 6));

  operands[3] = GEN_INT (imm);

  if (which_alternative == 0)
    return "shufps\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 1)
    return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
3677
;; Extract the high two floats (elements 2 and 3) of a V4SF operand.
;; Alternatives:
;;   0  register -> memory, movhps store;
;;   1  register -> register, movhlps;
;;   2  offsettable memory -> register, movlps load from the source
;;      address plus 8 (%H1).
(define_insn "sse_storehps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 2)
                     (const_int 3)])))]
  "TARGET_SSE"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   %vmovhlps\t{%1, %d0|%d0, %1}
   %vmovlps\t{%H1, %d0|%d0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
3691
;; Expander front end for sse_loadhps.  The operands are first
;; legitimized with ix86_fixup_binary_operands, the insn is emitted on
;; the destination that helper returns, and the value is copied to the
;; requested destination when the two differ.
(define_expand "sse_loadhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadhps (target, operands[1], operands[2]));

  /* The fixup may have substituted another destination.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
3711
;; Replace the high half of a V4SF value:
;; result = { op1[0], op1[1], op2[0], op2[1] } with op2 a V2SF.
;; Alternatives:
;;   0/1  operand 2 in memory: movhps / vmovhps load;
;;   2/3  operand 2 in a register: movlhps / vmovlhps;
;;   4    destination in offsettable memory and tied to operand 1:
;;        store operand 2 at offset 8 (%H0) with movlps.
(define_insn "sse_loadhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"       "=x,x,x,x,o")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand"   " 0,x,0,x,0")
            (parallel [(const_int 0)
                       (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand"     " m,m,x,x,x")))]
  "TARGET_SSE"
  "@
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
3730
;; Extract the low two floats (elements 0 and 1) of a V4SF operand.
;; Alternatives:
;;   0  register -> memory, movlps store;
;;   1  register -> register, full movaps copy;
;;   2  memory -> register, movlps load.
(define_insn "sse_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
          (parallel [(const_int 0)
                     (const_int 1)])))]
  "TARGET_SSE"
  "@
   %vmovlps\t{%1, %0|%0, %1}
   %vmovaps\t{%1, %0|%0, %1}
   %vmovlps\t{%1, %d0|%d0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
3744
;; Expander front end for sse_loadlps.  The operands are first
;; legitimized with ix86_fixup_binary_operands, the insn is emitted on
;; the destination that helper returns, and the value is copied to the
;; requested destination when the two differ.
(define_expand "sse_loadlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" "")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 2)
                       (const_int 3)]))))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadlps (target, operands[1], operands[2]));

  /* The fixup may have substituted another destination.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
3764
;; Replace the low half of a V4SF value:
;; result = { op2[0], op2[1], op1[2], op1[3] } with op2 a V2SF.
;; Alternatives:
;;   0/1  all registers: shufps / vshufps with immediate 0xe4, which
;;        keeps elements 0,1 of the first source and takes elements
;;        2,3 of the second;
;;   2/3  operand 2 in memory: movlps / vmovlps load.  Both need a
;;        memory source, so both use the "m" constraint for operand 2
;;        (alternative 3 used to say "x", which duplicated
;;        alternative 1 and paired a register with the memory-only
;;        three-operand vmovlps template);
;;   4    destination in memory and tied to operand 1: store operand 2
;;        over the low half with movlps.
(define_insn "sse_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"       "=x,x,x,x,m")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand"     " 0,x,m,m,x")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand"   " x,x,0,x,0")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
  "@
   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
   movlps\t{%2, %0|%0, %2}
   vmovlps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %0|%0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "length_immediate" "1,1,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3784
;; Register-to-register (v)movss: vec_merge with mask 1 takes element 0
;; from operand 2 and the remaining elements from operand 1.  The non-AVX
;; alternative ties operand 1 to the destination.
3785 (define_insn "sse_movss"
3786 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3787 (vec_merge:V4SF
3788 (match_operand:V4SF 2 "register_operand" " x,x")
3789 (match_operand:V4SF 1 "register_operand" " 0,x")
3790 (const_int 1)))]
3791 "TARGET_SSE"
3792 "@
3793 movss\t{%2, %0|%0, %2}
3794 vmovss\t{%2, %1, %0|%0, %1, %2}"
3795 [(set_attr "isa" "noavx,avx")
3796 (set_attr "type" "ssemov")
3797 (set_attr "prefix" "orig,vex")
3798 (set_attr "mode" "SF")])
3799
;; Broadcast element 0 of a V4SF register to every element of a VF1-mode
;; destination with vbroadcastss (register source form).  Enabled only
;; under TARGET_AVX2.
3800 (define_insn "avx2_vec_dup<mode>"
3801 [(set (match_operand:VF1 0 "register_operand" "=x")
3802 (vec_duplicate:VF1
3803 (vec_select:SF
3804 (match_operand:V4SF 1 "register_operand" "x")
3805 (parallel [(const_int 0)]))))]
3806 "TARGET_AVX2"
3807 "vbroadcastss\t{%1, %0|%0, %1}"
3808 [(set_attr "type" "sselog1")
3809 (set_attr "prefix" "vex")
3810 (set_attr "mode" "<MODE>")])
3811
;; Duplicate an SF scalar into all four elements of a V4SF register.
;; Alternative 0 (AVX, register source): vshufps with immediate 0.
;; Alternative 1 (AVX, memory source): vbroadcastss.
;; Alternative 2 (non-AVX, source tied to destination): shufps with
;; immediate 0 applied in place.
3812 (define_insn "vec_dupv4sf"
3813 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
3814 (vec_duplicate:V4SF
3815 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
3816 "TARGET_SSE"
3817 "@
3818 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3819 vbroadcastss\t{%1, %0|%0, %1}
3820 shufps\t{$0, %0, %0|%0, %0, 0}"
3821 [(set_attr "isa" "avx,avx,noavx")
3822 (set_attr "type" "sselog1,ssemov,sselog1")
3823 (set_attr "length_immediate" "1,0,1")
3824 (set_attr "prefix_extra" "0,1,*")
3825 (set_attr "prefix" "vex,vex,orig")
3826 (set_attr "mode" "V4SF")])
3827
3828 ;; Although insertps takes register source, we prefer
3829 ;; unpcklps with register source since it is shorter.
;; SSE4.1 variant of V2SF construction from two SF operands (operand 1 is
;; the first vec_concat operand, operand 2 the second).
;; Alternatives 0/1: both in SSE registers, (v)unpcklps.
;; Alternatives 2/3: operand 2 in memory, (v)insertps with immediate 0x10.
;; Alternative 4: operand 1 in memory and operand 2 matching constraint C,
;;   a scalar (v)movss load.
;; Alternatives 5/6: MMX register destination (punpckldq / movd).
3830 (define_insn "*vec_concatv2sf_sse4_1"
3831 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3832 (vec_concat:V2SF
3833 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3834 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3835 "TARGET_SSE4_1"
3836 "@
3837 unpcklps\t{%2, %0|%0, %2}
3838 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3839 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3840 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3841 %vmovss\t{%1, %0|%0, %1}
3842 punpckldq\t{%2, %0|%0, %2}
3843 movd\t{%1, %0|%0, %1}"
3844 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3845 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3846 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3847 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3848 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3849 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3850 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3851
3852 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3853 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3854 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Plain-SSE variant of V2SF construction from two SF operands.
;; Alternative 0: both in SSE registers, unpcklps.
;; Alternative 1: operand 1 in memory with operand 2 matching constraint C,
;;   movss load.
;; Alternatives 2/3: the same two shapes for MMX registers
;;   (punpckldq / movd).
3855 (define_insn "*vec_concatv2sf_sse"
3856 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3857 (vec_concat:V2SF
3858 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3859 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3860 "TARGET_SSE"
3861 "@
3862 unpcklps\t{%2, %0|%0, %2}
3863 movss\t{%1, %0|%0, %1}
3864 punpckldq\t{%2, %0|%0, %2}
3865 movd\t{%1, %0|%0, %1}"
3866 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3867 (set_attr "mode" "V4SF,SF,DI,DI")])
3868
;; Build a V4SF from two V2SF halves (operand 1 first, operand 2 second).
;; Alternatives 0/1: operand 2 in a register, (v)movlhps.
;; Alternatives 2/3: operand 2 in memory, (v)movhps.
3869 (define_insn "*vec_concatv4sf"
3870 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3871 (vec_concat:V4SF
3872 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3873 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3874 "TARGET_SSE"
3875 "@
3876 movlhps\t{%2, %0|%0, %2}
3877 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3878 movhps\t{%2, %0|%0, %2}
3879 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3880 [(set_attr "isa" "noavx,avx,noavx,avx")
3881 (set_attr "type" "ssemov")
3882 (set_attr "prefix" "orig,vex,orig,vex")
3883 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
3884
;; Vector initialisation expander for the V_128 modes.  All work is
;; delegated to ix86_expand_vector_init, called with FALSE as its first
;; argument (NOTE(review): presumably the "MMX allowed" flag -- confirm
;; against the helper's definition in i386.c).
3885 (define_expand "vec_init<mode>"
3886 [(match_operand:V_128 0 "register_operand" "")
3887 (match_operand 1 "" "")]
3888 "TARGET_SSE"
3889 {
3890 ix86_expand_vector_init (false, operands[0], operands[1]);
3891 DONE;
3892 })
3893
3894 ;; Avoid combining registers from different units in a single alternative,
3895 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a VI4F_128 vector: vec_merge with mask 1 takes element 0
;; from the duplicated scalar operand 2 and every other element from
;; operand 1.
;; Alternatives 0-3: operand 1 matches constraint C (NOTE(review):
;;   presumably the all-zero vector -- confirm in constraints.md).
;; Alternatives 4-7: operand 1 is a register; (v)movss for an SSE-register
;;   scalar, (v)pinsrd for a general-register or memory scalar.
;; Alternatives 8-10: memory destination tied to operand 1; these emit "#"
;;   and therefore rely on a splitter.
3896 (define_insn "vec_set<mode>_0"
3897 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3898 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
3899 (vec_merge:VI4F_128
3900 (vec_duplicate:VI4F_128
3901 (match_operand:<ssescalarmode> 2 "general_operand"
3902 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
3903 (match_operand:VI4F_128 1 "vector_move_operand"
3904 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
3905 (const_int 1)))]
3906 "TARGET_SSE"
3907 "@
3908 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3909 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3910 %vmovd\t{%2, %0|%0, %2}
3911 movss\t{%2, %0|%0, %2}
3912 movss\t{%2, %0|%0, %2}
3913 vmovss\t{%2, %1, %0|%0, %1, %2}
3914 pinsrd\t{$0, %2, %0|%0, %2, 0}
3915 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3916 #
3917 #
3918 #"
3919 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3920 (set (attr "type")
3921 (cond [(eq_attr "alternative" "0,6,7")
3922 (const_string "sselog")
3923 (eq_attr "alternative" "9")
3924 (const_string "imov")
3925 (eq_attr "alternative" "10")
3926 (const_string "fmov")
3927 ]
3928 (const_string "ssemov")))
3929 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3930 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3931 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3932 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3933
3934 ;; A subset is vec_setv4sf.
;; Insert an SF scalar (operand 2) into one element of a V4SF register
;; (operand 1) with (v)insertps.  Operand 3 is the vec_merge mask; the insn
;; condition only accepts masks whose exact_log2 is a valid V4SF element
;; index, i.e. a single selected element.
(define_insn "*vec_setv4sf_sse4_1"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
          (match_operand:V4SF 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_int_operand" "")))]
  "TARGET_SSE4_1
   && ((unsigned) exact_log2 (INTVAL (operands[3]))
       < GET_MODE_NUNITS (V4SFmode))"
{
  /* Turn the one-bit merge mask into an element index, then move that
     index up by four bits to form the instruction immediate.  */
  const int elt_index = exact_log2 (INTVAL (operands[3]));

  operands[3] = GEN_INT (elt_index << 4);

  if (which_alternative == 0)
    return "insertps\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 1)
    return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
3964
;; (v)insertps expressed as UNSPEC_INSERTPS with an 8-bit immediate
;; (operand 3).  When the source (operand 2) is in memory, the top two bits
;; of the immediate are folded into the address as an SFmode element offset
;; and, if they were non-zero, cleared from the immediate.
(define_insn "sse4_1_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
                      (match_operand:V4SF 1 "register_operand" "0,x")
                      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
                     UNSPEC_INSERTPS))]
  "TARGET_SSE4_1"
{
  if (MEM_P (operands[2]))
    {
      const HOST_WIDE_INT imm = INTVAL (operands[3]);
      const unsigned src_elt = imm >> 6;

      /* Address the selected 4-byte element directly ...  */
      operands[2] = adjust_address_nv (operands[2], SFmode, src_elt * 4);

      /* ... and drop the selector bits once they are in the address.  */
      if (src_elt != 0)
        operands[3] = GEN_INT (imm & 0x3f);
    }

  if (which_alternative == 0)
    return "insertps\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 1)
    return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
3997
;; After reload, setting element 0 of a vector that lives in memory (the
;; merge source is the destination itself) becomes a single scalar store
;; at offset 0 of that memory.
(define_split
  [(set (match_operand:VI4F_128 0 "memory_operand" "")
        (vec_merge:VI4F_128
          (vec_duplicate:VI4F_128
            (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  /* Scalar-mode view of the first element of the destination.  */
  rtx first_elt_mem = adjust_address (operands[0], <ssescalarmode>mode, 0);

  emit_move_insn (first_elt_mem, operands[1]);
  DONE;
})
4012
;; Element-set expander for every mode in iterator V.  Operand 0 is the
;; vector, operand 1 the scalar and operand 2 the constant element index;
;; the work is delegated to ix86_expand_vector_set, called with FALSE as
;; its first argument (NOTE(review): presumably the "MMX allowed" flag --
;; confirm in i386.c).
4013 (define_expand "vec_set<mode>"
4014 [(match_operand:V 0 "register_operand" "")
4015 (match_operand:<ssescalarmode> 1 "register_operand" "")
4016 (match_operand 2 "const_int_operand" "")]
4017 "TARGET_SSE"
4018 {
4019 ix86_expand_vector_set (false, operands[0], operands[1],
4020 INTVAL (operands[2]));
4021 DONE;
4022 })
4023
;; Extract element 0 of a V4SF value as an SF scalar.  The insn is always
;; output as "#" and is split after reload into a plain SFmode move.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* A register source is re-expressed in SFmode under the same register
     number; any other source goes through gen_lowpart.  */
  rtx scalar_src = (REG_P (operands[1])
                    ? gen_rtx_REG (SFmode, REGNO (operands[1]))
                    : gen_lowpart (SFmode, operands[1]));

  emit_move_insn (operands[0], scalar_src);
  DONE;
})
4042
;; Extract element N (operand 2, 0..3) of a V4SF register.
;; Alternative 0 (general register or memory destination) emits extractps.
;; Alternatives 1/2 (SSE register destination) are split after reload:
;; elements 1 and 3 use shufps, element 2 uses vec_interleave_highv4sf.
;; Element 0 is not expected to reach the splitter.
(define_insn_and_split "*sse4_1_extractps"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
        (vec_select:SF
          (match_operand:V4SF 1 "register_operand" "x,0,x")
          (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE4_1"
  "@
   %vextractps\t{%2, %1, %0|%0, %1, %2}
   #
   #"
  "&& reload_completed && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  /* View the SF destination register as a full V4SF register.  */
  rtx vec_dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
  const HOST_WIDE_INT elt = INTVAL (operands[2]);

  if (elt == 1 || elt == 3)
    {
      /* Selector pair N, N for the first shufps input and N+4, N+4 for
         the second.  */
      rtx second_sel = GEN_INT (elt + 4);

      emit_insn (gen_sse_shufps_v4sf (vec_dest, operands[1], operands[1],
                                      operands[2], operands[2],
                                      second_sel, second_sel));
    }
  else if (elt == 2)
    emit_insn (gen_vec_interleave_highv4sf (vec_dest, operands[1],
                                            operands[1]));
  else
    /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
    gcc_unreachable ();

  DONE;
}
  [(set_attr "isa" "*,noavx,avx")
   (set_attr "type" "sselog,*,*")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1,*,*")
   (set_attr "length_immediate" "1,*,*")
   (set_attr "prefix" "maybe_vex,*,*")
   (set_attr "mode" "V4SF,*,*")])
4082
;; Extract element N (operand 2, 0..3) of a V4SF value that lives in
;; offsettable memory.  Always "#"; after reload it becomes a scalar SFmode
;; load from byte offset 4*N of the vector.
(define_insn_and_split "*vec_extract_v4sf_mem"
  [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
        (vec_select:SF
          (match_operand:V4SF 1 "memory_operand" "o,o,o")
          (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* Each SF element occupies four bytes.  */
  rtx elt_mem = adjust_address (operands[1], SFmode,
                                INTVAL (operands[2]) * 4);

  emit_move_insn (operands[0], elt_mem);
  DONE;
})
4098
;; Expander that extracts one half of a V_256 register.  Operand 2 picks
;; the half: 0 dispatches to vec_extract_lo_<mode>, 1 to
;; vec_extract_hi_<mode>.
(define_expand "avx_vextractf128<mode>"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
   (match_operand:V_256 1 "register_operand" "")
   (match_operand:SI 2 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  const HOST_WIDE_INT half = INTVAL (operands[2]);
  rtx (*gen_extract) (rtx, rtx);

  if (half == 0)
    gen_extract = gen_vec_extract_lo_<mode>;
  else if (half == 1)
    gen_extract = gen_vec_extract_hi_<mode>;
  else
    gcc_unreachable ();

  emit_insn (gen_extract (operands[0], operands[1]));
  DONE;
})
4122
;; Low-half extraction (elements 0 and 1) for the VI8F_256 modes.  Always
;; "#"; after reload it becomes a plain move in the half-width vector mode.
(define_insn_and_split "vec_extract_lo_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* A register source keeps its register number in the narrower mode;
     any other source goes through gen_lowpart.  */
  rtx low_half = (REG_P (operands[1])
                  ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]))
                  : gen_lowpart (<ssehalfvecmode>mode, operands[1]));

  emit_move_insn (operands[0], low_half);
  DONE;
})
4141
;; High-half extraction for the VI8F_256 modes: selects elements 2 and 3 of
;; register operand 1 into a register or memory destination using
;; vextract<i128> with immediate 1.
4142 (define_insn "vec_extract_hi_<mode>"
4143 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4144 (vec_select:<ssehalfvecmode>
4145 (match_operand:VI8F_256 1 "register_operand" "x,x")
4146 (parallel [(const_int 2) (const_int 3)])))]
4147 "TARGET_AVX"
4148 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4149 [(set_attr "type" "sselog")
4150 (set_attr "prefix_extra" "1")
4151 (set_attr "length_immediate" "1")
4152 (set_attr "memory" "none,store")
4153 (set_attr "prefix" "vex")
4154 (set_attr "mode" "<sseinsnmode>")])
4155
;; Low-half extraction (elements 0..3) for the VI4F_256 modes.  Always "#";
;; after reload it becomes a plain move in the half-width vector mode.
(define_insn_and_split "vec_extract_lo_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* A register source keeps its register number in the narrower mode;
     any other source goes through gen_lowpart.  */
  rtx low_half = (REG_P (operands[1])
                  ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]))
                  : gen_lowpart (<ssehalfvecmode>mode, operands[1]));

  emit_move_insn (operands[0], low_half);
  DONE;
})
4175
;; High-half extraction for the VI4F_256 modes: selects elements 4..7 of
;; register operand 1 into a register or memory destination using
;; vextract<i128> with immediate 1.
4176 (define_insn "vec_extract_hi_<mode>"
4177 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4178 (vec_select:<ssehalfvecmode>
4179 (match_operand:VI4F_256 1 "register_operand" "x,x")
4180 (parallel [(const_int 4) (const_int 5)
4181 (const_int 6) (const_int 7)])))]
4182 "TARGET_AVX"
4183 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4184 [(set_attr "type" "sselog")
4185 (set_attr "prefix_extra" "1")
4186 (set_attr "length_immediate" "1")
4187 (set_attr "memory" "none,store")
4188 (set_attr "prefix" "vex")
4189 (set_attr "mode" "<sseinsnmode>")])
4190
;; Low-half extraction (elements 0..7) of a V16HI value into V8HI.  Always
;; "#"; after reload it becomes a plain V8HImode move.
(define_insn_and_split "vec_extract_lo_v16hi"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V8HI
          (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* A register source keeps its register number in V8HImode; any other
     source goes through gen_lowpart.  */
  rtx low_half = (REG_P (operands[1])
                  ? gen_rtx_REG (V8HImode, REGNO (operands[1]))
                  : gen_lowpart (V8HImode, operands[1]));

  emit_move_insn (operands[0], low_half);
  DONE;
})
4212
;; High-half extraction of a V16HI register: selects elements 8..15 into a
;; V8HI register or memory destination using vextract%~128 with
;; immediate 1.
4213 (define_insn "vec_extract_hi_v16hi"
4214 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4215 (vec_select:V8HI
4216 (match_operand:V16HI 1 "register_operand" "x,x")
4217 (parallel [(const_int 8) (const_int 9)
4218 (const_int 10) (const_int 11)
4219 (const_int 12) (const_int 13)
4220 (const_int 14) (const_int 15)])))]
4221 "TARGET_AVX"
4222 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4223 [(set_attr "type" "sselog")
4224 (set_attr "prefix_extra" "1")
4225 (set_attr "length_immediate" "1")
4226 (set_attr "memory" "none,store")
4227 (set_attr "prefix" "vex")
4228 (set_attr "mode" "OI")])
4229
;; Low-half extraction (elements 0..15) of a V32QI value into V16QI.
;; Always "#"; after reload it becomes a plain V16QImode move.
(define_insn_and_split "vec_extract_lo_v32qi"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V16QI
          (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* A register source keeps its register number in V16QImode; any other
     source goes through gen_lowpart.  */
  rtx low_half = (REG_P (operands[1])
                  ? gen_rtx_REG (V16QImode, REGNO (operands[1]))
                  : gen_lowpart (V16QImode, operands[1]));

  emit_move_insn (operands[0], low_half);
  DONE;
})
4255
;; High-half extraction of a V32QI register: selects elements 16..31 into a
;; V16QI register or memory destination using vextract%~128 with
;; immediate 1.
4256 (define_insn "vec_extract_hi_v32qi"
4257 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4258 (vec_select:V16QI
4259 (match_operand:V32QI 1 "register_operand" "x,x")
4260 (parallel [(const_int 16) (const_int 17)
4261 (const_int 18) (const_int 19)
4262 (const_int 20) (const_int 21)
4263 (const_int 22) (const_int 23)
4264 (const_int 24) (const_int 25)
4265 (const_int 26) (const_int 27)
4266 (const_int 28) (const_int 29)
4267 (const_int 30) (const_int 31)])))]
4268 "TARGET_AVX"
4269 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4270 [(set_attr "type" "sselog")
4271 (set_attr "prefix_extra" "1")
4272 (set_attr "length_immediate" "1")
4273 (set_attr "memory" "none,store")
4274 (set_attr "prefix" "vex")
4275 (set_attr "mode" "OI")])
4276
4277 ;; Modes handled by vec_extract patterns.
;; Each row pairs a mode that is enabled only under TARGET_AVX (left) with
;; an unconditionally listed mode (right).
4278 (define_mode_iterator VEC_EXTRACT_MODE
4279 [(V32QI "TARGET_AVX") V16QI
4280 (V16HI "TARGET_AVX") V8HI
4281 (V8SI "TARGET_AVX") V4SI
4282 (V4DI "TARGET_AVX") V2DI
4283 (V8SF "TARGET_AVX") V4SF
4284 (V4DF "TARGET_AVX") V2DF])
4285
;; Element-extract expander for every mode in VEC_EXTRACT_MODE.  Operand 0
;; is the scalar result, operand 1 the vector and operand 2 the constant
;; element index; the work is delegated to ix86_expand_vector_extract,
;; called with FALSE as its first argument (NOTE(review): presumably the
;; "MMX allowed" flag -- confirm in i386.c).
4286 (define_expand "vec_extract<mode>"
4287 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4288 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4289 (match_operand 2 "const_int_operand" "")]
4290 "TARGET_SSE"
4291 {
4292 ix86_expand_vector_extract (false, operands[0], operands[1],
4293 INTVAL (operands[2]));
4294 DONE;
4295 })
4296
4297 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4298 ;;
4299 ;; Parallel double-precision floating point element swizzling
4300 ;;
4301 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4302
4303 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: from the concatenation of operands 1 and 2 (elements
;; 0..3 and 4..7) select elements 1, 5, 3, 7 -- the odd element of each
;; operand within each 128-bit lane.
4304 (define_insn "avx_unpckhpd256"
4305 [(set (match_operand:V4DF 0 "register_operand" "=x")
4306 (vec_select:V4DF
4307 (vec_concat:V8DF
4308 (match_operand:V4DF 1 "register_operand" "x")
4309 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4310 (parallel [(const_int 1) (const_int 5)
4311 (const_int 3) (const_int 7)])))]
4312 "TARGET_AVX"
4313 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4314 [(set_attr "type" "sselog")
4315 (set_attr "prefix" "vex")
4316 (set_attr "mode" "V4DF")])
4317
;; Full-width "interleave high" for V4DF, expanded as three sets:
;;   operand 3 (fresh pseudo) = elements 0,4,2,6 of concat (op1, op2)
;;   operand 4 (fresh pseudo) = elements 1,5,3,7 of concat (op1, op2)
;;   operand 0                = elements 2,3,6,7 of concat (op3, op4)
;; i.e. the upper 128-bit halves of the two in-lane unpack results.
4318 (define_expand "vec_interleave_highv4df"
4319 [(set (match_dup 3)
4320 (vec_select:V4DF
4321 (vec_concat:V8DF
4322 (match_operand:V4DF 1 "register_operand" "x")
4323 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4324 (parallel [(const_int 0) (const_int 4)
4325 (const_int 2) (const_int 6)])))
4326 (set (match_dup 4)
4327 (vec_select:V4DF
4328 (vec_concat:V8DF
4329 (match_dup 1)
4330 (match_dup 2))
4331 (parallel [(const_int 1) (const_int 5)
4332 (const_int 3) (const_int 7)])))
4333 (set (match_operand:V4DF 0 "register_operand" "")
4334 (vec_select:V4DF
4335 (vec_concat:V8DF
4336 (match_dup 3)
4337 (match_dup 4))
4338 (parallel [(const_int 2) (const_int 3)
4339 (const_int 6) (const_int 7)])))]
4340 "TARGET_AVX"
4341 {
4342 operands[3] = gen_reg_rtx (V4DFmode);
4343 operands[4] = gen_reg_rtx (V4DFmode);
4344 })
4345
4346
;; Expander for V2DF "interleave high" (elements 1 and 3 of the
;; concatenated inputs).  If ix86_vec_interleave_v2df_operator_ok rejects
;; the operand combination, operand 2 is forced into a register before the
;; pattern is emitted.
4347 (define_expand "vec_interleave_highv2df"
4348 [(set (match_operand:V2DF 0 "register_operand" "")
4349 (vec_select:V2DF
4350 (vec_concat:V4DF
4351 (match_operand:V2DF 1 "nonimmediate_operand" "")
4352 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4353 (parallel [(const_int 1)
4354 (const_int 3)])))]
4355 "TARGET_SSE2"
4356 {
4357 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4358 operands[2] = force_reg (V2DFmode, operands[2]);
4359 })
4360
;; V2DF "interleave high": result is { op1[1], op2[1] }.
;; Alternatives 0/1: register operands, (v)unpckhpd.
;; Alternative 2 (sse3): operand 1 in memory with operand 2 tied to it,
;;   (v)movddup from the %H1 form of operand 1.
;; Alternatives 3/4: operand 1 in memory, (v)movlpd from %H1.
;; Alternative 5: memory destination tied to operand 2, (v)movhpd store of
;;   operand 1.
4361 (define_insn "*vec_interleave_highv2df"
4362 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4363 (vec_select:V2DF
4364 (vec_concat:V4DF
4365 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4366 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4367 (parallel [(const_int 1)
4368 (const_int 3)])))]
4369 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4370 "@
4371 unpckhpd\t{%2, %0|%0, %2}
4372 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4373 %vmovddup\t{%H1, %0|%0, %H1}
4374 movlpd\t{%H1, %0|%0, %H1}
4375 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4376 %vmovhpd\t{%1, %0|%0, %1}"
4377 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4378 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4379 (set_attr "prefix_data16" "*,*,*,1,*,1")
4380 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4381 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4382
4383 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with no preparation code: operand 1 is concatenated with itself
;; and elements 0, 4, 2, 6 are selected, so each 128-bit lane of the result
;; holds its even element twice.
4384 (define_expand "avx_movddup256"
4385 [(set (match_operand:V4DF 0 "register_operand" "")
4386 (vec_select:V4DF
4387 (vec_concat:V8DF
4388 (match_operand:V4DF 1 "nonimmediate_operand" "")
4389 (match_dup 1))
4390 (parallel [(const_int 0) (const_int 4)
4391 (const_int 2) (const_int 6)])))]
4392 "TARGET_AVX")
4393
;; Expander with no preparation code for the 256-bit "unpack low" shape:
;; elements 0, 4, 2, 6 of the concatenation of operands 1 and 2.
4394 (define_expand "avx_unpcklpd256"
4395 [(set (match_operand:V4DF 0 "register_operand" "")
4396 (vec_select:V4DF
4397 (vec_concat:V8DF
4398 (match_operand:V4DF 1 "register_operand" "")
4399 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4400 (parallel [(const_int 0) (const_int 4)
4401 (const_int 2) (const_int 6)])))]
4402 "TARGET_AVX")
4403
;; Insn matching the 256-bit "unpack low" shape (elements 0, 4, 2, 6).
;; Alternative 0: operand 1 in a register, vunpcklpd.
;; Alternative 1: operand 1 in memory with operand 2 tied to it, i.e. both
;;   inputs are the same memory value, emitted as vmovddup.
4404 (define_insn "*avx_unpcklpd256"
4405 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4406 (vec_select:V4DF
4407 (vec_concat:V8DF
4408 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4409 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4410 (parallel [(const_int 0) (const_int 4)
4411 (const_int 2) (const_int 6)])))]
4412 "TARGET_AVX"
4413 "@
4414 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4415 vmovddup\t{%1, %0|%0, %1}"
4416 [(set_attr "type" "sselog")
4417 (set_attr "prefix" "vex")
4418 (set_attr "mode" "V4DF")])
4419
;; Full-width "interleave low" for V4DF, expanded as three sets:
;;   operand 3 (fresh pseudo) = elements 0,4,2,6 of concat (op1, op2)
;;   operand 4 (fresh pseudo) = elements 1,5,3,7 of concat (op1, op2)
;;   operand 0                = elements 0,1,4,5 of concat (op3, op4)
;; i.e. the lower 128-bit halves of the two in-lane unpack results.
4420 (define_expand "vec_interleave_lowv4df"
4421 [(set (match_dup 3)
4422 (vec_select:V4DF
4423 (vec_concat:V8DF
4424 (match_operand:V4DF 1 "register_operand" "x")
4425 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4426 (parallel [(const_int 0) (const_int 4)
4427 (const_int 2) (const_int 6)])))
4428 (set (match_dup 4)
4429 (vec_select:V4DF
4430 (vec_concat:V8DF
4431 (match_dup 1)
4432 (match_dup 2))
4433 (parallel [(const_int 1) (const_int 5)
4434 (const_int 3) (const_int 7)])))
4435 (set (match_operand:V4DF 0 "register_operand" "")
4436 (vec_select:V4DF
4437 (vec_concat:V8DF
4438 (match_dup 3)
4439 (match_dup 4))
4440 (parallel [(const_int 0) (const_int 1)
4441 (const_int 4) (const_int 5)])))]
4442 "TARGET_AVX"
4443 {
4444 operands[3] = gen_reg_rtx (V4DFmode);
4445 operands[4] = gen_reg_rtx (V4DFmode);
4446 })
4447
;; Expander for V2DF "interleave low" (elements 0 and 2 of the concatenated
;; inputs).  If ix86_vec_interleave_v2df_operator_ok rejects the operand
;; combination, operand 1 is forced into a register before the pattern is
;; emitted.
4448 (define_expand "vec_interleave_lowv2df"
4449 [(set (match_operand:V2DF 0 "register_operand" "")
4450 (vec_select:V2DF
4451 (vec_concat:V4DF
4452 (match_operand:V2DF 1 "nonimmediate_operand" "")
4453 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4454 (parallel [(const_int 0)
4455 (const_int 2)])))]
4456 "TARGET_SSE2"
4457 {
4458 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4459 operands[1] = force_reg (V2DFmode, operands[1]);
4460 })
4461
;; V2DF "interleave low": result is { op1[0], op2[0] }.
;; Alternatives 0/1: register operands, (v)unpcklpd.
;; Alternative 2 (sse3): operand 1 in memory with operand 2 tied to it,
;;   (v)movddup.
;; Alternatives 3/4: operand 2 in memory, (v)movhpd load.
;; Alternative 5: offsettable memory destination tied to operand 1,
;;   (v)movlpd store of operand 2 to the %H0 form of the destination.
4462 (define_insn "*vec_interleave_lowv2df"
4463 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4464 (vec_select:V2DF
4465 (vec_concat:V4DF
4466 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4467 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4468 (parallel [(const_int 0)
4469 (const_int 2)])))]
4470 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4471 "@
4472 unpcklpd\t{%2, %0|%0, %2}
4473 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4474 %vmovddup\t{%1, %0|%0, %1}
4475 movhpd\t{%2, %0|%0, %2}
4476 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4477 %vmovlpd\t{%2, %H0|%H0, %2}"
4478 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4479 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4480 (set_attr "prefix_data16" "*,*,*,1,*,1")
4481 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4482 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4483
;; After reload, duplicating the low DF element of a register into a V2DF
;; memory destination becomes two scalar DFmode stores of that element, at
;; byte offsets 0 and 8.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  /* DFmode view of the source register.  */
  rtx scalar = gen_rtx_REG (DFmode, REGNO (operands[1]));
  int byte_offset;

  /* Store the same scalar into both 8-byte slots, lower slot first.  */
  for (byte_offset = 0; byte_offset < 16; byte_offset += 8)
    emit_move_insn (adjust_address (operands[0], DFmode, byte_offset),
                    scalar);
  DONE;
})
4500
;; Selecting element N twice (indices N and N+2, N in 0..1) from a memory
;; V2DF concatenated with itself becomes a vec_duplicate of the single DF
;; element at byte offset 8*N.
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "memory_operand" "")
            (match_dup 1))
          (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
                     (match_operand:SI 3 "const_int_operand" "")])))]
  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
{
  /* Each DF element occupies eight bytes.  */
  const HOST_WIDE_INT byte_offset = INTVAL (operands[2]) * 8;

  operands[1] = adjust_address (operands[1], DFmode, byte_offset);
})
4514
;; Expander for 256-bit shufpd taking a raw immediate (operand 3).  Each of
;; the low four immediate bits is turned into an explicit element selector
;; for avx_shufpd256_1:
;;   bit 0 -> 0 or 1,  bit 1 -> 4 or 5,  bit 2 -> 2 or 3,  bit 3 -> 6 or 7.
(define_expand "avx_shufpd256"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V4DF 1 "register_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[3]);
  rtx sel0 = GEN_INT (imm & 1);
  rtx sel1 = GEN_INT ((imm & 2) ? 5 : 4);
  rtx sel2 = GEN_INT ((imm & 4) ? 3 : 2);
  rtx sel3 = GEN_INT ((imm & 8) ? 7 : 6);

  emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
                                  sel0, sel1, sel2, sel3));
  DONE;
})
4530
;; 256-bit vshufpd with explicit element selectors (operands 3..6, limited
;; by their predicates to 0-1, 4-5, 2-3 and 6-7).  At output time the four
;; selectors are packed back into one immediate, one bit per selector.
(define_insn "avx_shufpd256_1"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_4_to_5_operand" "")
                     (match_operand 5 "const_2_to_3_operand" "")
                     (match_operand 6 "const_6_to_7_operand" "")])))]
  "TARGET_AVX"
{
  /* Subtract each selector's base so that it contributes a single bit.  */
  const int imm = (INTVAL (operands[3])
                   | ((INTVAL (operands[4]) - 4) << 1)
                   | ((INTVAL (operands[5]) - 2) << 2)
                   | ((INTVAL (operands[6]) - 6) << 3));

  operands[3] = GEN_INT (imm);
  return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
4556
;; Expander for 128-bit shufpd taking a raw immediate (operand 3).  Bit 0
;; becomes selector 0 or 1 and bit 1 becomes selector 2 or 3, which are
;; passed on to sse2_shufpd_v2df.
(define_expand "sse2_shufpd"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[3]);
  rtx sel0 = GEN_INT (imm & 1);
  rtx sel1 = GEN_INT ((imm & 2) ? 3 : 2);

  emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
                                   sel0, sel1));
  DONE;
})
4570
4571 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit vpunpckhqdq: selects elements 1, 5, 3, 7 of the concatenation of
;; operands 1 and 2.  Enabled only under TARGET_AVX2.
4572 (define_insn "avx2_interleave_highv4di"
4573 [(set (match_operand:V4DI 0 "register_operand" "=x")
4574 (vec_select:V4DI
4575 (vec_concat:V8DI
4576 (match_operand:V4DI 1 "register_operand" "x")
4577 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4578 (parallel [(const_int 1)
4579 (const_int 5)
4580 (const_int 3)
4581 (const_int 7)])))]
4582 "TARGET_AVX2"
4583 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4584 [(set_attr "type" "sselog")
4585 (set_attr "prefix" "vex")
4586 (set_attr "mode" "OI")])
4587
;; V2DI "interleave high": result is { op1[1], op2[1] }, emitted as
;; punpckhqdq (operand 1 tied to the destination) or the three-operand
;; vpunpckhqdq.
4588 (define_insn "vec_interleave_highv2di"
4589 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4590 (vec_select:V2DI
4591 (vec_concat:V4DI
4592 (match_operand:V2DI 1 "register_operand" "0,x")
4593 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4594 (parallel [(const_int 1)
4595 (const_int 3)])))]
4596 "TARGET_SSE2"
4597 "@
4598 punpckhqdq\t{%2, %0|%0, %2}
4599 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4600 [(set_attr "isa" "noavx,avx")
4601 (set_attr "type" "sselog")
4602 (set_attr "prefix_data16" "1,*")
4603 (set_attr "prefix" "orig,vex")
4604 (set_attr "mode" "TI")])
4605
;; 256-bit vpunpcklqdq: selects elements 0, 4, 2, 6 of the concatenation of
;; operands 1 and 2.  Enabled only under TARGET_AVX2.
4606 (define_insn "avx2_interleave_lowv4di"
4607 [(set (match_operand:V4DI 0 "register_operand" "=x")
4608 (vec_select:V4DI
4609 (vec_concat:V8DI
4610 (match_operand:V4DI 1 "register_operand" "x")
4611 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4612 (parallel [(const_int 0)
4613 (const_int 4)
4614 (const_int 2)
4615 (const_int 6)])))]
4616 "TARGET_AVX2"
4617 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4618 [(set_attr "type" "sselog")
4619 (set_attr "prefix" "vex")
4620 (set_attr "mode" "OI")])
4621
;; V2DI "interleave low": result is { op1[0], op2[0] }, emitted as
;; punpcklqdq (operand 1 tied to the destination) or the three-operand
;; vpunpcklqdq.
4622 (define_insn "vec_interleave_lowv2di"
4623 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4624 (vec_select:V2DI
4625 (vec_concat:V4DI
4626 (match_operand:V2DI 1 "register_operand" "0,x")
4627 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4628 (parallel [(const_int 0)
4629 (const_int 2)])))]
4630 "TARGET_SSE2"
4631 "@
4632 punpcklqdq\t{%2, %0|%0, %2}
4633 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4634 [(set_attr "isa" "noavx,avx")
4635 (set_attr "type" "sselog")
4636 (set_attr "prefix_data16" "1,*")
4637 (set_attr "prefix" "orig,vex")
4638 (set_attr "mode" "TI")])
4639
;; 128-bit (v)shufpd for the VI8F_128 modes with explicit element selectors
;; (operand 3 in 0..1, operand 4 in 2..3).  At output time the selectors are
;; packed into the two-bit immediate: operand 3 supplies bit 0 and
;; (operand 4 - 2) supplies bit 1.
(define_insn "sse2_shufpd_<mode>"
  [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
        (vec_select:VI8F_128
          (vec_concat:<ssedoublevecmode>
            (match_operand:VI8F_128 1 "register_operand" "0,x")
            (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_2_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[3]) | ((INTVAL (operands[4]) - 2) << 1);

  operands[3] = GEN_INT (imm);

  if (which_alternative == 0)
    return "shufpd\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 1)
    return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V2DF")])
4670
4671 ;; Avoid combining registers from different units in a single alternative,
4672 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract the high DF element (index 1) of a V2DF value.
;; Alternative 0: SSE register -> memory, (v)movhpd.
;; Alternatives 1/2: SSE register -> SSE register, unpckhpd in place or
;;   vunpckhpd.
;; Alternatives 3-5: offsettable memory source into an SSE, x87 or general
;;   register; these emit "#" and therefore rely on a splitter.
;; prefix_data16 is "1" only for alternative 0 when TARGET_AVX is off.
4673 (define_insn "sse2_storehpd"
4674 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4675 (vec_select:DF
4676 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4677 (parallel [(const_int 1)])))]
4678 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4679 "@
4680 %vmovhpd\t{%1, %0|%0, %1}
4681 unpckhpd\t%0, %0
4682 vunpckhpd\t{%d1, %0|%0, %d1}
4683 #
4684 #
4685 #"
4686 [(set_attr "isa" "*,noavx,avx,*,*,*")
4687 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4688 (set (attr "prefix_data16")
4689 (if_then_else
4690 (and (eq_attr "alternative" "0")
4691 (not (match_test "TARGET_AVX")))
4692 (const_string "1")
4693 (const_string "*")))
4694 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4695 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
4696
;; After reload, extracting the high DF element of a V2DF in memory becomes
;; a plain DFmode load from byte offset 8 of that memory.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "memory_operand" "")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Re-address operand 1 as the second 8-byte element.  */
  operands[1] = adjust_address (operands[1], DFmode, 8);
})
4705
;; High-element (index 1) extraction from V2DF for targets with SSE but
;; without SSE2, using single-precision moves: movhps store to memory,
;; movhlps register to register, movlps load from the %H1 form of a memory
;; source.
4706 (define_insn "*vec_extractv2df_1_sse"
4707 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4708 (vec_select:DF
4709 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4710 (parallel [(const_int 1)])))]
4711 "!TARGET_SSE2 && TARGET_SSE
4712 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4713 "@
4714 movhps\t{%1, %0|%0, %1}
4715 movhlps\t{%1, %0|%0, %1}
4716 movlps\t{%H1, %0|%0, %H1}"
4717 [(set_attr "type" "ssemov")
4718 (set_attr "mode" "V2SF,V4SF,V2SF")])
4719
4720 ;; Avoid combining registers from different units in a single alternative,
4721 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Store the low half: DF operand 0 receives element 0 of V2DF operand 1.
;; Only alternative 0 (store to memory) has an output template; the other
;; four alternatives output "#" and are left to be split.
4722 (define_insn "sse2_storelpd"
4723 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4724 (vec_select:DF
4725 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4726 (parallel [(const_int 0)])))]
4727 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4728 "@
4729 %vmovlpd\t{%1, %0|%0, %1}
4730 #
4731 #
4732 #
4733 #"
4734 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4735 (set_attr "prefix_data16" "1,*,*,*,*")
4736 (set_attr "prefix" "maybe_vex")
4737 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
4738
;; After reload, extracting element 0 of a V2DF into a DF register is just
;; a DF move.  A register source is re-created as a DFmode register with the
;; same register number; any other source is narrowed with gen_lowpart.
4739 (define_split
4740 [(set (match_operand:DF 0 "register_operand" "")
4741 (vec_select:DF
4742 (match_operand:V2DF 1 "nonimmediate_operand" "")
4743 (parallel [(const_int 0)])))]
4744 "TARGET_SSE2 && reload_completed"
4745 [(const_int 0)]
4746 {
4747 rtx op1 = operands[1];
4748 if (REG_P (op1))
4749 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4750 else
4751 op1 = gen_lowpart (DFmode, op1);
4752 emit_move_insn (operands[0], op1);
4753 DONE;
4754 })
4755
;; Element-0 extraction from a V2DF for targets that have SSE but not SSE2
;; (see the insn condition).  Alternatives: store to memory, register to
;; register, load from memory.
4756 (define_insn "*vec_extractv2df_0_sse"
4757 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4758 (vec_select:DF
4759 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4760 (parallel [(const_int 0)])))]
4761 "!TARGET_SSE2 && TARGET_SSE
4762 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4763 "@
4764 movlps\t{%1, %0|%0, %1}
4765 movaps\t{%1, %0|%0, %1}
4766 movlps\t{%1, %0|%0, %1}"
4767 [(set_attr "type" "ssemov")
4768 (set_attr "mode" "V2SF,V4SF,V2SF")])
4769
;; Expander for "replace the high half": the result keeps element 0 of V2DF
;; operand 1 and takes DF operand 2 as element 1.  The operands are first
;; passed through ix86_fixup_binary_operands; if the destination it returns
;; is not operands[0], the result is copied into operands[0] afterwards.
4770 (define_expand "sse2_loadhpd_exp"
4771 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4772 (vec_concat:V2DF
4773 (vec_select:DF
4774 (match_operand:V2DF 1 "nonimmediate_operand" "")
4775 (parallel [(const_int 0)]))
4776 (match_operand:DF 2 "nonimmediate_operand" "")))]
4777 "TARGET_SSE2"
4778 {
4779 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4780
4781 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4782
4783 /* Fix up the destination if needed. */
4784 if (dst != operands[0])
4785 emit_move_insn (operands[0], dst);
4786
4787 DONE;
4788 })
4789
4790 ;; Avoid combining registers from different units in a single alternative,
4791 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the high half: result = { element 0 of operand 1, operand 2 }.
;; Alternatives 0-3 produce the result in an SSE register.  Alternatives 4-6
;; have an offsettable memory destination tied to operand 1 and output "#",
;; so they depend on being split.
4792 (define_insn "sse2_loadhpd"
4793 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4794 "=x,x,x,x,o,o ,o")
4795 (vec_concat:V2DF
4796 (vec_select:DF
4797 (match_operand:V2DF 1 "nonimmediate_operand"
4798 " 0,x,0,x,0,0 ,0")
4799 (parallel [(const_int 0)]))
4800 (match_operand:DF 2 "nonimmediate_operand"
4801 " m,m,x,x,x,*f,r")))]
4802 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4803 "@
4804 movhpd\t{%2, %0|%0, %2}
4805 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4806 unpcklpd\t{%2, %0|%0, %2}
4807 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4808 #
4809 #
4810 #"
4811 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4812 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4813 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4814 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4815 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
4816
;; After reload, replacing element 1 of a V2DF that lives in memory with a
;; DF register is a DF store to that memory at byte offset 8; element 0 is
;; left as it was.
4817 (define_split
4818 [(set (match_operand:V2DF 0 "memory_operand" "")
4819 (vec_concat:V2DF
4820 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4821 (match_operand:DF 1 "register_operand" "")))]
4822 "TARGET_SSE2 && reload_completed"
4823 [(set (match_dup 0) (match_dup 1))]
4824 "operands[0] = adjust_address (operands[0], DFmode, 8);")
4825
;; Expander for "replace the low half": the result takes DF operand 2 as
;; element 0 and keeps element 1 of V2DF operand 1.  The operands are first
;; passed through ix86_fixup_binary_operands; if the destination it returns
;; is not operands[0], the result is copied into operands[0] afterwards.
4826 (define_expand "sse2_loadlpd_exp"
4827 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4828 (vec_concat:V2DF
4829 (match_operand:DF 2 "nonimmediate_operand" "")
4830 (vec_select:DF
4831 (match_operand:V2DF 1 "nonimmediate_operand" "")
4832 (parallel [(const_int 1)]))))]
4833 "TARGET_SSE2"
4834 {
4835 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4836
4837 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4838
4839 /* Fix up the destination if needed. */
4840 if (dst != operands[0])
4841 emit_move_insn (operands[0], dst);
4842
4843 DONE;
4844 })
4845
4846 ;; Avoid combining registers from different units in a single alternative,
4847 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the low half: result = { operand 2, element 1 of operand 1 }.
;; Operand 1 uses the vector_move_operand predicate, and alternative 0
;; accepts it through the "C" constraint.  Alternatives 8-10 have a memory
;; destination tied to operand 1 and output "#", so they depend on being
;; split.
4848 (define_insn "sse2_loadlpd"
4849 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4850 "=x,x,x,x,x,x,x,x,m,m ,m")
4851 (vec_concat:V2DF
4852 (match_operand:DF 2 "nonimmediate_operand"
4853 " m,m,m,x,x,0,0,x,x,*f,r")
4854 (vec_select:DF
4855 (match_operand:V2DF 1 "vector_move_operand"
4856 " C,0,x,0,x,x,o,o,0,0 ,0")
4857 (parallel [(const_int 1)]))))]
4858 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4859 "@
4860 %vmovsd\t{%2, %0|%0, %2}
4861 movlpd\t{%2, %0|%0, %2}
4862 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4863 movsd\t{%2, %0|%0, %2}
4864 vmovsd\t{%2, %1, %0|%0, %1, %2}
4865 shufpd\t{$2, %1, %0|%0, %1, 2}
4866 movhpd\t{%H1, %0|%0, %H1}
4867 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4868 #
4869 #
4870 #"
4871 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
;; Type is sselog for the shufpd alternative (5), fmov/imov for the x87 and
;; integer-register store alternatives (9, 10), and ssemov for the rest.
4872 (set (attr "type")
4873 (cond [(eq_attr "alternative" "5")
4874 (const_string "sselog")
4875 (eq_attr "alternative" "9")
4876 (const_string "fmov")
4877 (eq_attr "alternative" "10")
4878 (const_string "imov")
4879 ]
4880 (const_string "ssemov")))
4881 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4882 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4883 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4884 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
4885
;; After reload, replacing element 0 of a V2DF that lives in memory with a
;; DF register is a DF store to the start of that memory (byte offset 0).
;; Element 1, which the pattern keeps unchanged, occupies bytes 8..15 and
;; must not be written, so the offset here must be 0 rather than 8.
4886 (define_split
4887 [(set (match_operand:V2DF 0 "memory_operand" "")
4888 (vec_concat:V2DF
4889 (match_operand:DF 1 "register_operand" "")
4890 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4891 "TARGET_SSE2 && reload_completed"
4892 [(set (match_dup 0) (match_dup 1))]
4893 "operands[0] = adjust_address (operands[0], DFmode, 0);")
4894
;; vec_merge with mask 1: element 0 of the result comes from operand 2 and
;; element 1 comes from operand 1.  Nine alternatives cover register,
;; memory-source and memory-destination combinations, with separate
;; non-AVX and AVX forms selected by the isa attribute.
4895 (define_insn "sse2_movsd"
4896 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4897 (vec_merge:V2DF
4898 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4899 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4900 (const_int 1)))]
4901 "TARGET_SSE2"
4902 "@
4903 movsd\t{%2, %0|%0, %2}
4904 vmovsd\t{%2, %1, %0|%0, %1, %2}
4905 movlpd\t{%2, %0|%0, %2}
4906 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4907 %vmovlpd\t{%2, %0|%0, %2}
4908 shufpd\t{$2, %1, %0|%0, %1, 2}
4909 movhps\t{%H1, %0|%0, %H1}
4910 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4911 %vmovhps\t{%1, %H0|%H0, %1}"
4912 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
;; Only the shufpd alternative (5) is typed sselog; all others are ssemov.
4913 (set (attr "type")
4914 (if_then_else
4915 (eq_attr "alternative" "5")
4916 (const_string "sselog")
4917 (const_string "ssemov")))
;; prefix_data16 is "1" for the movlpd alternatives (2 and 4) when AVX is
;; not enabled.
4918 (set (attr "prefix_data16")
4919 (if_then_else
4920 (and (eq_attr "alternative" "2,4")
4921 (not (match_test "TARGET_AVX")))
4922 (const_string "1")
4923 (const_string "*")))
4924 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4925 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4926 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
4927
;; Duplicate DF operand 1 into both elements of V2DF operand 0.
;; Alternative 0 (non-AVX) requires the source to already be in the
;; destination register; alternative 1 is enabled only for the "sse3" isa
;; and accepts a register or memory source.
4928 (define_insn "vec_dupv2df"
4929 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4930 (vec_duplicate:V2DF
4931 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
4932 "TARGET_SSE2"
4933 "@
4934 unpcklpd\t%0, %0
4935 %vmovddup\t{%1, %0|%0, %1}"
4936 [(set_attr "isa" "noavx,sse3")
4937 (set_attr "type" "sselog1")
4938 (set_attr "prefix" "orig,maybe_vex")
4939 (set_attr "mode" "V2DF")])
4940
;; Build a V2DF from two DF values: operand 1 becomes element 0 and operand 2
;; becomes element 1.  The insn condition is only TARGET_SSE; the
;; per-alternative "isa" attribute decides which alternatives are enabled.
;; Alternative 2 matches operand 2 to operand 1 (constraint "1").
4941 (define_insn "*vec_concatv2df"
4942 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
4943 (vec_concat:V2DF
4944 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
4945 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
4946 "TARGET_SSE"
4947 "@
4948 unpcklpd\t{%2, %0|%0, %2}
4949 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4950 %vmovddup\t{%1, %0|%0, %1}
4951 movhpd\t{%2, %0|%0, %2}
4952 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4953 %vmovsd\t{%1, %0|%0, %1}
4954 movlhps\t{%2, %0|%0, %2}
4955 movhps\t{%2, %0|%0, %2}"
4956 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
4957 (set (attr "type")
4958 (if_then_else
4959 (eq_attr "alternative" "0,1,2")
4960 (const_string "sselog")
4961 (const_string "ssemov")))
4962 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
4963 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
4964 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
4965
4966 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4967 ;;
4968 ;; Parallel integral arithmetic
4969 ;;
4970 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4971
;; Integer vector negation, expressed as (0 - operand 1).  The preparation
;; statement sets operand 2 to a register holding the all-zero vector of
;; the same mode.
4972 (define_expand "neg<mode>2"
4973 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4974 (minus:VI_AVX2
4975 (match_dup 2)
4976 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
4977 "TARGET_SSE2"
4978 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
4979
;; Expander for integer vector add/subtract over the plusminus code
;; iterator.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
4980 (define_expand "<plusminus_insn><mode>3"
4981 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4982 (plusminus:VI_AVX2
4983 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4984 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4985 "TARGET_SSE2"
4986 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
4987
;; Integer vector add/subtract.  Alternative 0 is the non-AVX two-operand
;; form with operand 1 tied to the destination; alternative 1 is the AVX
;; three-operand form.  The <comm> prefix on operand 1's constraint comes
;; from a code attribute defined outside this section.
4988 (define_insn "*<plusminus_insn><mode>3"
4989 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4990 (plusminus:VI_AVX2
4991 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4992 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4993 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4994 "@
4995 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4996 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4997 [(set_attr "isa" "noavx,avx")
4998 (set_attr "type" "sseiadd")
4999 (set_attr "prefix_data16" "1,*")
5000 (set_attr "prefix" "orig,vex")
5001 (set_attr "mode" "<sseinsnmode>")])
5002
;; Expander for the sat_plusminus code iterator on the VI12_AVX2 vector
;; modes.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
5003 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
5004 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
5005 (sat_plusminus:VI12_AVX2
5006 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
5007 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
5008 "TARGET_SSE2"
5009 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5010
;; Insn for the sat_plusminus codes on VI12_AVX2 modes.  Alternative 0 is
;; the non-AVX two-operand form with operand 1 tied to the destination;
;; alternative 1 is the AVX three-operand form.  Note that the "mode"
;; attribute is fixed to TI here, unlike the non-saturating insn, which
;; uses <sseinsnmode>.
5011 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
5012 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
5013 (sat_plusminus:VI12_AVX2
5014 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
5015 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5016 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5017 "@
5018 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
5019 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5020 [(set_attr "isa" "noavx,avx")
5021 (set_attr "type" "sseiadd")
5022 (set_attr "prefix_data16" "1,*")
5023 (set_attr "prefix" "orig,vex")
5024 (set_attr "mode" "TI")])
5025
;; Byte-element vector multiply.  The insn has no real output template
;; ("#") and is only valid while new pseudos can still be created; it is
;; always split ("&& 1") into: four self-interleaves of the inputs, two
;; multiplies in <sseunpackmode>, and a final extraction of the even bytes.
;; A REG_EQUAL note describing the whole multiply is attached to the last
;; emitted insn.
5026 (define_insn_and_split "mul<mode>3"
5027 [(set (match_operand:VI1_AVX2 0 "register_operand" "")
5028 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
5029 (match_operand:VI1_AVX2 2 "register_operand" "")))]
5030 "TARGET_SSE2
5031 && can_create_pseudo_p ()"
5032 "#"
5033 "&& 1"
5034 [(const_int 0)]
5035 {
5036 rtx t[6];
5037 int i;
5038 enum machine_mode mulmode = <sseunpackmode>mode;
5039
5040 for (i = 0; i < 6; ++i)
5041 t[i] = gen_reg_rtx (<MODE>mode);
5042
5043 /* Unpack data such that we've got a source byte in each low byte of
5044 each word. We don't care what goes into the high byte of each word.
5045 Rather than trying to get zero in there, most convenient is to let
5046 it be a copy of the low byte. */
5047 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
5048 operands[1]));
5049 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
5050 operands[2]));
5051 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
5052 operands[1]));
5053 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
5054 operands[2]));
5055
5056 /* Multiply words. The end-of-line annotations here give a picture of what
5057 the output of that instruction looks like. Dot means don't care; the
5058 letters are the bytes of the result with A being the most significant. */
5059 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
5060 gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
5061 gen_lowpart (mulmode, t[0]),
5062 gen_lowpart (mulmode, t[1]))));
5063 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
5064 gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
5065 gen_lowpart (mulmode, t[2]),
5066 gen_lowpart (mulmode, t[3]))));
5067
5068 /* Extract the even bytes and merge them back together. */
5069 if (<MODE>mode == V16QImode)
5070 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
5071 else
5072 {
5073 /* Since avx2_interleave_{low,high}v32qi used above aren't cross-lane,
5074 this can't be normal even extraction, but one where additionally
5075 the second and third quarter are swapped. That is even one insn
5076 shorter than even extraction. */
5077 rtvec v = rtvec_alloc (32);
5078 for (i = 0; i < 32; ++i)
5079 RTVEC_ELT (v, i)
5080 = GEN_INT (i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0));
5081 t[0] = operands[0];
5082 t[1] = t[5];
5083 t[2] = t[4];
5084 t[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v);
5085 ix86_expand_vec_perm_const (t);
5086 }
5087
5088 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5089 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
5090 DONE;
5091 })
5092
;; Expander for the word-element (VI2_AVX2) vector multiply.  The operands
;; are passed through ix86_fixup_binary_operands_no_copy before the
;; pattern is emitted.
5093 (define_expand "mul<mode>3"
5094 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5095 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
5096 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
5097 "TARGET_SSE2"
5098 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
5099
;; Word-element vector multiply (pmullw / vpmullw).  Operand 1 carries the
;; "%" commutative marker; alternative 0 ties it to the destination
;; (non-AVX), alternative 1 is the AVX three-operand form.
5100 (define_insn "*mul<mode>3"
5101 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5102 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5103 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5104 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5105 "@
5106 pmullw\t{%2, %0|%0, %2}
5107 vpmullw\t{%2, %1, %0|%0, %1, %2}"
5108 [(set_attr "isa" "noavx,avx")
5109 (set_attr "type" "sseimul")
5110 (set_attr "prefix_data16" "1,*")
5111 (set_attr "prefix" "orig,vex")
5112 (set_attr "mode" "<sseinsnmode>")])
5113
;; Expander for the high part of a word-element multiply: both inputs are
;; extended (sign or zero, per the any_extend iterator) to <ssedoublemode>,
;; multiplied, shifted right logically by 16 and truncated back.  The
;; operands are passed through ix86_fixup_binary_operands_no_copy.
5114 (define_expand "<s>mul<mode>3_highpart"
5115 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5116 (truncate:VI2_AVX2
5117 (lshiftrt:<ssedoublemode>
5118 (mult:<ssedoublemode>
5119 (any_extend:<ssedoublemode>
5120 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
5121 (any_extend:<ssedoublemode>
5122 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
5123 (const_int 16))))]
5124 "TARGET_SSE2"
5125 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
5126
;; High part of a word-element multiply (pmulh<u>w / vpmulh<u>w): the
;; widened product shifted right by 16 and truncated.  Operand 1 carries
;; the "%" commutative marker; alternative 0 is the non-AVX form tied to
;; the destination, alternative 1 the AVX three-operand form.
5127 (define_insn "*<s>mul<mode>3_highpart"
5128 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5129 (truncate:VI2_AVX2
5130 (lshiftrt:<ssedoublemode>
5131 (mult:<ssedoublemode>
5132 (any_extend:<ssedoublemode>
5133 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5134 (any_extend:<ssedoublemode>
5135 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5136 (const_int 16))))]
5137 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5138 "@
5139 pmulh<u>w\t{%2, %0|%0, %2}
5140 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5141 [(set_attr "isa" "noavx,avx")
5142 (set_attr "type" "sseimul")
5143 (set_attr "prefix_data16" "1,*")
5144 (set_attr "prefix" "orig,vex")
5145 (set_attr "mode" "<sseinsnmode>")])
5146
;; Expander for the AVX2 unsigned widening multiply of even elements:
;; elements 0, 2, 4 and 6 of each V8SI input are selected, zero-extended
;; to DI and multiplied, giving a V4DI result.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy in V8SImode.
5147 (define_expand "avx2_umulv4siv4di3"
5148 [(set (match_operand:V4DI 0 "register_operand" "")
5149 (mult:V4DI
5150 (zero_extend:V4DI
5151 (vec_select:V4SI
5152 (match_operand:V8SI 1 "nonimmediate_operand" "")
5153 (parallel [(const_int 0) (const_int 2)
5154 (const_int 4) (const_int 6)])))
5155 (zero_extend:V4DI
5156 (vec_select:V4SI
5157 (match_operand:V8SI 2 "nonimmediate_operand" "")
5158 (parallel [(const_int 0) (const_int 2)
5159 (const_int 4) (const_int 6)])))))]
5160 "TARGET_AVX2"
5161 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
5162
;; AVX2 unsigned widening multiply of even V8SI elements (vpmuludq),
;; matching the RTL produced by the avx2_umulv4siv4di3 expander.
;; Operand 1 carries the "%" commutative marker.
;; NOTE(review): the insn name says "avx_" while the condition and the
;; expander say AVX2; the leading "*" makes the name debug-only, but
;; confirm whether "*avx2_umulv4siv4di3" was intended.
5163 (define_insn "*avx_umulv4siv4di3"
5164 [(set (match_operand:V4DI 0 "register_operand" "=x")
5165 (mult:V4DI
5166 (zero_extend:V4DI
5167 (vec_select:V4SI
5168 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5169 (parallel [(const_int 0) (const_int 2)
5170 (const_int 4) (const_int 6)])))
5171 (zero_extend:V4DI
5172 (vec_select:V4SI
5173 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5174 (parallel [(const_int 0) (const_int 2)
5175 (const_int 4) (const_int 6)])))))]
5176 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5177 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5178 [(set_attr "type" "sseimul")
5179 (set_attr "prefix" "vex")
5180 (set_attr "mode" "OI")])
5181
;; Expander for the SSE2 unsigned widening multiply of even elements:
;; elements 0 and 2 of each V4SI input are selected, zero-extended to DI
;; and multiplied, giving a V2DI result.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy in V4SImode.
5182 (define_expand "sse2_umulv2siv2di3"
5183 [(set (match_operand:V2DI 0 "register_operand" "")
5184 (mult:V2DI
5185 (zero_extend:V2DI
5186 (vec_select:V2SI
5187 (match_operand:V4SI 1 "nonimmediate_operand" "")
5188 (parallel [(const_int 0) (const_int 2)])))
5189 (zero_extend:V2DI
5190 (vec_select:V2SI
5191 (match_operand:V4SI 2 "nonimmediate_operand" "")
5192 (parallel [(const_int 0) (const_int 2)])))))]
5193 "TARGET_SSE2"
5194 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5195
;; SSE2 unsigned widening multiply of even V4SI elements (pmuludq /
;; vpmuludq).  Operand 1 carries the "%" commutative marker; alternative 0
;; is the non-AVX form tied to the destination, alternative 1 the AVX
;; three-operand form.
5196 (define_insn "*sse2_umulv2siv2di3"
5197 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5198 (mult:V2DI
5199 (zero_extend:V2DI
5200 (vec_select:V2SI
5201 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5202 (parallel [(const_int 0) (const_int 2)])))
5203 (zero_extend:V2DI
5204 (vec_select:V2SI
5205 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5206 (parallel [(const_int 0) (const_int 2)])))))]
5207 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5208 "@
5209 pmuludq\t{%2, %0|%0, %2}
5210 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5211 [(set_attr "isa" "noavx,avx")
5212 (set_attr "type" "sseimul")
5213 (set_attr "prefix_data16" "1,*")
5214 (set_attr "prefix" "orig,vex")
5215 (set_attr "mode" "TI")])
5216
;; Expander for the AVX2 signed widening multiply of even elements:
;; elements 0, 2, 4 and 6 of each V8SI input are selected, sign-extended
;; to DI and multiplied, giving a V4DI result.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy in V8SImode.
5217 (define_expand "avx2_mulv4siv4di3"
5218 [(set (match_operand:V4DI 0 "register_operand" "")
5219 (mult:V4DI
5220 (sign_extend:V4DI
5221 (vec_select:V4SI
5222 (match_operand:V8SI 1 "nonimmediate_operand" "")
5223 (parallel [(const_int 0) (const_int 2)
5224 (const_int 4) (const_int 6)])))
5225 (sign_extend:V4DI
5226 (vec_select:V4SI
5227 (match_operand:V8SI 2 "nonimmediate_operand" "")
5228 (parallel [(const_int 0) (const_int 2)
5229 (const_int 4) (const_int 6)])))))]
5230 "TARGET_AVX2"
5231 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
5232
;; AVX2 signed widening multiply of even V8SI elements (vpmuldq), matching
;; the RTL produced by the avx2_mulv4siv4di3 expander.
;; Operand 1 is marked commutative with "%", like the unsigned counterpart
;; and the other multiply insns in this file: the insn condition
;; (ix86_binary_operator_ok with MULT) accepts a memory operand 1 when
;; operand 2 is a register, and the "%" lets the two be swapped so the
;; memory operand can use the "xm" slot instead of being loaded first.
5233 (define_insn "*avx2_mulv4siv4di3"
5234 [(set (match_operand:V4DI 0 "register_operand" "=x")
5235 (mult:V4DI
5236 (sign_extend:V4DI
5237 (vec_select:V4SI
5238 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5239 (parallel [(const_int 0) (const_int 2)
5240 (const_int 4) (const_int 6)])))
5241 (sign_extend:V4DI
5242 (vec_select:V4SI
5243 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5244 (parallel [(const_int 0) (const_int 2)
5245 (const_int 4) (const_int 6)])))))]
5246 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5247 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5248 [(set_attr "isa" "avx")
5249 (set_attr "type" "sseimul")
5250 (set_attr "prefix_extra" "1")
5251 (set_attr "prefix" "vex")
5252 (set_attr "mode" "OI")])
5253
;; Expander for the SSE4.1 signed widening multiply of even elements:
;; elements 0 and 2 of each V4SI input are selected, sign-extended to DI
;; and multiplied, giving a V2DI result.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy in V4SImode.
5254 (define_expand "sse4_1_mulv2siv2di3"
5255 [(set (match_operand:V2DI 0 "register_operand" "")
5256 (mult:V2DI
5257 (sign_extend:V2DI
5258 (vec_select:V2SI
5259 (match_operand:V4SI 1 "nonimmediate_operand" "")
5260 (parallel [(const_int 0) (const_int 2)])))
5261 (sign_extend:V2DI
5262 (vec_select:V2SI
5263 (match_operand:V4SI 2 "nonimmediate_operand" "")
5264 (parallel [(const_int 0) (const_int 2)])))))]
5265 "TARGET_SSE4_1"
5266 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5267
;; SSE4.1 signed widening multiply of even V4SI elements (pmuldq /
;; vpmuldq).  Operand 1 carries the "%" commutative marker; alternative 0
;; is the non-AVX form tied to the destination, alternative 1 the AVX
;; three-operand form.
5268 (define_insn "*sse4_1_mulv2siv2di3"
5269 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5270 (mult:V2DI
5271 (sign_extend:V2DI
5272 (vec_select:V2SI
5273 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5274 (parallel [(const_int 0) (const_int 2)])))
5275 (sign_extend:V2DI
5276 (vec_select:V2SI
5277 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5278 (parallel [(const_int 0) (const_int 2)])))))]
5279 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5280 "@
5281 pmuldq\t{%2, %0|%0, %2}
5282 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5283 [(set_attr "isa" "noavx,avx")
5284 (set_attr "type" "sseimul")
5285 (set_attr "prefix_data16" "1,*")
5286 (set_attr "prefix_extra" "1")
5287 (set_attr "prefix" "orig,vex")
5288 (set_attr "mode" "TI")])
5289
;; Expander for the AVX2 multiply-and-add of word pairs.  The even-indexed
;; HI elements (0,2,...,14) of the two V16HI inputs are sign-extended to SI
;; and multiplied, the odd-indexed elements (1,3,...,15) likewise, and the
;; two V8SI products are added.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy in V16HImode.
5290 (define_expand "avx2_pmaddwd"
5291 [(set (match_operand:V8SI 0 "register_operand" "")
5292 (plus:V8SI
5293 (mult:V8SI
5294 (sign_extend:V8SI
5295 (vec_select:V8HI
5296 (match_operand:V16HI 1 "nonimmediate_operand" "")
5297 (parallel [(const_int 0)
5298 (const_int 2)
5299 (const_int 4)
5300 (const_int 6)
5301 (const_int 8)
5302 (const_int 10)
5303 (const_int 12)
5304 (const_int 14)])))
5305 (sign_extend:V8SI
5306 (vec_select:V8HI
5307 (match_operand:V16HI 2 "nonimmediate_operand" "")
5308 (parallel [(const_int 0)
5309 (const_int 2)
5310 (const_int 4)
5311 (const_int 6)
5312 (const_int 8)
5313 (const_int 10)
5314 (const_int 12)
5315 (const_int 14)]))))
5316 (mult:V8SI
5317 (sign_extend:V8SI
5318 (vec_select:V8HI (match_dup 1)
5319 (parallel [(const_int 1)
5320 (const_int 3)
5321 (const_int 5)
5322 (const_int 7)
5323 (const_int 9)
5324 (const_int 11)
5325 (const_int 13)
5326 (const_int 15)])))
5327 (sign_extend:V8SI
5328 (vec_select:V8HI (match_dup 2)
5329 (parallel [(const_int 1)
5330 (const_int 3)
5331 (const_int 5)
5332 (const_int 7)
5333 (const_int 9)
5334 (const_int 11)
5335 (const_int 13)
5336 (const_int 15)]))))))]
5337 "TARGET_AVX2"
5338 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
5339
;; Expander for the SSE2 multiply-and-add of word pairs.  The even-indexed
;; HI elements (0,2,4,6) of the two V8HI inputs are sign-extended to SI and
;; multiplied, the odd-indexed elements (1,3,5,7) likewise, and the two
;; V4SI products are added.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy in V8HImode.
5340 (define_expand "sse2_pmaddwd"
5341 [(set (match_operand:V4SI 0 "register_operand" "")
5342 (plus:V4SI
5343 (mult:V4SI
5344 (sign_extend:V4SI
5345 (vec_select:V4HI
5346 (match_operand:V8HI 1 "nonimmediate_operand" "")
5347 (parallel [(const_int 0)
5348 (const_int 2)
5349 (const_int 4)
5350 (const_int 6)])))
5351 (sign_extend:V4SI
5352 (vec_select:V4HI
5353 (match_operand:V8HI 2 "nonimmediate_operand" "")
5354 (parallel [(const_int 0)
5355 (const_int 2)
5356 (const_int 4)
5357 (const_int 6)]))))
5358 (mult:V4SI
5359 (sign_extend:V4SI
5360 (vec_select:V4HI (match_dup 1)
5361 (parallel [(const_int 1)
5362 (const_int 3)
5363 (const_int 5)
5364 (const_int 7)])))
5365 (sign_extend:V4SI
5366 (vec_select:V4HI (match_dup 2)
5367 (parallel [(const_int 1)
5368 (const_int 3)
5369 (const_int 5)
5370 (const_int 7)]))))))]
5371 "TARGET_SSE2"
5372 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5373
;; AVX2 multiply-and-add of word pairs (vpmaddwd) on V16HI inputs: the
;; products of the even-indexed and of the odd-indexed sign-extended HI
;; elements are added, giving a V8SI result.  Operand 1 carries the "%"
;; commutative marker; there is a single AVX three-operand alternative.
5374 (define_insn "*avx2_pmaddwd"
5375 [(set (match_operand:V8SI 0 "register_operand" "=x")
5376 (plus:V8SI
5377 (mult:V8SI
5378 (sign_extend:V8SI
5379 (vec_select:V8HI
5380 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5381 (parallel [(const_int 0)
5382 (const_int 2)
5383 (const_int 4)
5384 (const_int 6)
5385 (const_int 8)
5386 (const_int 10)
5387 (const_int 12)
5388 (const_int 14)])))
5389 (sign_extend:V8SI
5390 (vec_select:V8HI
5391 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5392 (parallel [(const_int 0)
5393 (const_int 2)
5394 (const_int 4)
5395 (const_int 6)
5396 (const_int 8)
5397 (const_int 10)
5398 (const_int 12)
5399 (const_int 14)]))))
5400 (mult:V8SI
5401 (sign_extend:V8SI
5402 (vec_select:V8HI (match_dup 1)
5403 (parallel [(const_int 1)
5404 (const_int 3)
5405 (const_int 5)
5406 (const_int 7)
5407 (const_int 9)
5408 (const_int 11)
5409 (const_int 13)
5410 (const_int 15)])))
5411 (sign_extend:V8SI
5412 (vec_select:V8HI (match_dup 2)
5413 (parallel [(const_int 1)
5414 (const_int 3)
5415 (const_int 5)
5416 (const_int 7)
5417 (const_int 9)
5418 (const_int 11)
5419 (const_int 13)
5420 (const_int 15)]))))))]
5421 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5422 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5423 [(set_attr "type" "sseiadd")
5424 (set_attr "prefix" "vex")
5425 (set_attr "mode" "OI")])
5426
;; SSE2 multiply-and-add of word pairs (pmaddwd / vpmaddwd) on V8HI inputs:
;; the products of the even-indexed and of the odd-indexed sign-extended
;; HI elements are added, giving a V4SI result.  Operand 1 carries the "%"
;; commutative marker; alternative 0 is the non-AVX form tied to the
;; destination, alternative 1 the AVX three-operand form.
5427 (define_insn "*sse2_pmaddwd"
5428 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5429 (plus:V4SI
5430 (mult:V4SI
5431 (sign_extend:V4SI
5432 (vec_select:V4HI
5433 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5434 (parallel [(const_int 0)
5435 (const_int 2)
5436 (const_int 4)
5437 (const_int 6)])))
5438 (sign_extend:V4SI
5439 (vec_select:V4HI
5440 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5441 (parallel [(const_int 0)
5442 (const_int 2)
5443 (const_int 4)
5444 (const_int 6)]))))
5445 (mult:V4SI
5446 (sign_extend:V4SI
5447 (vec_select:V4HI (match_dup 1)
5448 (parallel [(const_int 1)
5449 (const_int 3)
5450 (const_int 5)
5451 (const_int 7)])))
5452 (sign_extend:V4SI
5453 (vec_select:V4HI (match_dup 2)
5454 (parallel [(const_int 1)
5455 (const_int 3)
5456 (const_int 5)
5457 (const_int 7)]))))))]
5458 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5459 "@
5460 pmaddwd\t{%2, %0|%0, %2}
5461 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5462 [(set_attr "isa" "noavx,avx")
5463 (set_attr "type" "sseiadd")
5464 (set_attr "atom_unit" "simul")
5465 (set_attr "prefix_data16" "1,*")
5466 (set_attr "prefix" "orig,vex")
5467 (set_attr "mode" "TI")])
5468
;; Expander for the dword-element (VI4_AVX2) vector multiply.  Operands are
;; only passed through ix86_fixup_binary_operands_no_copy when SSE4.1 or
;; AVX is enabled; otherwise they are left untouched, which is the case
;; covered by the *sse2_mulv4si3 insn-and-split (its condition requires
;; !TARGET_SSE4_1 && !TARGET_AVX).
5469 (define_expand "mul<mode>3"
5470 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5471 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5472 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5473 "TARGET_SSE2"
5474 {
5475 if (TARGET_SSE4_1 || TARGET_AVX)
5476 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
5477 })
5478
;; Dword-element vector multiply (pmulld / vpmulld), available from SSE4.1.
;; Operand 1 carries the "%" commutative marker; alternative 0 is the
;; non-AVX form tied to the destination, alternative 1 the AVX
;; three-operand form.
5479 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5480 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5481 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5482 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5483 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5484 "@
5485 pmulld\t{%2, %0|%0, %2}
5486 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5487 [(set_attr "isa" "noavx,avx")
5488 (set_attr "type" "sseimul")
5489 (set_attr "prefix_extra" "1")
5490 (set_attr "prefix" "orig,vex")
5491 (set_attr "mode" "<sseinsnmode>")])
5492
;; V4SI multiply for SSE2 targets without SSE4.1 and without AVX.  The
;; insn has no real output template ("#") and is only valid while new
;; pseudos can still be created; it is always split ("&& 1") into two
;; widening multiplies (one on the inputs, one on the inputs shifted right
;; by 32 bits as V1TI), two pshufd shuffles and a low interleave.  A
;; REG_EQUAL note describing the whole multiply is attached to the last
;; emitted insn.
5493 (define_insn_and_split "*sse2_mulv4si3"
5494 [(set (match_operand:V4SI 0 "register_operand" "")
5495 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5496 (match_operand:V4SI 2 "register_operand" "")))]
5497 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5498 && can_create_pseudo_p ()"
5499 "#"
5500 "&& 1"
5501 [(const_int 0)]
5502 {
5503 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5504 rtx op0, op1, op2;
5505
5506 op0 = operands[0];
5507 op1 = operands[1];
5508 op2 = operands[2];
5509 t1 = gen_reg_rtx (V4SImode);
5510 t2 = gen_reg_rtx (V4SImode);
5511 t3 = gen_reg_rtx (V4SImode);
5512 t4 = gen_reg_rtx (V4SImode);
5513 t5 = gen_reg_rtx (V4SImode);
5514 t6 = gen_reg_rtx (V4SImode);
5515 thirtytwo = GEN_INT (32);
5516
5517 /* Multiply elements 2 and 0. */
5518 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5519 op1, op2));
5520
5521 /* Shift both input vectors down one element, so that elements 3
5522 and 1 are now in the slots for elements 2 and 0. For K8, at
5523 least, this is faster than using a shuffle. */
5524 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5525 gen_lowpart (V1TImode, op1),
5526 thirtytwo));
5527 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5528 gen_lowpart (V1TImode, op2),
5529 thirtytwo));
5530 /* Multiply elements 3 and 1. */
5531 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5532 t2, t3));
5533
5534 /* Move the results in element 2 down to element 1; we don't care
5535 what goes in elements 2 and 3. */
5536 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5537 const0_rtx, const0_rtx));
5538 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5539 const0_rtx, const0_rtx));
5540
5541 /* Merge the parts back together. */
5542 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5543
5544 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5545 gen_rtx_MULT (V4SImode, operands[1], operands[2]));
5546 DONE;
5547 })
5548
;; Quadword-element (VI8_AVX2) vector multiply.  The insn has no real
;; output template ("#") and is only valid while new pseudos can still be
;; created; it is always split ("&& 1").  With XOP and V2DImode the split
;; uses pshufd, a V4SI multiply, phadddq, a left shift by 32 and pmacsdql.
;; Otherwise it forms low*low plus the two low*high cross products shifted
;; left by 32, using the unsigned widening multiply.  A REG_EQUAL note
;; describing the whole multiply is attached to the last emitted insn.
5549 (define_insn_and_split "mul<mode>3"
5550 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5551 (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5552 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5553 "TARGET_SSE2
5554 && can_create_pseudo_p ()"
5555 "#"
5556 "&& 1"
5557 [(const_int 0)]
5558 {
5559 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5560 rtx op0, op1, op2;
5561
5562 op0 = operands[0];
5563 op1 = operands[1];
5564 op2 = operands[2];
5565
5566 if (TARGET_XOP && <MODE>mode == V2DImode)
5567 {
5568 /* op1: A,B,C,D, op2: E,F,G,H */
5569 op1 = gen_lowpart (V4SImode, op1);
5570 op2 = gen_lowpart (V4SImode, op2);
5571
5572 t1 = gen_reg_rtx (V4SImode);
5573 t2 = gen_reg_rtx (V4SImode);
5574 t3 = gen_reg_rtx (V2DImode);
5575 t4 = gen_reg_rtx (V2DImode);
5576
5577 /* t1: B,A,D,C */
5578 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5579 GEN_INT (1),
5580 GEN_INT (0),
5581 GEN_INT (3),
5582 GEN_INT (2)));
5583
5584 /* t2: (B*E),(A*F),(D*G),(C*H) */
5585 emit_insn (gen_mulv4si3 (t2, t1, op2));
5586
5587 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5588 emit_insn (gen_xop_phadddq (t3, t2));
5589
5590 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5591 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5592
5593 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5594 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5595 }
5596 else
5597 {
5598 t1 = gen_reg_rtx (<MODE>mode);
5599 t2 = gen_reg_rtx (<MODE>mode);
5600 t3 = gen_reg_rtx (<MODE>mode);
5601 t4 = gen_reg_rtx (<MODE>mode);
5602 t5 = gen_reg_rtx (<MODE>mode);
5603 t6 = gen_reg_rtx (<MODE>mode);
5604 thirtytwo = GEN_INT (32);
5605
5606 /* Multiply low parts. */
5607 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5608 (t1, gen_lowpart (<ssepackmode>mode, op1),
5609 gen_lowpart (<ssepackmode>mode, op2)));
5610
5611 /* Shift input vectors right 32 bits so we can multiply high parts. */
5612 emit_insn (gen_lshr<mode>3 (t2, op1, thirtytwo));
5613 emit_insn (gen_lshr<mode>3 (t3, op2, thirtytwo));
5614
5615 /* Multiply high parts by low parts. */
5616 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5617 (t4, gen_lowpart (<ssepackmode>mode, op1),
5618 gen_lowpart (<ssepackmode>mode, t3)));
5619 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5620 (t5, gen_lowpart (<ssepackmode>mode, op2),
5621 gen_lowpart (<ssepackmode>mode, t2)));
5622
5623 /* Shift them back. */
5624 emit_insn (gen_ashl<mode>3 (t4, t4, thirtytwo));
5625 emit_insn (gen_ashl<mode>3 (t5, t5, thirtytwo));
5626
5627 /* Add the three parts together. */
5628 emit_insn (gen_add<mode>3 (t6, t1, t4));
5629 emit_insn (gen_add<mode>3 (op0, t6, t5));
5630 }
5631
5632 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5633 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
5634 DONE;
5635 })
5636
;; Widening multiply, "hi" variant, for word-element vectors.  It computes
;; the element-wise product (mul<mode>3) into t1 and the high part
;; (<s>mul<mode>3_highpart) into t2, then interleaves the high halves of
;; t1 and t2 into operand 0 viewed in <MODE>mode.
5637 (define_expand "vec_widen_<s>mult_hi_<mode>"
5638 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5639 (any_extend:<sseunpackmode>
5640 (match_operand:VI2_AVX2 1 "register_operand" ""))
5641 (match_operand:VI2_AVX2 2 "register_operand" "")]
5642 "TARGET_SSE2"
5643 {
5644 rtx op1, op2, t1, t2, dest;
5645
5646 op1 = operands[1];
5647 op2 = operands[2];
5648 t1 = gen_reg_rtx (<MODE>mode);
5649 t2 = gen_reg_rtx (<MODE>mode);
5650 dest = gen_lowpart (<MODE>mode, operands[0]);
5651
5652 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5653 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5654 emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2));
5655 DONE;
5656 })
5657
;; Widening multiply feeding the "lo" half of the result, for VI2_AVX2
;; vectors.  Two full-width products of the inputs are formed -- the
;; ordinary multiply and the <s>-flavoured high-part multiply -- and
;; their low interleave is written to operand 0, accessed in <MODE>mode.
(define_expand "vec_widen_<s>mult_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (any_extend:<sseunpackmode>
     (match_operand:VI2_AVX2 1 "register_operand" ""))
   (match_operand:VI2_AVX2 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (<MODE>mode);
  rtx prod_hi = gen_reg_rtx (<MODE>mode);
  /* The wide destination is written through a <MODE>mode view.  */
  rtx target = gen_lowpart (<MODE>mode, operands[0]);

  emit_insn (gen_mul<mode>3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_<s>mul<mode>3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_low<mode> (target, prod_lo, prod_hi));
  DONE;
})
5678
;; V8SI -> V4DI widening multiply, "hi" half, AVX2 only.
;; Each input is first permuted as a V4DI with selector (0, 2, 1, 3),
;; then shuffled with pshufd selector (2, 2, 3, 3), and the two shuffled
;; vectors are fed to the <u>-flavoured V4SI->V4DI widening multiply.
(define_expand "vec_widen_<s>mult_hi_v8si"
  [(match_operand:V4DI 0 "register_operand" "")
   (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
   (match_operand:V8SI 2 "nonimmediate_operand" "")]
  "TARGET_AVX2"
{
  /* pshufd immediate encoding the element selector (2, 2, 3, 3).  */
  const int dup_sel = 2 + (2 << 2) + (3 << 4) + (3 << 6);
  rtx perm1, perm2, dup1, dup2;

  perm1 = gen_reg_rtx (V4DImode);
  perm2 = gen_reg_rtx (V4DImode);
  dup1 = gen_reg_rtx (V8SImode);
  dup2 = gen_reg_rtx (V8SImode);

  emit_insn (gen_avx2_permv4di_1 (perm1,
				  gen_lowpart (V4DImode, operands[1]),
				  const0_rtx, const2_rtx,
				  const1_rtx, GEN_INT (3)));
  emit_insn (gen_avx2_permv4di_1 (perm2,
				  gen_lowpart (V4DImode, operands[2]),
				  const0_rtx, const2_rtx,
				  const1_rtx, GEN_INT (3)));
  emit_insn (gen_avx2_pshufdv3 (dup1, gen_lowpart (V8SImode, perm1),
				GEN_INT (dup_sel)));
  emit_insn (gen_avx2_pshufdv3 (dup2, gen_lowpart (V8SImode, perm2),
				GEN_INT (dup_sel)));
  emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], dup1, dup2));
  DONE;
})
5704
;; V8SI -> V4DI widening multiply, "lo" half, AVX2 only.
;; Each input is first permuted as a V4DI with selector (0, 2, 1, 3),
;; then shuffled with pshufd selector (0, 0, 1, 1), and the two shuffled
;; vectors are fed to the <u>-flavoured V4SI->V4DI widening multiply.
(define_expand "vec_widen_<s>mult_lo_v8si"
  [(match_operand:V4DI 0 "register_operand" "")
   (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
   (match_operand:V8SI 2 "nonimmediate_operand" "")]
  "TARGET_AVX2"
{
  /* pshufd immediate encoding the element selector (0, 0, 1, 1).  */
  const int dup_sel = 0 + (0 << 2) + (1 << 4) + (1 << 6);
  rtx perm1, perm2, dup1, dup2;

  perm1 = gen_reg_rtx (V4DImode);
  perm2 = gen_reg_rtx (V4DImode);
  dup1 = gen_reg_rtx (V8SImode);
  dup2 = gen_reg_rtx (V8SImode);

  emit_insn (gen_avx2_permv4di_1 (perm1,
				  gen_lowpart (V4DImode, operands[1]),
				  const0_rtx, const2_rtx,
				  const1_rtx, GEN_INT (3)));
  emit_insn (gen_avx2_permv4di_1 (perm2,
				  gen_lowpart (V4DImode, operands[2]),
				  const0_rtx, const2_rtx,
				  const1_rtx, GEN_INT (3)));
  emit_insn (gen_avx2_pshufdv3 (dup1, gen_lowpart (V8SImode, perm1),
				GEN_INT (dup_sel)));
  emit_insn (gen_avx2_pshufdv3 (dup2, gen_lowpart (V8SImode, perm2),
				GEN_INT (dup_sel)));
  emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], dup1, dup2));
  DONE;
})
5730
;; Signed V4SI -> V2DI widening multiply, "hi" half.
;; With XOP both inputs are shuffled with selector (0, 2, 1, 3) and the
;; XOP "high" multiply pattern is used; otherwise each input is
;; high-interleaved with itself and passed to the SSE4.1 widening multiply.
(define_expand "vec_widen_smult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE4_1"
{
  rtx lhs = gen_reg_rtx (V4SImode);
  rtx rhs = gen_reg_rtx (V4SImode);

  if (TARGET_XOP)
    {
      emit_insn (gen_sse2_pshufd_1 (lhs, operands[1],
				    GEN_INT (0), GEN_INT (2),
				    GEN_INT (1), GEN_INT (3)));
      emit_insn (gen_sse2_pshufd_1 (rhs, operands[2],
				    GEN_INT (0), GEN_INT (2),
				    GEN_INT (1), GEN_INT (3)));
      emit_insn (gen_xop_mulv2div2di3_high (operands[0], lhs, rhs));
    }
  else
    {
      emit_insn (gen_vec_interleave_highv4si (lhs, operands[1], operands[1]));
      emit_insn (gen_vec_interleave_highv4si (rhs, operands[2], operands[2]));
      emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], lhs, rhs));
    }
  DONE;
})
5759
;; Signed V4SI -> V2DI widening multiply, "lo" half.
;; With XOP both inputs are shuffled with selector (0, 2, 1, 3) and the
;; XOP "low" multiply pattern is used; otherwise each input is
;; low-interleaved with itself and passed to the SSE4.1 widening multiply.
(define_expand "vec_widen_smult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE4_1"
{
  rtx lhs = gen_reg_rtx (V4SImode);
  rtx rhs = gen_reg_rtx (V4SImode);

  if (TARGET_XOP)
    {
      emit_insn (gen_sse2_pshufd_1 (lhs, operands[1],
				    GEN_INT (0), GEN_INT (2),
				    GEN_INT (1), GEN_INT (3)));
      emit_insn (gen_sse2_pshufd_1 (rhs, operands[2],
				    GEN_INT (0), GEN_INT (2),
				    GEN_INT (1), GEN_INT (3)));
      emit_insn (gen_xop_mulv2div2di3_low (operands[0], lhs, rhs));
    }
  else
    {
      emit_insn (gen_vec_interleave_lowv4si (lhs, operands[1], operands[1]));
      emit_insn (gen_vec_interleave_lowv4si (rhs, operands[2], operands[2]));
      emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], lhs, rhs));
    }
  DONE;
})
5788
;; Unsigned V4SI -> V2DI widening multiply, "hi" half.
;; Each input is high-interleaved with itself, then the two results go
;; through the SSE2 unsigned V2SI->V2DI widening multiply.
(define_expand "vec_widen_umult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx dup1 = gen_reg_rtx (V4SImode);
  rtx dup2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_highv4si (dup1, operands[1], operands[1]));
  emit_insn (gen_vec_interleave_highv4si (dup2, operands[2], operands[2]));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], dup1, dup2));
  DONE;
})
5807
;; Unsigned V4SI -> V2DI widening multiply, "lo" half.
;; Each input is low-interleaved with itself, then the two results go
;; through the SSE2 unsigned V2SI->V2DI widening multiply.
(define_expand "vec_widen_umult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx dup1 = gen_reg_rtx (V4SImode);
  rtx dup2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_lowv4si (dup1, operands[1], operands[1]));
  emit_insn (gen_vec_interleave_lowv4si (dup2, operands[2], operands[2]));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], dup1, dup2));
  DONE;
})
5826
;; Signed dot product for VI2_AVX2 inputs: operand 0 = operand 3 plus the
;; pmaddwd result of operands 1 and 2, all in <sseunpackmode>.
(define_expand "sdot_prod<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")
   (match_operand:VI2_AVX2 2 "register_operand" "")
   (match_operand:<sseunpackmode> 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx madd = gen_reg_rtx (<sseunpackmode>mode);
  rtx sum;

  emit_insn (gen_<sse2_avx2>_pmaddwd (madd, operands[1], operands[2]));

  /* Accumulate into the destination with a plain vector PLUS.  */
  sum = gen_rtx_PLUS (<sseunpackmode>mode, operands[3], madd);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], sum));
  DONE;
})
5841
;; Maps an extension code to an ISA name string: zero_extend -> "sse2",
;; sign_extend -> "sse4_1".  Used below to build generator names such as
;; gen_<sse2_sse4_1>_<u>mulv2siv2di3.
5842 (define_code_attr sse2_sse4_1
5843 [(zero_extend "sse2") (sign_extend "sse4_1")])
5844
;; V4SI dot product accumulating into V2DI.
;; A first widening multiply of the inputs is added to operand 3.  Both
;; inputs are then shifted right by 32 bits as whole V1TI values and
;; multiplied again; the two partial sums are added into operand 0.
;; NOTE(review): presumably the first multiply covers the even elements
;; and the shifted one the odd elements -- confirm against the
;; mulv2siv2di3 patterns.
(define_expand "<s>dot_prodv4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
   (match_operand:V4SI 2 "register_operand" "")
   (match_operand:V2DI 3 "register_operand" "")]
  "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
{
  rtx acc, shifted1, shifted2, second_prod;

  /* First product, folded into the incoming accumulator.  */
  acc = gen_reg_rtx (V2DImode);
  emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (acc, operands[1],
						 operands[2]));
  emit_insn (gen_addv2di3 (acc, acc, operands[3]));

  /* Whole-register right shift of each input by 32 bits.  */
  shifted1 = gen_reg_rtx (V4SImode);
  shifted2 = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, shifted1),
				 gen_lowpart (V1TImode, operands[1]),
				 GEN_INT (32)));
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, shifted2),
				 gen_lowpart (V1TImode, operands[2]),
				 GEN_INT (32)));

  /* Second product, from the shifted inputs.  */
  second_prod = gen_reg_rtx (V2DImode);
  emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (second_prod, shifted1,
						 shifted2));

  emit_insn (gen_addv2di3 (operands[0], acc, second_prod));
  DONE;
})
5873
;; V8SI dot product accumulating into V4DI, AVX2 only.
;; A first widening multiply of the inputs is added to operand 3.  Both
;; inputs are then shifted right by 32 bits through the V2TI shift
;; pattern and multiplied again; the two partial sums are added into
;; operand 0.
;; NOTE(review): presumably the first multiply covers the even elements
;; and the shifted one the odd elements -- confirm against the
;; mulv4siv4di3 patterns.
(define_expand "<s>dot_prodv8si"
  [(match_operand:V4DI 0 "register_operand" "")
   (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
   (match_operand:V8SI 2 "register_operand" "")
   (match_operand:V4DI 3 "register_operand" "")]
  "TARGET_AVX2"
{
  rtx acc, shifted1, shifted2, second_prod;

  /* First product, folded into the incoming accumulator.  */
  acc = gen_reg_rtx (V4DImode);
  emit_insn (gen_avx2_<u>mulv4siv4di3 (acc, operands[1], operands[2]));
  emit_insn (gen_addv4di3 (acc, acc, operands[3]));

  /* Right shift of each input by 32 bits, in V2TImode.  */
  shifted1 = gen_reg_rtx (V8SImode);
  shifted2 = gen_reg_rtx (V8SImode);
  emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, shifted1),
				 gen_lowpart (V2TImode, operands[1]),
				 GEN_INT (32)));
  emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, shifted2),
				 gen_lowpart (V2TImode, operands[2]),
				 GEN_INT (32)));

  /* Second product, from the shifted inputs.  */
  second_prod = gen_reg_rtx (V4DImode);
  emit_insn (gen_avx2_<u>mulv4siv4di3 (second_prod, shifted1, shifted2));

  emit_insn (gen_addv4di3 (operands[0], acc, second_prod));
  DONE;
})
5902
;; Arithmetic right shift of a VI24_AVX2 vector (operand 1) by the SImode
;; count in operand 2, which may be any nonmemory operand (constraint "xN").
;; Alternative 0 (isa noavx) is the two-operand psra form with the
;; destination tied to operand 1; alternative 1 (isa avx) is the
;; three-operand vpsra form.  length_immediate is 1 when operand 2 is a
;; const_int and 0 otherwise.
5903 (define_insn "ashr<mode>3"
5904 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5905 (ashiftrt:VI24_AVX2
5906 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5907 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5908 "TARGET_SSE2"
5909 "@
5910 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5911 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5912 [(set_attr "isa" "noavx,avx")
5913 (set_attr "type" "sseishft")
5914 (set (attr "length_immediate")
5915 (if_then_else (match_operand 2 "const_int_operand" "")
5916 (const_string "1")
5917 (const_string "0")))
5918 (set_attr "prefix_data16" "1,*")
5919 (set_attr "prefix" "orig,vex")
5920 (set_attr "mode" "<sseinsnmode>")])
5921
;; Shift of a VI248_AVX2 vector (operand 1) by the SImode count in
;; operand 2, for every code in the any_lshift iterator; the mnemonic is
;; built from <vshift> and <ssemodesuffix>.
;; Alternative 0 (isa noavx) is the two-operand form with the destination
;; tied to operand 1; alternative 1 (isa avx) is the three-operand
;; v-prefixed form.  length_immediate is 1 when operand 2 is a const_int
;; and 0 otherwise.
5922 (define_insn "<shift_insn><mode>3"
5923 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5924 (any_lshift:VI248_AVX2
5925 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5926 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5927 "TARGET_SSE2"
5928 "@
5929 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
5930 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5931 [(set_attr "isa" "noavx,avx")
5932 (set_attr "type" "sseishft")
5933 (set (attr "length_immediate")
5934 (if_then_else (match_operand 2 "const_int_operand" "")
5935 (const_string "1")
5936 (const_string "0")))
5937 (set_attr "prefix_data16" "1,*")
5938 (set_attr "prefix" "orig,vex")
5939 (set_attr "mode" "<sseinsnmode>")])
5940
;; Whole-vector left shift for any VI_128 mode.  The shift itself is
;; expressed as a V1TI ashift, so the destination and the source are
;; both re-viewed in V1TImode before the pattern is emitted.  Operand 2
;; must satisfy const_0_to_255_mul_8_operand.
(define_expand "vec_shl_<mode>"
  [(set (match_operand:VI_128 0 "register_operand" "")
	(ashift:V1TI
	  (match_operand:VI_128 1 "register_operand" "")
	  (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
  "TARGET_SSE2"
{
  int opno;

  /* Operands 0 and 1 are the two vector registers; operand 2 stays.  */
  for (opno = 0; opno < 2; opno++)
    operands[opno] = gen_lowpart (V1TImode, operands[opno]);
})
5951
;; Left shift of a VIMAX_AVX2 value by a constant accepted by
;; const_0_to_255_mul_8_operand.  Alternative 0 (isa noavx) prints
;; pslldq with the destination tied to operand 1; alternative 1
;; (isa avx) prints the three-operand vpslldq.
(define_insn "<sse2_avx2>_ashl<mode>3"
  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
	(ashift:VIMAX_AVX2
	  (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
	  (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
  "TARGET_SSE2"
{
  /* The immediate that gets printed is the RTL shift count divided
     by 8.  */
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);

  if (which_alternative == 0)
    return "pslldq\t{%2, %0|%0, %2}";
  if (which_alternative == 1)
    return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
5977
;; Whole-vector logical right shift for any VI_128 mode.  The shift
;; itself is expressed as a V1TI lshiftrt, so the destination and the
;; source are both re-viewed in V1TImode before the pattern is emitted.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand.
(define_expand "vec_shr_<mode>"
  [(set (match_operand:VI_128 0 "register_operand" "")
	(lshiftrt:V1TI
	  (match_operand:VI_128 1 "register_operand" "")
	  (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
  "TARGET_SSE2"
{
  int opno;

  /* Operands 0 and 1 are the two vector registers; operand 2 stays.  */
  for (opno = 0; opno < 2; opno++)
    operands[opno] = gen_lowpart (V1TImode, operands[opno]);
})
5988
;; Logical right shift of a VIMAX_AVX2 value by a constant accepted by
;; const_0_to_255_mul_8_operand.  Alternative 0 (isa noavx) prints
;; psrldq with the destination tied to operand 1; alternative 1
;; (isa avx) prints the three-operand vpsrldq.
(define_insn "<sse2_avx2>_lshr<mode>3"
  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
	(lshiftrt:VIMAX_AVX2
	  (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
	  (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
  "TARGET_SSE2"
{
  /* The immediate that gets printed is the RTL shift count divided
     by 8.  */
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);

  if (which_alternative == 0)
    return "psrldq\t{%2, %0|%0, %2}";
  if (which_alternative == 1)
    return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
6015
6016
;; Max/min expander for the VI124_256 modes, covering every code in the
;; maxmin iterator.  Requires AVX2.  The only preparation is a call to
;; ix86_fixup_binary_operands_no_copy on the operands; the pattern itself
;; is then emitted as written.
6017 (define_expand "<code><mode>3"
6018 [(set (match_operand:VI124_256 0 "register_operand" "")
6019 (maxmin:VI124_256
6020 (match_operand:VI124_256 1 "nonimmediate_operand" "")
6021 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
6022 "TARGET_AVX2"
6023 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
6024
;; AVX2 max/min instruction for the VI124_256 modes.  Operands 1 and 2
;; are marked commutative ("%"); operand 2 may be memory.  The insn
;; matches only when ix86_binary_operator_ok accepts the operands.
;; Single three-operand, VEX-prefixed form.
6025 (define_insn "*avx2_<code><mode>3"
6026 [(set (match_operand:VI124_256 0 "register_operand" "=x")
6027 (maxmin:VI124_256
6028 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
6029 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
6030 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6031 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6032 [(set_attr "type" "sseiadd")
6033 (set_attr "prefix_extra" "1")
6034 (set_attr "prefix" "vex")
6035 (set_attr "mode" "OI")])
6036
;; Max/min expander for the VI8_AVX2 modes, enabled for SSE4.2.
;; The operation is handed to ix86_expand_int_vcond as a conditional
;; select: the comparison is operand 1 GT/GTU operand 2 (GTU for the
;; unsigned codes); for max the "true" value is operand 1 and the
;; "false" value operand 2, for min they are swapped.
(define_expand "<code><mode>3"
  [(set (match_operand:VI8_AVX2 0 "register_operand" "")
	(maxmin:VI8_AVX2
	  (match_operand:VI8_AVX2 1 "register_operand" "")
	  (match_operand:VI8_AVX2 2 "register_operand" "")))]
  "TARGET_SSE4_2"
{
  const bool want_max = (<CODE> == SMAX || <CODE> == UMAX);
  const bool is_unsigned = (<CODE> == UMAX || <CODE> == UMIN);
  rtx sel[6];
  bool expanded;

  /* sel[] mirrors the operand layout of a vcond expander:
     0 = destination, 1/2 = values chosen on true/false,
     3 = comparison, 4/5 = comparison operands.  */
  sel[0] = operands[0];
  sel[1] = operands[want_max ? 1 : 2];
  sel[2] = operands[want_max ? 2 : 1];
  sel[3] = gen_rtx_fmt_ee (is_unsigned ? GTU : GT, VOIDmode,
			   operands[1], operands[2]);
  sel[4] = operands[1];
  sel[5] = operands[2];

  expanded = ix86_expand_int_vcond (sel);
  gcc_assert (expanded);
  DONE;
})
6071
;; Signed max/min expander for the VI124_128 modes.
;; With SSE4.1, or for V8HImode, the operands are only fixed up and the
;; pattern is emitted as written.  Otherwise both inputs are forced into
;; registers and the operation is expanded through ix86_expand_int_vcond
;; as "op1 GT op2 ? a : b", with (a, b) = (op1, op2) for SMAX and
;; (op2, op1) for SMIN.
(define_expand "<code><mode>3"
  [(set (match_operand:VI124_128 0 "register_operand" "")
	(smaxmin:VI124_128
	  (match_operand:VI124_128 1 "nonimmediate_operand" "")
	  (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  rtx sel[6];
  bool expanded;

  if (!TARGET_SSE4_1 && <MODE>mode != V8HImode)
    {
      /* No direct instruction: go through a compare-and-select.  */
      sel[0] = operands[0];
      operands[1] = force_reg (<MODE>mode, operands[1]);
      operands[2] = force_reg (<MODE>mode, operands[2]);

      sel[1] = operands[<CODE> == SMAX ? 1 : 2];
      sel[2] = operands[<CODE> == SMAX ? 2 : 1];
      sel[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      sel[4] = operands[1];
      sel[5] = operands[2];

      expanded = ix86_expand_int_vcond (sel);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
6110
;; SSE4.1 signed max/min instruction for the VI14_128 modes.  Operands 1
;; and 2 are commutative ("%"); operand 2 may be memory.  Alternative 0
;; (isa noavx) ties the destination to operand 1; alternative 1 (isa avx)
;; is the three-operand v-prefixed form.
6111 (define_insn "*sse4_1_<code><mode>3"
6112 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
6113 (smaxmin:VI14_128
6114 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
6115 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
6116 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6117 "@
6118 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6119 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6120 [(set_attr "isa" "noavx,avx")
6121 (set_attr "type" "sseiadd")
6122 (set_attr "prefix_extra" "1,*")
6123 (set_attr "prefix" "orig,vex")
6124 (set_attr "mode" "TI")])
6125
;; SSE2 signed max/min instruction for V8HI.  Operands 1 and 2 are
;; commutative ("%"); operand 2 may be memory.  Alternative 0 (isa noavx)
;; ties the destination to operand 1; alternative 1 (isa avx) is the
;; three-operand v-prefixed form.
6126 (define_insn "*<code>v8hi3"
6127 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6128 (smaxmin:V8HI
6129 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
6130 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
6131 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6132 "@
6133 p<maxmin_int>w\t{%2, %0|%0, %2}
6134 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
6135 [(set_attr "isa" "noavx,avx")
6136 (set_attr "type" "sseiadd")
6137 (set_attr "prefix_data16" "1,*")
6138 (set_attr "prefix_extra" "*,1")
6139 (set_attr "prefix" "orig,vex")
6140 (set_attr "mode" "TI")])
6141
;; Unsigned max/min expander for the VI124_128 modes.
;; Three strategies, tried in this order:
;;  - SSE4.1, or V16QImode: fix up the operands and emit the pattern as
;;    written.
;;  - UMAX on V8HImode: emit sse2_ussubv8hi3 (op1, op2) followed by an
;;    add of op2.  A fresh temporary holds the difference when the
;;    destination is rtx_equal_p to op2.
;;  - Otherwise: force both inputs into registers and expand through
;;    ix86_expand_int_vcond as "op1 GTU op2 ? a : b", with
;;    (a, b) = (op1, op2) for UMAX and (op2, op1) for UMIN.
(define_expand "<code><mode>3"
  [(set (match_operand:VI124_128 0 "register_operand" "")
	(umaxmin:VI124_128
	  (match_operand:VI124_128 1 "nonimmediate_operand" "")
	  (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
  else if (<CODE> == UMAX && <MODE>mode == V8HImode)
    {
      rtx dest = operands[0];
      rtx rhs = operands[2];
      rtx diff = dest;

      operands[1] = force_reg (<MODE>mode, operands[1]);

      /* The subtraction result must not overwrite RHS, which is read
	 again by the add.  */
      if (rtx_equal_p (diff, rhs))
	diff = gen_reg_rtx (V8HImode);

      emit_insn (gen_sse2_ussubv8hi3 (diff, operands[1], rhs));
      emit_insn (gen_addv8hi3 (dest, diff, rhs));
      DONE;
    }
  else
    {
      rtx sel[6];
      bool expanded;

      operands[1] = force_reg (<MODE>mode, operands[1]);
      operands[2] = force_reg (<MODE>mode, operands[2]);

      sel[0] = operands[0];
      sel[1] = operands[<CODE> == UMAX ? 1 : 2];
      sel[2] = operands[<CODE> == UMAX ? 2 : 1];
      sel[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      sel[4] = operands[1];
      sel[5] = operands[2];

      expanded = ix86_expand_int_vcond (sel);
      gcc_assert (expanded);
      DONE;
    }
})
6191
;; SSE4.1 unsigned max/min instruction for the VI24_128 modes.  Operands
;; 1 and 2 are commutative ("%"); operand 2 may be memory.  Alternative 0
;; (isa noavx) ties the destination to operand 1; alternative 1 (isa avx)
;; is the three-operand v-prefixed form.
6192 (define_insn "*sse4_1_<code><mode>3"
6193 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6194 (umaxmin:VI24_128
6195 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6196 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6197 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6198 "@
6199 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6200 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6201 [(set_attr "isa" "noavx,avx")
6202 (set_attr "type" "sseiadd")
6203 (set_attr "prefix_extra" "1,*")
6204 (set_attr "prefix" "orig,vex")
6205 (set_attr "mode" "TI")])
6206
;; SSE2 unsigned max/min instruction for V16QI.  Operands 1 and 2 are
;; commutative ("%"); operand 2 may be memory.  Alternative 0 (isa noavx)
;; ties the destination to operand 1; alternative 1 (isa avx) is the
;; three-operand v-prefixed form.
6207 (define_insn "*<code>v16qi3"
6208 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6209 (umaxmin:V16QI
6210 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6211 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6212 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6213 "@
6214 p<maxmin_int>b\t{%2, %0|%0, %2}
6215 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6216 [(set_attr "isa" "noavx,avx")
6217 (set_attr "type" "sseiadd")
6218 (set_attr "prefix_data16" "1,*")
6219 (set_attr "prefix_extra" "*,1")
6220 (set_attr "prefix" "orig,vex")
6221 (set_attr "mode" "TI")])
6222
6223 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6224 ;;
6225 ;; Parallel integral comparisons
6226 ;;
6227 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6228
;; Named expander for element-wise equality on the VI_256 modes (AVX2).
;; The only preparation is ix86_fixup_binary_operands_no_copy with code
;; EQ; the pattern is then emitted as written.
6229 (define_expand "avx2_eq<mode>3"
6230 [(set (match_operand:VI_256 0 "register_operand" "")
6231 (eq:VI_256
6232 (match_operand:VI_256 1 "nonimmediate_operand" "")
6233 (match_operand:VI_256 2 "nonimmediate_operand" "")))]
6234 "TARGET_AVX2"
6235 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
6236
;; AVX2 vpcmpeq instruction for the VI_256 modes.  Operands 1 and 2 are
;; commutative ("%"); operand 2 may be memory.  Matches only when
;; ix86_binary_operator_ok accepts the operands.
6237 (define_insn "*avx2_eq<mode>3"
6238 [(set (match_operand:VI_256 0 "register_operand" "=x")
6239 (eq:VI_256
6240 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6241 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6242 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6243 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6244 [(set_attr "type" "ssecmp")
6245 (set_attr "prefix_extra" "1")
6246 (set_attr "prefix" "vex")
6247 (set_attr "mode" "OI")])
6248
;; SSE4.1 pcmpeqq instruction (V2DI equality).  Operands 1 and 2 are
;; commutative ("%"); operand 2 may be memory.  Alternative 0 (isa noavx)
;; ties the destination to operand 1; alternative 1 (isa avx) is the
;; three-operand vpcmpeqq form.
6249 (define_insn "*sse4_1_eqv2di3"
6250 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6251 (eq:V2DI
6252 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6253 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6254 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6255 "@
6256 pcmpeqq\t{%2, %0|%0, %2}
6257 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6258 [(set_attr "isa" "noavx,avx")
6259 (set_attr "type" "ssecmp")
6260 (set_attr "prefix_extra" "1")
6261 (set_attr "prefix" "orig,vex")
6262 (set_attr "mode" "TI")])
6263
;; SSE2 pcmpeq instruction for the VI124_128 modes.  Disabled when
;; TARGET_XOP is set.  Operands 1 and 2 are commutative ("%"); operand 2
;; may be memory.  Alternative 0 (isa noavx) ties the destination to
;; operand 1; alternative 1 (isa avx) is the three-operand form.
6264 (define_insn "*sse2_eq<mode>3"
6265 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6266 (eq:VI124_128
6267 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6268 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6269 "TARGET_SSE2 && !TARGET_XOP
6270 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6271 "@
6272 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6273 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6274 [(set_attr "isa" "noavx,avx")
6275 (set_attr "type" "ssecmp")
6276 (set_attr "prefix_data16" "1,*")
6277 (set_attr "prefix" "orig,vex")
6278 (set_attr "mode" "TI")])
6279
;; Named expander for element-wise equality on the VI124_128 modes.
;; Enabled for SSE2 when TARGET_XOP is not set.  The only preparation is
;; ix86_fixup_binary_operands_no_copy with code EQ.
6280 (define_expand "sse2_eq<mode>3"
6281 [(set (match_operand:VI124_128 0 "register_operand" "")
6282 (eq:VI124_128
6283 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6284 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6285 "TARGET_SSE2 && !TARGET_XOP "
6286 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
6287
;; Named expander for V2DI equality (SSE4.1).  The only preparation is
;; ix86_fixup_binary_operands_no_copy with code EQ.
6288 (define_expand "sse4_1_eqv2di3"
6289 [(set (match_operand:V2DI 0 "register_operand" "")
6290 (eq:V2DI
6291 (match_operand:V2DI 1 "nonimmediate_operand" "")
6292 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6293 "TARGET_SSE4_1"
6294 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
6295
;; SSE4.2 pcmpgtq instruction (V2DI greater-than).  Operand 1 must be a
;; register and is not commutative with operand 2, which may be memory.
;; Alternative 0 (isa noavx) ties the destination to operand 1;
;; alternative 1 (isa avx) is the three-operand vpcmpgtq form.
6296 (define_insn "sse4_2_gtv2di3"
6297 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6298 (gt:V2DI
6299 (match_operand:V2DI 1 "register_operand" "0,x")
6300 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6301 "TARGET_SSE4_2"
6302 "@
6303 pcmpgtq\t{%2, %0|%0, %2}
6304 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6305 [(set_attr "isa" "noavx,avx")
6306 (set_attr "type" "ssecmp")
6307 (set_attr "prefix_extra" "1")
6308 (set_attr "prefix" "orig,vex")
6309 (set_attr "mode" "TI")])
6310
;; AVX2 vpcmpgt instruction for the VI_256 modes.  Operand 1 must be a
;; register; operand 2 may be memory.  Single three-operand,
;; VEX-prefixed form.
6311 (define_insn "avx2_gt<mode>3"
6312 [(set (match_operand:VI_256 0 "register_operand" "=x")
6313 (gt:VI_256
6314 (match_operand:VI_256 1 "register_operand" "x")
6315 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6316 "TARGET_AVX2"
6317 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6318 [(set_attr "type" "ssecmp")
6319 (set_attr "prefix_extra" "1")
6320 (set_attr "prefix" "vex")
6321 (set_attr "mode" "OI")])
6322
;; SSE2 pcmpgt instruction for the VI124_128 modes.  Disabled when
;; TARGET_XOP is set.  Operand 1 must be a register; operand 2 may be
;; memory.  Alternative 0 (isa noavx) ties the destination to operand 1;
;; alternative 1 (isa avx) is the three-operand form.
6323 (define_insn "sse2_gt<mode>3"
6324 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6325 (gt:VI124_128
6326 (match_operand:VI124_128 1 "register_operand" "0,x")
6327 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6328 "TARGET_SSE2 && !TARGET_XOP"
6329 "@
6330 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6331 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6332 [(set_attr "isa" "noavx,avx")
6333 (set_attr "type" "ssecmp")
6334 (set_attr "prefix_data16" "1,*")
6335 (set_attr "prefix" "orig,vex")
6336 (set_attr "mode" "TI")])
6337
;; Vector conditional select for 256-bit modes (AVX2): operand 0 receives
;; operand 1 or operand 2 depending on comparison operator 3 applied to
;; the VI_256 operands 4 and 5.  The data mode (V_256) and the comparison
;; mode (VI_256) may differ but must have the same number of elements.
;; All work is delegated to ix86_expand_int_vcond, whose success is
;; asserted.
6338 (define_expand "vcond<V_256:mode><VI_256:mode>"
6339 [(set (match_operand:V_256 0 "register_operand" "")
6340 (if_then_else:V_256
6341 (match_operator 3 ""
6342 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6343 (match_operand:VI_256 5 "general_operand" "")])
6344 (match_operand:V_256 1 "" "")
6345 (match_operand:V_256 2 "" "")))]
6346 "TARGET_AVX2
6347 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6348 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6349 {
6350 bool ok = ix86_expand_int_vcond (operands);
6351 gcc_assert (ok);
6352 DONE;
6353 })
6354
;; Vector conditional select for 128-bit modes (SSE2): operand 0 receives
;; operand 1 or operand 2 depending on comparison operator 3 applied to
;; the VI124_128 operands 4 and 5.  The data mode (V_128) and the
;; comparison mode may differ but must have the same number of elements.
;; All work is delegated to ix86_expand_int_vcond, whose success is
;; asserted.
6355 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6356 [(set (match_operand:V_128 0 "register_operand" "")
6357 (if_then_else:V_128
6358 (match_operator 3 ""
6359 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6360 (match_operand:VI124_128 5 "general_operand" "")])
6361 (match_operand:V_128 1 "" "")
6362 (match_operand:V_128 2 "" "")))]
6363 "TARGET_SSE2
6364 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6365 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6366 {
6367 bool ok = ix86_expand_int_vcond (operands);
6368 gcc_assert (ok);
6369 DONE;
6370 })
6371
;; Vector conditional select with a V2DI comparison and VI8F_128 data
;; (SSE4.2): operand 0 receives operand 1 or operand 2 depending on
;; comparison operator 3 applied to operands 4 and 5.  All work is
;; delegated to ix86_expand_int_vcond, whose success is asserted.
6372 (define_expand "vcond<VI8F_128:mode>v2di"
6373 [(set (match_operand:VI8F_128 0 "register_operand" "")
6374 (if_then_else:VI8F_128
6375 (match_operator 3 ""
6376 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6377 (match_operand:V2DI 5 "general_operand" "")])
6378 (match_operand:VI8F_128 1 "" "")
6379 (match_operand:VI8F_128 2 "" "")))]
6380 "TARGET_SSE4_2"
6381 {
6382 bool ok = ix86_expand_int_vcond (operands);
6383 gcc_assert (ok);
6384 DONE;
6385 })
6386
;; "vcondu" counterpart of the 256-bit vcond expander (AVX2): operand 0
;; receives operand 1 or operand 2 depending on comparison operator 3
;; applied to the VI_256 operands 4 and 5.  Data and comparison modes
;; must have the same number of elements.  Unlike the vcond expander,
;; operand 5 is restricted to nonimmediate_operand and operands 1 and 2
;; use general_operand.  All work is delegated to ix86_expand_int_vcond,
;; whose success is asserted.
6387 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6388 [(set (match_operand:V_256 0 "register_operand" "")
6389 (if_then_else:V_256
6390 (match_operator 3 ""
6391 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6392 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6393 (match_operand:V_256 1 "general_operand" "")
6394 (match_operand:V_256 2 "general_operand" "")))]
6395 "TARGET_AVX2
6396 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6397 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6398 {
6399 bool ok = ix86_expand_int_vcond (operands);
6400 gcc_assert (ok);
6401 DONE;
6402 })
6403
;; "vcondu" counterpart of the 128-bit vcond expander (SSE2): operand 0
;; receives operand 1 or operand 2 depending on comparison operator 3
;; applied to the VI124_128 operands 4 and 5.  Data and comparison modes
;; must have the same number of elements.  All work is delegated to
;; ix86_expand_int_vcond, whose success is asserted.
6404 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6405 [(set (match_operand:V_128 0 "register_operand" "")
6406 (if_then_else:V_128
6407 (match_operator 3 ""
6408 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6409 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6410 (match_operand:V_128 1 "general_operand" "")
6411 (match_operand:V_128 2 "general_operand" "")))]
6412 "TARGET_SSE2
6413 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6414 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6415 {
6416 bool ok = ix86_expand_int_vcond (operands);
6417 gcc_assert (ok);
6418 DONE;
6419 })
6420
;; "vcondu" counterpart of the V2DI-comparison vcond expander (SSE4.2):
;; operand 0 receives operand 1 or operand 2 depending on comparison
;; operator 3 applied to the V2DI operands 4 and 5.  All work is
;; delegated to ix86_expand_int_vcond, whose success is asserted.
6421 (define_expand "vcondu<VI8F_128:mode>v2di"
6422 [(set (match_operand:VI8F_128 0 "register_operand" "")
6423 (if_then_else:VI8F_128
6424 (match_operator 3 ""
6425 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6426 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6427 (match_operand:VI8F_128 1 "general_operand" "")
6428 (match_operand:VI8F_128 2 "general_operand" "")))]
6429 "TARGET_SSE4_2"
6430 {
6431 bool ok = ix86_expand_int_vcond (operands);
6432 gcc_assert (ok);
6433 DONE;
6434 })
6435
;; Modes handled by the variable-selector vec_perm expander.  The six
;; 128-bit modes are listed unconditionally; the six 256-bit modes are
;; enabled only with TARGET_AVX2.
6436 (define_mode_iterator VEC_PERM_AVX2
6437 [V16QI V8HI V4SI V2DI V4SF V2DF
6438 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6439 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6440 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
6441
;; Vector permutation with a selector held in a register: operand 0 is
;; the result, operands 1 and 2 the input vectors, operand 3 the selector
;; in the matching integer vector mode.  Enabled for SSSE3, AVX or XOP;
;; the expansion is done entirely by ix86_expand_vec_perm.
6442 (define_expand "vec_perm<mode>"
6443 [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
6444 (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
6445 (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
6446 (match_operand:<sseintvecmode> 3 "register_operand" "")]
6447 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6448 {
6449 ix86_expand_vec_perm (operands);
6450 DONE;
6451 })
6452
;; Modes handled by the vec_perm_const expander, each with its own
;; enabling condition: TARGET_SSE for V4SF/V4SI/V2DF/V2DI, TARGET_SSE2
;; for V16QI/V8HI, TARGET_AVX for V8SF/V4DF/V8SI/V4DI, and TARGET_AVX2
;; for V32QI/V16HI.
6453 (define_mode_iterator VEC_PERM_CONST
6454 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6455 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6456 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6457 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6458 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6459 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
6460
;; Vector permutation for the VEC_PERM_CONST modes: operand 0 is the
;; result, operands 1 and 2 the input vectors, operand 3 the selector
;; (no predicate).  The expander itself has no condition; availability
;; per mode comes from the iterator.  ix86_expand_vec_perm_const decides
;; whether the permutation can be done, and the expansion FAILs when it
;; reports that it cannot.
(define_expand "vec_perm_const<mode>"
  [(match_operand:VEC_PERM_CONST 0 "register_operand" "")
   (match_operand:VEC_PERM_CONST 1 "register_operand" "")
   (match_operand:VEC_PERM_CONST 2 "register_operand" "")
   (match_operand:<sseintvecmode> 3 "" "")]
  ""
{
  if (!ix86_expand_vec_perm_const (operands))
    FAIL;
  DONE;
})
6473
6474 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6475 ;;
6476 ;; Parallel bitwise logical operations
6477 ;;
6478 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6479
;; Bitwise complement of a VI vector, expressed as an XOR with a vector
;; whose every element is constm1_rtx.  That constant is built here,
;; forced into a register, and supplied as the (match_dup 2) operand.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:VI 0 "register_operand" "")
	(xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
		(match_dup 2)))]
  "TARGET_SSE"
{
  const int nunits = GET_MODE_NUNITS (<MODE>mode);
  rtvec ones = rtvec_alloc (nunits);
  rtx all_ones;
  int idx;

  /* Fill every element slot with -1.  */
  for (idx = nunits - 1; idx >= 0; --idx)
    RTVEC_ELT (ones, idx) = constm1_rtx;

  all_ones = gen_rtx_CONST_VECTOR (<MODE>mode, ones);
  operands[2] = force_reg (<MODE>mode, all_ones);
})
6494
;; Named expander for and-not on the VI_AVX2 modes: operand 0 =
;; (NOT operand 1) AND operand 2.  Operand 1 must be a register; operand
;; 2 may be memory.  There is no preparation code, so the pattern is
;; emitted exactly as written.
6495 (define_expand "<sse2_avx2>_andnot<mode>3"
6496 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6497 (and:VI_AVX2
6498 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6499 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
6500 "TARGET_SSE2")
6501
;; And-not instruction for every VI mode: operand 0 =
;; (NOT operand 1) AND operand 2.
;; The mnemonic depends on the insn's "mode" attribute: OI and TI use the
;; integer form "pandn", V8SF and V4SF use the float form "andnps".
;; Alternative 0 (isa noavx) prints the two-operand form; alternative 1
;; (isa avx) prints the three-operand form with a "v" prefix.
;; The "mode" attribute is V8SF for vectors wider than 16 bytes without
;; AVX2, V4SF without SSE2, and <sseinsnmode> otherwise.
(define_insn "*andnot<mode>3"
  [(set (match_operand:VI 0 "register_operand" "=x,x")
	(and:VI
	  (not:VI (match_operand:VI 1 "register_operand" "0,x"))
	  (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE"
{
  /* Returned to the caller, hence static.  */
  static char insn_text[32];
  const int insn_mode = get_attr_mode (insn);
  const char *mnemonic;
  const char *layout;

  if (insn_mode == MODE_OI || insn_mode == MODE_TI)
    {
      /* Integer form; the 256-bit variant additionally needs AVX2.  */
      if (insn_mode == MODE_OI)
	gcc_assert (TARGET_AVX2);
      gcc_assert (TARGET_SSE2);
      mnemonic = "pandn";
    }
  else if (insn_mode == MODE_V8SF || insn_mode == MODE_V4SF)
    {
      /* Float form; the 256-bit variant additionally needs AVX.  */
      if (insn_mode == MODE_V8SF)
	gcc_assert (TARGET_AVX);
      gcc_assert (TARGET_SSE);
      mnemonic = "andnps";
    }
  else
    gcc_unreachable ();

  if (which_alternative == 0)
    layout = "%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    layout = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (insn_text, sizeof (insn_text), layout, mnemonic);
  return insn_text;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
	    (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
     (cond [(and (not (match_test "TARGET_AVX2"))
		 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
	      (const_string "V8SF")
	    (not (match_test "TARGET_SSE2"))
	      (const_string "V4SF")
	   ]
	   (const_string "<sseinsnmode>")))])
6567
;; Expander for the bitwise logical operations in the any_logic iterator
;; on every VI mode.  The only preparation is
;; ix86_fixup_binary_operands_no_copy; the pattern is then emitted as
;; written.
6568 (define_expand "<code><mode>3"
6569 [(set (match_operand:VI 0 "register_operand" "")
6570 (any_logic:VI
6571 (match_operand:VI 1 "nonimmediate_operand" "")
6572 (match_operand:VI 2 "nonimmediate_operand" "")))]
6573 "TARGET_SSE"
6574 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
6575
;; Bitwise logical instruction (any_logic iterator) for every VI mode.
;; Operands 1 and 2 are commutative ("%"); the insn matches only when
;; ix86_binary_operator_ok accepts the operands.
;; The mnemonic depends on the insn's "mode" attribute: OI and TI use the
;; integer form "p<logic>", V8SF and V4SF use the float form "<logic>ps".
;; Alternative 0 (isa noavx) prints the two-operand form; alternative 1
;; (isa avx) prints the three-operand form with a "v" prefix.
;; The "mode" attribute is V8SF for vectors wider than 16 bytes without
;; AVX2, V4SF without SSE2, and <sseinsnmode> otherwise.
(define_insn "*<code><mode>3"
  [(set (match_operand:VI 0 "register_operand" "=x,x")
	(any_logic:VI
	  (match_operand:VI 1 "nonimmediate_operand" "%0,x")
	  (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  /* Returned to the caller, hence static.  */
  static char insn_text[32];
  const int insn_mode = get_attr_mode (insn);
  const char *mnemonic;
  const char *layout;

  if (insn_mode == MODE_OI || insn_mode == MODE_TI)
    {
      /* Integer form; the 256-bit variant additionally needs AVX2.  */
      if (insn_mode == MODE_OI)
	gcc_assert (TARGET_AVX2);
      gcc_assert (TARGET_SSE2);
      mnemonic = "p<logic>";
    }
  else if (insn_mode == MODE_V8SF || insn_mode == MODE_V4SF)
    {
      /* Float form; the 256-bit variant additionally needs AVX.  */
      if (insn_mode == MODE_V8SF)
	gcc_assert (TARGET_AVX);
      gcc_assert (TARGET_SSE);
      mnemonic = "<logic>ps";
    }
  else
    gcc_unreachable ();

  if (which_alternative == 0)
    layout = "%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    layout = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (insn_text, sizeof (insn_text), layout, mnemonic);
  return insn_text;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
	    (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
     (cond [(and (not (match_test "TARGET_AVX2"))
		 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
	      (const_string "V8SF")
	    (not (match_test "TARGET_SSE2"))
	      (const_string "V4SF")
	   ]
	   (const_string "<sseinsnmode>")))])
6642
;; TFmode and-not: operand 0 = (not operand 1) and operand 2.
;; Alternative 0 emits two-operand pandn with operand 1 tied to the
;; destination; alternative 1 emits three-operand vpandn.
;; Requires TARGET_SSE2.
6643 (define_insn "*andnottf3"
6644 [(set (match_operand:TF 0 "register_operand" "=x,x")
6645 (and:TF
6646 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6647 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6648 "TARGET_SSE2"
6649 "@
6650 pandn\t{%2, %0|%0, %2}
6651 vpandn\t{%2, %1, %0|%0, %1, %2}"
6652 [(set_attr "isa" "noavx,avx")
6653 (set_attr "type" "sselog")
6654 (set_attr "prefix_data16" "1,*")
6655 (set_attr "prefix" "orig,vex")
6656 (set_attr "mode" "TI")])
6657
;; Expander for the any_logic operations in TFmode, enabled under
;; TARGET_SSE2.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy (defined outside this chunk) before
;; the template is emitted.
6658 (define_expand "<code>tf3"
6659 [(set (match_operand:TF 0 "register_operand" "")
6660 (any_logic:TF
6661 (match_operand:TF 1 "nonimmediate_operand" "")
6662 (match_operand:TF 2 "nonimmediate_operand" "")))]
6663 "TARGET_SSE2"
6664 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
6665
;; Insn for the any_logic operations in TFmode, always emitted with the
;; integer mnemonic: p<logic> for the two-operand alternative 0 (operand 1
;; tied to the destination) and vp<logic> for the three-operand alternative 1.
;; Requires TARGET_SSE2 and that ix86_binary_operator_ok accepts the operands.
6666 (define_insn "*<code>tf3"
6667 [(set (match_operand:TF 0 "register_operand" "=x,x")
6668 (any_logic:TF
6669 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6670 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6671 "TARGET_SSE2
6672 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6673 "@
6674 p<logic>\t{%2, %0|%0, %2}
6675 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6676 [(set_attr "isa" "noavx,avx")
6677 (set_attr "type" "sselog")
6678 (set_attr "prefix_data16" "1,*")
6679 (set_attr "prefix" "orig,vex")
6680 (set_attr "mode" "TI")])
6681
6682 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6683 ;;
6684 ;; Parallel integral element swizzling
6685 ;;
6686 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6687
;; Expander for vec_pack_trunc on the VI248_AVX2 modes; the result (operand 0)
;; has mode <ssepackmode>.  Both inputs are re-viewed in <ssepackmode> with
;; gen_lowpart and handed to ix86_expand_vec_extract_even_odd with a last
;; argument of 0 (presumably selecting the even-numbered elements -- confirm
;; against the helper's definition).  DONE ends the expansion, so the operand
;; list above only declares the operands; no RTL is generated from it.
6688 (define_expand "vec_pack_trunc_<mode>"
6689 [(match_operand:<ssepackmode> 0 "register_operand" "")
6690 (match_operand:VI248_AVX2 1 "register_operand" "")
6691 (match_operand:VI248_AVX2 2 "register_operand" "")]
6692 "TARGET_SSE2"
6693 {
6694 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6695 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6696 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
6697 DONE;
6698 })
6699
;; packsswb / vpacksswb: operands 1 and 2 (mode <sseunpackmode>) are each
;; narrowed with signed saturation (ss_truncate) to <ssehalfvecmode>, and the
;; two halves are concatenated into the VI1_AVX2 result.
;; Alternative 0 ties operand 1 to the destination; alternative 1 is the
;; three-operand "v"-prefixed form.
6700 (define_insn "<sse2_avx2>_packsswb"
6701 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6702 (vec_concat:VI1_AVX2
6703 (ss_truncate:<ssehalfvecmode>
6704 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6705 (ss_truncate:<ssehalfvecmode>
6706 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6707 "TARGET_SSE2"
6708 "@
6709 packsswb\t{%2, %0|%0, %2}
6710 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6711 [(set_attr "isa" "noavx,avx")
6712 (set_attr "type" "sselog")
6713 (set_attr "prefix_data16" "1,*")
6714 (set_attr "prefix" "orig,vex")
6715 (set_attr "mode" "<sseinsnmode>")])
6716
;; packssdw / vpackssdw: operands 1 and 2 (mode <sseunpackmode>) are each
;; narrowed with signed saturation (ss_truncate) to <ssehalfvecmode>, and the
;; two halves are concatenated into the VI2_AVX2 result.
;; Alternative 0 ties operand 1 to the destination; alternative 1 is the
;; three-operand "v"-prefixed form.
6717 (define_insn "<sse2_avx2>_packssdw"
6718 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6719 (vec_concat:VI2_AVX2
6720 (ss_truncate:<ssehalfvecmode>
6721 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6722 (ss_truncate:<ssehalfvecmode>
6723 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6724 "TARGET_SSE2"
6725 "@
6726 packssdw\t{%2, %0|%0, %2}
6727 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6728 [(set_attr "isa" "noavx,avx")
6729 (set_attr "type" "sselog")
6730 (set_attr "prefix_data16" "1,*")
6731 (set_attr "prefix" "orig,vex")
6732 (set_attr "mode" "<sseinsnmode>")])
6733
;; packuswb / vpackuswb: operands 1 and 2 (mode <sseunpackmode>) are each
;; narrowed with unsigned saturation (us_truncate) to <ssehalfvecmode>, and
;; the two halves are concatenated into the VI1_AVX2 result.
;; Alternative 0 ties operand 1 to the destination; alternative 1 is the
;; three-operand "v"-prefixed form.
6734 (define_insn "<sse2_avx2>_packuswb"
6735 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6736 (vec_concat:VI1_AVX2
6737 (us_truncate:<ssehalfvecmode>
6738 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6739 (us_truncate:<ssehalfvecmode>
6740 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6741 "TARGET_SSE2"
6742 "@
6743 packuswb\t{%2, %0|%0, %2}
6744 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6745 [(set_attr "isa" "noavx,avx")
6746 (set_attr "type" "sselog")
6747 (set_attr "prefix_data16" "1,*")
6748 (set_attr "prefix" "orig,vex")
6749 (set_attr "mode" "<sseinsnmode>")])
6750
;; vpunpckhbw on V32QI.  In the 64-element concatenation, operand 1 supplies
;; indices 0-31 and operand 2 indices 32-63.  The selection alternates
;; elements 8-15 of operand 1 with elements 8-15 of operand 2 (indices
;; 40-47), then does the same for elements 24-31 (indices 56-63): the upper
;; eight bytes of each 16-byte half are interleaved independently.
6751 (define_insn "avx2_interleave_highv32qi"
6752 [(set (match_operand:V32QI 0 "register_operand" "=x")
6753 (vec_select:V32QI
6754 (vec_concat:V64QI
6755 (match_operand:V32QI 1 "register_operand" "x")
6756 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6757 (parallel [(const_int 8) (const_int 40)
6758 (const_int 9) (const_int 41)
6759 (const_int 10) (const_int 42)
6760 (const_int 11) (const_int 43)
6761 (const_int 12) (const_int 44)
6762 (const_int 13) (const_int 45)
6763 (const_int 14) (const_int 46)
6764 (const_int 15) (const_int 47)
6765 (const_int 24) (const_int 56)
6766 (const_int 25) (const_int 57)
6767 (const_int 26) (const_int 58)
6768 (const_int 27) (const_int 59)
6769 (const_int 28) (const_int 60)
6770 (const_int 29) (const_int 61)
6771 (const_int 30) (const_int 62)
6772 (const_int 31) (const_int 63)])))]
6773 "TARGET_AVX2"
6774 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6775 [(set_attr "type" "sselog")
6776 (set_attr "prefix" "vex")
6777 (set_attr "mode" "OI")])
6778
;; punpckhbw / vpunpckhbw on V16QI.  In the 32-element concatenation,
;; operand 1 supplies indices 0-15 and operand 2 indices 16-31.  The result
;; alternates elements 8-15 of operand 1 with elements 8-15 of operand 2
;; (indices 24-31).
;; Alternative 0 ties operand 1 to the destination; alternative 1 is the
;; three-operand "v"-prefixed form.
6779 (define_insn "vec_interleave_highv16qi"
6780 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6781 (vec_select:V16QI
6782 (vec_concat:V32QI
6783 (match_operand:V16QI 1 "register_operand" "0,x")
6784 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6785 (parallel [(const_int 8) (const_int 24)
6786 (const_int 9) (const_int 25)
6787 (const_int 10) (const_int 26)
6788 (const_int 11) (const_int 27)
6789 (const_int 12) (const_int 28)
6790 (const_int 13) (const_int 29)
6791 (const_int 14) (const_int 30)
6792 (const_int 15) (const_int 31)])))]
6793 "TARGET_SSE2"
6794 "@
6795 punpckhbw\t{%2, %0|%0, %2}
6796 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6797 [(set_attr "isa" "noavx,avx")
6798 (set_attr "type" "sselog")
6799 (set_attr "prefix_data16" "1,*")
6800 (set_attr "prefix" "orig,vex")
6801 (set_attr "mode" "TI")])
6802
;; vpunpcklbw on V32QI.  In the 64-element concatenation, operand 1 supplies
;; indices 0-31 and operand 2 indices 32-63.  The selection alternates
;; elements 0-7 of operand 1 with elements 0-7 of operand 2 (indices 32-39),
;; then does the same for elements 16-23 (indices 48-55): the lower eight
;; bytes of each 16-byte half are interleaved independently.
6803 (define_insn "avx2_interleave_lowv32qi"
6804 [(set (match_operand:V32QI 0 "register_operand" "=x")
6805 (vec_select:V32QI
6806 (vec_concat:V64QI
6807 (match_operand:V32QI 1 "register_operand" "x")
6808 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6809 (parallel [(const_int 0) (const_int 32)
6810 (const_int 1) (const_int 33)
6811 (const_int 2) (const_int 34)
6812 (const_int 3) (const_int 35)
6813 (const_int 4) (const_int 36)
6814 (const_int 5) (const_int 37)
6815 (const_int 6) (const_int 38)
6816 (const_int 7) (const_int 39)
6817 (const_int 16) (const_int 48)
6818 (const_int 17) (const_int 49)
6819 (const_int 18) (const_int 50)
6820 (const_int 19) (const_int 51)
6821 (const_int 20) (const_int 52)
6822 (const_int 21) (const_int 53)
6823 (const_int 22) (const_int 54)
6824 (const_int 23) (const_int 55)])))]
6825 "TARGET_AVX2"
6826 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6827 [(set_attr "type" "sselog")
6828 (set_attr "prefix" "vex")
6829 (set_attr "mode" "OI")])
6830
;; punpcklbw / vpunpcklbw on V16QI.  In the 32-element concatenation,
;; operand 1 supplies indices 0-15 and operand 2 indices 16-31.  The result
;; alternates elements 0-7 of operand 1 with elements 0-7 of operand 2
;; (indices 16-23).
;; Alternative 0 ties operand 1 to the destination; alternative 1 is the
;; three-operand "v"-prefixed form.
6831 (define_insn "vec_interleave_lowv16qi"
6832 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6833 (vec_select:V16QI
6834 (vec_concat:V32QI
6835 (match_operand:V16QI 1 "register_operand" "0,x")
6836 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6837 (parallel [(const_int 0) (const_int 16)
6838 (const_int 1) (const_int 17)
6839 (const_int 2) (const_int 18)
6840 (const_int 3) (const_int 19)
6841 (const_int 4) (const_int 20)
6842 (const_int 5) (const_int 21)
6843 (const_int 6) (const_int 22)
6844 (const_int 7) (const_int 23)])))]
6845 "TARGET_SSE2"
6846 "@
6847 punpcklbw\t{%2, %0|%0, %2}
6848 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6849 [(set_attr "isa" "noavx,avx")
6850 (set_attr "type" "sselog")
6851 (set_attr "prefix_data16" "1,*")
6852 (set_attr "prefix" "orig,vex")
6853 (set_attr "mode" "TI")])
6854
;; vpunpckhwd on V16HI.  In the 32-element concatenation, operand 1 supplies
;; indices 0-15 and operand 2 indices 16-31.  The selection alternates
;; elements 4-7 of operand 1 with elements 4-7 of operand 2 (indices 20-23),
;; then does the same for elements 12-15 (indices 28-31): the upper four
;; words of each 8-word half are interleaved independently.
6855 (define_insn "avx2_interleave_highv16hi"
6856 [(set (match_operand:V16HI 0 "register_operand" "=x")
6857 (vec_select:V16HI
6858 (vec_concat:V32HI
6859 (match_operand:V16HI 1 "register_operand" "x")
6860 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6861 (parallel [(const_int 4) (const_int 20)
6862 (const_int 5) (const_int 21)
6863 (const_int 6) (const_int 22)
6864 (const_int 7) (const_int 23)
6865 (const_int 12) (const_int 28)
6866 (const_int 13) (const_int 29)
6867 (const_int 14) (const_int 30)
6868 (const_int 15) (const_int 31)])))]
6869 "TARGET_AVX2"
6870 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6871 [(set_attr "type" "sselog")
6872 (set_attr "prefix" "vex")
6873 (set_attr "mode" "OI")])
6874
;; punpckhwd / vpunpckhwd on V8HI.  In the 16-element concatenation,
;; operand 1 supplies indices 0-7 and operand 2 indices 8-15.  The result
;; alternates elements 4-7 of operand 1 with elements 4-7 of operand 2
;; (indices 12-15).
;; Alternative 0 ties operand 1 to the destination; alternative 1 is the
;; three-operand "v"-prefixed form.
6875 (define_insn "vec_interleave_highv8hi"
6876 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6877 (vec_select:V8HI
6878 (vec_concat:V16HI
6879 (match_operand:V8HI 1 "register_operand" "0,x")
6880 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6881 (parallel [(const_int 4) (const_int 12)
6882 (const_int 5) (const_int 13)
6883 (const_int 6) (const_int 14)
6884 (const_int 7) (const_int 15)])))]
6885 "TARGET_SSE2"
6886 "@
6887 punpckhwd\t{%2, %0|%0, %2}
6888 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6889 [(set_attr "isa" "noavx,avx")
6890 (set_attr "type" "sselog")
6891 (set_attr "prefix_data16" "1,*")
6892 (set_attr "prefix" "orig,vex")
6893 (set_attr "mode" "TI")])
6894
;; vpunpcklwd on V16HI.  In the 32-element concatenation, operand 1 supplies
;; indices 0-15 and operand 2 indices 16-31.  The selection alternates
;; elements 0-3 of operand 1 with elements 0-3 of operand 2 (indices 16-19),
;; then does the same for elements 8-11 (indices 24-27): the lower four
;; words of each 8-word half are interleaved independently.
6895 (define_insn "avx2_interleave_lowv16hi"
6896 [(set (match_operand:V16HI 0 "register_operand" "=x")
6897 (vec_select:V16HI
6898 (vec_concat:V32HI
6899 (match_operand:V16HI 1 "register_operand" "x")
6900 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6901 (parallel [(const_int 0) (const_int 16)
6902 (const_int 1) (const_int 17)
6903 (const_int 2) (const_int 18)
6904 (const_int 3) (const_int 19)
6905 (const_int 8) (const_int 24)
6906 (const_int 9) (const_int 25)
6907 (const_int 10) (const_int 26)
6908 (const_int 11) (const_int 27)])))]
6909 "TARGET_AVX2"
6910 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6911 [(set_attr "type" "sselog")
6912 (set_attr "prefix" "vex")
6913 (set_attr "mode" "OI")])
6914
;; punpcklwd / vpunpcklwd on V8HI.  In the 16-element concatenation,
;; operand 1 supplies indices 0-7 and operand 2 indices 8-15.  The result
;; alternates elements 0-3 of operand 1 with elements 0-3 of operand 2
;; (indices 8-11).
;; Alternative 0 ties operand 1 to the destination; alternative 1 is the
;; three-operand "v"-prefixed form.
6915 (define_insn "vec_interleave_lowv8hi"
6916 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6917 (vec_select:V8HI
6918 (vec_concat:V16HI
6919 (match_operand:V8HI 1 "register_operand" "0,x")
6920 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6921 (parallel [(const_int 0) (const_int 8)
6922 (const_int 1) (const_int 9)
6923 (const_int 2) (const_int 10)
6924 (const_int 3) (const_int 11)])))]
6925 "TARGET_SSE2"
6926 "@
6927 punpcklwd\t{%2, %0|%0, %2}
6928 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6929 [(set_attr "isa" "noavx,avx")
6930 (set_attr "type" "sselog")
6931 (set_attr "prefix_data16" "1,*")
6932 (set_attr "prefix" "orig,vex")
6933 (set_attr "mode" "TI")])
6934
;; vpunpckhdq on V8SI.  In the 16-element concatenation, operand 1 supplies
;; indices 0-7 and operand 2 indices 8-15.  The selection alternates
;; elements 2-3 of operand 1 with elements 2-3 of operand 2 (indices 10-11),
;; then does the same for elements 6-7 (indices 14-15): the upper two
;; dwords of each 4-dword half are interleaved independently.
6935 (define_insn "avx2_interleave_highv8si"
6936 [(set (match_operand:V8SI 0 "register_operand" "=x")
6937 (vec_select:V8SI
6938 (vec_concat:V16SI
6939 (match_operand:V8SI 1 "register_operand" "x")
6940 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6941 (parallel [(const_int 2) (const_int 10)
6942 (const_int 3) (const_int 11)
6943 (const_int 6) (const_int 14)
6944 (const_int 7) (const_int 15)])))]
6945 "TARGET_AVX2"
6946 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6947 [(set_attr "type" "sselog")
6948 (set_attr "prefix" "vex")
6949 (set_attr "mode" "OI")])
6950
;; punpckhdq / vpunpckhdq on V4SI.  In the 8-element concatenation,
;; operand 1 supplies indices 0-3 and operand 2 indices 4-7.  The result
;; alternates elements 2-3 of operand 1 with elements 2-3 of operand 2
;; (indices 6-7).
;; Alternative 0 ties operand 1 to the destination; alternative 1 is the
;; three-operand "v"-prefixed form.
6951 (define_insn "vec_interleave_highv4si"
6952 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6953 (vec_select:V4SI
6954 (vec_concat:V8SI
6955 (match_operand:V4SI 1 "register_operand" "0,x")
6956 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6957 (parallel [(const_int 2) (const_int 6)
6958 (const_int 3) (const_int 7)])))]
6959 "TARGET_SSE2"
6960 "@
6961 punpckhdq\t{%2, %0|%0, %2}
6962 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6963 [(set_attr "isa" "noavx,avx")
6964 (set_attr "type" "sselog")
6965 (set_attr "prefix_data16" "1,*")
6966 (set_attr "prefix" "orig,vex")
6967 (set_attr "mode" "TI")])
6968
;; vpunpckldq on V8SI.  In the 16-element concatenation, operand 1 supplies
;; indices 0-7 and operand 2 indices 8-15.  The selection alternates
;; elements 0-1 of operand 1 with elements 0-1 of operand 2 (indices 8-9),
;; then does the same for elements 4-5 (indices 12-13): the lower two
;; dwords of each 4-dword half are interleaved independently.
6969 (define_insn "avx2_interleave_lowv8si"
6970 [(set (match_operand:V8SI 0 "register_operand" "=x")
6971 (vec_select:V8SI
6972 (vec_concat:V16SI
6973 (match_operand:V8SI 1 "register_operand" "x")
6974 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6975 (parallel [(const_int 0) (const_int 8)
6976 (const_int 1) (const_int 9)
6977 (const_int 4) (const_int 12)
6978 (const_int 5) (const_int 13)])))]
6979 "TARGET_AVX2"
6980 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6981 [(set_attr "type" "sselog")
6982 (set_attr "prefix" "vex")
6983 (set_attr "mode" "OI")])
6984
;; punpckldq / vpunpckldq on V4SI.  In the 8-element concatenation,
;; operand 1 supplies indices 0-3 and operand 2 indices 4-7.  The result
;; alternates elements 0-1 of operand 1 with elements 0-1 of operand 2
;; (indices 4-5).
;; Alternative 0 ties operand 1 to the destination; alternative 1 is the
;; three-operand "v"-prefixed form.
6985 (define_insn "vec_interleave_lowv4si"
6986 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6987 (vec_select:V4SI
6988 (vec_concat:V8SI
6989 (match_operand:V4SI 1 "register_operand" "0,x")
6990 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6991 (parallel [(const_int 0) (const_int 4)
6992 (const_int 1) (const_int 5)])))]
6993 "TARGET_SSE2"
6994 "@
6995 punpckldq\t{%2, %0|%0, %2}
6996 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6997 [(set_attr "isa" "noavx,avx")
6998 (set_attr "type" "sselog")
6999 (set_attr "prefix_data16" "1,*")
7000 (set_attr "prefix" "orig,vex")
7001 (set_attr "mode" "TI")])
7002
;; Expander for vec_interleave_high on the VI_256 modes (TARGET_AVX2).
;; Emits avx2_interleave_low<mode> into temporary t1 and
;; avx2_interleave_high<mode> into temporary t2, then combines the two
;; temporaries, viewed as V4DI, with avx2_permv2ti using the immediate
;; 1 + (3 << 4).
;; NOTE(review): that immediate presumably selects the upper 128 bits of t1
;; and of t2 -- confirm against the vperm2i128 immediate encoding.
7003 (define_expand "vec_interleave_high<mode>"
7004 [(match_operand:VI_256 0 "register_operand" "=x")
7005 (match_operand:VI_256 1 "register_operand" "x")
7006 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
7007 "TARGET_AVX2"
7008 {
7009 rtx t1 = gen_reg_rtx (<MODE>mode);
7010 rtx t2 = gen_reg_rtx (<MODE>mode);
7011 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
7012 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
7013 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
7014 gen_lowpart (V4DImode, t1),
7015 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
7016 DONE;
7017 })
7018
;; Expander for vec_interleave_low on the VI_256 modes (TARGET_AVX2).
;; Emits avx2_interleave_low<mode> into temporary t1 and
;; avx2_interleave_high<mode> into temporary t2, then combines the two
;; temporaries, viewed as V4DI, with avx2_permv2ti using the immediate
;; 0 + (2 << 4).
;; NOTE(review): that immediate presumably selects the lower 128 bits of t1
;; and of t2 -- confirm against the vperm2i128 immediate encoding.
7019 (define_expand "vec_interleave_low<mode>"
7020 [(match_operand:VI_256 0 "register_operand" "=x")
7021 (match_operand:VI_256 1 "register_operand" "x")
7022 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
7023 "TARGET_AVX2"
7024 {
7025 rtx t1 = gen_reg_rtx (<MODE>mode);
7026 rtx t2 = gen_reg_rtx (<MODE>mode);
7027 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
7028 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
7029 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
7030 gen_lowpart (V4DImode, t1),
7031 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
7032 DONE;
7033 })
7034
7035 ;; Modes handled by pinsr patterns.
;; V8HI is listed unconditionally; V16QI and V4SI additionally require
;; TARGET_SSE4_1, and V2DI requires TARGET_SSE4_1 together with TARGET_64BIT.
7036 (define_mode_iterator PINSR_MODE
7037 [(V16QI "TARGET_SSE4_1") V8HI
7038 (V4SI "TARGET_SSE4_1")
7039 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
7040
;; Name prefix for the pinsr pattern of each mode: "sse2" for V8HI and
;; "sse4_1" for V16QI, V4SI and V2DI.
7041 (define_mode_attr sse2p4_1
7042 [(V16QI "sse4_1") (V8HI "sse2")
7043 (V4SI "sse4_1") (V2DI "sse4_1")])
7044
7045 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;;
;; Inserts scalar operand 2 into one element of vector operand 1.
;; Operand 3 is the vec_merge mask; the insn condition requires it to be a
;; single set bit whose position (exact_log2) is below the number of vector
;; elements.  The output code rewrites operand 3 to that bit position, which
;; becomes the immediate of the instruction.
;; Alternatives 0/1 are the non-AVX forms (operand 1 tied to the destination)
;; with operand 2 in a general register / in memory; alternatives 2/3 are the
;; matching three-operand "v"-prefixed forms.  For the register alternatives,
;; a scalar narrower than SImode is printed with the %k modifier.
7046 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
7047 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
7048 (vec_merge:PINSR_MODE
7049 (vec_duplicate:PINSR_MODE
7050 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
7051 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
7052 (match_operand:SI 3 "const_int_operand" "")))]
7053 "TARGET_SSE2
7054 && ((unsigned) exact_log2 (INTVAL (operands[3]))
7055 < GET_MODE_NUNITS (<MODE>mode))"
7056 {
7057 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7058
7059 switch (which_alternative)
7060 {
7061 case 0:
7062 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
7063 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
7064 /* FALLTHRU */
7065 case 1:
7066 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
7067 case 2:
7068 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
7069 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
7070 /* FALLTHRU */
7071 case 3:
7072 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7073 default:
7074 gcc_unreachable ();
7075 }
7076 }
7077 [(set_attr "isa" "noavx,noavx,avx,avx")
7078 (set_attr "type" "sselog")
7079 (set (attr "prefix_rex")
7080 (if_then_else
7081 (and (not (match_test "TARGET_AVX"))
7082 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
7083 (const_string "1")
7084 (const_string "*")))
7085 (set (attr "prefix_data16")
7086 (if_then_else
7087 (and (not (match_test "TARGET_AVX"))
7088 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7089 (const_string "1")
7090 (const_string "*")))
7091 (set (attr "prefix_extra")
7092 (if_then_else
7093 (and (not (match_test "TARGET_AVX"))
7094 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7095 (const_string "*")
7096 (const_string "1")))
7097 (set_attr "length_immediate" "1")
7098 (set_attr "prefix" "orig,orig,vex,vex")
7099 (set_attr "mode" "TI")])
7100
;; pextrb to a general register: selects byte element operand 2 (0-15) of
;; V16QI operand 1 and zero-extends it to the SWI48-iterator destination
;; mode.  The destination is printed with the %k modifier.
;; Requires TARGET_SSE4_1.
7101 (define_insn "*sse4_1_pextrb_<mode>"
7102 [(set (match_operand:SWI48 0 "register_operand" "=r")
7103 (zero_extend:SWI48
7104 (vec_select:QI
7105 (match_operand:V16QI 1 "register_operand" "x")
7106 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7107 "TARGET_SSE4_1"
7108 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7109 [(set_attr "type" "sselog")
7110 (set_attr "prefix_extra" "1")
7111 (set_attr "length_immediate" "1")
7112 (set_attr "prefix" "maybe_vex")
7113 (set_attr "mode" "TI")])
7114
;; pextrb to memory: stores byte element operand 2 (0-15) of V16QI operand 1
;; into the QImode memory destination, with no extension.
;; Requires TARGET_SSE4_1.
7115 (define_insn "*sse4_1_pextrb_memory"
7116 [(set (match_operand:QI 0 "memory_operand" "=m")
7117 (vec_select:QI
7118 (match_operand:V16QI 1 "register_operand" "x")
7119 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7120 "TARGET_SSE4_1"
7121 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7122 [(set_attr "type" "sselog")
7123 (set_attr "prefix_extra" "1")
7124 (set_attr "length_immediate" "1")
7125 (set_attr "prefix" "maybe_vex")
7126 (set_attr "mode" "TI")])
7127
;; pextrw to a general register: selects word element operand 2 (0-7) of
;; V8HI operand 1 and zero-extends it to the SWI48-iterator destination
;; mode.  The destination is printed with the %k modifier.
;; Only TARGET_SSE2 is required for this register form.
7128 (define_insn "*sse2_pextrw_<mode>"
7129 [(set (match_operand:SWI48 0 "register_operand" "=r")
7130 (zero_extend:SWI48
7131 (vec_select:HI
7132 (match_operand:V8HI 1 "register_operand" "x")
7133 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7134 "TARGET_SSE2"
7135 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7136 [(set_attr "type" "sselog")
7137 (set_attr "prefix_data16" "1")
7138 (set_attr "length_immediate" "1")
7139 (set_attr "prefix" "maybe_vex")
7140 (set_attr "mode" "TI")])
7141
;; pextrw to memory: stores word element operand 2 (0-7) of V8HI operand 1
;; into the HImode memory destination, with no extension.
;; Unlike the register form, this one requires TARGET_SSE4_1.
7142 (define_insn "*sse4_1_pextrw_memory"
7143 [(set (match_operand:HI 0 "memory_operand" "=m")
7144 (vec_select:HI
7145 (match_operand:V8HI 1 "register_operand" "x")
7146 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7147 "TARGET_SSE4_1"
7148 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7149 [(set_attr "type" "sselog")
7150 (set_attr "prefix_extra" "1")
7151 (set_attr "length_immediate" "1")
7152 (set_attr "prefix" "maybe_vex")
7153 (set_attr "mode" "TI")])
7154
;; pextrd: selects dword element operand 2 (0-3) of V4SI operand 1 into an
;; SImode destination, which may be a general register or memory ("rm").
;; Requires TARGET_SSE4_1.
7155 (define_insn "*sse4_1_pextrd"
7156 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7157 (vec_select:SI
7158 (match_operand:V4SI 1 "register_operand" "x")
7159 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7160 "TARGET_SSE4_1"
7161 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7162 [(set_attr "type" "sselog")
7163 (set_attr "prefix_extra" "1")
7164 (set_attr "length_immediate" "1")
7165 (set_attr "prefix" "maybe_vex")
7166 (set_attr "mode" "TI")])
7167
;; pextrd with the result zero-extended to DImode: selects dword element
;; operand 2 (0-3) of V4SI operand 1 into a general register, printed with
;; the %k modifier.  Requires TARGET_64BIT and TARGET_SSE4_1.
7168 (define_insn "*sse4_1_pextrd_zext"
7169 [(set (match_operand:DI 0 "register_operand" "=r")
7170 (zero_extend:DI
7171 (vec_select:SI
7172 (match_operand:V4SI 1 "register_operand" "x")
7173 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7174 "TARGET_64BIT && TARGET_SSE4_1"
7175 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7176 [(set_attr "type" "sselog")
7177 (set_attr "prefix_extra" "1")
7178 (set_attr "length_immediate" "1")
7179 (set_attr "prefix" "maybe_vex")
7180 (set_attr "mode" "TI")])
7181
7182 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; pextrq: selects qword element operand 2 (0-1) of V2DI operand 1 into a
;; DImode destination, which may be a general register or memory ("rm").
;; Requires TARGET_SSE4_1 and TARGET_64BIT.
7183 (define_insn "*sse4_1_pextrq"
7184 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7185 (vec_select:DI
7186 (match_operand:V2DI 1 "register_operand" "x")
7187 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7188 "TARGET_SSE4_1 && TARGET_64BIT"
7189 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7190 [(set_attr "type" "sselog")
7191 (set_attr "prefix_rex" "1")
7192 (set_attr "prefix_extra" "1")
7193 (set_attr "length_immediate" "1")
7194 (set_attr "prefix" "maybe_vex")
7195 (set_attr "mode" "TI")])
7196
;; Expander taking the 8-bit shuffle immediate (operand 2, 0-255) for V8SI.
;; The immediate is split into four 2-bit element selectors, passed to
;; avx2_pshufd_1 twice: as-is for result elements 0-3 and with 4 added for
;; result elements 4-7.
7197 (define_expand "avx2_pshufdv3"
7198 [(match_operand:V8SI 0 "register_operand" "")
7199 (match_operand:V8SI 1 "nonimmediate_operand" "")
7200 (match_operand:SI 2 "const_0_to_255_operand" "")]
7201 "TARGET_AVX2"
7202 {
7203 int mask = INTVAL (operands[2]);
7204 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7205 GEN_INT ((mask >> 0) & 3),
7206 GEN_INT ((mask >> 2) & 3),
7207 GEN_INT ((mask >> 4) & 3),
7208 GEN_INT ((mask >> 6) & 3),
7209 GEN_INT (((mask >> 0) & 3) + 4),
7210 GEN_INT (((mask >> 2) & 3) + 4),
7211 GEN_INT (((mask >> 4) & 3) + 4),
7212 GEN_INT (((mask >> 6) & 3) + 4)));
7213 DONE;
7214 })
7215
;; vpshufd on V8SI expressed as an explicit vec_select.  Operands 2-5 pick
;; from elements 0-3 and operands 6-9 from elements 4-7.  The insn condition
;; requires each of operands 6-9 to equal the corresponding operand 2-5
;; plus 4, so both halves use the same shuffle.
;; The output code re-packs operands 2-5 into the 8-bit immediate, two bits
;; per selector, and stores it in operand 2.
7216 (define_insn "avx2_pshufd_1"
7217 [(set (match_operand:V8SI 0 "register_operand" "=x")
7218 (vec_select:V8SI
7219 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7220 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7221 (match_operand 3 "const_0_to_3_operand" "")
7222 (match_operand 4 "const_0_to_3_operand" "")
7223 (match_operand 5 "const_0_to_3_operand" "")
7224 (match_operand 6 "const_4_to_7_operand" "")
7225 (match_operand 7 "const_4_to_7_operand" "")
7226 (match_operand 8 "const_4_to_7_operand" "")
7227 (match_operand 9 "const_4_to_7_operand" "")])))]
7228 "TARGET_AVX2
7229 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7230 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7231 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7232 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7233 {
7234 int mask = 0;
7235 mask |= INTVAL (operands[2]) << 0;
7236 mask |= INTVAL (operands[3]) << 2;
7237 mask |= INTVAL (operands[4]) << 4;
7238 mask |= INTVAL (operands[5]) << 6;
7239 operands[2] = GEN_INT (mask);
7240
7241 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7242 }
7243 [(set_attr "type" "sselog1")
7244 (set_attr "prefix" "vex")
7245 (set_attr "length_immediate" "1")
7246 (set_attr "mode" "OI")])
7247
;; Expander taking the shuffle immediate (operand 2) for V4SI.  The low
;; eight bits are split into four 2-bit element selectors and passed to
;; sse2_pshufd_1.
7248 (define_expand "sse2_pshufd"
7249 [(match_operand:V4SI 0 "register_operand" "")
7250 (match_operand:V4SI 1 "nonimmediate_operand" "")
7251 (match_operand:SI 2 "const_int_operand" "")]
7252 "TARGET_SSE2"
7253 {
7254 int mask = INTVAL (operands[2]);
7255 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7256 GEN_INT ((mask >> 0) & 3),
7257 GEN_INT ((mask >> 2) & 3),
7258 GEN_INT ((mask >> 4) & 3),
7259 GEN_INT ((mask >> 6) & 3)));
7260 DONE;
7261 })
7262
;; pshufd / vpshufd on V4SI expressed as an explicit vec_select: operands
;; 2-5 (each 0-3) are the source element for result elements 0-3.
;; The output code re-packs them into the 8-bit immediate, two bits per
;; selector, and stores it in operand 2.
7263 (define_insn "sse2_pshufd_1"
7264 [(set (match_operand:V4SI 0 "register_operand" "=x")
7265 (vec_select:V4SI
7266 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7267 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7268 (match_operand 3 "const_0_to_3_operand" "")
7269 (match_operand 4 "const_0_to_3_operand" "")
7270 (match_operand 5 "const_0_to_3_operand" "")])))]
7271 "TARGET_SSE2"
7272 {
7273 int mask = 0;
7274 mask |= INTVAL (operands[2]) << 0;
7275 mask |= INTVAL (operands[3]) << 2;
7276 mask |= INTVAL (operands[4]) << 4;
7277 mask |= INTVAL (operands[5]) << 6;
7278 operands[2] = GEN_INT (mask);
7279
7280 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7281 }
7282 [(set_attr "type" "sselog1")
7283 (set_attr "prefix_data16" "1")
7284 (set_attr "prefix" "maybe_vex")
7285 (set_attr "length_immediate" "1")
7286 (set_attr "mode" "TI")])
7287
;; Expander taking the 8-bit shuffle immediate (operand 2, 0-255) for V16HI.
;; The immediate is split into four 2-bit element selectors, passed to
;; avx2_pshuflw_1 twice: as-is for result elements 0-3 and with 8 added for
;; result elements 8-11.
7288 (define_expand "avx2_pshuflwv3"
7289 [(match_operand:V16HI 0 "register_operand" "")
7290 (match_operand:V16HI 1 "nonimmediate_operand" "")
7291 (match_operand:SI 2 "const_0_to_255_operand" "")]
7292 "TARGET_AVX2"
7293 {
7294 int mask = INTVAL (operands[2]);
7295 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7296 GEN_INT ((mask >> 0) & 3),
7297 GEN_INT ((mask >> 2) & 3),
7298 GEN_INT ((mask >> 4) & 3),
7299 GEN_INT ((mask >> 6) & 3),
7300 GEN_INT (((mask >> 0) & 3) + 8),
7301 GEN_INT (((mask >> 2) & 3) + 8),
7302 GEN_INT (((mask >> 4) & 3) + 8),
7303 GEN_INT (((mask >> 6) & 3) + 8)));
7304 DONE;
7305 })
7306
;; vpshuflw on V16HI expressed as an explicit vec_select.  Result elements
;; 0-3 are chosen by operands 2-5 (each 0-3) and result elements 8-11 by
;; operands 6-9 (each 8-11); elements 4-7 and 12-15 are copied unchanged.
;; The insn condition requires each of operands 6-9 to equal the
;; corresponding operand 2-5 plus 8, so both halves use the same shuffle.
;; The output code re-packs operands 2-5 into the 8-bit immediate, two bits
;; per selector, and stores it in operand 2.
7307 (define_insn "avx2_pshuflw_1"
7308 [(set (match_operand:V16HI 0 "register_operand" "=x")
7309 (vec_select:V16HI
7310 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7311 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7312 (match_operand 3 "const_0_to_3_operand" "")
7313 (match_operand 4 "const_0_to_3_operand" "")
7314 (match_operand 5 "const_0_to_3_operand" "")
7315 (const_int 4)
7316 (const_int 5)
7317 (const_int 6)
7318 (const_int 7)
7319 (match_operand 6 "const_8_to_11_operand" "")
7320 (match_operand 7 "const_8_to_11_operand" "")
7321 (match_operand 8 "const_8_to_11_operand" "")
7322 (match_operand 9 "const_8_to_11_operand" "")
7323 (const_int 12)
7324 (const_int 13)
7325 (const_int 14)
7326 (const_int 15)])))]
7327 "TARGET_AVX2
7328 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7329 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7330 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7331 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7332 {
7333 int mask = 0;
7334 mask |= INTVAL (operands[2]) << 0;
7335 mask |= INTVAL (operands[3]) << 2;
7336 mask |= INTVAL (operands[4]) << 4;
7337 mask |= INTVAL (operands[5]) << 6;
7338 operands[2] = GEN_INT (mask);
7339
7340 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7341 }
7342 [(set_attr "type" "sselog")
7343 (set_attr "prefix" "vex")
7344 (set_attr "length_immediate" "1")
7345 (set_attr "mode" "OI")])
7346
;; Expander taking the shuffle immediate (operand 2) for V8HI.  The low
;; eight bits are split into four 2-bit element selectors and passed to
;; sse2_pshuflw_1.
7347 (define_expand "sse2_pshuflw"
7348 [(match_operand:V8HI 0 "register_operand" "")
7349 (match_operand:V8HI 1 "nonimmediate_operand" "")
7350 (match_operand:SI 2 "const_int_operand" "")]
7351 "TARGET_SSE2"
7352 {
7353 int mask = INTVAL (operands[2]);
7354 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7355 GEN_INT ((mask >> 0) & 3),
7356 GEN_INT ((mask >> 2) & 3),
7357 GEN_INT ((mask >> 4) & 3),
7358 GEN_INT ((mask >> 6) & 3)));
7359 DONE;
7360 })
7361
;; pshuflw / vpshuflw on V8HI expressed as an explicit vec_select: result
;; elements 0-3 are chosen by operands 2-5 (each 0-3); elements 4-7 are
;; copied unchanged.
;; The output code re-packs operands 2-5 into the 8-bit immediate, two bits
;; per selector, and stores it in operand 2.
7362 (define_insn "sse2_pshuflw_1"
7363 [(set (match_operand:V8HI 0 "register_operand" "=x")
7364 (vec_select:V8HI
7365 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7366 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7367 (match_operand 3 "const_0_to_3_operand" "")
7368 (match_operand 4 "const_0_to_3_operand" "")
7369 (match_operand 5 "const_0_to_3_operand" "")
7370 (const_int 4)
7371 (const_int 5)
7372 (const_int 6)
7373 (const_int 7)])))]
7374 "TARGET_SSE2"
7375 {
7376 int mask = 0;
7377 mask |= INTVAL (operands[2]) << 0;
7378 mask |= INTVAL (operands[3]) << 2;
7379 mask |= INTVAL (operands[4]) << 4;
7380 mask |= INTVAL (operands[5]) << 6;
7381 operands[2] = GEN_INT (mask);
7382
7383 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7384 }
7385 [(set_attr "type" "sselog")
7386 (set_attr "prefix_data16" "0")
7387 (set_attr "prefix_rep" "1")
7388 (set_attr "prefix" "maybe_vex")
7389 (set_attr "length_immediate" "1")
7390 (set_attr "mode" "TI")])
7391
;; Expander taking the 8-bit shuffle immediate (operand 2, 0-255) for V16HI.
;; The immediate is split into four 2-bit element selectors, passed to
;; avx2_pshufhw_1 twice: with 4 added for result elements 4-7 and with 12
;; added for result elements 12-15.
7392 (define_expand "avx2_pshufhwv3"
7393 [(match_operand:V16HI 0 "register_operand" "")
7394 (match_operand:V16HI 1 "nonimmediate_operand" "")
7395 (match_operand:SI 2 "const_0_to_255_operand" "")]
7396 "TARGET_AVX2"
7397 {
7398 int mask = INTVAL (operands[2]);
7399 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7400 GEN_INT (((mask >> 0) & 3) + 4),
7401 GEN_INT (((mask >> 2) & 3) + 4),
7402 GEN_INT (((mask >> 4) & 3) + 4),
7403 GEN_INT (((mask >> 6) & 3) + 4),
7404 GEN_INT (((mask >> 0) & 3) + 12),
7405 GEN_INT (((mask >> 2) & 3) + 12),
7406 GEN_INT (((mask >> 4) & 3) + 12),
7407 GEN_INT (((mask >> 6) & 3) + 12)));
7408 DONE;
7409 })
7410
;; vpshufhw on V16HI expressed as an explicit vec_select.  Result elements
;; 4-7 are chosen by operands 2-5 (each 4-7) and result elements 12-15 by
;; operands 6-9 (each 12-15); elements 0-3 and 8-11 are copied unchanged.
;; The insn condition requires each of operands 6-9 to equal the
;; corresponding operand 2-5 plus 8, so both halves use the same shuffle.
;; The output code subtracts 4 from operands 2-5, packs them into the 8-bit
;; immediate, two bits per selector, and stores it in operand 2.
7411 (define_insn "avx2_pshufhw_1"
7412 [(set (match_operand:V16HI 0 "register_operand" "=x")
7413 (vec_select:V16HI
7414 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7415 (parallel [(const_int 0)
7416 (const_int 1)
7417 (const_int 2)
7418 (const_int 3)
7419 (match_operand 2 "const_4_to_7_operand" "")
7420 (match_operand 3 "const_4_to_7_operand" "")
7421 (match_operand 4 "const_4_to_7_operand" "")
7422 (match_operand 5 "const_4_to_7_operand" "")
7423 (const_int 8)
7424 (const_int 9)
7425 (const_int 10)
7426 (const_int 11)
7427 (match_operand 6 "const_12_to_15_operand" "")
7428 (match_operand 7 "const_12_to_15_operand" "")
7429 (match_operand 8 "const_12_to_15_operand" "")
7430 (match_operand 9 "const_12_to_15_operand" "")])))]
7431 "TARGET_AVX2
7432 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7433 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7434 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7435 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7436 {
7437 int mask = 0;
7438 mask |= (INTVAL (operands[2]) - 4) << 0;
7439 mask |= (INTVAL (operands[3]) - 4) << 2;
7440 mask |= (INTVAL (operands[4]) - 4) << 4;
7441 mask |= (INTVAL (operands[5]) - 4) << 6;
7442 operands[2] = GEN_INT (mask);
7443
7444 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7445 }
7446 [(set_attr "type" "sselog")
7447 (set_attr "prefix" "vex")
7448 (set_attr "length_immediate" "1")
7449 (set_attr "mode" "OI")])
7450
;; Expander taking the shuffle immediate (operand 2) for V8HI.  The low
;; eight bits are split into four 2-bit element selectors, each offset by 4,
;; and passed to sse2_pshufhw_1.
7451 (define_expand "sse2_pshufhw"
7452 [(match_operand:V8HI 0 "register_operand" "")
7453 (match_operand:V8HI 1 "nonimmediate_operand" "")
7454 (match_operand:SI 2 "const_int_operand" "")]
7455 "TARGET_SSE2"
7456 {
7457 int mask = INTVAL (operands[2]);
7458 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7459 GEN_INT (((mask >> 0) & 3) + 4),
7460 GEN_INT (((mask >> 2) & 3) + 4),
7461 GEN_INT (((mask >> 4) & 3) + 4),
7462 GEN_INT (((mask >> 6) & 3) + 4)));
7463 DONE;
7464 })
7465
;; pshufhw / vpshufhw on V8HI expressed as an explicit vec_select: result
;; elements 4-7 are chosen by operands 2-5 (each 4-7); elements 0-3 are
;; copied unchanged.
;; The output code subtracts 4 from operands 2-5, packs them into the 8-bit
;; immediate, two bits per selector, and stores it in operand 2.
7466 (define_insn "sse2_pshufhw_1"
7467 [(set (match_operand:V8HI 0 "register_operand" "=x")
7468 (vec_select:V8HI
7469 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7470 (parallel [(const_int 0)
7471 (const_int 1)
7472 (const_int 2)
7473 (const_int 3)
7474 (match_operand 2 "const_4_to_7_operand" "")
7475 (match_operand 3 "const_4_to_7_operand" "")
7476 (match_operand 4 "const_4_to_7_operand" "")
7477 (match_operand 5 "const_4_to_7_operand" "")])))]
7478 "TARGET_SSE2"
7479 {
7480 int mask = 0;
7481 mask |= (INTVAL (operands[2]) - 4) << 0;
7482 mask |= (INTVAL (operands[3]) - 4) << 2;
7483 mask |= (INTVAL (operands[4]) - 4) << 4;
7484 mask |= (INTVAL (operands[5]) - 4) << 6;
7485 operands[2] = GEN_INT (mask);
7486
7487 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7488 }
7489 [(set_attr "type" "sselog")
7490 (set_attr "prefix_rep" "1")
7491 (set_attr "prefix_data16" "0")
7492 (set_attr "prefix" "maybe_vex")
7493 (set_attr "length_immediate" "1")
7494 (set_attr "mode" "TI")])
7495
;; Expander that loads SImode operand 1 into element 0 of a V4SI register.
;; The vec_merge mask of 1 takes element 0 from the duplicated scalar and
;; the remaining elements from operand 2, which the preparation statement
;; sets to the V4SI zero vector (CONST0_RTX).
;; Despite the "sse2" name, the condition is TARGET_SSE.
7496 (define_expand "sse2_loadd"
7497 [(set (match_operand:V4SI 0 "register_operand" "")
7498 (vec_merge:V4SI
7499 (vec_duplicate:V4SI
7500 (match_operand:SI 1 "nonimmediate_operand" ""))
7501 (match_dup 2)
7502 (const_int 1)))]
7503 "TARGET_SSE"
7504 "operands[2] = CONST0_RTX (V4SImode);")
7505
;; Replaces element 0 of V4SI operand 1 with SImode operand 2 (vec_merge
;; mask 1).  Operand 1 is a register or zero (reg_or_0_operand).
;; Alternatives, in order:
;;   0: operand 2 in memory, operand 1 constraint "C", movd (isa "sse2")
;;   1: operand 2 in a general register, destination constraint "Yi",
;;      operand 1 constraint "C", movd
;;   2: operand 2 in memory, operand 1 constraint "C", movss (isa "noavx")
;;   3: operand 2 in an SSE register, operand 1 tied to the destination,
;;      movss (isa "noavx")
;;   4: operand 2 and operand 1 in SSE registers, three-operand vmovss
;;      (isa "avx")
;; NOTE(review): "C" and "Yi" are defined outside this chunk; "C" presumably
;; matches the zero constant allowed by reg_or_0_operand -- confirm in
;; constraints.md.
7506 (define_insn "sse2_loadld"
7507 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7508 (vec_merge:V4SI
7509 (vec_duplicate:V4SI
7510 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7511 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7512 (const_int 1)))]
7513 "TARGET_SSE"
7514 "@
7515 %vmovd\t{%2, %0|%0, %2}
7516 %vmovd\t{%2, %0|%0, %2}
7517 movss\t{%2, %0|%0, %2}
7518 movss\t{%2, %0|%0, %2}
7519 vmovss\t{%2, %1, %0|%0, %1, %2}"
7520 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7521 (set_attr "type" "ssemov")
7522 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7523 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
7524
;; Extracts element 0 of V4SI operand 1 into an SImode destination.
;; The insn has no assembly of its own ("#") and must be split.  After
;; reload, when TARGET_INTER_UNIT_MOVES holds, or the destination is memory,
;; or the destination is not a general register, it becomes a plain SImode
;; move from the register of operand 1 re-created in SImode.
7525 (define_insn_and_split "sse2_stored"
7526 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7527 (vec_select:SI
7528 (match_operand:V4SI 1 "register_operand" "x,Yi")
7529 (parallel [(const_int 0)])))]
7530 "TARGET_SSE"
7531 "#"
7532 "&& reload_completed
7533 && (TARGET_INTER_UNIT_MOVES
7534 || MEM_P (operands [0])
7535 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7536 [(set (match_dup 0) (match_dup 1))]
7537 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
7538
;; Extract element N (0..3) of a V4SI that lives in offsettable memory.
;; After reload this becomes a single SImode load from the element's byte
;; offset (N * 4) within the vector.
(define_insn_and_split "*vec_ext_v4si_mem"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (vec_select:SI
          (match_operand:V4SI 1 "memory_operand" "o")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
{
  int elt = INTVAL (operands[2]);
  rtx src = adjust_address (operands[1], SImode, elt * 4);

  emit_move_insn (operands[0], src);
  DONE;
})
7554
;; Expander for storing element 0 of a V2DI register to a DImode
;; destination.  It has no preparation code; the pattern is emitted as-is.
(define_expand "sse_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE")
7561
;; 64-bit variant of the element-0 DImode extract.  The two register-source
;; alternatives output "#"; the memory-source alternative is a plain
;; integer mov.  At most one of the two operands may be memory.
(define_insn "*sse2_storeq_rex64"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
          (parallel [(const_int 0)])))]
  "TARGET_64BIT && (!MEM_P (operands[0]) || !MEM_P (operands[1]))"
  "@
   #
   #
   mov{q}\t{%1, %0|%0, %1}"
  [(set_attr "mode" "*,*,DI")
   (set_attr "type" "*,*,imov")])
7574
;; Element-0 DImode extract from a V2DI register.  It only ever outputs
;; "#", so it has to be split before final output.
(define_insn "*sse2_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#")
7582
;; Post-reload split for the element-0 DImode extract from a V2DI register:
;; rewrite it as a DImode move from the same hard register.  Applied when
;; inter-unit moves are enabled, or the destination is memory, or the
;; destination is not a general register.
(define_split
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE
   && reload_completed
   && (TARGET_INTER_UNIT_MOVES
       || MEM_P (operands [0])
       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Re-view the vector source register in the scalar mode.  */
  operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
})
7595
;; Extract element 1 (the high quadword) of a V2DI, 64-bit targets.
;; Alternatives, in order: movhps store, in-place psrldq by 8, three-operand
;; vpsrldq by 8, (v)movq load of the high half (%H1), integer mov of the
;; high half.  At most one of the two operands may be memory.
(define_insn "*vec_extractv2di_1_rex64"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
          (parallel [(const_int 1)])))]
  "TARGET_64BIT && (!MEM_P (operands[0]) || !MEM_P (operands[1]))"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   %vmovq\t{%H1, %0|%0, %H1}
   mov{q}\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
   (set_attr "isa" "*,noavx,avx,*,*")
   (set_attr "mode" "V2SF,TI,TI,TI,DI")
   (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
   (set_attr "memory" "*,none,none,*,*")
   (set_attr "length_immediate" "*,1,1,*,*")])
7614
;; Extract element 1 (the high quadword) of a V2DI, 32-bit targets.
;; Alternatives, in order: movhps store, in-place psrldq by 8, three-operand
;; vpsrldq by 8, (v)movq load of the high half (%H1), movhlps between
;; registers, movlps load of the high half.  At most one of the two
;; operands may be memory.
(define_insn "*vec_extractv2di_1"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_64BIT && TARGET_SSE
   && (!MEM_P (operands[0]) || !MEM_P (operands[1]))"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   %vmovq\t{%H1, %0|%0, %H1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
   (set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
   (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")
   (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "memory" "*,none,none,*,*,*")
   (set_attr "length_immediate" "*,1,1,*,*,*")])
7635
;; Broadcast an SImode value into all four elements of a V4SI.
;; Alternatives, in order: (v)pshufd with selector 0 from a register,
;; vbroadcastss from memory, in-place shufps with selector 0.
(define_insn "*vec_dupv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
        (vec_duplicate:V4SI
          (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
  "TARGET_SSE"
  "@
   %vpshufd\t{$0, %1, %0|%0, %1, 0}
   vbroadcastss\t{%1, %0|%0, %1}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1,ssemov,sselog1")
   (set_attr "isa" "sse2,avx,noavx")
   (set_attr "mode" "TI,V4SF,V4SF")
   (set_attr "prefix" "maybe_vex,vex,orig")
   (set_attr "prefix_extra" "0,1,*")
   (set_attr "length_immediate" "1,0,1")])
7651
;; Broadcast a DImode value into both elements of a V2DI.
;; Alternatives, in order: in-place punpcklqdq, vpunpcklqdq with the source
;; printed twice (%d1), (v)movddup from memory, in-place movlhps.
(define_insn "*vec_dupv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t%0, %0
   vpunpcklqdq\t{%d1, %0|%0, %d1}
   %vmovddup\t{%1, %0|%0, %1}
   movlhps\t%0, %0"
  [(set_attr "type" "sselog1,sselog1,sselog1,ssemov")
   (set_attr "isa" "sse2_noavx,avx,sse3,noavx")
   (set_attr "mode" "TI,TI,DF,V4SF")
   (set_attr "prefix" "orig,vex,maybe_vex,orig")])
7666
;; Concatenate two SImode values into a V2SI when SSE4.1 is available.
;; Alternatives 0-1 insert operand 2 at index 1 with (v)pinsrd, 2-3
;; interleave two registers with (v)punpckldq, 4 is a (v)movd of operand 1
;; alone, and 5-6 are the punpckldq / movd forms on "y"-class registers
;; (typed mmxcvt / mmxmov).
(define_insn "*vec_concatv2si_sse4_1"
  [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
          (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
  "TARGET_SSE4_1"
  "@
   pinsrd\t{$1, %2, %0|%0, %2, 1}
   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
   punpckldq\t{%2, %0|%0, %2}
   vpunpckldq\t{%2, %1, %0|%0, %1, %2}
   %vmovd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
   (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "prefix_extra" "1,1,*,*,*,*,*")
   (set_attr "length_immediate" "1,1,*,*,*,*,*")])
7687
;; Concatenate two SImode values into a V2SI with plain SSE2.
;; ??? The MMX alternative could in principle accept memory for operand 2.
;; However, using nonimmediate_operand there while the SSE alternatives
;; still reject memory would pretty much force the MMX alternative to be
;; chosen, so operand 2 stays register-or-zero.
(define_insn "*vec_concatv2si_sse2"
  [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
          (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
  "TARGET_SSE2"
  "@
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "mode" "TI,TI,DI,DI")
   (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")])
7704
;; Concatenate two SImode values into a V2SI using only SSE1-level
;; instructions on the "x" alternatives (unpcklps / movss); the "y"
;; alternatives use punpckldq / movd.
(define_insn "*vec_concatv2si_sse"
  [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
          (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "mode" "V4SF,V4SF,DI,DI")
   (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")])
7718
;; Concatenate two V2SI halves into a V4SI.
;; Register sources use (v)punpcklqdq or movlhps; a memory high half is
;; loaded with (v)movhps.
(define_insn "*vec_concatv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
        (vec_concat:V4SI
          (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
          (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
   (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")
   (set_attr "prefix" "orig,vex,orig,orig,vex")])
7735
;; Concatenate two DImode values into a V2DI on 64-bit targets.
;; Alternative 3 (general-register source) deliberately prints movd rather
;; than movq: that spelling is required to handle broken assemblers.
;; The "type" attribute is listed per alternative: 0, 1, 5 and 6 are
;; sselog, every other alternative is ssemov.
(define_insn "*vec_concatv2di_rex64"
  [(set (match_operand:V2DI 0 "register_operand"
          "=x,x ,x ,Yi,!x,x,x,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand"
            " 0,x ,xm,r ,*y,0,x,0,x")
          (match_operand:DI 2 "vector_move_operand"
            "rm,rm,C ,C ,C ,x,x,m,m")))]
  "TARGET_64BIT"
  "@
   pinsrq\t{$1, %2, %0|%0, %2, 1}
   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
   %vmovq\t{%1, %0|%0, %1}
   %vmovd\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
   (set_attr "type"
     "sselog,sselog,ssemov,ssemov,ssemov,sselog,sselog,ssemov,ssemov")
   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")
   (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
   ;; A REX prefix is counted for alternatives 0 and 3 when not using AVX.
   (set (attr "prefix_rex")
     (if_then_else
       (and (eq_attr "alternative" "0,3")
            (not (match_test "TARGET_AVX")))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
   (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")])
7772
;; Concatenate two DImode values into a V2DI on 32-bit targets.
;; Alternatives 0-1 take only operand 1 ((v)movq / movq2dq), 2-4 combine
;; two registers ((v)punpcklqdq / movlhps), and 5-6 load the high half from
;; memory ((v)movhps).
(define_insn "vec_concatv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
          (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
  "!TARGET_64BIT && TARGET_SSE"
  "@
   %vmovq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
   (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")
   (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")])
7791
;; Unpack expander; all work is delegated to ix86_expand_sse_unpack, called
;; here with flags (false, false).
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  ix86_expand_sse_unpack (operands, false, false);
  DONE;
})
7797
;; Unpack expander; all work is delegated to ix86_expand_sse_unpack, called
;; here with flags (false, true).
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  ix86_expand_sse_unpack (operands, false, true);
  DONE;
})
7803
;; Unpack expander; all work is delegated to ix86_expand_sse_unpack, called
;; here with flags (true, false).
(define_expand "vec_unpacku_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  ix86_expand_sse_unpack (operands, true, false);
  DONE;
})
7809
;; Unpack expander; all work is delegated to ix86_expand_sse_unpack, called
;; here with flags (true, true).
(define_expand "vec_unpacku_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  ix86_expand_sse_unpack (operands, true, true);
  DONE;
})
7815
7816 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7817 ;;
7818 ;; Miscellaneous
7819 ;;
7820 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7821
;; Expander for the 32 x byte unsigned rounding average:
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; The preparation code only legitimizes the operands for a PLUS-style
;; binary operation.
;; NOTE(review): the vector of ones is written in V32QI mode although it is
;; an operand of a V32HI plus.  The *avx2_uavgv32qi3 insn uses the same
;; spelling, so any change has to be made in both places.
(define_expand "avx2_uavgv32qi3"
  [(set (match_operand:V32QI 0 "register_operand" "")
        (truncate:V32QI
          (lshiftrt:V32HI
            (plus:V32HI
              (plus:V32HI
                (zero_extend:V32HI
                  (match_operand:V32QI 1 "nonimmediate_operand" ""))
                (zero_extend:V32HI
                  (match_operand:V32QI 2 "nonimmediate_operand" "")))
              (const_vector:V32QI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);
})
7851
;; Expander for the 16 x byte unsigned rounding average:
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; The preparation code only legitimizes the operands for a PLUS-style
;; binary operation.
;; NOTE(review): the vector of ones is written in V16QI mode although it is
;; an operand of a V16HI plus.  The *sse2_uavgv16qi3 insn uses the same
;; spelling, so any change has to be made in both places.
(define_expand "sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (truncate:V16QI
          (lshiftrt:V16HI
            (plus:V16HI
              (plus:V16HI
                (zero_extend:V16HI
                  (match_operand:V16QI 1 "nonimmediate_operand" ""))
                (zero_extend:V16HI
                  (match_operand:V16QI 2 "nonimmediate_operand" "")))
              (const_vector:V16QI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);
})
7873
;; vpavgb on 256-bit vectors: 32 x byte unsigned rounding average,
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; Operands 1 and 2 are commutative ("%").
(define_insn "*avx2_uavgv32qi3"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
        (truncate:V32QI
          (lshiftrt:V32HI
            (plus:V32HI
              (plus:V32HI
                (zero_extend:V32HI
                  (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
                (zero_extend:V32HI
                  (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
              (const_vector:V32QI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
  "vpavgb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "type" "sseiadd")])
7906
;; (v)pavgb on 128-bit vectors: 16 x byte unsigned rounding average,
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; Alternative 0 is the two-operand legacy form, alternative 1 the
;; three-operand VEX form.  Operands 1 and 2 are commutative ("%").
(define_insn "*sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (truncate:V16QI
          (lshiftrt:V16HI
            (plus:V16HI
              (plus:V16HI
                (zero_extend:V16HI
                  (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
                (zero_extend:V16HI
                  (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
              (const_vector:V16QI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
  "@
   pavgb\t{%2, %0|%0, %2}
   vpavgb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "isa" "noavx,avx")
   (set_attr "mode" "TI")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")])
7935
;; Expander for the 16 x word unsigned rounding average:
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; The preparation code only legitimizes the operands for a PLUS-style
;; binary operation.
;; NOTE(review): the vector of ones is written in V16HI mode although it is
;; an operand of a V16SI plus.  The *avx2_uavgv16hi3 insn uses the same
;; spelling, so any change has to be made in both places.
(define_expand "avx2_uavgv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "")
        (truncate:V16HI
          (lshiftrt:V16SI
            (plus:V16SI
              (plus:V16SI
                (zero_extend:V16SI
                  (match_operand:V16HI 1 "nonimmediate_operand" ""))
                (zero_extend:V16SI
                  (match_operand:V16HI 2 "nonimmediate_operand" "")))
              (const_vector:V16HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);
})
7957
;; Expander for the 8 x word unsigned rounding average:
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; The preparation code only legitimizes the operands for a PLUS-style
;; binary operation.
;; NOTE(review): the vector of ones is written in V8HI mode although it is
;; an operand of a V8SI plus.  The *sse2_uavgv8hi3 insn uses the same
;; spelling, so any change has to be made in both places.
(define_expand "sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" ""))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "")))
              (const_vector:V8HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);
})
7975
;; vpavgw on 256-bit vectors: 16 x word unsigned rounding average,
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; Operands 1 and 2 are commutative ("%").
(define_insn "*avx2_uavgv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (truncate:V16HI
          (lshiftrt:V16SI
            (plus:V16SI
              (plus:V16SI
                (zero_extend:V16SI
                  (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
                (zero_extend:V16SI
                  (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
              (const_vector:V16HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
  "vpavgw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "type" "sseiadd")])
8000
;; (v)pavgw on 128-bit vectors: 8 x word unsigned rounding average,
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; Alternative 0 is the two-operand legacy form, alternative 1 the
;; three-operand VEX form.  Operands 1 and 2 are commutative ("%").
(define_insn "*sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
              (const_vector:V8HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "@
   pavgw\t{%2, %0|%0, %2}
   vpavgw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "isa" "noavx,avx")
   (set_attr "mode" "TI")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")])
8025
;; (v)psadbw.  The operation is kept as an opaque unspec: a faithful RTL
;; representation would be absolutely enormous and surely not generally
;; useful.
(define_insn "<sse2_avx2>_psadbw"
  [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
        (unspec:VI8_AVX2
          [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
           (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "@
   psadbw\t{%2, %0|%0, %2}
   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "isa" "noavx,avx")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "atom_unit" "simul")])
8043
;; (v)movmskps / (v)movmskpd: produce an SImode result in a general register
;; from a floating-point vector register (UNSPEC_MOVMSK).
(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
          [(match_operand:VF 1 "register_operand" "x")]
          UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "mode" "<MODE>")
   (set_attr "prefix" "maybe_vex")
   (set_attr "type" "ssemov")])
8054
;; vpmovmskb on a 256-bit source: produce an SImode result in a general
;; register from a V32QI register (UNSPEC_MOVMSK).
;; The "mode" attribute is SI: the destination is an SImode register, and
;; this matches the 128-bit sse2_pmovmskb pattern.  (It used to say DI,
;; which did not correspond to any operand of this insn.)
(define_insn "avx2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_AVX2"
  "vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SI")])
8064
;; (v)pmovmskb on a 128-bit source: produce an SImode result in a general
;; register from a V16QI register (UNSPEC_MOVMSK).
(define_insn "sse2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x")]
          UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "%vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "mode" "SI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "type" "ssemov")])
8075
;; Expander for the masked byte store.  The destination memory (operand 0)
;; also appears as the third unspec input, so the previous memory contents
;; are an input of the operation.
(define_expand "sse2_maskmovdqu"
  [(set (match_operand:V16QI 0 "memory_operand" "")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "")
           (match_operand:V16QI 2 "register_operand" "")
           (match_dup 0)]
          UNSPEC_MASKMOV))]
  "TARGET_SSE2")
8083
;; (v)maskmovdqu.  The store address is operand 0, constrained to "D"; it is
;; not printed in the assembler template because the instruction uses it
;; implicitly.
(define_insn "*sse2_maskmovdqu"
  [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x")
           (match_operand:V16QI 2 "register_operand" "x")
           (mem:V16QI (match_dup 0))]
          UNSPEC_MASKMOV))]
  "TARGET_SSE2"
  "%vmaskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   ;; The default length_vex computation is confused by the implicit %rdi
   ;; operand, so the VEX prefix length is given explicitly here.
   (set (attr "length_vex")
     (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))])
8099
;; (v)ldmxcsr: volatile operation taking a 32-bit memory operand; marked as
;; a memory load.
(define_insn "sse_ldmxcsr"
  [(unspec_volatile
     [(match_operand:SI 0 "memory_operand" "m")]
     UNSPECV_LDMXCSR)]
  "TARGET_SSE"
  "%vldmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "memory" "load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "atom_sse_attr" "mxcsr")])
8109
;; (v)stmxcsr: volatile operation writing a 32-bit memory operand; marked as
;; a memory store.
(define_insn "sse_stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec_volatile:SI
          [(const_int 0)]
          UNSPECV_STMXCSR))]
  "TARGET_SSE"
  "%vstmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "memory" "store")
   (set_attr "prefix" "maybe_vex")
   (set_attr "atom_sse_attr" "mxcsr")])
8119
;; clflush: volatile operation on an address operand, printed with %a0.
;; Its effect on memory is declared "unknown".
(define_insn "sse2_clflush"
  [(unspec_volatile
     [(match_operand 0 "address_operand" "p")]
     UNSPECV_CLFLUSH)]
  "TARGET_SSE2"
  "clflush\t%a0"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")
   (set_attr "atom_sse_attr" "fence")])
8128
8129
;; mwait.  Both inputs are SImode and pinned by the "a" and "c" constraints;
;; the instruction is printed without explicit operands.
(define_insn "sse3_mwait"
  [(unspec_volatile
     [(match_operand:SI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "c")]
     UNSPECV_MWAIT)]
  "TARGET_SSE3"
;; In 64-bit mode the instruction reads as "mwait %rax,%rcx", yet only the
;; low 32 bits are used.  Because 32-bit register operands are implicitly
;; zero extended to 64 bits, setting up the 32-bit registers is sufficient.
  "mwait"
  [(set_attr "length" "3")])
8140
;; monitor, 32-bit targets.  The three SImode inputs are pinned by the "a",
;; "c" and "d" constraints and printed explicitly.
(define_insn "sse3_monitor"
  [(unspec_volatile
     [(match_operand:SI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "c")
      (match_operand:SI 2 "register_operand" "d")]
     UNSPECV_MONITOR)]
  "TARGET_SSE3 && !TARGET_64BIT"
  "monitor\t%0, %1, %2"
  [(set_attr "length" "3")])
8149
;; monitor, 64-bit targets.  Operand 0 is DImode ("a"); operands 1 and 2
;; stay SImode ("c", "d").  The instruction is printed without explicit
;; operands.
(define_insn "sse3_monitor64"
  [(unspec_volatile
     [(match_operand:DI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "c")
      (match_operand:SI 2 "register_operand" "d")]
     UNSPECV_MONITOR)]
  "TARGET_SSE3 && TARGET_64BIT"
;; In 64-bit mode the instruction reads as "monitor %rax,%rcx,%rdx", yet
;; only the low 32 bits of RCX and RDX are used.  Because 32-bit register
;; operands are implicitly zero extended to 64 bits, setting up the 32-bit
;; registers is sufficient for those two.
  "monitor"
  [(set_attr "length" "3")])
8161
8162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8163 ;;
8164 ;; SSSE3 instructions
8165 ;;
8166 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8167
;; vphaddw on 256-bit vectors: horizontal add of adjacent 16-bit pairs.
;;
;; The VEX.256 form operates on each 128-bit lane independently.  Within a
;; lane, the four low results are the pair sums of operand 1's lane and the
;; four high results are the pair sums of operand 2's lane.  Result elements
;; are therefore, in order:
;;    0..3   pair sums of operand 1 elements 0..7
;;    4..7   pair sums of operand 2 elements 0..7
;;    8..11  pair sums of operand 1 elements 8..15
;;   12..15  pair sums of operand 2 elements 8..15
;; The pattern previously listed all eight sums of operand 1 followed by all
;; eight sums of operand 2, which does not describe what the instruction
;; computes.  Operand numbering, constraints and the output template are
;; unchanged.
(define_insn "avx2_phaddwv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Low 128-bit lane of the result.
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; High 128-bit lane of the result.
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphaddw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8243
;; (v)phaddw on 128-bit vectors: horizontal add of adjacent 16-bit pairs.
;; Result elements 0..3 are the pair sums of operand 1, elements 4..7 the
;; pair sums of operand 2.  Alternative 0 is the two-operand legacy form,
;; alternative 1 the three-operand VEX form.
(define_insn "ssse3_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          ;; Pair sums of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pair sums of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phaddw\t{%2, %0|%0, %2}
   vphaddw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "isa" "noavx,avx")
   (set_attr "mode" "TI")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "atom_unit" "complex")])
8292
;; phaddw on 64-bit vectors ("y" registers): horizontal add of adjacent
;; 16-bit pairs.  Result elements 0..1 are the pair sums of operand 1,
;; elements 2..3 the pair sums of operand 2.
(define_insn "ssse3_phaddwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pair sums of operand 1.
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pair sums of operand 2.
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex")
     (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "atom_unit" "complex")])
8321
;; vphaddd on 256-bit vectors: horizontal add of adjacent 32-bit pairs.
;;
;; The VEX.256 form operates on each 128-bit lane independently.  Within a
;; lane, the two low results are the pair sums of operand 1's lane and the
;; two high results are the pair sums of operand 2's lane.  Result elements
;; are therefore, in order:
;;   0..1  pair sums of operand 1 elements 0..3
;;   2..3  pair sums of operand 2 elements 0..3
;;   4..5  pair sums of operand 1 elements 4..7
;;   6..7  pair sums of operand 2 elements 4..7
;; The pattern previously listed all four sums of operand 1 followed by all
;; four sums of operand 2, which does not describe what the instruction
;; computes.  Operand numbering, constraints and the output template are
;; unchanged.
(define_insn "avx2_phadddv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          ;; Low 128-bit lane of the result.
          (vec_concat:V4SI
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI
                  (match_operand:V8SI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
              (plus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI
                  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
              (plus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
          ;; High 128-bit lane of the result.
          (vec_concat:V4SI
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
              (plus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
              (plus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX2"
  "vphaddd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8365
;; (v)phaddd on 128-bit vectors: horizontal add of adjacent 32-bit pairs.
;; Result elements 0..1 are the pair sums of operand 1, elements 2..3 the
;; pair sums of operand 2.  Alternative 0 is the two-operand legacy form,
;; alternative 1 the three-operand VEX form.
(define_insn "ssse3_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          ;; Pair sums of operand 1.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0,x")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pair sums of operand 2.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "@
   phaddd\t{%2, %0|%0, %2}
   vphaddd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "isa" "noavx,avx")
   (set_attr "mode" "TI")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "atom_unit" "complex")])
8398
;; phaddd on 64-bit vectors ("y" registers): result element 0 is the sum of
;; operand 1's two elements, element 1 the sum of operand 2's two elements.
(define_insn "ssse3_phadddv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          ;; Sum of operand 1's pair.
          (plus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          ;; Sum of operand 2's pair.
          (plus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex")
     (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "atom_unit" "complex")])
8419
;; 256-bit horizontal add of adjacent 16-bit elements with signed
;; saturation (vphaddsw).
;;
;; The instruction works independently on each 128-bit lane, so the
;; result is laid out as
;;   elements  0-3 : pair sums of operand 1, elements 0-7
;;   elements  4-7 : pair sums of operand 2, elements 0-7
;;   elements  8-11: pair sums of operand 1, elements 8-15
;;   elements 12-15: pair sums of operand 2, elements 8-15
;; The RTL must describe exactly that; putting all of operand 1 in the low
;; half and all of operand 2 in the high half would misdescribe the
;; instruction to any pass that simplifies or evaluates the pattern.
;;
;; The match_operand for each input stays ahead of every match_dup that
;; refers to it.
(define_insn "avx2_phaddswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Low 128-bit lane of the result.
          (vec_concat:V8HI
            ;; Elements 0-3: operand 1, low lane.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            ;; Elements 4-7: operand 2, low lane.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; High 128-bit lane of the result.
          (vec_concat:V8HI
            ;; Elements 8-11: operand 1, high lane.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            ;; Elements 12-15: operand 2, high lane.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8495
;; Horizontal add of adjacent 16-bit elements with signed saturation
;; (ss_plus), 128-bit form.  Result elements 0-3 are the pair sums of
;; operand 1, elements 4-7 the pair sums of operand 2.
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand "v"-prefixed form.
(define_insn "ssse3_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          ;; Pair sums of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pair sums of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phaddsw\t{%2, %0|%0, %2}
   vphaddsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "atom_unit" "complex")])
8544
;; Horizontal add of adjacent 16-bit elements with signed saturation,
;; 64-bit ("y" constraint) form:
;;   result = { op1[0]+op1[1], op1[2]+op1[3], op2[0]+op2[1], op2[2]+op2[3] }
;; Operand 1 is tied to the destination.
(define_insn "ssse3_phaddswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pair sums of operand 1.
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pair sums of operand 2.
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is needed whenever an extended register is mentioned.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "atom_unit" "complex")])
8573
;; 256-bit horizontal subtract of adjacent 16-bit elements (vphsubw);
;; each result element is (even element - following odd element).
;;
;; The instruction works independently on each 128-bit lane, so the
;; result is laid out as
;;   elements  0-3 : pair differences of operand 1, elements 0-7
;;   elements  4-7 : pair differences of operand 2, elements 0-7
;;   elements  8-11: pair differences of operand 1, elements 8-15
;;   elements 12-15: pair differences of operand 2, elements 8-15
;; The RTL must describe exactly that; putting all of operand 1 in the low
;; half and all of operand 2 in the high half would misdescribe the
;; instruction to any pass that simplifies or evaluates the pattern.
;;
;; The match_operand for each input stays ahead of every match_dup that
;; refers to it.
(define_insn "avx2_phsubwv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Low 128-bit lane of the result.
          (vec_concat:V8HI
            ;; Elements 0-3: operand 1, low lane.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            ;; Elements 4-7: operand 2, low lane.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; High 128-bit lane of the result.
          (vec_concat:V8HI
            ;; Elements 8-11: operand 1, high lane.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            ;; Elements 12-15: operand 2, high lane.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphsubw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8649
;; Horizontal subtract of adjacent 16-bit elements, 128-bit form.  Each
;; result element is (even element - following odd element); elements 0-3
;; come from operand 1, elements 4-7 from operand 2.
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand "v"-prefixed form.
(define_insn "ssse3_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          ;; Pair differences of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pair differences of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phsubw\t{%2, %0|%0, %2}
   vphsubw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "atom_unit" "complex")])
8698
;; Horizontal subtract of adjacent 16-bit elements, 64-bit ("y"
;; constraint) form:
;;   result = { op1[0]-op1[1], op1[2]-op1[3], op2[0]-op2[1], op2[2]-op2[3] }
;; Operand 1 is tied to the destination.
(define_insn "ssse3_phsubwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pair differences of operand 1.
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pair differences of operand 2.
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is needed whenever an extended register is mentioned.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "atom_unit" "complex")])
8727
;; 256-bit horizontal subtract of adjacent 32-bit elements (vphsubd);
;; each result element is (even element - following odd element).
;;
;; The instruction works independently on each 128-bit lane, so the
;; result is laid out as
;;   elements 0-1: pair differences of operand 1, elements 0-3
;;   elements 2-3: pair differences of operand 2, elements 0-3
;;   elements 4-5: pair differences of operand 1, elements 4-7
;;   elements 6-7: pair differences of operand 2, elements 4-7
;; The RTL must describe exactly that; putting all of operand 1 in the low
;; half and all of operand 2 in the high half would misdescribe the
;; instruction to any pass that simplifies or evaluates the pattern.
;;
;; The match_operand for each input stays ahead of every match_dup that
;; refers to it.
(define_insn "avx2_phsubdv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          ;; Low 128-bit lane of the result.
          (vec_concat:V4SI
            ;; Elements 0-1: operand 1, low lane.
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI
                  (match_operand:V8SI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
              (minus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
            ;; Elements 2-3: operand 2, low lane.
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI
                  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
              (minus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
          ;; High 128-bit lane of the result.
          (vec_concat:V4SI
            ;; Elements 4-5: operand 1, high lane.
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
              (minus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
            ;; Elements 6-7: operand 2, high lane.
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
              (minus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX2"
  "vphsubd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8771
;; Horizontal subtract of adjacent 32-bit elements, 128-bit form:
;;   result = { op1[0]-op1[1], op1[2]-op1[3], op2[0]-op2[1], op2[2]-op2[3] }
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand "v"-prefixed form.
(define_insn "ssse3_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          ;; Low half of the result: pair differences of operand 1.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0,x")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; High half of the result: pair differences of operand 2.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "@
   phsubd\t{%2, %0|%0, %2}
   vphsubd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "atom_unit" "complex")])
8805
;; Horizontal subtract of adjacent 32-bit elements, 64-bit ("y"
;; constraint) form:
;;   result = { op1[0]-op1[1], op2[0]-op2[1] }
;; Operand 1 is tied to the destination.
(define_insn "ssse3_phsubdv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          (minus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          (minus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is needed whenever an extended register is mentioned.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "atom_unit" "complex")])
8826
;; 256-bit horizontal subtract of adjacent 16-bit elements with signed
;; saturation (vphsubsw); each result element is
;; ss_minus (even element, following odd element).
;;
;; The instruction works independently on each 128-bit lane, so the
;; result is laid out as
;;   elements  0-3 : pair differences of operand 1, elements 0-7
;;   elements  4-7 : pair differences of operand 2, elements 0-7
;;   elements  8-11: pair differences of operand 1, elements 8-15
;;   elements 12-15: pair differences of operand 2, elements 8-15
;; The RTL must describe exactly that; putting all of operand 1 in the low
;; half and all of operand 2 in the high half would misdescribe the
;; instruction to any pass that simplifies or evaluates the pattern.
;;
;; The match_operand for each input stays ahead of every match_dup that
;; refers to it.
(define_insn "avx2_phsubswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Low 128-bit lane of the result.
          (vec_concat:V8HI
            ;; Elements 0-3: operand 1, low lane.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            ;; Elements 4-7: operand 2, low lane.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; High 128-bit lane of the result.
          (vec_concat:V8HI
            ;; Elements 8-11: operand 1, high lane.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            ;; Elements 12-15: operand 2, high lane.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8902
;; Horizontal subtract of adjacent 16-bit elements with signed saturation
;; (ss_minus), 128-bit form.  Each result element is
;; (even element - following odd element); elements 0-3 come from
;; operand 1, elements 4-7 from operand 2.
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand "v"-prefixed form.
(define_insn "ssse3_phsubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          ;; Pair differences of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pair differences of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phsubsw\t{%2, %0|%0, %2}
   vphsubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "atom_unit" "complex")])
8951
;; Horizontal subtract of adjacent 16-bit elements with signed saturation,
;; 64-bit ("y" constraint) form:
;;   result = { op1[0]-op1[1], op1[2]-op1[3], op2[0]-op2[1], op2[2]-op2[3] }
;; Operand 1 is tied to the destination.
(define_insn "ssse3_phsubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pair differences of operand 1.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pair differences of operand 2.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is needed whenever an extended register is mentioned.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "atom_unit" "complex")])
8980
;; 256-bit multiply-and-add of byte pairs (vpmaddubsw).  For each 16-bit
;; result element i:
;;   result[i] = ss_plus (zero_extend (op1[2i])   * sign_extend (op2[2i]),
;;                        zero_extend (op1[2i+1]) * sign_extend (op2[2i+1]))
;; i.e. operand 1 bytes are treated as unsigned, operand 2 bytes as signed.
(define_insn "avx2_pmaddubsw256"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (ss_plus:V16HI
          ;; Products of the even-numbered bytes.
          (mult:V16HI
            (zero_extend:V16HI
              (vec_select:V16QI
                (match_operand:V32QI 1 "register_operand" "x")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)
                           (const_int 16) (const_int 18)
                           (const_int 20) (const_int 22)
                           (const_int 24) (const_int 26)
                           (const_int 28) (const_int 30)])))
            (sign_extend:V16HI
              (vec_select:V16QI
                (match_operand:V32QI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)
                           (const_int 16) (const_int 18)
                           (const_int 20) (const_int 22)
                           (const_int 24) (const_int 26)
                           (const_int 28) (const_int 30)]))))
          ;; Products of the odd-numbered bytes.
          (mult:V16HI
            (zero_extend:V16HI
              (vec_select:V16QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)
                           (const_int 17) (const_int 19)
                           (const_int 21) (const_int 23)
                           (const_int 25) (const_int 27)
                           (const_int 29) (const_int 31)])))
            (sign_extend:V16HI
              (vec_select:V16QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)
                           (const_int 17) (const_int 19)
                           (const_int 21) (const_int 23)
                           (const_int 25) (const_int 27)
                           (const_int 29) (const_int 31)]))))))]
  "TARGET_AVX2"
  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")])
9066
;; 128-bit multiply-and-add of byte pairs.  For each 16-bit result
;; element i:
;;   result[i] = ss_plus (zero_extend (op1[2i])   * sign_extend (op2[2i]),
;;                        zero_extend (op1[2i+1]) * sign_extend (op2[2i+1]))
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand "v"-prefixed form.
(define_insn "ssse3_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (ss_plus:V8HI
          ;; Products of the even-numbered bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "0,x")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          ;; Products of the odd-numbered bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_SSSE3"
  "@
   pmaddubsw\t{%2, %0|%0, %2}
   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "atom_unit" "simul")])
9125
;; 64-bit ("y" constraint) multiply-and-add of byte pairs.  For each
;; 16-bit result element i:
;;   result[i] = ss_plus (zero_extend (op1[2i])   * sign_extend (op2[2i]),
;;                        zero_extend (op1[2i+1]) * sign_extend (op2[2i+1]))
;; Operand 1 is tied to the destination.
(define_insn "ssse3_pmaddubsw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI
          ;; Products of the even-numbered bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 1 "register_operand" "0")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          ;; Products of the odd-numbered bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4HI
              (vec_select:V4QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is needed whenever an extended register is mentioned.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "atom_unit" "simul")])
9164
;; Expander for the 256-bit rounding high multiply.  It builds
;;   truncate ((((sign_extend (op1) * sign_extend (op2)) >> 14) + 1) >> 1)
;; with logical right shifts, after handing the operands to
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the all-ones constant is written in V16HI although it is
;; added in V16SI; the matching "*avx2_umulhrswv16hi3" insn spells it the
;; same way, so the two must be kept in sync.
(define_expand "avx2_umulhrswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "")
        (truncate:V16HI
          (lshiftrt:V16SI
            (plus:V16SI
              (lshiftrt:V16SI
                (mult:V16SI
                  (sign_extend:V16SI
                    (match_operand:V16HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V16SI
                    (match_operand:V16HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              (const_vector:V16HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
9188
;; Matcher for the 256-bit rounding high multiply (vpmulhrsw):
;;   truncate ((((sign_extend (op1) * sign_extend (op2)) >> 14) + 1) >> 1)
;; Operand 1 carries the "%" commutative marker; the insn condition also
;; requires ix86_binary_operator_ok to accept the operands.
;; NOTE(review): the all-ones constant is written in V16HI although it is
;; added in V16SI; the "avx2_umulhrswv16hi3" expander emits it the same
;; way, so the two must be kept in sync.
(define_insn "*avx2_umulhrswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (truncate:V16HI
          (lshiftrt:V16SI
            (plus:V16SI
              (lshiftrt:V16SI
                (mult:V16SI
                  (sign_extend:V16SI
                    (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
                  (sign_extend:V16SI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
                (const_int 14))
              (const_vector:V16HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
  "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")])
9216
;; Expander for the 128-bit rounding high multiply.  It builds
;;   truncate ((((sign_extend (op1) * sign_extend (op2)) >> 14) + 1) >> 1)
;; with logical right shifts, after handing the operands to
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the all-ones constant is written in V8HI although it is
;; added in V8SI; the matching "*ssse3_pmulhrswv8hi3" insn spells it the
;; same way, so the two must be kept in sync.
(define_expand "ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              (const_vector:V8HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
9236
;; Matcher for the 128-bit rounding high multiply:
;;   truncate ((((sign_extend (op1) * sign_extend (op2)) >> 14) + 1) >> 1)
;; Operand 1 carries the "%" commutative marker.  Alternative 0 is the
;; two-operand form (operand 1 tied to the destination); alternative 1 is
;; the three-operand "v"-prefixed form.
;; NOTE(review): the all-ones constant is written in V8HI although it is
;; added in V8SI; the "ssse3_pmulhrswv8hi3" expander emits it the same
;; way, so the two must be kept in sync.
(define_insn "*ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
                (const_int 14))
              (const_vector:V8HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "@
   pmulhrsw\t{%2, %0|%0, %2}
   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "mode" "TI")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")])
9264
;; Expander for the 64-bit rounding high multiply.  It builds
;;   truncate ((((sign_extend (op1) * sign_extend (op2)) >> 14) + 1) >> 1)
;; with logical right shifts, after handing the operands to
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the all-ones constant is written in V4HI although it is
;; added in V4SI; the matching "*ssse3_pmulhrswv4hi3" insn spells it the
;; same way, so the two must be kept in sync.
(define_expand "ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              (const_vector:V4HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3"
  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
9282
;; Matcher for the 64-bit ("y" constraint) rounding high multiply:
;;   truncate ((((sign_extend (op1) * sign_extend (op2)) >> 14) + 1) >> 1)
;; Operand 1 carries the "%" commutative marker and is tied to the
;; destination.
;; NOTE(review): the all-ones constant is written in V4HI although it is
;; added in V4SI; the "ssse3_pmulhrswv4hi3" expander emits it the same
;; way, so the two must be kept in sync.
(define_insn "*ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
                (const_int 14))
              (const_vector:V4HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "DI")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is needed whenever an extended register is mentioned.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))])
9304
;; Byte shuffle over the VI1_AVX2 mode iterator, modelled as an opaque
;; UNSPEC_PSHUFB of the two inputs.
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand "v"-prefixed form.
(define_insn "<ssse3_avx2>_pshufb<mode>3"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
        (unspec:VI1_AVX2
          [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
           (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "@
   pshufb\t{%2, %0|%0, %2}
   vpshufb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")])
9320
;; 64-bit ("y" constraint) byte shuffle, modelled as an opaque
;; UNSPEC_PSHUFB of the two inputs.  Operand 1 is tied to the destination.
(define_insn "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (unspec:V8QI
          [(match_operand:V8QI 1 "register_operand" "0")
           (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DI")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is needed whenever an extended register is mentioned.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))])
9332
;; psignb/w/d over the VI124_AVX2 mode iterator, modelled as an opaque
;; UNSPEC_PSIGN of the two inputs; <ssemodesuffix> supplies the element
;; size suffix of the mnemonic.
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand "v"-prefixed form.
(define_insn "<ssse3_avx2>_psign<mode>3"
  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
        (unspec:VI124_AVX2
          [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
           (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "@
   psign<ssemodesuffix>\t{%2, %0|%0, %2}
   vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")])
9349
;; PSIGN for every mode of the MMXMODEI iterator, using the "y" register
;; class.  Operand 1 is tied to the destination ("0"); the mnemonic suffix
;; comes from <mmxvecsize>.  The REX prefix attribute is computed per insn
;; by x86_extended_reg_mentioned_p.
9350 (define_insn "ssse3_psign<mode>3"
9351 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9352 (unspec:MMXMODEI
9353 [(match_operand:MMXMODEI 1 "register_operand" "0")
9354 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9355 UNSPEC_PSIGN))]
9356 "TARGET_SSSE3"
9357 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
9358 [(set_attr "type" "sselog1")
9359 (set_attr "prefix_extra" "1")
9360 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9361 (set_attr "mode" "DI")])
9362
;; PALIGNR for every mode of the SSESCALARMODE iterator, modelled as
;; UNSPEC_PALIGNR of two vector operands and an immediate.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing -- presumably the RTL carries a bit
;; count and the instruction encodes a byte count (confirm against the
;; builtin expanders).
9363 (define_insn "<ssse3_avx2>_palignr<mode>"
9364 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9365 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9366 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9367 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9368 UNSPEC_PALIGNR))]
9369 "TARGET_SSSE3"
9370 {
/* Rewrite the immediate in place so %3 prints the scaled-down value.  */
9371 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9372
/* Alternative 0 is the two-operand form, alternative 1 the VEX form.  */
9373 switch (which_alternative)
9374 {
9375 case 0:
9376 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9377 case 1:
9378 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9379 default:
9380 gcc_unreachable ();
9381 }
9382 }
9383 [(set_attr "isa" "noavx,avx")
9384 (set_attr "type" "sseishft")
9385 (set_attr "atom_unit" "sishuf")
9386 (set_attr "prefix_data16" "1,*")
9387 (set_attr "prefix_extra" "1")
9388 (set_attr "length_immediate" "1")
9389 (set_attr "prefix" "orig,vex")
9390 (set_attr "mode" "<sseinsnmode>")])
9391
;; PALIGNR on a DImode value held in a "y" class register.  Operand 1 is
;; tied to the destination.  As in the vector variant, the immediate
;; (a multiple of 8 per its predicate) is divided by 8 before printing.
9392 (define_insn "ssse3_palignrdi"
9393 [(set (match_operand:DI 0 "register_operand" "=y")
9394 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9395 (match_operand:DI 2 "nonimmediate_operand" "ym")
9396 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9397 UNSPEC_PALIGNR))]
9398 "TARGET_SSSE3"
9399 {
/* Rewrite the immediate in place so %3 prints the scaled-down value.  */
9400 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9401 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9402 }
9403 [(set_attr "type" "sseishft")
9404 (set_attr "atom_unit" "sishuf")
9405 (set_attr "prefix_extra" "1")
9406 (set_attr "length_immediate" "1")
9407 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9408 (set_attr "mode" "DI")])
9409
;; Vector absolute value (RTL abs) for every mode of the VI124_AVX2
;; iterator.  A single alternative serves both encodings: the template
;; starts with %v and the prefix attribute is "maybe_vex".
9410 (define_insn "abs<mode>2"
9411 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9412 (abs:VI124_AVX2
9413 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9414 "TARGET_SSSE3"
9415 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9416 [(set_attr "type" "sselog1")
9417 (set_attr "prefix_data16" "1")
9418 (set_attr "prefix_extra" "1")
9419 (set_attr "prefix" "maybe_vex")
9420 (set_attr "mode" "<sseinsnmode>")])
9421
;; Vector absolute value (RTL abs) for every mode of the MMXMODEI
;; iterator, using the "y" register class.  The mnemonic suffix comes
;; from <mmxvecsize>.  prefix_rep is forced to 0 and the REX prefix
;; attribute is computed per insn by x86_extended_reg_mentioned_p.
9422 (define_insn "abs<mode>2"
9423 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9424 (abs:MMXMODEI
9425 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9426 "TARGET_SSSE3"
9427 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
9428 [(set_attr "type" "sselog1")
9429 (set_attr "prefix_rep" "0")
9430 (set_attr "prefix_extra" "1")
9431 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9432 (set_attr "mode" "DI")])
9433
9434 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9435 ;;
9436 ;; AMD SSE4A instructions
9437 ;;
9438 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9439
;; Store of a MODEF value from a register (operand 1) to memory
;; (operand 0), wrapped in UNSPEC_MOVNT.  The mnemonic suffix comes
;; from <ssemodesuffix>.
9440 (define_insn "sse4a_movnt<mode>"
9441 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9442 (unspec:MODEF
9443 [(match_operand:MODEF 1 "register_operand" "x")]
9444 UNSPEC_MOVNT))]
9445 "TARGET_SSE4A"
9446 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9447 [(set_attr "type" "ssemov")
9448 (set_attr "mode" "<MODE>")])
9449
;; Vector-register variant of the UNSPEC_MOVNT store: element 0 of the
;; VF_128 register operand 1 (selected by vec_select) is stored to the
;; scalar memory operand 0.
9450 (define_insn "sse4a_vmmovnt<mode>"
9451 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9452 (unspec:<ssescalarmode>
9453 [(vec_select:<ssescalarmode>
9454 (match_operand:VF_128 1 "register_operand" "x")
9455 (parallel [(const_int 0)]))]
9456 UNSPEC_MOVNT))]
9457 "TARGET_SSE4A"
9458 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9459 [(set_attr "type" "ssemov")
9460 (set_attr "mode" "<ssescalarmode>")])
9461
;; EXTRQ, immediate form.  Operand 1 is tied to the destination; operands
;; 2 and 3 are modeless constants in 0..255, which is why
;; length_immediate is 2.
9462 (define_insn "sse4a_extrqi"
9463 [(set (match_operand:V2DI 0 "register_operand" "=x")
9464 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9465 (match_operand 2 "const_0_to_255_operand" "")
9466 (match_operand 3 "const_0_to_255_operand" "")]
9467 UNSPEC_EXTRQI))]
9468 "TARGET_SSE4A"
9469 "extrq\t{%3, %2, %0|%0, %2, %3}"
9470 [(set_attr "type" "sse")
9471 (set_attr "prefix_data16" "1")
9472 (set_attr "length_immediate" "2")
9473 (set_attr "mode" "TI")])
9474
;; EXTRQ, register form.  Operand 1 is tied to the destination; the
;; control value is the V16QI register operand 2.
9475 (define_insn "sse4a_extrq"
9476 [(set (match_operand:V2DI 0 "register_operand" "=x")
9477 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9478 (match_operand:V16QI 2 "register_operand" "x")]
9479 UNSPEC_EXTRQ))]
9480 "TARGET_SSE4A"
9481 "extrq\t{%2, %0|%0, %2}"
9482 [(set_attr "type" "sse")
9483 (set_attr "prefix_data16" "1")
9484 (set_attr "mode" "TI")])
9485
;; INSERTQ, immediate form.  Operand 1 is tied to the destination,
;; operand 2 is the second register source, and operands 3 and 4 are
;; modeless constants in 0..255 (hence length_immediate 2).
9486 (define_insn "sse4a_insertqi"
9487 [(set (match_operand:V2DI 0 "register_operand" "=x")
9488 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9489 (match_operand:V2DI 2 "register_operand" "x")
9490 (match_operand 3 "const_0_to_255_operand" "")
9491 (match_operand 4 "const_0_to_255_operand" "")]
9492 UNSPEC_INSERTQI))]
9493 "TARGET_SSE4A"
9494 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9495 [(set_attr "type" "sseins")
9496 (set_attr "prefix_data16" "0")
9497 (set_attr "prefix_rep" "1")
9498 (set_attr "length_immediate" "2")
9499 (set_attr "mode" "TI")])
9500
;; INSERTQ, register form.  Operand 1 is tied to the destination;
;; operand 2 is the second register source.  No immediate is encoded.
9501 (define_insn "sse4a_insertq"
9502 [(set (match_operand:V2DI 0 "register_operand" "=x")
9503 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9504 (match_operand:V2DI 2 "register_operand" "x")]
9505 UNSPEC_INSERTQ))]
9506 "TARGET_SSE4A"
9507 "insertq\t{%2, %0|%0, %2}"
9508 [(set_attr "type" "sseins")
9509 (set_attr "prefix_data16" "0")
9510 (set_attr "prefix_rep" "1")
9511 (set_attr "mode" "TI")])
9512
9513 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9514 ;;
9515 ;; Intel SSE4.1 instructions
9516 ;;
9517 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9518
;; Immediate-controlled blend for every mode of the VF iterator, written
;; as a vec_merge: elements come from operand 2 where the mask bit in
;; operand 3 is set, and from operand 1 otherwise.  The mask range is
;; bounded by the <blendbits> attribute through the predicate name.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination;
;; alternative 1 (isa "avx") is the three-operand form.
9519 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9520 [(set (match_operand:VF 0 "register_operand" "=x,x")
9521 (vec_merge:VF
9522 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9523 (match_operand:VF 1 "register_operand" "0,x")
9524 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9525 "TARGET_SSE4_1"
9526 "@
9527 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9528 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9529 [(set_attr "isa" "noavx,avx")
9530 (set_attr "type" "ssemov")
9531 (set_attr "length_immediate" "1")
9532 (set_attr "prefix_data16" "1,*")
9533 (set_attr "prefix_extra" "1")
9534 (set_attr "prefix" "orig,vex")
9535 (set_attr "mode" "<MODE>")])
9536
;; Variable blend for every mode of the VF iterator, modelled as
;; UNSPEC_BLENDV of two data operands and a register selector (operand 3).
;; Alternative 0 (isa "noavx") requires the selector in the "Yz" class
;; (presumably %xmm0 -- confirm in constraints.md) and ties operand 1 to
;; the destination; the *_not_xmm0_* predicates keep the other operands
;; out of that register.  Alternative 1 (isa "avx") takes the selector in
;; any "x" register.
9537 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9538 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9539 (unspec:VF
9540 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9541 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9542 (match_operand:VF 3 "register_operand" "Yz,x")]
9543 UNSPEC_BLENDV))]
9544 "TARGET_SSE4_1"
9545 "@
9546 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9547 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9548 [(set_attr "isa" "noavx,avx")
9549 (set_attr "type" "ssemov")
9550 (set_attr "length_immediate" "1")
9551 (set_attr "prefix_data16" "1,*")
9552 (set_attr "prefix_extra" "1")
9553 (set_attr "prefix" "orig,vex")
9554 (set_attr "mode" "<MODE>")])
9555
;; DPPS/DPPD-style pattern for every mode of the VF iterator, modelled as
;; UNSPEC_DP of two vector operands and an 8-bit immediate.  The "%" in
;; operand 1's constraint marks operands 1 and 2 as commutative.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination;
;; alternative 1 (isa "avx") is the three-operand form.
9556 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9557 [(set (match_operand:VF 0 "register_operand" "=x,x")
9558 (unspec:VF
9559 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9560 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9561 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9562 UNSPEC_DP))]
9563 "TARGET_SSE4_1"
9564 "@
9565 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9566 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9567 [(set_attr "isa" "noavx,avx")
9568 (set_attr "type" "ssemul")
9569 (set_attr "length_immediate" "1")
9570 (set_attr "prefix_data16" "1,*")
9571 (set_attr "prefix_extra" "1")
9572 (set_attr "prefix" "orig,vex")
9573 (set_attr "mode" "<MODE>")])
9574
;; MOVNTDQA load for every mode of the VI8_AVX2 iterator: operand 1 must
;; be memory, operand 0 a register.  Wrapped in UNSPEC_MOVNTDQA.  One
;; alternative covers both encodings through %v / "maybe_vex".
9575 (define_insn "<sse4_1_avx2>_movntdqa"
9576 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9577 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9578 UNSPEC_MOVNTDQA))]
9579 "TARGET_SSE4_1"
9580 "%vmovntdqa\t{%1, %0|%0, %1}"
9581 [(set_attr "type" "ssemov")
9582 (set_attr "prefix_extra" "1")
9583 (set_attr "prefix" "maybe_vex")
9584 (set_attr "mode" "<sseinsnmode>")])
9585
;; MPSADBW for every mode of the VI1_AVX2 iterator, modelled as
;; UNSPEC_MPSADBW of two vector operands and an 8-bit immediate.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination;
;; alternative 1 (isa "avx") is the three-operand form.
9586 (define_insn "<sse4_1_avx2>_mpsadbw"
9587 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9588 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9589 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9590 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9591 UNSPEC_MPSADBW))]
9592 "TARGET_SSE4_1"
9593 "@
9594 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9595 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9596 [(set_attr "isa" "noavx,avx")
9597 (set_attr "type" "sselog1")
9598 (set_attr "length_immediate" "1")
9599 (set_attr "prefix_extra" "1")
9600 (set_attr "prefix" "orig,vex")
9601 (set_attr "mode" "<sseinsnmode>")])
9602
;; VPACKUSDW on 256-bit vectors: the RTL concatenates the unsigned
;; saturating truncations (us_truncate) of two V8SI operands into one
;; V16HI result.  AVX2 only, so there is a single three-operand
;; alternative with a VEX prefix.
9603 (define_insn "avx2_packusdw"
9604 [(set (match_operand:V16HI 0 "register_operand" "=x")
9605 (vec_concat:V16HI
9606 (us_truncate:V8HI
9607 (match_operand:V8SI 1 "register_operand" "x"))
9608 (us_truncate:V8HI
9609 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9610 "TARGET_AVX2"
9611 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9612 [(set_attr "type" "sselog")
9613 (set_attr "prefix_extra" "1")
9614 (set_attr "prefix" "vex")
9615 (set_attr "mode" "OI")])
9616
;; PACKUSDW on 128-bit vectors: the RTL concatenates the unsigned
;; saturating truncations (us_truncate) of two V4SI operands into one
;; V8HI result.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination;
;; alternative 1 (isa "avx") is the three-operand form.
9617 (define_insn "sse4_1_packusdw"
9618 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9619 (vec_concat:V8HI
9620 (us_truncate:V4HI
9621 (match_operand:V4SI 1 "register_operand" "0,x"))
9622 (us_truncate:V4HI
9623 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9624 "TARGET_SSE4_1"
9625 "@
9626 packusdw\t{%2, %0|%0, %2}
9627 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9628 [(set_attr "isa" "noavx,avx")
9629 (set_attr "type" "sselog")
9630 (set_attr "prefix_extra" "1")
9631 (set_attr "prefix" "orig,vex")
9632 (set_attr "mode" "TI")])
9633
;; Variable byte blend for every mode of the VI1_AVX2 iterator, modelled
;; as UNSPEC_BLENDV with a register selector (operand 3).
;; Alternative 0 (isa "noavx") needs the selector in the "Yz" class
;; (presumably %xmm0 -- confirm in constraints.md), ties operand 1 to the
;; destination and encodes no immediate (length_immediate "*").
;; Alternative 1 (isa "avx") takes the selector in any "x" register and
;; accounts for one immediate byte.
;; NOTE(review): operand 0 uses reg_not_xmm0_operand while operand 1 uses
;; the *_maybe_avx variant; the float blendv pattern uses *_maybe_avx for
;; both.  Confirm whether the stricter destination predicate is intended.
9634 (define_insn "<sse4_1_avx2>_pblendvb"
9635 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand" "=x,x")
9636 (unspec:VI1_AVX2
9637 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9638 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9639 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9640 UNSPEC_BLENDV))]
9641 "TARGET_SSE4_1"
9642 "@
9643 pblendvb\t{%3, %2, %0|%0, %2, %3}
9644 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9645 [(set_attr "isa" "noavx,avx")
9646 (set_attr "type" "ssemov")
9647 (set_attr "prefix_extra" "1")
9648 (set_attr "length_immediate" "*,1")
9649 (set_attr "prefix" "orig,vex")
9650 (set_attr "mode" "<sseinsnmode>")])
9651
;; PBLENDW on V8HI, written as a vec_merge: elements come from operand 2
;; where the mask bit in operand 3 is set, and from operand 1 otherwise.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination;
;; alternative 1 (isa "avx") is the three-operand form.
9652 (define_insn "sse4_1_pblendw"
9653 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9654 (vec_merge:V8HI
9655 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9656 (match_operand:V8HI 1 "register_operand" "0,x")
9657 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9658 "TARGET_SSE4_1"
9659 "@
9660 pblendw\t{%3, %2, %0|%0, %2, %3}
9661 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9662 [(set_attr "isa" "noavx,avx")
9663 (set_attr "type" "ssemov")
9664 (set_attr "prefix_extra" "1")
9665 (set_attr "length_immediate" "1")
9666 (set_attr "prefix" "orig,vex")
9667 (set_attr "mode" "TI")])
9668
9669 ;; The builtin uses an 8-bit immediate.  Expand that.
;; The V16HI vec_merge mask needs 16 bits, so the preparation code copies
;; the low byte of operand 3 into bits 8..15 as well (val << 8 | val).
;; The matching insn "*avx2_pblendw" masks the value back to 8 bits when
;; it prints the instruction.
9670 (define_expand "avx2_pblendw"
9671 [(set (match_operand:V16HI 0 "register_operand" "")
9672 (vec_merge:V16HI
9673 (match_operand:V16HI 2 "nonimmediate_operand" "")
9674 (match_operand:V16HI 1 "register_operand" "")
9675 (match_operand:SI 3 "const_0_to_255_operand" "")))]
9676 "TARGET_AVX2"
9677 {
9678 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
9679 operands[3] = GEN_INT (val << 8 | val);
9680 })
9681
;; Insn form of the 256-bit PBLENDW.  The leading "*" in the name means
;; no gen_ function is created; the pattern is reached through the
;; "avx2_pblendw" expander.  Operand 3 must satisfy avx2_pblendw_operand;
;; only its low 8 bits are printed as the instruction immediate.
9682 (define_insn "*avx2_pblendw"
9683 [(set (match_operand:V16HI 0 "register_operand" "=x")
9684 (vec_merge:V16HI
9685 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9686 (match_operand:V16HI 1 "register_operand" "x")
9687 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
9688 "TARGET_AVX2"
9689 {
/* Drop the replicated upper byte added by the expander.  */
9690 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
9691 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9692 }
9693 [(set_attr "type" "ssemov")
9694 (set_attr "prefix_extra" "1")
9695 (set_attr "length_immediate" "1")
9696 (set_attr "prefix" "vex")
9697 (set_attr "mode" "OI")])
9698
;; VPBLENDD for every mode of the VI4_AVX2 iterator, written as a
;; vec_merge: elements come from operand 2 where the mask bit in operand
;; 3 is set, and from operand 1 otherwise.  AVX2 only, VEX encoded.
9699 (define_insn "avx2_pblendd<mode>"
9700 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9701 (vec_merge:VI4_AVX2
9702 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9703 (match_operand:VI4_AVX2 1 "register_operand" "x")
9704 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9705 "TARGET_AVX2"
9706 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9707 [(set_attr "type" "ssemov")
9708 (set_attr "prefix_extra" "1")
9709 (set_attr "length_immediate" "1")
9710 (set_attr "prefix" "vex")
9711 (set_attr "mode" "<sseinsnmode>")])
9712
;; PHMINPOSUW on V8HI, modelled as UNSPEC_PHMINPOSUW of the single source
;; operand.  One alternative covers both encodings through %v /
;; "maybe_vex".
9713 (define_insn "sse4_1_phminposuw"
9714 [(set (match_operand:V8HI 0 "register_operand" "=x")
9715 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9716 UNSPEC_PHMINPOSUW))]
9717 "TARGET_SSE4_1"
9718 "%vphminposuw\t{%1, %0|%0, %1}"
9719 [(set_attr "type" "sselog1")
9720 (set_attr "prefix_extra" "1")
9721 (set_attr "prefix" "maybe_vex")
9722 (set_attr "mode" "TI")])
9723
;; Widen all 16 bytes of a V16QI operand to V16HI.  The any_extend code
;; iterator supplies both <code> in the name and <extsuffix> in the
;; mnemonic.  The whole source vector is consumed, so no vec_select is
;; needed.
9724 (define_insn "avx2_<code>v16qiv16hi2"
9725 [(set (match_operand:V16HI 0 "register_operand" "=x")
9726 (any_extend:V16HI
9727 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9728 "TARGET_AVX2"
9729 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9730 [(set_attr "type" "ssemov")
9731 (set_attr "prefix_extra" "1")
9732 (set_attr "prefix" "vex")
9733 (set_attr "mode" "OI")])
9734
;; Widen elements 0..7 of a V16QI operand (picked by vec_select) to V8HI.
;; Only half of the source is read, so the Intel-syntax side prints
;; operand 1 with the %q modifier (presumably to give a memory operand a
;; 64-bit size -- confirm in the i386 operand printer).
9735 (define_insn "sse4_1_<code>v8qiv8hi2"
9736 [(set (match_operand:V8HI 0 "register_operand" "=x")
9737 (any_extend:V8HI
9738 (vec_select:V8QI
9739 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9740 (parallel [(const_int 0)
9741 (const_int 1)
9742 (const_int 2)
9743 (const_int 3)
9744 (const_int 4)
9745 (const_int 5)
9746 (const_int 6)
9747 (const_int 7)]))))]
9748 "TARGET_SSE4_1"
9749 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
9750 [(set_attr "type" "ssemov")
9751 (set_attr "prefix_extra" "1")
9752 (set_attr "prefix" "maybe_vex")
9753 (set_attr "mode" "TI")])
9754
;; Widen elements 0..7 of a V16QI operand (picked by vec_select) to V8SI.
;; The Intel-syntax side prints operand 1 with the %q modifier.
9755 (define_insn "avx2_<code>v8qiv8si2"
9756 [(set (match_operand:V8SI 0 "register_operand" "=x")
9757 (any_extend:V8SI
9758 (vec_select:V8QI
9759 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9760 (parallel [(const_int 0)
9761 (const_int 1)
9762 (const_int 2)
9763 (const_int 3)
9764 (const_int 4)
9765 (const_int 5)
9766 (const_int 6)
9767 (const_int 7)]))))]
9768 "TARGET_AVX2"
9769 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
9770 [(set_attr "type" "ssemov")
9771 (set_attr "prefix_extra" "1")
9772 (set_attr "prefix" "vex")
9773 (set_attr "mode" "OI")])
9774
;; Widen elements 0..3 of a V16QI operand (picked by vec_select) to V4SI.
;; The Intel-syntax side prints operand 1 with the %k modifier.
9775 (define_insn "sse4_1_<code>v4qiv4si2"
9776 [(set (match_operand:V4SI 0 "register_operand" "=x")
9777 (any_extend:V4SI
9778 (vec_select:V4QI
9779 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9780 (parallel [(const_int 0)
9781 (const_int 1)
9782 (const_int 2)
9783 (const_int 3)]))))]
9784 "TARGET_SSE4_1"
9785 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
9786 [(set_attr "type" "ssemov")
9787 (set_attr "prefix_extra" "1")
9788 (set_attr "prefix" "maybe_vex")
9789 (set_attr "mode" "TI")])
9790
;; Widen all 8 elements of a V8HI operand to V8SI.  The whole source
;; vector is consumed, so no vec_select is needed.
9791 (define_insn "avx2_<code>v8hiv8si2"
9792 [(set (match_operand:V8SI 0 "register_operand" "=x")
9793 (any_extend:V8SI
9794 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9795 "TARGET_AVX2"
9796 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9797 [(set_attr "type" "ssemov")
9798 (set_attr "prefix_extra" "1")
9799 (set_attr "prefix" "vex")
9800 (set_attr "mode" "OI")])
9801
;; Widen elements 0..3 of a V8HI operand (picked by vec_select) to V4SI.
;; The Intel-syntax side prints operand 1 with the %q modifier.
9802 (define_insn "sse4_1_<code>v4hiv4si2"
9803 [(set (match_operand:V4SI 0 "register_operand" "=x")
9804 (any_extend:V4SI
9805 (vec_select:V4HI
9806 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9807 (parallel [(const_int 0)
9808 (const_int 1)
9809 (const_int 2)
9810 (const_int 3)]))))]
9811 "TARGET_SSE4_1"
9812 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
9813 [(set_attr "type" "ssemov")
9814 (set_attr "prefix_extra" "1")
9815 (set_attr "prefix" "maybe_vex")
9816 (set_attr "mode" "TI")])
9817
;; Widen elements 0..3 of a V16QI operand (picked by vec_select) to V4DI.
;; The Intel-syntax side prints operand 1 with the %k modifier.
9818 (define_insn "avx2_<code>v4qiv4di2"
9819 [(set (match_operand:V4DI 0 "register_operand" "=x")
9820 (any_extend:V4DI
9821 (vec_select:V4QI
9822 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9823 (parallel [(const_int 0)
9824 (const_int 1)
9825 (const_int 2)
9826 (const_int 3)]))))]
9827 "TARGET_AVX2"
9828 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
9829 [(set_attr "type" "ssemov")
9830 (set_attr "prefix_extra" "1")
9831 (set_attr "prefix" "vex")
9832 (set_attr "mode" "OI")])
9833
;; Widen elements 0..1 of a V16QI operand (picked by vec_select) to V2DI.
;; The Intel-syntax side prints operand 1 with the %w modifier.
9834 (define_insn "sse4_1_<code>v2qiv2di2"
9835 [(set (match_operand:V2DI 0 "register_operand" "=x")
9836 (any_extend:V2DI
9837 (vec_select:V2QI
9838 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9839 (parallel [(const_int 0)
9840 (const_int 1)]))))]
9841 "TARGET_SSE4_1"
9842 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
9843 [(set_attr "type" "ssemov")
9844 (set_attr "prefix_extra" "1")
9845 (set_attr "prefix" "maybe_vex")
9846 (set_attr "mode" "TI")])
9847
;; Widen elements 0..3 of a V8HI operand (picked by vec_select) to V4DI.
;; The Intel-syntax side prints operand 1 with the %q modifier.
9848 (define_insn "avx2_<code>v4hiv4di2"
9849 [(set (match_operand:V4DI 0 "register_operand" "=x")
9850 (any_extend:V4DI
9851 (vec_select:V4HI
9852 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9853 (parallel [(const_int 0)
9854 (const_int 1)
9855 (const_int 2)
9856 (const_int 3)]))))]
9857 "TARGET_AVX2"
9858 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
9859 [(set_attr "type" "ssemov")
9860 (set_attr "prefix_extra" "1")
9861 (set_attr "prefix" "vex")
9862 (set_attr "mode" "OI")])
9863
;; Widen elements 0..1 of a V8HI operand (picked by vec_select) to V2DI.
;; The Intel-syntax side prints operand 1 with the %k modifier.
9864 (define_insn "sse4_1_<code>v2hiv2di2"
9865 [(set (match_operand:V2DI 0 "register_operand" "=x")
9866 (any_extend:V2DI
9867 (vec_select:V2HI
9868 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9869 (parallel [(const_int 0)
9870 (const_int 1)]))))]
9871 "TARGET_SSE4_1"
9872 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
9873 [(set_attr "type" "ssemov")
9874 (set_attr "prefix_extra" "1")
9875 (set_attr "prefix" "maybe_vex")
9876 (set_attr "mode" "TI")])
9877
;; Widen all 4 elements of a V4SI operand to V4DI.  The whole source
;; vector is consumed, so no vec_select is needed.  The instruction is
;; AVX2 only and always printed with the "v" mnemonic, so the prefix
;; attribute is "vex", as on the other avx2_<code>* extension patterns.
9878 (define_insn "avx2_<code>v4siv4di2"
9879 [(set (match_operand:V4DI 0 "register_operand" "=x")
9880 (any_extend:V4DI
9881 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9882 "TARGET_AVX2"
9883 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9884 [(set_attr "type" "ssemov")
9885 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9886 (set_attr "mode" "OI")])
9887
;; Widen elements 0..1 of a V4SI operand (picked by vec_select) to V2DI.
;; The Intel-syntax side prints operand 1 with the %q modifier.
9888 (define_insn "sse4_1_<code>v2siv2di2"
9889 [(set (match_operand:V2DI 0 "register_operand" "=x")
9890 (any_extend:V2DI
9891 (vec_select:V2SI
9892 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9893 (parallel [(const_int 0)
9894 (const_int 1)]))))]
9895 "TARGET_SSE4_1"
9896 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
9897 [(set_attr "type" "ssemov")
9898 (set_attr "prefix_extra" "1")
9899 (set_attr "prefix" "maybe_vex")
9900 (set_attr "mode" "TI")])
9901
9902 ;; vtestps/vtestpd set FLAGS_REG much like comiss and ucomiss do,
9903 ;; but they are not really compare instructions.
;; The result is therefore modelled as UNSPEC_VTESTP of the two operands
;; written to (reg:CC FLAGS_REG); no vector register is written.
9904 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9905 [(set (reg:CC FLAGS_REG)
9906 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9907 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9908 UNSPEC_VTESTP))]
9909 "TARGET_AVX"
9910 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9911 [(set_attr "type" "ssecomi")
9912 (set_attr "prefix_extra" "1")
9913 (set_attr "prefix" "vex")
9914 (set_attr "mode" "<MODE>")])
9915
9916 ;; ptest sets FLAGS_REG much like comiss and ucomiss do,
9917 ;; but it is not really a compare instruction.
;; This is the 256-bit (V4DI) form: UNSPEC_PTEST of the two operands is
;; written to (reg:CC FLAGS_REG).
9918 (define_insn "avx_ptest256"
9919 [(set (reg:CC FLAGS_REG)
9920 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9921 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9922 UNSPEC_PTEST))]
9923 "TARGET_AVX"
9924 "vptest\t{%1, %0|%0, %1}"
9925 [(set_attr "type" "ssecomi")
9926 (set_attr "prefix_extra" "1")
9927 (set_attr "prefix" "vex")
9928 (set_attr "mode" "OI")])
9929
;; 128-bit (V2DI) PTEST: UNSPEC_PTEST of the two operands is written to
;; (reg:CC FLAGS_REG).  One alternative covers both encodings through
;; %v / "maybe_vex".
9930 (define_insn "sse4_1_ptest"
9931 [(set (reg:CC FLAGS_REG)
9932 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9933 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9934 UNSPEC_PTEST))]
9935 "TARGET_SSE4_1"
9936 "%vptest\t{%1, %0|%0, %1}"
9937 [(set_attr "type" "ssecomi")
9938 (set_attr "prefix_extra" "1")
9939 (set_attr "prefix" "maybe_vex")
9940 (set_attr "mode" "TI")])
9941
;; Packed rounding for every mode of the VF iterator, modelled as
;; UNSPEC_ROUND of the source (operand 1) and a rounding-control
;; immediate in 0..15 (operand 2).
;; A single alternative serves both encodings, so prefix_data16 is
;; computed with a match_test: "*" (default) under TARGET_AVX, "1"
;; otherwise.
9942 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9943 [(set (match_operand:VF 0 "register_operand" "=x")
9944 (unspec:VF
9945 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9946 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9947 UNSPEC_ROUND))]
9948 "TARGET_ROUND"
9949 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9950 [(set_attr "type" "ssecvt")
9951 (set (attr "prefix_data16")
9952 (if_then_else
9953 (match_test "TARGET_AVX")
9954 (const_string "*")
9955 (const_string "1")))
9956 (set_attr "prefix_extra" "1")
9957 (set_attr "length_immediate" "1")
9958 (set_attr "prefix" "maybe_vex")
9959 (set_attr "mode" "<MODE>")])
9960
;; Round a VF1 vector (operand 1) with the rounding control in operand 2,
;; then convert the result to the matching integer vector mode
;; (<sseintvecmode>) in operand 0.  Implemented as two insns: the packed
;; round pattern into a fresh pseudo, followed by fix_trunc.
9961 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
9962 [(match_operand:<sseintvecmode> 0 "register_operand" "")
9963 (match_operand:VF1 1 "nonimmediate_operand" "")
9964 (match_operand:SI 2 "const_0_to_15_operand" "")]
9965 "TARGET_ROUND"
9966 {
9967 rtx tmp = gen_reg_rtx (<MODE>mode);
9968
9969 emit_insn
9970 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
9971 operands[2]));
9972 emit_insn
9973 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
9974 DONE;
9975 })
9976
;; Round two VF2 vectors (operands 1 and 2) with the rounding control in
;; operand 3, convert both to integers and pack them into operand 0
;; (mode <ssepackfltmode>).
;; For V2DF with AVX enabled and 128-bit AVX not preferred, the two
;; inputs are concatenated into one V4DF, rounded with a single 256-bit
;; round and converted with fix_truncv4dfv4si2.  Otherwise each input is
;; rounded separately and the results go through vec_pack_sfix_trunc.
9977 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
9978 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
9979 (match_operand:VF2 1 "nonimmediate_operand" "")
9980 (match_operand:VF2 2 "nonimmediate_operand" "")
9981 (match_operand:SI 3 "const_0_to_15_operand" "")]
9982 "TARGET_ROUND"
9983 {
9984 rtx tmp0, tmp1;
9985
9986 if (<MODE>mode == V2DFmode
9987 && TARGET_AVX && !TARGET_PREFER_AVX128)
9988 {
9989 rtx tmp2 = gen_reg_rtx (V4DFmode);
9990
9991 tmp0 = gen_reg_rtx (V4DFmode);
/* Operand 1 is forced into a register before the concatenation.  */
9992 tmp1 = force_reg (V2DFmode, operands[1]);
9993
9994 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9995 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
9996 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
9997 }
9998 else
9999 {
10000 tmp0 = gen_reg_rtx (<MODE>mode);
10001 tmp1 = gen_reg_rtx (<MODE>mode);
10002
10003 emit_insn
10004 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
10005 operands[3]));
10006 emit_insn
10007 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
10008 operands[3]));
10009 emit_insn
10010 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
10011 }
10012 DONE;
10013 })
10014
;; Scalar rounding inside a VF_128 vector.  The vec_merge mask
;; (const_int 1) takes element 0 from the UNSPEC_ROUND of operand 2 and
;; every other element from operand 1.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination;
;; alternative 1 (isa "avx") is the three-operand form.
10015 (define_insn "sse4_1_round<ssescalarmodesuffix>"
10016 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
10017 (vec_merge:VF_128
10018 (unspec:VF_128
10019 [(match_operand:VF_128 2 "register_operand" "x,x")
10020 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
10021 UNSPEC_ROUND)
10022 (match_operand:VF_128 1 "register_operand" "0,x")
10023 (const_int 1)))]
10024 "TARGET_ROUND"
10025 "@
10026 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
10027 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10028 [(set_attr "isa" "noavx,avx")
10029 (set_attr "type" "ssecvt")
10030 (set_attr "length_immediate" "1")
10031 (set_attr "prefix_data16" "1,*")
10032 (set_attr "prefix_extra" "1")
10033 (set_attr "prefix" "orig,vex")
10034 (set_attr "mode" "<MODE>")])
10035
;; Vector round<mode>2 for every mode of the VF iterator, enabled only
;; when trapping math is off.  The expansion is:
;;   operands[3] = copysign (vec_half, operands[1])
;;   operands[4] = operands[1] + operands[3]
;;   operands[0] = UNSPEC_ROUND (operands[4], ROUND_TRUNC)
;; where vec_half is a vector with every element set to the constant
;; 0.5 - 2**(-p-1), p being the precision field of the scalar format.
10036 (define_expand "round<mode>2"
10037 [(set (match_dup 4)
10038 (plus:VF
10039 (match_operand:VF 1 "register_operand" "")
10040 (match_dup 3)))
10041 (set (match_operand:VF 0 "register_operand" "")
10042 (unspec:VF
10043 [(match_dup 4) (match_dup 5)]
10044 UNSPEC_ROUND))]
10045 "TARGET_ROUND && !flag_trapping_math"
10046 {
10047 enum machine_mode scalar_mode;
10048 const struct real_format *fmt;
10049 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
10050 rtx half, vec_half;
10051
10052 scalar_mode = GET_MODE_INNER (<MODE>mode);
10053
10054 /* load nextafter (0.5, 0.0) */
/* half_minus_pred_half = 2**(-p-1); pred_half = 0.5 minus that.  */
10055 fmt = REAL_MODE_FORMAT (scalar_mode);
10056 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
10057 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
10058 half = const_double_from_real_value (pred_half, scalar_mode);
10059
10060 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
10061 vec_half = force_reg (<MODE>mode, vec_half);
10062
/* Give the constant the sign of each input element.  */
10063 operands[3] = gen_reg_rtx (<MODE>mode);
10064 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
10065
/* operands[4] receives the sum; operands[5] selects truncation.  */
10066 operands[4] = gen_reg_rtx (<MODE>mode);
10067 operands[5] = GEN_INT (ROUND_TRUNC);
10068 })
10069
;; round<mode>2 followed by conversion to the matching integer vector
;; mode (<sseintvecmode>).  The rounded value goes through a fresh
;; pseudo; fix_trunc then writes operand 0.
10070 (define_expand "round<mode>2_sfix"
10071 [(match_operand:<sseintvecmode> 0 "register_operand" "")
10072 (match_operand:VF1 1 "register_operand" "")]
10073 "TARGET_ROUND && !flag_trapping_math"
10074 {
10075 rtx tmp = gen_reg_rtx (<MODE>mode);
10076
10077 emit_insn (gen_round<mode>2 (tmp, operands[1]));
10078
10079 emit_insn
10080 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
10081 DONE;
10082 })
10083
;; Round two VF2 vectors with round<mode>2, convert both to integers and
;; pack them into operand 0 (mode <ssepackfltmode>).
;; For V2DF with AVX enabled and 128-bit AVX not preferred, the inputs
;; are concatenated into one V4DF, rounded once with roundv4df2 and
;; converted with fix_truncv4dfv4si2.  Otherwise each input is rounded
;; separately and the results go through vec_pack_sfix_trunc.
10084 (define_expand "round<mode>2_vec_pack_sfix"
10085 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
10086 (match_operand:VF2 1 "register_operand" "")
10087 (match_operand:VF2 2 "register_operand" "")]
10088 "TARGET_ROUND && !flag_trapping_math"
10089 {
10090 rtx tmp0, tmp1;
10091
10092 if (<MODE>mode == V2DFmode
10093 && TARGET_AVX && !TARGET_PREFER_AVX128)
10094 {
10095 rtx tmp2 = gen_reg_rtx (V4DFmode);
10096
10097 tmp0 = gen_reg_rtx (V4DFmode);
10098 tmp1 = force_reg (V2DFmode, operands[1]);
10099
10100 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
10101 emit_insn (gen_roundv4df2 (tmp2, tmp0));
10102 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
10103 }
10104 else
10105 {
10106 tmp0 = gen_reg_rtx (<MODE>mode);
10107 tmp1 = gen_reg_rtx (<MODE>mode);
10108
10109 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
10110 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
10111
10112 emit_insn
10113 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
10114 }
10115 DONE;
10116 })
10117
10118 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10119 ;;
10120 ;; Intel SSE4.2 string/text processing instructions
10121 ;;
10122 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10123
;; Combined PCMPESTR pattern with three results from the same
;; UNSPEC_PCMPESTR inputs: an SI index (operand 0, class "c"), a V16QI
;; mask (operand 1, class "Yz") and the flags register.
;; It only matches while pseudos can still be created, always outputs
;; "#" and is always split ("&& 1").  The splitter checks the REG_UNUSED
;; notes on the insn and emits only what is live:
;;   - index result used -> sse4_2_pcmpestri
;;   - mask result used  -> sse4_2_pcmpestrm
;;   - only flags used   -> sse4_2_pcmpestr_cconly
;;   - nothing used      -> a NOTE_INSN_DELETED note
10124 (define_insn_and_split "sse4_2_pcmpestr"
10125 [(set (match_operand:SI 0 "register_operand" "=c,c")
10126 (unspec:SI
10127 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10128 (match_operand:SI 3 "register_operand" "a,a")
10129 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10130 (match_operand:SI 5 "register_operand" "d,d")
10131 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10132 UNSPEC_PCMPESTR))
10133 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10134 (unspec:V16QI
10135 [(match_dup 2)
10136 (match_dup 3)
10137 (match_dup 4)
10138 (match_dup 5)
10139 (match_dup 6)]
10140 UNSPEC_PCMPESTR))
10141 (set (reg:CC FLAGS_REG)
10142 (unspec:CC
10143 [(match_dup 2)
10144 (match_dup 3)
10145 (match_dup 4)
10146 (match_dup 5)
10147 (match_dup 6)]
10148 UNSPEC_PCMPESTR))]
10149 "TARGET_SSE4_2
10150 && can_create_pseudo_p ()"
10151 "#"
10152 "&& 1"
10153 [(const_int 0)]
10154 {
/* Each flag is nonzero when the corresponding result is still used.  */
10155 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10156 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10157 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10158
10159 if (ecx)
10160 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10161 operands[3], operands[4],
10162 operands[5], operands[6]));
10163 if (xmm0)
10164 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10165 operands[3], operands[4],
10166 operands[5], operands[6]));
/* Both insns above also set the flags, so the flags-only form is
   needed only when neither of them was emitted.  */
10167 if (flags && !(ecx || xmm0))
10168 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10169 operands[2], operands[3],
10170 operands[4], operands[5],
10171 operands[6]));
10172 if (!(flags || ecx || xmm0))
10173 emit_note (NOTE_INSN_DELETED);
10174
10175 DONE;
10176 }
10177 [(set_attr "type" "sselog")
10178 (set_attr "prefix_data16" "1")
10179 (set_attr "prefix_extra" "1")
10180 (set_attr "length_immediate" "1")
10181 (set_attr "memory" "none,load")
10182 (set_attr "mode" "TI")])
10183
;; PCMPESTRI: produces the SI index result (operand 0, class "c") and
;; sets the flags register from the same UNSPEC_PCMPESTR inputs.
;; Operands 2 and 4 are fixed to the "a" and "d" classes, so they do not
;; appear in the template; only the two vector operands and the
;; immediate are printed.  The two alternatives differ only in whether
;; operand 3 is a register or memory (attribute "memory" none/load).
10184 (define_insn "sse4_2_pcmpestri"
10185 [(set (match_operand:SI 0 "register_operand" "=c,c")
10186 (unspec:SI
10187 [(match_operand:V16QI 1 "register_operand" "x,x")
10188 (match_operand:SI 2 "register_operand" "a,a")
10189 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10190 (match_operand:SI 4 "register_operand" "d,d")
10191 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10192 UNSPEC_PCMPESTR))
10193 (set (reg:CC FLAGS_REG)
10194 (unspec:CC
10195 [(match_dup 1)
10196 (match_dup 2)
10197 (match_dup 3)
10198 (match_dup 4)
10199 (match_dup 5)]
10200 UNSPEC_PCMPESTR))]
10201 "TARGET_SSE4_2"
10202 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10203 [(set_attr "type" "sselog")
10204 (set_attr "prefix_data16" "1")
10205 (set_attr "prefix_extra" "1")
10206 (set_attr "prefix" "maybe_vex")
10207 (set_attr "length_immediate" "1")
10208 (set_attr "memory" "none,load")
10209 (set_attr "mode" "TI")])
10210
;; PCMPESTRM: produces the V16QI mask result (operand 0, class "Yz") and
;; sets the flags register from the same UNSPEC_PCMPESTR inputs.
;; Operands 2 and 4 are fixed to the "a" and "d" classes and are not
;; printed.  The two alternatives differ only in whether operand 3 is a
;; register or memory (attribute "memory" none/load).
10211 (define_insn "sse4_2_pcmpestrm"
10212 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10213 (unspec:V16QI
10214 [(match_operand:V16QI 1 "register_operand" "x,x")
10215 (match_operand:SI 2 "register_operand" "a,a")
10216 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10217 (match_operand:SI 4 "register_operand" "d,d")
10218 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10219 UNSPEC_PCMPESTR))
10220 (set (reg:CC FLAGS_REG)
10221 (unspec:CC
10222 [(match_dup 1)
10223 (match_dup 2)
10224 (match_dup 3)
10225 (match_dup 4)
10226 (match_dup 5)]
10227 UNSPEC_PCMPESTR))]
10228 "TARGET_SSE4_2"
10229 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10230 [(set_attr "type" "sselog")
10231 (set_attr "prefix_data16" "1")
10232 (set_attr "prefix_extra" "1")
10233 (set_attr "length_immediate" "1")
10234 (set_attr "prefix" "maybe_vex")
10235 (set_attr "memory" "none,load")
10236 (set_attr "mode" "TI")])
10237
;; Flags-only PCMPESTR: only (reg:CC FLAGS_REG) is set; the register the
;; chosen instruction would otherwise write is a clobbered scratch.
;; Alternatives 0/1 print pcmpestrm and clobber a "Yz" V16QI scratch;
;; alternatives 2/3 print pcmpestri and clobber a "c" SI scratch.  The
;; unused scratch of each alternative has constraint "X".  Within each
;; pair the alternatives differ in operand 4 being a register or memory.
10238 (define_insn "sse4_2_pcmpestr_cconly"
10239 [(set (reg:CC FLAGS_REG)
10240 (unspec:CC
10241 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10242 (match_operand:SI 3 "register_operand" "a,a,a,a")
10243 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10244 (match_operand:SI 5 "register_operand" "d,d,d,d")
10245 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10246 UNSPEC_PCMPESTR))
10247 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10248 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10249 "TARGET_SSE4_2"
10250 "@
10251 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10252 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10253 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10254 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10255 [(set_attr "type" "sselog")
10256 (set_attr "prefix_data16" "1")
10257 (set_attr "prefix_extra" "1")
10258 (set_attr "length_immediate" "1")
10259 (set_attr "memory" "none,load,none,load")
10260 (set_attr "prefix" "maybe_vex")
10261 (set_attr "mode" "TI")])
10262
;; Combined PCMPISTR pattern producing all three results at once: the
;; SImode index (operand 0), the V16QI mask (operand 1) and the flags.
;; It is never output directly ("#"); the splitter looks at the
;; REG_UNUSED notes on the insn and emits only the variants whose
;; results are still used.
(define_insn_and_split "sse4_2_pcmpistr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
           (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_dup 2) (match_dup 3) (match_dup 4)]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 2) (match_dup 3) (match_dup 4)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result is needed unless the insn carries a REG_UNUSED note for
     the register that holds it.  */
  int need_index
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int need_mask
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int need_flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (need_index)
    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
                                     operands[3], operands[4]));
  if (need_mask)
    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
                                     operands[3], operands[4]));

  /* Both insns above also set the flags, so the flags-only form is
     emitted just when neither of them was.  */
  if (need_flags && !need_index && !need_mask)
    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
                                           operands[2], operands[3],
                                           operands[4]));

  /* Nothing is used at all: leave a deleted-insn note behind.  */
  if (!need_flags && !need_index && !need_mask)
    emit_note (NOTE_INSN_DELETED);

  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
10313
;; PCMPISTRI: operand 0 (constraint "c") receives the SImode result of
;; the PCMPISTR unspec; the flags register is set from the same inputs.
(define_insn "sse4_2_pcmpistri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
10336
;; PCMPISTRM: operand 0 (constraint "Yz") receives the V16QI result of
;; the PCMPISTR unspec; the flags register is set from the same inputs.
(define_insn "sse4_2_pcmpistrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
10359
;; PCMPISTR when only the flags result is wanted.  Alternatives 0/1 emit
;; pcmpistrm and clobber a "Yz" vector scratch; alternatives 2/3 emit
;; pcmpistri and clobber a "c" SImode scratch.
(define_insn "sse4_2_pcmpistr_cconly"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPISTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI 1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
10382
10383 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10384 ;;
10385 ;; XOP instructions
10386 ;;
10387 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10388
10389 ;; XOP parallel integer multiply/add instructions.
10390 ;; Note the XOP multiply/add instructions
10391 ;; a[i] = b[i] * c[i] + d[i];
10392 ;; do not allow the value being added to be a memory operation.
;; V8HI multiply/add: op0 = op1 * op2 + op3 (wrapping plus).  Only
;; operand 2 may be a memory reference.
(define_insn "xop_pmacsww"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                     (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10404
;; V8HI multiply/add with a saturating (ss_plus) accumulate:
;; op0 = ss_plus (op1 * op2, op3).
(define_insn "xop_pmacssww"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          (mult:V8HI
            (match_operand:V8HI 1 "nonimmediate_operand" "%x")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10415
;; V4SI multiply/add: op0 = op1 * op2 + op3 (wrapping plus).
(define_insn "xop_pmacsdd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                     (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10427
;; V4SI multiply/add with a saturating (ss_plus) accumulate:
;; op0 = ss_plus (op1 * op2, op3).
(define_insn "xop_pmacssdd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (mult:V4SI
            (match_operand:V4SI 1 "nonimmediate_operand" "%x")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10438
;; Widening V4SI -> V2DI multiply with saturating accumulate: elements
;; 1 and 3 of operands 1 and 2 are sign-extended to DImode, multiplied,
;; and ss_plus'ed with operand 3.
;;
;; Both multiplicands must be wrapped in sign_extend:V2DI: mult:V2DI
;; requires V2DI operands, and a bare vec_select:V2SI there is a mode
;; mismatch (and disagrees with xop_pmacssdqh / xop_pmacsdql).
;;
;; NOTE(review): this "...dql" pattern selects elements 1 and 3 while
;; xop_pmacssdqh selects 0 and 2; confirm against the XOP manual that
;; this matches the doubleword each instruction actually reads.
(define_insn "xop_pmacssdql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (ss_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1)
                           (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1)
                           (const_int 3)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10457
;; Widening V4SI -> V2DI multiply with saturating accumulate: elements
;; 0 and 2 of operands 1 and 2 are sign-extended, multiplied, and
;; ss_plus'ed with operand 3.
(define_insn "xop_pmacssdqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (ss_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10477
;; Widening V4SI -> V2DI multiply/add: elements 1 and 3 of operands 1
;; and 2 are sign-extended, multiplied, and added (wrapping plus) to
;; operand 3.
(define_insn "xop_pmacsdql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10497
;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
;; fake it with a multiply/add.  The split below runs only once
;; reload_completed is true and begins by zeroing operand 0, so operand 0 is
;; marked earlyclobber to keep it from sharing a register with operands 1/2.
(define_insn_and_split "xop_mulv2div2di3_low"
  [(set (match_operand:V2DI 0 "register_operand" "=&x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "register_operand" "%x")
              (parallel [(const_int 1) (const_int 3)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "#"
  "&& reload_completed"
  ;; First clear the destination (operand 3 is set to zero below), then
  ;; accumulate the widened product of elements 1 and 3 into it.
  [(set (match_dup 0)
        (match_dup 3))
   (set (match_dup 0)
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 2)
                (parallel [(const_int 1) (const_int 3)]))))
          (match_dup 0)))]
{
  /* The addend of the multiply/add is an all-zero V2DI vector.  */
  operands[3] = CONST0_RTX (V2DImode);
}
  [(set_attr "type" "ssemul")
   (set_attr "mode" "TI")])
10539
;; Widening V4SI -> V2DI multiply/add: elements 0 and 2 of operands 1
;; and 2 are sign-extended, multiplied, and added (wrapping plus) to
;; operand 3.
(define_insn "xop_pmacsdqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10559
;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
;; fake it with a multiply/add.  The split below runs only once
;; reload_completed is true and begins by zeroing operand 0, so operand 0 is
;; marked earlyclobber to keep it from sharing a register with either
;; operands[1] or operands[2].
(define_insn_and_split "xop_mulv2div2di3_high"
  [(set (match_operand:V2DI 0 "register_operand" "=&x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "register_operand" "%x")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_XOP"
  "#"
  "&& reload_completed"
  ;; First clear the destination (operand 3 is set to zero below), then
  ;; accumulate the widened product of elements 0 and 2 into it.
  [(set (match_dup 0)
        (match_dup 3))
   (set (match_dup 0)
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 1)
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 2)
                (parallel [(const_int 0) (const_int 2)]))))
          (match_dup 0)))]
{
  /* The addend of the multiply/add is an all-zero V2DI vector.  */
  operands[3] = CONST0_RTX (V2DImode);
}
  [(set_attr "type" "ssemul")
   (set_attr "mode" "TI")])
10601
;; XOP parallel integer multiply/add instructions for the intrinsics
;; Widening V8HI -> V4SI multiply with saturating accumulate: elements
;; 1, 3, 5 and 7 of operands 1 and 2 are sign-extended, multiplied, and
;; ss_plus'ed with operand 3.
(define_insn "xop_pmacsswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10626
;; Widening V8HI -> V4SI multiply/add: elements 1, 3, 5 and 7 of
;; operands 1 and 2 are sign-extended, multiplied, and added (wrapping
;; plus) to operand 3.
(define_insn "xop_pmacswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10650
;; V8HI -> V4SI multiply, add adjacent products, saturating accumulate:
;; the widened products of the even elements (0,2,4,6) and of the odd
;; elements (1,3,5,7) are added together, then ss_plus'ed with operand 3.
(define_insn "xop_pmadcsswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (plus:V4SI
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10690
;; V8HI -> V4SI multiply, add adjacent products, accumulate: the widened
;; products of the even elements (0,2,4,6) and of the odd elements
;; (1,3,5,7) are added together, then added (wrapping plus) to operand 3.
(define_insn "xop_pmadcswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10730
10731 ;; XOP parallel XMM conditional moves
;; Conditional move over every mode of the V iterator, modelled as
;; if_then_else with operand 3 as the selector.  The two alternatives
;; allow a memory reference in either operand 3 or operand 2.
(define_insn "xop_pcmov_<mode><avxsizesuffix>"
  [(set (match_operand:V 0 "register_operand" "=x,x")
        (if_then_else:V (match_operand:V 3 "nonimmediate_operand" "x,m")
                        (match_operand:V 1 "register_operand" "x,x")
                        (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
  "TARGET_XOP"
  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")])
10741
10742 ;; XOP horizontal add/subtract instructions
;; Signed horizontal add, bytes -> words: each result element is the sum
;; of one even-indexed and the following odd-indexed sign-extended byte.
(define_insn "xop_phaddbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphaddbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10771
;; Signed horizontal add, bytes -> doublewords: result element i is the
;; sum of the four sign-extended bytes 4i .. 4i+3.
(define_insn "xop_phaddbd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)
                           (const_int 8) (const_int 12)])))
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)
                           (const_int 9) (const_int 13)]))))
          (plus:V4SI
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)
                           (const_int 10) (const_int 14)])))
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)
                           (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphaddbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10808
;; Signed horizontal add, bytes -> quadwords: result element 0 is the sum
;; of sign-extended bytes 0..7 and result element 1 the sum of bytes
;; 8..15.
;;
;; Each vec_select:V2QI contributes its first index to result element 0
;; and its second index to result element 1, so the pairs must be
;; (k, k+8) for k = 0..7.  The previous pairs (0,4) (1,5) ... (11,15)
;; described element 0 as bytes 0-3 plus 8-11, which does not sum eight
;; contiguous bytes.
(define_insn "xop_phaddbq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 8)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 9)]))))
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 2) (const_int 10)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 3) (const_int 11)])))))
          (plus:V2DI
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 4) (const_int 12)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 5) (const_int 13)]))))
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 6) (const_int 14)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphaddbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10861
;; Signed horizontal add, words -> doublewords: each result element is
;; the sum of one even-indexed and the following odd-indexed
;; sign-extended word.
(define_insn "xop_phaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphaddwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10882
;; Signed horizontal add, words -> quadwords: result element 0 sums
;; sign-extended words 0..3 and result element 1 sums words 4..7.
(define_insn "xop_phaddwq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_operand:V8HI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)])))
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)]))))
          (plus:V2DI
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)])))
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)]))))))]
  "TARGET_XOP"
  "vphaddwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10911
;; Signed horizontal add, doublewords -> quadwords: each result element
;; is the sum of one even-indexed and the following odd-indexed
;; sign-extended doubleword.
(define_insn "xop_phadddq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphadddq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10928
;; Unsigned horizontal add, bytes -> words: each result element is the
;; sum of one even-indexed and the following odd-indexed zero-extended
;; byte.
(define_insn "xop_phaddubw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (zero_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (zero_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphaddubw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10957
;; Unsigned horizontal add, bytes -> doublewords: result element i is
;; the sum of the four zero-extended bytes 4i .. 4i+3.
(define_insn "xop_phaddubd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)
                           (const_int 8) (const_int 12)])))
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)
                           (const_int 9) (const_int 13)]))))
          (plus:V4SI
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)
                           (const_int 10) (const_int 14)])))
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)
                           (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphaddubd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10994
;; Unsigned horizontal add, bytes -> quadwords: result element 0 is the
;; sum of zero-extended bytes 0..7 and result element 1 the sum of bytes
;; 8..15.
;;
;; Two corrections relative to the earlier form of this pattern:
;;  * every term is zero_extend; two of the eight terms used sign_extend,
;;    which contradicts the unsigned operation and the other six terms;
;;  * each vec_select:V2QI feeds its first index to result element 0 and
;;    its second to element 1, so the pairs are (k, k+8) for k = 0..7
;;    rather than (0,4) (1,5) ... (11,15).
(define_insn "xop_phaddubq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 8)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 9)]))))
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 2) (const_int 10)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 3) (const_int 11)])))))
          (plus:V2DI
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 4) (const_int 12)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 5) (const_int 13)]))))
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 6) (const_int 14)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphaddubq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11047
;; Unsigned horizontal add, words -> doublewords: each result element is
;; the sum of one even-indexed and the following odd-indexed
;; zero-extended word.
(define_insn "xop_phadduwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (zero_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (zero_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphadduwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11068
;; Unsigned horizontal add, words -> quadwords: result element 0 sums
;; zero-extended words 0..3 and result element 1 sums words 4..7.
(define_insn "xop_phadduwq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_operand:V8HI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)])))
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)]))))
          (plus:V2DI
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)])))
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)]))))))]
  "TARGET_XOP"
  "vphadduwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11097
;; Unsigned horizontal add, doublewords -> quadwords: each result
;; element is the sum of one even-indexed and the following odd-indexed
;; zero-extended doubleword.
(define_insn "xop_phaddudq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphaddudq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11114
;; Signed horizontal subtract, bytes -> words: each result element is an
;; even-indexed sign-extended byte minus the following odd-indexed one.
(define_insn "xop_phsubbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (minus:V8HI
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphsubbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11143
;; Signed horizontal subtract, words -> doublewords: each result element
;; is an even-indexed sign-extended word minus the following odd-indexed
;; one.
(define_insn "xop_phsubwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (minus:V4SI
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphsubwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11164
;; Signed horizontal subtract, doublewords -> quadwords: each result
;; element is an even-indexed sign-extended doubleword minus the
;; following odd-indexed one.
(define_insn "xop_phsubdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (minus:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphsubdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11181
11182 ;; XOP permute instructions
;; Byte permute, kept opaque as UNSPEC_XOP_PERMUTE over three V16QI
;; inputs.  The insn condition rejects operands 2 and 3 both being in
;; memory.
(define_insn "xop_pperm"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
          UNSPEC_XOP_PERMUTE))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
11194
11195 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors into one V4SI by truncating each element and
;; concatenating the halves; emitted as vpperm, with operand 3 (the
;; V16QI selector) attached through a USE.
(define_insn "xop_pperm_pack_v2di_v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          (truncate:V2SI (match_operand:V2DI 1 "register_operand" "x,x"))
          (truncate:V2SI (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
11208
;; Pack two V4SI vectors into one V8HI by truncating each element and
;; concatenating the halves; emitted as vpperm, with operand 3 (the
;; V16QI selector) attached through a USE.
(define_insn "xop_pperm_pack_v4si_v8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (truncate:V4HI (match_operand:V4SI 1 "register_operand" "x,x"))
          (truncate:V4HI (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
11221
;; Pack two V8HI vectors into one V16QI by truncating each element and
;; concatenating the halves; emitted as vpperm, with operand 3 (the
;; V16QI selector) attached through a USE.
(define_insn "xop_pperm_pack_v8hi_v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (vec_concat:V16QI
          (truncate:V8QI (match_operand:V8HI 1 "register_operand" "x,x"))
          (truncate:V8QI (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
11234
11235 ;; XOP packed rotate instructions
;; Rotate-left expander for the VI_128 modes.  A count accepted by the
;; const_0_to_<sserotatemax>_operand predicate falls through to the
;; pattern below; any other count is broadcast into a vector and handed
;; to the per-element variable rotate.
(define_expand "rotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "")
        (rotate:VI_128
          (match_operand:VI_128 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* If we were given a scalar, convert it to parallel */
  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec elts = rtvec_alloc (<ssescalarnum>);
      rtx init = gen_rtx_PARALLEL (<MODE>mode, elts);
      rtx count_vec = gen_reg_rtx (<MODE>mode);
      rtx count = operands[2];
      int elt;

      /* Bring the count into the vector's element mode first.  */
      if (GET_MODE (count) != <ssescalarmode>mode)
        {
          count = gen_reg_rtx (<ssescalarmode>mode);
          convert_move (count, operands[2], false);
        }

      /* Every element of the initializer is the same count.  */
      for (elt = 0; elt < <ssescalarnum>; elt++)
        RTVEC_ELT (elts, elt) = count;

      emit_insn (gen_vec_init<mode> (count_vec, init));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], count_vec));
      DONE;
    }
})
11266
;; Rotate-right expander for the VI_128 modes.  A count accepted by the
;; const_0_to_<sserotatemax>_operand predicate falls through to the
;; pattern below; any other count is broadcast into a vector, negated,
;; and handed to the per-element variable rotate.
(define_expand "rotr<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "")
        (rotatert:VI_128
          (match_operand:VI_128 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* If we were given a scalar, convert it to parallel */
  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec elts = rtvec_alloc (<ssescalarnum>);
      rtx init = gen_rtx_PARALLEL (<MODE>mode, elts);
      rtx neg_vec = gen_reg_rtx (<MODE>mode);
      rtx count_vec = gen_reg_rtx (<MODE>mode);
      rtx count = operands[2];
      int elt;

      /* Bring the count into the vector's element mode first.  */
      if (GET_MODE (count) != <ssescalarmode>mode)
        {
          count = gen_reg_rtx (<ssescalarmode>mode);
          convert_move (count, operands[2], false);
        }

      /* Every element of the initializer is the same count.  */
      for (elt = 0; elt < <ssescalarnum>; elt++)
        RTVEC_ELT (elts, elt) = count;

      emit_insn (gen_vec_init<mode> (count_vec, init));
      /* The variable rotate takes the negated count for this direction.  */
      emit_insn (gen_neg<mode>2 (neg_vec, count_vec));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg_vec));
      DONE;
    }
})
11299
;; Rotate left by an immediate count; operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand and is printed unchanged.
(define_insn "xop_rotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (rotate:VI_128 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
                       (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11310
;; Rotate right by an immediate.  The insn is emitted as vprot with the
;; complementary count, i.e. (element width in bits) - operand 2.
;; The element width must come from the scalar mode: the number of
;; elements times 8 only happens to equal it for V4SI.
(define_insn "xop_rotr<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (rotatert:VI_128
          (match_operand:VI_128 1 "nonimmediate_operand" "xm")
          (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
{
  /* operands[3] is the rotate-left count equivalent to rotating right
     by operands[2] within one element.  */
  operands[3]
    = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
  return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
}
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11324
;; Variable rotate right: negate the per-element counts in operand 2 and
;; pass them to the variable rotate insn xop_vrotl<mode>3.
(define_expand "vrotr<mode>3"
  [(match_operand:VI_128 0 "register_operand" "")
   (match_operand:VI_128 1 "register_operand" "")
   (match_operand:VI_128 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx neg_counts = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neg<mode>2 (neg_counts, operands[2]));
  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg_counts));
  DONE;
})
11336
;; Variable rotate left: forwards all three operands unchanged to
;; xop_vrotl<mode>3.
(define_expand "vrotl<mode>3"
  [(match_operand:VI_128 0 "register_operand" "")
   (match_operand:VI_128 1 "register_operand" "")
   (match_operand:VI_128 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx counts = operands[2];

  emit_insn (gen_xop_vrotl<mode>3 (dest, src, counts));
  DONE;
})
11346
;; Variable rotate.  The RTL describes a rotate left by operand 2 where
;; operand 2 is >= 0 and a rotate right by its negation otherwise.
;; At most one of operands 1 and 2 may be in memory.
(define_insn "xop_vrotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
        (if_then_else:VI_128
          (ge:VI_128
            (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
            (const_int 0))
          (rotate:VI_128
            (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
            (match_dup 2))
          (rotatert:VI_128
            (match_dup 1)
            (neg:VI_128 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mode" "TI")
   (set_attr "prefix_extra" "2")
   (set_attr "prefix_data16" "0")
   (set_attr "type" "sseishft")])
11365
11366 ;; XOP packed shift instructions.
;; Variable logical shift right for VI12_128 modes: negate the counts
;; and emit xop_shl<mode>3 with the negated vector.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VI12_128 0 "register_operand" "")
        (lshiftrt:VI12_128
          (match_operand:VI12_128 1 "register_operand" "")
          (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
  "TARGET_XOP"
{
  rtx neg_counts = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neg<mode>2 (neg_counts, operands[2]));
  emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg_counts));
  DONE;
})
11379
;; Variable logical shift right for VI48_128 modes.  With AVX2 the
;; lshiftrt pattern itself is emitted; otherwise (XOP only) the counts
;; are negated and xop_shl<mode>3 is used.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VI48_128 0 "register_operand" "")
        (lshiftrt:VI48_128
          (match_operand:VI48_128 1 "register_operand" "")
          (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
  "TARGET_AVX2 || TARGET_XOP"
{
  if (!TARGET_AVX2)
    {
      /* XOP path.  */
      rtx neg_counts = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_neg<mode>2 (neg_counts, operands[2]));
      emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg_counts));
      DONE;
    }
})
11395
;; Variable logical shift right for VI48_256 modes; AVX2 only, the
;; pattern is emitted as written.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VI48_256 0 "register_operand" "")
        (lshiftrt:VI48_256
          (match_operand:VI48_256 1 "register_operand" "")
          (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
  "TARGET_AVX2")
11402
;; Variable arithmetic shift right for VI128_128 modes: negate the
;; counts and emit xop_sha<mode>3 with the negated vector.
(define_expand "vashr<mode>3"
  [(set (match_operand:VI128_128 0 "register_operand" "")
        (ashiftrt:VI128_128
          (match_operand:VI128_128 1 "register_operand" "")
          (match_operand:VI128_128 2 "nonimmediate_operand" "")))]
  "TARGET_XOP"
{
  rtx neg_counts = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neg<mode>2 (neg_counts, operands[2]));
  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg_counts));
  DONE;
})
11415
;; Variable arithmetic shift right for V4SI.  With AVX2 the ashiftrt
;; pattern itself is emitted; otherwise (XOP only) the counts are negated
;; and xop_shav4si3 is used.
(define_expand "vashrv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (ashiftrt:V4SI
          (match_operand:V4SI 1 "register_operand" "")
          (match_operand:V4SI 2 "nonimmediate_operand" "")))]
  "TARGET_AVX2 || TARGET_XOP"
{
  if (!TARGET_AVX2)
    {
      /* XOP path.  */
      rtx neg_counts = gen_reg_rtx (V4SImode);

      emit_insn (gen_negv4si2 (neg_counts, operands[2]));
      emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg_counts));
      DONE;
    }
})
11430
;; Variable arithmetic shift right for V8SI; AVX2 only, the pattern is
;; emitted as written.
(define_expand "vashrv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "")
        (ashiftrt:V8SI
          (match_operand:V8SI 1 "register_operand" "")
          (match_operand:V8SI 2 "nonimmediate_operand" "")))]
  "TARGET_AVX2")
11436
;; Variable shift left for VI12_128 modes: forwards the operands
;; unchanged to xop_sha<mode>3.
(define_expand "vashl<mode>3"
  [(set (match_operand:VI12_128 0 "register_operand" "")
        (ashift:VI12_128
          (match_operand:VI12_128 1 "register_operand" "")
          (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
  "TARGET_XOP"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx counts = operands[2];

  emit_insn (gen_xop_sha<mode>3 (dest, src, counts));
  DONE;
})
11447
;; Variable shift left for VI48_128 modes.  With AVX2 the ashift pattern
;; itself is emitted; otherwise (XOP only) the counts are forced into a
;; register and xop_sha<mode>3 is used.
(define_expand "vashl<mode>3"
  [(set (match_operand:VI48_128 0 "register_operand" "")
        (ashift:VI48_128
          (match_operand:VI48_128 1 "register_operand" "")
          (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
  "TARGET_AVX2 || TARGET_XOP"
{
  if (!TARGET_AVX2)
    {
      /* XOP path.  operands[2] itself is updated, as before.  */
      operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1],
                                     operands[2]));
      DONE;
    }
})
11462
;; Variable shift left for VI48_256 modes; AVX2 only, the pattern is
;; emitted as written.
(define_expand "vashl<mode>3"
  [(set (match_operand:VI48_256 0 "register_operand" "")
        (ashift:VI48_256
          (match_operand:VI48_256 1 "register_operand" "")
          (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
  "TARGET_AVX2")
11469
;; Variable arithmetic shift (vpsha).  The RTL describes a left shift by
;; operand 2 where operand 2 is >= 0 and an arithmetic right shift by its
;; negation otherwise.  At most one of operands 1 and 2 may be in memory.
(define_insn "xop_sha<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
        (if_then_else:VI_128
          (ge:VI_128
            (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
            (const_int 0))
          (ashift:VI_128
            (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
            (match_dup 2))
          (ashiftrt:VI_128
            (match_dup 1)
            (neg:VI_128 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mode" "TI")
   (set_attr "prefix_extra" "2")
   (set_attr "prefix_data16" "0")
   (set_attr "type" "sseishft")])
11488
;; Variable logical shift (vpshl).  The RTL describes a left shift by
;; operand 2 where operand 2 is >= 0 and a logical right shift by its
;; negation otherwise.  At most one of operands 1 and 2 may be in memory.
(define_insn "xop_shl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
        (if_then_else:VI_128
          (ge:VI_128
            (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
            (const_int 0))
          (ashift:VI_128
            (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
            (match_dup 2))
          (lshiftrt:VI_128
            (match_dup 1)
            (neg:VI_128 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mode" "TI")
   (set_attr "prefix_extra" "2")
   (set_attr "prefix_data16" "0")
   (set_attr "type" "sseishft")])
11507
11508 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar count: broadcast the count into all 16
;; elements of a vector and emit xop_shav16qi3.
(define_expand "ashlv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (ashift:V16QI
          (match_operand:V16QI 1 "register_operand" "")
          (match_operand:SI 2 "nonmemory_operand" "")))]
  "TARGET_XOP"
{
  rtx counts = gen_reg_rtx (V16QImode);
  rtx count_vec = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  int elt;

  /* Every element of the initializer is the same count operand.  */
  for (elt = 0; elt < 16; elt++)
    XVECEXP (count_vec, 0, elt) = operands[2];

  emit_insn (gen_vec_initv16qi (counts, count_vec));
  emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], counts));
  DONE;
})
11528
;; V16QI right shifts (any_shiftrt) by a scalar count.  The count is
;; negated -- at expand time when it is a CONST_INT, with a vector neg
;; otherwise -- broadcast to all 16 elements, and passed to
;; xop_shlv16qi3 for LSHIFTRT or xop_shav16qi3 for the other code.
(define_expand "<shift_insn>v16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (any_shiftrt:V16QI
          (match_operand:V16QI 1 "register_operand" "")
          (match_operand:SI 2 "nonmemory_operand" "")))]
  "TARGET_XOP"
{
  rtx counts = gen_reg_rtx (V16QImode);
  rtx count_vec;
  /* A non-constant count has to be negated after the broadcast.  */
  bool negate_later = !CONST_INT_P (operands[2]);
  int elt;

  if (!negate_later)
    operands[2] = GEN_INT (-INTVAL (operands[2]));

  count_vec = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (elt = 0; elt < 16; elt++)
    XVECEXP (count_vec, 0, elt) = operands[2];

  emit_insn (gen_vec_initv16qi (counts, count_vec));

  if (negate_later)
    emit_insn (gen_negv16qi2 (counts, counts));

  if (<CODE> == LSHIFTRT)
    emit_insn (gen_xop_shlv16qi3 (operands[0], operands[1], counts));
  else
    emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], counts));
  DONE;
})
11564
;; V2DI arithmetic shift right by a scalar count.  The count is negated
;; -- at expand time when it is a CONST_INT, with a vector neg otherwise --
;; broadcast to both elements, and passed to xop_shav2di3.
(define_expand "ashrv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (ashiftrt:V2DI
          (match_operand:V2DI 1 "register_operand" "")
          (match_operand:DI 2 "nonmemory_operand" "")))]
  "TARGET_XOP"
{
  rtx counts = gen_reg_rtx (V2DImode);
  rtx count_vec;
  /* A non-constant count has to be negated after the broadcast.  */
  bool negate_later = !CONST_INT_P (operands[2]);
  int elt;

  if (!negate_later)
    operands[2] = GEN_INT (-INTVAL (operands[2]));

  count_vec = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
  for (elt = 0; elt < 2; elt++)
    XVECEXP (count_vec, 0, elt) = operands[2];

  emit_insn (gen_vec_initv2di (counts, count_vec));

  if (negate_later)
    emit_insn (gen_negv2di2 (counts, counts));

  emit_insn (gen_xop_shav2di3 (operands[0], operands[1], counts));
  DONE;
})
11594
11595 ;; XOP FRCZ support
;; vfrcz on a whole FMAMODE operand, represented as UNSPEC_FRCZ.
(define_insn "xop_frcz<mode>2"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x")
        (unspec:FMAMODE
          [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
          UNSPEC_FRCZ))]
  "TARGET_XOP"
  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "mode" "<MODE>")
   (set_attr "type" "ssecvt1")])
11605
11606 ;; scalar insns
;; Scalar vfrcz expander: the UNSPEC_FRCZ result is merged (mask 1) with
;; operand 3, which is set up here as the zero vector of the mode.
(define_expand "xop_vmfrcz<mode>2"
  [(set (match_operand:VF_128 0 "register_operand")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "nonimmediate_operand")]
            UNSPEC_FRCZ)
          (match_dup 3)
          (const_int 1)))]
  "TARGET_XOP"
{
  /* Supply the all-zero constant used by the match_dup above.  */
  operands[3] = CONST0_RTX (<MODE>mode);
})
11619
;; Scalar vfrcz insn: matches the vec_merge of UNSPEC_FRCZ with a zero
;; constant (operand 2) under mask 1.
(define_insn "*xop_vmfrcz_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
            UNSPEC_FRCZ)
          (match_operand:VF_128 2 "const0_operand")
          (const_int 1)))]
  "TARGET_XOP"
  "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "mode" "<MODE>")
   (set_attr "type" "ssecvt1")])
11632
;; Integer vector comparison (vpcom).  The comparison code is operand 1,
;; accepted by ix86_comparison_int_operator and printed through %Y1.
(define_insn "xop_maskcmp<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (match_operator:VI_128 1 "ix86_comparison_int_operator"
          [(match_operand:VI_128 2 "register_operand" "x")
           (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
  "TARGET_XOP"
  "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "2")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_data16" "0")
   (set_attr "type" "sse4arg")])
11646
;; Unsigned integer vector comparison (vpcom...u).  The comparison code
;; is operand 1, accepted by ix86_comparison_uns_operator and printed
;; through %Y1.
(define_insn "xop_maskcmp_uns<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (match_operator:VI_128 1 "ix86_comparison_uns_operator"
          [(match_operand:VI_128 2 "register_operand" "x")
           (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
  "TARGET_XOP"
  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "2")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_data16" "0")
   (set_attr "type" "ssecmp")])
11660
;; Variant of pcom*u* used by the intrinsics.  The comparison is wrapped
;; in UNSPEC_XOP_UNSIGNED_CMP so that pcomequ* and pcomneu* are not
;; converted to the signed forms, in case somebody needs the exact
;; instruction generated for the intrinsic.
(define_insn "xop_maskcmp_uns2<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (unspec:VI_128
          [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
             [(match_operand:VI_128 2 "register_operand" "x")
              (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
          UNSPEC_XOP_UNSIGNED_CMP))]
  "TARGET_XOP"
  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "2")
   (set_attr "prefix_data16" "0")
   (set_attr "type" "ssecmp")])
11678
;; Pcomtrue and pcomfalse support.  These instructions are useless, but
;; are provided for completeness.  A nonzero operand 3 selects vpcomtrue,
;; zero selects vpcomfalse.
(define_insn "xop_pcom_tf<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (unspec:VI_128
          [(match_operand:VI_128 1 "register_operand" "x")
           (match_operand:VI_128 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_XOP_TRUEFALSE))]
  "TARGET_XOP"
{
  if (INTVAL (operands[3]) != 0)
    return "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";

  return "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "mode" "TI")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "2")
   (set_attr "prefix_data16" "0")
   (set_attr "type" "ssecmp")])
11699
;; vpermil2ps/vpermil2pd, represented as UNSPEC_VPERMIL2.  Operands 1 and
;; 2 are the two data vectors, operand 3 is the integer selector vector
;; and operand 4 the 2-bit immediate.
;; Operand 2 must not carry the '%' (commutative) modifier: that would
;; declare it interchangeable with operand 3, which is the selector and
;; has a different mode.
(define_insn "xop_vpermil2<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "x")
           (match_operand:VF 2 "nonimmediate_operand" "x")
           (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
           (match_operand:SI 4 "const_0_to_3_operand" "n")]
          UNSPEC_VPERMIL2))]
  "TARGET_XOP"
  "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
  [(set_attr "type" "sse4arg")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<MODE>")])
11713
11714 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11715
;; aesenc (UNSPEC_AESENC).  Alternative 0 is the non-AVX two-operand form
;; with operand 1 tied to the destination; alternative 1 is the VEX
;; three-operand form.
(define_insn "aesenc"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0,x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_AESENC))]
  "TARGET_AES"
  "@
   aesenc\t{%2, %0|%0, %2}
   vaesenc\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
11730
;; aesenclast (UNSPEC_AESENCLAST).  Alternative 0 is the non-AVX
;; two-operand form with operand 1 tied to the destination; alternative 1
;; is the VEX three-operand form.
(define_insn "aesenclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0,x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_AESENCLAST))]
  "TARGET_AES"
  "@
   aesenclast\t{%2, %0|%0, %2}
   vaesenclast\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
11745
;; aesdec (UNSPEC_AESDEC).  Alternative 0 is the non-AVX two-operand form
;; with operand 1 tied to the destination; alternative 1 is the VEX
;; three-operand form.
(define_insn "aesdec"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0,x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_AESDEC))]
  "TARGET_AES"
  "@
   aesdec\t{%2, %0|%0, %2}
   vaesdec\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
11760
;; aesdeclast (UNSPEC_AESDECLAST).  Alternative 0 is the non-AVX
;; two-operand form with operand 1 tied to the destination; alternative 1
;; is the VEX three-operand form.
(define_insn "aesdeclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0,x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_AESDECLAST))]
  "TARGET_AES"
  "@
   aesdeclast\t{%2, %0|%0, %2}
   vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
11775
;; aesimc (UNSPEC_AESIMC) on a single register-or-memory source; the %v
;; in the template together with prefix "maybe_vex" covers both the
;; legacy and the VEX encodings.
(define_insn "aesimc"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_AESIMC))]
  "TARGET_AES"
  "%vaesimc\t{%1, %0|%0, %1}"
  [(set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
11786
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): operand 1 is the source,
;; operand 2 an 8-bit immediate.
(define_insn "aeskeygenassist"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_255_operand" "n")]
          UNSPEC_AESKEYGENASSIST))]
  "TARGET_AES"
  "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
11799
;; pclmulqdq (UNSPEC_PCLMUL) with an 8-bit immediate in operand 3.
;; Alternative 0 is the non-AVX form with operand 1 tied to the
;; destination; alternative 1 is the VEX three-operand form.
(define_insn "pclmulqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0,x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCLMUL))]
  "TARGET_PCLMUL"
  "@
   pclmulqdq\t{%3, %2, %0|%0, %2, %3}
   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,vex")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
11816
;; Build the PARALLEL for vzeroall: element 0 is the UNSPECV_VZEROALL
;; marker, followed by one V8SI zeroing SET per SSE register (16
;; registers for TARGET_64BIT, 8 otherwise).
(define_expand "avx_vzeroall"
  [(match_par_dup 0 [(const_int 0)])]
  "TARGET_AVX"
{
  const int num_sse_regs = TARGET_64BIT ? 16 : 8;
  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_sse_regs + 1));
  int slot;

  XVECEXP (par, 0, 0)
    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
                               UNSPECV_VZEROALL);

  /* Slot N (1-based) clears SSE register N - 1.  */
  for (slot = 1; slot <= num_sse_regs; slot++)
    XVECEXP (par, 0, slot)
      = gen_rtx_SET (VOIDmode,
                     gen_rtx_REG (V8SImode, SSE_REGNO (slot - 1)),
                     CONST0_RTX (V8SImode));

  operands[0] = par;
})
11836
;; Matches a PARALLEL accepted by vzeroall_operation whose first element
;; is the UNSPECV_VZEROALL marker, and emits vzeroall.
(define_insn "*avx_vzeroall"
  [(match_parallel 0 "vzeroall_operation"
     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
  "TARGET_AVX"
  "vzeroall"
  [(set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "memory" "none")
   (set_attr "modrm" "0")
   (set_attr "type" "sse")])
11847
;; Clear the upper 128 bits of the AVX registers; this is equivalent to
;; a NOP when the upper 128 bits are unused.  Operand 0 is a CONST_INT
;; that does not appear in the emitted instruction.
(define_insn "avx_vzeroupper"
  [(unspec_volatile
     [(match_operand 0 "const_int_operand" "")]
     UNSPECV_VZEROUPPER)]
  "TARGET_AVX"
  "vzeroupper"
  [(set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "memory" "none")
   (set_attr "modrm" "0")
   (set_attr "type" "sse")])
11860
;; Maps each integer vector mode to the 128-bit integer vector mode with
;; the same element type (128-bit modes map to themselves).
(define_mode_attr AVXTOSSEMODE
  [(V4DI "V2DI")
   (V2DI "V2DI")
   (V8SI "V4SI")
   (V4SI "V4SI")
   (V16HI "V8HI")
   (V8HI "V8HI")
   (V32QI "V16QI")
   (V16QI "V16QI")])
11866
;; vpbroadcast: duplicate element 0 of the <AVXTOSSEMODE> source
;; (register or memory) into every element of the destination.
(define_insn "avx2_pbroadcast<mode>"
  [(set (match_operand:VI 0 "register_operand" "=x")
        (vec_duplicate:VI
          (vec_select:<ssescalarmode>
            (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "ssemov")])
11879
;; vpermd on V8SI, represented as UNSPEC_VPERMSI.  Operand 1 must be a
;; register; operand 2 may be a register or memory.
(define_insn "avx2_permvarv8si"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (unspec:V8SI
          [(match_operand:V8SI 1 "register_operand" "x")
           (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
          UNSPEC_VPERMSI))]
  "TARGET_AVX2"
  "vpermd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "type" "sselog")])
11891
;; vpermpd on V4DF with an 8-bit immediate selector, represented as
;; UNSPEC_VPERMDF.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint; with register_operand the memory alternative
;; could never be offered to the insn.
(define_insn "avx2_permv4df"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (unspec:V4DF
          [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_255_operand" "n")]
          UNSPEC_VPERMDF))]
  "TARGET_AVX2"
  "vpermpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
11904
;; vpermps on V8SF, represented as UNSPEC_VPERMSF.  Operand 1 must be a
;; register; operand 2 may be a register or memory.
(define_insn "avx2_permvarv8sf"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (unspec:V8SF
          [(match_operand:V8SF 1 "register_operand" "x")
           (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
          UNSPEC_VPERMSF))]
  "TARGET_AVX2"
  "vpermps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "type" "sselog")])
11916
;; Expand a V4DI permute by 8-bit immediate: split the immediate into
;; four 2-bit element selectors and emit avx2_permv4di_1.
(define_expand "avx2_permv4di"
  [(match_operand:V4DI 0 "register_operand" "")
   (match_operand:V4DI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  /* Selector for destination element N lives in bits 2N+1:2N.  */
  rtx sel0 = GEN_INT ((imm >> 0) & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT ((imm >> 4) & 3);
  rtx sel3 = GEN_INT ((imm >> 6) & 3);

  emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
                                  sel0, sel1, sel2, sel3));
  DONE;
})
11931
;; vpermq expressed as a vec_select with four element selectors in the
;; range 0..3.  The selectors are packed back into the 8-bit immediate
;; when the instruction is output.
(define_insn "avx2_permv4di_1"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (vec_select:V4DI
          (match_operand:V4DI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_AVX2"
{
  /* Selector for element N occupies bits 2N+1:2N of the immediate.  */
  int imm = 0;
  int n;

  for (n = 0; n < 4; n++)
    imm |= INTVAL (operands[2 + n]) << (2 * n);

  operands[2] = GEN_INT (imm);
  return "vpermq\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "type" "sselog")])
11953
;; vperm2i128 on V4DI with an 8-bit immediate, represented as
;; UNSPEC_VPERMTI.
(define_insn "avx2_permv2ti"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (unspec:V4DI
          [(match_operand:V4DI 1 "register_operand" "x")
           (match_operand:V4DI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPERMTI))]
  "TARGET_AVX2"
  "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "type" "sselog")])
11966
;; vbroadcastsd from a register: duplicate element 0 of a V2DF register
;; into all four elements of a V4DF destination.
(define_insn "avx2_vec_dupv4df"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_duplicate:V4DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "x")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vbroadcastsd\t{%1, %0|%0, %1}"
  [(set_attr "mode" "V4DF")
   (set_attr "prefix" "vex")
   (set_attr "type" "sselog1")])
11978
;; The 256-bit modes for which the AVX vec_dup patterns are defined.
(define_mode_iterator AVX_VEC_DUP_MODE
  [V8SI
   V8SF
   V4DI
   V4DF])
11982
;; Duplicate a scalar into every element of a 256-bit vector.  A memory
;; source is emitted as vbroadcast; a register source outputs "#", i.e.
;; that alternative has to be split.
(define_insn "vec_dup<mode>"
  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
        (vec_duplicate:AVX_VEC_DUP_MODE
          (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
  "TARGET_AVX"
  "@
   vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
   #"
  [(set_attr "mode" "V8SF")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "ssemov")])
11995
;; vbroadcasti128: concatenate a half-width memory operand with itself to
;; form the 256-bit integer vector.
(define_insn "avx2_vbroadcasti128_<mode>"
  [(set (match_operand:VI_256 0 "register_operand" "=x")
        (vec_concat:VI_256
          (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
          (match_dup 1)))]
  "TARGET_AVX2"
  "vbroadcasti128\t{%1, %0|%0, %1}"
  [(set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "ssemov")])
12007
;; After reload, split a vec_duplicate of a scalar register into a
;; half-width vec_duplicate followed by a vec_concat of that half with
;; itself.  Operand 2 is the destination register viewed in the
;; half-width vector mode.
(define_split
  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
        (vec_duplicate:AVX_VEC_DUP_MODE
          (match_operand:<ssescalarmode> 1 "register_operand" "")))]
  "TARGET_AVX && reload_completed"
  [(set (match_dup 2)
        (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
   (set (match_dup 0)
        (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
  "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
12018
;; Concatenate a half-width operand with itself.  Alternative 0 (memory
;; source) uses vbroadcast, alternative 1 (source tied to the
;; destination) uses vinsert with immediate 1, and alternative 2 (other
;; register) uses vperm2 with immediate 0.
(define_insn "avx_vbroadcastf128_<mode>"
  [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
        (vec_concat:V_256
          (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
          (match_dup 1)))]
  "TARGET_AVX"
  "@
   vbroadcast<i128>\t{%1, %0|%0, %1}
   vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
   vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
  [(set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "0,1,1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "ssemov,sselog1,sselog1")])
12034
;; Recognize a broadcast expressed as a vec_select, as produced by
;; builtin_vec_perm.  When the input is in memory, vbroadcast is used on
;; the selected element's address; otherwise vpermilp is used (plus
;; vperm2f128 for the 256-bit modes).
(define_insn "*avx_vperm_broadcast_v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_select:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
          (match_parallel 2 "avx_vbroadcast_operand"
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
{
  const int elt = INTVAL (operands[3]);

  if (which_alternative == 0 || which_alternative == 1)
    {
      /* Memory source: address the chosen SFmode element directly.  */
      operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
      return "vbroadcastss\t{%1, %0|%0, %1}";
    }

  if (which_alternative == 2)
    {
      /* Register source: repeat the 2-bit index in all four fields.  */
      operands[2] = GEN_INT (elt * 0x55);
      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
    }

  gcc_unreachable ();
}
  [(set_attr "mode" "SF,SF,V4SF")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "0,0,1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "ssemov,ssemov,sselog1")])
12065
;; 256-bit broadcast expressed as a vec_select; always split after
;; reload.  A memory source becomes a vec_duplicate of the addressed
;; scalar element; a register source is handled with an in-lane vpermil
;; followed by vperm2f128.
(define_insn_and_split "*avx_vperm_broadcast_<mode>"
  [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
        (vec_select:VF_256
          (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
          (match_parallel 2 "avx_vbroadcast_operand"
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  const int elt = INTVAL (operands[3]);

  if (!REG_P (src))
    /* Point operand 1 at the selected scalar; the vec_duplicate
       replacement pattern above is then emitted.  */
    operands[1] = adjust_address_nv (src, <ssescalarmode>mode,
                                     elt * GET_MODE_SIZE (<ssescalarmode>mode));
  else
    {
      int imm;

      /* Shuffle the wanted element into every element of its 128-bit
         lane.  The other lane is shuffled too, which does not matter.  */
      if (<MODE>mode == V4DFmode)
        imm = (elt & 1 ? 15 : 0);
      else
        imm = (elt & 3) * 0x55;
      emit_insn (gen_avx_vpermil<mode> (dest, src, GEN_INT (imm)));

      /* Copy the lane that holds the element into both lanes.  */
      imm = (elt / (<ssescalarnum> / 2)) * 0x11;
      emit_insn (gen_avx_vperm2f128<mode>3 (dest, dest, dest, GEN_INT (imm)));
      DONE;
    }
})
12101
;; vpermilpd by immediate (VF2 modes): convert the immediate into the
;; PARALLEL of element indices used by the vec_select.  One immediate bit
;; per element; for V4DF elements 2 and 3 select within the upper pair.
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:VF2 0 "register_operand" "")
        (vec_select:VF2
          (match_operand:VF2 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "const_0_to_255_operand" "")))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];
  int n;

  for (n = 0; n < 2; n++)
    sel[n] = GEN_INT ((imm >> n) & 1);

  if (<MODE>mode == V4DFmode)
    for (n = 2; n < 4; n++)
      sel[n] = GEN_INT (((imm >> n) & 1) + 2);

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
12123
;; vpermilps by immediate (VF1 modes): convert the immediate into the
;; PARALLEL of element indices used by the vec_select.  Two immediate
;; bits per element; for V8SF the same four fields are reused for
;; elements 4..7 with an offset of 4.
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:VF1 0 "register_operand" "")
        (vec_select:VF1
          (match_operand:VF1 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "const_0_to_255_operand" "")))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];
  int n;

  for (n = 0; n < 4; n++)
    sel[n] = GEN_INT ((imm >> (2 * n)) & 3);

  if (<MODE>mode == V8SFmode)
    for (n = 0; n < 4; n++)
      sel[n + 4] = GEN_INT (((imm >> (2 * n)) & 3) + 4);

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
12149
;; vpermilps/vpermilpd by immediate, matched as a vec_select whose
;; PARALLEL is accepted by avx_vpermilp_parallel.  That function's
;; return value minus one is used as the immediate.
(define_insn "*avx_vpermilp<mode>"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (vec_select:VF
          (match_operand:VF 1 "nonimmediate_operand" "xm")
          (match_parallel 2 ""
            [(match_operand 3 "const_int_operand" "")])))]
  "TARGET_AVX
   && avx_vpermilp_parallel (operands[2], <MODE>mode)"
{
  const int imm = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;

  operands[2] = GEN_INT (imm);
  return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "mode" "<MODE>")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog")])
12168
;; vpermilps/vpermilpd with a variable selector (operand 2, an integer
;; vector in register or memory), represented as UNSPEC_VPERMIL.
(define_insn "avx_vpermilvar<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "x")
           (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
          UNSPEC_VPERMIL))]
  "TARGET_AVX"
  "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mode" "<MODE>")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog")])
12181
;; vperm2f128 expander.  When neither bit 3 nor bit 7 of the immediate is
;; set, the operation is emitted as a vec_select from the concatenation
;; of the two sources; otherwise the unspec pattern is emitted as
;; written.
(define_expand "avx_vperm2f128<mode>3"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
        (unspec:AVX256MODE2P
          [(match_operand:AVX256MODE2P 1 "register_operand" "")
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
           (match_operand:SI 3 "const_0_to_255_operand" "")]
          UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[3]);

  if ((imm & 0x88) == 0)
    {
      const int nelt = <ssescalarnum>;
      const int half = nelt / 2;
      /* First element index, within the concatenated source, of the
         128-bit lane chosen for the low and the high result half.  */
      const int lo_base = (imm & 3) * half;
      const int hi_base = ((imm >> 4) & 3) * half;
      rtx sel[<ssescalarnum>];
      rtx both, indices, picked;
      int n;

      for (n = 0; n < half; ++n)
        {
          sel[n] = GEN_INT (lo_base + n);
          sel[n + half] = GEN_INT (hi_base + n);
        }

      both = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
                                 operands[1], operands[2]);
      indices = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, sel));
      picked = gen_rtx_VEC_SELECT (<MODE>mode, both, indices);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], picked));
      DONE;
    }
})
12214
;; Bits 7 and 3 of the imm8 allow lanes to be zeroed.  Representing that
;; properly in RTL would require nesting *another* vec_concat with a zero
;; operand and selecting from a 4x wide vector, which does not seem very
;; nice, so the general form is kept as an unspec.
(define_insn "*avx_vperm2f128<mode>_full"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
        (unspec:AVX256MODE2P
          [(match_operand:AVX256MODE2P 1 "register_operand" "x")
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
  "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog")])
12233
;; vec_select form of the two-source 256-bit permute.  The insn matches
;; only when avx_vperm2f128_parallel accepts the selector in operand 3;
;; that helper's nonzero return value minus one is used as the immediate.
;; Immediates 0x12 and 0x20 are emitted as vinsert<i128> of the low half
;; (%x2) of operand 2; everything else as vperm2<i128>.
(define_insn "*avx_vperm2f128<mode>_nozero"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
        (vec_select:AVX256MODE2P
          (vec_concat:<ssedoublevecmode>
            (match_operand:AVX256MODE2P 1 "register_operand" "x")
            (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
          (match_parallel 3 ""
            [(match_operand 4 "const_int_operand" "")])))]
  "TARGET_AVX
   && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
{
  int imm = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;

  switch (imm)
    {
    case 0x12:
      return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";

    case 0x20:
      return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";

    default:
      /* Replace the matched parallel by the immediate for printing.  */
      operands[3] = GEN_INT (imm);
      return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    }
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
12258
;; Insert the half-width vector in operand 2 into the 256-bit vector in
;; operand 1, storing the result in operand 0.  Operand 3 (0 or 1)
;; dispatches to the vec_set_lo or vec_set_hi pattern respectively.
(define_expand "avx_vinsertf128<mode>"
  [(match_operand:V_256 0 "register_operand" "")
   (match_operand:V_256 1 "register_operand" "")
   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  rtx (*gen) (rtx, rtx, rtx);
  const HOST_WIDE_INT which = INTVAL (operands[3]);

  if (which == 0)
    gen = gen_vec_set_lo_<mode>;
  else if (which == 1)
    gen = gen_vec_set_hi_<mode>;
  else
    gcc_unreachable ();

  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
})
12283
;; AVX2 V4DI insert, immediate 0: the result is operand 2 (V2DI)
;; concatenated with elements 2 and 3 of operand 1.
(define_insn "avx2_vec_set_lo_v4di"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (vec_concat:V4DI
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")
          (vec_select:V2DI
            (match_operand:V4DI 1 "register_operand" "x")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX2"
  "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
12298
;; AVX2 V4DI insert, immediate 1: the result is elements 0 and 1 of
;; operand 1 concatenated with operand 2 (V2DI).
(define_insn "avx2_vec_set_hi_v4di"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (vec_concat:V4DI
          (vec_select:V2DI
            (match_operand:V4DI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
12313
;; Four-element 256-bit modes (VI8F_256), immediate 0: the result is
;; operand 2 concatenated with elements 2 and 3 of operand 1.
(define_insn "vec_set_lo_<mode>"
  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
        (vec_concat:VI8F_256
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
          (vec_select:<ssehalfvecmode>
            (match_operand:VI8F_256 1 "register_operand" "x")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
12328
;; Four-element 256-bit modes (VI8F_256), immediate 1: the result is
;; elements 0 and 1 of operand 1 concatenated with operand 2.
(define_insn "vec_set_hi_<mode>"
  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
        (vec_concat:VI8F_256
          (vec_select:<ssehalfvecmode>
            (match_operand:VI8F_256 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
12343
;; Eight-element 256-bit modes (VI4F_256), immediate 0: the result is
;; operand 2 concatenated with elements 4..7 of operand 1.
(define_insn "vec_set_lo_<mode>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
        (vec_concat:VI4F_256
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "x")
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX"
  "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
12359
;; Eight-element 256-bit modes (VI4F_256), immediate 1: the result is
;; elements 0..3 of operand 1 concatenated with operand 2.
(define_insn "vec_set_hi_<mode>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
        (vec_concat:VI4F_256
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
12375
;; V16HI, immediate 0: the result is operand 2 (V8HI) concatenated with
;; elements 8..15 of operand 1.
(define_insn "vec_set_lo_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")
          (vec_select:V8HI
            (match_operand:V16HI 1 "register_operand" "x")
            (parallel [(const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
12393
;; V16HI, immediate 1: the result is elements 0..7 of operand 1
;; concatenated with operand 2 (V8HI).
(define_insn "vec_set_hi_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (vec_select:V8HI
            (match_operand:V16HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
12411
;; V32QI, immediate 0: the result is operand 2 (V16QI) concatenated with
;; elements 16..31 of operand 1.
(define_insn "vec_set_lo_v32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
        (vec_concat:V32QI
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")
          (vec_select:V16QI
            (match_operand:V32QI 1 "register_operand" "x")
            (parallel [(const_int 16) (const_int 17)
                       (const_int 18) (const_int 19)
                       (const_int 20) (const_int 21)
                       (const_int 22) (const_int 23)
                       (const_int 24) (const_int 25)
                       (const_int 26) (const_int 27)
                       (const_int 28) (const_int 29)
                       (const_int 30) (const_int 31)]))))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
12433
;; V32QI, immediate 1: the result is elements 0..15 of operand 1
;; concatenated with operand 2 (V16QI).
(define_insn "vec_set_hi_v32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
        (vec_concat:V32QI
          (vec_select:V16QI
            (match_operand:V32QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)
                       (const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
12455
;; Masked load, kept opaque as UNSPEC_MASKMOV.  Operand 1 is the memory
;; source; operand 2 is an integer-vector register of the same shape
;; (presumably the mask -- confirm against the builtin expanders).
(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
        (unspec:V48_AVX2
          [(match_operand:<sseintvecmode> 2 "register_operand" "x")
           (match_operand:V48_AVX2 1 "memory_operand" "m")]
          UNSPEC_MASKMOV))]
  "TARGET_AVX"
  "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")])
12468
;; Masked store, kept opaque as UNSPEC_MASKMOV.  The destination memory
;; also appears as an input (match_dup 0), so its previous contents are
;; part of the operation.  Operand 1 is an integer-vector register
;; (presumably the mask -- confirm against the builtin expanders) and
;; operand 2 the vector register being stored.
(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
        (unspec:V48_AVX2
          [(match_operand:<sseintvecmode> 1 "register_operand" "x")
           (match_operand:V48_AVX2 2 "register_operand" "x")
           (match_dup 0)]
          UNSPEC_MASKMOV))]
  "TARGET_AVX"
  "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")])
12482
;; Cast from a half-width vector to a 256-bit vector (UNSPEC_CAST).
;; No instruction is output for the insn itself ("#"); after reload it is
;; split into a single move in which whichever side is a register is
;; re-expressed in the mode of the other side.
(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
        (unspec:AVX256MODE2P
          [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
          UNSPEC_CAST))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (!REG_P (dst))
    /* Destination is not a register: view the source register in the
       full 256-bit mode.  */
    src = gen_rtx_REG (<MODE>mode, REGNO (src));
  else
    /* Register destination: narrow it to the half-width mode.  */
    dst = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (dst));

  emit_move_insn (dst, src);
  DONE;
})
12502
;; Initialise the 256-bit vector register in operand 0 from operand 1.
;; All of the work is delegated to ix86_expand_vector_init, called with
;; `false' as its first argument.
(define_expand "vec_init<mode>"
  [(match_operand:V_256 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_AVX"
{
  rtx target = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, target, vals);
  DONE;
})
12511
;; Extract a V2DI half of the V4DI register in operand 1 into operand 0.
;; Operand 2 (0 or 1) dispatches to vec_extract_lo_v4di or
;; vec_extract_hi_v4di respectively.
(define_expand "avx2_extracti128"
  [(match_operand:V2DI 0 "nonimmediate_operand" "")
   (match_operand:V4DI 1 "register_operand" "")
   (match_operand:SI 2 "const_0_to_1_operand" "")]
  "TARGET_AVX2"
{
  rtx (*gen) (rtx, rtx);
  const HOST_WIDE_INT which = INTVAL (operands[2]);

  if (which == 0)
    gen = gen_vec_extract_lo_v4di;
  else if (which == 1)
    gen = gen_vec_extract_hi_v4di;
  else
    gcc_unreachable ();

  emit_insn (gen (operands[0], operands[1]));
  DONE;
})
12535
;; Insert the V2DI value in operand 2 into the V4DI register in operand 1,
;; storing the result in operand 0.  Operand 3 (0 or 1) dispatches to
;; avx2_vec_set_lo_v4di or avx2_vec_set_hi_v4di respectively.
(define_expand "avx2_inserti128"
  [(match_operand:V4DI 0 "register_operand" "")
   (match_operand:V4DI 1 "register_operand" "")
   (match_operand:V2DI 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_0_to_1_operand" "")]
  "TARGET_AVX2"
{
  rtx (*gen) (rtx, rtx, rtx);
  const HOST_WIDE_INT which = INTVAL (operands[3]);

  if (which == 0)
    gen = gen_avx2_vec_set_lo_v4di;
  else if (which == 1)
    gen = gen_avx2_vec_set_hi_v4di;
  else
    gcc_unreachable ();

  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
})
12560
;; Arithmetic right shift with a vector shift count (operand 2), for the
;; VI4_AVX2 modes.
(define_insn "avx2_ashrv<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
        (ashiftrt:VI4_AVX2
          (match_operand:VI4_AVX2 1 "register_operand" "x")
          (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vpsravd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")])
12571
;; Shifts from the any_lshift code iterator with a vector shift count
;; (operand 2), for the VI48_AVX2 modes.
(define_insn "avx2_<shift_insn>v<mode>"
  [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
        (any_lshift:VI48_AVX2
          (match_operand:VI48_AVX2 1 "register_operand" "x")
          (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")])
12582
;; Concatenate two half-width vectors into a 256-bit register.
;; Alternative 0 takes a register or memory second half and uses
;; vinsert<i128> with immediate 1.  Alternative 1 takes a "C"-constraint
;; second half and only moves operand 1 into the low (%x0) part of the
;; destination, with the move mnemonic chosen from the insn's mode
;; attribute.
(define_insn "avx_vec_concat<mode>"
  [(set (match_operand:V_256 0 "register_operand" "=x,x")
        (vec_concat:V_256
          (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
          (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
  "TARGET_AVX"
{
  if (which_alternative == 0)
    return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";

  if (which_alternative != 1)
    gcc_unreachable ();

  switch (get_attr_mode (insn))
    {
    case MODE_V8SF:
      return "vmovaps\t{%1, %x0|%x0, %1}";

    case MODE_V4DF:
      return "vmovapd\t{%1, %x0|%x0, %1}";

    default:
      return "vmovdqa\t{%1, %x0|%x0, %1}";
    }
}
  [(set_attr "type" "sselog,ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1,*")
   (set_attr "length_immediate" "1,*")])
12613
;; Register form of the 128-bit half-to-single conversion.  The V8HI
;; input is described as an opaque V8SF conversion (UNSPEC_VCVTPH2PS) of
;; which only the low four elements are kept.  The selector must therefore
;; name elements 0..3; it previously read 0, 1, 1, 2, which duplicated
;; element 1 and never selected element 3.
(define_insn "vcvtph2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
                       UNSPEC_VCVTPH2PS)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_F16C"
  "vcvtph2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])
12626
;; Memory form of the 128-bit half-to-single conversion: a V4HI memory
;; operand is converted to a V4SF register via UNSPEC_VCVTPH2PS.
(define_insn "*vcvtph2ps_load"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4HI 1 "memory_operand" "m")]
          UNSPEC_VCVTPH2PS))]
  "TARGET_F16C"
  "vcvtph2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V8SF")
   (set_attr "prefix" "vex")])
12636
;; 256-bit half-to-single conversion: a V8HI register or memory operand
;; is converted to a V8SF register via UNSPEC_VCVTPH2PS.
(define_insn "vcvtph2ps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (unspec:V8SF
          [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
          UNSPEC_VCVTPH2PS))]
  "TARGET_F16C"
  "vcvtph2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V8SF")
   (set_attr "prefix" "vex")])
12646
;; Expander for the 128-bit single-to-half conversion into a register.
;; The V4HI conversion result (UNSPEC_VCVTPS2PH of operand 1 with the
;; immediate in operand 2) is concatenated with a V4HI zero vector, which
;; the preparation code supplies as operand 3.
(define_expand "vcvtps2ph"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (vec_concat:V8HI
          (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
                        (match_operand:SI 2 "const_0_to_255_operand" "")]
                       UNSPEC_VCVTPS2PH)
          (match_dup 3)))]
  "TARGET_F16C"
{
  operands[3] = CONST0_RTX (V4HImode);
})
12656
;; Register-destination form of the 128-bit single-to-half conversion:
;; the V4HI result is concatenated with a zero V4HI (operand 3, matched
;; by const0_operand) to form the V8HI destination.
(define_insn "*vcvtps2ph"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
                        (match_operand:SI 2 "const_0_to_255_operand" "N")]
                       UNSPEC_VCVTPS2PH)
          (match_operand:V4HI 3 "const0_operand" "")))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")
   (set_attr "prefix" "vex")])
12669
;; Memory-destination form of the 128-bit single-to-half conversion: the
;; V4HI result of UNSPEC_VCVTPS2PH is stored directly to memory.
(define_insn "*vcvtps2ph_store"
  [(set (match_operand:V4HI 0 "memory_operand" "=m")
        (unspec:V4HI
          [(match_operand:V4SF 1 "register_operand" "x")
           (match_operand:SI 2 "const_0_to_255_operand" "N")]
          UNSPEC_VCVTPS2PH))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")
   (set_attr "prefix" "vex")])
12680
;; 256-bit single-to-half conversion: a V8SF register is converted to a
;; V8HI register or memory destination, with the immediate in operand 2.
(define_insn "vcvtps2ph256"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
        (unspec:V8HI
          [(match_operand:V8SF 1 "register_operand" "x")
           (match_operand:SI 2 "const_0_to_255_operand" "N")]
          UNSPEC_VCVTPS2PH))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V8SF")
   (set_attr "prefix" "vex")])
12691
;; For gather* insn patterns

;; Result modes handled by the gather patterns.
(define_mode_iterator VEC_GATHER_MODE
  [V2DI V2DF
   V4DI V4DF
   V4SI V4SF
   V8SI V8SF])

;; Mode of the vector register inside the VSIB address of the
;; avx2_gathersi patterns.
(define_mode_attr VEC_GATHER_IDXSI
  [(V2DI "V4SI")
   (V2DF "V4SI")
   (V4DI "V4SI")
   (V4DF "V4SI")
   (V4SI "V4SI")
   (V4SF "V4SI")
   (V8SI "V8SI")
   (V8SF "V8SI")])

;; Mode of the vector register inside the VSIB address of the
;; avx2_gatherdi patterns.
(define_mode_attr VEC_GATHER_IDXDI
  [(V2DI "V2DI")
   (V2DF "V2DI")
   (V4DI "V4DI")
   (V4DF "V4DI")
   (V4SI "V2DI")
   (V4SF "V2DI")
   (V8SI "V4DI")
   (V8SF "V4DI")])

;; Mode of the two vector input operands of the avx2_gatherdi patterns.
;; It differs from the result mode only for V8SI and V8SF, which map to
;; the corresponding four-element mode.
(define_mode_attr VEC_GATHER_SRCDI
  [(V2DI "V2DI")
   (V2DF "V2DF")
   (V4DI "V4DI")
   (V4DF "V4DF")
   (V4SI "V4SI")
   (V4SF "V4SF")
   (V8SI "V4SI")
   (V8SF "V4SF")])
12710
;; Expander for gathers addressed through a <VEC_GATHER_IDXSI> vector.
;; Operands 2, 3 and 5 are the pieces of the VSIB address; the
;; preparation code wraps them in an UNSPEC_VSIBADDR that becomes
;; operand 7, the address of the gathered memory reference.  Operands 1
;; and 4 are vector inputs in the result mode, and operand 6 is a scratch
;; that the insn clobbers.
;;
;; The predicate on operand 5 is spelled "const1248_operand" with no
;; trailing blank, matching the define_insn patterns this expands to.
(define_expand "avx2_gathersi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
                   (unspec:VEC_GATHER_MODE
                     [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
                      (mem:<ssescalarmode>
                        (match_par_dup 7
                          [(match_operand 2 "vsib_address_operand" "")
                           (match_operand:<VEC_GATHER_IDXSI>
                              3 "register_operand" "")
                           (match_operand:SI 5 "const1248_operand" "")]))
                      (mem:BLK (scratch))
                      (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
                     UNSPEC_GATHER))
              (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
  "TARGET_AVX2"
{
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})
12731
;; Gather through a <VEC_GATHER_IDXSI> vector.  Operand 2 is tied to the
;; early-clobber destination (constraint "0") and operand 5 is tied to
;; scratch operand 1 (constraint "1"), which the insn clobbers.  Operand 7
;; is the memory reference whose address is the UNSPEC_VSIBADDR built from
;; operands 3, 4 and 6.
(define_insn "*avx2_gathersi<mode>"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
           (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 3 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
                 (match_operand:SI 6 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")])
12751
;; Variant of the <VEC_GATHER_IDXSI> gather in which the first unspec
;; input is (pc) instead of a register tied to the destination.  Operand 4
;; is tied to scratch operand 1 (constraint "1"); operand 6 is the memory
;; reference addressed by the UNSPEC_VSIBADDR of operands 2, 3 and 5.
(define_insn "*avx2_gathersi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(pc)
           (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 2 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
                 (match_operand:SI 5 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")])
12771
;; Expander for gathers addressed through a <VEC_GATHER_IDXDI> vector.
;; Operands 2, 3 and 5 are the pieces of the VSIB address; the
;; preparation code wraps them in an UNSPEC_VSIBADDR that becomes
;; operand 7, the address of the gathered memory reference.  Operands 1
;; and 4 are vector inputs in <VEC_GATHER_SRCDI> mode, and operand 6 is a
;; scratch that the insn clobbers.
;;
;; The predicate on operand 5 is spelled "const1248_operand" with no
;; trailing blank, matching the define_insn patterns this expands to.
(define_expand "avx2_gatherdi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
                   (unspec:VEC_GATHER_MODE
                     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "")
                      (mem:<ssescalarmode>
                        (match_par_dup 7
                          [(match_operand 2 "vsib_address_operand" "")
                           (match_operand:<VEC_GATHER_IDXDI>
                              3 "register_operand" "")
                           (match_operand:SI 5 "const1248_operand" "")]))
                      (mem:BLK (scratch))
                      (match_operand:<VEC_GATHER_SRCDI>
                        4 "register_operand" "")]
                     UNSPEC_GATHER))
              (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
  "TARGET_AVX2"
{
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})
12793
;; Gather through a <VEC_GATHER_IDXDI> vector.  Operand 2 is tied to the
;; early-clobber destination (constraint "0") and operand 5 to scratch
;; operand 1 (constraint "1").  The template prints operands 2 and 5, not
;; the operands 0 and 1 they are tied to; those two carry
;; <VEC_GATHER_SRCDI> mode (presumably so the register is printed at that
;; mode's width -- confirm).
(define_insn "*avx2_gatherdi<mode>"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
           (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 3 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
                 (match_operand:SI 6 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")])
12813
;; Variant of the <VEC_GATHER_IDXDI> gather in which the first unspec
;; input is (pc) instead of a register tied to the destination.  When the
;; result mode differs from <VEC_GATHER_SRCDI>, the destination is
;; printed with the %x modifier; otherwise it is printed unmodified.
(define_insn "*avx2_gatherdi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(pc)
           (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 2 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
                 (match_operand:SI 5 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
{
  return (<MODE>mode != <VEC_GATHER_SRCDI>mode
          ? "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}"
          : "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}");
}
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")])
12837
;; <VEC_GATHER_IDXDI> gather for the VI4F_256 modes where only elements
;; 0..3 of the 256-bit unspec result are kept, so the destination has
;; <VEC_GATHER_SRCDI> mode.  Operand 2 is tied to the destination
;; (constraint "0") and operand 5 to scratch operand 1 (constraint "1").
(define_insn "*avx2_gatherdi<mode>_3"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
        (vec_select:<VEC_GATHER_SRCDI>
          (unspec:VI4F_256
            [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
             (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
               [(unspec:P
                  [(match_operand:P 3 "vsib_address_operand" "p")
                   (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
                   (match_operand:SI 6 "const1248_operand" "n")]
                  UNSPEC_VSIBADDR)])
             (mem:BLK (scratch))
             (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
            UNSPEC_GATHER)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")])
12860
;; Like the elements-0..3 VI4F_256 gather with a register first input,
;; except that the first unspec input here is (pc).  Operand 4 is tied to
;; scratch operand 1 (constraint "1"); operand 6 is the memory reference
;; addressed by the UNSPEC_VSIBADDR of operands 2, 3 and 5.
(define_insn "*avx2_gatherdi<mode>_4"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
        (vec_select:<VEC_GATHER_SRCDI>
          (unspec:VI4F_256
            [(pc)
             (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
               [(unspec:P
                  [(match_operand:P 2 "vsib_address_operand" "p")
                   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
                   (match_operand:SI 5 "const1248_operand" "n")]
                  UNSPEC_VSIBADDR)])
             (mem:BLK (scratch))
             (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
            UNSPEC_GATHER)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")])