1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option) any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
42 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 16-byte integer vector modes.
;; NOTE(review): the digits in each name look like the element sizes in
;; bytes (1 = QI, 2 = HI, 4 = SI, 8 = DI) -- confirm.
45 (define_mode_iterator SSEMODE12 [V16QI V8HI])
46 (define_mode_iterator SSEMODE24 [V8HI V4SI])
47 (define_mode_iterator SSEMODE14 [V16QI V4SI])
48 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
49 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
50 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point iterators: SSEMODEF4 mixes the scalar SF/DF modes with the
;; 16-byte float vectors, FMA4MODEF4 lists the 32-byte float vectors, and
;; SSEMODEF2P lists only the 16-byte float vectors.
51 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
52 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
53 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; Subsets made only of 32-byte modes.
55 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
56 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
57 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
58 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
;; Float vector subsets that mix 16-byte and 32-byte modes.
59 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
60 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
61 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
62 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; NOTE(review): judging by the names these two serve the cvtdq2ps and
;; cvtps2dq conversion patterns; their users are outside this excerpt.
63 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
64 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
66 ;; Int-float size matches
67 (define_mode_iterator SSEMODE4S [V4SF V4SI])
68 (define_mode_iterator SSEMODE2D [V2DF V2DI])
70 ;; Modes handled by integer vcond pattern
;; V2DI is only included when TARGET_SSE4_2 holds.
71 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
72 (V2DI "TARGET_SSE4_2")])
74 ;; Mapping from float mode to required SSE level
75 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
77 ;; Mapping from integer vector mode to mnemonic suffix
78 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
80 ;; Mapping of the fma4 suffix
81 (define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")])
;; Scalar mnemonic suffix ("ss"/"sd") for both the scalar float modes and
;; the 16-byte float vector modes.
82 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
83 (V4SF "ss") (V2DF "sd")])
85 ;; Mapping of the avx suffix
86 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
87 (V4SF "ps") (V2DF "pd")])
;; Single-letter precision suffix ("s"/"d"); the patterns below append it to
;; mnemonic stems such as "movup", "movntp" and "divp".
89 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
91 ;; Mapping of the max integer size for xop rotate immediate constraint
92 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
94 ;; Mapping of vector modes back to the scalar modes
95 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
96 (V16QI "QI") (V8HI "HI")
97 (V4SI "SI") (V2DI "DI")])
99 ;; Mapping of vector modes to a vector mode of double size
100 (define_mode_attr ssedoublesizemode
101 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
102 (V8HI "V16HI") (V16QI "V32QI")
103 (V4DF "V8DF") (V8SF "V16SF")
104 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
106 ;; Number of scalar elements in each vector type
107 (define_mode_attr ssescalarnum
108 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
109 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Value used for the "mode" insn attribute by the AVX patterns below:
;; integer vector modes map to TI (16-byte) or OI (32-byte), float vector
;; modes map to themselves.
112 (define_mode_attr avxvecmode
113 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF")
114 (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
115 (V8SF "V8SF") (V4DF "V4DF")])
;; Integer vector mode -> V4SF (16-byte modes) or V8SF (32-byte modes).
116 (define_mode_attr avxvecpsmode
117 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
118 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Vector mode -> vector mode with the same element type and half as many
;; elements.
119 (define_mode_attr avxhalfvecmode
120 [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
121 (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
;; Vector mode -> element (scalar) mode, including the 32-byte float modes.
122 (define_mode_attr avxscalarmode
123 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF")
124 (V8SF "SF") (V4DF "DF")])
;; Pairs each SF vector mode with the SI vector mode of equal element count,
;; in both directions.
125 (define_mode_attr avxcvtvecmode
126 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; Float vector mode -> integer vector mode with the same element count.
127 (define_mode_attr avxpermvecmode
128 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; AVX counterpart of ssemodesuffixf2c, extended to the 32-byte float modes.
129 (define_mode_attr avxmodesuffixf2c
130 [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
;; NOTE(review): in this copy the avxmodesuffixp list is never closed before
;; the next define_mode_attr starts, and it has no V4DF entry; a line looks
;; to be missing -- restore it from the upstream file.
131 (define_mode_attr avxmodesuffixp
132 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Pattern-name suffix: empty for 16-byte modes, "256" for 32-byte modes.
134 (define_mode_attr avxmodesuffix
135 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
136 (V8SI "256") (V8SF "256") (V4DF "256")])
138 ;; Mapping of immediate bits for blend instructions
139 (define_mode_attr blendbits
140 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
142 ;; Mapping of immediate bits for pinsr instructions
143 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
145 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
147 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move patterns
151 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move expander for every 32-byte AVX vector mode (AVX256MODE).  The actual
;; expansion is delegated to ix86_expand_vector_move.
153 (define_expand "mov<mode>"
154 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
155 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
158 ix86_expand_vector_move (<MODE>mode, operands);
;; AVX vector move insn for all AVXMODE modes.  Three alternatives:
;; constant -> register, register/memory -> register, register -> memory.
;; The condition requires at least one operand to be a register.  Constants
;; are emitted through standard_sse_constant_opcode; otherwise the insn's
;; "mode" attribute selects vmovaps, vmovapd or vmovdqa.
162 (define_insn "*avx_mov<mode>_internal"
163 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
164 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
166 && (register_operand (operands[0], <MODE>mode)
167 || register_operand (operands[1], <MODE>mode))"
169 switch (which_alternative)
172 return standard_sse_constant_opcode (insn, operands[1]);
175 switch (get_attr_mode (insn))
179 return "vmovaps\t{%1, %0|%0, %1}";
182 return "vmovapd\t{%1, %0|%0, %1}";
184 return "vmovdqa\t{%1, %0|%0, %1}";
190 [(set_attr "type" "sselog1,ssemov,ssemov")
191 (set_attr "prefix" "vex")
192 (set_attr "mode" "<avxvecmode>")])
194 ;; All of these patterns are enabled for SSE1 as well as SSE2.
195 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every 16-byte SSE vector mode (SSEMODE); delegates to
;; ix86_expand_vector_move.
197 (define_expand "mov<mode>"
198 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
199 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
202 ix86_expand_vector_move (<MODE>mode, operands);
;; SSE vector move insn for all SSEMODE modes, with the same three
;; alternatives as the AVX variant (constant -> reg, reg/mem -> reg,
;; reg -> mem) and the same "one operand must be a register" condition.
;; The insn's "mode" attribute selects movaps, movapd or movdqa.
;; The trailing cond yields V4SF when optimizing for size, when SSE2 is
;; unavailable, or for alternative 2 when TARGET_SSE_TYPELESS_STORES is set;
;; otherwise V4SF/V2DF for those two modes and TI for the rest.
;; NOTE(review): the line attaching this cond to an attribute is missing from
;; this copy; presumably it is the "mode" attribute -- confirm.
206 (define_insn "*mov<mode>_internal"
207 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
208 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
210 && (register_operand (operands[0], <MODE>mode)
211 || register_operand (operands[1], <MODE>mode))"
213 switch (which_alternative)
216 return standard_sse_constant_opcode (insn, operands[1]);
219 switch (get_attr_mode (insn))
222 return "movaps\t{%1, %0|%0, %1}";
224 return "movapd\t{%1, %0|%0, %1}";
226 return "movdqa\t{%1, %0|%0, %1}";
232 [(set_attr "type" "sselog1,ssemov,ssemov")
234 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
235 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
236 (and (eq_attr "alternative" "2")
237 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
239 (const_string "V4SF")
240 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
241 (const_string "V4SF")
242 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
243 (const_string "V2DF")
245 (const_string "TI")))])
247 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
248 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
249 ;; from memory, we'd prefer to load the memory directly into the %xmm
250 ;; register. To facilitate this happy circumstance, this pattern won't
251 ;; split until after register allocation. If the 64-bit value didn't
252 ;; come from memory, this is the best we can do. This is much better
253 ;; than storing %edx:%eax into a stack temporary and loading an %xmm from there.
;; Only enabled for 32-bit targets with SSE2 and inter-unit moves.  The
;; register alternative needs an xmm scratch (operand 2): each 32-bit half
;; is loaded with gen_sse2_loadld and the two are interleaved with
;; gen_sse2_punpckldq.  The memory alternative concatenates the DI value
;; with zero through gen_vec_concatv2di.
256 (define_insn_and_split "movdi_to_sse"
258 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
259 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
260 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
261 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
263 "&& reload_completed"
266 if (register_operand (operands[1], DImode))
268 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
269 Assemble the 64-bit DImode value in an xmm register. */
270 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
271 gen_rtx_SUBREG (SImode, operands[1], 0)));
272 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
273 gen_rtx_SUBREG (SImode, operands[1], 4)));
274 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
276 else if (memory_operand (operands[1], DImode))
277 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; After reload, rewrite a V4SF register load from a
;; zero_extended_scalar_load_operand: operand 1 is narrowed to its SFmode
;; subreg at offset 0 and operand 2 becomes the V4SF zero vector; the
;; replacement RTL uses vec_duplicate of the scalar.
;; NOTE(review): the opening line of this definition (presumably a
;; define_split) and part of the replacement pattern are missing from this
;; copy -- restore from upstream.
283 [(set (match_operand:V4SF 0 "register_operand" "")
284 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
285 "TARGET_SSE && reload_completed"
288 (vec_duplicate:V4SF (match_dup 1))
292 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
293 operands[2] = CONST0_RTX (V4SFmode);
;; After reload, rewrite a V2DF register load from a
;; zero_extended_scalar_load_operand as a vec_concat of the DFmode scalar
;; (subreg of operand 1 at offset 0) with a DFmode zero.
;; NOTE(review): the opening line of this definition (presumably a
;; define_split) is missing from this copy.
297 [(set (match_operand:V2DF 0 "register_operand" "")
298 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
299 "TARGET_SSE2 && reload_completed"
300 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
302 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
303 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a 32-byte AVX vector register; delegates to
;; ix86_expand_push.
306 (define_expand "push<mode>1"
307 [(match_operand:AVX256MODE 0 "register_operand" "")]
310 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for a 16-byte SSE vector register; delegates to
;; ix86_expand_push.
314 (define_expand "push<mode>1"
315 [(match_operand:SSEMODE 0 "register_operand" "")]
318 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the 32-byte AVX vector modes; delegates to
;; ix86_expand_vector_move_misalign.
322 (define_expand "movmisalign<mode>"
323 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
324 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
327 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned-move expander for the 16-byte SSE vector modes; delegates to
;; ix86_expand_vector_move_misalign.
331 (define_expand "movmisalign<mode>"
332 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
333 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
336 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; vmovups / vmovupd for the AVX float vector modes (register load or store;
;; memory-to-memory is rejected by the condition).  Flagged with the "movu"
;; attribute.
340 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
341 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
343 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
345 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
346 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
347 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
348 [(set_attr "type" "ssemov")
349 (set_attr "movu" "1")
350 (set_attr "prefix" "vex")
351 (set_attr "mode" "<MODE>")])
;; Emits movq (vmovq when VEX-encoded, per the "%v" prefix and "maybe_vex")
;; from a V2DI register or memory operand into a V2DI register.  The visible
;; RTL selects element 0 of operand 1.
353 (define_insn "sse2_movq128"
354 [(set (match_operand:V2DI 0 "register_operand" "=x")
357 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
358 (parallel [(const_int 0)]))
361 "%vmovq\t{%1, %0|%0, %1}"
362 [(set_attr "type" "ssemov")
363 (set_attr "prefix" "maybe_vex")
364 (set_attr "mode" "TI")])
;; movups / movupd for the 16-byte float vector modes (register load or
;; store; memory-to-memory is rejected by the condition).  Flagged with the
;; "movu" attribute.
366 (define_insn "<sse>_movup<ssemodesuffixf2c>"
367 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
369 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
371 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
372 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
373 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
374 [(set_attr "type" "ssemov")
375 (set_attr "movu" "1")
376 (set_attr "mode" "<MODE>")])
;; vmovdqu for the QI vector modes V16QI and V32QI (register load or store;
;; memory-to-memory is rejected).  Requires TARGET_AVX.
378 (define_insn "avx_movdqu<avxmodesuffix>"
379 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
381 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
383 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
384 "vmovdqu\t{%1, %0|%0, %1}"
385 [(set_attr "type" "ssemov")
386 (set_attr "movu" "1")
387 (set_attr "prefix" "vex")
388 (set_attr "mode" "<avxvecmode>")])
;; movdqu for V16QI (register load or store; memory-to-memory is rejected).
;; Requires TARGET_SSE2.
390 (define_insn "sse2_movdqu"
391 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
392 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
394 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
395 "movdqu\t{%1, %0|%0, %1}"
396 [(set_attr "type" "ssemov")
397 (set_attr "movu" "1")
398 (set_attr "prefix_data16" "1")
399 (set_attr "mode" "TI")])
;; vmovntps / vmovntpd: store an AVX float vector register to memory.
401 (define_insn "avx_movnt<mode>"
402 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
404 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
406 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
407 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
408 [(set_attr "type" "ssemov")
409 (set_attr "prefix" "vex")
410 (set_attr "mode" "<MODE>")])
;; movntps / movntpd: store a 16-byte float vector register to memory.
412 (define_insn "<sse>_movnt<mode>"
413 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
415 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
417 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
418 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
419 [(set_attr "type" "ssemov")
420 (set_attr "mode" "<MODE>")])
;; vmovntdq: store a V2DI or V4DI register to memory.
;; NOTE(review): the type here is "ssecvt" while sse2_movntv2di uses
;; "ssemov" -- confirm whether the difference is intentional.
422 (define_insn "avx_movnt<mode>"
423 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
425 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
428 "vmovntdq\t{%1, %0|%0, %1}"
429 [(set_attr "type" "ssecvt")
430 (set_attr "prefix" "vex")
431 (set_attr "mode" "<avxvecmode>")])
;; movntdq: store a V2DI register to memory.
433 (define_insn "sse2_movntv2di"
434 [(set (match_operand:V2DI 0 "memory_operand" "=m")
435 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
438 "movntdq\t{%1, %0|%0, %1}"
439 [(set_attr "type" "ssemov")
440 (set_attr "prefix_data16" "1")
441 (set_attr "mode" "TI")])
;; movnti: store an SImode general register ("r") to memory.
443 (define_insn "sse2_movntsi"
444 [(set (match_operand:SI 0 "memory_operand" "=m")
445 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
448 "movnti\t{%1, %0|%0, %1}"
449 [(set_attr "type" "ssemov")
450 (set_attr "prefix_data16" "0")
451 (set_attr "mode" "V2DF")])
;; vlddqu: load a V16QI or V32QI vector from memory into a register.
;; NOTE(review): the type here is "ssecvt" while sse3_lddqu uses "ssemov" --
;; confirm whether the difference is intentional.
453 (define_insn "avx_lddqu<avxmodesuffix>"
454 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
456 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
459 "vlddqu\t{%1, %0|%0, %1}"
460 [(set_attr "type" "ssecvt")
461 (set_attr "movu" "1")
462 (set_attr "prefix" "vex")
463 (set_attr "mode" "<avxvecmode>")])
;; lddqu: load a V16QI vector from memory into a register.
465 (define_insn "sse3_lddqu"
466 [(set (match_operand:V16QI 0 "register_operand" "=x")
467 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
470 "lddqu\t{%1, %0|%0, %1}"
471 [(set_attr "type" "ssemov")
472 (set_attr "movu" "1")
473 (set_attr "prefix_data16" "0")
474 (set_attr "prefix_rep" "1")
475 (set_attr "mode" "TI")])
477 ; Expand patterns for non-temporal stores. At the moment, only those
478 ; that directly map to insns are defined; it would be possible to
479 ; define patterns for other modes that would expand to several insns.
;; Non-temporal store of a 16-byte float vector register to memory.
481 (define_expand "storent<mode>"
482 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
484 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
486 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; Non-temporal store expander for the MODEF iterator, register to memory.
;; NOTE(review): MODEF is defined outside this file excerpt, and the enabling
;; condition is missing from this copy.
489 (define_expand "storent<mode>"
490 [(set (match_operand:MODEF 0 "memory_operand" "")
492 [(match_operand:MODEF 1 "register_operand" "")]
;; Non-temporal store expander for a V2DI register to memory.
497 (define_expand "storentv2di"
498 [(set (match_operand:V2DI 0 "memory_operand" "")
499 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; Non-temporal store expander for an SImode register to memory.
504 (define_expand "storentsi"
505 [(set (match_operand:SI 0 "memory_operand" "")
506 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
511 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
513 ;; Parallel floating point arithmetic
515 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander on the 16-byte float vector modes.  The whole expansion is
;; handed to ix86_expand_fp_absneg_operator, which receives the RTL code.
;; NOTE(review): the code iterator line is missing from this copy; the helper
;; name suggests abs/neg -- confirm.
517 (define_expand "<code><mode>2"
518 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
520 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
521 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
522 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Add/subtract expander for the 32-byte float vector modes.  Operands are
;; adjusted by ix86_fixup_binary_operands_no_copy before the pattern is
;; emitted.
524 (define_expand "<plusminus_insn><mode>3"
525 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
526 (plusminus:AVX256MODEF2P
527 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
528 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
529 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
530 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed add/subtract on AVX float vectors, three-operand form.  Operand 1
;; carries the <comm> constraint prefix; operand 2 may be a register or
;; memory.
532 (define_insn "*avx_<plusminus_insn><mode>3"
533 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
534 (plusminus:AVXMODEF2P
535 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
536 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
537 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
538 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
539 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
540 [(set_attr "type" "sseadd")
541 (set_attr "prefix" "vex")
542 (set_attr "mode" "<avxvecmode>")])
;; Add/subtract expander for the 16-byte float vector modes.  Operands are
;; adjusted by ix86_fixup_binary_operands_no_copy before the pattern is
;; emitted.
544 (define_expand "<plusminus_insn><mode>3"
545 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
546 (plusminus:SSEMODEF2P
547 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
548 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
549 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
550 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed add/subtract on 16-byte float vectors, two-operand form: operand 1
;; is tied to the destination (constraint "0") and operand 2 may be a
;; register or memory.
552 (define_insn "*<plusminus_insn><mode>3"
553 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
554 (plusminus:SSEMODEF2P
555 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
556 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
557 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
558 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
559 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
560 [(set_attr "type" "sseadd")
561 (set_attr "mode" "<MODE>")])
;; Scalar add/subtract (mnemonic suffix "ss"/"sd") expressed as a vec_merge
;; on a 16-byte float vector, AVX three-operand form.
;; NOTE(review): the remaining vec_merge operands are missing from this copy.
563 (define_insn "*avx_vm<plusminus_insn><mode>3"
564 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
565 (vec_merge:SSEMODEF2P
566 (plusminus:SSEMODEF2P
567 (match_operand:SSEMODEF2P 1 "register_operand" "x")
568 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
571 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
572 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
573 [(set_attr "type" "sseadd")
574 (set_attr "prefix" "vex")
575 (set_attr "mode" "<ssescalarmode>")])
;; Scalar add/subtract (mnemonic suffix "ss"/"sd") expressed as a vec_merge
;; on a 16-byte float vector, SSE two-operand form with operand 1 tied to the
;; destination.
;; NOTE(review): the remaining vec_merge operands are missing from this copy.
577 (define_insn "<sse>_vm<plusminus_insn><mode>3"
578 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
579 (vec_merge:SSEMODEF2P
580 (plusminus:SSEMODEF2P
581 (match_operand:SSEMODEF2P 1 "register_operand" "0")
582 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
585 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
586 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
587 [(set_attr "type" "sseadd")
588 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for the 32-byte float vector modes; operands are
;; adjusted by ix86_fixup_binary_operands_no_copy with code MULT.
590 (define_expand "mul<mode>3"
591 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
593 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
594 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
595 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
596 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; vmulps / vmulpd on AVX float vectors, three-operand form.  Operand 1 is
;; marked commutative ("%").
598 (define_insn "*avx_mul<mode>3"
599 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
601 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
602 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
603 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
604 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
605 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
606 [(set_attr "type" "ssemul")
607 (set_attr "prefix" "vex")
608 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for the 16-byte float vector modes; operands are
;; adjusted by ix86_fixup_binary_operands_no_copy with code MULT.
610 (define_expand "mul<mode>3"
611 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
613 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
614 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
615 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
616 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; mulps / mulpd on 16-byte float vectors, two-operand form: operand 1 is
;; commutative ("%") and tied to the destination.
618 (define_insn "*mul<mode>3"
619 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
621 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
622 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
623 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
624 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
625 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
626 [(set_attr "type" "ssemul")
627 (set_attr "mode" "<MODE>")])
;; vmulss / vmulsd: scalar multiply expressed as a vec_merge on a 16-byte
;; float vector, AVX three-operand form.
;; NOTE(review): this condition uses AVX_VEC_FLOAT_MODE_P whereas the sibling
;; *avx_vm patterns for plus/minus and div use AVX128_VEC_FLOAT_MODE_P --
;; confirm the two are equivalent for SSEMODEF2P.
629 (define_insn "*avx_vmmul<mode>3"
630 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
631 (vec_merge:SSEMODEF2P
633 (match_operand:SSEMODEF2P 1 "register_operand" "x")
634 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
637 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
638 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
639 [(set_attr "type" "ssemul")
640 (set_attr "prefix" "vex")
641 (set_attr "mode" "<ssescalarmode>")])
;; mulss / mulsd: scalar multiply expressed as a vec_merge on a 16-byte float
;; vector, SSE two-operand form with operand 1 tied to the destination.
643 (define_insn "<sse>_vmmul<mode>3"
644 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
645 (vec_merge:SSEMODEF2P
647 (match_operand:SSEMODEF2P 1 "register_operand" "0")
648 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
651 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
652 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
653 [(set_attr "type" "ssemul")
654 (set_attr "mode" "<ssescalarmode>")])
;; V8SF division expander.  After fixing up the operands it calls
;; ix86_emit_swdivsf instead of emitting the plain division when all of the
;; following hold: TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for
;; size, -ffinite-math-only, no trapping math, and unsafe math optimizations.
656 (define_expand "divv8sf3"
657 [(set (match_operand:V8SF 0 "register_operand" "")
658 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
659 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
662 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
664 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
665 && flag_finite_math_only && !flag_trapping_math
666 && flag_unsafe_math_optimizations)
668 ix86_emit_swdivsf (operands[0], operands[1],
669 operands[2], V8SFmode);
;; V4DF division expander; only fixes up the operands via
;; ix86_fixup_binary_operands_no_copy.
674 (define_expand "divv4df3"
675 [(set (match_operand:V4DF 0 "register_operand" "")
676 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
677 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
679 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; vdivps / vdivpd on AVX float vectors, three-operand form; the dividend
;; (operand 1) must be a register.
681 (define_insn "avx_div<mode>3"
682 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
684 (match_operand:AVXMODEF2P 1 "register_operand" "x")
685 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
686 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
687 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
688 [(set_attr "type" "ssediv")
689 (set_attr "prefix" "vex")
690 (set_attr "mode" "<MODE>")])
;; V4SF division expander.  Calls ix86_emit_swdivsf instead of emitting the
;; plain division when all of the following hold: TARGET_SSE_MATH,
;; TARGET_RECIP, optimizing the insn for speed, -ffinite-math-only, no
;; trapping math, and unsafe math optimizations.
692 (define_expand "divv4sf3"
693 [(set (match_operand:V4SF 0 "register_operand" "")
694 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
695 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
698 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
699 && flag_finite_math_only && !flag_trapping_math
700 && flag_unsafe_math_optimizations)
702 ix86_emit_swdivsf (operands[0], operands[1],
703 operands[2], V4SFmode);
;; V2DF division expander: register dividend, register-or-memory divisor.
708 (define_expand "divv2df3"
709 [(set (match_operand:V2DF 0 "register_operand" "")
710 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
711 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; vdivps / vdivpd restricted to the 16-byte float vector modes, AVX
;; three-operand form.
715 (define_insn "*avx_div<mode>3"
716 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
718 (match_operand:SSEMODEF2P 1 "register_operand" "x")
719 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
720 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
721 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
722 [(set_attr "type" "ssediv")
723 (set_attr "prefix" "vex")
724 (set_attr "mode" "<MODE>")])
;; divps / divpd on 16-byte float vectors, SSE two-operand form with the
;; dividend tied to the destination.
726 (define_insn "<sse>_div<mode>3"
727 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
729 (match_operand:SSEMODEF2P 1 "register_operand" "0")
730 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
731 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
732 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
733 [(set_attr "type" "ssediv")
734 (set_attr "mode" "<MODE>")])
;; vdivss / vdivsd: scalar divide expressed as a vec_merge on a 16-byte float
;; vector, AVX three-operand form.
736 (define_insn "*avx_vmdiv<mode>3"
737 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
738 (vec_merge:SSEMODEF2P
740 (match_operand:SSEMODEF2P 1 "register_operand" "x")
741 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
744 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
745 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
746 [(set_attr "type" "ssediv")
747 (set_attr "prefix" "vex")
748 (set_attr "mode" "<ssescalarmode>")])
;; divss / divsd: scalar divide expressed as a vec_merge on a 16-byte float
;; vector, SSE two-operand form with operand 1 tied to the destination.
750 (define_insn "<sse>_vmdiv<mode>3"
751 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
752 (vec_merge:SSEMODEF2P
754 (match_operand:SSEMODEF2P 1 "register_operand" "0")
755 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
758 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
759 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
760 [(set_attr "type" "ssediv")
761 (set_attr "mode" "<ssescalarmode>")])
;; vrcpps on V8SF, modelled as UNSPEC_RCP of a register or memory operand.
763 (define_insn "avx_rcpv8sf2"
764 [(set (match_operand:V8SF 0 "register_operand" "=x")
766 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
768 "vrcpps\t{%1, %0|%0, %1}"
769 [(set_attr "type" "sse")
770 (set_attr "prefix" "vex")
771 (set_attr "mode" "V8SF")])
;; rcpps on V4SF (vrcpps when VEX-encoded, per "%v" and "maybe_vex"),
;; modelled as UNSPEC_RCP of a register or memory operand.
773 (define_insn "sse_rcpv4sf2"
774 [(set (match_operand:V4SF 0 "register_operand" "=x")
776 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
778 "%vrcpps\t{%1, %0|%0, %1}"
779 [(set_attr "type" "sse")
780 (set_attr "atom_sse_attr" "rcp")
781 (set_attr "prefix" "maybe_vex")
782 (set_attr "mode" "V4SF")])
;; vrcpss: AVX three-operand scalar form.  Operand 1 (register or memory) is
;; the unspec input and operand 2 is an additional register source.
;; NOTE(review): the vec_merge wrapper and unspec code are missing from this
;; copy.
784 (define_insn "*avx_vmrcpv4sf2"
785 [(set (match_operand:V4SF 0 "register_operand" "=x")
787 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
789 (match_operand:V4SF 2 "register_operand" "x")
792 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
793 [(set_attr "type" "sse")
794 (set_attr "prefix" "vex")
795 (set_attr "mode" "SF")])
;; rcpss: SSE two-operand scalar form.  Operand 1 (register or memory) is the
;; unspec input; operand 2 is tied to the destination.
;; NOTE(review): the vec_merge wrapper and unspec code are missing from this
;; copy.
797 (define_insn "sse_vmrcpv4sf2"
798 [(set (match_operand:V4SF 0 "register_operand" "=x")
800 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
802 (match_operand:V4SF 2 "register_operand" "0")
805 "rcpss\t{%1, %0|%0, %1}"
806 [(set_attr "type" "sse")
807 (set_attr "atom_sse_attr" "rcp")
808 (set_attr "mode" "SF")])
;; V8SF square-root expander.  Calls ix86_emit_swsqrtsf (last argument 0)
;; instead of emitting the plain sqrt when all of the following hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for size,
;; -ffinite-math-only, no trapping math, and unsafe math optimizations.
810 (define_expand "sqrtv8sf2"
811 [(set (match_operand:V8SF 0 "register_operand" "")
812 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
815 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
816 && flag_finite_math_only && !flag_trapping_math
817 && flag_unsafe_math_optimizations)
819 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; vsqrtps on V8SF; the source may be a register or memory.
824 (define_insn "avx_sqrtv8sf2"
825 [(set (match_operand:V8SF 0 "register_operand" "=x")
826 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
828 "vsqrtps\t{%1, %0|%0, %1}"
829 [(set_attr "type" "sse")
830 (set_attr "prefix" "vex")
831 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  Calls ix86_emit_swsqrtsf (last argument 0)
;; instead of emitting the plain sqrt when all of the following hold:
;; TARGET_SSE_MATH, TARGET_RECIP, optimizing the insn for speed,
;; -ffinite-math-only, no trapping math, and unsafe math optimizations.
833 (define_expand "sqrtv4sf2"
834 [(set (match_operand:V4SF 0 "register_operand" "")
835 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
838 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
839 && flag_finite_math_only && !flag_trapping_math
840 && flag_unsafe_math_optimizations)
842 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; sqrtps on V4SF (vsqrtps when VEX-encoded, per "%v" and "maybe_vex").
847 (define_insn "sse_sqrtv4sf2"
848 [(set (match_operand:V4SF 0 "register_operand" "=x")
849 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
851 "%vsqrtps\t{%1, %0|%0, %1}"
852 [(set_attr "type" "sse")
853 (set_attr "atom_sse_attr" "sqrt")
854 (set_attr "prefix" "maybe_vex")
855 (set_attr "mode" "V4SF")])
;; vsqrtpd on V4DF; the source may be a register or memory.
857 (define_insn "sqrtv4df2"
858 [(set (match_operand:V4DF 0 "register_operand" "=x")
859 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
861 "vsqrtpd\t{%1, %0|%0, %1}"
862 [(set_attr "type" "sse")
863 (set_attr "prefix" "vex")
864 (set_attr "mode" "V4DF")])
;; sqrtpd on V2DF (vsqrtpd when VEX-encoded, per "%v" and "maybe_vex").
866 (define_insn "sqrtv2df2"
867 [(set (match_operand:V2DF 0 "register_operand" "=x")
868 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
870 "%vsqrtpd\t{%1, %0|%0, %1}"
871 [(set_attr "type" "sse")
872 (set_attr "prefix" "maybe_vex")
873 (set_attr "mode" "V2DF")])
;; vsqrtss / vsqrtsd: scalar square root expressed as a vec_merge on a
;; 16-byte float vector, AVX three-operand form.  Operand 1 is the sqrt input
;; and operand 2 is the vector it is merged with.
;; NOTE(review): this condition uses AVX_VEC_FLOAT_MODE_P although the
;; iterator only contains 16-byte modes; other *avx_vm patterns here use
;; AVX128_VEC_FLOAT_MODE_P -- confirm equivalence.
875 (define_insn "*avx_vmsqrt<mode>2"
876 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
877 (vec_merge:SSEMODEF2P
879 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
880 (match_operand:SSEMODEF2P 2 "register_operand" "x")
882 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
883 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
884 [(set_attr "type" "sse")
885 (set_attr "prefix" "vex")
886 (set_attr "mode" "<ssescalarmode>")])
;; sqrtss / sqrtsd: scalar square root expressed as a vec_merge on a 16-byte
;; float vector, SSE two-operand form.  Operand 1 is the sqrt input; operand
;; 2 is tied to the destination.
888 (define_insn "<sse>_vmsqrt<mode>2"
889 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
890 (vec_merge:SSEMODEF2P
892 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
893 (match_operand:SSEMODEF2P 2 "register_operand" "0")
895 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
896 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
897 [(set_attr "type" "sse")
898 (set_attr "atom_sse_attr" "sqrt")
899 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal-square-root expander (UNSPEC_RSQRT).  Enabled for
;; TARGET_AVX && TARGET_SSE_MATH; always expands through ix86_emit_swsqrtsf
;; with last argument 1.
901 (define_expand "rsqrtv8sf2"
902 [(set (match_operand:V8SF 0 "register_operand" "")
904 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
905 "TARGET_AVX && TARGET_SSE_MATH"
907 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; vrsqrtps on V8SF, modelled as UNSPEC_RSQRT of a register or memory
;; operand.
911 (define_insn "avx_rsqrtv8sf2"
912 [(set (match_operand:V8SF 0 "register_operand" "=x")
914 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
916 "vrsqrtps\t{%1, %0|%0, %1}"
917 [(set_attr "type" "sse")
918 (set_attr "prefix" "vex")
919 (set_attr "mode" "V8SF")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT); expands through
;; ix86_emit_swsqrtsf with last argument 1.
;; NOTE(review): the enabling condition is missing from this copy.
921 (define_expand "rsqrtv4sf2"
922 [(set (match_operand:V4SF 0 "register_operand" "")
924 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
927 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; rsqrtps on V4SF (vrsqrtps when VEX-encoded, per "%v" and "maybe_vex"),
;; modelled as UNSPEC_RSQRT of a register or memory operand.
931 (define_insn "sse_rsqrtv4sf2"
932 [(set (match_operand:V4SF 0 "register_operand" "=x")
934 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
936 "%vrsqrtps\t{%1, %0|%0, %1}"
937 [(set_attr "type" "sse")
938 (set_attr "prefix" "maybe_vex")
939 (set_attr "mode" "V4SF")])
;; vrsqrtss: AVX three-operand scalar form.  Operand 1 (register or memory)
;; is the unspec input and operand 2 is an additional register source.
;; NOTE(review): the vec_merge wrapper and unspec code are missing from this
;; copy.
941 (define_insn "*avx_vmrsqrtv4sf2"
942 [(set (match_operand:V4SF 0 "register_operand" "=x")
944 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
946 (match_operand:V4SF 2 "register_operand" "x")
949 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
950 [(set_attr "type" "sse")
951 (set_attr "prefix" "vex")
952 (set_attr "mode" "SF")])
;; rsqrtss: SSE two-operand scalar form.  Operand 1 (register or memory) is
;; the unspec input; operand 2 is tied to the destination.
;; NOTE(review): the vec_merge wrapper and unspec code are missing from this
;; copy.
954 (define_insn "sse_vmrsqrtv4sf2"
955 [(set (match_operand:V4SF 0 "register_operand" "=x")
957 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
959 (match_operand:V4SF 2 "register_operand" "0")
962 "rsqrtss\t{%1, %0|%0, %1}"
963 [(set_attr "type" "sse")
964 (set_attr "mode" "SF")])
966 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
967 ;; isn't really correct, as those rtl operators aren't defined when
968 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Min/max expander for the 32-byte float vector modes.  When
;; flag_finite_math_only is off, operand 1 is forced into a register before
;; the usual binary-operand fixup.
970 (define_expand "<code><mode>3"
971 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
972 (smaxmin:AVX256MODEF2P
973 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
974 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
975 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
977 if (!flag_finite_math_only)
978 operands[1] = force_reg (<MODE>mode, operands[1]);
979 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max expander for the 16-byte float vector modes.  When
;; flag_finite_math_only is off, operand 1 is forced into a register before
;; the usual binary-operand fixup.
982 (define_expand "<code><mode>3"
983 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
985 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
986 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
987 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
989 if (!flag_finite_math_only)
990 operands[1] = force_reg (<MODE>mode, operands[1]);
991 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX packed min/max that is only available with flag_finite_math_only.
;; Under that flag operand 1 is marked commutative ('%'), so the two
;; inputs may be exchanged; ix86_binary_operator_ok additionally vets the
;; operand combination.  VEX three-operand output.
994 (define_insn "*avx_<code><mode>3_finite"
995 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
997 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
998 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
999 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1000 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1001 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1002 [(set_attr "type" "sseadd")
1003 (set_attr "prefix" "vex")
1004 (set_attr "mode" "<MODE>")])
;; SSE packed min/max that is only available with flag_finite_math_only.
;; Operand 1 is commutative ('%') and tied to the destination ("0"), which
;; matches the two-operand template.  ix86_binary_operator_ok vets the
;; operand combination.
1006 (define_insn "*<code><mode>3_finite"
1007 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1009 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1010 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1011 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1012 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1013 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1014 [(set_attr "type" "sseadd")
1015 (set_attr "mode" "<MODE>")])
;; AVX packed min/max without the flag_finite_math_only requirement.
;; Because this pattern can be used when NaNs and signed zeros matter, the
;; inputs must NOT be treated as commutative: operand 1 is a plain
;; register ("x", no '%'), mirroring the non-finite SSE pattern, which uses
;; register_operand "0".  The commutative variant is the separate
;; "_finite" pattern.  VEX three-operand output.
1017 (define_insn "*avx_<code><mode>3"
1018 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1020 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1021 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1022 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1023 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1024 [(set_attr "type" "sseadd")
1025 (set_attr "prefix" "vex")
1026 (set_attr "mode" "<avxvecmode>")])
;; SSE packed min/max without the flag_finite_math_only requirement.
;; Operand 1 must be a register tied to the destination ("0") and carries
;; no '%' marker, so the operand order is preserved.
1028 (define_insn "*<code><mode>3"
1029 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1031 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1032 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1033 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1034 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1035 [(set_attr "type" "sseadd")
1036 (set_attr "mode" "<MODE>")])
;; AVX scalar min/max expressed as a vec_merge on a 128-bit float vector
;; mode (AVX128_VEC_FLOAT_MODE_P).  Operand 1 is a register, operand 2 a
;; register or memory; VEX three-operand output with the scalar 's' suffix.
;; The "type" attribute is "sseadd", the same classification used by the
;; SSE form of this pattern and by the packed min/max patterns.
1038 (define_insn "*avx_vm<code><mode>3"
1039 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1040 (vec_merge:SSEMODEF2P
1042 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1043 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1046 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1047 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1048 [(set_attr "type" "sseadd")
1049 (set_attr "prefix" "vex")
1050 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar min/max expressed as a vec_merge.  Operand 1 is tied to the
;; destination ("0"), so the two-operand template prints only operands 2
;; and 0.  The mode attribute is the scalar mode of the vector.
1052 (define_insn "<sse>_vm<code><mode>3"
1053 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1054 (vec_merge:SSEMODEF2P
1056 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1057 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1060 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1061 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1062 [(set_attr "type" "sseadd")
1063 (set_attr "mode" "<ssescalarmode>")])
1065 ;; These versions of the min/max patterns implement exactly the operations
1066 ;; min = (op1 < op2 ? op1 : op2)
1067 ;; max = (!(op1 < op2) ? op1 : op2)
1068 ;; Their operands are not commutative, and thus they may be used in the
1069 ;; presence of -0.0 and NaN.
;; AVX form of the order-sensitive minimum (vminp*).  Operand 1 must be a
;; register and operand 2 may be memory; there is no '%' marker, so the
;; inputs are never exchanged.  VEX three-operand output.
1071 (define_insn "*avx_ieee_smin<mode>3"
1072 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1074 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1075 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1077 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1078 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1079 [(set_attr "type" "sseadd")
1080 (set_attr "prefix" "vex")
1081 (set_attr "mode" "<avxvecmode>")])
;; AVX form of the order-sensitive maximum (vmaxp*).  Same operand layout
;; as the smin variant: register operand 1, register-or-memory operand 2,
;; no commutativity marker.
1083 (define_insn "*avx_ieee_smax<mode>3"
1084 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1086 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1087 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1089 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1090 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1091 [(set_attr "type" "sseadd")
1092 (set_attr "prefix" "vex")
1093 (set_attr "mode" "<avxvecmode>")])
;; SSE form of the order-sensitive minimum (minp*).  Operand 1 is tied to
;; the destination ("0") and is not marked commutative.
1095 (define_insn "*ieee_smin<mode>3"
1096 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1098 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1099 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1101 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1102 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1103 [(set_attr "type" "sseadd")
1104 (set_attr "mode" "<MODE>")])
;; SSE form of the order-sensitive maximum (maxp*).  Operand 1 is tied to
;; the destination ("0") and is not marked commutative.
1106 (define_insn "*ieee_smax<mode>3"
1107 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1109 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1110 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1112 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1113 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1114 [(set_attr "type" "sseadd")
1115 (set_attr "mode" "<MODE>")])
;; 256-bit single-precision add/subtract (vaddsubps).  Operands 1 and 2
;; appear once as match_operands and are reused through match_dup in the
;; minus arm, so both arms of the pattern see the same two inputs.
;; VEX three-operand output.
1117 (define_insn "avx_addsubv8sf3"
1118 [(set (match_operand:V8SF 0 "register_operand" "=x")
1121 (match_operand:V8SF 1 "register_operand" "x")
1122 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1123 (minus:V8SF (match_dup 1) (match_dup 2))
1126 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1127 [(set_attr "type" "sseadd")
1128 (set_attr "prefix" "vex")
1129 (set_attr "mode" "V8SF")])
;; 256-bit double-precision add/subtract (vaddsubpd).  Same structure as
;; the V8SF pattern: the minus arm reuses operands 1 and 2 via match_dup.
1131 (define_insn "avx_addsubv4df3"
1132 [(set (match_operand:V4DF 0 "register_operand" "=x")
1135 (match_operand:V4DF 1 "register_operand" "x")
1136 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1137 (minus:V4DF (match_dup 1) (match_dup 2))
1140 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1141 [(set_attr "type" "sseadd")
1142 (set_attr "prefix" "vex")
1143 (set_attr "mode" "V4DF")])
;; VEX-encoded 128-bit single-precision add/subtract (vaddsubps) with an
;; untied destination; the minus arm reuses operands 1 and 2 via match_dup.
1145 (define_insn "*avx_addsubv4sf3"
1146 [(set (match_operand:V4SF 0 "register_operand" "=x")
1149 (match_operand:V4SF 1 "register_operand" "x")
1150 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1151 (minus:V4SF (match_dup 1) (match_dup 2))
1154 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1155 [(set_attr "type" "sseadd")
1156 (set_attr "prefix" "vex")
1157 (set_attr "mode" "V4SF")])
;; Legacy-encoded 128-bit single-precision add/subtract (addsubps).
;; Operand 1 is tied to the destination ("0"), so only operands 2 and 0
;; are printed.  The prefix_rep attribute is set to "1" for this insn.
1159 (define_insn "sse3_addsubv4sf3"
1160 [(set (match_operand:V4SF 0 "register_operand" "=x")
1163 (match_operand:V4SF 1 "register_operand" "0")
1164 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1165 (minus:V4SF (match_dup 1) (match_dup 2))
1168 "addsubps\t{%2, %0|%0, %2}"
1169 [(set_attr "type" "sseadd")
1170 (set_attr "prefix_rep" "1")
1171 (set_attr "mode" "V4SF")])
;; VEX-encoded 128-bit double-precision add/subtract (vaddsubpd) with an
;; untied destination; the minus arm reuses operands 1 and 2 via match_dup.
1173 (define_insn "*avx_addsubv2df3"
1174 [(set (match_operand:V2DF 0 "register_operand" "=x")
1177 (match_operand:V2DF 1 "register_operand" "x")
1178 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1179 (minus:V2DF (match_dup 1) (match_dup 2))
1182 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1183 [(set_attr "type" "sseadd")
1184 (set_attr "prefix" "vex")
1185 (set_attr "mode" "V2DF")])
;; Legacy-encoded 128-bit double-precision add/subtract (addsubpd).
;; Operand 1 is tied to the destination ("0").  The atom_unit attribute is
;; set to "complex" for this insn.
1187 (define_insn "sse3_addsubv2df3"
1188 [(set (match_operand:V2DF 0 "register_operand" "=x")
1191 (match_operand:V2DF 1 "register_operand" "0")
1192 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1193 (minus:V2DF (match_dup 1) (match_dup 2))
1196 "addsubpd\t{%2, %0|%0, %2}"
1197 [(set_attr "type" "sseadd")
1198 (set_attr "atom_unit" "complex")
1199 (set_attr "mode" "V2DF")])
;; 256-bit double-precision horizontal add/subtract; <plusminus_insn> and
;; <plusminus_mnemonic> select between the two operations.
;; The vec_selects extract DF elements 0,1 and 2,3 from operand 1 and then
;; elements 0,1 and 2,3 from operand 2 (reused through match_dup), i.e. the
;; operation is applied to adjacent element pairs of each source.
;; VEX three-operand output (vh...pd).
1201 (define_insn "avx_h<plusminus_insn>v4df3"
1202 [(set (match_operand:V4DF 0 "register_operand" "=x")
1207 (match_operand:V4DF 1 "register_operand" "x")
1208 (parallel [(const_int 0)]))
1209 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1211 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1212 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1216 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1217 (parallel [(const_int 0)]))
1218 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1220 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1221 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1223 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1224 [(set_attr "type" "sseadd")
1225 (set_attr "prefix" "vex")
1226 (set_attr "mode" "V4DF")])
;; 256-bit single-precision horizontal add/subtract (vh...ps).
;; The vec_selects appear in this order: SF elements 0..3 of operand 1,
;; elements 0..3 of operand 2, elements 4..7 of operand 1, and elements
;; 4..7 of operand 2, always taken as adjacent pairs (0,1), (2,3), ...
;; Both sources are reused through match_dup after their first mention.
1228 (define_insn "avx_h<plusminus_insn>v8sf3"
1229 [(set (match_operand:V8SF 0 "register_operand" "=x")
1235 (match_operand:V8SF 1 "register_operand" "x")
1236 (parallel [(const_int 0)]))
1237 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1239 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1240 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1244 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1245 (parallel [(const_int 0)]))
1246 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1248 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1249 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1253 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1254 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1256 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1257 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1260 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1261 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1263 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1264 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1266 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1267 [(set_attr "type" "sseadd")
1268 (set_attr "prefix" "vex")
1269 (set_attr "mode" "V8SF")])
;; VEX-encoded 128-bit single-precision horizontal add/subtract.
;; Adjacent SF pairs (0,1) and (2,3) are selected first from operand 1 and
;; then from operand 2.  The destination is not tied to either input.
1271 (define_insn "*avx_h<plusminus_insn>v4sf3"
1272 [(set (match_operand:V4SF 0 "register_operand" "=x")
1277 (match_operand:V4SF 1 "register_operand" "x")
1278 (parallel [(const_int 0)]))
1279 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1281 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1282 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1286 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1287 (parallel [(const_int 0)]))
1288 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1290 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1291 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1293 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1294 [(set_attr "type" "sseadd")
1295 (set_attr "prefix" "vex")
1296 (set_attr "mode" "V4SF")])
;; Legacy-encoded 128-bit single-precision horizontal add/subtract.
;; Same element selection as the AVX variant, but operand 1 is tied to the
;; destination ("0"), so the template prints only operands 2 and 0.
;; Sets atom_unit to "complex" and prefix_rep to "1".
1298 (define_insn "sse3_h<plusminus_insn>v4sf3"
1299 [(set (match_operand:V4SF 0 "register_operand" "=x")
1304 (match_operand:V4SF 1 "register_operand" "0")
1305 (parallel [(const_int 0)]))
1306 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1308 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1309 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1313 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1314 (parallel [(const_int 0)]))
1315 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1317 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1318 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1320 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1321 [(set_attr "type" "sseadd")
1322 (set_attr "atom_unit" "complex")
1323 (set_attr "prefix_rep" "1")
1324 (set_attr "mode" "V4SF")])
;; VEX-encoded 128-bit double-precision horizontal add/subtract.
;; DF elements 0 and 1 are selected from operand 1 and then from
;; operand 2.  The destination is not tied to either input.
1326 (define_insn "*avx_h<plusminus_insn>v2df3"
1327 [(set (match_operand:V2DF 0 "register_operand" "=x")
1331 (match_operand:V2DF 1 "register_operand" "x")
1332 (parallel [(const_int 0)]))
1333 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1336 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1337 (parallel [(const_int 0)]))
1338 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1340 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1341 [(set_attr "type" "sseadd")
1342 (set_attr "prefix" "vex")
1343 (set_attr "mode" "V2DF")])
;; Legacy-encoded 128-bit double-precision horizontal add/subtract.
;; Operand 1 is tied to the destination ("0"); DF elements 0 and 1 of each
;; source are selected.
1345 (define_insn "sse3_h<plusminus_insn>v2df3"
1346 [(set (match_operand:V2DF 0 "register_operand" "=x")
1350 (match_operand:V2DF 1 "register_operand" "0")
1351 (parallel [(const_int 0)]))
1352 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1355 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1356 (parallel [(const_int 0)]))
1357 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1359 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1360 [(set_attr "type" "sseadd")
1361 (set_attr "mode" "V2DF")])
;; Sum reduction of a V4SF register (operand 1) into operand 0.
;; The body contains two strategies: two chained sse3 haddv4sf3 insns going
;; through a fresh temporary register, and a call to the generic helper
;; ix86_expand_reduc_v4sf with gen_addv4sf3 as the combining operation.
;; NOTE(review): the test choosing between the two strategies is not
;; visible here; presumably it depends on horizontal-add availability.
1363 (define_expand "reduc_splus_v4sf"
1364 [(match_operand:V4SF 0 "register_operand" "")
1365 (match_operand:V4SF 1 "register_operand" "")]
1370 rtx tmp = gen_reg_rtx (V4SFmode);
1371 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1372 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1375 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V2DF register: a single sse3 haddv2df3 with operand 1
;; supplied as both sources, writing operand 0.
1379 (define_expand "reduc_splus_v2df"
1380 [(match_operand:V2DF 0 "register_operand" "")
1381 (match_operand:V2DF 1 "register_operand" "")]
1384 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF register: delegates to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining operation.
1388 (define_expand "reduc_smax_v4sf"
1389 [(match_operand:V4SF 0 "register_operand" "")
1390 (match_operand:V4SF 1 "register_operand" "")]
1393 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF register: delegates to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining operation.
1397 (define_expand "reduc_smin_v4sf"
1398 [(match_operand:V4SF 0 "register_operand" "")
1399 (match_operand:V4SF 1 "register_operand" "")]
1402 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1406 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1408 ;; Parallel floating point comparisons
1410 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed floating-point compare with an explicit predicate.
;; Operand 3 is an SI constant accepted by const_0_to_31_operand and is
;; printed as the immediate of vcmpp* (first in AT&T order, last in Intel
;; order).  length_immediate is "1" to account for that immediate byte.
1412 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1413 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1415 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1416 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1417 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1420 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1421 [(set_attr "type" "ssecmp")
1422 (set_attr "length_immediate" "1")
1423 (set_attr "prefix" "vex")
1424 (set_attr "mode" "<MODE>")])
;; AVX scalar floating-point compare with an explicit predicate, expressed
;; as a vec_merge.  Operand 3 is an SI constant accepted by
;; const_0_to_31_operand and is printed as the immediate of vcmps*.
;; Operand 0 is the output and therefore carries the "=x" constraint (an
;; SSE register that is written), like every other insn pattern here; an
;; empty constraint string would not mark it as an output.
1426 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1427 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1428 (vec_merge:SSEMODEF2P
1430 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1431 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1432 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1437 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1438 [(set_attr "type" "ssecmp")
1439 (set_attr "length_immediate" "1")
1440 (set_attr "prefix" "vex")
1441 (set_attr "mode" "<ssescalarmode>")])
1443 ;; We don't promote 128bit vector compare intrinsics, but the vectorizer
1444 ;; may generate 256bit vector compare instructions.
;; AVX packed compare whose predicate is an RTL comparison operator
;; (operand 3, accepted by avx_comparison_float_operator) instead of an
;; immediate operand.  "%D3" in the template turns that operator into the
;; predicate part of the vcmp mnemonic.  Operand 1 must be a register;
;; operand 2 may be memory.
1445 (define_insn "*avx_maskcmp<mode>3"
1446 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1447 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1448 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1449 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1450 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1451 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1452 [(set_attr "type" "ssecmp")
1453 (set_attr "prefix" "vex")
1454 (set_attr "length_immediate" "1")
1455 (set_attr "mode" "<avxvecmode>")])
;; SSE compare producing a mask, for every mode of SSEMODEF4 (scalar SF/DF
;; as well as V4SF/V2DF).  Operand 3 is the comparison operator (accepted
;; by sse_comparison_operator) and is printed through "%D3".  Operand 1 is
;; tied to the destination ("0").  The visible part of the condition
;; requires the mode to be an SSE scalar or SSE vector float mode.
1457 (define_insn "<sse>_maskcmp<mode>3"
1458 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1459 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1460 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1461 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1463 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1464 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1465 [(set_attr "type" "ssecmp")
1466 (set_attr "length_immediate" "1")
1467 (set_attr "mode" "<MODE>")])
;; SSE scalar compare producing a mask, expressed as a vec_merge around
;; the comparison operator (operand 3, printed through "%D3").  Operand 1
;; is tied to the destination ("0"); the mode attribute is the scalar mode
;; of the vector.
1469 (define_insn "<sse>_vmmaskcmp<mode>3"
1470 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1471 (vec_merge:SSEMODEF2P
1472 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1473 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1474 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1477 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1478 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1479 [(set_attr "type" "ssecmp")
1480 (set_attr "length_immediate" "1")
1481 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets the flags register in CCFP mode (comis*).
;; Both inputs are vectors of mode <ssevecmode>; the (parallel
;; [(const_int 0)]) selectors show that element 0 of each is compared.
;; "%v" together with prefix "maybe_vex" covers the legacy and VEX
;; spellings.  prefix_data16 is chosen by testing whether the mode
;; attribute is DF.
1483 (define_insn "<sse>_comi"
1484 [(set (reg:CCFP FLAGS_REG)
1487 (match_operand:<ssevecmode> 0 "register_operand" "x")
1488 (parallel [(const_int 0)]))
1490 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1491 (parallel [(const_int 0)]))))]
1492 "SSE_FLOAT_MODE_P (<MODE>mode)"
1493 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1494 [(set_attr "type" "ssecomi")
1495 (set_attr "prefix" "maybe_vex")
1496 (set_attr "prefix_rep" "0")
1497 (set (attr "prefix_data16")
1498 (if_then_else (eq_attr "mode" "DF")
1500 (const_string "0")))
1501 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern, but the flags register is set in CCFPU
;; mode and the mnemonic is ucomis*.  Element 0 of each <ssevecmode>
;; operand is compared; prefix_data16 is chosen by testing whether the
;; mode attribute is DF.
1503 (define_insn "<sse>_ucomi"
1504 [(set (reg:CCFPU FLAGS_REG)
1507 (match_operand:<ssevecmode> 0 "register_operand" "x")
1508 (parallel [(const_int 0)]))
1510 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1511 (parallel [(const_int 0)]))))]
1512 "SSE_FLOAT_MODE_P (<MODE>mode)"
1513 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1514 [(set_attr "type" "ssecomi")
1515 (set_attr "prefix" "maybe_vex")
1516 (set_attr "prefix_rep" "0")
1517 (set (attr "prefix_data16")
1518 (if_then_else (eq_attr "mode" "DF")
1520 (const_string "0")))
1521 (set_attr "mode" "<MODE>")])
;; Vector conditional select for SSE float vector modes:
;;   operand 0 = (operand 4 <op3> operand 5) ? operand 1 : operand 2
;; Operand 3 is the comparison operator (no predicate restriction);
;; operands 1 and 2 may be any general operand.  The expansion itself is
;; delegated to ix86_expand_fp_vcond, whose boolean result is captured in
;; "ok".
1523 (define_expand "vcond<mode>"
1524 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1525 (if_then_else:SSEMODEF2P
1526 (match_operator 3 ""
1527 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1528 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1529 (match_operand:SSEMODEF2P 1 "general_operand" "")
1530 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1531 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1533 bool ok = ix86_expand_fp_vcond (operands);
1538 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1540 ;; Parallel floating point logical operations
1542 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed floating-point and-not (vandnp*).  Operand 1 must be a
;; register; operand 2 may be memory.  VEX three-operand output with an
;; untied destination.
1544 (define_insn "avx_andnot<mode>3"
1545 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1548 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1549 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1550 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1551 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1552 [(set_attr "type" "sselog")
1553 (set_attr "prefix" "vex")
1554 (set_attr "mode" "<avxvecmode>")])
;; SSE packed floating-point and-not (andnp*).  Operand 1 is tied to the
;; destination ("0"), so the template prints only operands 2 and 0.
1556 (define_insn "<sse>_andnot<mode>3"
1557 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1560 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1561 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1562 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1563 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1564 [(set_attr "type" "sselog")
1565 (set_attr "mode" "<MODE>")])
;; Expander for the plogic code iterator on 256-bit float vector modes.
;; The only preparation step is ix86_fixup_binary_operands_no_copy, which
;; legitimizes the operand combination before the insn is matched.
1567 (define_expand "<code><mode>3"
1568 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1569 (plogic:AVX256MODEF2P
1570 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1571 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1572 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1573 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed floating-point logical operation (mnemonic built from
;; <plogicprefix>).  Operand 1 is marked commutative ('%'); the condition
;; also requires ix86_binary_operator_ok.  VEX three-operand output.
1575 (define_insn "*avx_<code><mode>3"
1576 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1578 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1579 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1580 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1581 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1582 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1583 [(set_attr "type" "sselog")
1584 (set_attr "prefix" "vex")
1585 (set_attr "mode" "<avxvecmode>")])
;; Expander for floating-point logical operations on 128-bit SSE float
;; vector modes; operands are legitimized by
;; ix86_fixup_binary_operands_no_copy.
1587 (define_expand "<code><mode>3"
1588 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1590 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1591 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1592 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1593 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE packed floating-point logical operation.  Operand 1 is commutative
;; and tied to the destination ("%0"); the condition also requires
;; ix86_binary_operator_ok.  Two-operand output.
1595 (define_insn "*<code><mode>3"
1596 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1598 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1599 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1600 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1601 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1602 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1603 [(set_attr "type" "sselog")
1604 (set_attr "mode" "<MODE>")])
;; Vector copysign built from three logical steps using a sign-bit mask:
;;   - operand 1 combined with the complement of the mask (operand 3),
;;   - operand 2 combined with the mask itself,
;;   - the inclusive OR of temporaries 4 and 5 stored into operand 0.
;; The preparation code creates the mask with ix86_build_signbit_mask for
;; the scalar element mode and allocates fresh pseudo registers for
;; operands 4 and 5.
;; NOTE(review): presumably the first two steps write operands 4 and 5
;; respectively; the lines carrying those destinations are not visible.
1606 (define_expand "copysign<mode>3"
1609 (not:SSEMODEF2P (match_dup 3))
1610 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1612 (and:SSEMODEF2P (match_dup 3)
1613 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1614 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1615 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1616 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1618 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1620 operands[4] = gen_reg_rtx (<MODE>mode);
1621 operands[5] = gen_reg_rtx (<MODE>mode);
1624 ;; Also define scalar versions. These are used for abs, neg, and
1625 ;; conditional move. Using subregs into vector modes causes register
1626 ;; allocation lossage. These patterns do not allow memory operands
1627 ;; because the native instructions read the full 128-bits.
;; Scalar-mode (MODEF) and-not for AVX.  All operands are registers: no
;; memory alternative is offered.  The mode attribute is the containing
;; vector mode (<ssevecmode>), and the packed vandnp* mnemonic is emitted.
1629 (define_insn "*avx_andnot<mode>3"
1630 [(set (match_operand:MODEF 0 "register_operand" "=x")
1633 (match_operand:MODEF 1 "register_operand" "x"))
1634 (match_operand:MODEF 2 "register_operand" "x")))]
1635 "AVX_FLOAT_MODE_P (<MODE>mode)"
1636 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1637 [(set_attr "type" "sselog")
1638 (set_attr "prefix" "vex")
1639 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) and-not for SSE.  Register-only operands, with
;; operand 1 tied to the destination ("0"); emits the packed andnp*
;; mnemonic with the vector mode as the mode attribute.
1641 (define_insn "*andnot<mode>3"
1642 [(set (match_operand:MODEF 0 "register_operand" "=x")
1645 (match_operand:MODEF 1 "register_operand" "0"))
1646 (match_operand:MODEF 2 "register_operand" "x")))]
1647 "SSE_FLOAT_MODE_P (<MODE>mode)"
1648 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1649 [(set_attr "type" "sselog")
1650 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logical operation for AVX.  Register-only operands
;; and an untied destination; emits the packed v<plogicprefix>p* mnemonic
;; with the vector mode as the mode attribute.
1652 (define_insn "*avx_<code><mode>3"
1653 [(set (match_operand:MODEF 0 "register_operand" "=x")
1655 (match_operand:MODEF 1 "register_operand" "x")
1656 (match_operand:MODEF 2 "register_operand" "x")))]
1657 "AVX_FLOAT_MODE_P (<MODE>mode)"
1658 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1659 [(set_attr "type" "sselog")
1660 (set_attr "prefix" "vex")
1661 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logical operation for SSE.  Register-only operands,
;; with operand 1 tied to the destination ("0"); emits the packed
;; <plogicprefix>p* mnemonic with the vector mode as the mode attribute.
1663 (define_insn "*<code><mode>3"
1664 [(set (match_operand:MODEF 0 "register_operand" "=x")
1666 (match_operand:MODEF 1 "register_operand" "0")
1667 (match_operand:MODEF 2 "register_operand" "x")))]
1668 "SSE_FLOAT_MODE_P (<MODE>mode)"
1669 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1670 [(set_attr "type" "sselog")
1671 (set_attr "mode" "<ssevecmode>")])
1673 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1675 ;; FMA4 floating point multiply/accumulate instructions.  This includes
1676 ;; the scalar versions of the instructions as well as the vector versions.
1678 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1680 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1681 ;; combine to generate a multiply/add with two memory references. We then
1682 ;; split this insn, into loading up the destination register with one of the
1683 ;; memory operations. If we don't manage to split the insn, reload will
1684 ;; generate the appropriate moves. The reason this is needed, is that combine
1685 ;; has already folded one of the memory references into both the multiply and
1686 ;; add insns, and it can't generate a new pseudo. I.e.:
1687 ;; (set (reg1) (mem (addr1)))
1688 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1689 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; 256-bit FMA4 multiply-add (modes V8SF and V4DF via FMA4MODEF4).
;; Three constraint alternatives, each allowing memory in a different
;; input: operand 3 (alt 0), operand 2 (alt 1) or operand 1 (alt 2).
;; The predicates accept memory in any input; the visible part of the
;; condition defers the final check to ix86_fma4_valid_op_p.
1691 (define_insn "fma4_fmadd<mode>4256"
1692 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
1695 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
1696 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))
1697 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1699 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1700 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1701 [(set_attr "type" "ssemuladd")
1702 (set_attr "mode" "<MODE>")])
1704 ;; Split fmadd with two memory operands into a load and the fmadd.
;; The visible condition applies when ix86_fma4_valid_op_p rejects the
;; operands with 1 as its fifth argument but accepts them with 2, and when
;; the destination register is not mentioned in any of the three inputs.
;; The replacement calls ix86_expand_fma4_multiple_memory and then
;; re-emits the 256-bit fmadd insn on the same operands.
;; NOTE(review): the fifth argument presumably is the number of memory
;; operands tolerated - confirm against ix86_fma4_valid_op_p.
1706 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1709 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
1710 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
1711 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
1713 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1714 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1715 && !reg_mentioned_p (operands[0], operands[1])
1716 && !reg_mentioned_p (operands[0], operands[2])
1717 && !reg_mentioned_p (operands[0], operands[3])"
1720 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1721 emit_insn (gen_fma4_fmadd<mode>4256 (operands[0], operands[1],
1722 operands[2], operands[3]));
1726 ;; Floating multiply and subtract
1727 ;; Allow two memory operands the same as fmadd
;; 256-bit FMA4 multiply-subtract (vfmsub*).  Same three constraint
;; alternatives as the 256-bit fmadd pattern: memory is allowed in
;; operand 3, operand 2 or operand 1 respectively.  The visible part of
;; the condition defers to ix86_fma4_valid_op_p.
1728 (define_insn "fma4_fmsub<mode>4256"
1729 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
1732 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
1733 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))
1734 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1736 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1737 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1738 [(set_attr "type" "ssemuladd")
1739 (set_attr "mode" "<MODE>")])
1741 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Applies when ix86_fma4_valid_op_p rejects the operands with 1 as its
;; fifth argument but accepts them with 2, and the destination register is
;; not mentioned in any input.  The replacement calls
;; ix86_expand_fma4_multiple_memory and re-emits the 256-bit fmsub insn.
1743 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1746 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
1747 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
1748 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
1750 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1751 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1752 && !reg_mentioned_p (operands[0], operands[1])
1753 && !reg_mentioned_p (operands[0], operands[2])
1754 && !reg_mentioned_p (operands[0], operands[3])"
1757 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1758 emit_insn (gen_fma4_fmsub<mode>4256 (operands[0], operands[1],
1759 operands[2], operands[3]));
1763 ;; Floating point negative multiply and add
1764 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1765 ;; Note operands are out of order to simplify call to ix86_fma4_valid_op_p
1766 ;; Allow two memory operands to help in optimizing.
;; 256-bit FMA4 negated multiply-add (vfnmadd*).  Operand 3 is written
;; first in the RTL, ahead of operands 1 and 2, while the assembler
;; template still prints %1, %2, %3 in numeric order.  Three alternatives
;; allow memory in operand 3, operand 2 or operand 1 respectively.
1767 (define_insn "fma4_fnmadd<mode>4256"
1768 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
1770 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")
1772 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
1773 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))))]
1775 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1776 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1777 [(set_attr "type" "ssemuladd")
1778 (set_attr "mode" "<MODE>")])
1780 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Applies when ix86_fma4_valid_op_p rejects the operands with 1 as its
;; fifth argument but accepts them with 2, and the destination register is
;; not mentioned in any input.  The replacement calls
;; ix86_expand_fma4_multiple_memory and re-emits the 256-bit fnmadd insn.
1782 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1784 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")
1786 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
1787 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))))]
1789 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1790 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1791 && !reg_mentioned_p (operands[0], operands[1])
1792 && !reg_mentioned_p (operands[0], operands[2])
1793 && !reg_mentioned_p (operands[0], operands[3])"
1796 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1797 emit_insn (gen_fma4_fnmadd<mode>4256 (operands[0], operands[1],
1798 operands[2], operands[3]));
1802 ;; Floating point negative multiply and subtract
1803 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1804 ;; Allow 2 memory operands to help with optimization
;; 256-bit FMA4 negated multiply-subtract (vfnmsub*).  Only two constraint
;; alternatives exist: memory is allowed in operand 3 (alt 0) or operand 2
;; (alt 1); operand 1 is a register in both.  The call to
;; ix86_fma4_valid_op_p passes false as its last argument, unlike the
;; fmadd/fmsub/fnmadd patterns, which pass true.
1805 (define_insn "fma4_fnmsub<mode>4256"
1806 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1810 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x"))
1811 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
1812 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1814 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
1815 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1816 [(set_attr "type" "ssemuladd")
1817 (set_attr "mode" "<MODE>")])
1819 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Applies when ix86_fma4_valid_op_p rejects the operands with 1 as its
;; fifth argument but accepts them with 2 (last argument false in both
;; calls), and the destination register is not mentioned in any input.
;; The replacement calls ix86_expand_fma4_multiple_memory and re-emits the
;; 256-bit fnmsub insn.
1821 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1825 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" ""))
1826 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
1827 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
1829 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)
1830 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)
1831 && !reg_mentioned_p (operands[0], operands[1])
1832 && !reg_mentioned_p (operands[0], operands[2])
1833 && !reg_mentioned_p (operands[0], operands[3])"
1836 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1837 emit_insn (gen_fma4_fnmsub<mode>4256 (operands[0], operands[1],
1838 operands[2], operands[3]));
1842 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA4 multiply-add for the SSEMODEF4 modes (scalar SF/DF and the 128-bit
;; vectors V4SF/V2DF).  Three constraint alternatives allow memory in
;; operand 3, operand 2 or operand 1 respectively; the visible part of the
;; condition defers to ix86_fma4_valid_op_p.
1843 (define_insn "fma4_fmadd<mode>4"
1844 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
1847 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
1848 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))
1849 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1851 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1852 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1853 [(set_attr "type" "ssemuladd")
1854 (set_attr "mode" "<MODE>")])
1856 ;; Split fmadd with two memory operands into a load and the fmadd.
;; SSEMODEF4 counterpart of the 256-bit fmadd splitter: it applies when
;; ix86_fma4_valid_op_p rejects the operands with 1 as its fifth argument
;; but accepts them with 2 and the destination is not mentioned in any
;; input; it then calls ix86_expand_fma4_multiple_memory and re-emits
;; fma4_fmadd<mode>4.
1858 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1861 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1862 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1863 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1865 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1866 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1867 && !reg_mentioned_p (operands[0], operands[1])
1868 && !reg_mentioned_p (operands[0], operands[2])
1869 && !reg_mentioned_p (operands[0], operands[3])"
1872 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1873 emit_insn (gen_fma4_fmadd<mode>4 (operands[0], operands[1],
1874 operands[2], operands[3]));
1878 ;; For the scalar operations, use operand1 for the upper words that aren't
1879 ;; modified, so restrict the forms that are generated.
1880 ;; Scalar version of fmadd
;; Scalar fmadd expressed as a vec_merge on a 128-bit float vector mode.
;; Two constraint alternatives: memory is allowed in operand 3 (alt 0) or
;; operand 2 (alt 1); operand 1 is constrained to a register in both.
;; ix86_fma4_valid_op_p is called with 1 as its fifth argument here,
;; whereas the non-merge fmadd pattern passes 2.
1881 (define_insn "fma4_vmfmadd<mode>4"
1882 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1883 (vec_merge:SSEMODEF2P
1886 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
1887 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1888 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1892 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
1893 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1894 [(set_attr "type" "ssemuladd")
1895 (set_attr "mode" "<MODE>")])
1897 ;; Floating multiply and subtract
1898 ;; Allow two memory operands the same as fmadd
;; FMA4 multiply-subtract for the SSEMODEF4 modes.  Three constraint
;; alternatives allow memory in operand 3, operand 2 or operand 1
;; respectively; the visible part of the condition defers to
;; ix86_fma4_valid_op_p.
1899 (define_insn "fma4_fmsub<mode>4"
1900 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
1903 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
1904 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))
1905 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1907 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1908 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1909 [(set_attr "type" "ssemuladd")
1910 (set_attr "mode" "<MODE>")])
1912 ;; Split fmsub with two memory operands into a load and the fmsub.
;; SSEMODEF4 counterpart of the 256-bit fmsub splitter: it applies when
;; ix86_fma4_valid_op_p rejects the operands with 1 as its fifth argument
;; but accepts them with 2 and the destination is not mentioned in any
;; input; it then calls ix86_expand_fma4_multiple_memory and re-emits
;; fma4_fmsub<mode>4.
1914 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1917 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1918 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1919 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1921 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1922 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1923 && !reg_mentioned_p (operands[0], operands[1])
1924 && !reg_mentioned_p (operands[0], operands[2])
1925 && !reg_mentioned_p (operands[0], operands[3])"
1928 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1929 emit_insn (gen_fma4_fmsub<mode>4 (operands[0], operands[1],
1930 operands[2], operands[3]));
1934 ;; For the scalar operations, use operand1 for the upper words that aren't
1935 ;; modified, so restrict the forms that are generated.
1936 ;; Scalar version of fmsub
;; Operand 1 is register-only ("x") in both alternatives.  Alternative 0
;; lets operand 3 be memory, alternative 1 lets operand 2 be memory.  Unlike
;; the scalar fmadd pattern, the last argument passed to
;; ix86_fma4_valid_op_p here is false.
1937 (define_insn "fma4_vmfmsub<mode>4"
1938 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1939 (vec_merge:SSEMODEF2P
1942 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
1943 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1944 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1948 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
1949 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1950 [(set_attr "type" "ssemuladd")
1951 (set_attr "mode" "<MODE>")])
1953 ;; Floating point negative multiply and add
1954 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1955 ;; Note operands are out of order to simplify call to ix86_fma4_valid_op_p
1956 ;; Allow two memory operands to help in optimizing.
;; Operand 3 (the addend c) appears first in the RTL, followed by the two
;; multiplicands, operands 1 and 2.  Three alternatives: the memory-capable
;; ("xm") input is operand 3, operand 2 and operand 1 respectively.
1957 (define_insn "fma4_fnmadd<mode>4"
1958 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
1960 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")
1962 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
1963 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))))]
1965 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1966 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1967 [(set_attr "type" "ssemuladd")
1968 (set_attr "mode" "<MODE>")])
1970 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; The split applies only when the operands are rejected by
;; ix86_fma4_valid_op_p with argument 1 but accepted with argument 2, and
;; when operand 0 is not mentioned in any of operands 1-3.  The preparation
;; code calls ix86_expand_fma4_multiple_memory on the operands and then
;; re-emits the operation through gen_fma4_fnmadd<mode>4.
1972 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1974 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1976 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1977 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1979 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1980 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1981 && !reg_mentioned_p (operands[0], operands[1])
1982 && !reg_mentioned_p (operands[0], operands[2])
1983 && !reg_mentioned_p (operands[0], operands[3])"
1986 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1987 emit_insn (gen_fma4_fnmadd<mode>4 (operands[0], operands[1],
1988 operands[2], operands[3]));
1992 ;; For the scalar operations, use operand1 for the upper words that aren't
1993 ;; modified, so restrict the forms that are generated.
1994 ;; Scalar version of fnmadd
;; Operand 3 is listed first in the RTL, followed by operands 1 and 2.
;; Operand 1 is register-only ("x") in both alternatives.  Alternative 0
;; lets operand 3 be memory, alternative 1 lets operand 2 be memory.
1995 (define_insn "fma4_vmfnmadd<mode>4"
1996 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1997 (vec_merge:SSEMODEF2P
1999 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2001 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2002 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2006 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2007 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2008 [(set_attr "type" "ssemuladd")
2009 (set_attr "mode" "<MODE>")])
2011 ;; Floating point negative multiply and subtract
2012 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
2013 ;; Allow 2 memory operands to help with optimization
;; Only two alternatives here, unlike fmsub/fnmadd: operand 1 is always an
;; SSE register, and either operand 3 (alternative 0) or operand 2
;; (alternative 1) may be memory.  The last argument passed to
;; ix86_fma4_valid_op_p is false.
2014 (define_insn "fma4_fnmsub<mode>4"
2015 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
2019 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x"))
2020 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
2021 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
2023 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
2024 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2025 [(set_attr "type" "ssemuladd")
2026 (set_attr "mode" "<MODE>")])
2028 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; The split applies only when the operands are rejected by
;; ix86_fma4_valid_op_p with argument 1 but accepted with argument 2, and
;; when operand 0 is not mentioned in any of operands 1-3.  The preparation
;; code calls ix86_expand_fma4_multiple_memory on the operands and then
;; re-emits the operation through gen_fma4_fnmsub<mode>4.
2030 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
2034 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
2035 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
2036 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
2038 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)
2039 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)
2040 && !reg_mentioned_p (operands[0], operands[1])
2041 && !reg_mentioned_p (operands[0], operands[2])
2042 && !reg_mentioned_p (operands[0], operands[3])"
2045 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
2046 emit_insn (gen_fma4_fnmsub<mode>4 (operands[0], operands[1],
2047 operands[2], operands[3]));
2051 ;; For the scalar operations, use operand1 for the upper words that aren't
2052 ;; modified, so restrict the forms that are generated.
2053 ;; Scalar version of fnmsub
;; Operand 1 is register-only ("x") in both alternatives.  Alternative 0
;; lets operand 3 be memory, alternative 1 lets operand 2 be memory.
;; NOTE(review): this condition passes 2 to ix86_fma4_valid_op_p, whereas
;; fma4_vmfmadd, fma4_vmfmsub and fma4_vmfnmadd pass 1 -- confirm that the
;; difference is intended.
2054 (define_insn "fma4_vmfnmsub<mode>4"
2055 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2056 (vec_merge:SSEMODEF2P
2060 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x"))
2061 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2062 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2066 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
2067 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2068 [(set_attr "type" "ssemuladd")
2069 (set_attr "mode" "<MODE>")])
2071 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) fmadd for the FMA4MODEF4 modes (V8SF and
;; V4DF).  Enabled by TARGET_FMA4 alone.  Operand 1 is an SSE register;
;; operand 3 (alternative 0) or operand 2 (alternative 1) may be memory.
2073 (define_insn "fma4i_fmadd<mode>4256"
2074 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2078 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
2079 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
2080 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
2081 UNSPEC_FMA4_INTRINSIC))]
2082 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2083 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2084 [(set_attr "type" "ssemuladd")
2085 (set_attr "mode" "<MODE>")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) fmsub for the FMA4MODEF4 modes (V8SF and
;; V4DF).  Enabled by TARGET_FMA4 alone.  Operand 1 is an SSE register;
;; operand 3 (alternative 0) or operand 2 (alternative 1) may be memory.
2087 (define_insn "fma4i_fmsub<mode>4256"
2088 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2092 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
2093 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
2094 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
2095 UNSPEC_FMA4_INTRINSIC))]
2096 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2097 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2098 [(set_attr "type" "ssemuladd")
2099 (set_attr "mode" "<MODE>")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) fnmadd for the FMA4MODEF4 modes (V8SF
;; and V4DF).  Operand 3 is listed first in the RTL, followed by operands 1
;; and 2.  Operand 1 is an SSE register; operand 3 (alternative 0) or
;; operand 2 (alternative 1) may be memory.
2101 (define_insn "fma4i_fnmadd<mode>4256"
2102 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2105 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
2107 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
2108 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm")))]
2109 UNSPEC_FMA4_INTRINSIC))]
2110 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2111 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2112 [(set_attr "type" "ssemuladd")
2113 (set_attr "mode" "<MODE>")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) fnmsub for the FMA4MODEF4 modes (V8SF
;; and V4DF).  Operand 1 is an SSE register; operand 3 (alternative 0) or
;; operand 2 (alternative 1) may be memory.  The last argument passed to
;; ix86_fma4_valid_op_p is false here, unlike the other 256-bit intrinsics.
2115 (define_insn "fma4i_fnmsub<mode>4256"
2116 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2121 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x"))
2122 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
2123 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
2124 UNSPEC_FMA4_INTRINSIC))]
2125 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2126 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2127 [(set_attr "type" "ssemuladd")
2128 (set_attr "mode" "<MODE>")])
2129 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) fmadd over the SSEMODEF2P modes.
;; Enabled by TARGET_FMA4 alone.  Operand 1 is an SSE register; operand 3
;; (alternative 0) or operand 2 (alternative 1) may be memory.
2131 (define_insn "fma4i_fmadd<mode>4"
2132 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2136 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2137 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2138 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2139 UNSPEC_FMA4_INTRINSIC))]
2140 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2141 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2142 [(set_attr "type" "ssemuladd")
2143 (set_attr "mode" "<MODE>")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) fmsub over the SSEMODEF2P modes.
;; Enabled by TARGET_FMA4 alone.  Operand 1 is an SSE register; operand 3
;; (alternative 0) or operand 2 (alternative 1) may be memory.
2145 (define_insn "fma4i_fmsub<mode>4"
2146 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2150 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2151 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2152 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2153 UNSPEC_FMA4_INTRINSIC))]
2154 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2155 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2156 [(set_attr "type" "ssemuladd")
2157 (set_attr "mode" "<MODE>")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) fnmadd over the SSEMODEF2P modes.
;; Operand 3 is listed first in the RTL, followed by operands 1 and 2.
;; Operand 1 is an SSE register; operand 3 (alternative 0) or operand 2
;; (alternative 1) may be memory.
2159 (define_insn "fma4i_fnmadd<mode>4"
2160 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2163 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2165 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2166 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))]
2167 UNSPEC_FMA4_INTRINSIC))]
2168 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2169 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2170 [(set_attr "type" "ssemuladd")
2171 (set_attr "mode" "<MODE>")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) fnmsub over the SSEMODEF2P modes.
;; Operand 1 is an SSE register; operand 3 (alternative 0) or operand 2
;; (alternative 1) may be memory.  The last argument passed to
;; ix86_fma4_valid_op_p is false here.
2173 (define_insn "fma4i_fnmsub<mode>4"
2174 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2179 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x"))
2180 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2181 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2182 UNSPEC_FMA4_INTRINSIC))]
2183 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2184 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2185 [(set_attr "type" "ssemuladd")
2186 (set_attr "mode" "<MODE>")])
2188 ;; For the scalar operations, use operand1 for the upper words that aren't
2189 ;; modified, so restrict the forms that are accepted.
;; Scalar intrinsic fmadd.  Operand 1 must already satisfy register_operand
;; (not merely nonimmediate_operand); operand 3 (alternative 0) or operand 2
;; (alternative 1) may be memory.  The "mode" attribute is the scalar mode
;; <ssescalarmode> rather than the vector mode.
2190 (define_insn "fma4i_vmfmadd<mode>4"
2191 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2193 [(vec_merge:SSEMODEF2P
2196 (match_operand:SSEMODEF2P 1 "register_operand" "x,x")
2197 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2198 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2201 UNSPEC_FMA4_INTRINSIC))]
2202 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2203 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2204 [(set_attr "type" "ssemuladd")
2205 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic fmsub.  Operand 1 must satisfy register_operand;
;; operand 3 (alternative 0) or operand 2 (alternative 1) may be memory.
;; The "mode" attribute is the scalar mode <ssescalarmode>.
2207 (define_insn "fma4i_vmfmsub<mode>4"
2208 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2210 [(vec_merge:SSEMODEF2P
2213 (match_operand:SSEMODEF2P 1 "register_operand" "x,x")
2214 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2215 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2218 UNSPEC_FMA4_INTRINSIC))]
2219 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2220 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2221 [(set_attr "type" "ssemuladd")
2222 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic fnmadd.  Operand 3 is listed first in the RTL, followed
;; by operands 1 and 2.  Operand 1 supplies the unmodified upper words, so it
;; is restricted to register_operand like the other fma4i_vm* patterns (its
;; constraint is "x" in both alternatives, so a memory operand could never
;; be used directly).  Operand 3 (alternative 0) or operand 2 (alternative 1)
;; may be memory.  The "mode" attribute is the scalar mode <ssescalarmode>.
;; NOTE(review): the last argument to ix86_fma4_valid_op_p is true here but
;; false in fma4i_vmfmadd/vmfmsub/vmfnmsub -- confirm that is intended.
2224 (define_insn "fma4i_vmfnmadd<mode>4"
2225 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2227 [(vec_merge:SSEMODEF2P
2229 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2231 (match_operand:SSEMODEF2P 1 "register_operand" "x,x")
2232 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2235 UNSPEC_FMA4_INTRINSIC))]
2236 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2237 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2238 [(set_attr "type" "ssemuladd")
2239 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic fnmsub.  Operand 1 must satisfy register_operand;
;; operand 3 (alternative 0) or operand 2 (alternative 1) may be memory.
;; The "mode" attribute is the scalar mode <ssescalarmode>.
2241 (define_insn "fma4i_vmfnmsub<mode>4"
2242 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2244 [(vec_merge:SSEMODEF2P
2248 (match_operand:SSEMODEF2P 1 "register_operand" "x,x"))
2249 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2250 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2253 UNSPEC_FMA4_INTRINSIC))]
2254 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2255 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2256 [(set_attr "type" "ssemuladd")
2257 (set_attr "mode" "<ssescalarmode>")])
2259 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2261 ;; FMA4 Parallel floating point multiply addsub and subadd operations
2263 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V8SF multiply-addsub; emits vfmaddsubps.  Operand 1 is an SSE register;
;; operand 3 (alternative 0) or operand 2 (alternative 1) may be memory.
2265 (define_insn "fma4_fmaddsubv8sf4"
2266 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2270 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2271 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2272 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2280 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2281 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2282 [(set_attr "type" "ssemuladd")
2283 (set_attr "mode" "V8SF")])
;; V4DF multiply-addsub; emits vfmaddsubpd.  Operand 1 is an SSE register;
;; operand 3 (alternative 0) or operand 2 (alternative 1) may be memory.
2285 (define_insn "fma4_fmaddsubv4df4"
2286 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2290 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2291 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2292 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2300 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2301 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2302 [(set_attr "type" "ssemuladd")
2303 (set_attr "mode" "V4DF")])
;; V4SF multiply-addsub; emits vfmaddsubps.  Operand 1 is an SSE register;
;; operand 3 (alternative 0) or operand 2 (alternative 1) may be memory.
2305 (define_insn "fma4_fmaddsubv4sf4"
2306 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2310 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2311 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2312 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2320 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2321 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2322 [(set_attr "type" "ssemuladd")
2323 (set_attr "mode" "V4SF")])
;; V2DF multiply-addsub; emits vfmaddsubpd.  Operand 1 is an SSE register;
;; operand 3 (alternative 0) or operand 2 (alternative 1) may be memory.
2325 (define_insn "fma4_fmaddsubv2df4"
2326 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2330 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2331 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2332 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2340 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2341 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2342 [(set_attr "type" "ssemuladd")
2343 (set_attr "mode" "V2DF")])
;; V8SF multiply-subadd; emits vfmsubaddps.  Operand 1 is an SSE register;
;; operand 3 (alternative 0) or operand 2 (alternative 1) may be memory.
2345 (define_insn "fma4_fmsubaddv8sf4"
2346 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2350 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2351 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2352 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2360 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2361 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2362 [(set_attr "type" "ssemuladd")
2363 (set_attr "mode" "V8SF")])
;; V4DF multiply-subadd; emits vfmsubaddpd.  Operand 1 is an SSE register;
;; operand 3 (alternative 0) or operand 2 (alternative 1) may be memory.
2365 (define_insn "fma4_fmsubaddv4df4"
2366 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2370 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2371 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2372 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2380 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2381 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2382 [(set_attr "type" "ssemuladd")
2383 (set_attr "mode" "V4DF")])
;; V4SF multiply-subadd; emits vfmsubaddps.  Operand 1 is an SSE register;
;; operand 3 (alternative 0) or operand 2 (alternative 1) may be memory.
2385 (define_insn "fma4_fmsubaddv4sf4"
2386 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2390 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2391 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2392 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2400 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2401 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2402 [(set_attr "type" "ssemuladd")
2403 (set_attr "mode" "V4SF")])
;; V2DF multiply-subadd; emits vfmsubaddpd.  Operand 1 is an SSE register;
;; operand 3 (alternative 0) or operand 2 (alternative 1) may be memory.
2405 (define_insn "fma4_fmsubaddv2df4"
2406 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2410 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2411 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2412 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2420 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2421 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2422 [(set_attr "type" "ssemuladd")
2423 (set_attr "mode" "V2DF")])
2425 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) V8SF multiply-addsub; emits vfmaddsubps.
;; Operand 1 is an SSE register; operand 3 (alternative 0) or operand 2
;; (alternative 1) may be memory.
2427 (define_insn "fma4i_fmaddsubv8sf4"
2428 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2433 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2434 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2435 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2442 UNSPEC_FMA4_INTRINSIC))]
2444 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2445 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2446 [(set_attr "type" "ssemuladd")
2447 (set_attr "mode" "V8SF")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) V4DF multiply-addsub; emits vfmaddsubpd.
;; Operand 1 is an SSE register; operand 3 (alternative 0) or operand 2
;; (alternative 1) may be memory.
2449 (define_insn "fma4i_fmaddsubv4df4"
2450 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2455 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2456 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2457 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2464 UNSPEC_FMA4_INTRINSIC))]
2466 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2467 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2468 [(set_attr "type" "ssemuladd")
2469 (set_attr "mode" "V4DF")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) V4SF multiply-addsub; emits vfmaddsubps.
;; Operand 1 is an SSE register; operand 3 (alternative 0) or operand 2
;; (alternative 1) may be memory.
2471 (define_insn "fma4i_fmaddsubv4sf4"
2472 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2477 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2478 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2479 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2486 UNSPEC_FMA4_INTRINSIC))]
2488 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2489 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2490 [(set_attr "type" "ssemuladd")
2491 (set_attr "mode" "V4SF")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) V2DF multiply-addsub; emits vfmaddsubpd.
;; Operand 1 is an SSE register; operand 3 (alternative 0) or operand 2
;; (alternative 1) may be memory.
2493 (define_insn "fma4i_fmaddsubv2df4"
2494 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2499 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2500 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2501 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2508 UNSPEC_FMA4_INTRINSIC))]
2510 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2511 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2512 [(set_attr "type" "ssemuladd")
2513 (set_attr "mode" "V2DF")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) V8SF multiply-subadd; emits vfmsubaddps.
;; Operand 1 is an SSE register; operand 3 (alternative 0) or operand 2
;; (alternative 1) may be memory.
2515 (define_insn "fma4i_fmsubaddv8sf4"
2516 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2521 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2522 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2523 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2530 UNSPEC_FMA4_INTRINSIC))]
2532 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2533 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2534 [(set_attr "type" "ssemuladd")
2535 (set_attr "mode" "V8SF")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) V4DF multiply-subadd; emits vfmsubaddpd.
;; Operand 1 is an SSE register; operand 3 (alternative 0) or operand 2
;; (alternative 1) may be memory.
2537 (define_insn "fma4i_fmsubaddv4df4"
2538 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2543 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2544 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2545 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2552 UNSPEC_FMA4_INTRINSIC))]
2554 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2555 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2556 [(set_attr "type" "ssemuladd")
2557 (set_attr "mode" "V4DF")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) V4SF multiply-subadd; emits vfmsubaddps.
;; Operand 1 is an SSE register; operand 3 (alternative 0) or operand 2
;; (alternative 1) may be memory.
2559 (define_insn "fma4i_fmsubaddv4sf4"
2560 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2565 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2566 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2567 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2574 UNSPEC_FMA4_INTRINSIC))]
2576 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2577 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2578 [(set_attr "type" "ssemuladd")
2579 (set_attr "mode" "V4SF")])
;; Intrinsic (UNSPEC_FMA4_INTRINSIC) V2DF multiply-subadd; emits vfmsubaddpd.
;; Operand 1 is an SSE register; operand 3 (alternative 0) or operand 2
;; (alternative 1) may be memory.
2581 (define_insn "fma4i_fmsubaddv2df4"
2582 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2587 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2588 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2589 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2596 UNSPEC_FMA4_INTRINSIC))]
2598 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2599 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2600 [(set_attr "type" "ssemuladd")
2601 (set_attr "mode" "V2DF")])
2603 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2605 ;; Parallel single-precision floating point conversion operations
2607 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts the V2SI operand 2 (MMX register or memory, "ym") to V2SF and
;; combines it with the V4SF operand 1, which is tied to the destination
;; (constraint "0").  Emits the two-operand cvtpi2ps.
2609 (define_insn "sse_cvtpi2ps"
2610 [(set (match_operand:V4SF 0 "register_operand" "=x")
2613 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2614 (match_operand:V4SF 1 "register_operand" "0")
2617 "cvtpi2ps\t{%2, %0|%0, %2}"
2618 [(set_attr "type" "ssecvt")
2619 (set_attr "mode" "V4SF")])
;; Non-truncating conversion (expressed as an unspec) of the V4SF operand 1
;; to integers; elements 0 and 1 of the result are selected into the V2SI
;; destination, which is an MMX register ("=y").  Emits cvtps2pi.
2621 (define_insn "sse_cvtps2pi"
2622 [(set (match_operand:V2SI 0 "register_operand" "=y")
2624 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2626 (parallel [(const_int 0) (const_int 1)])))]
2628 "cvtps2pi\t{%1, %0|%0, %1}"
2629 [(set_attr "type" "ssecvt")
2630 (set_attr "unit" "mmx")
2631 (set_attr "mode" "DI")])
;; Truncating (fix) conversion of the V4SF operand 1 to integers; elements 0
;; and 1 are selected into the V2SI MMX-register destination ("=y").  Emits
;; cvttps2pi.
;; NOTE(review): the "mode" attribute is "SF" here but "DI" in
;; sse_cvtps2pi -- confirm which is intended.
2633 (define_insn "sse_cvttps2pi"
2634 [(set (match_operand:V2SI 0 "register_operand" "=y")
2636 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2637 (parallel [(const_int 0) (const_int 1)])))]
2639 "cvttps2pi\t{%1, %0|%0, %1}"
2640 [(set_attr "type" "ssecvt")
2641 (set_attr "unit" "mmx")
2642 (set_attr "prefix_rep" "0")
2643 (set_attr "mode" "SF")])
;; VEX-encoded form: converts the SI operand 2 (general register or memory)
;; to SF and combines it with the V4SF operand 1.  Operand 1 is any SSE
;; register and is not tied to the destination, so the three-operand
;; vcvtsi2ss is emitted.
2645 (define_insn "*avx_cvtsi2ss"
2646 [(set (match_operand:V4SF 0 "register_operand" "=x")
2649 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2650 (match_operand:V4SF 1 "register_operand" "x")
2653 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2654 [(set_attr "type" "sseicvt")
2655 (set_attr "prefix" "vex")
2656 (set_attr "mode" "SF")])
;; Converts the SI operand 2 to SF and combines it with the V4SF operand 1,
;; which is tied to the destination ("0").  The register and memory sources
;; are separate alternatives so that the athlon_decode/amdfam10_decode
;; attributes can differ between them (vector for "r", double for "m").
2658 (define_insn "sse_cvtsi2ss"
2659 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2662 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2663 (match_operand:V4SF 1 "register_operand" "0,0")
2666 "cvtsi2ss\t{%2, %0|%0, %2}"
2667 [(set_attr "type" "sseicvt")
2668 (set_attr "athlon_decode" "vector,double")
2669 (set_attr "amdfam10_decode" "vector,double")
2670 (set_attr "mode" "SF")])
;; 64-bit-only VEX form (TARGET_AVX && TARGET_64BIT): converts the DI
;; operand 2 to SF and combines it with the V4SF operand 1.  Emits the
;; three-operand vcvtsi2ssq.
2672 (define_insn "*avx_cvtsi2ssq"
2673 [(set (match_operand:V4SF 0 "register_operand" "=x")
2676 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2677 (match_operand:V4SF 1 "register_operand" "x")
2679 "TARGET_AVX && TARGET_64BIT"
2680 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2681 [(set_attr "type" "sseicvt")
2682 (set_attr "length_vex" "4")
2683 (set_attr "prefix" "vex")
2684 (set_attr "mode" "SF")])
;; 64-bit-only form (TARGET_SSE && TARGET_64BIT): converts the DI operand 2
;; to SF and combines it with the V4SF operand 1, which is tied to the
;; destination ("0").  Emits cvtsi2ssq with a REX prefix.
;; NOTE(review): the second alternative of operand 2 is "rm", whereas
;; sse_cvtsi2ss and sse2_cvtsi2sdq use "r,m"; the per-alternative decode
;; attributes suggest "m" may have been meant -- confirm.
2686 (define_insn "sse_cvtsi2ssq"
2687 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2690 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2691 (match_operand:V4SF 1 "register_operand" "0,0")
2693 "TARGET_SSE && TARGET_64BIT"
2694 "cvtsi2ssq\t{%2, %0|%0, %2}"
2695 [(set_attr "type" "sseicvt")
2696 (set_attr "prefix_rex" "1")
2697 (set_attr "athlon_decode" "vector,double")
2698 (set_attr "amdfam10_decode" "vector,double")
2699 (set_attr "mode" "SF")])
;; Non-truncating (UNSPEC_FIX_NOTRUNC) conversion of element 0 of the V4SF
;; operand 1 to an SI general register.  The source is an SSE register
;; (alternative 0) or memory (alternative 1).  The "prefix" attribute is
;; maybe_vex, matching the %v at the start of the mnemonic.
2701 (define_insn "sse_cvtss2si"
2702 [(set (match_operand:SI 0 "register_operand" "=r,r")
2705 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2706 (parallel [(const_int 0)]))]
2707 UNSPEC_FIX_NOTRUNC))]
2709 "%vcvtss2si\t{%1, %0|%0, %1}"
2710 [(set_attr "type" "sseicvt")
2711 (set_attr "athlon_decode" "double,vector")
2712 (set_attr "prefix_rep" "1")
2713 (set_attr "prefix" "maybe_vex")
2714 (set_attr "mode" "SI")])
;; Same cvtss2si instruction as sse_cvtss2si, but the source is a plain SF
;; operand (SSE register or memory) rather than element 0 of a V4SF vector.
2716 (define_insn "sse_cvtss2si_2"
2717 [(set (match_operand:SI 0 "register_operand" "=r,r")
2718 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2719 UNSPEC_FIX_NOTRUNC))]
2721 "%vcvtss2si\t{%1, %0|%0, %1}"
2722 [(set_attr "type" "sseicvt")
2723 (set_attr "athlon_decode" "double,vector")
2724 (set_attr "amdfam10_decode" "double,double")
2725 (set_attr "prefix_rep" "1")
2726 (set_attr "prefix" "maybe_vex")
2727 (set_attr "mode" "SI")])
;; 64-bit-only (TARGET_SSE && TARGET_64BIT) non-truncating conversion of
;; element 0 of the V4SF operand 1 to a DI general register.  The source is
;; an SSE register (alternative 0) or memory (alternative 1).
2729 (define_insn "sse_cvtss2siq"
2730 [(set (match_operand:DI 0 "register_operand" "=r,r")
2733 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2734 (parallel [(const_int 0)]))]
2735 UNSPEC_FIX_NOTRUNC))]
2736 "TARGET_SSE && TARGET_64BIT"
2737 "%vcvtss2siq\t{%1, %0|%0, %1}"
2738 [(set_attr "type" "sseicvt")
2739 (set_attr "athlon_decode" "double,vector")
2740 (set_attr "prefix_rep" "1")
2741 (set_attr "prefix" "maybe_vex")
2742 (set_attr "mode" "DI")])
;; 64-bit-only non-truncating (UNSPEC_FIX_NOTRUNC) conversion of a plain SF
;; operand (SSE register or memory) to a DI general register.
2744 (define_insn "sse_cvtss2siq_2"
2745 [(set (match_operand:DI 0 "register_operand" "=r,r")
2746 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2747 UNSPEC_FIX_NOTRUNC))]
2748 "TARGET_SSE && TARGET_64BIT"
2749 "%vcvtss2siq\t{%1, %0|%0, %1}"
2750 [(set_attr "type" "sseicvt")
2751 (set_attr "athlon_decode" "double,vector")
2752 (set_attr "amdfam10_decode" "double,double")
2753 (set_attr "prefix_rep" "1")
2754 (set_attr "prefix" "maybe_vex")
2755 (set_attr "mode" "DI")])
;; Converts element 0 of the V4SF operand 1 (SSE register or memory) to an
;; SI general register using the truncating cvttss2si instruction.
2757 (define_insn "sse_cvttss2si"
2758 [(set (match_operand:SI 0 "register_operand" "=r,r")
2761 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2762 (parallel [(const_int 0)]))))]
2764 "%vcvttss2si\t{%1, %0|%0, %1}"
2765 [(set_attr "type" "sseicvt")
2766 (set_attr "athlon_decode" "double,vector")
2767 (set_attr "amdfam10_decode" "double,double")
2768 (set_attr "prefix_rep" "1")
2769 (set_attr "prefix" "maybe_vex")
2770 (set_attr "mode" "SI")])
;; 64-bit-only (TARGET_SSE && TARGET_64BIT) variant: converts element 0 of
;; the V4SF operand 1 (SSE register or memory) to a DI general register
;; using the truncating cvttss2siq instruction.
2772 (define_insn "sse_cvttss2siq"
2773 [(set (match_operand:DI 0 "register_operand" "=r,r")
2776 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2777 (parallel [(const_int 0)]))))]
2778 "TARGET_SSE && TARGET_64BIT"
2779 "%vcvttss2siq\t{%1, %0|%0, %1}"
2780 [(set_attr "type" "sseicvt")
2781 (set_attr "athlon_decode" "double,vector")
2782 (set_attr "amdfam10_decode" "double,double")
2783 (set_attr "prefix_rep" "1")
2784 (set_attr "prefix" "maybe_vex")
2785 (set_attr "mode" "DI")])
;; VEX-encoded integer-to-float vector conversion, iterated over the
;; AVXMODEDCVTDQ2PS modes.  The source has the corresponding
;; <avxcvtvecmode> and may be an SSE register or memory.  Emits vcvtdq2ps.
2787 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2788 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2789 (float:AVXMODEDCVTDQ2PS
2790 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2792 "vcvtdq2ps\t{%1, %0|%0, %1}"
2793 [(set_attr "type" "ssecvt")
2794 (set_attr "prefix" "vex")
2795 (set_attr "mode" "<avxvecmode>")])
;; Signed V4SI -> V4SF conversion; the source may be an SSE register or
;; memory.  Emits cvtdq2ps.
2797 (define_insn "sse2_cvtdq2ps"
2798 [(set (match_operand:V4SF 0 "register_operand" "=x")
2799 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2801 "cvtdq2ps\t{%1, %0|%0, %1}"
2802 [(set_attr "type" "ssecvt")
2803 (set_attr "mode" "V4SF")])
;; Expander for V4SI -> V4SF conversion of unsigned values, built from a
;; sequence of V4SF operations: a signed float conversion of operand 1, an
;; lt compare of operand 5 against operand 3, an and of operand 6 with
;; operand 4, and a final plus of operands 5 and 7 that sets operand 0.
;; The preparation code computes 2**32 with real_ldexp, forces a V4SF zero
;; vector into operand 3 and a constant vector built from 2**32 into
;; operand 4, and allocates fresh V4SF pseudo registers for operands 5-7.
2805 (define_expand "sse2_cvtudq2ps"
2807 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2809 (lt:V4SF (match_dup 5) (match_dup 3)))
2811 (and:V4SF (match_dup 6) (match_dup 4)))
2812 (set (match_operand:V4SF 0 "register_operand" "")
2813 (plus:V4SF (match_dup 5) (match_dup 7)))]
2816 REAL_VALUE_TYPE TWO32r;
2820 real_ldexp (&TWO32r, &dconst1, 32);
2821 x = const_double_from_real_value (TWO32r, SFmode);
2823 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2824 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2826 for (i = 5; i < 8; i++)
2827 operands[i] = gen_reg_rtx (V4SFmode);
;; VEX-encoded non-truncating (UNSPEC_FIX_NOTRUNC) float-to-integer vector
;; conversion, iterated over the AVXMODEDCVTPS2DQ modes.  The source has the
;; corresponding <avxcvtvecmode> and may be an SSE register or memory.
2830 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2831 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2832 (unspec:AVXMODEDCVTPS2DQ
2833 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2834 UNSPEC_FIX_NOTRUNC))]
2836 "vcvtps2dq\t{%1, %0|%0, %1}"
2837 [(set_attr "type" "ssecvt")
2838 (set_attr "prefix" "vex")
2839 (set_attr "mode" "<avxvecmode>")])
;; Non-truncating (UNSPEC_FIX_NOTRUNC) V4SF -> V4SI conversion; the source
;; may be an SSE register or memory.  Emits cvtps2dq, with the
;; prefix_data16 attribute set to 1.
2841 (define_insn "sse2_cvtps2dq"
2842 [(set (match_operand:V4SI 0 "register_operand" "=x")
2843 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2844 UNSPEC_FIX_NOTRUNC))]
2846 "cvtps2dq\t{%1, %0|%0, %1}"
2847 [(set_attr "type" "ssecvt")
2848 (set_attr "prefix_data16" "1")
2849 (set_attr "mode" "TI")])
;; VEX-encoded truncating (fix) float-to-integer vector conversion, iterated
;; over the AVXMODEDCVTPS2DQ modes.  The source has the corresponding
;; <avxcvtvecmode> and may be an SSE register or memory.
2851 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2852 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2853 (fix:AVXMODEDCVTPS2DQ
2854 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2856 "vcvttps2dq\t{%1, %0|%0, %1}"
2857 [(set_attr "type" "ssecvt")
2858 (set_attr "prefix" "vex")
2859 (set_attr "mode" "<avxvecmode>")])
;; Truncating (fix) V4SF -> V4SI conversion; the source may be an SSE
;; register or memory.  Emits cvttps2dq, with prefix_rep set to 1 and
;; prefix_data16 set to 0.
2861 (define_insn "sse2_cvttps2dq"
2862 [(set (match_operand:V4SI 0 "register_operand" "=x")
2863 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2865 "cvttps2dq\t{%1, %0|%0, %1}"
2866 [(set_attr "type" "ssecvt")
2867 (set_attr "prefix_rep" "1")
2868 (set_attr "prefix_data16" "0")
2869 (set_attr "mode" "TI")])
2871 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2873 ;; Parallel double-precision floating point conversion operations
2875 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2SI -> V2DF conversion.  Alternative 0 takes the source from an MMX
;; register ("y") and is marked unit "mmx" with prefix_data16 "1";
;; alternative 1 takes it from memory and leaves both attributes at their
;; defaults ("*").
2877 (define_insn "sse2_cvtpi2pd"
2878 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2879 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2881 "cvtpi2pd\t{%1, %0|%0, %1}"
2882 [(set_attr "type" "ssecvt")
2883 (set_attr "unit" "mmx,*")
2884 (set_attr "prefix_data16" "1,*")
2885 (set_attr "mode" "V2DF")])
;; Non-truncating (UNSPEC_FIX_NOTRUNC) V2DF -> V2SI conversion.  The source
;; is an SSE register or memory; the destination is an MMX register ("=y").
2887 (define_insn "sse2_cvtpd2pi"
2888 [(set (match_operand:V2SI 0 "register_operand" "=y")
2889 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2890 UNSPEC_FIX_NOTRUNC))]
2892 "cvtpd2pi\t{%1, %0|%0, %1}"
2893 [(set_attr "type" "ssecvt")
2894 (set_attr "unit" "mmx")
2895 (set_attr "prefix_data16" "1")
2896 (set_attr "mode" "DI")])
;; Truncating (fix) V2DF -> V2SI conversion.  The source is an SSE register
;; or memory; the destination is an MMX register ("=y").
;; NOTE(review): the "mode" attribute is "TI" here but "DI" in
;; sse2_cvtpd2pi -- confirm which is intended.
2898 (define_insn "sse2_cvttpd2pi"
2899 [(set (match_operand:V2SI 0 "register_operand" "=y")
2900 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2902 "cvttpd2pi\t{%1, %0|%0, %1}"
2903 [(set_attr "type" "ssecvt")
2904 (set_attr "unit" "mmx")
2905 (set_attr "prefix_data16" "1")
2906 (set_attr "mode" "TI")])
;; VEX-encoded form: converts the SI operand 2 (general register or memory)
;; to DF and combines it with the V2DF operand 1.  Operand 1 is any SSE
;; register and is not tied to the destination, so the three-operand
;; vcvtsi2sd is emitted.
2908 (define_insn "*avx_cvtsi2sd"
2909 [(set (match_operand:V2DF 0 "register_operand" "=x")
2912 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2913 (match_operand:V2DF 1 "register_operand" "x")
2916 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2917 [(set_attr "type" "sseicvt")
2918 (set_attr "prefix" "vex")
2919 (set_attr "mode" "DF")])
;; Converts the SI operand 2 to DF and combines it with the V2DF operand 1,
;; which is tied to the destination ("0").  Register and memory sources are
;; separate alternatives so the decode attributes can differ between them.
2921 (define_insn "sse2_cvtsi2sd"
2922 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2925 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2926 (match_operand:V2DF 1 "register_operand" "0,0")
2929 "cvtsi2sd\t{%2, %0|%0, %2}"
2930 [(set_attr "type" "sseicvt")
2931 (set_attr "mode" "DF")
2932 (set_attr "athlon_decode" "double,direct")
2933 (set_attr "amdfam10_decode" "vector,double")])
;; 64-bit-only VEX form (TARGET_AVX && TARGET_64BIT): converts the DI
;; operand 2 to DF and combines it with the V2DF operand 1.  Emits the
;; three-operand vcvtsi2sdq.
2935 (define_insn "*avx_cvtsi2sdq"
2936 [(set (match_operand:V2DF 0 "register_operand" "=x")
2939 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2940 (match_operand:V2DF 1 "register_operand" "x")
2942 "TARGET_AVX && TARGET_64BIT"
2943 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2944 [(set_attr "type" "sseicvt")
2945 (set_attr "length_vex" "4")
2946 (set_attr "prefix" "vex")
2947 (set_attr "mode" "DF")])
;; 64-bit-only form (TARGET_SSE2 && TARGET_64BIT): converts the DI operand 2
;; to DF and combines it with the V2DF operand 1, which is tied to the
;; destination ("0").  Emits cvtsi2sdq with a REX prefix.
2949 (define_insn "sse2_cvtsi2sdq"
2950 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2953 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2954 (match_operand:V2DF 1 "register_operand" "0,0")
2956 "TARGET_SSE2 && TARGET_64BIT"
2957 "cvtsi2sdq\t{%2, %0|%0, %2}"
2958 [(set_attr "type" "sseicvt")
2959 (set_attr "prefix_rex" "1")
2960 (set_attr "mode" "DF")
2961 (set_attr "athlon_decode" "double,direct")
2962 (set_attr "amdfam10_decode" "vector,double")])
;; Non-truncating (UNSPEC_FIX_NOTRUNC) conversion of element 0 of the V2DF
;; operand 1 to an SI general register.  The source is an SSE register
;; (alternative 0) or memory (alternative 1).
2964 (define_insn "sse2_cvtsd2si"
2965 [(set (match_operand:SI 0 "register_operand" "=r,r")
2968 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2969 (parallel [(const_int 0)]))]
2970 UNSPEC_FIX_NOTRUNC))]
2972 "%vcvtsd2si\t{%1, %0|%0, %1}"
2973 [(set_attr "type" "sseicvt")
2974 (set_attr "athlon_decode" "double,vector")
2975 (set_attr "prefix_rep" "1")
2976 (set_attr "prefix" "maybe_vex")
2977 (set_attr "mode" "SI")])
;; Same cvtsd2si instruction as sse2_cvtsd2si, but the source is a plain DF
;; operand (SSE register or memory) rather than element 0 of a V2DF vector.
2979 (define_insn "sse2_cvtsd2si_2"
2980 [(set (match_operand:SI 0 "register_operand" "=r,r")
2981 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2982 UNSPEC_FIX_NOTRUNC))]
2984 "%vcvtsd2si\t{%1, %0|%0, %1}"
2985 [(set_attr "type" "sseicvt")
2986 (set_attr "athlon_decode" "double,vector")
2987 (set_attr "amdfam10_decode" "double,double")
2988 (set_attr "prefix_rep" "1")
2989 (set_attr "prefix" "maybe_vex")
2990 (set_attr "mode" "SI")])
;; 64-bit variant of sse2_cvtsd2si: element 0 of V2DF operand 1 is
;; converted to DImode via UNSPEC_FIX_NOTRUNC.  Requires SSE2 and a
;; 64-bit target.
2992 (define_insn "sse2_cvtsd2siq"
2993 [(set (match_operand:DI 0 "register_operand" "=r,r")
2996 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2997 (parallel [(const_int 0)]))]
2998 UNSPEC_FIX_NOTRUNC))]
2999 "TARGET_SSE2 && TARGET_64BIT"
3000 "%vcvtsd2siq\t{%1, %0|%0, %1}"
3001 [(set_attr "type" "sseicvt")
3002 (set_attr "athlon_decode" "double,vector")
3003 (set_attr "prefix_rep" "1")
3004 (set_attr "prefix" "maybe_vex")
3005 (set_attr "mode" "DI")])
;; 64-bit variant of sse2_cvtsd2si_2: a plain DFmode source is converted
;; to DImode via UNSPEC_FIX_NOTRUNC.  Requires SSE2 and a 64-bit target.
3007 (define_insn "sse2_cvtsd2siq_2"
3008 [(set (match_operand:DI 0 "register_operand" "=r,r")
3009 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
3010 UNSPEC_FIX_NOTRUNC))]
3011 "TARGET_SSE2 && TARGET_64BIT"
3012 "%vcvtsd2siq\t{%1, %0|%0, %1}"
3013 [(set_attr "type" "sseicvt")
3014 (set_attr "athlon_decode" "double,vector")
3015 (set_attr "amdfam10_decode" "double,double")
3016 (set_attr "prefix_rep" "1")
3017 (set_attr "prefix" "maybe_vex")
3018 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of the V2DF operand 1 (register or
;; memory) to an SImode integer in a general register.
3020 (define_insn "sse2_cvttsd2si"
3021 [(set (match_operand:SI 0 "register_operand" "=r,r")
3024 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
3025 (parallel [(const_int 0)]))))]
3027 "%vcvttsd2si\t{%1, %0|%0, %1}"
3028 [(set_attr "type" "sseicvt")
3029 (set_attr "prefix_rep" "1")
3030 (set_attr "prefix" "maybe_vex")
3031 (set_attr "mode" "SI")
3032 (set_attr "athlon_decode" "double,vector")
3033 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: 64-bit variant of sse2_cvttsd2si producing a DImode
;; result.  Requires SSE2 and a 64-bit target.
3035 (define_insn "sse2_cvttsd2siq"
3036 [(set (match_operand:DI 0 "register_operand" "=r,r")
3039 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
3040 (parallel [(const_int 0)]))))]
3041 "TARGET_SSE2 && TARGET_64BIT"
3042 "%vcvttsd2siq\t{%1, %0|%0, %1}"
3043 [(set_attr "type" "sseicvt")
3044 (set_attr "prefix_rep" "1")
3045 (set_attr "prefix" "maybe_vex")
3046 (set_attr "mode" "DI")
3047 (set_attr "athlon_decode" "double,vector")
3048 (set_attr "amdfam10_decode" "double,double")])
;; 256-bit vcvtdq2pd: converts all four SImode elements of V4SI operand 1
;; to a V4DF result (float:V4DF).
3050 (define_insn "avx_cvtdq2pd256"
3051 [(set (match_operand:V4DF 0 "register_operand" "=x")
3052 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
3054 "vcvtdq2pd\t{%1, %0|%0, %1}"
3055 [(set_attr "type" "ssecvt")
3056 (set_attr "prefix" "vex")
3057 (set_attr "mode" "V4DF")])
;; cvtdq2pd: produces a V2DF result from elements 0 and 1 of the V4SI
;; operand 1 (see the parallel selector).
3059 (define_insn "sse2_cvtdq2pd"
3060 [(set (match_operand:V2DF 0 "register_operand" "=x")
3063 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3064 (parallel [(const_int 0) (const_int 1)]))))]
3066 "%vcvtdq2pd\t{%1, %0|%0, %1}"
3067 [(set_attr "type" "ssecvt")
3068 (set_attr "prefix" "maybe_vex")
3069 (set_attr "mode" "V2DF")])
;; 256-bit vcvtpd2dq: converts V4DF operand 1 to V4SI through
;; UNSPEC_FIX_NOTRUNC.  The AT&T mnemonic carries a "y" suffix
;; ("{y}" in the template).
3071 (define_insn "avx_cvtpd2dq256"
3072 [(set (match_operand:V4SI 0 "register_operand" "=x")
3073 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3074 UNSPEC_FIX_NOTRUNC))]
3076 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
3077 [(set_attr "type" "ssecvt")
3078 (set_attr "prefix" "vex")
3079 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: wraps V2DF operand 1 in a V2SI unspec and
;; supplies operand 2 as a V2SImode zero constant, which the matching
;; insn *sse2_cvtpd2dq expects as its const0_operand.
3081 (define_expand "sse2_cvtpd2dq"
3082 [(set (match_operand:V4SI 0 "register_operand" "")
3084 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
3088 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: V2DF operand 1 is converted to V2SI (unspec); operand 2 must
;; be a V2SI zero constant.  The C output fragment picks the VEX
;; mnemonic (with the AT&T "x" suffix) when TARGET_AVX is set and the
;; legacy mnemonic otherwise.
3090 (define_insn "*sse2_cvtpd2dq"
3091 [(set (match_operand:V4SI 0 "register_operand" "=x")
3093 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3095 (match_operand:V2SI 2 "const0_operand" "")))]
3097 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
3098 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
3099 [(set_attr "type" "ssecvt")
3100 (set_attr "prefix_rep" "1")
3101 (set_attr "prefix_data16" "0")
3102 (set_attr "prefix" "maybe_vex")
3103 (set_attr "mode" "TI")
3104 (set_attr "amdfam10_decode" "double")])
;; 256-bit vcvttpd2dq: converts V4DF operand 1 to V4SI using the RTL
;; "fix" operation.  The AT&T mnemonic carries a "y" suffix.
3106 (define_insn "avx_cvttpd2dq256"
3107 [(set (match_operand:V4SI 0 "register_operand" "=x")
3108 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3110 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
3111 [(set_attr "type" "ssecvt")
3112 (set_attr "prefix" "vex")
3113 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: applies fix:V2SI to V2DF operand 1 and
;; supplies operand 2 as a V2SImode zero constant, which the matching
;; insn *sse2_cvttpd2dq expects as its const0_operand.
3115 (define_expand "sse2_cvttpd2dq"
3116 [(set (match_operand:V4SI 0 "register_operand" "")
3118 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
3121 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: V2DF operand 1 is converted with fix:V2SI; operand 2 must be
;; a V2SI zero constant.  The C output fragment picks the VEX mnemonic
;; (with the AT&T "x" suffix) when TARGET_AVX is set and the legacy
;; mnemonic otherwise.
3123 (define_insn "*sse2_cvttpd2dq"
3124 [(set (match_operand:V4SI 0 "register_operand" "=x")
3126 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3127 (match_operand:V2SI 2 "const0_operand" "")))]
3129 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
3130 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
3131 [(set_attr "type" "ssecvt")
3132 (set_attr "prefix" "maybe_vex")
3133 (set_attr "mode" "TI")
3134 (set_attr "amdfam10_decode" "double")])
;; AVX vcvtsd2ss: float_truncate of V2DF operand 2 (register or memory),
;; combined with V4SF operand 1.  Three-operand template, so operand 1 is
;; not tied to the destination.
3136 (define_insn "*avx_cvtsd2ss"
3137 [(set (match_operand:V4SF 0 "register_operand" "=x")
3140 (float_truncate:V2SF
3141 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
3142 (match_operand:V4SF 1 "register_operand" "x")
3145 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
3146 [(set_attr "type" "ssecvt")
3147 (set_attr "prefix" "vex")
3148 (set_attr "mode" "SF")])
;; cvtsd2ss: float_truncate of V2DF operand 2, combined with V4SF
;; operand 1, which is tied to the destination ("0").  Separate register
;; and memory alternatives carry different AMD decode attributes.
3150 (define_insn "sse2_cvtsd2ss"
3151 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3154 (float_truncate:V2SF
3155 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
3156 (match_operand:V4SF 1 "register_operand" "0,0")
3159 "cvtsd2ss\t{%2, %0|%0, %2}"
3160 [(set_attr "type" "ssecvt")
3161 (set_attr "athlon_decode" "vector,double")
3162 (set_attr "amdfam10_decode" "vector,double")
3163 (set_attr "mode" "SF")])
;; AVX vcvtss2sd: takes V4SF operand 2 (register or memory) through a
;; two-element selector (indices 0 and 1) and combines the result with
;; V2DF operand 1.  Three-operand template.
3165 (define_insn "*avx_cvtss2sd"
3166 [(set (match_operand:V2DF 0 "register_operand" "=x")
3170 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
3171 (parallel [(const_int 0) (const_int 1)])))
3172 (match_operand:V2DF 1 "register_operand" "x")
3175 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
3176 [(set_attr "type" "ssecvt")
3177 (set_attr "prefix" "vex")
3178 (set_attr "mode" "DF")])
;; cvtss2sd: same shape as *avx_cvtss2sd, but operand 1 is tied to the
;; destination ("0") and the template is the two-operand legacy form.
3180 (define_insn "sse2_cvtss2sd"
3181 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3185 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
3186 (parallel [(const_int 0) (const_int 1)])))
3187 (match_operand:V2DF 1 "register_operand" "0,0")
3190 "cvtss2sd\t{%2, %0|%0, %2}"
3191 [(set_attr "type" "ssecvt")
3192 (set_attr "amdfam10_decode" "vector,double")
3193 (set_attr "mode" "DF")])
;; 256-bit vcvtpd2ps: float_truncate of V4DF operand 1 to a V4SF result.
;; The AT&T mnemonic carries a "y" suffix.
3195 (define_insn "avx_cvtpd2ps256"
3196 [(set (match_operand:V4SF 0 "register_operand" "=x")
3197 (float_truncate:V4SF
3198 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3200 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3201 [(set_attr "type" "ssecvt")
3202 (set_attr "prefix" "vex")
3203 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: float_truncate of V2DF operand 1 to V2SF, with
;; operand 2 supplied as a V2SFmode zero constant, which the matching
;; insn *sse2_cvtpd2ps expects as its const0_operand.
3205 (define_expand "sse2_cvtpd2ps"
3206 [(set (match_operand:V4SF 0 "register_operand" "")
3208 (float_truncate:V2SF
3209 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3212 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: float_truncate of V2DF operand 1 to V2SF; operand 2 must be
;; a V2SF zero constant.  The C output fragment picks the VEX mnemonic
;; (with the AT&T "x" suffix) when TARGET_AVX is set and the legacy
;; mnemonic otherwise.
3214 (define_insn "*sse2_cvtpd2ps"
3215 [(set (match_operand:V4SF 0 "register_operand" "=x")
3217 (float_truncate:V2SF
3218 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3219 (match_operand:V2SF 2 "const0_operand" "")))]
3221 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3222 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3223 [(set_attr "type" "ssecvt")
3224 (set_attr "prefix_data16" "1")
3225 (set_attr "prefix" "maybe_vex")
3226 (set_attr "mode" "V4SF")
3227 (set_attr "amdfam10_decode" "double")])
;; 256-bit vcvtps2pd: produces a V4DF result from V4SF operand 1
;; (register or memory).
3229 (define_insn "avx_cvtps2pd256"
3230 [(set (match_operand:V4DF 0 "register_operand" "=x")
3232 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3234 "vcvtps2pd\t{%1, %0|%0, %1}"
3235 [(set_attr "type" "ssecvt")
3236 (set_attr "prefix" "vex")
3237 (set_attr "mode" "V4DF")])
;; cvtps2pd: produces a V2DF result from elements 0 and 1 of V4SF
;; operand 1 (see the parallel selector).  prefix_data16 is forced to 0.
3239 (define_insn "sse2_cvtps2pd"
3240 [(set (match_operand:V2DF 0 "register_operand" "=x")
3243 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3244 (parallel [(const_int 0) (const_int 1)]))))]
3246 "%vcvtps2pd\t{%1, %0|%0, %1}"
3247 [(set_attr "type" "ssecvt")
3248 (set_attr "prefix" "maybe_vex")
3249 (set_attr "mode" "V2DF")
3250 (set_attr "prefix_data16" "0")
3251 (set_attr "amdfam10_decode" "direct")])
;; Expander producing a V2DF result from V4SF operand 1 in two steps:
;; a shuffle into the fresh V4SF pseudo created in the preparation code
;; (operand 2), followed by a conversion of elements 0 and 1.
;; NOTE(review): the name suggests the high half is unpacked; confirm
;; against the full selector list, which is only partly visible here.
3253 (define_expand "vec_unpacks_hi_v4sf"
3258 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3259 (parallel [(const_int 6)
3263 (set (match_operand:V2DF 0 "register_operand" "")
3267 (parallel [(const_int 0) (const_int 1)]))))]
3270 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing a V2DF result from elements 0 and 1 of V4SF
;; operand 1; no temporary is needed.
3273 (define_expand "vec_unpacks_lo_v4sf"
3274 [(set (match_operand:V2DF 0 "register_operand" "")
3277 (match_operand:V4SF 1 "nonimmediate_operand" "")
3278 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF: widens with vec_unpacks_hi_v8hi into a V4SI temporary,
;; then converts that temporary to float with sse2_cvtdq2ps.
3281 (define_expand "vec_unpacks_float_hi_v8hi"
3282 [(match_operand:V4SF 0 "register_operand" "")
3283 (match_operand:V8HI 1 "register_operand" "")]
3286 rtx tmp = gen_reg_rtx (V4SImode);
3288 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3289 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: widens with vec_unpacks_lo_v8hi into a V4SI temporary,
;; then converts that temporary to float with sse2_cvtdq2ps.
3293 (define_expand "vec_unpacks_float_lo_v8hi"
3294 [(match_operand:V4SF 0 "register_operand" "")
3295 (match_operand:V8HI 1 "register_operand" "")]
3298 rtx tmp = gen_reg_rtx (V4SImode);
3300 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3301 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: widens with vec_unpacku_hi_v8hi into a V4SI temporary,
;; then converts that temporary to float with sse2_cvtdq2ps.
3305 (define_expand "vec_unpacku_float_hi_v8hi"
3306 [(match_operand:V4SF 0 "register_operand" "")
3307 (match_operand:V8HI 1 "register_operand" "")]
3310 rtx tmp = gen_reg_rtx (V4SImode);
3312 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3313 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: widens with vec_unpacku_lo_v8hi into a V4SI temporary,
;; then converts that temporary to float with sse2_cvtdq2ps.
3317 (define_expand "vec_unpacku_float_lo_v8hi"
3318 [(match_operand:V4SF 0 "register_operand" "")
3319 (match_operand:V8HI 1 "register_operand" "")]
3322 rtx tmp = gen_reg_rtx (V4SImode);
3324 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3325 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V4SI -> V2DF in two steps: a shuffle of operand 1 into the fresh V4SI
;; pseudo created in the preparation code (operand 2), followed by a
;; conversion of elements 0 and 1.
;; NOTE(review): the selector list is only partly visible here (it starts
;; at index 2); confirm the exact shuffle against the full pattern.
3329 (define_expand "vec_unpacks_float_hi_v4si"
3332 (match_operand:V4SI 1 "nonimmediate_operand" "")
3333 (parallel [(const_int 2)
3337 (set (match_operand:V2DF 0 "register_operand" "")
3341 (parallel [(const_int 0) (const_int 1)]))))]
3343 "operands[2] = gen_reg_rtx (V4SImode);")
;; V4SI -> V2DF: converts elements 0 and 1 of operand 1 directly; no
;; temporary is needed.
3345 (define_expand "vec_unpacks_float_lo_v4si"
3346 [(set (match_operand:V2DF 0 "register_operand" "")
3349 (match_operand:V4SI 1 "nonimmediate_operand" "")
3350 (parallel [(const_int 0) (const_int 1)]))))]
;; V4SI -> V2DF expander that post-corrects the converted value.
;; Preparation code sets up:
;;   operands[3]  = V2DF zero vector (forced into a register)
;;   operands[4]  = V2DF vector built from the constant 2^32
;;                  (real_ldexp of dconst1 by 32)
;;   operands[5]  = fresh V4SI pseudo
;;   operands[6..8] = fresh V2DF pseudos
;; Visible steps: compare operand 6 < operand 3 (zero), AND operand 7 with
;; operand 4 (2^32), and store operand 6 + operand 8 in operand 0.
;; NOTE(review): presumably operand 7 is the compare mask and operand 8 the
;; masked 2^32 correction for values that converted as negative; the set
;; destinations are not visible in this excerpt -- confirm.
3353 (define_expand "vec_unpacku_float_hi_v4si"
3356 (match_operand:V4SI 1 "nonimmediate_operand" "")
3357 (parallel [(const_int 2)
3365 (parallel [(const_int 0) (const_int 1)]))))
3367 (lt:V2DF (match_dup 6) (match_dup 3)))
3369 (and:V2DF (match_dup 7) (match_dup 4)))
3370 (set (match_operand:V2DF 0 "register_operand" "")
3371 (plus:V2DF (match_dup 6) (match_dup 8)))]
3374 REAL_VALUE_TYPE TWO32r;
3378 real_ldexp (&TWO32r, &dconst1, 32);
3379 x = const_double_from_real_value (TWO32r, DFmode);
3381 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3382 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3384 operands[5] = gen_reg_rtx (V4SImode);
3386 for (i = 6; i < 9; i++)
3387 operands[i] = gen_reg_rtx (V2DFmode);
;; V4SI -> V2DF expander that post-corrects the converted value; works on
;; elements 0 and 1 of operand 1, so no V4SI temporary is allocated.
;; Preparation code sets up:
;;   operands[3]  = V2DF zero vector (forced into a register)
;;   operands[4]  = V2DF vector built from the constant 2^32
;;   operands[5..7] = fresh V2DF pseudos
;; Visible steps: compare operand 5 < operand 3 (zero), AND operand 6 with
;; operand 4 (2^32), and store operand 5 + operand 7 in operand 0.
;; NOTE(review): presumably operand 6 is the compare mask and operand 7 the
;; masked 2^32 correction; the set destinations are not visible in this
;; excerpt -- confirm.
3390 (define_expand "vec_unpacku_float_lo_v4si"
3394 (match_operand:V4SI 1 "nonimmediate_operand" "")
3395 (parallel [(const_int 0) (const_int 1)]))))
3397 (lt:V2DF (match_dup 5) (match_dup 3)))
3399 (and:V2DF (match_dup 6) (match_dup 4)))
3400 (set (match_operand:V2DF 0 "register_operand" "")
3401 (plus:V2DF (match_dup 5) (match_dup 7)))]
3404 REAL_VALUE_TYPE TWO32r;
3408 real_ldexp (&TWO32r, &dconst1, 32);
3409 x = const_double_from_real_value (TWO32r, DFmode);
3411 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3412 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3414 for (i = 5; i < 8; i++)
3415 operands[i] = gen_reg_rtx (V2DFmode);
;; Packs two V2DF inputs into one V4SF: each input is converted with
;; sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and the two
;; temporaries are then merged into operand 0 with sse_movlhps.
3418 (define_expand "vec_pack_trunc_v2df"
3419 [(match_operand:V4SF 0 "register_operand" "")
3420 (match_operand:V2DF 1 "nonimmediate_operand" "")
3421 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3426 r1 = gen_reg_rtx (V4SFmode);
3427 r2 = gen_reg_rtx (V4SFmode);
3429 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3430 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3431 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs into one V4SI: each input is converted with
;; sse2_cvttpd2dq into its own V4SI temporary (r1, r2), and the two
;; temporaries are then merged with sse2_punpcklqdq, all three operands
;; being viewed as V2DI through gen_lowpart.
3435 (define_expand "vec_pack_sfix_trunc_v2df"
3436 [(match_operand:V4SI 0 "register_operand" "")
3437 (match_operand:V2DF 1 "nonimmediate_operand" "")
3438 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3443 r1 = gen_reg_rtx (V4SImode);
3444 r2 = gen_reg_rtx (V4SImode);
3446 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3447 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3448 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
3449 gen_lowpart (V2DImode, r1),
3450 gen_lowpart (V2DImode, r2)));
;; Same structure as vec_pack_sfix_trunc_v2df, but each V2DF input is
;; converted with sse2_cvtpd2dq instead of sse2_cvttpd2dq before the
;; sse2_punpcklqdq merge.
3454 (define_expand "vec_pack_sfix_v2df"
3455 [(match_operand:V4SI 0 "register_operand" "")
3456 (match_operand:V2DF 1 "nonimmediate_operand" "")
3457 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3462 r1 = gen_reg_rtx (V4SImode);
3463 r2 = gen_reg_rtx (V4SImode);
3465 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3466 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3467 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
3468 gen_lowpart (V2DImode, r1),
3469 gen_lowpart (V2DImode, r2)));
3473 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3475 ;; Parallel single-precision floating point element swizzling
3477 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the movhlps shuffle of two V4SF operands.  All three
;; operands accept nonimmediate_operand; the preparation statement calls
;; ix86_fixup_binary_operands to legitimize them before matching.
3479 (define_expand "sse_movhlps_exp"
3480 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3483 (match_operand:V4SF 1 "nonimmediate_operand" "")
3484 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3485 (parallel [(const_int 6)
3490 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movhlps shuffle.  Three alternatives:
;;   0: reg <- reg, reg              vmovhlps
;;   1: reg <- reg, offsettable mem  vmovlps, using the %H2 operand form
;;   2: mem (tied to operand 1) <- reg   vmovhps store
;; The condition rejects the case where operands 1 and 2 are both memory.
3492 (define_insn "*avx_movhlps"
3493 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3496 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3497 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3498 (parallel [(const_int 6)
3502 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3504 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3505 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3506 vmovhps\t{%2, %0|%0, %2}"
3507 [(set_attr "type" "ssemov")
3508 (set_attr "prefix" "vex")
3509 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE movhlps shuffle; operand 1 is tied to the destination in
;; every alternative.  Three alternatives:
;;   0: reg <- reg              movhlps
;;   1: reg <- offsettable mem  movlps, using the %H2 operand form
;;   2: mem <- reg              movhps store
;; The condition rejects the case where operands 1 and 2 are both memory.
3511 (define_insn "sse_movhlps"
3512 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3515 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3516 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3517 (parallel [(const_int 6)
3521 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3523 movhlps\t{%2, %0|%0, %2}
3524 movlps\t{%H2, %0|%0, %H2}
3525 movhps\t{%2, %0|%0, %2}"
3526 [(set_attr "type" "ssemov")
3527 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander for the movlhps shuffle of two V4SF operands.  All three
;; operands accept nonimmediate_operand; the preparation statement calls
;; ix86_fixup_binary_operands to legitimize them before matching.
3529 (define_expand "sse_movlhps_exp"
3530 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3533 (match_operand:V4SF 1 "nonimmediate_operand" "")
3534 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3535 (parallel [(const_int 0)
3540 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movlhps shuffle.  Three alternatives:
;;   0: reg <- reg, reg   vmovlhps
;;   1: reg <- reg, mem   vmovhps load
;;   2: offsettable mem (tied to operand 1) <- reg   vmovlps store to %H0
;; Operand validity is checked with ix86_binary_operator_ok.
3542 (define_insn "*avx_movlhps"
3543 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3546 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3547 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3548 (parallel [(const_int 0)
3552 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3554 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3555 vmovhps\t{%2, %1, %0|%0, %1, %2}
3556 vmovlps\t{%2, %H0|%H0, %2}"
3557 [(set_attr "type" "ssemov")
3558 (set_attr "prefix" "vex")
3559 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE movlhps shuffle; operand 1 is tied to the destination in
;; every alternative.  Three alternatives:
;;   0: reg <- reg              movlhps
;;   1: reg <- mem              movhps load
;;   2: offsettable mem <- reg  movlps store to %H0
;; Operand validity is checked with ix86_binary_operator_ok.
3561 (define_insn "sse_movlhps"
3562 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3565 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3566 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3567 (parallel [(const_int 0)
3571 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3573 movlhps\t{%2, %0|%0, %2}
3574 movhps\t{%2, %0|%0, %2}
3575 movlps\t{%2, %H0|%H0, %2}"
3576 [(set_attr "type" "ssemov")
3577 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; 256-bit vunpckhps on V8SF operands 1 and 2.  The selector picks
;; indices 2,10,3,11,6,14,7,15, i.e. pairs (i, i+8) for i = 2,3,6,7.
3579 (define_insn "avx_unpckhps256"
3580 [(set (match_operand:V8SF 0 "register_operand" "=x")
3583 (match_operand:V8SF 1 "register_operand" "x")
3584 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3585 (parallel [(const_int 2) (const_int 10)
3586 (const_int 3) (const_int 11)
3587 (const_int 6) (const_int 14)
3588 (const_int 7) (const_int 15)])))]
3590 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3591 [(set_attr "type" "sselog")
3592 (set_attr "prefix" "vex")
3593 (set_attr "mode" "V8SF")])
;; 128-bit AVX vunpckhps on V4SF operands 1 and 2.  The selector picks
;; indices 2,6,3,7, i.e. pairs (i, i+4) for i = 2,3.
3595 (define_insn "*avx_unpckhps"
3596 [(set (match_operand:V4SF 0 "register_operand" "=x")
3599 (match_operand:V4SF 1 "register_operand" "x")
3600 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3601 (parallel [(const_int 2) (const_int 6)
3602 (const_int 3) (const_int 7)])))]
3604 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3605 [(set_attr "type" "sselog")
3606 (set_attr "prefix" "vex")
3607 (set_attr "mode" "V4SF")])
;; Legacy SSE unpckhps: same selector as *avx_unpckhps (2,6,3,7), with
;; operand 1 tied to the destination ("0").
3609 (define_insn "sse_unpckhps"
3610 [(set (match_operand:V4SF 0 "register_operand" "=x")
3613 (match_operand:V4SF 1 "register_operand" "0")
3614 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3615 (parallel [(const_int 2) (const_int 6)
3616 (const_int 3) (const_int 7)])))]
3618 "unpckhps\t{%2, %0|%0, %2}"
3619 [(set_attr "type" "sselog")
3620 (set_attr "mode" "V4SF")])
;; 256-bit vunpcklps on V8SF operands 1 and 2.  The selector picks
;; indices 0,8,1,9,4,12,5,13, i.e. pairs (i, i+8) for i = 0,1,4,5.
3622 (define_insn "avx_unpcklps256"
3623 [(set (match_operand:V8SF 0 "register_operand" "=x")
3626 (match_operand:V8SF 1 "register_operand" "x")
3627 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3628 (parallel [(const_int 0) (const_int 8)
3629 (const_int 1) (const_int 9)
3630 (const_int 4) (const_int 12)
3631 (const_int 5) (const_int 13)])))]
3633 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3634 [(set_attr "type" "sselog")
3635 (set_attr "prefix" "vex")
3636 (set_attr "mode" "V8SF")])
;; 128-bit AVX vunpcklps on V4SF operands 1 and 2.  The selector picks
;; indices 0,4,1,5, i.e. pairs (i, i+4) for i = 0,1.
3638 (define_insn "*avx_unpcklps"
3639 [(set (match_operand:V4SF 0 "register_operand" "=x")
3642 (match_operand:V4SF 1 "register_operand" "x")
3643 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3644 (parallel [(const_int 0) (const_int 4)
3645 (const_int 1) (const_int 5)])))]
3647 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3648 [(set_attr "type" "sselog")
3649 (set_attr "prefix" "vex")
3650 (set_attr "mode" "V4SF")])
;; Legacy SSE unpcklps: same selector as *avx_unpcklps (0,4,1,5), with
;; operand 1 tied to the destination ("0").
3652 (define_insn "sse_unpcklps"
3653 [(set (match_operand:V4SF 0 "register_operand" "=x")
3656 (match_operand:V4SF 1 "register_operand" "0")
3657 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3658 (parallel [(const_int 0) (const_int 4)
3659 (const_int 1) (const_int 5)])))]
3661 "unpcklps\t{%2, %0|%0, %2}"
3662 [(set_attr "type" "sselog")
3663 (set_attr "mode" "V4SF")])
3665 ;; These are modeled with the same vec_concat as the others so that we
3666 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: the selector duplicates each odd-indexed element
;; (1,1,3,3,5,5,7,7) of V8SF operand 1.
3667 (define_insn "avx_movshdup256"
3668 [(set (match_operand:V8SF 0 "register_operand" "=x")
3671 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3673 (parallel [(const_int 1) (const_int 1)
3674 (const_int 3) (const_int 3)
3675 (const_int 5) (const_int 5)
3676 (const_int 7) (const_int 7)])))]
3678 "vmovshdup\t{%1, %0|%0, %1}"
3679 [(set_attr "type" "sse")
3680 (set_attr "prefix" "vex")
3681 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF operand 1 (register or memory).
;; NOTE(review): only the first selector index (1) is visible in this
;; excerpt; the full list presumably mirrors avx_movshdup256 -- confirm.
3683 (define_insn "sse3_movshdup"
3684 [(set (match_operand:V4SF 0 "register_operand" "=x")
3687 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3689 (parallel [(const_int 1)
3694 "%vmovshdup\t{%1, %0|%0, %1}"
3695 [(set_attr "type" "sse")
3696 (set_attr "prefix_rep" "1")
3697 (set_attr "prefix" "maybe_vex")
3698 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: the selector duplicates each even-indexed element
;; (0,0,2,2,4,4,6,6) of V8SF operand 1.
3700 (define_insn "avx_movsldup256"
3701 [(set (match_operand:V8SF 0 "register_operand" "=x")
3704 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3706 (parallel [(const_int 0) (const_int 0)
3707 (const_int 2) (const_int 2)
3708 (const_int 4) (const_int 4)
3709 (const_int 6) (const_int 6)])))]
3711 "vmovsldup\t{%1, %0|%0, %1}"
3712 [(set_attr "type" "sse")
3713 (set_attr "prefix" "vex")
3714 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF operand 1 (register or memory).
;; NOTE(review): only the first selector index (0) is visible in this
;; excerpt; the full list presumably mirrors avx_movsldup256 -- confirm.
3716 (define_insn "sse3_movsldup"
3717 [(set (match_operand:V4SF 0 "register_operand" "=x")
3720 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3722 (parallel [(const_int 0)
3727 "%vmovsldup\t{%1, %0|%0, %1}"
3728 [(set_attr "type" "sse")
3729 (set_attr "prefix_rep" "1")
3730 (set_attr "prefix" "maybe_vex")
3731 (set_attr "mode" "V4SF")])
;; Expander taking the shuffle immediate as a single integer (operand 3).
;; It splits the immediate into four 2-bit fields and hands each field to
;; avx_shufps256_1 twice as explicit element indices: once for result
;; elements 0-3 and once, offset by 4, for result elements 4-7.  The
;; fields at bits 4 and 6 are additionally offset by 8.
3733 (define_expand "avx_shufps256"
3734 [(match_operand:V8SF 0 "register_operand" "")
3735 (match_operand:V8SF 1 "register_operand" "")
3736 (match_operand:V8SF 2 "nonimmediate_operand" "")
3737 (match_operand:SI 3 "const_int_operand" "")]
3740 int mask = INTVAL (operands[3]);
3741 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3742 GEN_INT ((mask >> 0) & 3),
3743 GEN_INT ((mask >> 2) & 3),
3744 GEN_INT (((mask >> 4) & 3) + 8),
3745 GEN_INT (((mask >> 6) & 3) + 8),
3746 GEN_INT (((mask >> 0) & 3) + 4),
3747 GEN_INT (((mask >> 2) & 3) + 4),
3748 GEN_INT (((mask >> 4) & 3) + 12),
3749 GEN_INT (((mask >> 6) & 3) + 12)));
3753 ;; Each 2-bit field of the mask selects 2 elements: index i and index i + 4.
;; The insn condition enforces that pairing (operands 7..10 must equal
;; operands 3..6 plus 4).  The output code rebuilds the 8-bit immediate
;; from operands 3..6 only, stores it back into operands[3], and emits
;; vshufps.
3754 (define_insn "avx_shufps256_1"
3755 [(set (match_operand:V8SF 0 "register_operand" "=x")
3758 (match_operand:V8SF 1 "register_operand" "x")
3759 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3760 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3761 (match_operand 4 "const_0_to_3_operand" "")
3762 (match_operand 5 "const_8_to_11_operand" "")
3763 (match_operand 6 "const_8_to_11_operand" "")
3764 (match_operand 7 "const_4_to_7_operand" "")
3765 (match_operand 8 "const_4_to_7_operand" "")
3766 (match_operand 9 "const_12_to_15_operand" "")
3767 (match_operand 10 "const_12_to_15_operand" "")])))]
3769 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3770 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3771 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3772 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3775 mask = INTVAL (operands[3]);
3776 mask |= INTVAL (operands[4]) << 2;
3777 mask |= (INTVAL (operands[5]) - 8) << 4;
3778 mask |= (INTVAL (operands[6]) - 8) << 6;
3779 operands[3] = GEN_INT (mask);
3781 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3783 [(set_attr "type" "sselog")
3784 (set_attr "length_immediate" "1")
3785 (set_attr "prefix" "vex")
3786 (set_attr "mode" "V8SF")])
;; Expander taking the shufps immediate as a single integer (operand 3).
;; It splits the immediate into four 2-bit fields and passes them to
;; sse_shufps_v4sf as explicit element indices; the fields at bits 4 and
;; 6 are offset by 4.
3788 (define_expand "sse_shufps"
3789 [(match_operand:V4SF 0 "register_operand" "")
3790 (match_operand:V4SF 1 "register_operand" "")
3791 (match_operand:V4SF 2 "nonimmediate_operand" "")
3792 (match_operand:SI 3 "const_int_operand" "")]
3795 int mask = INTVAL (operands[3]);
3796 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3797 GEN_INT ((mask >> 0) & 3),
3798 GEN_INT ((mask >> 2) & 3),
3799 GEN_INT (((mask >> 4) & 3) + 4),
3800 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX vshufps over the SSEMODE4S iterator: selects four elements from the
;; concatenation of operands 1 and 2.  Operands 3 and 4 index 0..3,
;; operands 5 and 6 index 4..7.  The output code packs the four indices
;; into an 8-bit immediate (2 bits each, after subtracting 4 from operands
;; 5 and 6) and stores it back into operands[3] for the template.
3804 (define_insn "*avx_shufps_<mode>"
3805 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3806 (vec_select:SSEMODE4S
3807 (vec_concat:<ssedoublesizemode>
3808 (match_operand:SSEMODE4S 1 "register_operand" "x")
3809 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3810 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3811 (match_operand 4 "const_0_to_3_operand" "")
3812 (match_operand 5 "const_4_to_7_operand" "")
3813 (match_operand 6 "const_4_to_7_operand" "")])))]
3817 mask |= INTVAL (operands[3]) << 0;
3818 mask |= INTVAL (operands[4]) << 2;
3819 mask |= (INTVAL (operands[5]) - 4) << 4;
3820 mask |= (INTVAL (operands[6]) - 4) << 6;
3821 operands[3] = GEN_INT (mask);
3823 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3825 [(set_attr "type" "sselog")
3826 (set_attr "length_immediate" "1")
3827 (set_attr "prefix" "vex")
3828 (set_attr "mode" "V4SF")])
;; Legacy SSE shufps over the SSEMODE4S iterator: same selection and
;; immediate packing as *avx_shufps_<mode>, but operand 1 is tied to the
;; destination ("0") and the template is the two-operand form.
3830 (define_insn "sse_shufps_<mode>"
3831 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3832 (vec_select:SSEMODE4S
3833 (vec_concat:<ssedoublesizemode>
3834 (match_operand:SSEMODE4S 1 "register_operand" "0")
3835 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3836 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3837 (match_operand 4 "const_0_to_3_operand" "")
3838 (match_operand 5 "const_4_to_7_operand" "")
3839 (match_operand 6 "const_4_to_7_operand" "")])))]
3843 mask |= INTVAL (operands[3]) << 0;
3844 mask |= INTVAL (operands[4]) << 2;
3845 mask |= (INTVAL (operands[5]) - 4) << 4;
3846 mask |= (INTVAL (operands[6]) - 4) << 6;
3847 operands[3] = GEN_INT (mask);
3849 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3851 [(set_attr "type" "sselog")
3852 (set_attr "length_immediate" "1")
3853 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Three alternatives:
;;   0: mem <- reg              movhps store
;;   1: reg <- reg              movhlps
;;   2: reg <- offsettable mem  movlps load using the %H1 operand form
3855 (define_insn "sse_storehps"
3856 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3858 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3859 (parallel [(const_int 2) (const_int 3)])))]
3862 %vmovhps\t{%1, %0|%0, %1}
3863 %vmovhlps\t{%1, %d0|%d0, %1}
3864 %vmovlps\t{%H1, %d0|%d0, %H1}"
3865 [(set_attr "type" "ssemov")
3866 (set_attr "prefix" "maybe_vex")
3867 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for loadhps: combines elements 0 and 1 of V4SF operand 1 with
;; V2SF operand 2.  The preparation statement calls
;; ix86_fixup_binary_operands to legitimize the operands before matching.
3869 (define_expand "sse_loadhps_exp"
3870 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3873 (match_operand:V4SF 1 "nonimmediate_operand" "")
3874 (parallel [(const_int 0) (const_int 1)]))
3875 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3877 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadhps: elements 0 and 1 of V4SF operand 1 combined with V2SF
;; operand 2.  Three alternatives:
;;   0: reg <- reg, mem   vmovhps load
;;   1: reg <- reg, reg   vmovlhps
;;   2: offsettable mem (tied to operand 1) <- reg   vmovlps store to %H0
3879 (define_insn "*avx_loadhps"
3880 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3883 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3884 (parallel [(const_int 0) (const_int 1)]))
3885 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3888 vmovhps\t{%2, %1, %0|%0, %1, %2}
3889 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3890 vmovlps\t{%2, %H0|%H0, %2}"
3891 [(set_attr "type" "ssemov")
3892 (set_attr "prefix" "vex")
3893 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE loadhps: same shape as *avx_loadhps, with operand 1 tied to
;; the destination in every alternative.  Three alternatives:
;;   0: reg <- mem              movhps load
;;   1: reg <- reg              movlhps
;;   2: offsettable mem <- reg  movlps store to %H0
3895 (define_insn "sse_loadhps"
3896 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3899 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3900 (parallel [(const_int 0) (const_int 1)]))
3901 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3904 movhps\t{%2, %0|%0, %2}
3905 movlhps\t{%2, %0|%0, %2}
3906 movlps\t{%2, %H0|%H0, %2}"
3907 [(set_attr "type" "ssemov")
3908 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX storelps: extracts elements 0 and 1 of V4SF operand 1 into V2SF
;; operand 0.  Three alternatives:
;;   0: mem <- reg   vmovlps store
;;   1: reg <- reg   vmovaps (full-register copy)
;;   2: reg <- mem   vmovlps load, with %0 also used as the merge source
;; The vmovaps alternative is tagged mode V4SF, matching the movaps
;; alternative of sse_storelps (it was previously tagged V2DF).
3910 (define_insn "*avx_storelps"
3911 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3913 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3914 (parallel [(const_int 0) (const_int 1)])))]
3917 vmovlps\t{%1, %0|%0, %1}
3918 vmovaps\t{%1, %0|%0, %1}
3919 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3920 [(set_attr "type" "ssemov")
3921 (set_attr "prefix" "vex")
3922 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE storelps: extracts elements 0 and 1 of V4SF operand 1 into
;; V2SF operand 0.  Three alternatives:
;;   0: mem <- reg   movlps store
;;   1: reg <- reg   movaps (full-register copy)
;;   2: reg <- mem   movlps load
3924 (define_insn "sse_storelps"
3925 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3927 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3928 (parallel [(const_int 0) (const_int 1)])))]
3931 movlps\t{%1, %0|%0, %1}
3932 movaps\t{%1, %0|%0, %1}
3933 movlps\t{%1, %0|%0, %1}"
3934 [(set_attr "type" "ssemov")
3935 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for loadlps: combines V2SF operand 2 with elements 2 and 3 of
;; V4SF operand 1.  The preparation statement calls
;; ix86_fixup_binary_operands to legitimize the operands before matching.
3937 (define_expand "sse_loadlps_exp"
3938 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3940 (match_operand:V2SF 2 "nonimmediate_operand" "")
3942 (match_operand:V4SF 1 "nonimmediate_operand" "")
3943 (parallel [(const_int 2) (const_int 3)]))))]
3945 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadlps: V2SF operand 2 combined with elements 2 and 3 of V4SF
;; operand 1.  Three alternatives:
;;   0: reg <- reg, reg   vshufps with immediate 0xe4
;;   1: reg <- reg, mem   vmovlps load
;;   2: mem (tied to operand 1) <- reg   vmovlps store
;; Alternative 0 must use the VEX mnemonic vshufps: its template has three
;; register operands plus the immediate, a form the legacy shufps mnemonic
;; does not accept, and the insn is tagged prefix "vex".
3947 (define_insn "*avx_loadlps"
3948 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3950 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3952 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3953 (parallel [(const_int 2) (const_int 3)]))))]
3956 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3957 vmovlps\t{%2, %1, %0|%0, %1, %2}
3958 vmovlps\t{%2, %0|%0, %2}"
3959 [(set_attr "type" "sselog,ssemov,ssemov")
3960 (set_attr "length_immediate" "1,*,*")
3961 (set_attr "prefix" "vex")
3962 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE loadlps: V2SF operand 2 combined with elements 2 and 3 of
;; V4SF operand 1.  Three alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register:
;;      shufps with immediate 0xe4
;;   1: operand 1 tied to the destination, operand 2 in memory: movlps load
;;   2: memory destination tied to operand 1: movlps store
3964 (define_insn "sse_loadlps"
3965 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3967 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3969 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3970 (parallel [(const_int 2) (const_int 3)]))))]
3973 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3974 movlps\t{%2, %0|%0, %2}
3975 movlps\t{%2, %0|%0, %2}"
3976 [(set_attr "type" "sselog,ssemov,ssemov")
3977 (set_attr "length_immediate" "1,*,*")
3978 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX vmovss between registers: three-operand form combining V4SF
;; operands 2 and 1 into operand 0.
3980 (define_insn "*avx_movss"
3981 [(set (match_operand:V4SF 0 "register_operand" "=x")
3983 (match_operand:V4SF 2 "register_operand" "x")
3984 (match_operand:V4SF 1 "register_operand" "x")
3987 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3988 [(set_attr "type" "ssemov")
3989 (set_attr "prefix" "vex")
3990 (set_attr "mode" "SF")])
;; Legacy SSE movss between registers: operand 1 is tied to the
;; destination ("0"), so the template names only operands 0 and 2.
3992 (define_insn "sse_movss"
3993 [(set (match_operand:V4SF 0 "register_operand" "=x")
3995 (match_operand:V4SF 2 "register_operand" "x")
3996 (match_operand:V4SF 1 "register_operand" "0")
3999 "movss\t{%2, %0|%0, %2}"
4000 [(set_attr "type" "ssemov")
4001 (set_attr "mode" "SF")])
;; AVX broadcast of SF operand 1 into a V4SF register, implemented as
;; vshufps with immediate 0 using operand 1 as both sources.
4003 (define_insn "*vec_dupv4sf_avx"
4004 [(set (match_operand:V4SF 0 "register_operand" "=x")
4006 (match_operand:SF 1 "register_operand" "x")))]
4008 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
4009 [(set_attr "type" "sselog1")
4010 (set_attr "length_immediate" "1")
4011 (set_attr "prefix" "vex")
4012 (set_attr "mode" "V4SF")])
;; Legacy SSE broadcast of SF operand 1 into a V4SF register.  Operand 1
;; is tied to the destination ("0"), so shufps with immediate 0 uses %0 as
;; both source and destination.
4014 (define_insn "*vec_dupv4sf"
4015 [(set (match_operand:V4SF 0 "register_operand" "=x")
4017 (match_operand:SF 1 "register_operand" "0")))]
4019 "shufps\t{$0, %0, %0|%0, %0, 0}"
4020 [(set_attr "type" "sselog1")
4021 (set_attr "length_immediate" "1")
4022 (set_attr "mode" "V4SF")])
;; AVX build of a V2SF value from two SF operands.  Five alternatives:
;;   0: xmm <- xmm, xmm      vunpcklps
;;   1: xmm <- xmm, mem      vinsertps with immediate 0x10
;;   2: xmm <- mem, "C"      vmovss load
;;   3: mmx <- tied, mmx/mem punpckldq (template names only %0 and %2)
;;   4: mmx <- mem, "C"      movd load
;; The MMX alternatives (3 and 4) keep prefix "orig"; the others are "vex".
4024 (define_insn "*vec_concatv2sf_avx"
4025 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
4027 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
4028 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
4031 vunpcklps\t{%2, %1, %0|%0, %1, %2}
4032 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
4033 vmovss\t{%1, %0|%0, %1}
4034 punpckldq\t{%2, %0|%0, %2}
4035 movd\t{%1, %0|%0, %1}"
4036 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
4037 (set_attr "length_immediate" "*,1,*,*,*")
4038 (set_attr "prefix_extra" "*,1,*,*,*")
4039 (set (attr "prefix")
4040 (if_then_else (eq_attr "alternative" "3,4")
4041 (const_string "orig")
4042 (const_string "vex")))
4043 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
4045 ;; Although insertps takes register source, we prefer
4046 ;; unpcklps with register source since it is shorter.
;; Build of a V2SF value from two SF operands, with insertps available
;; for a memory second operand.  Five alternatives:
;;   0: xmm (tied to op 1) <- xmm   unpcklps
;;   1: xmm (tied to op 1) <- mem   insertps with immediate 0x10
;;   2: xmm <- mem, "C"             movss load
;;   3: mmx (tied to op 1) <- mmx/mem   punpckldq
;;   4: mmx <- mem, "C"             movd load
4047 (define_insn "*vec_concatv2sf_sse4_1"
4048 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
4050 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
4051 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
4054 unpcklps\t{%2, %0|%0, %2}
4055 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
4056 movss\t{%1, %0|%0, %1}
4057 punpckldq\t{%2, %0|%0, %2}
4058 movd\t{%1, %0|%0, %1}"
4059 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
4060 (set_attr "prefix_data16" "*,1,*,*,*")
4061 (set_attr "prefix_extra" "*,1,*,*,*")
4062 (set_attr "length_immediate" "*,1,*,*,*")
4063 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
4065 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4066 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4067 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build of a V2SF value from two SF operands; operand 2 is restricted
;; to reg_or_0_operand.  Four alternatives:
;;   0: xmm (tied to op 1) <- xmm   unpcklps
;;   1: xmm <- mem, "C"             movss load
;;   2: mmx (tied to op 1) <- mmx   punpckldq
;;   3: mmx <- mem, "C"             movd load
4068 (define_insn "*vec_concatv2sf_sse"
4069 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
4071 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
4072 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
4075 unpcklps\t{%2, %0|%0, %2}
4076 movss\t{%1, %0|%0, %1}
4077 punpckldq\t{%2, %0|%0, %2}
4078 movd\t{%1, %0|%0, %1}"
4079 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4080 (set_attr "mode" "V4SF,SF,DI,DI")])
;; AVX build of a V4SF value from two V2SF operands: vmovlhps when
;; operand 2 is a register, vmovhps when it is in memory.
4082 (define_insn "*vec_concatv4sf_avx"
4083 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4085 (match_operand:V2SF 1 "register_operand" " x,x")
4086 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
4089 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4090 vmovhps\t{%2, %1, %0|%0, %1, %2}"
4091 [(set_attr "type" "ssemov")
4092 (set_attr "prefix" "vex")
4093 (set_attr "mode" "V4SF,V2SF")])
;; Two-operand SSE counterpart of the AVX pattern: operand 1 is tied to
;; the destination ("0"); movlhps for a register operand 2, movhps for memory.
4095 (define_insn "*vec_concatv4sf_sse"
4096 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4098 (match_operand:V2SF 1 "register_operand" " 0,0")
4099 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
4102 movlhps\t{%2, %0|%0, %2}
4103 movhps\t{%2, %0|%0, %2}"
4104 [(set_attr "type" "ssemov")
4105 (set_attr "mode" "V4SF,V2SF")])
;; Vector-initialisation expander for every mode in SSEMODE.  All work is
;; delegated to ix86_expand_vector_init, called with `false' as first argument.
4107 (define_expand "vec_init<mode>"
4108 [(match_operand:SSEMODE 0 "register_operand" "")
4109 (match_operand 1 "" "")]
4112 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX forms combining an SF scalar (operand 2) with a V4SF (operand 1):
;; vmovss with a register or memory source, or vmovd from a general register.
;; NOTE(review): the fourth alternative (memory destination) has no template
;; line here; presumably it is emitted as "#" and split later -- confirm.
4116 (define_insn "*vec_setv4sf_0_avx"
4117 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
4120 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
4121 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
4125 vmovss\t{%2, %1, %0|%0, %1, %2}
4126 vmovss\t{%2, %0|%0, %2}
4127 vmovd\t{%2, %0|%0, %2}
4129 [(set_attr "type" "ssemov")
4130 (set_attr "prefix" "vex")
4131 (set_attr "mode" "SF")])
;; Non-AVX counterpart of *vec_setv4sf_0_avx: movss (register or memory
;; source, destination tied to operand 1 in the first alternative) or movd
;; from a general register into a Y2-class register.
;; NOTE(review): the memory-destination alternative has no template line
;; here; presumably it is split later -- confirm.
4133 (define_insn "vec_setv4sf_0"
4134 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
4137 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
4138 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
4142 movss\t{%2, %0|%0, %2}
4143 movss\t{%2, %0|%0, %2}
4144 movd\t{%2, %0|%0, %2}
4146 [(set_attr "type" "ssemov")
4147 (set_attr "mode" "SF")])
4149 ;; A subset is vec_setv4sf.
;; Operand 3 is a power-of-two constant in 1..8.  Before output it is
;; rewritten to log2(operand 3) shifted left by 4, which is then used as
;; the vinsertps immediate.
4150 (define_insn "*vec_setv4sf_avx"
4151 [(set (match_operand:V4SF 0 "register_operand" "=x")
4154 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4155 (match_operand:V4SF 1 "register_operand" "x")
4156 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4159 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4160 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4162 [(set_attr "type" "sselog")
4163 (set_attr "prefix_extra" "1")
4164 (set_attr "length_immediate" "1")
4165 (set_attr "prefix" "vex")
4166 (set_attr "mode" "V4SF")])
;; SSE4.1 two-operand form of *vec_setv4sf_avx (operand 1 tied to the
;; destination).  Operand 3, a power of two in 1..8, is rewritten to
;; log2(operand 3) << 4 and used as the insertps immediate.
4168 (define_insn "*vec_setv4sf_sse4_1"
4169 [(set (match_operand:V4SF 0 "register_operand" "=x")
4172 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4173 (match_operand:V4SF 1 "register_operand" "0")
4174 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4177 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4178 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4180 [(set_attr "type" "sselog")
4181 (set_attr "prefix_data16" "1")
4182 (set_attr "prefix_extra" "1")
4183 (set_attr "length_immediate" "1")
4184 (set_attr "mode" "V4SF")])
;; vinsertps with a caller-supplied immediate in 0..255, modelled as an
;; unspec.  Operand 2 may be a register or memory.
4186 (define_insn "*avx_insertps"
4187 [(set (match_operand:V4SF 0 "register_operand" "=x")
4188 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4189 (match_operand:V4SF 1 "register_operand" "x")
4190 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4193 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4194 [(set_attr "type" "sselog")
4195 (set_attr "prefix" "vex")
4196 (set_attr "prefix_extra" "1")
4197 (set_attr "length_immediate" "1")
4198 (set_attr "mode" "V4SF")])
;; insertps with a caller-supplied immediate in 0..255, modelled as an
;; unspec.  Operand 1 is tied to the destination; unlike the AVX pattern,
;; operand 2 must be a register here.
4200 (define_insn "sse4_1_insertps"
4201 [(set (match_operand:V4SF 0 "register_operand" "=x")
4202 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4203 (match_operand:V4SF 1 "register_operand" "0")
4204 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4207 "insertps\t{%3, %2, %0|%0, %2, %3}";
4208 [(set_attr "type" "sselog")
4209 (set_attr "prefix_data16" "1")
4210 (set_attr "prefix_extra" "1")
4211 (set_attr "length_immediate" "1")
4212 (set_attr "mode" "V4SF")])
;; After reload (TARGET_SSE), a non-memory SF operand destined for a V4SF
;; in memory is stored with a plain SFmode move to byte offset 0.
4215 [(set (match_operand:V4SF 0 "memory_operand" "")
4218 (match_operand:SF 1 "nonmemory_operand" ""))
4221 "TARGET_SSE && reload_completed"
4224 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Element-set expander for every mode in SSEMODE.  Operand 2 is the
;; constant element index; the work is done by ix86_expand_vector_set.
4228 (define_expand "vec_set<mode>"
4229 [(match_operand:SSEMODE 0 "register_operand" "")
4230 (match_operand:<ssescalarmode> 1 "register_operand" "")
4231 (match_operand 2 "const_int_operand" "")]
4234 ix86_expand_vector_set (false, operands[0], operands[1],
4235 INTVAL (operands[2]));
;; Extract element 0 of a V4SF.  Memory-to-memory is rejected by the insn
;; condition.  After reload the pattern is split into an SFmode move whose
;; source is operand 1 re-expressed in SFmode (same register number, or
;; its low part).
4239 (define_insn_and_split "*vec_extractv4sf_0"
4240 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4242 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4243 (parallel [(const_int 0)])))]
4244 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4246 "&& reload_completed"
4249 rtx op1 = operands[1];
4251 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4253 op1 = gen_lowpart (SFmode, op1);
4254 emit_move_insn (operands[0], op1);
;; Expander for extracting one 128-bit half of a 256-bit vector.  Operand 2
;; (0 or 1) selects between the vec_extract_lo_<mode> and
;; vec_extract_hi_<mode> patterns.
4258 (define_expand "avx_vextractf128<mode>"
4259 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4260 (match_operand:AVX256MODE 1 "register_operand" "")
4261 (match_operand:SI 2 "const_0_to_1_operand" "")]
4264 switch (INTVAL (operands[2]))
4267 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4270 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Low half (elements 0-1) of a four-element 256-bit vector, via
;; vextractf128 with immediate 0.  Destination is a register or memory.
4278 (define_insn "vec_extract_lo_<mode>"
4279 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4280 (vec_select:<avxhalfvecmode>
4281 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4282 (parallel [(const_int 0) (const_int 1)])))]
4284 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4285 [(set_attr "type" "sselog")
4286 (set_attr "prefix_extra" "1")
4287 (set_attr "length_immediate" "1")
4288 (set_attr "memory" "none,store")
4289 (set_attr "prefix" "vex")
4290 (set_attr "mode" "V8SF")])
;; High half (elements 2-3) of a four-element 256-bit vector, via
;; vextractf128 with immediate 1.  Destination is a register or memory.
4292 (define_insn "vec_extract_hi_<mode>"
4293 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4294 (vec_select:<avxhalfvecmode>
4295 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4296 (parallel [(const_int 2) (const_int 3)])))]
4298 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4299 [(set_attr "type" "sselog")
4300 (set_attr "prefix_extra" "1")
4301 (set_attr "length_immediate" "1")
4302 (set_attr "memory" "none,store")
4303 (set_attr "prefix" "vex")
4304 (set_attr "mode" "V8SF")])
;; Low half (elements 0-3) of an eight-element 256-bit vector.  The low
;; 128 bits are selected by vextractf128 immediate 0; immediate 1 would
;; return elements 4-7, which is what vec_extract_hi_<mode> emits.
4306 (define_insn "vec_extract_lo_<mode>"
4307 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4308 (vec_select:<avxhalfvecmode>
4309 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4310 (parallel [(const_int 0) (const_int 1)
4311 (const_int 2) (const_int 3)])))]
4313 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4314 [(set_attr "type" "sselog")
4315 (set_attr "prefix_extra" "1")
4316 (set_attr "length_immediate" "1")
4317 (set_attr "memory" "none,store")
4318 (set_attr "prefix" "vex")
4319 (set_attr "mode" "V8SF")])
;; High half (elements 4-7) of an eight-element 256-bit vector, via
;; vextractf128 with immediate 1.  Destination is a register or memory.
4321 (define_insn "vec_extract_hi_<mode>"
4322 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4323 (vec_select:<avxhalfvecmode>
4324 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4325 (parallel [(const_int 4) (const_int 5)
4326 (const_int 6) (const_int 7)])))]
4328 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4329 [(set_attr "type" "sselog")
4330 (set_attr "prefix_extra" "1")
4331 (set_attr "length_immediate" "1")
4332 (set_attr "memory" "none,store")
4333 (set_attr "prefix" "vex")
4334 (set_attr "mode" "V8SF")])
;; Low half (elements 0-7) of a V16HI as a V8HI.  The low 128 bits are
;; selected by vextractf128 immediate 0; immediate 1 would return elements
;; 8-15, which is what vec_extract_hi_v16hi emits.
4336 (define_insn "vec_extract_lo_v16hi"
4337 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4339 (match_operand:V16HI 1 "register_operand" "x,x")
4340 (parallel [(const_int 0) (const_int 1)
4341 (const_int 2) (const_int 3)
4342 (const_int 4) (const_int 5)
4343 (const_int 6) (const_int 7)])))]
4345 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4346 [(set_attr "type" "sselog")
4347 (set_attr "prefix_extra" "1")
4348 (set_attr "length_immediate" "1")
4349 (set_attr "memory" "none,store")
4350 (set_attr "prefix" "vex")
4351 (set_attr "mode" "V8SF")])
;; High half (elements 8-15) of a V16HI as a V8HI, via vextractf128 with
;; immediate 1.  Destination is a register or memory.
4353 (define_insn "vec_extract_hi_v16hi"
4354 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4356 (match_operand:V16HI 1 "register_operand" "x,x")
4357 (parallel [(const_int 8) (const_int 9)
4358 (const_int 10) (const_int 11)
4359 (const_int 12) (const_int 13)
4360 (const_int 14) (const_int 15)])))]
4362 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4363 [(set_attr "type" "sselog")
4364 (set_attr "prefix_extra" "1")
4365 (set_attr "length_immediate" "1")
4366 (set_attr "memory" "none,store")
4367 (set_attr "prefix" "vex")
4368 (set_attr "mode" "V8SF")])
;; Low half (elements 0-15) of a V32QI as a V16QI.  The low 128 bits are
;; selected by vextractf128 immediate 0; immediate 1 would return elements
;; 16-31, which is what vec_extract_hi_v32qi emits.
4370 (define_insn "vec_extract_lo_v32qi"
4371 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4373 (match_operand:V32QI 1 "register_operand" "x,x")
4374 (parallel [(const_int 0) (const_int 1)
4375 (const_int 2) (const_int 3)
4376 (const_int 4) (const_int 5)
4377 (const_int 6) (const_int 7)
4378 (const_int 8) (const_int 9)
4379 (const_int 10) (const_int 11)
4380 (const_int 12) (const_int 13)
4381 (const_int 14) (const_int 15)])))]
4383 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4384 [(set_attr "type" "sselog")
4385 (set_attr "prefix_extra" "1")
4386 (set_attr "length_immediate" "1")
4387 (set_attr "memory" "none,store")
4388 (set_attr "prefix" "vex")
4389 (set_attr "mode" "V8SF")])
;; High half (elements 16-31) of a V32QI as a V16QI, via vextractf128 with
;; immediate 1.  Destination is a register or memory.
4391 (define_insn "vec_extract_hi_v32qi"
4392 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4394 (match_operand:V32QI 1 "register_operand" "x,x")
4395 (parallel [(const_int 16) (const_int 17)
4396 (const_int 18) (const_int 19)
4397 (const_int 20) (const_int 21)
4398 (const_int 22) (const_int 23)
4399 (const_int 24) (const_int 25)
4400 (const_int 26) (const_int 27)
4401 (const_int 28) (const_int 29)
4402 (const_int 30) (const_int 31)])))]
4404 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4405 [(set_attr "type" "sselog")
4406 (set_attr "prefix_extra" "1")
4407 (set_attr "length_immediate" "1")
4408 (set_attr "memory" "none,store")
4409 (set_attr "prefix" "vex")
4410 (set_attr "mode" "V8SF")])
;; Extract element 0..3 (operand 2) of a V4SF register into a general
;; register or memory ("=rm") with extractps.
;; NOTE(review): the %v template prefix with prefix=maybe_vex presumably
;; selects the VEX-encoded mnemonic under AVX -- confirm against i386.c.
4412 (define_insn "*sse4_1_extractps"
4413 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4415 (match_operand:V4SF 1 "register_operand" "x")
4416 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4418 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4419 [(set_attr "type" "sselog")
4420 (set_attr "prefix_data16" "1")
4421 (set_attr "prefix_extra" "1")
4422 (set_attr "length_immediate" "1")
4423 (set_attr "prefix" "maybe_vex")
4424 (set_attr "mode" "V4SF")])
;; Extract element i (operand 2, 0..3) of a V4SF held in offsettable
;; memory: the split loads the SF directly from byte offset i*4.
4426 (define_insn_and_split "*vec_extract_v4sf_mem"
4427 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4429 (match_operand:V4SF 1 "memory_operand" "o")
4430 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4436 int i = INTVAL (operands[2]);
4438 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Element-extract expander for every mode in SSEMODE.  Operand 2 is the
;; constant element index; the work is done by ix86_expand_vector_extract.
4442 (define_expand "vec_extract<mode>"
4443 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4444 (match_operand:SSEMODE 1 "register_operand" "")
4445 (match_operand 2 "const_int_operand" "")]
4448 ix86_expand_vector_extract (false, operands[0], operands[1],
4449 INTVAL (operands[2]));
4453 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4455 ;; Parallel double-precision floating point element swizzling
4457 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-bit vunpckhpd: selects elements 1, 5, 3, 7 of the two V4DF operands
;; (operand 1 supplies indices 0-3, operand 2 indices 4-7).
4459 (define_insn "avx_unpckhpd256"
4460 [(set (match_operand:V4DF 0 "register_operand" "=x")
4463 (match_operand:V4DF 1 "register_operand" "x")
4464 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4465 (parallel [(const_int 1) (const_int 5)
4466 (const_int 3) (const_int 7)])))]
4468 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4469 [(set_attr "type" "sselog")
4470 (set_attr "prefix" "vex")
4471 (set_attr "mode" "V4DF")])
;; Expander for the V2DF high-unpack.  It accepts nonimmediate operands and
;; calls ix86_fixup_binary_operands before the insn patterns are matched.
4473 (define_expand "sse2_unpckhpd_exp"
4474 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4477 (match_operand:V2DF 1 "nonimmediate_operand" "")
4478 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4479 (parallel [(const_int 1)
4482 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX V2DF high-unpack.  Alternatives: vunpckhpd (all registers), vmovlpd
;; reading the high part of memory operand 1 (%H1), and vmovhpd storing to a
;; memory destination that is tied to operand 2.
4484 (define_insn "*avx_unpckhpd"
4485 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4488 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
4489 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
4490 (parallel [(const_int 1)
4492 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4494 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4495 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4496 vmovhpd\t{%1, %0|%0, %1}"
4497 [(set_attr "type" "sselog,ssemov,ssemov")
4498 (set_attr "prefix" "vex")
4499 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE2 V2DF high-unpack.  Alternatives: unpckhpd (operand 1 tied to the
;; destination), movlpd reading the high part of memory operand 1 (%H1)
;; with operand 2 tied to the destination, and movhpd storing to memory.
4501 (define_insn "sse2_unpckhpd"
4502 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4505 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4506 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4507 (parallel [(const_int 1)
4509 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4511 unpckhpd\t{%2, %0|%0, %2}
4512 movlpd\t{%H1, %0|%0, %H1}
4513 movhpd\t{%1, %0|%0, %1}"
4514 [(set_attr "type" "sselog,ssemov,ssemov")
4515 (set_attr "prefix_data16" "*,1,1")
4516 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; 256-bit vmovddup of a V4DF register or memory operand; the selection
;; vector is elements 0, 2, 4, 6.
4518 (define_insn "avx_movddup256"
4519 [(set (match_operand:V4DF 0 "register_operand" "=x")
4522 (match_operand:V4DF 1 "nonimmediate_operand" "xm")
4524 (parallel [(const_int 0) (const_int 2)
4525 (const_int 4) (const_int 6)])))]
4527 "vmovddup\t{%1, %0|%0, %1}"
4528 [(set_attr "type" "sselog1")
4529 (set_attr "prefix" "vex")
4530 (set_attr "mode" "V4DF")])
;; AVX V2DF movddup.  The register-destination alternative emits vmovddup.
;; NOTE(review): the memory-destination alternative has no template line
;; here; presumably it is "#" and handled by a split -- confirm.
4532 (define_insn "*avx_movddup"
4533 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
4536 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
4538 (parallel [(const_int 0)
4540 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4542 vmovddup\t{%1, %0|%0, %1}
4544 [(set_attr "type" "sselog1,ssemov")
4545 (set_attr "prefix" "vex")
4546 (set_attr "mode" "V2DF")])
;; SSE3 V2DF movddup.  The register-destination alternative emits movddup.
;; NOTE(review): the memory-destination alternative has no template line
;; here; presumably it is "#" and handled by a split -- confirm.
4548 (define_insn "*sse3_movddup"
4549 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
4552 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
4554 (parallel [(const_int 0)
4556 "TARGET_SSE3 && reload_completed"
4558 movddup\t{%1, %0|%0, %1}
4560 [(set_attr "type" "sselog1,ssemov")
4561 (set_attr "mode" "V2DF")])
;; After reload, a movddup from a register into memory is done as two DFmode
;; stores of the register's low double, at byte offsets 0 and 8.
4564 [(set (match_operand:V2DF 0 "memory_operand" "")
4567 (match_operand:V2DF 1 "register_operand" "")
4569 (parallel [(const_int 0)
4571 "TARGET_SSE3 && reload_completed"
4574 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4575 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4576 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; 256-bit vunpcklpd: selects elements 0, 4, 2, 6 of the two V4DF operands
;; (operand 1 supplies indices 0-3, operand 2 indices 4-7).
4580 (define_insn "avx_unpcklpd256"
4581 [(set (match_operand:V4DF 0 "register_operand" "=x")
4584 (match_operand:V4DF 1 "register_operand" "x")
4585 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4586 (parallel [(const_int 0) (const_int 4)
4587 (const_int 2) (const_int 6)])))]
4589 "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4590 [(set_attr "type" "sselog")
4591 (set_attr "prefix" "vex")
4592 (set_attr "mode" "V4DF")])
;; Expander for the V2DF low-unpack.  It accepts nonimmediate operands and
;; calls ix86_fixup_binary_operands before the insn patterns are matched.
4594 (define_expand "sse2_unpcklpd_exp"
4595 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4598 (match_operand:V2DF 1 "nonimmediate_operand" "")
4599 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4600 (parallel [(const_int 0)
4603 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX V2DF low-unpack.  Alternatives: vunpcklpd (registers), vmovhpd with a
;; memory operand 2, and vmovlpd storing operand 2 into the high part (%H0)
;; of a memory destination tied to operand 1.
4605 (define_insn "*avx_unpcklpd"
4606 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4609 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
4610 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4611 (parallel [(const_int 0)
4613 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4615 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4616 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4617 vmovlpd\t{%2, %H0|%H0, %2}"
4618 [(set_attr "type" "sselog,ssemov,ssemov")
4619 (set_attr "prefix" "vex")
4620 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE2 V2DF low-unpack with operand 1 always tied to the destination.
;; Alternatives: unpcklpd (register), movhpd (memory operand 2), and movlpd
;; storing operand 2 into the high part (%H0) of a memory destination.
4622 (define_insn "sse2_unpcklpd"
4623 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4626 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4627 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4628 (parallel [(const_int 0)
4630 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4632 unpcklpd\t{%2, %0|%0, %2}
4633 movhpd\t{%2, %0|%0, %2}
4634 movlpd\t{%2, %H0|%H0, %2}"
4635 [(set_attr "type" "sselog,ssemov,ssemov")
4636 (set_attr "prefix_data16" "*,1,1")
4637 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the raw shuffle mask in operand 3 and converting its bits
;; into explicit element indices for avx_shufpd256_1: bit 1 picks 5 or 4,
;; bit 2 picks 3 or 2, bit 3 picks 7 or 6.
4639 (define_expand "avx_shufpd256"
4640 [(match_operand:V4DF 0 "register_operand" "")
4641 (match_operand:V4DF 1 "register_operand" "")
4642 (match_operand:V4DF 2 "nonimmediate_operand" "")
4643 (match_operand:SI 3 "const_int_operand" "")]
4646 int mask = INTVAL (operands[3]);
4647 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4649 GEN_INT (mask & 2 ? 5 : 4),
4650 GEN_INT (mask & 4 ? 3 : 2),
4651 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd with the selection given as four element indices
;; (operands 3-6).  The output code folds them back into the 4-bit
;; immediate: each index minus its range base (0, 4, 2, 6) becomes one bit,
;; and the result replaces operand 3 before the template is printed.
4655 (define_insn "avx_shufpd256_1"
4656 [(set (match_operand:V4DF 0 "register_operand" "=x")
4659 (match_operand:V4DF 1 "register_operand" "x")
4660 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4661 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4662 (match_operand 4 "const_4_to_5_operand" "")
4663 (match_operand 5 "const_2_to_3_operand" "")
4664 (match_operand 6 "const_6_to_7_operand" "")])))]
4668 mask = INTVAL (operands[3]);
4669 mask |= (INTVAL (operands[4]) - 4) << 1;
4670 mask |= (INTVAL (operands[5]) - 2) << 2;
4671 mask |= (INTVAL (operands[6]) - 6) << 3;
4672 operands[3] = GEN_INT (mask);
4674 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4676 [(set_attr "type" "sselog")
4677 (set_attr "length_immediate" "1")
4678 (set_attr "prefix" "vex")
4679 (set_attr "mode" "V4DF")])
;; Expander taking the raw shuffle mask in operand 3 and forwarding explicit
;; element indices to sse2_shufpd_v2df; mask bit 1 picks index 3 or 2.
4681 (define_expand "sse2_shufpd"
4682 [(match_operand:V2DF 0 "register_operand" "")
4683 (match_operand:V2DF 1 "register_operand" "")
4684 (match_operand:V2DF 2 "nonimmediate_operand" "")
4685 (match_operand:SI 3 "const_int_operand" "")]
4688 int mask = INTVAL (operands[3]);
4689 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4691 GEN_INT (mask & 2 ? 3 : 2)));
;; Even-element extraction for the SSEMODE4S modes: a vec_select over the
;; concatenation of operands 1 and 2, starting at element 0.
4695 (define_expand "vec_extract_even<mode>"
4696 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4697 (vec_select:SSEMODE4S
4698 (vec_concat:<ssedoublesizemode>
4699 (match_operand:SSEMODE4S 1 "register_operand" "")
4700 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4701 (parallel [(const_int 0)
;; Odd-element extraction for the SSEMODE4S modes: a vec_select over the
;; concatenation of operands 1 and 2, starting at element 1.
4707 (define_expand "vec_extract_odd<mode>"
4708 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4709 (vec_select:SSEMODE4S
4710 (vec_concat:<ssedoublesizemode>
4711 (match_operand:SSEMODE4S 1 "register_operand" "")
4712 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4713 (parallel [(const_int 1)
;; Even-element extraction for the SSEMODE2D modes: a vec_select over the
;; concatenation of operands 1 and 2, starting at element 0.
4719 (define_expand "vec_extract_even<mode>"
4720 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4721 (vec_select:SSEMODE2D
4722 (vec_concat:<ssedoublesizemode>
4723 (match_operand:SSEMODE2D 1 "register_operand" "")
4724 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4725 (parallel [(const_int 0)
;; Odd-element extraction for the SSEMODE2D modes: a vec_select over the
;; concatenation of operands 1 and 2, starting at element 1.
4729 (define_expand "vec_extract_odd<mode>"
4730 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4731 (vec_select:SSEMODE2D
4732 (vec_concat:<ssedoublesizemode>
4733 (match_operand:SSEMODE2D 1 "register_operand" "")
4734 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4735 (parallel [(const_int 1)
4739 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand vpunpckhqdq on V2DI; operand 2 may be in memory.
4740 (define_insn "*avx_punpckhqdq"
4741 [(set (match_operand:V2DI 0 "register_operand" "=x")
4744 (match_operand:V2DI 1 "register_operand" "x")
4745 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4746 (parallel [(const_int 1)
4749 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4750 [(set_attr "type" "sselog")
4751 (set_attr "prefix" "vex")
4752 (set_attr "mode" "TI")])
;; SSE2 two-operand punpckhqdq on V2DI; operand 1 is tied to the
;; destination and operand 2 may be in memory.
4754 (define_insn "sse2_punpckhqdq"
4755 [(set (match_operand:V2DI 0 "register_operand" "=x")
4758 (match_operand:V2DI 1 "register_operand" "0")
4759 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4760 (parallel [(const_int 1)
4763 "punpckhqdq\t{%2, %0|%0, %2}"
4764 [(set_attr "type" "sselog")
4765 (set_attr "prefix_data16" "1")
4766 (set_attr "mode" "TI")])
;; AVX three-operand vpunpcklqdq on V2DI; operand 2 may be in memory.
4768 (define_insn "*avx_punpcklqdq"
4769 [(set (match_operand:V2DI 0 "register_operand" "=x")
4772 (match_operand:V2DI 1 "register_operand" "x")
4773 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4774 (parallel [(const_int 0)
4777 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4778 [(set_attr "type" "sselog")
4779 (set_attr "prefix" "vex")
4780 (set_attr "mode" "TI")])
;; SSE2 two-operand punpcklqdq on V2DI; operand 1 is tied to the
;; destination and operand 2 may be in memory.
4782 (define_insn "sse2_punpcklqdq"
4783 [(set (match_operand:V2DI 0 "register_operand" "=x")
4786 (match_operand:V2DI 1 "register_operand" "0")
4787 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4788 (parallel [(const_int 0)
4791 "punpcklqdq\t{%2, %0|%0, %2}"
4792 [(set_attr "type" "sselog")
4793 (set_attr "prefix_data16" "1")
4794 (set_attr "mode" "TI")])
;; AVX vshufpd for the SSEMODE2D modes.  The two element indices (operand 3
;; in 0..1, operand 4 in 2..3) are folded into a 2-bit immediate that
;; replaces operand 3 before the template is printed.
4796 (define_insn "*avx_shufpd_<mode>"
4797 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4798 (vec_select:SSEMODE2D
4799 (vec_concat:<ssedoublesizemode>
4800 (match_operand:SSEMODE2D 1 "register_operand" "x")
4801 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4802 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4803 (match_operand 4 "const_2_to_3_operand" "")])))]
4807 mask = INTVAL (operands[3]);
4808 mask |= (INTVAL (operands[4]) - 2) << 1;
4809 operands[3] = GEN_INT (mask);
4811 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4813 [(set_attr "type" "sselog")
4814 (set_attr "length_immediate" "1")
4815 (set_attr "prefix" "vex")
4816 (set_attr "mode" "V2DF")])
;; SSE2 shufpd for the SSEMODE2D modes, operand 1 tied to the destination.
;; The two element indices (operand 3 in 0..1, operand 4 in 2..3) are folded
;; into a 2-bit immediate that replaces operand 3 before printing.
4818 (define_insn "sse2_shufpd_<mode>"
4819 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4820 (vec_select:SSEMODE2D
4821 (vec_concat:<ssedoublesizemode>
4822 (match_operand:SSEMODE2D 1 "register_operand" "0")
4823 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4824 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4825 (match_operand 4 "const_2_to_3_operand" "")])))]
4829 mask = INTVAL (operands[3]);
4830 mask |= (INTVAL (operands[4]) - 2) << 1;
4831 operands[3] = GEN_INT (mask);
4833 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4835 [(set_attr "type" "sselog")
4836 (set_attr "length_immediate" "1")
4837 (set_attr "mode" "V2DF")])
4839 ;; Avoid combining registers from different units in a single alternative,
4840 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 of a V2DF as a DF (AVX).  vmovhpd stores it to memory;
;; vunpckhpd produces it in a register.
;; NOTE(review): the three alternatives with a memory source have no
;; template line here; presumably they are "#" and split later -- confirm.
4841 (define_insn "*avx_storehpd"
4842 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4844 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4845 (parallel [(const_int 1)])))]
4846 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4848 vmovhpd\t{%1, %0|%0, %1}
4849 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4853 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4854 (set_attr "prefix" "vex")
4855 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Extract element 1 of a V2DF as a DF (SSE2).  The first alternative is a
;; movhpd store to memory.
;; NOTE(review): the other four alternatives have no template line here;
;; presumably a register form and "#" entries follow -- confirm.
4857 (define_insn "sse2_storehpd"
4858 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4860 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4861 (parallel [(const_int 1)])))]
4862 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4864 movhpd\t{%1, %0|%0, %1}
4869 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4870 (set_attr "prefix_data16" "1,*,*,*,*")
4871 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; After reload, extracting element 1 of a V2DF in memory into a register
;; becomes a plain DFmode load from byte offset 8 of that memory.
4874 [(set (match_operand:DF 0 "register_operand" "")
4876 (match_operand:V2DF 1 "memory_operand" "")
4877 (parallel [(const_int 1)])))]
4878 "TARGET_SSE2 && reload_completed"
4879 [(set (match_dup 0) (match_dup 1))]
4881 operands[1] = adjust_address (operands[1], DFmode, 8);
4884 ;; Avoid combining registers from different units in a single alternative,
4885 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of a V2DF as a DF.  The first alternative stores it to
;; memory with (v)movlpd.
;; NOTE(review): the other four alternatives have no template line here;
;; presumably they are "#" and split later -- confirm.
4886 (define_insn "sse2_storelpd"
4887 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4889 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4890 (parallel [(const_int 0)])))]
4891 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4893 %vmovlpd\t{%1, %0|%0, %1}
4898 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4899 (set_attr "prefix_data16" "1,*,*,*,*")
4900 (set_attr "prefix" "maybe_vex")
4901 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; After reload, extracting element 0 of a V2DF into a register becomes a
;; DFmode move whose source is operand 1 re-expressed in DFmode (same
;; register number, or its low part).
4904 [(set (match_operand:DF 0 "register_operand" "")
4906 (match_operand:V2DF 1 "nonimmediate_operand" "")
4907 (parallel [(const_int 0)])))]
4908 "TARGET_SSE2 && reload_completed"
4911 rtx op1 = operands[1];
4913 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4915 op1 = gen_lowpart (DFmode, op1);
4916 emit_move_insn (operands[0], op1);
;; Expander for loadhpd: element 0 of operand 1 is paired with the DF in
;; operand 2.  Operands are legitimised by ix86_fixup_binary_operands.
4920 (define_expand "sse2_loadhpd_exp"
4921 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4924 (match_operand:V2DF 1 "nonimmediate_operand" "")
4925 (parallel [(const_int 0)]))
4926 (match_operand:DF 2 "nonimmediate_operand" "")))]
4928 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4930 ;; Avoid combining registers from different units in a single alternative,
4931 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX loadhpd: element 0 of operand 1 paired with the DF in operand 2.
;; vmovhpd takes operand 2 from memory, vunpcklpd from a register.
;; NOTE(review): the three memory-destination alternatives have no template
;; line here; presumably they are "#" and split later -- confirm.
4932 (define_insn "*avx_loadhpd"
4933 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4936 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4937 (parallel [(const_int 0)]))
4938 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4939 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4941 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4942 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4946 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4947 (set_attr "prefix" "vex")
4948 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 loadhpd: element 0 of operand 1 paired with the DF in operand 2.
;; Alternatives: movhpd (memory operand 2), unpcklpd (register operand 2),
;; shufpd $1 (operand 2 tied to the destination, operand 1 in a register).
;; NOTE(review): in the shufpd alternative the destination initially holds
;; operand 2; shufpd takes its low result from the destination and its high
;; result from the source, so "$1, %1, %0" looks like it cannot yield
;; {operand 1 low, operand 2} -- confirm and consider dropping it.
;; NOTE(review): the three memory-destination alternatives have no template
;; line here; presumably they are "#" and split later -- confirm.
4950 (define_insn "sse2_loadhpd"
4951 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
4954 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
4955 (parallel [(const_int 0)]))
4956 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
4957 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4959 movhpd\t{%2, %0|%0, %2}
4960 unpcklpd\t{%2, %0|%0, %2}
4961 shufpd\t{$1, %1, %0|%0, %1, 1}
4965 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4966 (set_attr "prefix_data16" "1,*,*,*,*,*")
4967 (set_attr "length_immediate" "*,*,1,*,*,*")
4968 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
;; After reload, replacing the high element of a V2DF in memory (low
;; element unchanged) becomes a DFmode store of operand 1 at byte offset 8.
4971 [(set (match_operand:V2DF 0 "memory_operand" "")
4973 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4974 (match_operand:DF 1 "register_operand" "")))]
4975 "TARGET_SSE2 && reload_completed"
4976 [(set (match_dup 0) (match_dup 1))]
4978 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander for loadlpd: the DF in operand 2 is paired with element 1 of
;; operand 1.  Operands are legitimised by ix86_fixup_binary_operands.
4981 (define_expand "sse2_loadlpd_exp"
4982 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4984 (match_operand:DF 2 "nonimmediate_operand" "")
4986 (match_operand:V2DF 1 "nonimmediate_operand" "")
4987 (parallel [(const_int 1)]))))]
4989 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4991 ;; Avoid combining registers from different units in a single alternative,
4992 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX loadlpd: the DF in operand 2 paired with element 1 of operand 1.
;; Register-destination alternatives: vmovsd load (operand 1 matching C),
;; vmovlpd from memory, vmovsd register merge, and vmovhpd reading the high
;; part of memory operand 1 (%H1).
;; NOTE(review): the three memory-destination alternatives have no template
;; line here; presumably they are "#" and split later -- confirm.
4993 (define_insn "*avx_loadlpd"
4994 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4996 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4998 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4999 (parallel [(const_int 1)]))))]
5000 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5002 vmovsd\t{%2, %0|%0, %2}
5003 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5004 vmovsd\t{%2, %1, %0|%0, %1, %2}
5005 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
5009 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
5010 (set_attr "prefix" "vex")
5011 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 loadlpd: the DF in operand 2 paired with element 1 of operand 1.
;; Register-destination alternatives: movsd load (operand 1 matching C),
;; movlpd from memory, movsd register merge, shufpd, and movhpd reading the
;; high part of memory operand 1 (%H1).
;; In the shufpd alternative operand 2 is tied to the destination, so the
;; low element is already in place and the high element must come from
;; operand 1: the source register is therefore %1.  Naming %2 there would
;; shuffle the destination with itself and never read operand 1.
;; NOTE(review): the three memory-destination alternatives have no template
;; line here; presumably they are "#" and split later -- confirm.
5013 (define_insn "sse2_loadlpd"
5014 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
5016 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
5018 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
5019 (parallel [(const_int 1)]))))]
5020 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5022 movsd\t{%2, %0|%0, %2}
5023 movlpd\t{%2, %0|%0, %2}
5024 movsd\t{%2, %0|%0, %2}
5025 shufpd\t{$2, %1, %0|%0, %1, 2}
5026 movhpd\t{%H1, %0|%0, %H1}
5030 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
5031 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
5032 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
5033 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; After reload, a V2DF memory update pairing DF operand 1 with the existing
;; element 1 of the same memory is reduced to a single DFmode store.
;; NOTE(review): the pattern keeps element 1 and replaces element 0, yet the
;; store address below uses byte offset 8; offset 0 would be expected for
;; the low element -- confirm against the upstream source.
5036 [(set (match_operand:V2DF 0 "memory_operand" "")
5038 (match_operand:DF 1 "register_operand" "")
5039 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
5040 "TARGET_SSE2 && reload_completed"
5041 [(set (match_dup 0) (match_dup 1))]
5043 operands[0] = adjust_address (operands[0], DFmode, 8);
5046 ;; Not sure these two are ever used, but it doesn't hurt to have
;; them.
;; SSE-without-SSE2 extraction of element 1 of a V2DF: movhps to memory,
;; movhlps between registers, or movlps from the high part of memory (%H1).
5048 (define_insn "*vec_extractv2df_1_sse"
5049 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5051 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
5052 (parallel [(const_int 1)])))]
5053 "!TARGET_SSE2 && TARGET_SSE
5054 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5056 movhps\t{%1, %0|%0, %1}
5057 movhlps\t{%1, %0|%0, %1}
5058 movlps\t{%H1, %0|%0, %H1}"
5059 [(set_attr "type" "ssemov")
5060 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 extraction of element 0 of a V2DF: movlps to memory,
;; movaps between registers, or movlps from memory.
5062 (define_insn "*vec_extractv2df_0_sse"
5063 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5065 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
5066 (parallel [(const_int 0)])))]
5067 "!TARGET_SSE2 && TARGET_SSE
5068 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5070 movlps\t{%1, %0|%0, %1}
5071 movaps\t{%1, %0|%0, %1}
5072 movlps\t{%1, %0|%0, %1}"
5073 [(set_attr "type" "ssemov")
5074 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX V2DF merge of operands 2 and 1.  Alternatives: vmovsd (registers),
;; vmovlpd (memory operand 2), vmovlpd store to a memory destination tied to
;; operand 1, vmovhps reading the high part of memory operand 1 (%H1), and
;; vmovhps storing operand 1 to the high part of the destination (%H0).
5076 (define_insn "*avx_movsd"
5077 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
5079 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
5080 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5084 vmovsd\t{%2, %1, %0|%0, %1, %2}
5085 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5086 vmovlpd\t{%2, %0|%0, %2}
5087 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5088 vmovhps\t{%1, %H0|%H0, %1}"
5089 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5090 (set_attr "prefix" "vex")
5091 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 V2DF merge of operands 2 and 1.  In the first three alternatives
;; operand 1 is tied to the destination and movsd/movlpd bring in operand 2.
;; In the last three operand 2 is tied to the destination and the remaining
;; half is taken from operand 1: shufpd from a register, movhps from the
;; high part of memory (%H1), or movhps storing to the high part (%H0).
;; The shufpd source must be %1: operand 2 is the destination register
;; there, so naming %2 would shuffle the destination with itself and never
;; read operand 1.
5093 (define_insn "sse2_movsd"
5094 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5096 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5097 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5101 movsd\t{%2, %0|%0, %2}
5102 movlpd\t{%2, %0|%0, %2}
5103 movlpd\t{%2, %0|%0, %2}
5104 shufpd\t{$2, %1, %0|%0, %1, 2}
5105 movhps\t{%H1, %0|%0, %H1}
5106 movhps\t{%1, %H0|%H0, %1}"
5107 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5108 (set_attr "prefix_data16" "*,1,1,*,*,*")
5109 (set_attr "length_immediate" "*,*,*,1,*,*")
5110 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; V2DF built from a single DF operand (register or memory) with (v)movddup.
5112 (define_insn "*vec_dupv2df_sse3"
5113 [(set (match_operand:V2DF 0 "register_operand" "=x")
5115 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5117 "%vmovddup\t{%1, %0|%0, %1}"
5118 [(set_attr "type" "sselog1")
5119 (set_attr "prefix" "maybe_vex")
5120 (set_attr "mode" "DF")])
;; V2DF built from a DF register operand that is tied to the destination.
;; NOTE(review): neither the condition nor the output template is present
;; in these lines -- confirm against the full file.
5122 (define_insn "vec_dupv2df"
5123 [(set (match_operand:V2DF 0 "register_operand" "=x")
5125 (match_operand:DF 1 "register_operand" "0")))]
5128 [(set_attr "type" "sselog1")
5129 (set_attr "mode" "V2DF")])
;; V2DF built from a DF operand (register or memory) with (v)movddup.
;; NOTE(review): the second vec_concat operand is not present in these
;; lines; presumably it is a match_dup of operand 1 -- confirm.
5131 (define_insn "*vec_concatv2df_sse3"
5132 [(set (match_operand:V2DF 0 "register_operand" "=x")
5134 (match_operand:DF 1 "nonimmediate_operand" "xm")
5137 "%vmovddup\t{%1, %0|%0, %1}"
5138 [(set_attr "type" "sselog1")
5139 (set_attr "prefix" "maybe_vex")
5140 (set_attr "mode" "DF")])
;; AVX V2DF built from two DF operands: vunpcklpd (both registers), vmovhpd
;; (operand 2 in memory), or vmovsd loading operand 1 from memory when
;; operand 2 matches constraint C.
5142 (define_insn "*vec_concatv2df_avx"
5143 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5145 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5146 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5149 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5150 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5151 vmovsd\t{%1, %0|%0, %1}"
5152 [(set_attr "type" "ssemov")
5153 (set_attr "prefix" "vex")
5154 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX V2DF built from two DF operands.  The Y2-class alternatives use
;; unpcklpd, movhpd and movsd; the plain "x" alternatives use the
;; single-precision moves movlhps and movhps.
5156 (define_insn "*vec_concatv2df"
5157 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5159 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5160 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5163 unpcklpd\t{%2, %0|%0, %2}
5164 movhpd\t{%2, %0|%0, %2}
5165 movsd\t{%1, %0|%0, %1}
5166 movlhps\t{%2, %0|%0, %2}
5167 movhps\t{%2, %0|%0, %2}"
5168 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5169 (set_attr "prefix_data16" "*,1,*,*,*")
5170 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5172 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5174 ;; Parallel integral arithmetic
5176 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negate expander for the SSEMODEI modes.  The preparation
;; statement materialises the zero vector of <MODE>mode in a register as
;; operands[2] for the pattern to use.
5178 (define_expand "neg<mode>2"
5179 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5182 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5184 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Named expander for integer vector add/subtract over the SSEMODEI modes.
;; Operands are legitimised in place by ix86_fixup_binary_operands_no_copy.
5186 (define_expand "<plusminus_insn><mode>3"
5187 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5189 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5190 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5192 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX (three-operand, VEX-encoded) integer vector add/subtract.  Operand 1
;; must be a register, operand 2 may be a register or memory.
5194 (define_insn "*avx_<plusminus_insn><mode>3"
5195 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5197 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5198 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5199 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5200 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5201 [(set_attr "type" "sseiadd")
5202 (set_attr "prefix" "vex")
5203 (set_attr "mode" "TI")])
;; SSE2 (two-operand) integer vector add/subtract.  Operand 1 is tied to the
;; destination register; operand 2 may be a register or memory.
5205 (define_insn "*<plusminus_insn><mode>3"
5206 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5208 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5209 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5210 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5211 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5212 [(set_attr "type" "sseiadd")
5213 (set_attr "prefix_data16" "1")
5214 (set_attr "mode" "TI")])
;; Expander for the saturating add/subtract codes (sat_plusminus) on the
;; byte and word vector modes (SSEMODE12).
5216 (define_expand "sse2_<plusminus_insn><mode>3"
5217 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5218 (sat_plusminus:SSEMODE12
5219 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5220 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5222 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand form of the saturating byte/word add/subtract.
5224 (define_insn "*avx_<plusminus_insn><mode>3"
5225 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5226 (sat_plusminus:SSEMODE12
5227 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5228 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5229 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5230 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5231 [(set_attr "type" "sseiadd")
5232 (set_attr "prefix" "vex")
5233 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the saturating byte/word add/subtract; operand 1
;; is tied to the destination.
5235 (define_insn "*sse2_<plusminus_insn><mode>3"
5236 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5237 (sat_plusminus:SSEMODE12
5238 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5239 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5240 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5241 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5242 [(set_attr "type" "sseiadd")
5243 (set_attr "prefix_data16" "1")
5244 (set_attr "mode" "TI")])
;; V16QI multiply, implemented as a split that needs fresh pseudos
;; (can_create_pseudo_p).  The sequence widens bytes to words with
;; punpck{h,l}bw, performs two V8HI multiplies, and then uses three rounds of
;; punpck{h,l}bw plus a final punpcklbw to gather the low result bytes.
;; Twelve V16QI temporaries t[0..11] are allocated for the intermediate values.
5246 (define_insn_and_split "mulv16qi3"
5247 [(set (match_operand:V16QI 0 "register_operand" "")
5248 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5249 (match_operand:V16QI 2 "register_operand" "")))]
5251 && can_create_pseudo_p ()"
5259 for (i = 0; i < 12; ++i)
5260 t[i] = gen_reg_rtx (V16QImode);
5262 /* Unpack data such that we've got a source byte in each low byte of
5263 each word. We don't care what goes into the high byte of each word.
5264 Rather than trying to get zero in there, most convenient is to let
5265 it be a copy of the low byte. */
5266 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
5267 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
5268 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
5269 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
5271 /* Multiply words. The end-of-line annotations here give a picture of what
5272 the output of that instruction looks like. Dot means don't care; the
5273 letters are the bytes of the result with A being the most significant. */
5274 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5275 gen_lowpart (V8HImode, t[0]),
5276 gen_lowpart (V8HImode, t[1])));
5277 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5278 gen_lowpart (V8HImode, t[2]),
5279 gen_lowpart (V8HImode, t[3])));
5281 /* Extract the relevant bytes and merge them back together. */
5282 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
5283 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
5284 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
5285 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
5286 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
5287 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
5289 emit_insn (gen_sse2_punpcklbw (operands[0], t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; Named expander for the V8HI multiply; operands are legitimised in place by
;; ix86_fixup_binary_operands_no_copy before matching the insns below.
5293 (define_expand "mulv8hi3"
5294 [(set (match_operand:V8HI 0 "register_operand" "")
5295 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5296 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5298 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand V8HI multiply (vpmullw).  The "%" in operand 1's
;; constraint marks operands 1 and 2 as commutative.
5300 (define_insn "*avx_mulv8hi3"
5301 [(set (match_operand:V8HI 0 "register_operand" "=x")
5302 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5303 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5304 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5305 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5306 [(set_attr "type" "sseimul")
5307 (set_attr "prefix" "vex")
5308 (set_attr "mode" "TI")])
;; SSE2 two-operand V8HI multiply (pmullw); operand 1 is tied to the
;; destination and is commutative with operand 2.
5310 (define_insn "*mulv8hi3"
5311 [(set (match_operand:V8HI 0 "register_operand" "=x")
5312 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5313 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5314 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5315 "pmullw\t{%2, %0|%0, %2}"
5316 [(set_attr "type" "sseimul")
5317 (set_attr "prefix_data16" "1")
5318 (set_attr "mode" "TI")])
;; Named expander for the signed V8HI high-part multiply (matched by the
;; pmulhw insns below); operands are fixed up as for a MULT.
5320 (define_expand "smulv8hi3_highpart"
5321 [(set (match_operand:V8HI 0 "register_operand" "")
5326 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5328 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5331 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand form of the signed V8HI high-part multiply (vpmulhw).
;; Operand 1 must be a register and is commutative with operand 2, which may
;; be a register or memory.  The name follows the "*avx_<base>" convention of
;; the sibling patterns; being "*"-prefixed it generates no gen_ function.
5333 (define_insn "*avx_smulv8hi3_highpart"
5334 [(set (match_operand:V8HI 0 "register_operand" "=x")
5339 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5341 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5343 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5344 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
5345 [(set_attr "type" "sseimul")
5346 (set_attr "prefix" "vex")
5347 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the signed V8HI high-part multiply (pmulhw);
;; operand 1 is tied to the destination.
5349 (define_insn "*smulv8hi3_highpart"
5350 [(set (match_operand:V8HI 0 "register_operand" "=x")
5355 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5357 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5359 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5360 "pmulhw\t{%2, %0|%0, %2}"
5361 [(set_attr "type" "sseimul")
5362 (set_attr "prefix_data16" "1")
5363 (set_attr "mode" "TI")])
;; Named expander for the unsigned V8HI high-part multiply (matched by the
;; pmulhuw insns below); operands are fixed up as for a MULT.
5365 (define_expand "umulv8hi3_highpart"
5366 [(set (match_operand:V8HI 0 "register_operand" "")
5371 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5373 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5376 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand form of the unsigned V8HI high-part multiply (vpmulhuw).
5378 (define_insn "*avx_umulv8hi3_highpart"
5379 [(set (match_operand:V8HI 0 "register_operand" "=x")
5384 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5386 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5388 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5389 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
5390 [(set_attr "type" "sseimul")
5391 (set_attr "prefix" "vex")
5392 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the unsigned V8HI high-part multiply (pmulhuw);
;; operand 1 is tied to the destination.
5394 (define_insn "*umulv8hi3_highpart"
5395 [(set (match_operand:V8HI 0 "register_operand" "=x")
5400 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5402 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5404 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5405 "pmulhuw\t{%2, %0|%0, %2}"
5406 [(set_attr "type" "sseimul")
5407 (set_attr "prefix_data16" "1")
5408 (set_attr "mode" "TI")])
;; Expander for the widening multiply V4SI x V4SI -> V2DI that selects
;; elements 0 and 2 of each V4SI input (matched by the pmuludq insns below).
5410 (define_expand "sse2_umulv2siv2di3"
5411 [(set (match_operand:V2DI 0 "register_operand" "")
5415 (match_operand:V4SI 1 "nonimmediate_operand" "")
5416 (parallel [(const_int 0) (const_int 2)])))
5419 (match_operand:V4SI 2 "nonimmediate_operand" "")
5420 (parallel [(const_int 0) (const_int 2)])))))]
5422 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX three-operand vpmuludq: multiplies elements 0 and 2 of the two V4SI
;; inputs into a V2DI result.
5424 (define_insn "*avx_umulv2siv2di3"
5425 [(set (match_operand:V2DI 0 "register_operand" "=x")
5429 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5430 (parallel [(const_int 0) (const_int 2)])))
5433 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5434 (parallel [(const_int 0) (const_int 2)])))))]
5435 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5436 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5437 [(set_attr "type" "sseimul")
5438 (set_attr "prefix" "vex")
5439 (set_attr "mode" "TI")])
;; SSE2 two-operand pmuludq: multiplies elements 0 and 2 of the two V4SI
;; inputs into a V2DI result; operand 1 is tied to the destination.
5441 (define_insn "*sse2_umulv2siv2di3"
5442 [(set (match_operand:V2DI 0 "register_operand" "=x")
5446 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5447 (parallel [(const_int 0) (const_int 2)])))
5450 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5451 (parallel [(const_int 0) (const_int 2)])))))]
5452 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5453 "pmuludq\t{%2, %0|%0, %2}"
5454 [(set_attr "type" "sseimul")
5455 (set_attr "prefix_data16" "1")
5456 (set_attr "mode" "TI")])
;; Expander for the SSE4.1 widening multiply V4SI x V4SI -> V2DI over
;; elements 0 and 2 (matched by the pmuldq insns below).
5458 (define_expand "sse4_1_mulv2siv2di3"
5459 [(set (match_operand:V2DI 0 "register_operand" "")
5463 (match_operand:V4SI 1 "nonimmediate_operand" "")
5464 (parallel [(const_int 0) (const_int 2)])))
5467 (match_operand:V4SI 2 "nonimmediate_operand" "")
5468 (parallel [(const_int 0) (const_int 2)])))))]
5470 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX three-operand vpmuldq over elements 0 and 2 of the V4SI inputs.
5472 (define_insn "*avx_mulv2siv2di3"
5473 [(set (match_operand:V2DI 0 "register_operand" "=x")
5477 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5478 (parallel [(const_int 0) (const_int 2)])))
5481 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5482 (parallel [(const_int 0) (const_int 2)])))))]
5483 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5484 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5485 [(set_attr "type" "sseimul")
5486 (set_attr "prefix_extra" "1")
5487 (set_attr "prefix" "vex")
5488 (set_attr "mode" "TI")])
;; SSE4.1 two-operand pmuldq over elements 0 and 2 of the V4SI inputs;
;; operand 1 is tied to the destination.
5490 (define_insn "*sse4_1_mulv2siv2di3"
5491 [(set (match_operand:V2DI 0 "register_operand" "=x")
5495 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5496 (parallel [(const_int 0) (const_int 2)])))
5499 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5500 (parallel [(const_int 0) (const_int 2)])))))]
5501 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5502 "pmuldq\t{%2, %0|%0, %2}"
5503 [(set_attr "type" "sseimul")
5504 (set_attr "prefix_extra" "1")
5505 (set_attr "mode" "TI")])
;; Expander for pmaddwd: V8HI x V8HI -> V4SI.  The pattern selects two V4HI
;; element groups from each input, one starting at element 0 and one starting
;; at element 1, and reuses operands 1 and 2 via match_dup.
5507 (define_expand "sse2_pmaddwd"
5508 [(set (match_operand:V4SI 0 "register_operand" "")
5513 (match_operand:V8HI 1 "nonimmediate_operand" "")
5514 (parallel [(const_int 0)
5520 (match_operand:V8HI 2 "nonimmediate_operand" "")
5521 (parallel [(const_int 0)
5527 (vec_select:V4HI (match_dup 1)
5528 (parallel [(const_int 1)
5533 (vec_select:V4HI (match_dup 2)
5534 (parallel [(const_int 1)
5537 (const_int 7)]))))))]
5539 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand vpmaddwd matching the same V8HI x V8HI -> V4SI shape as
;; the sse2_pmaddwd expander.
5541 (define_insn "*avx_pmaddwd"
5542 [(set (match_operand:V4SI 0 "register_operand" "=x")
5547 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5548 (parallel [(const_int 0)
5554 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5555 (parallel [(const_int 0)
5561 (vec_select:V4HI (match_dup 1)
5562 (parallel [(const_int 1)
5567 (vec_select:V4HI (match_dup 2)
5568 (parallel [(const_int 1)
5571 (const_int 7)]))))))]
5572 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5573 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5574 [(set_attr "type" "sseiadd")
5575 (set_attr "prefix" "vex")
5576 (set_attr "mode" "TI")])
;; SSE2 two-operand pmaddwd; operand 1 is tied to the destination.  Unlike
;; the AVX form this one also sets atom_unit to "simul".
5578 (define_insn "*sse2_pmaddwd"
5579 [(set (match_operand:V4SI 0 "register_operand" "=x")
5584 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5585 (parallel [(const_int 0)
5591 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5592 (parallel [(const_int 0)
5598 (vec_select:V4HI (match_dup 1)
5599 (parallel [(const_int 1)
5604 (vec_select:V4HI (match_dup 2)
5605 (parallel [(const_int 1)
5608 (const_int 7)]))))))]
5609 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5610 "pmaddwd\t{%2, %0|%0, %2}"
5611 [(set_attr "type" "sseiadd")
5612 (set_attr "atom_unit" "simul")
5613 (set_attr "prefix_data16" "1")
5614 (set_attr "mode" "TI")])
;; Named expander for the V4SI multiply.  Operand fix-up is applied only when
;; SSE4.1 or XOP is available; otherwise the operands are left as matched
;; (the *sse2_mulv4si3 split below handles plain SSE2).
5616 (define_expand "mulv4si3"
5617 [(set (match_operand:V4SI 0 "register_operand" "")
5618 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5619 (match_operand:V4SI 2 "register_operand" "")))]
5622 if (TARGET_SSE4_1 || TARGET_XOP)
5623 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX three-operand V4SI multiply (vpmulld).
5626 (define_insn "*avx_mulv4si3"
5627 [(set (match_operand:V4SI 0 "register_operand" "=x")
5628 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5629 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5630 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5631 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5632 [(set_attr "type" "sseimul")
5633 (set_attr "prefix_extra" "1")
5634 (set_attr "prefix" "vex")
5635 (set_attr "mode" "TI")])
;; SSE4.1 two-operand V4SI multiply (pmulld); operand 1 is tied to the
;; destination.
5637 (define_insn "*sse4_1_mulv4si3"
5638 [(set (match_operand:V4SI 0 "register_operand" "=x")
5639 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5640 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5641 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5642 "pmulld\t{%2, %0|%0, %2}"
5643 [(set_attr "type" "sseimul")
5644 (set_attr "prefix_extra" "1")
5645 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor XOP.  It is a
;; split needing fresh pseudos: two sse2_umulv2siv2di3 multiplies handle the
;; even (2,0) and odd (3,1) elements, the odd inputs being moved into place
;; with whole-register sse2_lshrti3 shifts; the results are then compacted
;; with pshufd and merged with punpckldq.
5647 (define_insn_and_split "*sse2_mulv4si3"
5648 [(set (match_operand:V4SI 0 "register_operand" "")
5649 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5650 (match_operand:V4SI 2 "register_operand" "")))]
5651 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_XOP
5652 && can_create_pseudo_p ()"
5657 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5663 t1 = gen_reg_rtx (V4SImode);
5664 t2 = gen_reg_rtx (V4SImode);
5665 t3 = gen_reg_rtx (V4SImode);
5666 t4 = gen_reg_rtx (V4SImode);
5667 t5 = gen_reg_rtx (V4SImode);
5668 t6 = gen_reg_rtx (V4SImode);
5669 thirtytwo = GEN_INT (32);
5671 /* Multiply elements 2 and 0. */
5672 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5675 /* Shift both input vectors down one element, so that elements 3
5676 and 1 are now in the slots for elements 2 and 0. For K8, at
5677 least, this is faster than using a shuffle. */
5678 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5679 gen_lowpart (TImode, op1),
5681 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5682 gen_lowpart (TImode, op2),
5684 /* Multiply elements 3 and 1. */
5685 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5688 /* Move the results in element 2 down to element 1; we don't care
5689 what goes in elements 2 and 3. */
5690 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5691 const0_rtx, const0_rtx));
5692 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5693 const0_rtx, const0_rtx));
5695 /* Merge the parts back together. */
5696 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, implemented as a split needing fresh pseudos.  Two emission
;; sequences appear below: one built from the XOP helpers (pshufd, pmacsdd,
;; phadddq, a 32-bit left shift and pmacsdql), and one built from three
;; sse2_umulv2siv2di3 partial products (low*low, low*high, high*low) whose
;; cross terms are shifted left by 32 and summed.
5700 (define_insn_and_split "mulv2di3"
5701 [(set (match_operand:V2DI 0 "register_operand" "")
5702 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5703 (match_operand:V2DI 2 "register_operand" "")))]
5705 && can_create_pseudo_p ()"
5710 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5715 /* op1: A,B,C,D, op2: E,F,G,H */
5717 op1 = gen_lowpart (V4SImode, operands[1]);
5718 op2 = gen_lowpart (V4SImode, operands[2]);
5719 t1 = gen_reg_rtx (V4SImode);
5720 t2 = gen_reg_rtx (V4SImode);
5721 t3 = gen_reg_rtx (V4SImode);
5722 t4 = gen_reg_rtx (V2DImode);
5723 t5 = gen_reg_rtx (V2DImode);
5726 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5733 emit_move_insn (t2, CONST0_RTX (V4SImode));
5735 /* t3: (B*E),(A*F),(D*G),(C*H) */
5736 emit_insn (gen_xop_pmacsdd (t3, t1, op2, t2));
5738 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5739 emit_insn (gen_xop_phadddq (t4, t3));
5741 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5742 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
5744 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5745 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t5));
5752 t1 = gen_reg_rtx (V2DImode);
5753 t2 = gen_reg_rtx (V2DImode);
5754 t3 = gen_reg_rtx (V2DImode);
5755 t4 = gen_reg_rtx (V2DImode);
5756 t5 = gen_reg_rtx (V2DImode);
5757 t6 = gen_reg_rtx (V2DImode);
5758 thirtytwo = GEN_INT (32);
5760 /* Multiply low parts. */
5761 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5762 gen_lowpart (V4SImode, op2)));
5764 /* Shift input vectors right 32 bits so we can multiply high parts. */
5765 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5766 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5768 /* Multiply high parts by low parts. */
5769 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5770 gen_lowpart (V4SImode, t3)));
5771 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5772 gen_lowpart (V4SImode, t2)));
5774 /* Shift them back. */
5775 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5776 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5778 /* Add the three parts together. */
5779 emit_insn (gen_addv2di3 (t6, t1, t4));
5780 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply V8HI x V8HI -> V4SI: computes the low halves
;; (mulv8hi3) and signed high halves (smulv8hi3_highpart) of the products and
;; combines them with vec_interleave_highv8hi into operand 0 viewed as V8HI.
5784 (define_expand "vec_widen_smult_hi_v8hi"
5785 [(match_operand:V4SI 0 "register_operand" "")
5786 (match_operand:V8HI 1 "register_operand" "")
5787 (match_operand:V8HI 2 "register_operand" "")]
5790 rtx op1, op2, t1, t2, dest;
5794 t1 = gen_reg_rtx (V8HImode);
5795 t2 = gen_reg_rtx (V8HImode);
5796 dest = gen_lowpart (V8HImode, operands[0]);
5798 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5799 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5800 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply V8HI x V8HI -> V4SI: same sequence as the "hi"
;; variant but the two product halves are combined with
;; vec_interleave_lowv8hi.
5804 (define_expand "vec_widen_smult_lo_v8hi"
5805 [(match_operand:V4SI 0 "register_operand" "")
5806 (match_operand:V8HI 1 "register_operand" "")
5807 (match_operand:V8HI 2 "register_operand" "")]
5810 rtx op1, op2, t1, t2, dest;
5814 t1 = gen_reg_rtx (V8HImode);
5815 t2 = gen_reg_rtx (V8HImode);
5816 dest = gen_lowpart (V8HImode, operands[0]);
5818 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5819 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5820 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply V8HI x V8HI -> V4SI: low halves from mulv8hi3,
;; unsigned high halves from umulv8hi3_highpart, combined with
;; vec_interleave_highv8hi.
5824 (define_expand "vec_widen_umult_hi_v8hi"
5825 [(match_operand:V4SI 0 "register_operand" "")
5826 (match_operand:V8HI 1 "register_operand" "")
5827 (match_operand:V8HI 2 "register_operand" "")]
5830 rtx op1, op2, t1, t2, dest;
5834 t1 = gen_reg_rtx (V8HImode);
5835 t2 = gen_reg_rtx (V8HImode);
5836 dest = gen_lowpart (V8HImode, operands[0]);
5838 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5839 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5840 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply V8HI x V8HI -> V4SI: low halves from mulv8hi3,
;; unsigned high halves from umulv8hi3_highpart, combined with
;; vec_interleave_lowv8hi.
5844 (define_expand "vec_widen_umult_lo_v8hi"
5845 [(match_operand:V4SI 0 "register_operand" "")
5846 (match_operand:V8HI 1 "register_operand" "")
5847 (match_operand:V8HI 2 "register_operand" "")]
5850 rtx op1, op2, t1, t2, dest;
5854 t1 = gen_reg_rtx (V8HImode);
5855 t2 = gen_reg_rtx (V8HImode);
5856 dest = gen_lowpart (V8HImode, operands[0]);
5858 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5859 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5860 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply V4SI x V4SI -> V2DI: both inputs are first
;; rearranged with pshufd into temporaries, then multiplied with the XOP
;; helper xop_mulv2div2di3_high.
5864 (define_expand "vec_widen_smult_hi_v4si"
5865 [(match_operand:V2DI 0 "register_operand" "")
5866 (match_operand:V4SI 1 "register_operand" "")
5867 (match_operand:V4SI 2 "register_operand" "")]
5872 t1 = gen_reg_rtx (V4SImode);
5873 t2 = gen_reg_rtx (V4SImode);
5875 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5880 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5885 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Signed widening multiply V4SI x V4SI -> V2DI: both inputs are first
;; rearranged with pshufd into temporaries, then multiplied with the XOP
;; helper xop_mulv2div2di3_low.
5889 (define_expand "vec_widen_smult_lo_v4si"
5890 [(match_operand:V2DI 0 "register_operand" "")
5891 (match_operand:V4SI 1 "register_operand" "")
5892 (match_operand:V4SI 2 "register_operand" "")]
5897 t1 = gen_reg_rtx (V4SImode);
5898 t2 = gen_reg_rtx (V4SImode);
5900 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5905 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5910 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Unsigned widening multiply V4SI x V4SI -> V2DI: each input is interleaved
;; with itself using vec_interleave_highv4si, and the results are fed to
;; sse2_umulv2siv2di3.
5914 (define_expand "vec_widen_umult_hi_v4si"
5915 [(match_operand:V2DI 0 "register_operand" "")
5916 (match_operand:V4SI 1 "register_operand" "")
5917 (match_operand:V4SI 2 "register_operand" "")]
5920 rtx op1, op2, t1, t2;
5924 t1 = gen_reg_rtx (V4SImode);
5925 t2 = gen_reg_rtx (V4SImode);
5927 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5928 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5929 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply V4SI x V4SI -> V2DI: each input is interleaved
;; with itself using vec_interleave_lowv4si, and the results are fed to
;; sse2_umulv2siv2di3.
5933 (define_expand "vec_widen_umult_lo_v4si"
5934 [(match_operand:V2DI 0 "register_operand" "")
5935 (match_operand:V4SI 1 "register_operand" "")
5936 (match_operand:V4SI 2 "register_operand" "")]
5939 rtx op1, op2, t1, t2;
5943 t1 = gen_reg_rtx (V4SImode);
5944 t2 = gen_reg_rtx (V4SImode);
5946 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5947 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5948 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step: pmaddwd of operands 1 and 2 into a V4SI
;; temporary, then a V4SI add of accumulator operand 3 into operand 0.
5952 (define_expand "sdot_prodv8hi"
5953 [(match_operand:V4SI 0 "register_operand" "")
5954 (match_operand:V8HI 1 "register_operand" "")
5955 (match_operand:V8HI 2 "register_operand" "")
5956 (match_operand:V4SI 3 "register_operand" "")]
5959 rtx t = gen_reg_rtx (V4SImode);
5960 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5961 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step V4SI x V4SI accumulated into V2DI.  The first
;; sse2_umulv2siv2di3 product is added to accumulator operand 3; both inputs
;; are then shifted right as whole TImode registers (sse2_lshrti3) and
;; multiplied again, and the second product is added to form operand 0.
5965 (define_expand "udot_prodv4si"
5966 [(match_operand:V2DI 0 "register_operand" "")
5967 (match_operand:V4SI 1 "register_operand" "")
5968 (match_operand:V4SI 2 "register_operand" "")
5969 (match_operand:V2DI 3 "register_operand" "")]
5974 t1 = gen_reg_rtx (V2DImode);
5975 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5976 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5978 t2 = gen_reg_rtx (V4SImode);
5979 t3 = gen_reg_rtx (V4SImode);
5980 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5981 gen_lowpart (TImode, operands[1]),
5983 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5984 gen_lowpart (TImode, operands[2]),
5987 t4 = gen_reg_rtx (V2DImode);
5988 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5990 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX three-operand vpsra on word/dword vectors (SSEMODE24).  The SI count
;; operand uses constraints "xN"; length_immediate depends on whether it
;; matches const_int_operand.
5994 (define_insn "*avx_ashr<mode>3"
5995 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5997 (match_operand:SSEMODE24 1 "register_operand" "x")
5998 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6000 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6001 [(set_attr "type" "sseishft")
6002 (set_attr "prefix" "vex")
6003 (set (attr "length_immediate")
6004 (if_then_else (match_operand 2 "const_int_operand" "")
6006 (const_string "0")))
6007 (set_attr "mode" "TI")])
;; Two-operand psra on word/dword vectors (SSEMODE24); operand 1 is tied to
;; the destination.  length_immediate depends on whether the count matches
;; const_int_operand.
6009 (define_insn "ashr<mode>3"
6010 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6012 (match_operand:SSEMODE24 1 "register_operand" "0")
6013 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6015 "psra<ssevecsize>\t{%2, %0|%0, %2}"
6016 [(set_attr "type" "sseishft")
6017 (set_attr "prefix_data16" "1")
6018 (set (attr "length_immediate")
6019 (if_then_else (match_operand 2 "const_int_operand" "")
6021 (const_string "0")))
6022 (set_attr "mode" "TI")])
;; AVX three-operand logical right shift (vpsrl) on word/dword/qword vectors
;; (SSEMODE248).  length_immediate depends on whether the count matches
;; const_int_operand.
6024 (define_insn "*avx_lshr<mode>3"
6025 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6026 (lshiftrt:SSEMODE248
6027 (match_operand:SSEMODE248 1 "register_operand" "x")
6028 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6030 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6031 [(set_attr "type" "sseishft")
6032 (set_attr "prefix" "vex")
6033 (set (attr "length_immediate")
6034 (if_then_else (match_operand 2 "const_int_operand" "")
6036 (const_string "0")))
6037 (set_attr "mode" "TI")])
;; Two-operand logical right shift (psrl) on word/dword/qword vectors;
;; operand 1 is tied to the destination.
6039 (define_insn "lshr<mode>3"
6040 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6041 (lshiftrt:SSEMODE248
6042 (match_operand:SSEMODE248 1 "register_operand" "0")
6043 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6045 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
6046 [(set_attr "type" "sseishft")
6047 (set_attr "prefix_data16" "1")
6048 (set (attr "length_immediate")
6049 (if_then_else (match_operand 2 "const_int_operand" "")
6051 (const_string "0")))
6052 (set_attr "mode" "TI")])
;; AVX three-operand left shift (vpsll) on word/dword/qword vectors
;; (SSEMODE248).  length_immediate depends on whether the count matches
;; const_int_operand.
6054 (define_insn "*avx_ashl<mode>3"
6055 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6057 (match_operand:SSEMODE248 1 "register_operand" "x")
6058 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6060 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6061 [(set_attr "type" "sseishft")
6062 (set_attr "prefix" "vex")
6063 (set (attr "length_immediate")
6064 (if_then_else (match_operand 2 "const_int_operand" "")
6066 (const_string "0")))
6067 (set_attr "mode" "TI")])
;; Two-operand left shift (psll) on word/dword/qword vectors; operand 1 is
;; tied to the destination.
6069 (define_insn "ashl<mode>3"
6070 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6072 (match_operand:SSEMODE248 1 "register_operand" "0")
6073 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6075 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6076 [(set_attr "type" "sseishft")
6077 (set_attr "prefix_data16" "1")
6078 (set (attr "length_immediate")
6079 (if_then_else (match_operand 2 "const_int_operand" "")
6081 (const_string "0")))
6082 (set_attr "mode" "TI")])
;; Whole-vector left shift for the SSEMODEI modes.  The shift itself is
;; expressed in TImode, so operands 0 and 1 are replaced by their TImode
;; lowparts; the count must satisfy const_0_to_255_mul_8_operand.
6084 (define_expand "vec_shl_<mode>"
6085 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6086 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
6087 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6090 operands[0] = gen_lowpart (TImode, operands[0]);
6091 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for the SSEMODEI modes.  The shift itself
;; is expressed in TImode, so operands 0 and 1 are replaced by their TImode
;; lowparts; the count must satisfy const_0_to_255_mul_8_operand.
6094 (define_expand "vec_shr_<mode>"
6095 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6096 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
6097 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6100 operands[0] = gen_lowpart (TImode, operands[0]);
6101 operands[1] = gen_lowpart (TImode, operands[1]);
;; AVX three-operand integer min/max on byte/word/dword vectors (SSEMODE124).
;; prefix_extra is chosen by a test on the mode: the comparison mode is
;; V8HImode for SMAX/SMIN and V16QImode for the other codes.
6104 (define_insn "*avx_<code><mode>3"
6105 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6107 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6108 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6109 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6110 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6111 [(set_attr "type" "sseiadd")
6112 (set (attr "prefix_extra")
6114 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
6117 (const_string "0")))
6118 (set_attr "prefix" "vex")
6119 (set_attr "mode" "TI")])
;; Named expander for a V16QI min/max code; operands are legitimised in place
;; by ix86_fixup_binary_operands_no_copy.
6121 (define_expand "<code>v16qi3"
6122 [(set (match_operand:V16QI 0 "register_operand" "")
6124 (match_operand:V16QI 1 "nonimmediate_operand" "")
6125 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6127 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 two-operand byte min/max (p<maxminiprefix>b); operand 1 is tied to
;; the destination and commutative with operand 2.
6129 (define_insn "*<code>v16qi3"
6130 [(set (match_operand:V16QI 0 "register_operand" "=x")
6132 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6133 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6134 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6135 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
6136 [(set_attr "type" "sseiadd")
6137 (set_attr "prefix_data16" "1")
6138 (set_attr "mode" "TI")])
;; Named expander for a V8HI min/max code; operands are legitimised in place
;; by ix86_fixup_binary_operands_no_copy.
6140 (define_expand "<code>v8hi3"
6141 [(set (match_operand:V8HI 0 "register_operand" "")
6143 (match_operand:V8HI 1 "nonimmediate_operand" "")
6144 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6146 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 two-operand word min/max (p<maxminiprefix>w); operand 1 is tied to
;; the destination and commutative with operand 2.
6148 (define_insn "*<code>v8hi3"
6149 [(set (match_operand:V8HI 0 "register_operand" "=x")
6151 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6152 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6153 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6154 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
6155 [(set_attr "type" "sseiadd")
6156 (set_attr "prefix_data16" "1")
6157 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum.  One path just fixes up the operands for a direct
;; UMAX insn.  The other synthesises the result as
;; (operand1 -us operand2) + operand2, using an unsigned saturating subtract
;; followed by an add; a fresh temporary is used when operand 0 equals
;; operand 2 so the subtract does not clobber an input still needed.
6159 (define_expand "umaxv8hi3"
6160 [(set (match_operand:V8HI 0 "register_operand" "")
6161 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6162 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6166 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6169 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6170 if (rtx_equal_p (op3, op2))
6171 op3 = gen_reg_rtx (V8HImode);
6172 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6173 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for byte/dword vectors (SSEMODE14).  One path fixes up the
;; operands for a direct SMAX insn; the other builds a vector conditional
;; "op1 > op2 ? op1 : op2" and hands it to ix86_expand_int_vcond.
6178 (define_expand "smax<mode>3"
6179 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6180 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6181 (match_operand:SSEMODE14 2 "register_operand" "")))]
6185 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6191 xops[0] = operands[0];
6192 xops[1] = operands[1];
6193 xops[2] = operands[2];
6194 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6195 xops[4] = operands[1];
6196 xops[5] = operands[2];
6197 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 two-operand min/max on byte/dword vectors (SSEMODE14); operand 1 is
;; tied to the destination and commutative with operand 2.
6203 (define_insn "*sse4_1_<code><mode>3"
6204 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6206 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6207 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6208 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6209 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
6210 [(set_attr "type" "sseiadd")
6211 (set_attr "prefix_extra" "1")
6212 (set_attr "mode" "TI")])
;; Unsigned V4SI maximum.  One path fixes up the operands for a direct UMAX
;; insn; the other builds the vector conditional "op1 >u op2 ? op1 : op2"
;; and hands it to ix86_expand_int_vcond.
6214 (define_expand "umaxv4si3"
6215 [(set (match_operand:V4SI 0 "register_operand" "")
6216 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6217 (match_operand:V4SI 2 "register_operand" "")))]
6221 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6227 xops[0] = operands[0];
6228 xops[1] = operands[1];
6229 xops[2] = operands[2];
6230 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6231 xops[4] = operands[1];
6232 xops[5] = operands[2];
6233 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 two-operand min/max on word/dword vectors (SSEMODE24); operand 1 is
;; tied to the destination and commutative with operand 2.
6239 (define_insn "*sse4_1_<code><mode>3"
6240 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6242 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6243 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6244 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6245 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
6246 [(set_attr "type" "sseiadd")
6247 (set_attr "prefix_extra" "1")
6248 (set_attr "mode" "TI")])
;; Signed minimum for byte/dword vectors (SSEMODE14).  One path fixes up the
;; operands for a direct SMIN insn; the other reuses the GT comparison with
;; the select arms swapped, i.e. "op1 > op2 ? op2 : op1", via
;; ix86_expand_int_vcond.
6250 (define_expand "smin<mode>3"
6251 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6252 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6253 (match_operand:SSEMODE14 2 "register_operand" "")))]
6257 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6263 xops[0] = operands[0];
6264 xops[1] = operands[2];
6265 xops[2] = operands[1];
6266 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6267 xops[4] = operands[1];
6268 xops[5] = operands[2];
6269 ok = ix86_expand_int_vcond (xops);
;; Expander for the unsigned minimum of two SSEMODE24 registers.
;; Mirrors smin<mode>3 but uses UMIN and an unsigned GTU comparison:
;; the vcond fallback selects "op1 >u op2 ? op2 : op1" (xops[1]/xops[2]
;; are operands 2/1).
;; NOTE(review): the condition selecting between the fixup path and the
;; vcond path is on lines not shown in this excerpt.
6275 (define_expand "umin<mode>3"
6276 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6277 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6278 (match_operand:SSEMODE24 2 "register_operand" "")))]
6282 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6288 xops[0] = operands[0];
6289 xops[1] = operands[2];
6290 xops[2] = operands[1];
6291 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6292 xops[4] = operands[1];
6293 xops[5] = operands[2];
6294 ok = ix86_expand_int_vcond (xops);
6300 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6302 ;; Parallel integral comparisons
6304 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for packed integer equality on the SSEMODE124 modes.
;; Enabled for SSE2 when XOP is not in use; the only preparation step is
;; operand canonicalization through ix86_fixup_binary_operands_no_copy (EQ).
6306 (define_expand "sse2_eq<mode>3"
6307 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6309 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6310 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6311 "TARGET_SSE2 && !TARGET_XOP "
6312 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX form of packed integer equality for all four SSEMODE1248 modes.
;; Emits the three-operand vpcmpeq<ssevecsize>; operand 1 is commutative
;; ("%x") and need not match the destination.
;; The prefix_extra attribute is computed conditionally on the destination
;; being V2DI; the value used in that case is on a line not shown here.
6314 (define_insn "*avx_eq<mode>3"
6315 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6317 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6318 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6319 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6320 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6321 [(set_attr "type" "ssecmp")
6322 (set (attr "prefix_extra")
6323 (if_then_else (match_operand:V2DI 0 "" "")
6325 (const_string "*")))
6326 (set_attr "prefix" "vex")
6327 (set_attr "mode" "TI")])
;; SSE2 two-operand packed integer equality for the SSEMODE124 modes.
;; Emits pcmpeq<ssevecsize> with operand 1 tied to the destination;
;; disabled when XOP is enabled.
6329 (define_insn "*sse2_eq<mode>3"
6330 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6332 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6333 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6334 "TARGET_SSE2 && !TARGET_XOP
6335 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6336 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6337 [(set_attr "type" "ssecmp")
6338 (set_attr "prefix_data16" "1")
6339 (set_attr "mode" "TI")])
;; Named expander for V2DI (64-bit element) equality.  The only visible
;; preparation step canonicalizes the operands with
;; ix86_fixup_binary_operands_no_copy (EQ, V2DImode, ...).
;; NOTE(review): the enabling condition is on a line not shown here.
6341 (define_expand "sse4_1_eqv2di3"
6342 [(set (match_operand:V2DI 0 "register_operand" "")
6344 (match_operand:V2DI 1 "nonimmediate_operand" "")
6345 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6347 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 two-operand V2DI equality: emits pcmpeqq with operand 1 tied to
;; the destination and operand 2 in a register or memory.
6349 (define_insn "*sse4_1_eqv2di3"
6350 [(set (match_operand:V2DI 0 "register_operand" "=x")
6352 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6353 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6354 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6355 "pcmpeqq\t{%2, %0|%0, %2}"
6356 [(set_attr "type" "ssecmp")
6357 (set_attr "prefix_extra" "1")
6358 (set_attr "mode" "TI")])
;; AVX form of packed integer greater-than for the SSEMODE1248 modes.
;; Emits the three-operand vpcmpgt<ssevecsize>.  Unlike the equality
;; patterns, operand 1 must be a register and carries no commutative
;; marker.
;; NOTE(review): the insn condition and the V2DI prefix_extra value are
;; on lines not shown in this excerpt.
6360 (define_insn "*avx_gt<mode>3"
6361 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6363 (match_operand:SSEMODE1248 1 "register_operand" "x")
6364 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6366 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6367 [(set_attr "type" "ssecmp")
6368 (set (attr "prefix_extra")
6369 (if_then_else (match_operand:V2DI 0 "" "")
6371 (const_string "*")))
6372 (set_attr "prefix" "vex")
6373 (set_attr "mode" "TI")])
;; SSE2 two-operand packed integer greater-than for the SSEMODE124 modes.
;; Emits pcmpgt<ssevecsize>; operand 1 is a register tied to the
;; destination.  Disabled when XOP is enabled.
6375 (define_insn "sse2_gt<mode>3"
6376 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6378 (match_operand:SSEMODE124 1 "register_operand" "0")
6379 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6380 "TARGET_SSE2 && !TARGET_XOP"
6381 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6382 [(set_attr "type" "ssecmp")
6383 (set_attr "prefix_data16" "1")
6384 (set_attr "mode" "TI")])
;; Two-operand V2DI greater-than: emits pcmpgtq with operand 1 tied to
;; the destination.
;; NOTE(review): the insn condition is on a line not shown here; the
;; pattern name suggests SSE4.2 -- confirm.
6386 (define_insn "sse4_2_gtv2di3"
6387 [(set (match_operand:V2DI 0 "register_operand" "=x")
6389 (match_operand:V2DI 1 "register_operand" "0")
6390 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6392 "pcmpgtq\t{%2, %0|%0, %2}"
6393 [(set_attr "type" "ssecmp")
6394 (set_attr "prefix_extra" "1")
6395 (set_attr "mode" "TI")])
;; Integer vector conditional-select expander for the SSEMODE124C8 modes:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; The comparison operator is matched with an empty predicate; all of the
;; work is delegated to ix86_expand_int_vcond, whose success flag is
;; captured in `ok'.
;; NOTE(review): the enabling condition and what is done with `ok' are on
;; lines not shown in this excerpt.
6397 (define_expand "vcond<mode>"
6398 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6399 (if_then_else:SSEMODE124C8
6400 (match_operator 3 ""
6401 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6402 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6403 (match_operand:SSEMODE124C8 1 "general_operand" "")
6404 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6407 bool ok = ix86_expand_int_vcond (operands);
;; Companion of vcond<mode> with an identical RTL template and the same
;; delegation to ix86_expand_int_vcond.  Under GCC's standard pattern
;; names, vcondu is the variant used for unsigned comparisons.
;; NOTE(review): the enabling condition and what is done with `ok' are on
;; lines not shown in this excerpt.
6412 (define_expand "vcondu<mode>"
6413 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6414 (if_then_else:SSEMODE124C8
6415 (match_operator 3 ""
6416 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6417 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6418 (match_operand:SSEMODE124C8 1 "general_operand" "")
6419 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6422 bool ok = ix86_expand_int_vcond (operands);
6427 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6429 ;; Parallel bitwise logical operations
6431 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement of an SSEMODEI vector, expressed as XOR with an
;; all-ones constant.  The preparation code builds a CONST_VECTOR whose
;; GET_MODE_NUNITS elements are all constm1_rtx, forces it into a
;; register, and installs it as operands[2] for the xor template.
6433 (define_expand "one_cmpl<mode>2"
6434 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6435 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6439 int i, n = GET_MODE_NUNITS (<MODE>mode);
6440 rtvec v = rtvec_alloc (n);
6442 for (i = 0; i < n; ++i)
6443 RTVEC_ELT (v, i) = constm1_rtx;
6445 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; And-not for the 256-bit integer modes (AVX256MODEI): combines the
;; complement of register operand 1 with operand 2.  The instruction
;; emitted is the floating-point-domain vandnps, and the insn mode comes
;; from <avxvecpsmode>.
;; NOTE(review): the outer RTL code and the insn condition are on lines
;; not shown in this excerpt.
6448 (define_insn "*avx_andnot<mode>3"
6449 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6451 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6452 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6454 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6455 [(set_attr "type" "sselog")
6456 (set_attr "prefix" "vex")
6457 (set_attr "mode" "<avxvecpsmode>")])
;; And-not on 128-bit integer modes for targets with SSE but without
;; SSE2: emits the single-precision andnps (mode attribute V4SF) with
;; operand 1 tied to the destination.
;; NOTE(review): the outer RTL code wrapping (not op1) and op2 is on a
;; line not shown in this excerpt.
6459 (define_insn "*sse_andnot<mode>3"
6460 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6462 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6463 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6464 "(TARGET_SSE && !TARGET_SSE2)"
6465 "andnps\t{%2, %0|%0, %2}"
6466 [(set_attr "type" "sselog")
6467 (set_attr "mode" "V4SF")])
;; VEX-encoded and-not on the 128-bit integer modes (SSEMODEI): emits the
;; three-operand vpandn, so operand 1 need not match the destination.
;; NOTE(review): the outer RTL code and the insn condition are on lines
;; not shown in this excerpt.
6469 (define_insn "*avx_andnot<mode>3"
6470 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6472 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6473 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6475 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6476 [(set_attr "type" "sselog")
6477 (set_attr "prefix" "vex")
6478 (set_attr "mode" "TI")])
;; Two-operand integer and-not on the SSEMODEI modes: emits pandn with
;; operand 1 (the complemented input) tied to the destination.
;; NOTE(review): the outer RTL code and the insn condition are on lines
;; not shown in this excerpt.
6480 (define_insn "sse2_andnot<mode>3"
6481 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6483 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6484 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6486 "pandn\t{%2, %0|%0, %2}"
6487 [(set_attr "type" "sselog")
6488 (set_attr "prefix_data16" "1")
6489 (set_attr "mode" "TI")])
;; And-not on TFmode values held in SSE registers, implemented with the
;; integer pandn instruction (insn mode TI); operand 1 is tied to the
;; destination.
;; NOTE(review): the outer RTL code and the insn condition are on lines
;; not shown in this excerpt.
6491 (define_insn "*andnottf3"
6492 [(set (match_operand:TF 0 "register_operand" "=x")
6494 (not:TF (match_operand:TF 1 "register_operand" "0"))
6495 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6497 "pandn\t{%2, %0|%0, %2}"
6498 [(set_attr "type" "sselog")
6499 (set_attr "prefix_data16" "1")
6500 (set_attr "mode" "TI")])
;; Named expander for a code-iterated binary operation on the SSEMODEI
;; modes.  The only preparation step is operand canonicalization via
;; ix86_fixup_binary_operands_no_copy (<CODE>, ...).
;; NOTE(review): the RTL code line (presumably the bitwise-logic code
;; iterator of this section) and the condition are not shown here.
6502 (define_expand "<code><mode>3"
6503 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6505 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6506 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6508 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Code-iterated binary logic operation on the 256-bit integer modes
;; (AVX256MODEI).  Emits the packed-single form v<plogicprefix>ps in
;; three-operand VEX syntax; the insn mode comes from <avxvecpsmode>.
;; NOTE(review): the RTL code line and the first half of the condition
;; are not shown in this excerpt.
6510 (define_insn "*avx_<code><mode>3"
6511 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6513 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6514 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6516 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6517 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
6518 [(set_attr "type" "sselog")
6519 (set_attr "prefix" "vex")
6520 (set_attr "mode" "<avxvecpsmode>")])
;; Code-iterated binary logic operation on the 128-bit integer modes for
;; targets with SSE but without SSE2: emits the packed-single form
;; <plogicprefix>ps (mode attribute V4SF), operand 1 tied to the
;; destination.
;; NOTE(review): the RTL code line is not shown in this excerpt.
6522 (define_insn "*sse_<code><mode>3"
6523 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6525 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6526 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6527 "(TARGET_SSE && !TARGET_SSE2)
6528 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6529 "<plogicprefix>ps\t{%2, %0|%0, %2}"
6530 [(set_attr "type" "sselog")
6531 (set_attr "mode" "V4SF")])
;; VEX-encoded code-iterated binary logic operation on the 128-bit
;; integer modes: emits the three-operand integer form vp<plogicprefix>.
;; NOTE(review): the RTL code line and the first half of the condition
;; are not shown in this excerpt.
6533 (define_insn "*avx_<code><mode>3"
6534 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6536 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6537 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6539 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6540 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
6541 [(set_attr "type" "sselog")
6542 (set_attr "prefix" "vex")
6543 (set_attr "mode" "TI")])
;; SSE2 code-iterated binary logic operation on the 128-bit integer
;; modes: emits the two-operand integer form p<plogicprefix> with
;; operand 1 tied to the destination.
;; NOTE(review): the RTL code line is not shown in this excerpt.
6545 (define_insn "*sse2_<code><mode>3"
6546 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6548 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6549 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6550 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6551 "p<plogicprefix>\t{%2, %0|%0, %2}"
6552 [(set_attr "type" "sselog")
6553 (set_attr "prefix_data16" "1")
6554 (set_attr "mode" "TI")])
;; Named expander for a code-iterated binary operation on TFmode values.
;; The only preparation step is operand canonicalization via
;; ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, ...).
;; NOTE(review): the RTL code line and the condition are not shown here.
6556 (define_expand "<code>tf3"
6557 [(set (match_operand:TF 0 "register_operand" "")
6559 (match_operand:TF 1 "nonimmediate_operand" "")
6560 (match_operand:TF 2 "nonimmediate_operand" "")))]
6562 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 code-iterated binary logic operation on TFmode values, emitted
;; as the integer instruction p<plogicprefix> (insn mode TI) with
;; operand 1 tied to the destination.
;; NOTE(review): the RTL code line is not shown in this excerpt.
6564 (define_insn "*<code>tf3"
6565 [(set (match_operand:TF 0 "register_operand" "=x")
6567 (match_operand:TF 1 "nonimmediate_operand" "%0")
6568 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6569 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6570 "p<plogicprefix>\t{%2, %0|%0, %2}"
6571 [(set_attr "type" "sselog")
6572 (set_attr "prefix_data16" "1")
6573 (set_attr "mode" "TI")])
6575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6577 ;; Parallel integral element swizzling
6579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6582 ;; op1 = abcdefghijklmnop
6583 ;; op2 = qrstuvwxyz012345
6584 ;; h1 = aqbrcsdteufvgwhx
6585 ;; l1 = iyjzk0l1m2n3o4p5
6586 ;; h2 = aiqybjrzcks0dlt1
6587 ;; l2 = emu2fnv3gow4hpx5
6588 ;; h3 = aeimquy2bfjnrvz3
6589 ;; l3 = cgkosw04dhlptx15
6590 ;; result = bdfhjlnprtvxz135
;; Packs two V8HI vectors into one V16QI result using byte interleaves
;; only.  Both inputs are reinterpreted as V16QI with gen_lowpart, then
;; three rounds of high/low byte interleaves are applied to fresh
;; temporaries (h1/l1, h2/l2, h3/l3); a final low interleave of l3 and h3
;; writes operand 0.  The diagram above traces the byte positions.
;; Note the template lists bare operands (no `set'); all RTL is produced
;; by the emit_insn calls.
6591 (define_expand "vec_pack_trunc_v8hi"
6592 [(match_operand:V16QI 0 "register_operand" "")
6593 (match_operand:V8HI 1 "register_operand" "")
6594 (match_operand:V8HI 2 "register_operand" "")]
6597 rtx op1, op2, h1, l1, h2, l2, h3, l3;
6599 op1 = gen_lowpart (V16QImode, operands[1]);
6600 op2 = gen_lowpart (V16QImode, operands[2]);
6601 h1 = gen_reg_rtx (V16QImode);
6602 l1 = gen_reg_rtx (V16QImode);
6603 h2 = gen_reg_rtx (V16QImode);
6604 l2 = gen_reg_rtx (V16QImode);
6605 h3 = gen_reg_rtx (V16QImode);
6606 l3 = gen_reg_rtx (V16QImode);
6608 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
6609 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
6610 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
6611 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
6612 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
6613 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
6614 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
6625 ;; result = bdfhjlnp
;; Packs two V4SI vectors into one V8HI result using halfword
;; interleaves only.  Both inputs are reinterpreted as V8HI with
;; gen_lowpart, two rounds of high/low interleaves fill the temporaries
;; h1/l1 and h2/l2, and a final low interleave of l2 and h2 writes
;; operand 0.
6626 (define_expand "vec_pack_trunc_v4si"
6627 [(match_operand:V8HI 0 "register_operand" "")
6628 (match_operand:V4SI 1 "register_operand" "")
6629 (match_operand:V4SI 2 "register_operand" "")]
6632 rtx op1, op2, h1, l1, h2, l2;
6634 op1 = gen_lowpart (V8HImode, operands[1]);
6635 op2 = gen_lowpart (V8HImode, operands[2]);
6636 h1 = gen_reg_rtx (V8HImode);
6637 l1 = gen_reg_rtx (V8HImode);
6638 h2 = gen_reg_rtx (V8HImode);
6639 l2 = gen_reg_rtx (V8HImode);
6641 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
6642 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
6643 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
6644 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
6645 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Packs two V2DI vectors into one V4SI result using doubleword
;; interleaves only.  Both inputs are reinterpreted as V4SI with
;; gen_lowpart, one high/low interleave pair fills h1/l1, and a final low
;; interleave of l1 and h1 writes operand 0.
6655 (define_expand "vec_pack_trunc_v2di"
6656 [(match_operand:V4SI 0 "register_operand" "")
6657 (match_operand:V2DI 1 "register_operand" "")
6658 (match_operand:V2DI 2 "register_operand" "")]
6661 rtx op1, op2, h1, l1;
6663 op1 = gen_lowpart (V4SImode, operands[1]);
6664 op2 = gen_lowpart (V4SImode, operands[2]);
6665 h1 = gen_reg_rtx (V4SImode);
6666 l1 = gen_reg_rtx (V4SImode);
6668 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
6669 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
6670 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Standard-name expander for the high-half byte interleave of two V16QI
;; vectors.  The selection list alternates indices 8..15 with 24..31;
;; the expansion itself just emits the sse2_punpckhbw pattern.
;; NOTE(review): the vec_select/vec_concat header lines and the condition
;; are not shown in this excerpt; indices >= 16 presumably address
;; operand 2 within the concatenation.
6674 (define_expand "vec_interleave_highv16qi"
6675 [(set (match_operand:V16QI 0 "register_operand" "")
6678 (match_operand:V16QI 1 "register_operand" "")
6679 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6680 (parallel [(const_int 8) (const_int 24)
6681 (const_int 9) (const_int 25)
6682 (const_int 10) (const_int 26)
6683 (const_int 11) (const_int 27)
6684 (const_int 12) (const_int 28)
6685 (const_int 13) (const_int 29)
6686 (const_int 14) (const_int 30)
6687 (const_int 15) (const_int 31)])))]
6690 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Standard-name expander for the low-half byte interleave of two V16QI
;; vectors.  The selection list alternates indices 0..7 with 16..23; the
;; expansion itself just emits the sse2_punpcklbw pattern.
;; NOTE(review): the vec_select/vec_concat header lines and the condition
;; are not shown in this excerpt.
6694 (define_expand "vec_interleave_lowv16qi"
6695 [(set (match_operand:V16QI 0 "register_operand" "")
6698 (match_operand:V16QI 1 "register_operand" "")
6699 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6700 (parallel [(const_int 0) (const_int 16)
6701 (const_int 1) (const_int 17)
6702 (const_int 2) (const_int 18)
6703 (const_int 3) (const_int 19)
6704 (const_int 4) (const_int 20)
6705 (const_int 5) (const_int 21)
6706 (const_int 6) (const_int 22)
6707 (const_int 7) (const_int 23)])))]
6710 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Standard-name expander for the high-half halfword interleave of two
;; V8HI vectors.  The selection list alternates indices 4..7 with 12..15;
;; the expansion itself just emits the sse2_punpckhwd pattern.
;; Operand 0 uses an empty constraint string like the other
;; vec_interleave_* expanders: constraints play no role in a
;; define_expand, so the former stray "=" carried no meaning.
;; NOTE(review): the vec_select/vec_concat header lines and the condition
;; are not shown in this excerpt.
6714 (define_expand "vec_interleave_highv8hi"
6715 [(set (match_operand:V8HI 0 "register_operand" "")
6718 (match_operand:V8HI 1 "register_operand" "")
6719 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6720 (parallel [(const_int 4) (const_int 12)
6721 (const_int 5) (const_int 13)
6722 (const_int 6) (const_int 14)
6723 (const_int 7) (const_int 15)])))]
6726 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Standard-name expander for the low-half halfword interleave of two
;; V8HI vectors.  The selection list alternates indices 0..3 with 8..11;
;; the expansion itself just emits the sse2_punpcklwd pattern.
;; NOTE(review): the vec_select/vec_concat header lines and the condition
;; are not shown in this excerpt.
6730 (define_expand "vec_interleave_lowv8hi"
6731 [(set (match_operand:V8HI 0 "register_operand" "")
6734 (match_operand:V8HI 1 "register_operand" "")
6735 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6736 (parallel [(const_int 0) (const_int 8)
6737 (const_int 1) (const_int 9)
6738 (const_int 2) (const_int 10)
6739 (const_int 3) (const_int 11)])))]
6742 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Standard-name expander for the high-half doubleword interleave of two
;; V4SI vectors (selection indices 2,6,3,7); the expansion emits the
;; sse2_punpckhdq pattern.
;; NOTE(review): the vec_select/vec_concat header lines and the condition
;; are not shown in this excerpt.
6746 (define_expand "vec_interleave_highv4si"
6747 [(set (match_operand:V4SI 0 "register_operand" "")
6750 (match_operand:V4SI 1 "register_operand" "")
6751 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6752 (parallel [(const_int 2) (const_int 6)
6753 (const_int 3) (const_int 7)])))]
6756 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Standard-name expander for the low-half doubleword interleave of two
;; V4SI vectors (selection indices 0,4,1,5); the expansion emits the
;; sse2_punpckldq pattern.
;; NOTE(review): the vec_select/vec_concat header lines and the condition
;; are not shown in this excerpt.
6760 (define_expand "vec_interleave_lowv4si"
6761 [(set (match_operand:V4SI 0 "register_operand" "")
6764 (match_operand:V4SI 1 "register_operand" "")
6765 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6766 (parallel [(const_int 0) (const_int 4)
6767 (const_int 1) (const_int 5)])))]
6770 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Standard-name expander for the high-half quadword interleave of two
;; V2DI vectors; the expansion emits the sse2_punpckhqdq pattern.
;; NOTE(review): the selection list starts at index 1 but its remainder,
;; the vec_select/vec_concat header and the condition are on lines not
;; shown in this excerpt.
6774 (define_expand "vec_interleave_highv2di"
6775 [(set (match_operand:V2DI 0 "register_operand" "")
6778 (match_operand:V2DI 1 "register_operand" "")
6779 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6780 (parallel [(const_int 1)
6784 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Standard-name expander for the low-half quadword interleave of two
;; V2DI vectors; the expansion emits the sse2_punpcklqdq pattern.
;; NOTE(review): the selection list starts at index 0 but its remainder,
;; the vec_select/vec_concat header and the condition are on lines not
;; shown in this excerpt.
6788 (define_expand "vec_interleave_lowv2di"
6789 [(set (match_operand:V2DI 0 "register_operand" "")
6792 (match_operand:V2DI 1 "register_operand" "")
6793 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6794 (parallel [(const_int 0)
6798 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Standard-name expander for the high-half interleave of two V4SF
;; vectors (selection indices 2,6,3,7).  No preparation statements are
;; visible, so the RTL template itself appears to be what gets emitted.
;; NOTE(review): the vec_select/vec_concat header lines and the condition
;; are not shown in this excerpt.
6802 (define_expand "vec_interleave_highv4sf"
6803 [(set (match_operand:V4SF 0 "register_operand" "")
6806 (match_operand:V4SF 1 "register_operand" "")
6807 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6808 (parallel [(const_int 2) (const_int 6)
6809 (const_int 3) (const_int 7)])))]
;; Standard-name expander for the low-half interleave of two V4SF
;; vectors (selection indices 0,4,1,5).  No preparation statements are
;; visible, so the RTL template itself appears to be what gets emitted.
;; NOTE(review): the vec_select/vec_concat header lines and the condition
;; are not shown in this excerpt.
6812 (define_expand "vec_interleave_lowv4sf"
6813 [(set (match_operand:V4SF 0 "register_operand" "")
6816 (match_operand:V4SF 1 "register_operand" "")
6817 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6818 (parallel [(const_int 0) (const_int 4)
6819 (const_int 1) (const_int 5)])))]
;; Standard-name expander for the high-half interleave of two V2DF
;; vectors.  No preparation statements are visible.
;; NOTE(review): the selection list starts at index 1 but its remainder,
;; the vec_select/vec_concat header and the condition are on lines not
;; shown in this excerpt.
6822 (define_expand "vec_interleave_highv2df"
6823 [(set (match_operand:V2DF 0 "register_operand" "")
6826 (match_operand:V2DF 1 "register_operand" "")
6827 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6828 (parallel [(const_int 1)
;; Standard-name expander for the low-half interleave of two V2DF
;; vectors.  No preparation statements are visible.
;; NOTE(review): the selection list starts at index 0 but its remainder,
;; the vec_select/vec_concat header and the condition are on lines not
;; shown in this excerpt.
6832 (define_expand "vec_interleave_lowv2df"
6833 [(set (match_operand:V2DF 0 "register_operand" "")
6836 (match_operand:V2DF 1 "register_operand" "")
6837 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6838 (parallel [(const_int 0)
;; VEX-encoded pack of two V8HI operands into a V16QI destination:
;; emits the three-operand vpacksswb.
;; NOTE(review): the RTL describing the narrowing (and the insn
;; condition) is on lines not shown in this excerpt.
6842 (define_insn "*avx_packsswb"
6843 [(set (match_operand:V16QI 0 "register_operand" "=x")
6846 (match_operand:V8HI 1 "register_operand" "x"))
6848 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6850 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6851 [(set_attr "type" "sselog")
6852 (set_attr "prefix" "vex")
6853 (set_attr "mode" "TI")])
;; Two-operand pack of two V8HI operands into a V16QI destination:
;; emits packsswb with operand 1 tied to the destination.
;; NOTE(review): the RTL describing the narrowing (and the insn
;; condition) is on lines not shown in this excerpt.
6855 (define_insn "sse2_packsswb"
6856 [(set (match_operand:V16QI 0 "register_operand" "=x")
6859 (match_operand:V8HI 1 "register_operand" "0"))
6861 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6863 "packsswb\t{%2, %0|%0, %2}"
6864 [(set_attr "type" "sselog")
6865 (set_attr "prefix_data16" "1")
6866 (set_attr "mode" "TI")])
;; VEX-encoded pack of two V4SI operands into a V8HI destination:
;; emits the three-operand vpackssdw.
;; NOTE(review): the RTL describing the narrowing (and the insn
;; condition) is on lines not shown in this excerpt.
6868 (define_insn "*avx_packssdw"
6869 [(set (match_operand:V8HI 0 "register_operand" "=x")
6872 (match_operand:V4SI 1 "register_operand" "x"))
6874 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6876 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6877 [(set_attr "type" "sselog")
6878 (set_attr "prefix" "vex")
6879 (set_attr "mode" "TI")])
;; Two-operand pack of two V4SI operands into a V8HI destination:
;; emits packssdw with operand 1 tied to the destination.
;; NOTE(review): the RTL describing the narrowing (and the insn
;; condition) is on lines not shown in this excerpt.
6881 (define_insn "sse2_packssdw"
6882 [(set (match_operand:V8HI 0 "register_operand" "=x")
6885 (match_operand:V4SI 1 "register_operand" "0"))
6887 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6889 "packssdw\t{%2, %0|%0, %2}"
6890 [(set_attr "type" "sselog")
6891 (set_attr "prefix_data16" "1")
6892 (set_attr "mode" "TI")])
;; VEX-encoded pack of two V8HI operands into a V16QI destination:
;; emits the three-operand vpackuswb.
;; NOTE(review): the RTL describing the narrowing (and the insn
;; condition) is on lines not shown in this excerpt.
6894 (define_insn "*avx_packuswb"
6895 [(set (match_operand:V16QI 0 "register_operand" "=x")
6898 (match_operand:V8HI 1 "register_operand" "x"))
6900 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6902 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6903 [(set_attr "type" "sselog")
6904 (set_attr "prefix" "vex")
6905 (set_attr "mode" "TI")])
;; Two-operand pack of two V8HI operands into a V16QI destination:
;; emits packuswb with operand 1 tied to the destination.
;; NOTE(review): the RTL describing the narrowing (and the insn
;; condition) is on lines not shown in this excerpt.
6907 (define_insn "sse2_packuswb"
6908 [(set (match_operand:V16QI 0 "register_operand" "=x")
6911 (match_operand:V8HI 1 "register_operand" "0"))
6913 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6915 "packuswb\t{%2, %0|%0, %2}"
6916 [(set_attr "type" "sselog")
6917 (set_attr "prefix_data16" "1")
6918 (set_attr "mode" "TI")])
;; VEX-encoded high-half byte interleave of two V16QI operands
;; (selection indices 8..15 alternating with 24..31): emits the
;; three-operand vpunpckhbw.
;; NOTE(review): the vec_select/vec_concat header lines and the insn
;; condition are not shown in this excerpt.
6920 (define_insn "*avx_punpckhbw"
6921 [(set (match_operand:V16QI 0 "register_operand" "=x")
6924 (match_operand:V16QI 1 "register_operand" "x")
6925 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6926 (parallel [(const_int 8) (const_int 24)
6927 (const_int 9) (const_int 25)
6928 (const_int 10) (const_int 26)
6929 (const_int 11) (const_int 27)
6930 (const_int 12) (const_int 28)
6931 (const_int 13) (const_int 29)
6932 (const_int 14) (const_int 30)
6933 (const_int 15) (const_int 31)])))]
6935 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6936 [(set_attr "type" "sselog")
6937 (set_attr "prefix" "vex")
6938 (set_attr "mode" "TI")])
;; Two-operand high-half byte interleave of two V16QI operands
;; (selection indices 8..15 alternating with 24..31): emits punpckhbw
;; with operand 1 tied to the destination.  This is the pattern the
;; vec_interleave_highv16qi expander emits.
;; NOTE(review): the vec_select/vec_concat header lines and the insn
;; condition are not shown in this excerpt.
6940 (define_insn "sse2_punpckhbw"
6941 [(set (match_operand:V16QI 0 "register_operand" "=x")
6944 (match_operand:V16QI 1 "register_operand" "0")
6945 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6946 (parallel [(const_int 8) (const_int 24)
6947 (const_int 9) (const_int 25)
6948 (const_int 10) (const_int 26)
6949 (const_int 11) (const_int 27)
6950 (const_int 12) (const_int 28)
6951 (const_int 13) (const_int 29)
6952 (const_int 14) (const_int 30)
6953 (const_int 15) (const_int 31)])))]
6955 "punpckhbw\t{%2, %0|%0, %2}"
6956 [(set_attr "type" "sselog")
6957 (set_attr "prefix_data16" "1")
6958 (set_attr "mode" "TI")])
;; VEX-encoded low-half byte interleave of two V16QI operands
;; (selection indices 0..7 alternating with 16..23): emits the
;; three-operand vpunpcklbw.
;; NOTE(review): the vec_select/vec_concat header lines and the insn
;; condition are not shown in this excerpt.
6960 (define_insn "*avx_punpcklbw"
6961 [(set (match_operand:V16QI 0 "register_operand" "=x")
6964 (match_operand:V16QI 1 "register_operand" "x")
6965 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6966 (parallel [(const_int 0) (const_int 16)
6967 (const_int 1) (const_int 17)
6968 (const_int 2) (const_int 18)
6969 (const_int 3) (const_int 19)
6970 (const_int 4) (const_int 20)
6971 (const_int 5) (const_int 21)
6972 (const_int 6) (const_int 22)
6973 (const_int 7) (const_int 23)])))]
6975 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6976 [(set_attr "type" "sselog")
6977 (set_attr "prefix" "vex")
6978 (set_attr "mode" "TI")])
;; Two-operand low-half byte interleave of two V16QI operands
;; (selection indices 0..7 alternating with 16..23): emits punpcklbw
;; with operand 1 tied to the destination.  This is the pattern the
;; vec_interleave_lowv16qi expander emits.
;; NOTE(review): the vec_select/vec_concat header lines and the insn
;; condition are not shown in this excerpt.
6980 (define_insn "sse2_punpcklbw"
6981 [(set (match_operand:V16QI 0 "register_operand" "=x")
6984 (match_operand:V16QI 1 "register_operand" "0")
6985 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6986 (parallel [(const_int 0) (const_int 16)
6987 (const_int 1) (const_int 17)
6988 (const_int 2) (const_int 18)
6989 (const_int 3) (const_int 19)
6990 (const_int 4) (const_int 20)
6991 (const_int 5) (const_int 21)
6992 (const_int 6) (const_int 22)
6993 (const_int 7) (const_int 23)])))]
6995 "punpcklbw\t{%2, %0|%0, %2}"
6996 [(set_attr "type" "sselog")
6997 (set_attr "prefix_data16" "1")
6998 (set_attr "mode" "TI")])
;; VEX-encoded high-half halfword interleave of two V8HI operands
;; (selection indices 4..7 alternating with 12..15): emits the
;; three-operand vpunpckhwd.
;; NOTE(review): the vec_select/vec_concat header lines and the insn
;; condition are not shown in this excerpt.
7000 (define_insn "*avx_punpckhwd"
7001 [(set (match_operand:V8HI 0 "register_operand" "=x")
7004 (match_operand:V8HI 1 "register_operand" "x")
7005 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7006 (parallel [(const_int 4) (const_int 12)
7007 (const_int 5) (const_int 13)
7008 (const_int 6) (const_int 14)
7009 (const_int 7) (const_int 15)])))]
7011 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
7012 [(set_attr "type" "sselog")
7013 (set_attr "prefix" "vex")
7014 (set_attr "mode" "TI")])
;; Two-operand high-half halfword interleave of two V8HI operands
;; (selection indices 4..7 alternating with 12..15): emits punpckhwd
;; with operand 1 tied to the destination.  This is the pattern the
;; vec_interleave_highv8hi expander emits.
;; NOTE(review): the vec_select/vec_concat header lines and the insn
;; condition are not shown in this excerpt.
7016 (define_insn "sse2_punpckhwd"
7017 [(set (match_operand:V8HI 0 "register_operand" "=x")
7020 (match_operand:V8HI 1 "register_operand" "0")
7021 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7022 (parallel [(const_int 4) (const_int 12)
7023 (const_int 5) (const_int 13)
7024 (const_int 6) (const_int 14)
7025 (const_int 7) (const_int 15)])))]
7027 "punpckhwd\t{%2, %0|%0, %2}"
7028 [(set_attr "type" "sselog")
7029 (set_attr "prefix_data16" "1")
7030 (set_attr "mode" "TI")])
;; VEX-encoded low-half halfword interleave of two V8HI operands
;; (selection indices 0..3 alternating with 8..11): emits the
;; three-operand vpunpcklwd.
;; NOTE(review): the vec_select/vec_concat header lines and the insn
;; condition are not shown in this excerpt.
7032 (define_insn "*avx_punpcklwd"
7033 [(set (match_operand:V8HI 0 "register_operand" "=x")
7036 (match_operand:V8HI 1 "register_operand" "x")
7037 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7038 (parallel [(const_int 0) (const_int 8)
7039 (const_int 1) (const_int 9)
7040 (const_int 2) (const_int 10)
7041 (const_int 3) (const_int 11)])))]
7043 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
7044 [(set_attr "type" "sselog")
7045 (set_attr "prefix" "vex")
7046 (set_attr "mode" "TI")])
;; Two-operand low-half halfword interleave of two V8HI operands
;; (selection indices 0..3 alternating with 8..11): emits punpcklwd
;; with operand 1 tied to the destination.  This is the pattern the
;; vec_interleave_lowv8hi expander emits.
;; NOTE(review): the vec_select/vec_concat header lines and the insn
;; condition are not shown in this excerpt.
7048 (define_insn "sse2_punpcklwd"
7049 [(set (match_operand:V8HI 0 "register_operand" "=x")
7052 (match_operand:V8HI 1 "register_operand" "0")
7053 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7054 (parallel [(const_int 0) (const_int 8)
7055 (const_int 1) (const_int 9)
7056 (const_int 2) (const_int 10)
7057 (const_int 3) (const_int 11)])))]
7059 "punpcklwd\t{%2, %0|%0, %2}"
7060 [(set_attr "type" "sselog")
7061 (set_attr "prefix_data16" "1")
7062 (set_attr "mode" "TI")])
;; VEX-encoded high-half doubleword interleave of two V4SI operands
;; (selection indices 2,6,3,7): emits the three-operand vpunpckhdq.
;; NOTE(review): the vec_select/vec_concat header lines and the insn
;; condition are not shown in this excerpt.
7064 (define_insn "*avx_punpckhdq"
7065 [(set (match_operand:V4SI 0 "register_operand" "=x")
7068 (match_operand:V4SI 1 "register_operand" "x")
7069 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7070 (parallel [(const_int 2) (const_int 6)
7071 (const_int 3) (const_int 7)])))]
7073 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
7074 [(set_attr "type" "sselog")
7075 (set_attr "prefix" "vex")
7076 (set_attr "mode" "TI")])
;; Two-operand high-half doubleword interleave of two V4SI operands
;; (selection indices 2,6,3,7): emits punpckhdq with operand 1 tied to
;; the destination.  This is the pattern the vec_interleave_highv4si
;; expander emits.
;; NOTE(review): the vec_select/vec_concat header lines and the insn
;; condition are not shown in this excerpt.
7078 (define_insn "sse2_punpckhdq"
7079 [(set (match_operand:V4SI 0 "register_operand" "=x")
7082 (match_operand:V4SI 1 "register_operand" "0")
7083 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7084 (parallel [(const_int 2) (const_int 6)
7085 (const_int 3) (const_int 7)])))]
7087 "punpckhdq\t{%2, %0|%0, %2}"
7088 [(set_attr "type" "sselog")
7089 (set_attr "prefix_data16" "1")
7090 (set_attr "mode" "TI")])
;; VEX-encoded low-half doubleword interleave of two V4SI operands
;; (selection indices 0,4,1,5): emits the three-operand vpunpckldq.
;; NOTE(review): the vec_select/vec_concat header lines and the insn
;; condition are not shown in this excerpt.
7092 (define_insn "*avx_punpckldq"
7093 [(set (match_operand:V4SI 0 "register_operand" "=x")
7096 (match_operand:V4SI 1 "register_operand" "x")
7097 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7098 (parallel [(const_int 0) (const_int 4)
7099 (const_int 1) (const_int 5)])))]
7101 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
7102 [(set_attr "type" "sselog")
7103 (set_attr "prefix" "vex")
7104 (set_attr "mode" "TI")])
;; Two-operand low-half doubleword interleave of two V4SI operands
;; (selection indices 0,4,1,5): emits punpckldq with operand 1 tied to
;; the destination.  This is the pattern the vec_interleave_lowv4si
;; expander emits.
;; NOTE(review): the vec_select/vec_concat header lines and the insn
;; condition are not shown in this excerpt.
7106 (define_insn "sse2_punpckldq"
7107 [(set (match_operand:V4SI 0 "register_operand" "=x")
7110 (match_operand:V4SI 1 "register_operand" "0")
7111 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7112 (parallel [(const_int 0) (const_int 4)
7113 (const_int 1) (const_int 5)])))]
7115 "punpckldq\t{%2, %0|%0, %2}"
7116 [(set_attr "type" "sselog")
7117 (set_attr "prefix_data16" "1")
7118 (set_attr "mode" "TI")])
;; VEX-encoded element insert for the SSEMODE124 modes: scalar operand 2
;; (register or memory) is broadcast and vec_merge'd into vector
;; operand 1 under the mask in operand 3.  The output code converts the
;; mask to an element index with exact_log2 before printing
;; vpinsr<ssevecsize>, so operand 3 must be a single-bit value (the
;; predicate name const_pow2_1_to_<pinsrbits>_operand suggests as much).
;; prefix_extra depends on whether the destination is V8HI; one branch
;; value and the insn condition are on lines not shown in this excerpt.
7120 (define_insn "*avx_pinsr<ssevecsize>"
7121 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
7122 (vec_merge:SSEMODE124
7123 (vec_duplicate:SSEMODE124
7124 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
7125 (match_operand:SSEMODE124 1 "register_operand" "x")
7126 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
7129 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7130 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
7132 [(set_attr "type" "sselog")
7133 (set (attr "prefix_extra")
7134 (if_then_else (match_operand:V8HI 0 "register_operand" "")
7136 (const_string "1")))
7137 (set_attr "length_immediate" "1")
7138 (set_attr "prefix" "vex")
7139 (set_attr "mode" "TI")])
;; Byte insert into a V16QI vector: QI operand 2 is broadcast and merged
;; into operand 1 (tied to the destination) under the mask in operand 3.
;; The output code rewrites the mask as an element index via exact_log2
;; and prints pinsrb.
;; NOTE(review): the vec_merge header line and the insn condition are not
;; shown in this excerpt.
7141 (define_insn "*sse4_1_pinsrb"
7142 [(set (match_operand:V16QI 0 "register_operand" "=x")
7144 (vec_duplicate:V16QI
7145 (match_operand:QI 2 "nonimmediate_operand" "rm"))
7146 (match_operand:V16QI 1 "register_operand" "0")
7147 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
7150 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7151 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
7153 [(set_attr "type" "sselog")
7154 (set_attr "prefix_extra" "1")
7155 (set_attr "length_immediate" "1")
7156 (set_attr "mode" "TI")])
;; Halfword insert into a V8HI vector: HI operand 2 is merged into
;; operand 1 (tied to the destination) under the mask in operand 3.  The
;; output code rewrites the mask as an element index via exact_log2 and
;; prints pinsrw.
;; NOTE(review): the vec_merge/vec_duplicate header lines and the insn
;; condition are not shown in this excerpt.
7158 (define_insn "*sse2_pinsrw"
7159 [(set (match_operand:V8HI 0 "register_operand" "=x")
7162 (match_operand:HI 2 "nonimmediate_operand" "rm"))
7163 (match_operand:V8HI 1 "register_operand" "0")
7164 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
7167 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7168 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
7170 [(set_attr "type" "sselog")
7171 (set_attr "prefix_data16" "1")
7172 (set_attr "length_immediate" "1")
7173 (set_attr "mode" "TI")])
7175 ;; It must come before sse2_loadld since it is preferred.
;; Doubleword insert into a V4SI vector: SI operand 2 is merged into
;; operand 1 (tied to the destination) under the mask in operand 3.  The
;; output code rewrites the mask as an element index via exact_log2 and
;; prints pinsrd.
;; NOTE(review): the vec_merge/vec_duplicate header lines and the insn
;; condition are not shown in this excerpt.
7176 (define_insn "*sse4_1_pinsrd"
7177 [(set (match_operand:V4SI 0 "register_operand" "=x")
7180 (match_operand:SI 2 "nonimmediate_operand" "rm"))
7181 (match_operand:V4SI 1 "register_operand" "0")
7182 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
7185 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7186 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
7188 [(set_attr "type" "sselog")
7189 (set_attr "prefix_extra" "1")
7190 (set_attr "length_immediate" "1")
7191 (set_attr "mode" "TI")])
;; VEX-encoded quadword insert into a V2DI vector, available only for
;; AVX on 64-bit targets.  DI operand 2 is merged into operand 1 under
;; the mask in operand 3; the output code rewrites the mask as an
;; element index via exact_log2 and prints the three-operand vpinsrq.
;; NOTE(review): the vec_merge/vec_duplicate header lines are not shown
;; in this excerpt.
7193 (define_insn "*avx_pinsrq"
7194 [(set (match_operand:V2DI 0 "register_operand" "=x")
7197 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7198 (match_operand:V2DI 1 "register_operand" "x")
7199 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7200 "TARGET_AVX && TARGET_64BIT"
7202 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7203 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7205 [(set_attr "type" "sselog")
7206 (set_attr "prefix_extra" "1")
7207 (set_attr "length_immediate" "1")
7208 (set_attr "prefix" "vex")
7209 (set_attr "mode" "TI")])
;; Quadword insert into a V2DI vector, available only for SSE4.1 on
;; 64-bit targets.  DI operand 2 is merged into operand 1 (tied to the
;; destination) under the mask in operand 3; the output code rewrites
;; the mask as an element index via exact_log2 and prints pinsrq.
;; NOTE(review): the vec_merge/vec_duplicate header lines are not shown
;; in this excerpt.
7211 (define_insn "*sse4_1_pinsrq"
7212 [(set (match_operand:V2DI 0 "register_operand" "=x")
7215 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7216 (match_operand:V2DI 1 "register_operand" "0")
7217 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7218 "TARGET_SSE4_1 && TARGET_64BIT"
7220 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7221 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7223 [(set_attr "type" "sselog")
7224 (set_attr "prefix_rex" "1")
7225 (set_attr "prefix_extra" "1")
7226 (set_attr "length_immediate" "1")
7227 (set_attr "mode" "TI")])
;; Extracts the byte of V16QI operand 1 selected by immediate operand 2
;; (0..15) into an SImode general register; prints %vpextrb with the
;; prefix attribute set to maybe_vex.
;; NOTE(review): the RTL wrapping the selection (widening to SI) and the
;; insn condition are on lines not shown in this excerpt.
7229 (define_insn "*sse4_1_pextrb"
7230 [(set (match_operand:SI 0 "register_operand" "=r")
7233 (match_operand:V16QI 1 "register_operand" "x")
7234 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7236 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7237 [(set_attr "type" "sselog")
7238 (set_attr "prefix_extra" "1")
7239 (set_attr "length_immediate" "1")
7240 (set_attr "prefix" "maybe_vex")
7241 (set_attr "mode" "TI")])
;; Memory-destination variant of the byte extract: stores the byte of
;; V16QI operand 1 selected by immediate operand 2 (0..15) directly into
;; a QImode memory operand using %vpextrb.
;; NOTE(review): the vec_select header line and the insn condition are
;; not shown in this excerpt.
7243 (define_insn "*sse4_1_pextrb_memory"
7244 [(set (match_operand:QI 0 "memory_operand" "=m")
7246 (match_operand:V16QI 1 "register_operand" "x")
7247 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7249 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7250 [(set_attr "type" "sselog")
7251 (set_attr "prefix_extra" "1")
7252 (set_attr "length_immediate" "1")
7253 (set_attr "prefix" "maybe_vex")
7254 (set_attr "mode" "TI")])
;; Extracts the halfword of V8HI operand 1 selected by immediate
;; operand 2 (0..7) into an SImode general register; prints %vpextrw
;; with the prefix attribute set to maybe_vex.
;; NOTE(review): the RTL wrapping the selection (widening to SI) and the
;; insn condition are on lines not shown in this excerpt.
7256 (define_insn "*sse2_pextrw"
7257 [(set (match_operand:SI 0 "register_operand" "=r")
7260 (match_operand:V8HI 1 "register_operand" "x")
7261 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7263 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7264 [(set_attr "type" "sselog")
7265 (set_attr "prefix_data16" "1")
7266 (set_attr "length_immediate" "1")
7267 (set_attr "prefix" "maybe_vex")
7268 (set_attr "mode" "TI")])
;; Memory-destination variant of the halfword extract: stores the
;; halfword of V8HI operand 1 selected by immediate operand 2 (0..7)
;; directly into an HImode memory operand using %vpextrw.
;; NOTE(review): the vec_select header line and the insn condition are
;; not shown in this excerpt.
7270 (define_insn "*sse4_1_pextrw_memory"
7271 [(set (match_operand:HI 0 "memory_operand" "=m")
7273 (match_operand:V8HI 1 "register_operand" "x")
7274 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7276 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7277 [(set_attr "type" "sselog")
7278 (set_attr "prefix_extra" "1")
7279 (set_attr "length_immediate" "1")
7280 (set_attr "prefix" "maybe_vex")
7281 (set_attr "mode" "TI")])
;; Extracts the doubleword of V4SI operand 1 selected by immediate
;; operand 2 (0..3) into an SImode register or memory destination
;; ("=rm") using %vpextrd.
;; NOTE(review): the vec_select header line and the insn condition are
;; not shown in this excerpt.
7283 (define_insn "*sse4_1_pextrd"
7284 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7286 (match_operand:V4SI 1 "register_operand" "x")
7287 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7289 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7290 [(set_attr "type" "sselog")
7291 (set_attr "prefix_extra" "1")
7292 (set_attr "length_immediate" "1")
7293 (set_attr "prefix" "maybe_vex")
7294 (set_attr "mode" "TI")])
7296 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Extracts the quadword of V2DI operand 1 selected by immediate
;; operand 2 (0..1) into a DImode register or memory destination using
;; %vpextrq.  Only enabled for SSE4.1 on 64-bit targets; prefix_rex is
;; set to 1.
;; NOTE(review): the vec_select header line is not shown in this excerpt.
7297 (define_insn "*sse4_1_pextrq"
7298 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7300 (match_operand:V2DI 1 "register_operand" "x")
7301 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7302 "TARGET_SSE4_1 && TARGET_64BIT"
7303 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7304 [(set_attr "type" "sselog")
7305 (set_attr "prefix_rex" "1")
7306 (set_attr "prefix_extra" "1")
7307 (set_attr "length_immediate" "1")
7308 (set_attr "prefix" "maybe_vex")
7309 (set_attr "mode" "TI")])
;; Expander taking the pshufd shuffle control as a single integer
;; (operand 2).  It splits that mask into four 2-bit selectors -- bits
;; 1:0, 3:2, 5:4 and 7:6 -- and passes them as separate operands to the
;; sse2_pshufd_1 insn, which represents each lane selection explicitly.
;; NOTE(review): the enabling condition is on a line not shown here.
7311 (define_expand "sse2_pshufd"
7312 [(match_operand:V4SI 0 "register_operand" "")
7313 (match_operand:V4SI 1 "nonimmediate_operand" "")
7314 (match_operand:SI 2 "const_int_operand" "")]
7317 int mask = INTVAL (operands[2]);
7318 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7319 GEN_INT ((mask >> 0) & 3),
7320 GEN_INT ((mask >> 2) & 3),
7321 GEN_INT ((mask >> 4) & 3),
7322 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the four element selectors held in operands 2..5, each
;; restricted by const_0_to_3_operand.  The output code packs them back
;; into one immediate (two bits per selector, operand 2 in the low bits)
;; and overwrites operands[2] with it before returning the template.
7326 (define_insn "sse2_pshufd_1"
7327 [(set (match_operand:V4SI 0 "register_operand" "=x")
7329 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7330 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7331 (match_operand 3 "const_0_to_3_operand" "")
7332 (match_operand 4 "const_0_to_3_operand" "")
7333 (match_operand 5 "const_0_to_3_operand" "")])))]
7337 mask |= INTVAL (operands[2]) << 0;
7338 mask |= INTVAL (operands[3]) << 2;
7339 mask |= INTVAL (operands[4]) << 4;
7340 mask |= INTVAL (operands[5]) << 6;
7341 operands[2] = GEN_INT (mask);
7343 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7345 [(set_attr "type" "sselog1")
7346 (set_attr "prefix_data16" "1")
7347 (set_attr "prefix" "maybe_vex")
7348 (set_attr "length_immediate" "1")
7349 (set_attr "mode" "TI")])
;; Expander for pshuflw taking a single integer mask in operand 2.  The
;; mask is split into four 2-bit selectors (bits 0-1, 2-3, 4-5 and 6-7),
;; which are passed as separate constants to gen_sse2_pshuflw_1.
7351 (define_expand "sse2_pshuflw"
7352 [(match_operand:V8HI 0 "register_operand" "")
7353 (match_operand:V8HI 1 "nonimmediate_operand" "")
7354 (match_operand:SI 2 "const_int_operand" "")]
7357 int mask = INTVAL (operands[2]);
7358 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7359 GEN_INT ((mask >> 0) & 3),
7360 GEN_INT ((mask >> 2) & 3),
7361 GEN_INT ((mask >> 4) & 3),
7362 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with four variable element selectors in operands 2..5, each
;; restricted by const_0_to_3_operand.  The output code packs them into
;; one immediate (two bits per selector, operand 2 in the low bits) and
;; overwrites operands[2] with it before returning the template.
7366 (define_insn "sse2_pshuflw_1"
7367 [(set (match_operand:V8HI 0 "register_operand" "=x")
7369 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7370 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7371 (match_operand 3 "const_0_to_3_operand" "")
7372 (match_operand 4 "const_0_to_3_operand" "")
7373 (match_operand 5 "const_0_to_3_operand" "")
7381 mask |= INTVAL (operands[2]) << 0;
7382 mask |= INTVAL (operands[3]) << 2;
7383 mask |= INTVAL (operands[4]) << 4;
7384 mask |= INTVAL (operands[5]) << 6;
7385 operands[2] = GEN_INT (mask);
7387 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7389 [(set_attr "type" "sselog")
7390 (set_attr "prefix_data16" "0")
7391 (set_attr "prefix_rep" "1")
7392 (set_attr "prefix" "maybe_vex")
7393 (set_attr "length_immediate" "1")
7394 (set_attr "mode" "TI")])
;; Expander for pshufhw taking a single integer mask in operand 2.  Each
;; 2-bit field of the mask is biased by 4 before being passed to
;; gen_sse2_pshufhw_1, whose selector operands must satisfy
;; const_4_to_7_operand.
7396 (define_expand "sse2_pshufhw"
7397 [(match_operand:V8HI 0 "register_operand" "")
7398 (match_operand:V8HI 1 "nonimmediate_operand" "")
7399 (match_operand:SI 2 "const_int_operand" "")]
7402 int mask = INTVAL (operands[2]);
7403 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7404 GEN_INT (((mask >> 0) & 3) + 4),
7405 GEN_INT (((mask >> 2) & 3) + 4),
7406 GEN_INT (((mask >> 4) & 3) + 4),
7407 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with four variable element selectors in operands 2..5, each
;; restricted by const_4_to_7_operand.  The output code removes the bias
;; of 4 from every selector, packs the results into one immediate (two
;; bits each, operand 2 in the low bits) and overwrites operands[2].
7411 (define_insn "sse2_pshufhw_1"
7412 [(set (match_operand:V8HI 0 "register_operand" "=x")
7414 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7415 (parallel [(const_int 0)
7419 (match_operand 2 "const_4_to_7_operand" "")
7420 (match_operand 3 "const_4_to_7_operand" "")
7421 (match_operand 4 "const_4_to_7_operand" "")
7422 (match_operand 5 "const_4_to_7_operand" "")])))]
7426 mask |= (INTVAL (operands[2]) - 4) << 0;
7427 mask |= (INTVAL (operands[3]) - 4) << 2;
7428 mask |= (INTVAL (operands[4]) - 4) << 4;
7429 mask |= (INTVAL (operands[5]) - 4) << 6;
7430 operands[2] = GEN_INT (mask);
7432 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7434 [(set_attr "type" "sselog")
7435 (set_attr "prefix_rep" "1")
7436 (set_attr "prefix_data16" "0")
7437 (set_attr "prefix" "maybe_vex")
7438 (set_attr "length_immediate" "1")
7439 (set_attr "mode" "TI")])
;; Expander that loads SImode operand 1 into V4SI register operand 0.
;; The preparation statement sets operands[2] to the all-zero V4SI
;; constant for use by the pattern.
7441 (define_expand "sse2_loadd"
7442 [(set (match_operand:V4SI 0 "register_operand" "")
7445 (match_operand:SI 1 "nonimmediate_operand" ""))
7449 "operands[2] = CONST0_RTX (V4SImode);")
;; AVX form of the SImode-into-V4SI load.  Operand 2 is the SImode source
;; and operand 1 is either zero or a register (reg_or_0_operand).
;; Alternatives 0 and 1 (memory / general-register source) use vmovd;
;; alternative 2 (SSE-register source) uses the three-operand vmovss.
7451 (define_insn "*avx_loadld"
7452 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7455 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7456 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7460 vmovd\t{%2, %0|%0, %2}
7461 vmovd\t{%2, %0|%0, %2}
7462 vmovss\t{%2, %1, %0|%0, %1, %2}"
7463 [(set_attr "type" "ssemov")
7464 (set_attr "prefix" "vex")
7465 (set_attr "mode" "TI,TI,V4SF")])
;; Non-AVX form of the SImode-into-V4SI load.  Operand 2 is the SImode
;; source and operand 1 is either zero or a register (reg_or_0_operand).
;; Alternatives 0-1 use movd and alternatives 2-3 use movss; in the last
;; alternative operand 1 is tied to the destination.
7467 (define_insn "sse2_loadld"
7468 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7471 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7472 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7476 movd\t{%2, %0|%0, %2}
7477 movd\t{%2, %0|%0, %2}
7478 movss\t{%2, %0|%0, %2}
7479 movss\t{%2, %0|%0, %2}"
7480 [(set_attr "type" "ssemov")
7481 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Move element 0 of V4SI register operand 1 into SImode operand 0.
;; After reload, provided inter-unit moves are allowed or the destination
;; is memory or a non-general register, the insn is split into a plain
;; SImode move: operand 1 is re-expressed as an SImode register with the
;; same register number.
7483 (define_insn_and_split "sse2_stored"
7484 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7486 (match_operand:V4SI 1 "register_operand" "x,Yi")
7487 (parallel [(const_int 0)])))]
7490 "&& reload_completed
7491 && (TARGET_INTER_UNIT_MOVES
7492 || MEM_P (operands [0])
7493 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7494 [(set (match_dup 0) (match_dup 1))]
7496 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract an SImode element from a V4SI value in offsettable memory
;; (operand 1) into general register operand 0.  The split replaces the
;; insn with an ordinary SImode move from operand 1's address adjusted by
;; 4 * operand 2 bytes.
7499 (define_insn_and_split "*vec_ext_v4si_mem"
7500 [(set (match_operand:SI 0 "register_operand" "=r")
7502 (match_operand:V4SI 1 "memory_operand" "o")
7503 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7509 int i = INTVAL (operands[2]);
7511 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for moving element 0 of V2DI register operand 1 into DImode
;; operand 0.
7515 (define_expand "sse_storeq"
7516 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7518 (match_operand:V2DI 1 "register_operand" "")
7519 (parallel [(const_int 0)])))]
;; 64-bit variant: move element 0 of V2DI operand 1 into DImode operand 0.
;; The first two alternatives carry "*" for type and mode; the third
;; alternative loads a general register from offsettable memory.
;;
;; That third alternative is an ordinary integer load (type "imov"), so
;; it must be emitted as plain mov{q}.  A %v prefix would produce
;; "vmovq mem, reg" under AVX (and "vmov" in Intel syntax), neither of
;; which is a valid instruction, and the insn must not be counted as
;; VEX-encoded, hence prefix "orig" for that alternative.
7523 (define_insn "*sse2_storeq_rex64"
7524 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7526 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7527 (parallel [(const_int 0)])))]
7528 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7532 mov{q}\t{%1, %0|%0, %1}"
7533 [(set_attr "type" "*,*,imov")
7534 (set_attr "prefix" "*,*,orig")
7535 (set_attr "mode" "*,*,DI")])
;; Move element 0 of V2DI register operand 1 into DImode operand 0
;; (memory or SSE register).
;; NOTE(review): the second "[(set ..." form below repeats the same
;; pattern without constraints and looks like a companion split whose
;; header line is not visible here.  Under the visible condition (inter-unit
;; moves allowed, or destination is memory or a non-general register) it
;; becomes a plain DImode move, with operand 1 re-expressed as a DImode
;; register with the same register number.
7537 (define_insn "*sse2_storeq"
7538 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7540 (match_operand:V2DI 1 "register_operand" "x")
7541 (parallel [(const_int 0)])))]
7546 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7548 (match_operand:V2DI 1 "register_operand" "")
7549 (parallel [(const_int 0)])))]
7552 && (TARGET_INTER_UNIT_MOVES
7553 || MEM_P (operands [0])
7554 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7555 [(set (match_dup 0) (match_dup 1))]
7557 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; AVX, 64-bit: extract element 1 of V2DI operand 1 into DImode operand 0.
;;   alt 0: vmovhps store to memory
;;   alt 1: vpsrldq by 8 bytes into an SSE register
;;   alt 2: vmovq load of %H1 from offsettable memory into an SSE register
;;   alt 3: integer load of %H1 from offsettable memory into a general
;;          register
;;
;; Alternative 3 is a general-register load (type "imov"), so it is
;; emitted as plain mov{q}: there is no VEX-encoded "vmovq mem, reg" with
;; a general-register destination.  Its prefix attribute is therefore
;; "orig" while the three SSE alternatives stay "vex".
7560 (define_insn "*vec_extractv2di_1_rex64_avx"
7561 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7563 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7564 (parallel [(const_int 1)])))]
7567 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7569 vmovhps\t{%1, %0|%0, %1}
7570 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7571 vmovq\t{%H1, %0|%0, %H1}
7572 mov{q}\t{%H1, %0|%0, %H1}"
7573 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7574 (set_attr "length_immediate" "*,1,*,*")
7575 (set_attr "memory" "*,none,*,*")
7576 (set_attr "prefix" "vex,vex,vex,orig")
7577 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Non-AVX, 64-bit: extract element 1 of V2DI operand 1 into DImode
;; operand 0.
;;   alt 0: movhps store to memory
;;   alt 1: psrldq by 8 bytes in place (operand 1 tied to operand 0)
;;   alt 2: movq load of %H1 from offsettable memory into an SSE register
;;   alt 3: mov{q} load of %H1 from offsettable memory into a general
;;          register
7579 (define_insn "*vec_extractv2di_1_rex64"
7580 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7582 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7583 (parallel [(const_int 1)])))]
7584 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7586 movhps\t{%1, %0|%0, %1}
7587 psrldq\t{$8, %0|%0, 8}
7588 movq\t{%H1, %0|%0, %H1}
7589 mov{q}\t{%H1, %0|%0, %H1}"
7590 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7591 (set_attr "length_immediate" "*,1,*,*")
7592 (set_attr "atom_unit" "*,sishuf,*,*")
7593 (set_attr "memory" "*,none,*,*")
7594 (set_attr "mode" "V2SF,TI,TI,DI")])
;; AVX variant without a general-register alternative: extract element 1
;; of V2DI operand 1 into DImode operand 0 (memory or SSE register).
;;   alt 0: vmovhps store to memory
;;   alt 1: vpsrldq by 8 bytes into an SSE register
;;   alt 2: vmovq load of %H1 from offsettable memory
7596 (define_insn "*vec_extractv2di_1_avx"
7597 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7599 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7600 (parallel [(const_int 1)])))]
7603 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7605 vmovhps\t{%1, %0|%0, %1}
7606 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7607 vmovq\t{%H1, %0|%0, %H1}"
7608 [(set_attr "type" "ssemov,sseishft,ssemov")
7609 (set_attr "length_immediate" "*,1,*")
7610 (set_attr "memory" "*,none,*")
7611 (set_attr "prefix" "vex")
7612 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 variant without a general-register alternative: extract element 1
;; of V2DI operand 1 into DImode operand 0 (memory or SSE register).
;;   alt 0: movhps store to memory
;;   alt 1: psrldq by 8 bytes in place (operand 1 tied to operand 0)
;;   alt 2: movq load of %H1 from offsettable memory
7614 (define_insn "*vec_extractv2di_1_sse2"
7615 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7617 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7618 (parallel [(const_int 1)])))]
7620 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7622 movhps\t{%1, %0|%0, %1}
7623 psrldq\t{$8, %0|%0, 8}
7624 movq\t{%H1, %0|%0, %H1}"
7625 [(set_attr "type" "ssemov,sseishft,ssemov")
7626 (set_attr "length_immediate" "*,1,*")
7627 (set_attr "atom_unit" "*,sishuf,*")
7628 (set_attr "memory" "*,none,*")
7629 (set_attr "mode" "V2SF,TI,TI")])
;; SSE-only (no SSE2) variant: extract element 1 of V2DI operand 1 into
;; DImode operand 0 using only single-precision move instructions
;; (movhps store, movhlps register move, movlps load of %H1).
7631 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
7632 (define_insn "*vec_extractv2di_1_sse"
7633 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7635 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7636 (parallel [(const_int 1)])))]
7637 "!TARGET_SSE2 && TARGET_SSE
7638 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7640 movhps\t{%1, %0|%0, %1}
7641 movhlps\t{%1, %0|%0, %1}
7642 movlps\t{%H1, %0|%0, %H1}"
7643 [(set_attr "type" "ssemov")
7644 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Build V4SI operand 0 from SImode register operand 1 using a shuffle
;; with immediate 0: (v)pshufd in the first alternative, or shufps on the
;; tied register in the second (which keeps the legacy "orig" encoding).
7646 (define_insn "*vec_dupv4si"
7647 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7649 (match_operand:SI 1 "register_operand" " Y2,0")))]
7652 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7653 shufps\t{$0, %0, %0|%0, %0, 0}"
7654 [(set_attr "type" "sselog1")
7655 (set_attr "prefix" "maybe_vex,orig")
7656 (set_attr "length_immediate" "1")
7657 (set_attr "mode" "TI,V4SF")])
;; AVX: build V2DI operand 0 from DImode register operand 1 by feeding
;; operand 1 to vpunpcklqdq as both sources.
7659 (define_insn "*vec_dupv2di_avx"
7660 [(set (match_operand:V2DI 0 "register_operand" "=x")
7662 (match_operand:DI 1 "register_operand" "x")))]
7664 "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
7665 [(set_attr "type" "sselog1")
7666 (set_attr "prefix" "vex")
7667 (set_attr "mode" "TI")])
;; Non-AVX counterpart of *vec_dupv2di_avx: build V2DI operand 0 from
;; DImode register operand 1, which is tied to the destination in both
;; alternatives (typed sselog1/TI and ssemov/V4SF respectively).
7669 (define_insn "*vec_dupv2di"
7670 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7672 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7677 [(set_attr "type" "sselog1,ssemov")
7678 (set_attr "mode" "TI,V4SF")])
;; AVX: form a V2SI value from SImode operands 1 and 2.
;;   alt 0: vpinsrd inserts operand 2 (register or memory) at index 1
;;   alt 1: vpunpckldq of two SSE registers
;;   alt 2: vmovd of operand 1 when operand 2 is the constant alternative C
;;   alt 3-4: MMX-register forms (punpckldq / movd), which keep the
;;            legacy "orig" prefix while the others are "vex"
7680 (define_insn "*vec_concatv2si_avx"
7681 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7683 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7684 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7687 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7688 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7689 vmovd\t{%1, %0|%0, %1}
7690 punpckldq\t{%2, %0|%0, %2}
7691 movd\t{%1, %0|%0, %1}"
7692 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7693 (set_attr "prefix_extra" "1,*,*,*,*")
7694 (set_attr "length_immediate" "1,*,*,*,*")
7695 (set (attr "prefix")
7696 (if_then_else (eq_attr "alternative" "3,4")
7697 (const_string "orig")
7698 (const_string "vex")))
7699 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; SSE4.1 (two-operand) form of the V2SI concat of SImode operands 1
;; and 2.
;;   alt 0: pinsrd inserts operand 2 at index 1 (operand 1 tied to dest)
;;   alt 1: punpckldq with an SSE register (operand 1 tied to dest)
;;   alt 2: movd of operand 1 when operand 2 is the constant alternative C
;;   alt 3-4: MMX-register forms (punpckldq / movd)
7701 (define_insn "*vec_concatv2si_sse4_1"
7702 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7704 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7705 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7708 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7709 punpckldq\t{%2, %0|%0, %2}
7710 movd\t{%1, %0|%0, %1}
7711 punpckldq\t{%2, %0|%0, %2}
7712 movd\t{%1, %0|%0, %1}"
7713 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7714 (set_attr "prefix_extra" "1,*,*,*,*")
7715 (set_attr "length_immediate" "1,*,*,*,*")
7716 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; SSE2 form of the V2SI concat of SImode operands 1 and 2.  Operand 2 is
;; restricted to a register or zero (reg_or_0_operand); alternatives 0-1
;; use SSE registers (punpckldq / movd) and alternatives 2-3 use MMX
;; registers.
7718 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7719 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7720 ;; alternatives pretty much forces the MMX alternative to be chosen.
7721 (define_insn "*vec_concatv2si_sse2"
7722 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7724 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7725 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7728 punpckldq\t{%2, %0|%0, %2}
7729 movd\t{%1, %0|%0, %1}
7730 punpckldq\t{%2, %0|%0, %2}
7731 movd\t{%1, %0|%0, %1}"
7732 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7733 (set_attr "mode" "TI,TI,DI,DI")])
;; SSE-only form of the V2SI concat of SImode operands 1 and 2, using
;; single-precision instructions for the SSE-register alternatives
;; (unpcklps / movss) and punpckldq / movd for the MMX alternatives.
7735 (define_insn "*vec_concatv2si_sse"
7736 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7738 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7739 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7742 unpcklps\t{%2, %0|%0, %2}
7743 movss\t{%1, %0|%0, %1}
7744 punpckldq\t{%2, %0|%0, %2}
7745 movd\t{%1, %0|%0, %1}"
7746 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7747 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; AVX: form a V4SI value from V2SI operands 1 and 2, using vpunpcklqdq
;; when operand 2 is a register and vmovhps when it is in memory.
7749 (define_insn "*vec_concatv4si_1_avx"
7750 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7752 (match_operand:V2SI 1 "register_operand" " x,x")
7753 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7756 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7757 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7758 [(set_attr "type" "sselog,ssemov")
7759 (set_attr "prefix" "vex")
7760 (set_attr "mode" "TI,V2SF")])
;; Non-AVX: form a V4SI value from V2SI operands 1 and 2 with operand 1
;; tied to the destination.  Uses punpcklqdq, movlhps (register source)
;; or movhps (memory source) depending on the alternative.
7762 (define_insn "*vec_concatv4si_1"
7763 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7765 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7766 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7769 punpcklqdq\t{%2, %0|%0, %2}
7770 movlhps\t{%2, %0|%0, %2}
7771 movhps\t{%2, %0|%0, %2}"
7772 [(set_attr "type" "sselog,ssemov,ssemov")
7773 (set_attr "mode" "TI,V4SF,V2SF")])
;; AVX, 32-bit targets only: form a V2DI value from DImode operands 1
;; and 2.
;;   alt 0: vmovq from memory (operand 2 is the constant alternative C)
;;   alt 1: movq2dq from an MMX register (legacy "orig" prefix)
;;   alt 2: vpunpcklqdq of two SSE registers
;;   alt 3: vmovhps with operand 2 in memory
7775 (define_insn "*vec_concatv2di_avx"
7776 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7778 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7779 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7780 "!TARGET_64BIT && TARGET_AVX"
7782 vmovq\t{%1, %0|%0, %1}
7783 movq2dq\t{%1, %0|%0, %1}
7784 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7785 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7786 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7787 (set (attr "prefix")
7788 (if_then_else (eq_attr "alternative" "1")
7789 (const_string "orig")
7790 (const_string "vex")))
7791 (set_attr "mode" "TI,TI,TI,V2SF")])
;; SSE, 32-bit targets only: form a V2DI value from DImode operands 1
;; and 2.
;;   alt 0: movq of operand 1 (operand 2 is the constant alternative C)
;;   alt 1: movq2dq from an MMX register
;;   alt 2: punpcklqdq (operand 1 tied to dest)
;;   alt 3: movlhps with a register source (operand 1 tied to dest)
;;   alt 4: movhps with a memory source (operand 1 tied to dest)
7793 (define_insn "vec_concatv2di"
7794 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7796 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7797 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7798 "!TARGET_64BIT && TARGET_SSE"
7800 movq\t{%1, %0|%0, %1}
7801 movq2dq\t{%1, %0|%0, %1}
7802 punpcklqdq\t{%2, %0|%0, %2}
7803 movlhps\t{%2, %0|%0, %2}
7804 movhps\t{%2, %0|%0, %2}"
7805 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7806 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; AVX, 64-bit targets: form a V2DI value from DImode operands 1 and 2.
;;   alt 0: vpinsrq inserts operand 2 (general register or memory) at
;;          index 1
;;   alt 1-2: vmovq of operand 1 from memory / a general register
;;   alt 3: movq2dq from an MMX register (legacy "orig" prefix)
;;   alt 4: vpunpcklqdq of two SSE registers
;;   alt 5: vmovhps with operand 2 in memory
7808 (define_insn "*vec_concatv2di_rex64_avx"
7809 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7811 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7812 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7813 "TARGET_64BIT && TARGET_AVX"
7815 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7816 vmovq\t{%1, %0|%0, %1}
7817 vmovq\t{%1, %0|%0, %1}
7818 movq2dq\t{%1, %0|%0, %1}
7819 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7820 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7821 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7822 (set_attr "prefix_extra" "1,*,*,*,*,*")
7823 (set_attr "length_immediate" "1,*,*,*,*,*")
7824 (set (attr "prefix")
7825 (if_then_else (eq_attr "alternative" "3")
7826 (const_string "orig")
7827 (const_string "vex")))
7828 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; SSE4.1, 64-bit targets: form a V2DI value from DImode operands 1
;; and 2.
;;   alt 0: pinsrq inserts operand 2 at index 1 (operand 1 tied to dest)
;;   alt 1-2: movq of operand 1 from memory/SSE register or from a
;;            general register
;;   alt 3: movq2dq from an MMX register
;;   alt 4-6: punpcklqdq / movlhps / movhps with operand 1 tied to dest
;; prefix_rex is forced to 1 for alternatives 0 and 2.
7830 (define_insn "*vec_concatv2di_rex64_sse4_1"
7831 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7833 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7834 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7835 "TARGET_64BIT && TARGET_SSE4_1"
7837 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7838 movq\t{%1, %0|%0, %1}
7839 movq\t{%1, %0|%0, %1}
7840 movq2dq\t{%1, %0|%0, %1}
7841 punpcklqdq\t{%2, %0|%0, %2}
7842 movlhps\t{%2, %0|%0, %2}
7843 movhps\t{%2, %0|%0, %2}"
7844 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7845 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7846 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7847 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7848 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; SSE, 64-bit targets: form a V2DI value from DImode operands 1 and 2.
;;   alt 0-1: movq of operand 1 from memory/SSE register or from a
;;            general register (prefix_rex forced to 1 for alt 1)
;;   alt 2: movq2dq from an MMX register
;;   alt 3-5: punpcklqdq / movlhps / movhps with operand 1 tied to dest
7850 (define_insn "*vec_concatv2di_rex64_sse"
7851 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7853 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7854 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7855 "TARGET_64BIT && TARGET_SSE"
7857 movq\t{%1, %0|%0, %1}
7858 movq\t{%1, %0|%0, %1}
7859 movq2dq\t{%1, %0|%0, %1}
7860 punpcklqdq\t{%2, %0|%0, %2}
7861 movlhps\t{%2, %0|%0, %2}
7862 movhps\t{%2, %0|%0, %2}"
7863 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7864 (set_attr "prefix_rex" "*,1,*,*,*,*")
7865 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Unsigned "hi" unpack of V16QI operand 1 into V8HI operand 0.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack, passing
;; (operands, true, true).
7867 (define_expand "vec_unpacku_hi_v16qi"
7868 [(match_operand:V8HI 0 "register_operand" "")
7869 (match_operand:V16QI 1 "register_operand" "")]
7873 ix86_expand_sse4_unpack (operands, true, true);
7875 ix86_expand_sse_unpack (operands, true, true);
;; Signed "hi" unpack of V16QI operand 1 into V8HI operand 0.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack, passing
;; (operands, false, true).
7879 (define_expand "vec_unpacks_hi_v16qi"
7880 [(match_operand:V8HI 0 "register_operand" "")
7881 (match_operand:V16QI 1 "register_operand" "")]
7885 ix86_expand_sse4_unpack (operands, false, true);
7887 ix86_expand_sse_unpack (operands, false, true);
;; Unsigned "lo" unpack of V16QI operand 1 into V8HI operand 0.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack, passing
;; (operands, true, false).
7891 (define_expand "vec_unpacku_lo_v16qi"
7892 [(match_operand:V8HI 0 "register_operand" "")
7893 (match_operand:V16QI 1 "register_operand" "")]
7897 ix86_expand_sse4_unpack (operands, true, false);
7899 ix86_expand_sse_unpack (operands, true, false);
;; Signed "lo" unpack of V16QI operand 1 into V8HI operand 0.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack, passing
;; (operands, false, false).
7903 (define_expand "vec_unpacks_lo_v16qi"
7904 [(match_operand:V8HI 0 "register_operand" "")
7905 (match_operand:V16QI 1 "register_operand" "")]
7909 ix86_expand_sse4_unpack (operands, false, false);
7911 ix86_expand_sse_unpack (operands, false, false);
;; Unsigned "hi" unpack of V8HI operand 1 into V4SI operand 0.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack, passing
;; (operands, true, true).
7915 (define_expand "vec_unpacku_hi_v8hi"
7916 [(match_operand:V4SI 0 "register_operand" "")
7917 (match_operand:V8HI 1 "register_operand" "")]
7921 ix86_expand_sse4_unpack (operands, true, true);
7923 ix86_expand_sse_unpack (operands, true, true);
;; Signed "hi" unpack of V8HI operand 1 into V4SI operand 0.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack, passing
;; (operands, false, true).
7927 (define_expand "vec_unpacks_hi_v8hi"
7928 [(match_operand:V4SI 0 "register_operand" "")
7929 (match_operand:V8HI 1 "register_operand" "")]
7933 ix86_expand_sse4_unpack (operands, false, true);
7935 ix86_expand_sse_unpack (operands, false, true);
;; Unsigned "lo" unpack of V8HI operand 1 into V4SI operand 0.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack, passing
;; (operands, true, false).
7939 (define_expand "vec_unpacku_lo_v8hi"
7940 [(match_operand:V4SI 0 "register_operand" "")
7941 (match_operand:V8HI 1 "register_operand" "")]
7945 ix86_expand_sse4_unpack (operands, true, false);
7947 ix86_expand_sse_unpack (operands, true, false);
;; Signed "lo" unpack of V8HI operand 1 into V4SI operand 0.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack, passing
;; (operands, false, false).
7951 (define_expand "vec_unpacks_lo_v8hi"
7952 [(match_operand:V4SI 0 "register_operand" "")
7953 (match_operand:V8HI 1 "register_operand" "")]
7957 ix86_expand_sse4_unpack (operands, false, false);
7959 ix86_expand_sse_unpack (operands, false, false);
;; Unsigned "hi" unpack of V4SI operand 1 into V2DI operand 0.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack, passing
;; (operands, true, true).
7963 (define_expand "vec_unpacku_hi_v4si"
7964 [(match_operand:V2DI 0 "register_operand" "")
7965 (match_operand:V4SI 1 "register_operand" "")]
7969 ix86_expand_sse4_unpack (operands, true, true);
7971 ix86_expand_sse_unpack (operands, true, true);
;; Signed "hi" unpack of V4SI operand 1 into V2DI operand 0.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack, passing
;; (operands, false, true).
7975 (define_expand "vec_unpacks_hi_v4si"
7976 [(match_operand:V2DI 0 "register_operand" "")
7977 (match_operand:V4SI 1 "register_operand" "")]
7981 ix86_expand_sse4_unpack (operands, false, true);
7983 ix86_expand_sse_unpack (operands, false, true);
;; Unsigned "lo" unpack of V4SI operand 1 into V2DI operand 0.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack, passing
;; (operands, true, false).
7987 (define_expand "vec_unpacku_lo_v4si"
7988 [(match_operand:V2DI 0 "register_operand" "")
7989 (match_operand:V4SI 1 "register_operand" "")]
7993 ix86_expand_sse4_unpack (operands, true, false);
7995 ix86_expand_sse_unpack (operands, true, false);
;; Signed "lo" unpack of V4SI operand 1 into V2DI operand 0.  Delegates
;; to ix86_expand_sse4_unpack or ix86_expand_sse_unpack, passing
;; (operands, false, false).
7999 (define_expand "vec_unpacks_lo_v4si"
8000 [(match_operand:V2DI 0 "register_operand" "")
8001 (match_operand:V4SI 1 "register_operand" "")]
8005 ix86_expand_sse4_unpack (operands, false, false);
8007 ix86_expand_sse_unpack (operands, false, false);
8011 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8015 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI unsigned average of operands 1 and 2.  The
;; pattern includes a V16QI constant vector of ones; the preparation
;; statement calls ix86_fixup_binary_operands_no_copy for PLUS on the
;; operands.
8017 (define_expand "sse2_uavgv16qi3"
8018 [(set (match_operand:V16QI 0 "register_operand" "")
8024 (match_operand:V16QI 1 "nonimmediate_operand" ""))
8026 (match_operand:V16QI 2 "nonimmediate_operand" "")))
8027 (const_vector:V16QI [(const_int 1) (const_int 1)
8028 (const_int 1) (const_int 1)
8029 (const_int 1) (const_int 1)
8030 (const_int 1) (const_int 1)
8031 (const_int 1) (const_int 1)
8032 (const_int 1) (const_int 1)
8033 (const_int 1) (const_int 1)
8034 (const_int 1) (const_int 1)]))
8037 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX: V16QI unsigned average emitted as three-operand vpavgb.  The
;; operands are commutative ("%") and must pass ix86_binary_operator_ok
;; for PLUS.
8039 (define_insn "*avx_uavgv16qi3"
8040 [(set (match_operand:V16QI 0 "register_operand" "=x")
8046 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
8048 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
8049 (const_vector:V16QI [(const_int 1) (const_int 1)
8050 (const_int 1) (const_int 1)
8051 (const_int 1) (const_int 1)
8052 (const_int 1) (const_int 1)
8053 (const_int 1) (const_int 1)
8054 (const_int 1) (const_int 1)
8055 (const_int 1) (const_int 1)
8056 (const_int 1) (const_int 1)]))
8058 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
8059 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
8060 [(set_attr "type" "sseiadd")
8061 (set_attr "prefix" "vex")
8062 (set_attr "mode" "TI")])
;; SSE2: V16QI unsigned average emitted as two-operand pavgb with
;; operand 1 tied to the destination.  The operands are commutative ("%")
;; and must pass ix86_binary_operator_ok for PLUS.
8064 (define_insn "*sse2_uavgv16qi3"
8065 [(set (match_operand:V16QI 0 "register_operand" "=x")
8071 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
8073 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
8074 (const_vector:V16QI [(const_int 1) (const_int 1)
8075 (const_int 1) (const_int 1)
8076 (const_int 1) (const_int 1)
8077 (const_int 1) (const_int 1)
8078 (const_int 1) (const_int 1)
8079 (const_int 1) (const_int 1)
8080 (const_int 1) (const_int 1)
8081 (const_int 1) (const_int 1)]))
8083 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
8084 "pavgb\t{%2, %0|%0, %2}"
8085 [(set_attr "type" "sseiadd")
8086 (set_attr "prefix_data16" "1")
8087 (set_attr "mode" "TI")])
;; Expander for the V8HI unsigned average of operands 1 and 2.  The
;; pattern includes a V8HI constant vector of ones; the preparation
;; statement calls ix86_fixup_binary_operands_no_copy for PLUS on the
;; operands.
8089 (define_expand "sse2_uavgv8hi3"
8090 [(set (match_operand:V8HI 0 "register_operand" "")
8096 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8098 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8099 (const_vector:V8HI [(const_int 1) (const_int 1)
8100 (const_int 1) (const_int 1)
8101 (const_int 1) (const_int 1)
8102 (const_int 1) (const_int 1)]))
8105 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX: V8HI unsigned average emitted as three-operand vpavgw.  The
;; operands are commutative ("%") and must pass ix86_binary_operator_ok
;; for PLUS.
8107 (define_insn "*avx_uavgv8hi3"
8108 [(set (match_operand:V8HI 0 "register_operand" "=x")
8114 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8116 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8117 (const_vector:V8HI [(const_int 1) (const_int 1)
8118 (const_int 1) (const_int 1)
8119 (const_int 1) (const_int 1)
8120 (const_int 1) (const_int 1)]))
8122 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8123 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
8124 [(set_attr "type" "sseiadd")
8125 (set_attr "prefix" "vex")
8126 (set_attr "mode" "TI")])
;; SSE2: V8HI unsigned average emitted as two-operand pavgw with
;; operand 1 tied to the destination.  The operands are commutative ("%")
;; and must pass ix86_binary_operator_ok for PLUS.
8128 (define_insn "*sse2_uavgv8hi3"
8129 [(set (match_operand:V8HI 0 "register_operand" "=x")
8135 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8137 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8138 (const_vector:V8HI [(const_int 1) (const_int 1)
8139 (const_int 1) (const_int 1)
8140 (const_int 1) (const_int 1)
8141 (const_int 1) (const_int 1)]))
8143 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8144 "pavgw\t{%2, %0|%0, %2}"
8145 [(set_attr "type" "sseiadd")
8146 (set_attr "prefix_data16" "1")
8147 (set_attr "mode" "TI")])
;; AVX vpsadbw on V16QI operands 1 and 2 producing a V2DI result.  The
;; operation is modelled as an opaque unspec rather than spelled out in
;; RTL, for the reason given below.
8149 ;; The correct representation for this is absolutely enormous, and
8150 ;; surely not generally useful.
8151 (define_insn "*avx_psadbw"
8152 [(set (match_operand:V2DI 0 "register_operand" "=x")
8153 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
8154 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8157 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
8158 [(set_attr "type" "sseiadd")
8159 (set_attr "prefix" "vex")
8160 (set_attr "mode" "TI")])
;; SSE2 psadbw on V16QI operands 1 and 2 producing a V2DI result, modelled
;; as an unspec.  Two-operand form: operand 1 is tied to the destination.
8162 (define_insn "sse2_psadbw"
8163 [(set (match_operand:V2DI 0 "register_operand" "=x")
8164 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
8165 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8168 "psadbw\t{%2, %0|%0, %2}"
8169 [(set_attr "type" "sseiadd")
8170 (set_attr "atom_unit" "simul")
8171 (set_attr "prefix_data16" "1")
8172 (set_attr "mode" "TI")])
;; 256-bit AVX vmovmskps/vmovmskpd: SSE register operand 1 (a mode from
;; the AVX256MODEF2P iterator) to SImode general register operand 0.
;; Enabled when AVX256_VEC_FLOAT_MODE_P holds for the mode.
8174 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
8175 [(set (match_operand:SI 0 "register_operand" "=r")
8177 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8179 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8180 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
8181 [(set_attr "type" "ssecvt")
8182 (set_attr "prefix" "vex")
8183 (set_attr "mode" "<MODE>")])
;; 128-bit (v)movmskps/(v)movmskpd: SSE register operand 1 (a mode from
;; the SSEMODEF2P iterator) to SImode general register operand 0.
;; Enabled when SSE_VEC_FLOAT_MODE_P holds for the mode.
8185 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
8186 [(set (match_operand:SI 0 "register_operand" "=r")
8188 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8190 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8191 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
8192 [(set_attr "type" "ssemov")
8193 (set_attr "prefix" "maybe_vex")
8194 (set_attr "mode" "<MODE>")])
;; (v)pmovmskb: V16QI SSE register operand 1 to SImode general register
;; operand 0, modelled as an unspec.
8196 (define_insn "sse2_pmovmskb"
8197 [(set (match_operand:SI 0 "register_operand" "=r")
8198 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8201 "%vpmovmskb\t{%1, %0|%0, %1}"
8202 [(set_attr "type" "ssemov")
8203 (set_attr "prefix_data16" "1")
8204 (set_attr "prefix" "maybe_vex")
8205 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: sets V16QI memory operand 0 from an unspec of
;; V16QI register operands 1 and 2.
8207 (define_expand "sse2_maskmovdqu"
8208 [(set (match_operand:V16QI 0 "memory_operand" "")
8209 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8210 (match_operand:V16QI 2 "register_operand" "")
8216 (define_insn "*sse2_maskmovdqu"
8216 (define_insn "*sse2_maskmovdqu"
;; 32-bit maskmovdqu body.  The store address is SImode register operand 0
;; with constraint "D"; the destination memory also appears as an input of
;; the unspec via (match_dup 0).  The address register is not printed in
;; the template, so length_vex is given explicitly.
8217 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8218 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8219 (match_operand:V16QI 2 "register_operand" "x")
8220 (mem:V16QI (match_dup 0))]
8222 "TARGET_SSE2 && !TARGET_64BIT"
8223 ;; @@@ check ordering of operands in intel/nonintel syntax
8224 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8225 [(set_attr "type" "ssemov")
8226 (set_attr "prefix_data16" "1")
8227 ;; The implicit %rdi operand confuses default length_vex computation.
8228 (set_attr "length_vex" "3")
8229 (set_attr "prefix" "maybe_vex")
8230 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu.  The store address is DImode register operand 0 with
;; constraint "D"; the destination memory also appears as an input of the
;; unspec via (match_dup 0).  length_vex is computed by hand: 3 + 1 when
;; operand 2 is a REX SSE register (REGNO >= FIRST_REX_SSE_REG), otherwise
;; 2 + 1.
8232 (define_insn "*sse2_maskmovdqu_rex64"
8233 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8234 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8235 (match_operand:V16QI 2 "register_operand" "x")
8236 (mem:V16QI (match_dup 0))]
8238 "TARGET_SSE2 && TARGET_64BIT"
8239 ;; @@@ check ordering of operands in intel/nonintel syntax
8240 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8241 [(set_attr "type" "ssemov")
8242 (set_attr "prefix_data16" "1")
8243 ;; The implicit %rdi operand confuses default length_vex computation.
8244 (set (attr "length_vex")
8245 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8246 (set_attr "prefix" "maybe_vex")
8247 (set_attr "mode" "TI")])
;; Load MXCSR from SImode memory operand 0.  Modelled as an
;; unspec_volatile and marked as a memory load.
8249 (define_insn "sse_ldmxcsr"
8250 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8254 [(set_attr "type" "sse")
8255 (set_attr "atom_sse_attr" "mxcsr")
8256 (set_attr "prefix" "maybe_vex")
8257 (set_attr "memory" "load")])
;; Store MXCSR into SImode memory operand 0.  The source is an
;; unspec_volatile (UNSPECV_STMXCSR) and the insn is marked as a memory
;; store.
8259 (define_insn "sse_stmxcsr"
8260 [(set (match_operand:SI 0 "memory_operand" "=m")
8261 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8264 [(set_attr "type" "sse")
8265 (set_attr "atom_sse_attr" "mxcsr")
8266 (set_attr "prefix" "maybe_vex")
8267 (set_attr "memory" "store")])
;; Expander for sfence.  The fence is expressed as an UNSPEC_SFENCE on a
;; BLKmode memory reference; the preparation code creates that reference
;; with a scratch address and marks it volatile.
8269 (define_expand "sse_sfence"
8271 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8272 "TARGET_SSE || TARGET_3DNOW_A"
8274 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8275 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the sfence pattern produced by the sse_sfence expander.
;; The BLKmode operand carries no address bytes (length_address 0) and
;; the memory effect is declared "unknown".
8278 (define_insn "*sse_sfence"
8279 [(set (match_operand:BLK 0 "" "")
8280 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8281 "TARGET_SSE || TARGET_3DNOW_A"
8283 [(set_attr "type" "sse")
8284 (set_attr "length_address" "0")
8285 (set_attr "atom_sse_attr" "fence")
8286 (set_attr "memory" "unknown")])
;; clflush on the address in operand 0 (address_operand, constraint "p").
;; Modelled as an unspec_volatile with "unknown" memory effect.
8288 (define_insn "sse2_clflush"
8289 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8293 [(set_attr "type" "sse")
8294 (set_attr "atom_sse_attr" "fence")
8295 (set_attr "memory" "unknown")])
;; Expander for mfence.  The fence is expressed as an UNSPEC_MFENCE on a
;; BLKmode memory reference; the preparation code creates that reference
;; with a scratch address and marks it volatile.
8297 (define_expand "sse2_mfence"
8299 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8302 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8303 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the mfence pattern produced by the sse2_mfence expander.
;; Enabled for 64-bit targets or when SSE2 is available.
8306 (define_insn "*sse2_mfence"
8307 [(set (match_operand:BLK 0 "" "")
8308 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8309 "TARGET_64BIT || TARGET_SSE2"
8311 [(set_attr "type" "sse")
8312 (set_attr "length_address" "0")
8313 (set_attr "atom_sse_attr" "fence")
8314 (set_attr "memory" "unknown")])
;; Expander for lfence.  The fence is expressed as an UNSPEC_LFENCE on a
;; BLKmode memory reference; the preparation code creates that reference
;; with a scratch address and marks it volatile.
8316 (define_expand "sse2_lfence"
8318 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8321 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8322 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the lfence pattern produced by the sse2_lfence expander.
;; Unlike the other fences it uses the dedicated "lfence" value for
;; atom_sse_attr.
8325 (define_insn "*sse2_lfence"
8326 [(set (match_operand:BLK 0 "" "")
8327 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8330 [(set_attr "type" "sse")
8331 (set_attr "length_address" "0")
8332 (set_attr "atom_sse_attr" "lfence")
8333 (set_attr "memory" "unknown")])
;; mwait with its two SImode arguments pinned by constraints "a" and "c".
;; Modelled as an unspec_volatile; the instruction length is fixed at 3.
8335 (define_insn "sse3_mwait"
8336 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8337 (match_operand:SI 1 "register_operand" "c")]
8340 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8341 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8342 ;; we only need to set up 32bit registers.
8344 [(set_attr "length" "3")])
;; 32-bit monitor: three SImode arguments pinned by constraints "a", "c"
;; and "d".  Modelled as an unspec_volatile; the instruction length is
;; fixed at 3.
8346 (define_insn "sse3_monitor"
8347 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8348 (match_operand:SI 1 "register_operand" "c")
8349 (match_operand:SI 2 "register_operand" "d")]
8351 "TARGET_SSE3 && !TARGET_64BIT"
8352 "monitor\t%0, %1, %2"
8353 [(set_attr "length" "3")])
;; 64-bit monitor: operand 0 is DImode (constraint "a") while operands 1
;; and 2 stay SImode (constraints "c" and "d"), for the reason given in
;; the comment inside the pattern.  The instruction length is fixed at 3.
8355 (define_insn "sse3_monitor64"
8356 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8357 (match_operand:SI 1 "register_operand" "c")
8358 (match_operand:SI 2 "register_operand" "d")]
8360 "TARGET_SSE3 && TARGET_64BIT"
8361 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8362 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8363 ;; zero extended to 64bit, we only need to set up 32bit registers.
8365 [(set_attr "length" "3")])
8367 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8369 ;; SSSE3 instructions
8371 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vphaddw on V8HI.  The pattern selects the element pairs (0,1),
;; (2,3), (4,5) and (6,7) first from register operand 1 and then from
;; operand 2 (register or memory); three-operand VEX form.
8373 (define_insn "*avx_phaddwv8hi3"
8374 [(set (match_operand:V8HI 0 "register_operand" "=x")
8380 (match_operand:V8HI 1 "register_operand" "x")
8381 (parallel [(const_int 0)]))
8382 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8384 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8385 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8388 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8389 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8391 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8392 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8397 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8398 (parallel [(const_int 0)]))
8399 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8401 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8402 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8405 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8406 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8408 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8409 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8411 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8412 [(set_attr "type" "sseiadd")
8413 (set_attr "prefix_extra" "1")
8414 (set_attr "prefix" "vex")
8415 (set_attr "mode" "TI")])
;; SSSE3 phaddw on V8HI.  The pattern selects the element pairs (0,1),
;; (2,3), (4,5) and (6,7) first from operand 1 and then from operand 2
;; (register or memory); two-operand form with operand 1 tied to the
;; destination.
8417 (define_insn "ssse3_phaddwv8hi3"
8418 [(set (match_operand:V8HI 0 "register_operand" "=x")
8424 (match_operand:V8HI 1 "register_operand" "0")
8425 (parallel [(const_int 0)]))
8426 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8428 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8429 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8432 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8433 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8435 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8436 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8441 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8442 (parallel [(const_int 0)]))
8443 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8445 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8446 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8449 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8450 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8452 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8453 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8455 "phaddw\t{%2, %0|%0, %2}"
8456 [(set_attr "type" "sseiadd")
8457 (set_attr "atom_unit" "complex")
8458 (set_attr "prefix_data16" "1")
8459 (set_attr "prefix_extra" "1")
8460 (set_attr "mode" "TI")])
;; 64-bit (V4HI) form of "phaddw" using "y" (MMX) registers.  Operand 1
;; is tied to the destination ("0"); element pairs 0/1 and 2/3 are taken
;; from operand 1 and then from operand 2 (register or memory, "ym").
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
8462 (define_insn "ssse3_phaddwv4hi3"
8463 [(set (match_operand:V4HI 0 "register_operand" "=y")
8468 (match_operand:V4HI 1 "register_operand" "0")
8469 (parallel [(const_int 0)]))
8470 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8472 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8473 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8477 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8478 (parallel [(const_int 0)]))
8479 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8481 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8482 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8484 "phaddw\t{%2, %0|%0, %2}"
8485 [(set_attr "type" "sseiadd")
8486 (set_attr "atom_unit" "complex")
8487 (set_attr "prefix_extra" "1")
8488 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8489 (set_attr "mode" "DI")])
;; Unnamed AVX pattern printing the three-operand "vphaddd" on V4SI.
;; SImode element pairs 0/1 and 2/3 are selected from operand 1
;; (register, "x") and then from operand 2 (register or memory, "xm").
8491 (define_insn "*avx_phadddv4si3"
8492 [(set (match_operand:V4SI 0 "register_operand" "=x")
8497 (match_operand:V4SI 1 "register_operand" "x")
8498 (parallel [(const_int 0)]))
8499 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8501 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8502 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8506 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8507 (parallel [(const_int 0)]))
8508 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8510 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8511 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8513 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8514 [(set_attr "type" "sseiadd")
8515 (set_attr "prefix_extra" "1")
8516 (set_attr "prefix" "vex")
8517 (set_attr "mode" "TI")])
;; SSSE3 pattern printing the two-operand "phaddd" on V4SI.  Operand 1
;; is tied to the destination ("0"); operand 2 is a register or memory.
;; SImode pairs 0/1 and 2/3 come from operand 1, then from operand 2.
8519 (define_insn "ssse3_phadddv4si3"
8520 [(set (match_operand:V4SI 0 "register_operand" "=x")
8525 (match_operand:V4SI 1 "register_operand" "0")
8526 (parallel [(const_int 0)]))
8527 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8529 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8530 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8534 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8535 (parallel [(const_int 0)]))
8536 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8538 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8539 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8541 "phaddd\t{%2, %0|%0, %2}"
8542 [(set_attr "type" "sseiadd")
8543 (set_attr "atom_unit" "complex")
8544 (set_attr "prefix_data16" "1")
8545 (set_attr "prefix_extra" "1")
8546 (set_attr "mode" "TI")])
;; 64-bit (V2SI) form of "phaddd" using "y" (MMX) registers.  Operand 1
;; is tied to the destination; the single pair 0/1 is taken from
;; operand 1 and then from operand 2 (register or memory, "ym").
8548 (define_insn "ssse3_phadddv2si3"
8549 [(set (match_operand:V2SI 0 "register_operand" "=y")
8553 (match_operand:V2SI 1 "register_operand" "0")
8554 (parallel [(const_int 0)]))
8555 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8558 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8559 (parallel [(const_int 0)]))
8560 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8562 "phaddd\t{%2, %0|%0, %2}"
8563 [(set_attr "type" "sseiadd")
8564 (set_attr "atom_unit" "complex")
8565 (set_attr "prefix_extra" "1")
8566 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8567 (set_attr "mode" "DI")])
;; Unnamed AVX pattern printing the three-operand "vphaddsw" on V8HI.
;; HImode pairs 0/1 .. 6/7 are selected from operand 1 (register, "x")
;; and then from operand 2 (register or memory, "xm").
8569 (define_insn "*avx_phaddswv8hi3"
8570 [(set (match_operand:V8HI 0 "register_operand" "=x")
8576 (match_operand:V8HI 1 "register_operand" "x")
8577 (parallel [(const_int 0)]))
8578 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8580 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8581 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8584 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8585 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8587 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8588 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8593 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8594 (parallel [(const_int 0)]))
8595 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8597 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8598 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8601 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8602 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8604 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8605 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8607 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8608 [(set_attr "type" "sseiadd")
8609 (set_attr "prefix_extra" "1")
8610 (set_attr "prefix" "vex")
8611 (set_attr "mode" "TI")])
;; SSSE3 pattern printing the two-operand "phaddsw" on V8HI.  Operand 1
;; is tied to the destination ("0"); operand 2 is a register or memory.
;; HImode pairs 0/1 .. 6/7 come from operand 1, then from operand 2.
8613 (define_insn "ssse3_phaddswv8hi3"
8614 [(set (match_operand:V8HI 0 "register_operand" "=x")
8620 (match_operand:V8HI 1 "register_operand" "0")
8621 (parallel [(const_int 0)]))
8622 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8624 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8625 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8628 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8629 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8631 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8632 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8637 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8638 (parallel [(const_int 0)]))
8639 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8641 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8642 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8645 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8646 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8648 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8649 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8651 "phaddsw\t{%2, %0|%0, %2}"
8652 [(set_attr "type" "sseiadd")
8653 (set_attr "atom_unit" "complex")
8654 (set_attr "prefix_data16" "1")
8655 (set_attr "prefix_extra" "1")
8656 (set_attr "mode" "TI")])
;; 64-bit (V4HI) form of "phaddsw" using "y" (MMX) registers.  Operand 1
;; is tied to the destination; pairs 0/1 and 2/3 are taken from
;; operand 1 and then from operand 2 (register or memory, "ym").
8658 (define_insn "ssse3_phaddswv4hi3"
8659 [(set (match_operand:V4HI 0 "register_operand" "=y")
8664 (match_operand:V4HI 1 "register_operand" "0")
8665 (parallel [(const_int 0)]))
8666 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8668 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8669 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8673 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8674 (parallel [(const_int 0)]))
8675 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8677 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8678 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8680 "phaddsw\t{%2, %0|%0, %2}"
8681 [(set_attr "type" "sseiadd")
8682 (set_attr "atom_unit" "complex")
8683 (set_attr "prefix_extra" "1")
8684 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8685 (set_attr "mode" "DI")])
;; Unnamed AVX pattern printing the three-operand "vphsubw" on V8HI.
;; HImode pairs 0/1 .. 6/7 are selected from operand 1 (register, "x")
;; and then from operand 2 (register or memory, "xm").
8687 (define_insn "*avx_phsubwv8hi3"
8688 [(set (match_operand:V8HI 0 "register_operand" "=x")
8694 (match_operand:V8HI 1 "register_operand" "x")
8695 (parallel [(const_int 0)]))
8696 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8698 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8699 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8702 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8703 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8705 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8706 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8711 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8712 (parallel [(const_int 0)]))
8713 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8715 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8716 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8719 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8720 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8722 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8723 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8725 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8726 [(set_attr "type" "sseiadd")
8727 (set_attr "prefix_extra" "1")
8728 (set_attr "prefix" "vex")
8729 (set_attr "mode" "TI")])
;; SSSE3 pattern printing the two-operand "phsubw" on V8HI.  Operand 1
;; is tied to the destination ("0"); operand 2 is a register or memory.
;; HImode pairs 0/1 .. 6/7 come from operand 1, then from operand 2.
8731 (define_insn "ssse3_phsubwv8hi3"
8732 [(set (match_operand:V8HI 0 "register_operand" "=x")
8738 (match_operand:V8HI 1 "register_operand" "0")
8739 (parallel [(const_int 0)]))
8740 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8742 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8743 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8746 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8747 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8749 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8750 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8755 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8756 (parallel [(const_int 0)]))
8757 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8759 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8760 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8763 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8764 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8766 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8767 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8769 "phsubw\t{%2, %0|%0, %2}"
8770 [(set_attr "type" "sseiadd")
8771 (set_attr "atom_unit" "complex")
8772 (set_attr "prefix_data16" "1")
8773 (set_attr "prefix_extra" "1")
8774 (set_attr "mode" "TI")])
;; 64-bit (V4HI) form of "phsubw" using "y" (MMX) registers.  Operand 1
;; is tied to the destination; pairs 0/1 and 2/3 are taken from
;; operand 1 and then from operand 2 (register or memory, "ym").
8776 (define_insn "ssse3_phsubwv4hi3"
8777 [(set (match_operand:V4HI 0 "register_operand" "=y")
8782 (match_operand:V4HI 1 "register_operand" "0")
8783 (parallel [(const_int 0)]))
8784 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8786 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8787 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8791 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8792 (parallel [(const_int 0)]))
8793 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8795 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8796 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8798 "phsubw\t{%2, %0|%0, %2}"
8799 [(set_attr "type" "sseiadd")
8800 (set_attr "atom_unit" "complex")
8801 (set_attr "prefix_extra" "1")
8802 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8803 (set_attr "mode" "DI")])
;; Unnamed AVX pattern printing the three-operand "vphsubd" on V4SI.
;; SImode pairs 0/1 and 2/3 are selected from operand 1 (register, "x")
;; and then from operand 2 (register or memory, "xm").
8805 (define_insn "*avx_phsubdv4si3"
8806 [(set (match_operand:V4SI 0 "register_operand" "=x")
8811 (match_operand:V4SI 1 "register_operand" "x")
8812 (parallel [(const_int 0)]))
8813 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8815 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8816 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8820 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8821 (parallel [(const_int 0)]))
8822 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8824 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8825 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8827 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8828 [(set_attr "type" "sseiadd")
8829 (set_attr "prefix_extra" "1")
8830 (set_attr "prefix" "vex")
8831 (set_attr "mode" "TI")])
;; SSSE3 pattern printing the two-operand "phsubd" on V4SI.  Operand 1
;; is tied to the destination ("0"); operand 2 is a register or memory.
;; SImode pairs 0/1 and 2/3 come from operand 1, then from operand 2.
8833 (define_insn "ssse3_phsubdv4si3"
8834 [(set (match_operand:V4SI 0 "register_operand" "=x")
8839 (match_operand:V4SI 1 "register_operand" "0")
8840 (parallel [(const_int 0)]))
8841 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8843 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8844 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8848 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8849 (parallel [(const_int 0)]))
8850 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8852 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8853 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8855 "phsubd\t{%2, %0|%0, %2}"
8856 [(set_attr "type" "sseiadd")
8857 (set_attr "atom_unit" "complex")
8858 (set_attr "prefix_data16" "1")
8859 (set_attr "prefix_extra" "1")
8860 (set_attr "mode" "TI")])
;; 64-bit (V2SI) form of "phsubd" using "y" (MMX) registers.  Operand 1
;; is tied to the destination; the single pair 0/1 is taken from
;; operand 1 and then from operand 2 (register or memory, "ym").
8862 (define_insn "ssse3_phsubdv2si3"
8863 [(set (match_operand:V2SI 0 "register_operand" "=y")
8867 (match_operand:V2SI 1 "register_operand" "0")
8868 (parallel [(const_int 0)]))
8869 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8872 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8873 (parallel [(const_int 0)]))
8874 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8876 "phsubd\t{%2, %0|%0, %2}"
8877 [(set_attr "type" "sseiadd")
8878 (set_attr "atom_unit" "complex")
8879 (set_attr "prefix_extra" "1")
8880 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8881 (set_attr "mode" "DI")])
;; Unnamed AVX pattern printing the three-operand "vphsubsw" on V8HI.
;; HImode pairs 0/1 .. 6/7 are selected from operand 1 (register, "x")
;; and then from operand 2 (register or memory, "xm").
8883 (define_insn "*avx_phsubswv8hi3"
8884 [(set (match_operand:V8HI 0 "register_operand" "=x")
8890 (match_operand:V8HI 1 "register_operand" "x")
8891 (parallel [(const_int 0)]))
8892 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8894 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8895 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8898 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8899 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8901 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8902 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8907 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8908 (parallel [(const_int 0)]))
8909 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8911 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8912 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8915 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8916 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8918 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8919 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8921 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8922 [(set_attr "type" "sseiadd")
8923 (set_attr "prefix_extra" "1")
8924 (set_attr "prefix" "vex")
8925 (set_attr "mode" "TI")])
;; SSSE3 pattern printing the two-operand "phsubsw" on V8HI.  Operand 1
;; is tied to the destination ("0"); operand 2 is a register or memory.
;; HImode pairs 0/1 .. 6/7 come from operand 1, then from operand 2.
8927 (define_insn "ssse3_phsubswv8hi3"
8928 [(set (match_operand:V8HI 0 "register_operand" "=x")
8934 (match_operand:V8HI 1 "register_operand" "0")
8935 (parallel [(const_int 0)]))
8936 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8938 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8939 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8942 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8943 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8945 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8946 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8951 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8952 (parallel [(const_int 0)]))
8953 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8955 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8956 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8959 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8960 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8962 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8963 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8965 "phsubsw\t{%2, %0|%0, %2}"
8966 [(set_attr "type" "sseiadd")
8967 (set_attr "atom_unit" "complex")
8968 (set_attr "prefix_data16" "1")
8969 (set_attr "prefix_extra" "1")
8970 (set_attr "mode" "TI")])
;; 64-bit (V4HI) form of "phsubsw" using "y" (MMX) registers.  Operand 1
;; is tied to the destination; pairs 0/1 and 2/3 are taken from
;; operand 1 and then from operand 2 (register or memory, "ym").
8972 (define_insn "ssse3_phsubswv4hi3"
8973 [(set (match_operand:V4HI 0 "register_operand" "=y")
8978 (match_operand:V4HI 1 "register_operand" "0")
8979 (parallel [(const_int 0)]))
8980 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8982 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8983 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8987 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8988 (parallel [(const_int 0)]))
8989 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8991 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8992 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8994 "phsubsw\t{%2, %0|%0, %2}"
8995 [(set_attr "type" "sseiadd")
8996 (set_attr "atom_unit" "complex")
8997 (set_attr "prefix_extra" "1")
8998 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8999 (set_attr "mode" "DI")])
;; Unnamed AVX pattern printing the three-operand "vpmaddubsw": two
;; V16QI inputs (operand 1 register, operand 2 register or memory)
;; produce a V8HI result.  Each input is sub-selected twice, once by an
;; index list starting at 0 and once by a list starting at 1 and ending
;; at 15 -- presumably the even and odd bytes; TODO confirm against the
;; complete index lists.
9001 (define_insn "*avx_pmaddubsw128"
9002 [(set (match_operand:V8HI 0 "register_operand" "=x")
9007 (match_operand:V16QI 1 "register_operand" "x")
9008 (parallel [(const_int 0)
9018 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9019 (parallel [(const_int 0)
9029 (vec_select:V16QI (match_dup 1)
9030 (parallel [(const_int 1)
9039 (vec_select:V16QI (match_dup 2)
9040 (parallel [(const_int 1)
9047 (const_int 15)]))))))]
9049 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9050 [(set_attr "type" "sseiadd")
9051 (set_attr "prefix_extra" "1")
9052 (set_attr "prefix" "vex")
9053 (set_attr "mode" "TI")])
;; SSSE3 pattern printing the two-operand "pmaddubsw": V16QI inputs,
;; V8HI result.  Operand 1 is tied to the destination ("0"); operand 2
;; may be a register or memory.  Each input is sub-selected by one index
;; list starting at 0 and one starting at 1 (presumably even/odd bytes;
;; TODO confirm against the complete index lists).
9055 (define_insn "ssse3_pmaddubsw128"
9056 [(set (match_operand:V8HI 0 "register_operand" "=x")
9061 (match_operand:V16QI 1 "register_operand" "0")
9062 (parallel [(const_int 0)
9072 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9073 (parallel [(const_int 0)
9083 (vec_select:V16QI (match_dup 1)
9084 (parallel [(const_int 1)
9093 (vec_select:V16QI (match_dup 2)
9094 (parallel [(const_int 1)
9101 (const_int 15)]))))))]
9103 "pmaddubsw\t{%2, %0|%0, %2}"
9104 [(set_attr "type" "sseiadd")
9105 (set_attr "atom_unit" "simul")
9106 (set_attr "prefix_data16" "1")
9107 (set_attr "prefix_extra" "1")
9108 (set_attr "mode" "TI")])
;; 64-bit form of "pmaddubsw" on "y" (MMX) registers: V8QI inputs, V4HI
;; result.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.  The index lists start at 0 and at 1 and end at 7
;; (presumably even/odd bytes; TODO confirm against the complete lists).
9110 (define_insn "ssse3_pmaddubsw"
9111 [(set (match_operand:V4HI 0 "register_operand" "=y")
9116 (match_operand:V8QI 1 "register_operand" "0")
9117 (parallel [(const_int 0)
9123 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9124 (parallel [(const_int 0)
9130 (vec_select:V8QI (match_dup 1)
9131 (parallel [(const_int 1)
9136 (vec_select:V8QI (match_dup 2)
9137 (parallel [(const_int 1)
9140 (const_int 7)]))))))]
9142 "pmaddubsw\t{%2, %0|%0, %2}"
9143 [(set_attr "type" "sseiadd")
9144 (set_attr "atom_unit" "simul")
9145 (set_attr "prefix_extra" "1")
9146 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9147 (set_attr "mode" "DI")])
;; Named expander for the V8HI pmulhrsw operation.  Both sources are
;; accepted as nonimmediate operands; the preparation statement passes
;; the operand array to ix86_fixup_binary_operands_no_copy for MULT in
;; V8HImode before the RTL is generated.  The pattern contains an
;; all-ones V8HI const_vector (presumably the rounding increment --
;; TODO confirm against the complete RTL).
9149 (define_expand "ssse3_pmulhrswv8hi3"
9150 [(set (match_operand:V8HI 0 "register_operand" "")
9157 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9159 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9161 (const_vector:V8HI [(const_int 1) (const_int 1)
9162 (const_int 1) (const_int 1)
9163 (const_int 1) (const_int 1)
9164 (const_int 1) (const_int 1)]))
9167 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Unnamed AVX matcher printing the three-operand "vpmulhrsw".  It is
;; enabled under TARGET_AVX when ix86_binary_operator_ok accepts the
;; operands for MULT.  The '%' in operand 1's constraint marks operands
;; 1 and 2 as commutative; operand 2 may be a register or memory.
9169 (define_insn "*avx_pmulhrswv8hi3"
9170 [(set (match_operand:V8HI 0 "register_operand" "=x")
9177 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9179 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9181 (const_vector:V8HI [(const_int 1) (const_int 1)
9182 (const_int 1) (const_int 1)
9183 (const_int 1) (const_int 1)
9184 (const_int 1) (const_int 1)]))
9186 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9187 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9188 [(set_attr "type" "sseimul")
9189 (set_attr "prefix_extra" "1")
9190 (set_attr "prefix" "vex")
9191 (set_attr "mode" "TI")])
;; Unnamed SSSE3 matcher printing the two-operand "pmulhrsw" on V8HI.
;; It is enabled under TARGET_SSSE3 when ix86_binary_operator_ok accepts
;; the operands for MULT.  Operand 1 is commutative with operand 2 ('%')
;; and tied to the destination ("0").
9193 (define_insn "*ssse3_pmulhrswv8hi3"
9194 [(set (match_operand:V8HI 0 "register_operand" "=x")
9201 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9203 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9205 (const_vector:V8HI [(const_int 1) (const_int 1)
9206 (const_int 1) (const_int 1)
9207 (const_int 1) (const_int 1)
9208 (const_int 1) (const_int 1)]))
9210 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9211 "pmulhrsw\t{%2, %0|%0, %2}"
9212 [(set_attr "type" "sseimul")
9213 (set_attr "prefix_data16" "1")
9214 (set_attr "prefix_extra" "1")
9215 (set_attr "mode" "TI")])
;; Named expander for the V4HI pmulhrsw operation.  The preparation
;; statement passes the operand array to
;; ix86_fixup_binary_operands_no_copy for MULT in V4HImode.  The pattern
;; contains an all-ones V4HI const_vector (presumably the rounding
;; increment -- TODO confirm against the complete RTL).
9217 (define_expand "ssse3_pmulhrswv4hi3"
9218 [(set (match_operand:V4HI 0 "register_operand" "")
9225 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9227 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9229 (const_vector:V4HI [(const_int 1) (const_int 1)
9230 (const_int 1) (const_int 1)]))
9233 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Unnamed SSSE3 matcher printing "pmulhrsw" on V4HI with "y" (MMX)
;; registers.  It is enabled under TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands for MULT.  Operand 1 is
;; commutative with operand 2 ('%') and tied to the destination ("0").
9235 (define_insn "*ssse3_pmulhrswv4hi3"
9236 [(set (match_operand:V4HI 0 "register_operand" "=y")
9243 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9245 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9247 (const_vector:V4HI [(const_int 1) (const_int 1)
9248 (const_int 1) (const_int 1)]))
9250 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9251 "pmulhrsw\t{%2, %0|%0, %2}"
9252 [(set_attr "type" "sseimul")
9253 (set_attr "prefix_extra" "1")
9254 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9255 (set_attr "mode" "DI")])
;; Unnamed AVX pattern printing the three-operand "vpshufb" on V16QI.
;; The operation is modelled as an unspec of operand 1 (register) and
;; operand 2 (register or memory).  The ';' after the output template
;; merely starts an empty machine-description comment.
9257 (define_insn "*avx_pshufbv16qi3"
9258 [(set (match_operand:V16QI 0 "register_operand" "=x")
9259 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9260 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9263 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9264 [(set_attr "type" "sselog1")
9265 (set_attr "prefix_extra" "1")
9266 (set_attr "prefix" "vex")
9267 (set_attr "mode" "TI")])
;; SSSE3 pattern printing the two-operand "pshufb" on V16QI, modelled as
;; an unspec.  Operand 1 is tied to the destination ("0"); operand 2 may
;; be a register or memory.  The ';' after the output template merely
;; starts an empty machine-description comment.
9269 (define_insn "ssse3_pshufbv16qi3"
9270 [(set (match_operand:V16QI 0 "register_operand" "=x")
9271 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9272 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9275 "pshufb\t{%2, %0|%0, %2}";
9276 [(set_attr "type" "sselog1")
9277 (set_attr "prefix_data16" "1")
9278 (set_attr "prefix_extra" "1")
9279 (set_attr "mode" "TI")])
;; 64-bit (V8QI) form of "pshufb" on "y" (MMX) registers, modelled as an
;; unspec.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.  The ';' after the output template merely starts
;; an empty machine-description comment.
9281 (define_insn "ssse3_pshufbv8qi3"
9282 [(set (match_operand:V8QI 0 "register_operand" "=y")
9283 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9284 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9287 "pshufb\t{%2, %0|%0, %2}";
9288 [(set_attr "type" "sselog1")
9289 (set_attr "prefix_extra" "1")
9290 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9291 (set_attr "mode" "DI")])
;; Unnamed AVX pattern printing the three-operand "vpsign" family, one
;; instance per mode of the SSEMODE124 iterator (V16QI, V8HI, V4SI).
;; The <ssevecsize> mode attribute supplies the mnemonic suffix
;; (presumably b/w/d -- the attribute is defined elsewhere).  The ';'
;; after the template merely starts an empty comment.
9293 (define_insn "*avx_psign<mode>3"
9294 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9296 [(match_operand:SSEMODE124 1 "register_operand" "x")
9297 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9300 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9301 [(set_attr "type" "sselog1")
9302 (set_attr "prefix_extra" "1")
9303 (set_attr "prefix" "vex")
9304 (set_attr "mode" "TI")])
;; SSSE3 pattern printing the two-operand "psign" family for the
;; SSEMODE124 modes (V16QI, V8HI, V4SI).  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.  The
;; mnemonic suffix comes from the <ssevecsize> mode attribute.  The ';'
;; after the template merely starts an empty comment.
9306 (define_insn "ssse3_psign<mode>3"
9307 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9309 [(match_operand:SSEMODE124 1 "register_operand" "0")
9310 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9313 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9314 [(set_attr "type" "sselog1")
9315 (set_attr "prefix_data16" "1")
9316 (set_attr "prefix_extra" "1")
9317 (set_attr "mode" "TI")])
;; "psign" family for the modes of the MMXMODEI iterator, using "y"
;; (MMX) registers.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.  The mnemonic suffix comes from the
;; <mmxvecsize> mode attribute.  The ';' after the template merely
;; starts an empty comment.
9319 (define_insn "ssse3_psign<mode>3"
9320 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9322 [(match_operand:MMXMODEI 1 "register_operand" "0")
9323 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9326 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9327 [(set_attr "type" "sselog1")
9328 (set_attr "prefix_extra" "1")
9329 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9330 (set_attr "mode" "DI")])
;; Unnamed AVX pattern printing the four-operand "vpalignr" on TImode,
;; modelled as an unspec.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before printing
;; (presumably a bit count converted to the byte immediate the
;; instruction encodes -- TODO confirm).
9332 (define_insn "*avx_palignrti"
9333 [(set (match_operand:TI 0 "register_operand" "=x")
9334 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9335 (match_operand:TI 2 "nonimmediate_operand" "xm")
9336 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9340 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9341 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9343 [(set_attr "type" "sseishft")
9344 (set_attr "prefix_extra" "1")
9345 (set_attr "length_immediate" "1")
9346 (set_attr "prefix" "vex")
9347 (set_attr "mode" "TI")])
;; SSSE3 pattern printing the three-operand "palignr" on TImode,
;; modelled as an unspec.  Operand 1 is tied to the destination ("0").
;; Operand 3 must satisfy const_0_to_255_mul_8_operand and is divided by
;; 8 before printing (presumably bits to bytes -- TODO confirm).
9349 (define_insn "ssse3_palignrti"
9350 [(set (match_operand:TI 0 "register_operand" "=x")
9351 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9352 (match_operand:TI 2 "nonimmediate_operand" "xm")
9353 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9357 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9358 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9360 [(set_attr "type" "sseishft")
9361 (set_attr "atom_unit" "sishuf")
9362 (set_attr "prefix_data16" "1")
9363 (set_attr "prefix_extra" "1")
9364 (set_attr "length_immediate" "1")
9365 (set_attr "mode" "TI")])
;; 64-bit (DImode) form of "palignr" on "y" (MMX) registers, modelled as
;; an unspec.  Operand 1 is tied to the destination.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand and is divided by 8 before
;; printing (presumably bits to bytes -- TODO confirm).
9367 (define_insn "ssse3_palignrdi"
9368 [(set (match_operand:DI 0 "register_operand" "=y")
9369 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9370 (match_operand:DI 2 "nonimmediate_operand" "ym")
9371 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9375 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9376 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9378 [(set_attr "type" "sseishft")
9379 (set_attr "atom_unit" "sishuf")
9380 (set_attr "prefix_extra" "1")
9381 (set_attr "length_immediate" "1")
9382 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9383 (set_attr "mode" "DI")])
;; Vector absolute value (RTL "abs") for the SSEMODE124 modes (V16QI,
;; V8HI, V4SI), printed as "pabs" plus the <ssevecsize> suffix.  The
;; source may be a register or memory.  The "%v" in the template goes
;; with the "maybe_vex" prefix attribute (presumably it prints the 'v'
;; prefix when the VEX form is used -- TODO confirm).
9385 (define_insn "abs<mode>2"
9386 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9387 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9389 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9390 [(set_attr "type" "sselog1")
9391 (set_attr "prefix_data16" "1")
9392 (set_attr "prefix_extra" "1")
9393 (set_attr "prefix" "maybe_vex")
9394 (set_attr "mode" "TI")])
;; Vector absolute value (RTL "abs") for the MMXMODEI modes on "y" (MMX)
;; registers, printed as "pabs" plus the <mmxvecsize> suffix.  The
;; source may be a register or memory.  The ';' after the template
;; merely starts an empty machine-description comment.
9396 (define_insn "abs<mode>2"
9397 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9398 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9400 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9401 [(set_attr "type" "sselog1")
9402 (set_attr "prefix_rep" "0")
9403 (set_attr "prefix_extra" "1")
9404 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9405 (set_attr "mode" "DI")])
9407 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9409 ;; AMD SSE4A instructions
9411 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store printed as "movnts" plus the <ssemodefsuffix>
;; suffix, for each mode of the MODEF iterator.  The destination must be
;; memory ("=m") and the source an SSE register ("x"); the store is
;; wrapped in an unspec.
9413 (define_insn "sse4a_movnt<mode>"
9414 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9416 [(match_operand:MODEF 1 "register_operand" "x")]
9419 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9420 [(set_attr "type" "ssemov")
9421 (set_attr "mode" "<MODE>")])
;; Vector-source variant of the SSE4A "movnts" store: element 0 of a
;; SSEMODEF2P vector register is selected and stored to a memory operand
;; of the corresponding scalar mode (<ssescalarmode>), inside an unspec.
9423 (define_insn "sse4a_vmmovnt<mode>"
9424 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9425 (unspec:<ssescalarmode>
9426 [(vec_select:<ssescalarmode>
9427 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9428 (parallel [(const_int 0)]))]
9431 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
9432 [(set_attr "type" "ssemov")
9433 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A "extrq" with two immediates.  Operand 1 (V2DI) is tied to the
;; destination; operands 2 and 3 are modeless const_int operands that
;; are printed as the two immediates, hence length_immediate is 2.
9435 (define_insn "sse4a_extrqi"
9436 [(set (match_operand:V2DI 0 "register_operand" "=x")
9437 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9438 (match_operand 2 "const_int_operand" "")
9439 (match_operand 3 "const_int_operand" "")]
9442 "extrq\t{%3, %2, %0|%0, %2, %3}"
9443 [(set_attr "type" "sse")
9444 (set_attr "prefix_data16" "1")
9445 (set_attr "length_immediate" "2")
9446 (set_attr "mode" "TI")])
;; SSE4A "extrq", register-controlled form.  Operand 1 (V2DI) is tied to
;; the destination and operand 2 is a V16QI SSE register; the operation
;; is modelled as an unspec.
9448 (define_insn "sse4a_extrq"
9449 [(set (match_operand:V2DI 0 "register_operand" "=x")
9450 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9451 (match_operand:V16QI 2 "register_operand" "x")]
9454 "extrq\t{%2, %0|%0, %2}"
9455 [(set_attr "type" "sse")
9456 (set_attr "prefix_data16" "1")
9457 (set_attr "mode" "TI")])
;; SSE4A "insertq" with two immediates.  Operand 1 is tied to the
;; destination, operand 2 is a second V2DI register, and operands 3 and
;; 4 are modeless const_int operands printed as the two immediates
;; (length_immediate 2).  The attributes select a rep prefix and no
;; data16 prefix.
9459 (define_insn "sse4a_insertqi"
9460 [(set (match_operand:V2DI 0 "register_operand" "=x")
9461 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9462 (match_operand:V2DI 2 "register_operand" "x")
9463 (match_operand 3 "const_int_operand" "")
9464 (match_operand 4 "const_int_operand" "")]
9467 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9468 [(set_attr "type" "sseins")
9469 (set_attr "prefix_data16" "0")
9470 (set_attr "prefix_rep" "1")
9471 (set_attr "length_immediate" "2")
9472 (set_attr "mode" "TI")])
;; SSE4A "insertq", register-only form.  Operand 1 is tied to the
;; destination and operand 2 is a second V2DI register; the operation is
;; modelled as an unspec.  The attributes select a rep prefix and no
;; data16 prefix.
9474 (define_insn "sse4a_insertq"
9475 [(set (match_operand:V2DI 0 "register_operand" "=x")
9476 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9477 (match_operand:V2DI 2 "register_operand" "x")]
9480 "insertq\t{%2, %0|%0, %2}"
9481 [(set_attr "type" "sseins")
9482 (set_attr "prefix_data16" "0")
9483 (set_attr "prefix_rep" "1")
9484 (set_attr "mode" "TI")])
9486 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9488 ;; Intel SSE4.1 instructions
9490 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX immediate blend printed as "vblendp" plus the mode suffix, for
;; each mode of AVXMODEF2P.  The RTL is a vec_merge whose first input is
;; operand 2 (register or memory) and whose second input is operand 1
;; (register); operand 3 is the immediate mask, limited by the
;; mode-dependent const_0_to_<blendbits>_operand predicate.
9492 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
9493 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9494 (vec_merge:AVXMODEF2P
9495 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9496 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9497 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9499 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9500 [(set_attr "type" "ssemov")
9501 (set_attr "prefix_extra" "1")
9502 (set_attr "length_immediate" "1")
9503 (set_attr "prefix" "vex")
9504 (set_attr "mode" "<avxvecmode>")])
;; AVX variable blend printed as the four-operand "vblendvp" plus the
;; mode suffix, for each mode of AVXMODEF2P.  Operands 1 and 3 are
;; registers and operand 2 may be a register or memory.
;; length_immediate is 1 although no operand is a constant (presumably
;; because operand 3's register is encoded in the immediate byte --
;; TODO confirm against the ISA reference).
9506 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
9507 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9509 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9510 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9511 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9514 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9515 [(set_attr "type" "ssemov")
9516 (set_attr "prefix_extra" "1")
9517 (set_attr "length_immediate" "1")
9518 (set_attr "prefix" "vex")
9519 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 immediate blend printed as "blendp" plus the mode suffix, for
;; each mode of SSEMODEF2P.  The RTL is a vec_merge of operand 2
;; (register or memory) and operand 1, which is tied to the destination
;; ("0"); operand 3 is the immediate mask, limited by the
;; mode-dependent const_0_to_<blendbits>_operand predicate.
9521 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
9522 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9523 (vec_merge:SSEMODEF2P
9524 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9525 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9526 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9528 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9529 [(set_attr "type" "ssemov")
9530 (set_attr "prefix_data16" "1")
9531 (set_attr "prefix_extra" "1")
9532 (set_attr "length_immediate" "1")
9533 (set_attr "mode" "<MODE>")])
;; SSE4.1 variable blend printed as "blendvp" plus the mode suffix, for
;; each mode of SSEMODEF2P.  Operands 0, 1 and 2 use the *_not_xmm0
;; predicates, while operand 3 has the "Yz" constraint (presumably
;; xmm0, which the instruction uses as its implicit mask register --
;; TODO confirm against the i386 constraint definitions).  Operand 1 is
;; tied to the destination.
9535 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
9536 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9538 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9539 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9540 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9543 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9544 [(set_attr "type" "ssemov")
9545 (set_attr "prefix_data16" "1")
9546 (set_attr "prefix_extra" "1")
9547 (set_attr "mode" "<MODE>")])
;; AVX "vdpp" plus the mode suffix, for each mode of AVXMODEF2P.
;; Operands 1 and 2 are commutative ('%'); operand 2 may be a register
;; or memory; operand 3 is an immediate in the range 0..255.
9549 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
9550 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9552 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9553 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9554 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9557 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9558 [(set_attr "type" "ssemul")
9559 (set_attr "prefix" "vex")
9560 (set_attr "prefix_extra" "1")
9561 (set_attr "length_immediate" "1")
9562 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 dot product for SSEMODEF2P: two-operand dpp<suffix> with an
;; 8-bit immediate.  Operand 1 is commutative with operand 2 ("%") and
;; tied to the destination ("0"); operand 2 may be memory.
9564 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
9565 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9567 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9568 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9569 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9572 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9573 [(set_attr "type" "ssemul")
9574 (set_attr "prefix_data16" "1")
9575 (set_attr "prefix_extra" "1")
9576 (set_attr "length_immediate" "1")
9577 (set_attr "mode" "<MODE>")])
;; movntdqa load: operand 1 must be memory ("m"), the result is a V2DI
;; register.  The "%v" template prefix plus prefix=maybe_vex lets the
;; same pattern print either the legacy or the VEX-encoded mnemonic.
9579 (define_insn "sse4_1_movntdqa"
9580 [(set (match_operand:V2DI 0 "register_operand" "=x")
9581 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9584 "%vmovntdqa\t{%1, %0|%0, %1}"
9585 [(set_attr "type" "ssemov")
9586 (set_attr "prefix_extra" "1")
9587 (set_attr "prefix" "maybe_vex")
9588 (set_attr "mode" "TI")])
;; AVX form of mpsadbw on V16QI: non-destructive three-source-operand
;; vmpsadbw (register, register-or-memory, 8-bit immediate).
9590 (define_insn "*avx_mpsadbw"
9591 [(set (match_operand:V16QI 0 "register_operand" "=x")
9592 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9593 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9594 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9597 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9598 [(set_attr "type" "sselog1")
9599 (set_attr "prefix" "vex")
9600 (set_attr "prefix_extra" "1")
9601 (set_attr "length_immediate" "1")
9602 (set_attr "mode" "TI")])
;; SSE4.1 mpsadbw on V16QI: destructive form, operand 1 is tied to the
;; destination; operand 2 may be memory; operand 3 is an 8-bit immediate.
9604 (define_insn "sse4_1_mpsadbw"
9605 [(set (match_operand:V16QI 0 "register_operand" "=x")
9606 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9607 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9608 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9611 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9612 [(set_attr "type" "sselog1")
9613 (set_attr "prefix_extra" "1")
9614 (set_attr "length_immediate" "1")
9615 (set_attr "mode" "TI")])
;; AVX form of packusdw: combines two V4SI sources (operand 2 may be
;; memory) into one V8HI result using the three-operand vpackusdw.
9617 (define_insn "*avx_packusdw"
9618 [(set (match_operand:V8HI 0 "register_operand" "=x")
9621 (match_operand:V4SI 1 "register_operand" "x"))
9623 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9625 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9626 [(set_attr "type" "sselog")
9627 (set_attr "prefix_extra" "1")
9628 (set_attr "prefix" "vex")
9629 (set_attr "mode" "TI")])
;; SSE4.1 packusdw: two V4SI sources packed into a V8HI destination.
;; Operand 1 is tied to the destination register ("0").
9631 (define_insn "sse4_1_packusdw"
9632 [(set (match_operand:V8HI 0 "register_operand" "=x")
9635 (match_operand:V4SI 1 "register_operand" "0"))
9637 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9639 "packusdw\t{%2, %0|%0, %2}"
9640 [(set_attr "type" "sselog")
9641 (set_attr "prefix_extra" "1")
9642 (set_attr "mode" "TI")])
;; AVX byte-wise variable blend on V16QI: four-operand vpblendvb with
;; the third source (operand 3) in an arbitrary SSE register.
;; NOTE(review): length_immediate is 1 with no immediate in the
;; template -- presumably the fourth register lives in the imm8 byte.
9644 (define_insn "*avx_pblendvb"
9645 [(set (match_operand:V16QI 0 "register_operand" "=x")
9646 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9647 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9648 (match_operand:V16QI 3 "register_operand" "x")]
9651 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9652 [(set_attr "type" "ssemov")
9653 (set_attr "prefix_extra" "1")
9654 (set_attr "length_immediate" "1")
9655 (set_attr "prefix" "vex")
9656 (set_attr "mode" "TI")])
;; SSE4.1 byte-wise variable blend on V16QI (pblendvb).
;; Operands 0-2 are restricted by the *_not_xmm0 predicates; operand 3
;; uses the "Yz" constraint (xmm0 -- TODO confirm against constraints.md).
;; Operand 1 is tied to the destination.
9658 (define_insn "sse4_1_pblendvb"
9659 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9660 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9661 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9662 (match_operand:V16QI 3 "register_operand" "Yz")]
9665 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9666 [(set_attr "type" "ssemov")
9667 (set_attr "prefix_extra" "1")
9668 (set_attr "mode" "TI")])
;; AVX word blend on V8HI: three-source vpblendw.  In the RTL operand 2
;; (register or memory) is listed before operand 1, and operand 3 is
;; the 8-bit immediate mask.
9670 (define_insn "*avx_pblendw"
9671 [(set (match_operand:V8HI 0 "register_operand" "=x")
9673 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9674 (match_operand:V8HI 1 "register_operand" "x")
9675 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9677 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9678 [(set_attr "type" "ssemov")
9679 (set_attr "prefix" "vex")
9680 (set_attr "prefix_extra" "1")
9681 (set_attr "length_immediate" "1")
9682 (set_attr "mode" "TI")])
;; SSE4.1 word blend on V8HI (pblendw): operand 1 is tied to the
;; destination, operand 2 may be memory, operand 3 is the 8-bit
;; immediate mask.
9684 (define_insn "sse4_1_pblendw"
9685 [(set (match_operand:V8HI 0 "register_operand" "=x")
9687 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9688 (match_operand:V8HI 1 "register_operand" "0")
9689 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9691 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9692 [(set_attr "type" "ssemov")
9693 (set_attr "prefix_extra" "1")
9694 (set_attr "length_immediate" "1")
9695 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a single
;; register-or-memory source.  "%v" + maybe_vex covers both the legacy
;; and the VEX spelling.
9697 (define_insn "sse4_1_phminposuw"
9698 [(set (match_operand:V8HI 0 "register_operand" "=x")
9699 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9700 UNSPEC_PHMINPOSUW))]
9702 "%vphminposuw\t{%1, %0|%0, %1}"
9703 [(set_attr "type" "sselog1")
9704 (set_attr "prefix_extra" "1")
9705 (set_attr "prefix" "maybe_vex")
9706 (set_attr "mode" "TI")])
;; pmovsxbw, register form: the source is a full V16QI register from
;; which elements are selected (selection list starts at index 0) and
;; widened into a V8HI result.
9708 (define_insn "sse4_1_extendv8qiv8hi2"
9709 [(set (match_operand:V8HI 0 "register_operand" "=x")
9712 (match_operand:V16QI 1 "register_operand" "x")
9713 (parallel [(const_int 0)
9722 "%vpmovsxbw\t{%1, %0|%0, %1}"
9723 [(set_attr "type" "ssemov")
9724 (set_attr "prefix_extra" "1")
9725 (set_attr "prefix" "maybe_vex")
9726 (set_attr "mode" "TI")])
;; pmovsxbw, narrow-source form: the source is a V8QI register or
;; memory operand, wrapped in vec_duplicate:V16QI so the RTL has the
;; same outer shape as the full-register pattern.
9728 (define_insn "*sse4_1_extendv8qiv8hi2"
9729 [(set (match_operand:V8HI 0 "register_operand" "=x")
9732 (vec_duplicate:V16QI
9733 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9734 (parallel [(const_int 0)
9743 "%vpmovsxbw\t{%1, %0|%0, %1}"
9744 [(set_attr "type" "ssemov")
9745 (set_attr "prefix_extra" "1")
9746 (set_attr "prefix" "maybe_vex")
9747 (set_attr "mode" "TI")])
;; pmovsxbd, register form: selects elements of a V16QI register
;; (selection list starts at index 0) and widens them into V4SI.
9749 (define_insn "sse4_1_extendv4qiv4si2"
9750 [(set (match_operand:V4SI 0 "register_operand" "=x")
9753 (match_operand:V16QI 1 "register_operand" "x")
9754 (parallel [(const_int 0)
9759 "%vpmovsxbd\t{%1, %0|%0, %1}"
9760 [(set_attr "type" "ssemov")
9761 (set_attr "prefix_extra" "1")
9762 (set_attr "prefix" "maybe_vex")
9763 (set_attr "mode" "TI")])
;; pmovsxbd, narrow-source form: V4QI register or memory source inside
;; vec_duplicate:V16QI, producing V4SI.
9765 (define_insn "*sse4_1_extendv4qiv4si2"
9766 [(set (match_operand:V4SI 0 "register_operand" "=x")
9769 (vec_duplicate:V16QI
9770 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9771 (parallel [(const_int 0)
9776 "%vpmovsxbd\t{%1, %0|%0, %1}"
9777 [(set_attr "type" "ssemov")
9778 (set_attr "prefix_extra" "1")
9779 (set_attr "prefix" "maybe_vex")
9780 (set_attr "mode" "TI")])
;; pmovsxbq, register form: selects elements of a V16QI register
;; (selection list starts at index 0) and widens them into V2DI.
9782 (define_insn "sse4_1_extendv2qiv2di2"
9783 [(set (match_operand:V2DI 0 "register_operand" "=x")
9786 (match_operand:V16QI 1 "register_operand" "x")
9787 (parallel [(const_int 0)
9790 "%vpmovsxbq\t{%1, %0|%0, %1}"
9791 [(set_attr "type" "ssemov")
9792 (set_attr "prefix_extra" "1")
9793 (set_attr "prefix" "maybe_vex")
9794 (set_attr "mode" "TI")])
;; pmovsxbq, narrow-source form: V2QI register or memory source inside
;; vec_duplicate:V16QI, producing V2DI.
9796 (define_insn "*sse4_1_extendv2qiv2di2"
9797 [(set (match_operand:V2DI 0 "register_operand" "=x")
9800 (vec_duplicate:V16QI
9801 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9802 (parallel [(const_int 0)
9805 "%vpmovsxbq\t{%1, %0|%0, %1}"
9806 [(set_attr "type" "ssemov")
9807 (set_attr "prefix_extra" "1")
9808 (set_attr "prefix" "maybe_vex")
9809 (set_attr "mode" "TI")])
;; pmovsxwd, register form: selects elements of a V8HI register
;; (selection list starts at index 0) and widens them into V4SI.
9811 (define_insn "sse4_1_extendv4hiv4si2"
9812 [(set (match_operand:V4SI 0 "register_operand" "=x")
9815 (match_operand:V8HI 1 "register_operand" "x")
9816 (parallel [(const_int 0)
9821 "%vpmovsxwd\t{%1, %0|%0, %1}"
9822 [(set_attr "type" "ssemov")
9823 (set_attr "prefix_extra" "1")
9824 (set_attr "prefix" "maybe_vex")
9825 (set_attr "mode" "TI")])
;; pmovsxwd, narrow-source form: the source is a register or memory
;; operand holding the four words that are widened into V4SI.
;; The source mode is V4HI (64 bits), matching both the four-element
;; result and the sibling *sse4_1_zero_extendv4hiv4si2 pattern; the
;; previous V2HI mode described only half of the data the instruction
;; consumes.
9827 (define_insn "*sse4_1_extendv4hiv4si2"
9828 [(set (match_operand:V4SI 0 "register_operand" "=x")
9832 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9833 (parallel [(const_int 0)
9838 "%vpmovsxwd\t{%1, %0|%0, %1}"
9839 [(set_attr "type" "ssemov")
9840 (set_attr "prefix_extra" "1")
9841 (set_attr "prefix" "maybe_vex")
9842 (set_attr "mode" "TI")])
;; pmovsxwq, register form: selects elements of a V8HI register
;; (selection list starts at index 0) and widens them into V2DI.
9844 (define_insn "sse4_1_extendv2hiv2di2"
9845 [(set (match_operand:V2DI 0 "register_operand" "=x")
9848 (match_operand:V8HI 1 "register_operand" "x")
9849 (parallel [(const_int 0)
9852 "%vpmovsxwq\t{%1, %0|%0, %1}"
9853 [(set_attr "type" "ssemov")
9854 (set_attr "prefix_extra" "1")
9855 (set_attr "prefix" "maybe_vex")
9856 (set_attr "mode" "TI")])
;; pmovsxwq, narrow-source form: the source is a register or memory
;; operand holding the two words that are widened into V2DI.
;; The source mode is V2HI (32 bits), matching both the two-element
;; result and the sibling *sse4_1_zero_extendv2hiv2di2 pattern; the
;; previous V8HI mode would have described a full 128-bit memory
;; reference for an instruction that reads only two words.
9858 (define_insn "*sse4_1_extendv2hiv2di2"
9859 [(set (match_operand:V2DI 0 "register_operand" "=x")
9863 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9864 (parallel [(const_int 0)
9867 "%vpmovsxwq\t{%1, %0|%0, %1}"
9868 [(set_attr "type" "ssemov")
9869 (set_attr "prefix_extra" "1")
9870 (set_attr "prefix" "maybe_vex")
9871 (set_attr "mode" "TI")])
;; pmovsxdq, register form: selects elements of a V4SI register
;; (selection list starts at index 0) and widens them into V2DI.
9873 (define_insn "sse4_1_extendv2siv2di2"
9874 [(set (match_operand:V2DI 0 "register_operand" "=x")
9877 (match_operand:V4SI 1 "register_operand" "x")
9878 (parallel [(const_int 0)
9881 "%vpmovsxdq\t{%1, %0|%0, %1}"
9882 [(set_attr "type" "ssemov")
9883 (set_attr "prefix_extra" "1")
9884 (set_attr "prefix" "maybe_vex")
9885 (set_attr "mode" "TI")])
;; pmovsxdq, narrow-source form: V2SI register or memory source,
;; producing V2DI.
9887 (define_insn "*sse4_1_extendv2siv2di2"
9888 [(set (match_operand:V2DI 0 "register_operand" "=x")
9892 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9893 (parallel [(const_int 0)
9896 "%vpmovsxdq\t{%1, %0|%0, %1}"
9897 [(set_attr "type" "ssemov")
9898 (set_attr "prefix_extra" "1")
9899 (set_attr "prefix" "maybe_vex")
9900 (set_attr "mode" "TI")])
;; pmovzxbw, register form: selects elements of a V16QI register
;; (selection list starts at index 0) and widens them into V8HI.
9902 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9903 [(set (match_operand:V8HI 0 "register_operand" "=x")
9906 (match_operand:V16QI 1 "register_operand" "x")
9907 (parallel [(const_int 0)
9916 "%vpmovzxbw\t{%1, %0|%0, %1}"
9917 [(set_attr "type" "ssemov")
9918 (set_attr "prefix_extra" "1")
9919 (set_attr "prefix" "maybe_vex")
9920 (set_attr "mode" "TI")])
;; pmovzxbw, narrow-source form: V8QI register or memory source inside
;; vec_duplicate:V16QI, producing V8HI.
9922 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9923 [(set (match_operand:V8HI 0 "register_operand" "=x")
9926 (vec_duplicate:V16QI
9927 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9928 (parallel [(const_int 0)
9937 "%vpmovzxbw\t{%1, %0|%0, %1}"
9938 [(set_attr "type" "ssemov")
9939 (set_attr "prefix_extra" "1")
9940 (set_attr "prefix" "maybe_vex")
9941 (set_attr "mode" "TI")])
;; pmovzxbd, register form: selects elements of a V16QI register
;; (selection list starts at index 0) and widens them into V4SI.
9943 (define_insn "sse4_1_zero_extendv4qiv4si2"
9944 [(set (match_operand:V4SI 0 "register_operand" "=x")
9947 (match_operand:V16QI 1 "register_operand" "x")
9948 (parallel [(const_int 0)
9953 "%vpmovzxbd\t{%1, %0|%0, %1}"
9954 [(set_attr "type" "ssemov")
9955 (set_attr "prefix_extra" "1")
9956 (set_attr "prefix" "maybe_vex")
9957 (set_attr "mode" "TI")])
;; pmovzxbd, narrow-source form: V4QI register or memory source inside
;; vec_duplicate:V16QI, producing V4SI.
9959 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9960 [(set (match_operand:V4SI 0 "register_operand" "=x")
9963 (vec_duplicate:V16QI
9964 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9965 (parallel [(const_int 0)
9970 "%vpmovzxbd\t{%1, %0|%0, %1}"
9971 [(set_attr "type" "ssemov")
9972 (set_attr "prefix_extra" "1")
9973 (set_attr "prefix" "maybe_vex")
9974 (set_attr "mode" "TI")])
;; pmovzxbq, register form: selects elements of a V16QI register
;; (selection list starts at index 0) and widens them into V2DI.
9976 (define_insn "sse4_1_zero_extendv2qiv2di2"
9977 [(set (match_operand:V2DI 0 "register_operand" "=x")
9980 (match_operand:V16QI 1 "register_operand" "x")
9981 (parallel [(const_int 0)
9984 "%vpmovzxbq\t{%1, %0|%0, %1}"
9985 [(set_attr "type" "ssemov")
9986 (set_attr "prefix_extra" "1")
9987 (set_attr "prefix" "maybe_vex")
9988 (set_attr "mode" "TI")])
;; pmovzxbq, narrow-source form: V2QI register or memory source inside
;; vec_duplicate:V16QI, producing V2DI.
9990 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9991 [(set (match_operand:V2DI 0 "register_operand" "=x")
9994 (vec_duplicate:V16QI
9995 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9996 (parallel [(const_int 0)
9999 "%vpmovzxbq\t{%1, %0|%0, %1}"
10000 [(set_attr "type" "ssemov")
10001 (set_attr "prefix_extra" "1")
10002 (set_attr "prefix" "maybe_vex")
10003 (set_attr "mode" "TI")])
;; pmovzxwd, register form: selects elements 0..3 of a V8HI register
;; (the visible selection list runs from index 0 to index 3) and widens
;; them into V4SI.
10005 (define_insn "sse4_1_zero_extendv4hiv4si2"
10006 [(set (match_operand:V4SI 0 "register_operand" "=x")
10009 (match_operand:V8HI 1 "register_operand" "x")
10010 (parallel [(const_int 0)
10013 (const_int 3)]))))]
10015 "%vpmovzxwd\t{%1, %0|%0, %1}"
10016 [(set_attr "type" "ssemov")
10017 (set_attr "prefix_extra" "1")
10018 (set_attr "prefix" "maybe_vex")
10019 (set_attr "mode" "TI")])
;; pmovzxwd, narrow-source form: V4HI register or memory source inside
;; vec_duplicate:V8HI, producing V4SI.
10021 (define_insn "*sse4_1_zero_extendv4hiv4si2"
10022 [(set (match_operand:V4SI 0 "register_operand" "=x")
10025 (vec_duplicate:V8HI
10026 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
10027 (parallel [(const_int 0)
10030 (const_int 3)]))))]
10032 "%vpmovzxwd\t{%1, %0|%0, %1}"
10033 [(set_attr "type" "ssemov")
10034 (set_attr "prefix_extra" "1")
10035 (set_attr "prefix" "maybe_vex")
10036 (set_attr "mode" "TI")])
;; pmovzxwq, register form: selects elements 0 and 1 of a V8HI
;; register and widens them into V2DI.
10038 (define_insn "sse4_1_zero_extendv2hiv2di2"
10039 [(set (match_operand:V2DI 0 "register_operand" "=x")
10042 (match_operand:V8HI 1 "register_operand" "x")
10043 (parallel [(const_int 0)
10044 (const_int 1)]))))]
10046 "%vpmovzxwq\t{%1, %0|%0, %1}"
10047 [(set_attr "type" "ssemov")
10048 (set_attr "prefix_extra" "1")
10049 (set_attr "prefix" "maybe_vex")
10050 (set_attr "mode" "TI")])
;; pmovzxwq, narrow-source form: V2HI register or memory source inside
;; vec_duplicate:V8HI; elements 0 and 1 are widened into V2DI.
10052 (define_insn "*sse4_1_zero_extendv2hiv2di2"
10053 [(set (match_operand:V2DI 0 "register_operand" "=x")
10056 (vec_duplicate:V8HI
10057 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
10058 (parallel [(const_int 0)
10059 (const_int 1)]))))]
10061 "%vpmovzxwq\t{%1, %0|%0, %1}"
10062 [(set_attr "type" "ssemov")
10063 (set_attr "prefix_extra" "1")
10064 (set_attr "prefix" "maybe_vex")
10065 (set_attr "mode" "TI")])
;; pmovzxdq, register form: selects elements 0 and 1 of a V4SI
;; register and widens them into V2DI.
10067 (define_insn "sse4_1_zero_extendv2siv2di2"
10068 [(set (match_operand:V2DI 0 "register_operand" "=x")
10071 (match_operand:V4SI 1 "register_operand" "x")
10072 (parallel [(const_int 0)
10073 (const_int 1)]))))]
10075 "%vpmovzxdq\t{%1, %0|%0, %1}"
10076 [(set_attr "type" "ssemov")
10077 (set_attr "prefix_extra" "1")
10078 (set_attr "prefix" "maybe_vex")
10079 (set_attr "mode" "TI")])
;; pmovzxdq, narrow-source form: V2SI register or memory source inside
;; vec_duplicate:V4SI; elements 0 and 1 are widened into V2DI.
10081 (define_insn "*sse4_1_zero_extendv2siv2di2"
10082 [(set (match_operand:V2DI 0 "register_operand" "=x")
10085 (vec_duplicate:V4SI
10086 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
10087 (parallel [(const_int 0)
10088 (const_int 1)]))))]
10090 "%vpmovzxdq\t{%1, %0|%0, %1}"
10091 [(set_attr "type" "ssemov")
10092 (set_attr "prefix_extra" "1")
10093 (set_attr "prefix" "maybe_vex")
10094 (set_attr "mode" "TI")])
10096 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
10097 ;; setting FLAGS_REG, but they are not really compare instructions.
;; The pattern has no register output: it only sets FLAGS_REG from an
;; unspec of operand 0 (register) and operand 1 (register or memory).
10098 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
10099 [(set (reg:CC FLAGS_REG)
10100 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
10101 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
10104 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
10105 [(set_attr "type" "ssecomi")
10106 (set_attr "prefix_extra" "1")
10107 (set_attr "prefix" "vex")
10108 (set_attr "mode" "<MODE>")])
10110 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
10111 ;; But it is not really a compare instruction.
;; 256-bit form: both operands are V4DI and the insn mode is OI; only
;; FLAGS_REG is written.
10112 (define_insn "avx_ptest256"
10113 [(set (reg:CC FLAGS_REG)
10114 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
10115 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
10118 "vptest\t{%1, %0|%0, %1}"
10119 [(set_attr "type" "ssecomi")
10120 (set_attr "prefix_extra" "1")
10121 (set_attr "prefix" "vex")
10122 (set_attr "mode" "OI")])
;; 128-bit ptest on V2DI operands: writes only FLAGS_REG.  "%v" plus
;; prefix=maybe_vex covers both the legacy and the VEX spelling.
10124 (define_insn "sse4_1_ptest"
10125 [(set (reg:CC FLAGS_REG)
10126 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
10127 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
10130 "%vptest\t{%1, %0|%0, %1}"
10131 [(set_attr "type" "ssecomi")
10132 (set_attr "prefix_extra" "1")
10133 (set_attr "prefix" "maybe_vex")
10134 (set_attr "mode" "TI")])
;; 256-bit packed rounding for AVX256MODEF2P: vroundp<suffix> with a
;; register-or-memory source and a 4-bit immediate (0..15) selecting
;; the rounding behaviour.
10136 (define_insn "avx_roundp<avxmodesuffixf2c>256"
10137 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
10138 (unspec:AVX256MODEF2P
10139 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
10140 (match_operand:SI 2 "const_0_to_15_operand" "n")]
10143 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
10144 [(set_attr "type" "ssecvt")
10145 (set_attr "prefix_extra" "1")
10146 (set_attr "length_immediate" "1")
10147 (set_attr "prefix" "vex")
10148 (set_attr "mode" "<MODE>")])
;; 128-bit packed rounding for SSEMODEF2P: roundp<suffix> (or its VEX
;; spelling via "%v") with a register-or-memory source and a 0..15
;; immediate.
10150 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
10151 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10153 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
10154 (match_operand:SI 2 "const_0_to_15_operand" "n")]
10157 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
10158 [(set_attr "type" "ssecvt")
10159 (set_attr "prefix_data16" "1")
10160 (set_attr "prefix_extra" "1")
10161 (set_attr "length_immediate" "1")
10162 (set_attr "prefix" "maybe_vex")
10163 (set_attr "mode" "<MODE>")])
;; AVX scalar rounding: a vec_merge of the rounded operand 2 with
;; operand 1, printed as the three-source vrounds<suffix>.  Both vector
;; sources must be registers; operand 3 is the 0..15 immediate.
10165 (define_insn "*avx_rounds<ssemodesuffixf2c>"
10166 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10167 (vec_merge:SSEMODEF2P
10169 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10170 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10172 (match_operand:SSEMODEF2P 1 "register_operand" "x")
10175 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10176 [(set_attr "type" "ssecvt")
10177 (set_attr "prefix_extra" "1")
10178 (set_attr "length_immediate" "1")
10179 (set_attr "prefix" "vex")
10180 (set_attr "mode" "<MODE>")])
;; SSE4.1 scalar rounding: same vec_merge shape as the AVX variant,
;; but operand 1 is tied to the destination ("0") and the template
;; prints the two-operand rounds<suffix>.
10182 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
10183 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10184 (vec_merge:SSEMODEF2P
10186 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10187 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10189 (match_operand:SSEMODEF2P 1 "register_operand" "0")
10192 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
10193 [(set_attr "type" "ssecvt")
10194 (set_attr "prefix_data16" "1")
10195 (set_attr "prefix_extra" "1")
10196 (set_attr "length_immediate" "1")
10197 (set_attr "mode" "<MODE>")])
10199 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10201 ;; Intel SSE4.2 string/text processing instructions
10203 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI value in ecx
;; (operand 0, constraint "c"), a V16QI value under the "Yz" constraint
;; (operand 1) and FLAGS_REG.  Inputs are two vectors (operands 2 and
;; 4), their lengths in eax/edx (operands 3 and 5, constraints "a" and
;; "d") and an 8-bit immediate (operand 6).
;; The split code inspects REG_UNUSED notes on curr_insn to learn which
;; results are live, then emits the index form, the mask form and/or
;; the flags-only form.  The flags-only form is emitted only when the
;; flags are live and neither ecx nor the vector result is.
;; NOTE(review): the guards around the first two emit_insn calls are
;; not visible here -- presumably "if (ecx)" and "if (xmm0)".
10205 (define_insn_and_split "sse4_2_pcmpestr"
10206 [(set (match_operand:SI 0 "register_operand" "=c,c")
10208 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10209 (match_operand:SI 3 "register_operand" "a,a")
10210 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10211 (match_operand:SI 5 "register_operand" "d,d")
10212 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10214 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10222 (set (reg:CC FLAGS_REG)
10231 && can_create_pseudo_p ()"
10236 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10237 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10238 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10241 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10242 operands[3], operands[4],
10243 operands[5], operands[6]));
10245 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10246 operands[3], operands[4],
10247 operands[5], operands[6]));
10248 if (flags && !(ecx || xmm0))
10249 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10250 operands[2], operands[3],
10251 operands[4], operands[5],
10255 [(set_attr "type" "sselog")
10256 (set_attr "prefix_data16" "1")
10257 (set_attr "prefix_extra" "1")
10258 (set_attr "length_immediate" "1")
10259 (set_attr "memory" "none,load")
10260 (set_attr "mode" "TI")])
;; pcmpestri: index form.  The SI result is constrained to ecx ("c"),
;; the explicit lengths to eax ("a") and edx ("d"); FLAGS_REG is also
;; set.  Two alternatives: second vector in a register or in memory,
;; mirrored by the "memory" attribute (none,load).
10262 (define_insn "sse4_2_pcmpestri"
10263 [(set (match_operand:SI 0 "register_operand" "=c,c")
10265 [(match_operand:V16QI 1 "register_operand" "x,x")
10266 (match_operand:SI 2 "register_operand" "a,a")
10267 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10268 (match_operand:SI 4 "register_operand" "d,d")
10269 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10271 (set (reg:CC FLAGS_REG)
10280 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10281 [(set_attr "type" "sselog")
10282 (set_attr "prefix_data16" "1")
10283 (set_attr "prefix_extra" "1")
10284 (set_attr "prefix" "maybe_vex")
10285 (set_attr "length_immediate" "1")
10286 (set_attr "memory" "none,load")
10287 (set_attr "mode" "TI")])
;; pcmpestrm: mask form.  The V16QI result carries the "Yz" constraint;
;; the explicit lengths are in eax ("a") and edx ("d"); FLAGS_REG is
;; also set.  Alternatives differ only in register vs. memory for the
;; second vector.
10289 (define_insn "sse4_2_pcmpestrm"
10290 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10292 [(match_operand:V16QI 1 "register_operand" "x,x")
10293 (match_operand:SI 2 "register_operand" "a,a")
10294 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10295 (match_operand:SI 4 "register_operand" "d,d")
10296 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10298 (set (reg:CC FLAGS_REG)
10307 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10308 [(set_attr "type" "sselog")
10309 (set_attr "prefix_data16" "1")
10310 (set_attr "prefix_extra" "1")
10311 (set_attr "length_immediate" "1")
10312 (set_attr "prefix" "maybe_vex")
10313 (set_attr "memory" "none,load")
10314 (set_attr "mode" "TI")])
;; pcmpestr used for its flags only.  Four alternatives: the first two
;; clobber a V16QI scratch under "Yz" and print pcmpestrm, the last two
;; clobber an SI scratch in ecx ("c") and print pcmpestri.  Within each
;; pair the second vector is a register or memory, matching the
;; "memory" attribute (none,load,none,load).
10316 (define_insn "sse4_2_pcmpestr_cconly"
10317 [(set (reg:CC FLAGS_REG)
10319 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10320 (match_operand:SI 3 "register_operand" "a,a,a,a")
10321 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10322 (match_operand:SI 5 "register_operand" "d,d,d,d")
10323 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10325 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10326 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10329 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10330 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10331 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10332 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10333 [(set_attr "type" "sselog")
10334 (set_attr "prefix_data16" "1")
10335 (set_attr "prefix_extra" "1")
10336 (set_attr "length_immediate" "1")
10337 (set_attr "memory" "none,load,none,load")
10338 (set_attr "prefix" "maybe_vex")
10339 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern (no explicit length operands): results are
;; an SI value in ecx (operand 0), a V16QI value under "Yz" (operand 1)
;; and FLAGS_REG.  Inputs are two vectors (operands 2 and 3) and an
;; 8-bit immediate (operand 4).
;; The split code reads REG_UNUSED notes on curr_insn to see which
;; results are live and emits the index form, the mask form and/or the
;; flags-only form; the latter only when the flags are the sole live
;; result.
;; NOTE(review): the guards around the first two emit_insn calls are
;; not visible here -- presumably "if (ecx)" and "if (xmm0)".
10341 (define_insn_and_split "sse4_2_pcmpistr"
10342 [(set (match_operand:SI 0 "register_operand" "=c,c")
10344 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10345 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10346 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10348 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10354 (set (reg:CC FLAGS_REG)
10361 && can_create_pseudo_p ()"
10366 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10367 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10368 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10371 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10372 operands[3], operands[4]));
10374 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10375 operands[3], operands[4]));
10376 if (flags && !(ecx || xmm0))
10377 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10378 operands[2], operands[3],
10382 [(set_attr "type" "sselog")
10383 (set_attr "prefix_data16" "1")
10384 (set_attr "prefix_extra" "1")
10385 (set_attr "length_immediate" "1")
10386 (set_attr "memory" "none,load")
10387 (set_attr "mode" "TI")])
;; pcmpistri: index form.  The SI result is constrained to ecx ("c");
;; FLAGS_REG is also set.  Alternatives: second vector in a register
;; or in memory.
10389 (define_insn "sse4_2_pcmpistri"
10390 [(set (match_operand:SI 0 "register_operand" "=c,c")
10392 [(match_operand:V16QI 1 "register_operand" "x,x")
10393 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10394 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10396 (set (reg:CC FLAGS_REG)
10403 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10404 [(set_attr "type" "sselog")
10405 (set_attr "prefix_data16" "1")
10406 (set_attr "prefix_extra" "1")
10407 (set_attr "length_immediate" "1")
10408 (set_attr "prefix" "maybe_vex")
10409 (set_attr "memory" "none,load")
10410 (set_attr "mode" "TI")])
;; pcmpistrm: mask form.  The V16QI result carries the "Yz" constraint;
;; FLAGS_REG is also set.  Alternatives: second vector in a register
;; or in memory.
10412 (define_insn "sse4_2_pcmpistrm"
10413 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10415 [(match_operand:V16QI 1 "register_operand" "x,x")
10416 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10417 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10419 (set (reg:CC FLAGS_REG)
10426 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10427 [(set_attr "type" "sselog")
10428 (set_attr "prefix_data16" "1")
10429 (set_attr "prefix_extra" "1")
10430 (set_attr "length_immediate" "1")
10431 (set_attr "prefix" "maybe_vex")
10432 (set_attr "memory" "none,load")
10433 (set_attr "mode" "TI")])
;; pcmpistr used for its flags only.  Alternatives 0-1 clobber a V16QI
;; scratch under "Yz" and print pcmpistrm; alternatives 2-3 clobber an
;; SI scratch in ecx ("c") and print pcmpistri.  Within each pair the
;; second vector is a register or memory.
10435 (define_insn "sse4_2_pcmpistr_cconly"
10436 [(set (reg:CC FLAGS_REG)
10438 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10439 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10440 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10442 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10443 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10446 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10447 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10448 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10449 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10450 [(set_attr "type" "sselog")
10451 (set_attr "prefix_data16" "1")
10452 (set_attr "prefix_extra" "1")
10453 (set_attr "length_immediate" "1")
10454 (set_attr "memory" "none,load,none,load")
10455 (set_attr "prefix" "maybe_vex")
10456 (set_attr "mode" "TI")])
10458 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10460 ;; XOP instructions
10462 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10464 ;; XOP parallel integer multiply/add instructions.
10465 ;; Note the instruction does not allow the value being added to be a memory
10466 ;; operand.  However, pretending via the nonimmediate_operand predicate
10467 ;; that it does, and splitting it later, allows the following to be recognized:
10468 ;; a[i] = b[i] * c[i] + d[i];
;; Two alternatives: the memory operand may be either multiplicand, and
;; the second template swaps %1 and %2 so it is printed in the same
;; position.  NOTE(review): the insn condition passes 2 to
;; ix86_fma4_valid_op_p where xop_pmacssww passes 1 -- presumably the
;; number of memory operands tolerated before the split; confirm.
10469 (define_insn "xop_pmacsww"
10470 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10473 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10474 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x"))
10475 (match_operand:V8HI 3 "register_operand" "x,x")))]
10476 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)"
10478 vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10479 vpmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10480 [(set_attr "type" "ssemuladd")
10481 (set_attr "mode" "TI")])
10483 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; The split applies only when the operands fail the 1-argument
;; ix86_fma4_valid_op_p check but pass the 2-argument one, and when the
;; destination is not mentioned in any of the three sources.
;; ix86_expand_fma4_multiple_memory rewrites the operands before the
;; real xop_pmacsww is emitted.
10485 [(set (match_operand:V8HI 0 "register_operand" "")
10487 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
10488 (match_operand:V8HI 2 "nonimmediate_operand" ""))
10489 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
10491 && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)
10492 && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)
10493 && !reg_mentioned_p (operands[0], operands[1])
10494 && !reg_mentioned_p (operands[0], operands[2])
10495 && !reg_mentioned_p (operands[0], operands[3])"
10498 ix86_expand_fma4_multiple_memory (operands, 4, V8HImode);
10499 emit_insn (gen_xop_pmacsww (operands[0], operands[1], operands[2],
;; vpmacssww on V8HI: product of operands 1 and 2 combined with the
;; register addend in operand 3.  Either multiplicand may be memory;
;; the second template swaps %1 and %2 for that alternative.
10504 (define_insn "xop_pmacssww"
10505 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10507 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10508 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x"))
10509 (match_operand:V8HI 3 "register_operand" "x,x")))]
10510 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10512 vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10513 vpmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10514 [(set_attr "type" "ssemuladd")
10515 (set_attr "mode" "TI")])
10517 ;; Note the instruction does not allow the value being added to be a memory
10518 ;; operand.  However, pretending via the nonimmediate_operand predicate
10519 ;; that it does, and splitting it later, allows the following to be recognized:
10520 ;; a[i] = b[i] * c[i] + d[i];
;; V4SI counterpart of xop_pmacsww: same two-alternative layout, with
;; the second template swapping %1 and %2 for the memory-first case.
10521 (define_insn "xop_pmacsdd"
10522 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10525 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10526 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x"))
10527 (match_operand:V4SI 3 "register_operand" "x,x")))]
10528 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)"
10530 vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10531 vpmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10532 [(set_attr "type" "ssemuladd")
10533 (set_attr "mode" "TI")])
10535 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; The split applies only when the operands fail the 1-argument
;; ix86_fma4_valid_op_p check but pass the 2-argument one, and when the
;; destination is not mentioned in any source operand.
;; ix86_expand_fma4_multiple_memory rewrites the operands before the
;; real xop_pmacsdd is emitted.
10537 [(set (match_operand:V4SI 0 "register_operand" "")
10539 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
10540 (match_operand:V4SI 2 "nonimmediate_operand" ""))
10541 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
10543 && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)
10544 && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)
10545 && !reg_mentioned_p (operands[0], operands[1])
10546 && !reg_mentioned_p (operands[0], operands[2])
10547 && !reg_mentioned_p (operands[0], operands[3])"
10550 ix86_expand_fma4_multiple_memory (operands, 4, V4SImode);
10551 emit_insn (gen_xop_pmacsdd (operands[0], operands[1], operands[2],
;; vpmacssdd on V4SI: product of operands 1 and 2 combined with the
;; register addend in operand 3.  Either multiplicand may be memory;
;; the second template swaps %1 and %2 for that alternative.
10556 (define_insn "xop_pmacssdd"
10557 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10559 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10560 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x"))
10561 (match_operand:V4SI 3 "register_operand" "x,x")))]
10562 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10564 vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10565 vpmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10566 [(set_attr "type" "ssemuladd")
10567 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI multiplicands (selection lists start at element 1)
;; with a V2DI register addend and a V2DI result.  Either multiplicand
;; may be memory; the second template swaps %1 and %2.
10569 (define_insn "xop_pmacssdql"
10570 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10575 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10576 (parallel [(const_int 1)
10579 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10580 (parallel [(const_int 1)
10582 (match_operand:V2DI 3 "register_operand" "x,x")))]
10583 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10585 vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10586 vpmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10587 [(set_attr "type" "ssemuladd")
10588 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI multiplicands (selection lists start at element 0)
;; with a V2DI register addend and a V2DI result.  Either multiplicand
;; may be memory; the second template swaps %1 and %2.
10590 (define_insn "xop_pmacssdqh"
10591 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10596 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10597 (parallel [(const_int 0)
10601 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10602 (parallel [(const_int 0)
10604 (match_operand:V2DI 3 "register_operand" "x,x")))]
10605 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10607 vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10608 vpmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10609 [(set_attr "type" "ssemuladd")
10610 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI multiplicands (selection lists start at element 1)
;; with a V2DI register addend and a V2DI result.  Either multiplicand
;; may be memory; the second template swaps %1 and %2.
10612 (define_insn "xop_pmacsdql"
10613 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10618 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10619 (parallel [(const_int 1)
10623 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10624 (parallel [(const_int 1)
10626 (match_operand:V2DI 3 "register_operand" "x,x")))]
10627 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10629 vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10630 vpmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10631 [(set_attr "type" "ssemuladd")
10632 (set_attr "mode" "TI")])
;; Variant of xop_pmacsdql whose addend (operand 3) is a memory
;; operand.  The destination is earlyclobber ("=&x") and the pattern is
;; split once reload has completed; the replacement sequence begins by
;; setting operand 0.
;; NOTE(review): presumably the first set loads the addend into the
;; destination so the real vpmacsdql can use it as a register -- the
;; body of that set is not visible here.
10634 (define_insn_and_split "*xop_pmacsdql_mem"
10635 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x")
10640 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10641 (parallel [(const_int 1)
10645 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10646 (parallel [(const_int 1)
10648 (match_operand:V2DI 3 "memory_operand" "m,m")))]
10649 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)"
10651 "&& reload_completed"
10652 [(set (match_dup 0)
10660 (parallel [(const_int 1)
10665 (parallel [(const_int 1)
10669 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10670 ;; fake it with a multiply/add. In general, we expect the define_split to
10671 ;; occur before register allocation, so we have to handle the corner case where
10672 ;; the target is the same as either operands[1] or operands[2]
;; This variant selects elements 1 and 3 of each V4SI input.  The
;; destination is earlyclobber ("=&x"); after reload the insn is split
;; into a sequence that starts by setting operand 0, with operands[3]
;; set to the V2DI zero constant to act as the addend.
10673 (define_insn_and_split "xop_mulv2div2di3_low"
10674 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10678 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10679 (parallel [(const_int 1)
10683 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10684 (parallel [(const_int 1)
10685 (const_int 3)])))))]
10688 "&& reload_completed"
10689 [(set (match_dup 0)
10697 (parallel [(const_int 1)
10702 (parallel [(const_int 1)
10706 operands[3] = CONST0_RTX (V2DImode);
10708 [(set_attr "type" "ssemuladd")
10709 (set_attr "mode" "TI")])
;; Pattern for vpmacsdqh: V2DI destination, V4SI operands 1 and 2 (the visible
;; lane selectors start at element 0) and V2DI register operand 3.  Operand 1
;; is marked commutative; the second alternative takes operand 1 from memory
;; and its template prints operands 1 and 2 in swapped order.
10711 (define_insn "xop_pmacsdqh"
10712 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10717 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10718 (parallel [(const_int 0)
10722 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10723 (parallel [(const_int 0)
10725 (match_operand:V2DI 3 "register_operand" "x,x")))]
10726 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10728 vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10729 vpmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10730 [(set_attr "type" "ssemuladd")
10731 (set_attr "mode" "TI")])
;; Variant of the vpmacsdqh pattern whose operand 3 is a memory operand.  The
;; destination is earlyclobber and the insn is split once reload has completed.
;; NOTE(review): the replacement sequence is only partly visible in this
;; listing -- confirm how operand 3 is brought into a register.
10733 (define_insn_and_split "*xop_pmacsdqh_mem"
10734 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x")
10739 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10740 (parallel [(const_int 0)
10744 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10745 (parallel [(const_int 0)
10747 (match_operand:V2DI 3 "memory_operand" "m,m")))]
10748 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)"
10750 "&& reload_completed"
10751 [(set (match_dup 0)
10759 (parallel [(const_int 0)
10764 (parallel [(const_int 0)
10768 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10769 ;; fake it with a multiply/add. In general, we expect the define_split to
10770 ;; occur before register allocation, so we have to handle the corner case where
10771 ;; the target is the same as either operands[1] or operands[2]
;; Operands 1 and 2 are V4SI, the result is V2DI, and the visible lane
;; selectors pick elements 0 and 2.  The preparation statement sets operands[3]
;; to the V2DI zero constant before the split pattern is generated.
;; NOTE(review): the comment above speaks of splitting before register
;; allocation, but the split condition here is reload_completed -- confirm
;; which is intended.
10772 (define_insn_and_split "xop_mulv2div2di3_high"
10773 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10777 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10778 (parallel [(const_int 0)
10782 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10783 (parallel [(const_int 0)
10784 (const_int 2)])))))]
10787 "&& reload_completed"
10788 [(set (match_dup 0)
10796 (parallel [(const_int 0)
10801 (parallel [(const_int 0)
10805 operands[3] = CONST0_RTX (V2DImode);
10807 [(set_attr "type" "ssemuladd")
10808 (set_attr "mode" "TI")])
10810 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; Pattern for vpmacsswd: V4SI destination, V8HI operands 1 and 2 (the visible
;; lane selectors start at element 1) and V4SI register operand 3.  The second
;; alternative takes operand 1 from memory and prints operands 1 and 2 swapped.
10811 (define_insn "xop_pmacsswd"
10812 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10817 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10818 (parallel [(const_int 1)
10824 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10825 (parallel [(const_int 1)
10829 (match_operand:V4SI 3 "register_operand" "x,x")))]
10830 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10832 vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10833 vpmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10834 [(set_attr "type" "ssemuladd")
10835 (set_attr "mode" "TI")])
;; Pattern for vpmacswd: V4SI destination, V8HI operands 1 and 2 (the visible
;; lane selectors start at element 1) and V4SI register operand 3.  The second
;; alternative takes operand 1 from memory and prints operands 1 and 2 swapped.
10837 (define_insn "xop_pmacswd"
10838 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10843 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10844 (parallel [(const_int 1)
10850 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10851 (parallel [(const_int 1)
10855 (match_operand:V4SI 3 "register_operand" "x,x")))]
10856 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10858 vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10859 vpmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10860 [(set_attr "type" "ssemuladd")
10861 (set_attr "mode" "TI")])
;; Pattern for vpmadcsswd: V4SI destination, V8HI operands 1 and 2 and V4SI
;; register operand 3.  Two groups of lane selectors are visible, one starting
;; at element 0 and one starting at element 1 (ending at element 7).
10863 (define_insn "xop_pmadcsswd"
10864 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10870 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10871 (parallel [(const_int 0)
10877 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10878 (parallel [(const_int 0)
10886 (parallel [(const_int 1)
10893 (parallel [(const_int 1)
10896 (const_int 7)])))))
10897 (match_operand:V4SI 3 "register_operand" "x,x")))]
10898 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10900 vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10901 vpmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10902 [(set_attr "type" "ssemuladd")
10903 (set_attr "mode" "TI")])
;; Pattern for vpmadcswd: V4SI destination, V8HI operands 1 and 2 and V4SI
;; register operand 3.  Two groups of lane selectors are visible, one starting
;; at element 0 and one starting at element 1 (ending at element 7).
10905 (define_insn "xop_pmadcswd"
10906 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10912 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10913 (parallel [(const_int 0)
10919 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10920 (parallel [(const_int 0)
10928 (parallel [(const_int 1)
10935 (parallel [(const_int 1)
10938 (const_int 7)])))))
10939 (match_operand:V4SI 3 "register_operand" "x,x")))]
10940 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10942 vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10943 vpmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10944 [(set_attr "type" "ssemuladd")
10945 (set_attr "mode" "TI")])
10947 ;; XOP parallel XMM conditional moves
;; vpcmov for every 16-byte SSEMODE vector mode, expressed as an if_then_else
;; with operand 3 as the condition and operands 1 and 2 as the two arms.  Each
;; of the three alternatives allows a different one of the inputs in memory.
10948 (define_insn "xop_pcmov_<mode>"
10949 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x")
10950 (if_then_else:SSEMODE
10951 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,x,m")
10952 (match_operand:SSEMODE 1 "vector_move_operand" "x,m,x")
10953 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x")))]
10954 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
10955 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10956 [(set_attr "type" "sse4arg")])
;; vpcmov for every 32-byte AVX256MODE vector mode, expressed as an
;; if_then_else with operand 3 as the condition and operands 1 and 2 as the
;; arms.  Each alternative allows a different one of the inputs in memory.
10958 (define_insn "xop_pcmov_<mode>256"
10959 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
10960 (if_then_else:AVX256MODE
10961 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,x,m")
10962 (match_operand:AVX256MODE 1 "vector_move_operand" "x,m,x")
10963 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x,x")))]
10964 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
10965 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10966 [(set_attr "type" "sse4arg")])
10968 ;; XOP horizontal add/subtract instructions
;; Pattern for vphaddbw: V16QI operand 1 (register or memory) to a V8HI
;; destination register.  The visible lane selectors start at elements 0 and 1.
10969 (define_insn "xop_phaddbw"
10970 [(set (match_operand:V8HI 0 "register_operand" "=x")
10974 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10975 (parallel [(const_int 0)
10986 (parallel [(const_int 1)
10993 (const_int 15)])))))]
10995 "vphaddbw\t{%1, %0|%0, %1}"
10996 [(set_attr "type" "sseiadd1")])
;; Pattern for vphaddbd: V16QI operand 1 (register or memory) to a V4SI
;; destination register.  Four lane selectors are visible, starting at
;; elements 0, 1, 2 and 3.
10998 (define_insn "xop_phaddbd"
10999 [(set (match_operand:V4SI 0 "register_operand" "=x")
11004 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11005 (parallel [(const_int 0)
11012 (parallel [(const_int 1)
11015 (const_int 13)]))))
11020 (parallel [(const_int 2)
11027 (parallel [(const_int 3)
11030 (const_int 15)]))))))]
11032 "vphaddbd\t{%1, %0|%0, %1}"
11033 [(set_attr "type" "sseiadd1")])
;; Pattern for vphaddbq: V16QI operand 1 (register or memory) to a V2DI
;; destination register.  Eight lane selectors are visible, starting at
;; elements 0 through 3 and 8 through 11.
11035 (define_insn "xop_phaddbq"
11036 [(set (match_operand:V2DI 0 "register_operand" "=x")
11042 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11043 (parallel [(const_int 0)
11048 (parallel [(const_int 1)
11054 (parallel [(const_int 2)
11059 (parallel [(const_int 3)
11060 (const_int 7)])))))
11066 (parallel [(const_int 8)
11071 (parallel [(const_int 9)
11072 (const_int 13)]))))
11077 (parallel [(const_int 10)
11082 (parallel [(const_int 11)
11083 (const_int 15)])))))))]
11085 "vphaddbq\t{%1, %0|%0, %1}"
11086 [(set_attr "type" "sseiadd1")])
;; Pattern for vphaddwd: V8HI operand 1 (register or memory) to a V4SI
;; destination register.  The visible lane selectors start at elements 0 and 1.
11088 (define_insn "xop_phaddwd"
11089 [(set (match_operand:V4SI 0 "register_operand" "=x")
11093 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11094 (parallel [(const_int 0)
11101 (parallel [(const_int 1)
11104 (const_int 7)])))))]
11106 "vphaddwd\t{%1, %0|%0, %1}"
11107 [(set_attr "type" "sseiadd1")])
;; Pattern for vphaddwq: V8HI operand 1 (register or memory) to a V2DI
;; destination register.  Four lane selectors are visible, starting at
;; elements 0, 1, 2 and 3.
11109 (define_insn "xop_phaddwq"
11110 [(set (match_operand:V2DI 0 "register_operand" "=x")
11115 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11116 (parallel [(const_int 0)
11121 (parallel [(const_int 1)
11127 (parallel [(const_int 2)
11132 (parallel [(const_int 3)
11133 (const_int 7)]))))))]
11135 "vphaddwq\t{%1, %0|%0, %1}"
11136 [(set_attr "type" "sseiadd1")])
;; Pattern for vphadddq: V4SI operand 1 (register or memory) to a V2DI
;; destination register.  The visible lane selectors start at elements 0 and 1.
11138 (define_insn "xop_phadddq"
11139 [(set (match_operand:V2DI 0 "register_operand" "=x")
11143 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11144 (parallel [(const_int 0)
11149 (parallel [(const_int 1)
11150 (const_int 3)])))))]
11152 "vphadddq\t{%1, %0|%0, %1}"
11153 [(set_attr "type" "sseiadd1")])
;; Pattern for vphaddubw: V16QI operand 1 (register or memory) to a V8HI
;; destination register.  The visible lane selectors start at elements 0 and 1.
11155 (define_insn "xop_phaddubw"
11156 [(set (match_operand:V8HI 0 "register_operand" "=x")
11160 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11161 (parallel [(const_int 0)
11172 (parallel [(const_int 1)
11179 (const_int 15)])))))]
11181 "vphaddubw\t{%1, %0|%0, %1}"
11182 [(set_attr "type" "sseiadd1")])
;; Pattern for vphaddubd: V16QI operand 1 (register or memory) to a V4SI
;; destination register.  Four lane selectors are visible, starting at
;; elements 0, 1, 2 and 3.
11184 (define_insn "xop_phaddubd"
11185 [(set (match_operand:V4SI 0 "register_operand" "=x")
11190 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11191 (parallel [(const_int 0)
11198 (parallel [(const_int 1)
11201 (const_int 13)]))))
11206 (parallel [(const_int 2)
11213 (parallel [(const_int 3)
11216 (const_int 15)]))))))]
11218 "vphaddubd\t{%1, %0|%0, %1}"
11219 [(set_attr "type" "sseiadd1")])
;; Pattern for vphaddubq: V16QI operand 1 (register or memory) to a V2DI
;; destination register.  Eight lane selectors are visible, starting at
;; elements 0 through 3 and 8 through 11.
11221 (define_insn "xop_phaddubq"
11222 [(set (match_operand:V2DI 0 "register_operand" "=x")
11228 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11229 (parallel [(const_int 0)
11234 (parallel [(const_int 1)
11240 (parallel [(const_int 2)
11245 (parallel [(const_int 3)
11246 (const_int 7)])))))
11252 (parallel [(const_int 8)
11257 (parallel [(const_int 9)
11258 (const_int 13)]))))
11263 (parallel [(const_int 10)
11268 (parallel [(const_int 11)
11269 (const_int 15)])))))))]
11271 "vphaddubq\t{%1, %0|%0, %1}"
11272 [(set_attr "type" "sseiadd1")])
;; Pattern for vphadduwd: V8HI operand 1 (register or memory) to a V4SI
;; destination register.  The visible lane selectors start at elements 0 and 1.
11274 (define_insn "xop_phadduwd"
11275 [(set (match_operand:V4SI 0 "register_operand" "=x")
11279 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11280 (parallel [(const_int 0)
11287 (parallel [(const_int 1)
11290 (const_int 7)])))))]
11292 "vphadduwd\t{%1, %0|%0, %1}"
11293 [(set_attr "type" "sseiadd1")])
;; Pattern for vphadduwq: V8HI operand 1 (register or memory) to a V2DI
;; destination register.  Four lane selectors are visible, starting at
;; elements 0, 1, 2 and 3.
11295 (define_insn "xop_phadduwq"
11296 [(set (match_operand:V2DI 0 "register_operand" "=x")
11301 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11302 (parallel [(const_int 0)
11307 (parallel [(const_int 1)
11313 (parallel [(const_int 2)
11318 (parallel [(const_int 3)
11319 (const_int 7)]))))))]
11321 "vphadduwq\t{%1, %0|%0, %1}"
11322 [(set_attr "type" "sseiadd1")])
;; Pattern for vphaddudq: V4SI operand 1 (register or memory) to a V2DI
;; destination register.  The visible lane selectors start at elements 0 and 1.
11324 (define_insn "xop_phaddudq"
11325 [(set (match_operand:V2DI 0 "register_operand" "=x")
11329 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11330 (parallel [(const_int 0)
11335 (parallel [(const_int 1)
11336 (const_int 3)])))))]
11338 "vphaddudq\t{%1, %0|%0, %1}"
11339 [(set_attr "type" "sseiadd1")])
;; Pattern for vphsubbw: V16QI operand 1 (register or memory) to a V8HI
;; destination register.  The visible lane selectors start at elements 0 and 1.
11341 (define_insn "xop_phsubbw"
11342 [(set (match_operand:V8HI 0 "register_operand" "=x")
11346 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11347 (parallel [(const_int 0)
11358 (parallel [(const_int 1)
11365 (const_int 15)])))))]
11367 "vphsubbw\t{%1, %0|%0, %1}"
11368 [(set_attr "type" "sseiadd1")])
;; Pattern for vphsubwd: V8HI operand 1 (register or memory) to a V4SI
;; destination register.  The visible lane selectors start at elements 0 and 1.
11370 (define_insn "xop_phsubwd"
11371 [(set (match_operand:V4SI 0 "register_operand" "=x")
11375 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11376 (parallel [(const_int 0)
11383 (parallel [(const_int 1)
11386 (const_int 7)])))))]
11388 "vphsubwd\t{%1, %0|%0, %1}"
11389 [(set_attr "type" "sseiadd1")])
;; Pattern for vphsubdq: V4SI operand 1 (register or memory) to a V2DI
;; destination register.  The visible lane selectors start at elements 0 and 1.
11391 (define_insn "xop_phsubdq"
11392 [(set (match_operand:V2DI 0 "register_operand" "=x")
11396 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11397 (parallel [(const_int 0)
11402 (parallel [(const_int 1)
11403 (const_int 3)])))))]
11405 "vphsubdq\t{%1, %0|%0, %1}"
11406 [(set_attr "type" "sseiadd1")])
11408 ;; XOP permute instructions
;; vpperm on three V16QI inputs, modelled as an UNSPEC_XOP_PERMUTE.  Each of
;; the three alternatives allows a different one of the inputs in memory.
11409 (define_insn "xop_pperm"
11410 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x")
11412 [(match_operand:V16QI 1 "nonimmediate_operand" "x,x,m")
11413 (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x")
11414 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x")]
11415 UNSPEC_XOP_PERMUTE))]
11416 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11417 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11418 [(set_attr "type" "sse4arg")
11419 (set_attr "mode" "TI")])
11421 ;; XOP pack instructions that combine two vectors into a smaller vector
;; vpperm used to pack two V2DI inputs (operands 1 and 2) into one V4SI
;; result.  V16QI operand 3 appears only in a use and is printed as the first
;; AT&T operand of the instruction.
11422 (define_insn "xop_pperm_pack_v2di_v4si"
11423 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
11426 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,m"))
11428 (match_operand:V2DI 2 "nonimmediate_operand" "x,m,x"))))
11429 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
11430 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11431 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11432 [(set_attr "type" "sse4arg")
11433 (set_attr "mode" "TI")])
;; vpperm used to pack two V4SI inputs (operands 1 and 2) into one V8HI
;; result.  V16QI operand 3 appears only in a use and is printed as the first
;; AT&T operand of the instruction.
11435 (define_insn "xop_pperm_pack_v4si_v8hi"
11436 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
11439 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m"))
11441 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))))
11442 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
11443 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11444 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11445 [(set_attr "type" "sse4arg")
11446 (set_attr "mode" "TI")])
;; vpperm used to pack two V8HI inputs (operands 1 and 2) into one V16QI
;; result.  V16QI operand 3 appears only in a use and is printed as the first
;; AT&T operand of the instruction.
11448 (define_insn "xop_pperm_pack_v8hi_v16qi"
11449 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x")
11452 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m"))
11454 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))))
11455 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
11456 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11457 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11458 [(set_attr "type" "sse4arg")
11459 (set_attr "mode" "TI")])
11461 ;; XOP packed rotate instructions
;; Vector rotate-left expander for the SSEMODE1248 modes.  When operand 2 is
;; not accepted by the const_0_to_<sserotatemax>_operand predicate, the count
;; is converted to the element mode if its mode differs, copied into every
;; element of a fresh vector register through vec_init, and
;; xop_vrotl<mode>3 is emitted with that vector as the per-element count.
;; NOTE(review): the in-range immediate path is not visible in this listing;
;; presumably it falls through to the xop_rotl<mode>3 insn -- confirm.
11462 (define_expand "rotl<mode>3"
11463 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11464 (rotate:SSEMODE1248
11465 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11466 (match_operand:SI 2 "general_operand")))]
11469 /* If we were given a scalar, convert it to parallel */
11470 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11472 rtvec vs = rtvec_alloc (<ssescalarnum>);
11473 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11474 rtx reg = gen_reg_rtx (<MODE>mode);
11475 rtx op2 = operands[2];
11478 if (GET_MODE (op2) != <ssescalarmode>mode)
11480 op2 = gen_reg_rtx (<ssescalarmode>mode);
11481 convert_move (op2, operands[2], false);
11484 for (i = 0; i < <ssescalarnum>; i++)
11485 RTVEC_ELT (vs, i) = op2;
11487 emit_insn (gen_vec_init<mode> (reg, par));
11488 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Vector rotate-right expander for the SSEMODE1248 modes.  When operand 2 is
;; not accepted by the const_0_to_<sserotatemax>_operand predicate, the count
;; is converted to the element mode if its mode differs, copied into every
;; element of a vector register, negated element-wise, and
;; xop_vrotl<mode>3 is emitted with the negated vector as the count.
;; NOTE(review): the in-range immediate path is not visible in this listing;
;; presumably it falls through to the xop_rotr<mode>3 insn -- confirm.
11493 (define_expand "rotr<mode>3"
11494 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11495 (rotatert:SSEMODE1248
11496 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11497 (match_operand:SI 2 "general_operand")))]
11500 /* If we were given a scalar, convert it to parallel */
11501 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11503 rtvec vs = rtvec_alloc (<ssescalarnum>);
11504 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11505 rtx neg = gen_reg_rtx (<MODE>mode);
11506 rtx reg = gen_reg_rtx (<MODE>mode);
11507 rtx op2 = operands[2];
11510 if (GET_MODE (op2) != <ssescalarmode>mode)
11512 op2 = gen_reg_rtx (<ssescalarmode>mode);
11513 convert_move (op2, operands[2], false);
11516 for (i = 0; i < <ssescalarnum>; i++)
11517 RTVEC_ELT (vs, i) = op2;
11519 emit_insn (gen_vec_init<mode> (reg, par));
11520 emit_insn (gen_neg<mode>2 (neg, reg));
11521 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left of an SSEMODE1248 vector by an immediate count accepted by
;; const_0_to_<sserotatemax>_operand; emitted as vprot<ssevecsize> with the
;; count as a one-byte immediate.
11526 (define_insn "xop_rotl<mode>3"
11527 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11528 (rotate:SSEMODE1248
11529 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11530 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11532 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11533 [(set_attr "type" "sseishft")
11534 (set_attr "length_immediate" "1")
11535 (set_attr "mode" "TI")])
;; Rotate-right of an SSEMODE1248 vector by an immediate count.  The
;; instruction is emitted as a vprot<ssevecsize> rotate-left by
;; (element width in bits - count), stored in operands[3].
;; The element width must come from the element mode: <ssescalarnum> is the
;; number of elements (it sizes the rtvec in the rotl/rotr expanders), so
;; <ssescalarnum> * 8 equals the element width only for V4SI and yields, for
;; example, 16 - count instead of 64 - count for V2DI.
11537 (define_insn "xop_rotr<mode>3"
11538 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11539 (rotatert:SSEMODE1248
11540 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11541 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11544 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11545 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11547 [(set_attr "type" "sseishft")
11548 (set_attr "length_immediate" "1")
11549 (set_attr "mode" "TI")])
;; Per-element variable rotate-right: negates the count vector (operand 2)
;; into a fresh register and emits xop_vrotl<mode>3 with the negated counts.
11551 (define_expand "vrotr<mode>3"
11552 [(match_operand:SSEMODE1248 0 "register_operand" "")
11553 (match_operand:SSEMODE1248 1 "register_operand" "")
11554 (match_operand:SSEMODE1248 2 "register_operand" "")]
11557 rtx reg = gen_reg_rtx (<MODE>mode);
11558 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11559 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate-left: forwards the three operands unchanged to
;; xop_vrotl<mode>3.
11563 (define_expand "vrotl<mode>3"
11564 [(match_operand:SSEMODE1248 0 "register_operand" "")
11565 (match_operand:SSEMODE1248 1 "register_operand" "")
11566 (match_operand:SSEMODE1248 2 "register_operand" "")]
11569 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot<ssevecsize> with a vector count in operand 2.  The RTL is an
;; if_then_else with a rotate arm and a rotatert arm, the latter using the
;; negation of operand 2.  Either operand 1 or operand 2 may be in memory.
;; NOTE(review): the if_then_else condition is not visible in this listing.
11573 (define_insn "xop_vrotl<mode>3"
11574 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11575 (if_then_else:SSEMODE1248
11577 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11579 (rotate:SSEMODE1248
11580 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11582 (rotatert:SSEMODE1248
11584 (neg:SSEMODE1248 (match_dup 2)))))]
11585 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
11586 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11587 [(set_attr "type" "sseishft")
11588 (set_attr "prefix_data16" "0")
11589 (set_attr "prefix_extra" "2")
11590 (set_attr "mode" "TI")])
11592 ;; XOP packed shift instructions.
11593 ;; FIXME: add V2DI back in
;; Per-element logical shift right for the SSEMODE124 modes: negates the count
;; vector (operand 2) and emits xop_lshl<mode>3 with the negated counts.
11594 (define_expand "vlshr<mode>3"
11595 [(match_operand:SSEMODE124 0 "register_operand" "")
11596 (match_operand:SSEMODE124 1 "register_operand" "")
11597 (match_operand:SSEMODE124 2 "register_operand" "")]
11600 rtx neg = gen_reg_rtx (<MODE>mode);
11601 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11602 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element arithmetic shift right for the SSEMODE124 modes: negates the
;; count vector (operand 2) and emits xop_ashl<mode>3 with the negated counts.
11606 (define_expand "vashr<mode>3"
11607 [(match_operand:SSEMODE124 0 "register_operand" "")
11608 (match_operand:SSEMODE124 1 "register_operand" "")
11609 (match_operand:SSEMODE124 2 "register_operand" "")]
11612 rtx neg = gen_reg_rtx (<MODE>mode);
11613 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11614 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Per-element shift left for the SSEMODE124 modes: forwards the three
;; operands unchanged to xop_ashl<mode>3.
11618 (define_expand "vashl<mode>3"
11619 [(match_operand:SSEMODE124 0 "register_operand" "")
11620 (match_operand:SSEMODE124 1 "register_operand" "")
11621 (match_operand:SSEMODE124 2 "register_operand" "")]
11624 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; vpsha<ssevecsize> with a vector count in operand 2.  The RTL is an
;; if_then_else with an ashift arm and an ashiftrt arm, the latter using the
;; negation of operand 2.  Either operand 1 or operand 2 may be in memory.
;; NOTE(review): the if_then_else condition is not visible in this listing.
11628 (define_insn "xop_ashl<mode>3"
11629 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11630 (if_then_else:SSEMODE1248
11632 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11634 (ashift:SSEMODE1248
11635 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11637 (ashiftrt:SSEMODE1248
11639 (neg:SSEMODE1248 (match_dup 2)))))]
11640 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
11641 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11642 [(set_attr "type" "sseishft")
11643 (set_attr "prefix_data16" "0")
11644 (set_attr "prefix_extra" "2")
11645 (set_attr "mode" "TI")])
;; vpshl<ssevecsize> with a vector count in operand 2.  The RTL is an
;; if_then_else with an ashift arm and an lshiftrt arm, the latter using the
;; negation of operand 2.  Either operand 1 or operand 2 may be in memory.
;; NOTE(review): the if_then_else condition is not visible in this listing.
11647 (define_insn "xop_lshl<mode>3"
11648 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11649 (if_then_else:SSEMODE1248
11651 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11653 (ashift:SSEMODE1248
11654 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11656 (lshiftrt:SSEMODE1248
11658 (neg:SSEMODE1248 (match_dup 2)))))]
11659 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
11660 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11661 [(set_attr "type" "sseishft")
11662 (set_attr "prefix_data16" "0")
11663 (set_attr "prefix_extra" "2")
11664 (set_attr "mode" "TI")])
11666 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar count: operand 2 is copied into all 16
;; elements of a vector register through vec_init and xop_ashlv16qi3 is
;; emitted with that vector as the count.
;; NOTE(review): operand 2 is declared SImode but is stored directly as a
;; V16QI element, with no convert_move as in the rotl<mode>3 expander --
;; confirm that vec_init accepts this.
11667 (define_expand "ashlv16qi3"
11668 [(match_operand:V16QI 0 "register_operand" "")
11669 (match_operand:V16QI 1 "register_operand" "")
11670 (match_operand:SI 2 "nonmemory_operand" "")]
11673 rtvec vs = rtvec_alloc (16);
11674 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11675 rtx reg = gen_reg_rtx (V16QImode);
11677 for (i = 0; i < 16; i++)
11678 RTVEC_ELT (vs, i) = operands[2];
11680 emit_insn (gen_vec_initv16qi (reg, par));
11681 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar count: operand 2 is copied into all 16
;; elements of a vector register through vec_init and xop_lshlv16qi3 is
;; emitted with that vector as the count.
;; NOTE(review): operand 2 is declared SImode but is stored directly as a
;; V16QI element, with no convert_move as in the rotl<mode>3 expander --
;; confirm that vec_init accepts this.
11685 (define_expand "lshlv16qi3"
11686 [(match_operand:V16QI 0 "register_operand" "")
11687 (match_operand:V16QI 1 "register_operand" "")
11688 (match_operand:SI 2 "nonmemory_operand" "")]
11691 rtvec vs = rtvec_alloc (16);
11692 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11693 rtx reg = gen_reg_rtx (V16QImode);
11695 for (i = 0; i < 16; i++)
11696 RTVEC_ELT (vs, i) = operands[2];
11698 emit_insn (gen_vec_initv16qi (reg, par));
11699 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar count, implemented with
;; xop_ashlv16qi3 and a negated count.  A constant count is negated at expand
;; time and broadcast directly; otherwise the count is broadcast to a vector
;; first and that vector is negated with negv16qi2 before the shift is emitted.
11703 (define_expand "ashrv16qi3"
11704 [(match_operand:V16QI 0 "register_operand" "")
11705 (match_operand:V16QI 1 "register_operand" "")
11706 (match_operand:SI 2 "nonmemory_operand" "")]
11709 rtvec vs = rtvec_alloc (16);
11710 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11711 rtx reg = gen_reg_rtx (V16QImode);
11713 rtx ele = ((CONST_INT_P (operands[2]))
11714 ? GEN_INT (- INTVAL (operands[2]))
11717 for (i = 0; i < 16; i++)
11718 RTVEC_ELT (vs, i) = ele;
11720 emit_insn (gen_vec_initv16qi (reg, par));
11722 if (!CONST_INT_P (operands[2]))
11724 rtx neg = gen_reg_rtx (V16QImode);
11725 emit_insn (gen_negv16qi2 (neg, reg));
11726 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11729 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar count, implemented with
;; xop_ashlv2di3 and a negated count.  The scalar is negated first (at expand
;; time for a constant, otherwise with negdi2 after conversion to DImode when
;; its mode differs), then placed in both elements of the count vector.
11734 (define_expand "ashrv2di3"
11735 [(match_operand:V2DI 0 "register_operand" "")
11736 (match_operand:V2DI 1 "register_operand" "")
11737 (match_operand:DI 2 "nonmemory_operand" "")]
11740 rtvec vs = rtvec_alloc (2);
11741 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11742 rtx reg = gen_reg_rtx (V2DImode);
11745 if (CONST_INT_P (operands[2]))
11746 ele = GEN_INT (- INTVAL (operands[2]));
11747 else if (GET_MODE (operands[2]) != DImode)
11749 rtx move = gen_reg_rtx (DImode);
11750 ele = gen_reg_rtx (DImode);
11751 convert_move (move, operands[2], false);
11752 emit_insn (gen_negdi2 (ele, move));
11756 ele = gen_reg_rtx (DImode);
11757 emit_insn (gen_negdi2 (ele, operands[2]));
11760 RTVEC_ELT (vs, 0) = ele;
11761 RTVEC_ELT (vs, 1) = ele;
11762 emit_insn (gen_vec_initv2di (reg, par));
11763 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11767 ;; XOP FRCZ support
;; vfrcz on one SSEMODEF2P input (register or memory), written to a register
;; of the same mode.
11769 (define_insn "xop_frcz<mode>2"
11770 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11772 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11775 "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11776 [(set_attr "type" "ssecvt1")
11777 (set_attr "mode" "<MODE>")])
;; vec_merge form of vfrcz: operand 2 (register or memory) is the vfrcz input
;; and operand 1 is tied to the destination register.
;; NOTE(review): the merge mask is not visible in this listing.
11780 (define_insn "xop_vmfrcz<mode>2"
11781 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11782 (vec_merge:SSEMODEF2P
11784 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11786 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11789 "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11790 [(set_attr "type" "ssecvt1")
11791 (set_attr "mode" "<MODE>")])
;; vfrcz for the FMA4MODEF4 modes (V8SF and V4DF): one input in a register or
;; memory, result in a register of the same mode.
11793 (define_insn "xop_frcz<mode>2256"
11794 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
11796 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
11799 "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}"
11800 [(set_attr "type" "ssecvt1")
11801 (set_attr "mode" "<MODE>")])
;; vpcom comparison of register operand 2 with operand 3 (register or memory).
;; The comparison code is operator 1, accepted by ix86_comparison_int_operator
;; and printed into the mnemonic through the %Y1 modifier.
11803 (define_insn "xop_maskcmp<mode>3"
11804 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11805 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11806 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11807 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11809 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11810 [(set_attr "type" "sse4arg")
11811 (set_attr "prefix_data16" "0")
11812 (set_attr "prefix_rep" "0")
11813 (set_attr "prefix_extra" "2")
11814 (set_attr "length_immediate" "1")
11815 (set_attr "mode" "TI")])
;; Counterpart of xop_maskcmp<mode>3 for comparison codes accepted by
;; ix86_comparison_uns_operator; the mnemonic gains a u before the element
;; size suffix.
11817 (define_insn "xop_maskcmp_uns<mode>3"
11818 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11819 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11820 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11821 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11823 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11824 [(set_attr "type" "ssecmp")
11825 (set_attr "prefix_data16" "0")
11826 (set_attr "prefix_rep" "0")
11827 (set_attr "prefix_extra" "2")
11828 (set_attr "length_immediate" "1")
11829 (set_attr "mode" "TI")])
11831 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11832 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11833 ;; the exact instruction generated for the intrinsic.
;; Same operands and output template as xop_maskcmp_uns<mode>3, but the
;; comparison is wrapped in an UNSPEC_XOP_UNSIGNED_CMP unspec.
11834 (define_insn "xop_maskcmp_uns2<mode>3"
11835 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11836 (unspec:SSEMODE1248
11837 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11838 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11839 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11840 UNSPEC_XOP_UNSIGNED_CMP))]
11842 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11843 [(set_attr "type" "ssecmp")
11844 (set_attr "prefix_data16" "0")
11845 (set_attr "prefix_extra" "2")
11846 (set_attr "length_immediate" "1")
11847 (set_attr "mode" "TI")])
11849 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11850 ;; being added here to be complete.
;; UNSPEC_XOP_TRUEFALSE pattern: constant operand 3 selects the mnemonic,
;; vpcomtrue when it is nonzero and vpcomfalse when it is zero.  Operands 1
;; and 2 are the two vector inputs.
11851 (define_insn "xop_pcom_tf<mode>3"
11852 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11853 (unspec:SSEMODE1248
11854 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11855 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11856 (match_operand:SI 3 "const_int_operand" "n")]
11857 UNSPEC_XOP_TRUEFALSE))]
11860 return ((INTVAL (operands[3]) != 0)
11861 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11862 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11864 [(set_attr "type" "ssecmp")
11865 (set_attr "prefix_data16" "0")
11866 (set_attr "prefix_extra" "2")
11867 (set_attr "length_immediate" "1")
11868 (set_attr "mode" "TI")])
11870 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand VEX form, vaesenc, enabled under TARGET_AES && TARGET_AVX:
;; the destination is independent of register operand 1, and operand 2 may be
;; in memory.
11871 (define_insn "*avx_aesenc"
11872 [(set (match_operand:V2DI 0 "register_operand" "=x")
11873 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11874 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11876 "TARGET_AES && TARGET_AVX"
11877 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11878 [(set_attr "type" "sselog1")
11879 (set_attr "prefix_extra" "1")
11880 (set_attr "prefix" "vex")
11881 (set_attr "mode" "TI")])
;; Two-operand form, aesenc: operand 1 is tied to the destination register and
;; operand 2 may be in memory.
11883 (define_insn "aesenc"
11884 [(set (match_operand:V2DI 0 "register_operand" "=x")
11885 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11886 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11889 "aesenc\t{%2, %0|%0, %2}"
11890 [(set_attr "type" "sselog1")
11891 (set_attr "prefix_extra" "1")
11892 (set_attr "mode" "TI")])
;; Three-operand VEX form, vaesenclast (UNSPEC_AESENCLAST), enabled under
;; TARGET_AES && TARGET_AVX; operand 2 may be in memory.
11894 (define_insn "*avx_aesenclast"
11895 [(set (match_operand:V2DI 0 "register_operand" "=x")
11896 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11897 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11898 UNSPEC_AESENCLAST))]
11899 "TARGET_AES && TARGET_AVX"
11900 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11901 [(set_attr "type" "sselog1")
11902 (set_attr "prefix_extra" "1")
11903 (set_attr "prefix" "vex")
11904 (set_attr "mode" "TI")])
;; Two-operand form, aesenclast (UNSPEC_AESENCLAST): operand 1 is tied to the
;; destination register and operand 2 may be in memory.
11906 (define_insn "aesenclast"
11907 [(set (match_operand:V2DI 0 "register_operand" "=x")
11908 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11909 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11910 UNSPEC_AESENCLAST))]
11912 "aesenclast\t{%2, %0|%0, %2}"
11913 [(set_attr "type" "sselog1")
11914 (set_attr "prefix_extra" "1")
11915 (set_attr "mode" "TI")])
;; Three-operand VEX form, vaesdec, enabled under TARGET_AES && TARGET_AVX:
;; the destination is independent of register operand 1, and operand 2 may be
;; in memory.
11917 (define_insn "*avx_aesdec"
11918 [(set (match_operand:V2DI 0 "register_operand" "=x")
11919 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11920 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11922 "TARGET_AES && TARGET_AVX"
11923 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11924 [(set_attr "type" "sselog1")
11925 (set_attr "prefix_extra" "1")
11926 (set_attr "prefix" "vex")
11927 (set_attr "mode" "TI")])
;; Two-operand form, aesdec: operand 1 is tied to the destination register and
;; operand 2 may be in memory.
11929 (define_insn "aesdec"
11930 [(set (match_operand:V2DI 0 "register_operand" "=x")
11931 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11932 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11935 "aesdec\t{%2, %0|%0, %2}"
11936 [(set_attr "type" "sselog1")
11937 (set_attr "prefix_extra" "1")
11938 (set_attr "mode" "TI")])
;; Three-operand VEX form, vaesdeclast (UNSPEC_AESDECLAST), enabled under
;; TARGET_AES && TARGET_AVX; operand 2 may be in memory.
11940 (define_insn "*avx_aesdeclast"
11941 [(set (match_operand:V2DI 0 "register_operand" "=x")
11942 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11943 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11944 UNSPEC_AESDECLAST))]
11945 "TARGET_AES && TARGET_AVX"
11946 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11947 [(set_attr "type" "sselog1")
11948 (set_attr "prefix_extra" "1")
11949 (set_attr "prefix" "vex")
11950 (set_attr "mode" "TI")])
;; Two-operand form, aesdeclast (UNSPEC_AESDECLAST): operand 1 is tied to the
;; destination register and operand 2 may be in memory.
11952 (define_insn "aesdeclast"
11953 [(set (match_operand:V2DI 0 "register_operand" "=x")
11954 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11955 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11956 UNSPEC_AESDECLAST))]
11958 "aesdeclast\t{%2, %0|%0, %2}"
11959 [(set_attr "type" "sselog1")
11960 (set_attr "prefix_extra" "1")
11961 (set_attr "mode" "TI")])
;; aesimc with a single V2DI input in a register or memory.  One pattern
;; serves both encodings: the template starts with %v and the prefix attribute
;; is maybe_vex.
11963 (define_insn "aesimc"
11964 [(set (match_operand:V2DI 0 "register_operand" "=x")
11965 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11968 "%vaesimc\t{%1, %0|%0, %1}"
11969 [(set_attr "type" "sselog1")
11970 (set_attr "prefix_extra" "1")
11971 (set_attr "prefix" "maybe_vex")
11972 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): V2DI input in a register or
;; memory plus an immediate accepted by const_0_to_255_operand.  The template
;; starts with %v and the prefix attribute is maybe_vex.
11974 (define_insn "aeskeygenassist"
11975 [(set (match_operand:V2DI 0 "register_operand" "=x")
11976 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11977 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11978 UNSPEC_AESKEYGENASSIST))]
11980 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11981 [(set_attr "type" "sselog1")
11982 (set_attr "prefix_extra" "1")
11983 (set_attr "length_immediate" "1")
11984 (set_attr "prefix" "maybe_vex")
11985 (set_attr "mode" "TI")])
;; Three-operand VEX form, vpclmulqdq, enabled under
;; TARGET_PCLMUL && TARGET_AVX: two V2DI inputs (operand 2 may be in memory)
;; plus an immediate accepted by const_0_to_255_operand.
11987 (define_insn "*vpclmulqdq"
11988 [(set (match_operand:V2DI 0 "register_operand" "=x")
11989 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11990 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11991 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11993 "TARGET_PCLMUL && TARGET_AVX"
11994 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11995 [(set_attr "type" "sselog1")
11996 (set_attr "prefix_extra" "1")
11997 (set_attr "length_immediate" "1")
11998 (set_attr "prefix" "vex")
11999 (set_attr "mode" "TI")])
;; Two-operand form, pclmulqdq: operand 1 is tied to the destination register,
;; operand 2 may be in memory, and operand 3 is an immediate accepted by
;; const_0_to_255_operand.
12001 (define_insn "pclmulqdq"
12002 [(set (match_operand:V2DI 0 "register_operand" "=x")
12003 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
12004 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12005 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12008 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
12009 [(set_attr "type" "sselog1")
12010 (set_attr "prefix_extra" "1")
12011 (set_attr "length_immediate" "1")
12012 (set_attr "mode" "TI")])
;; Builds the PARALLEL for vzeroall in operands[0].  Element 0 is an
;; unspec_volatile; it is followed by one SET per SSE register (16 registers
;; when TARGET_64BIT, otherwise 8) assigning the V8SImode zero constant to it.
12014 (define_expand "avx_vzeroall"
12015 [(match_par_dup 0 [(const_int 0)])]
12018 int nregs = TARGET_64BIT ? 16 : 8;
12021 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
12023 XVECEXP (operands[0], 0, 0)
12024 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
12027 for (regno = 0; regno < nregs; regno++)
12028 XVECEXP (operands[0], 0, regno + 1)
12029 = gen_rtx_SET (VOIDmode,
12030 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
12031 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose first
;; element is the UNSPECV_VZEROALL unspec_volatile and whose second element
;; sets a register to a zero constant.
12034 (define_insn "*avx_vzeroall"
12035 [(match_parallel 0 "vzeroall_operation"
12036 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
12037 (set (match_operand 1 "register_operand" "=x")
12038 (match_operand 2 "const0_operand" "X"))])]
12041 [(set_attr "type" "sse")
12042 (set_attr "modrm" "0")
12043 (set_attr "memory" "none")
12044 (set_attr "prefix" "vex")
12045 (set_attr "mode" "OI")])
12047 ;; vzeroupper clobbers the upper 128 bits of AVX registers.
;; 32-bit variant (condition: TARGET_AVX && !TARGET_64BIT).
;; The pattern is an UNSPECV_VZEROUPPER unspec_volatile plus an explicit
;; V8SI clobber of each of XMM0_REG .. XMM7_REG, so the register
;; allocator and dataflow see all eight registers as modified.
;; Attributes: no modrm byte, no memory access, VEX prefix, OI mode.
12048 (define_insn "avx_vzeroupper"
12049 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
12050 (clobber (reg:V8SI XMM0_REG))
12051 (clobber (reg:V8SI XMM1_REG))
12052 (clobber (reg:V8SI XMM2_REG))
12053 (clobber (reg:V8SI XMM3_REG))
12054 (clobber (reg:V8SI XMM4_REG))
12055 (clobber (reg:V8SI XMM5_REG))
12056 (clobber (reg:V8SI XMM6_REG))
12057 (clobber (reg:V8SI XMM7_REG))]
12058 "TARGET_AVX && !TARGET_64BIT"
12060 [(set_attr "type" "sse")
12061 (set_attr "modrm" "0")
12062 (set_attr "memory" "none")
12063 (set_attr "prefix" "vex")
12064 (set_attr "mode" "OI")])
;; 64-bit variant of vzeroupper (condition: TARGET_AVX && TARGET_64BIT).
;; Same shape as the 32-bit pattern -- an UNSPECV_VZEROUPPER
;; unspec_volatile followed by V8SI clobbers -- but the clobber list
;; covers sixteen registers, XMM0_REG .. XMM15_REG.
;; Attributes: no modrm byte, no memory access, VEX prefix, OI mode.
12066 (define_insn "avx_vzeroupper_rex64"
12067 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
12068 (clobber (reg:V8SI XMM0_REG))
12069 (clobber (reg:V8SI XMM1_REG))
12070 (clobber (reg:V8SI XMM2_REG))
12071 (clobber (reg:V8SI XMM3_REG))
12072 (clobber (reg:V8SI XMM4_REG))
12073 (clobber (reg:V8SI XMM5_REG))
12074 (clobber (reg:V8SI XMM6_REG))
12075 (clobber (reg:V8SI XMM7_REG))
12076 (clobber (reg:V8SI XMM8_REG))
12077 (clobber (reg:V8SI XMM9_REG))
12078 (clobber (reg:V8SI XMM10_REG))
12079 (clobber (reg:V8SI XMM11_REG))
12080 (clobber (reg:V8SI XMM12_REG))
12081 (clobber (reg:V8SI XMM13_REG))
12082 (clobber (reg:V8SI XMM14_REG))
12083 (clobber (reg:V8SI XMM15_REG))]
12084 "TARGET_AVX && TARGET_64BIT"
12086 [(set_attr "type" "sse")
12087 (set_attr "modrm" "0")
12088 (set_attr "memory" "none")
12089 (set_attr "prefix" "vex")
12090 (set_attr "mode" "OI")])
;; Immediate-form vpermil expander for the AVXMODEFDP modes.
;; Operand 2 arrives as an integer mask (const_0_to_255_operand).  The
;; preparation code decodes it, one bit per element, into an array of
;; <ssescalarnum> constant element indices and wraps them in a PARALLEL
;; suitable for the vec_select:
;;   perm[0] = mask bit 0
;;   perm[1] = mask bit 1
;; and, only when the mode is V4DFmode,
;;   perm[2] = mask bit 2, plus 2
;;   perm[3] = mask bit 3, plus 2
;; The offset of 2 keeps the selections for elements 2 and 3 within
;; indices 2..3 of the source.
12092 (define_expand "avx_vpermil<mode>"
12093 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
12094 (vec_select:AVXMODEFDP
12095 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
12096 (match_operand:SI 2 "const_0_to_255_operand" "")))]
12099 int mask = INTVAL (operands[2]);
12100 rtx perm[<ssescalarnum>];
12102 perm[0] = GEN_INT (mask & 1);
12103 perm[1] = GEN_INT ((mask >> 1) & 1);
12104 if (<MODE>mode == V4DFmode)
12106 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
12107 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
12111 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate-form vpermil expander for the AVXMODEFSP modes.
;; Operand 2 arrives as an integer mask (const_0_to_255_operand).  The
;; preparation code splits it into four 2-bit fields and turns them into
;; a PARALLEL of <ssescalarnum> constant element indices for the
;; vec_select:
;;   perm[0..3] = mask bits 1:0, 3:2, 5:4 and 7:6 respectively.
;; When the mode is V8SFmode the same four fields are reused for
;; perm[4..7], each with 4 added, so elements 4..7 select from indices
;; 4..7 of the source using the same pattern as elements 0..3.
12114 (define_expand "avx_vpermil<mode>"
12115 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
12116 (vec_select:AVXMODEFSP
12117 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
12118 (match_operand:SI 2 "const_0_to_255_operand" "")))]
12121 int mask = INTVAL (operands[2]);
12122 rtx perm[<ssescalarnum>];
12124 perm[0] = GEN_INT (mask & 3);
12125 perm[1] = GEN_INT ((mask >> 2) & 3);
12126 perm[2] = GEN_INT ((mask >> 4) & 3);
12127 perm[3] = GEN_INT ((mask >> 6) & 3);
12128 if (<MODE>mode == V8SFmode)
12130 perm[4] = GEN_INT ((mask & 3) + 4);
12131 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
12132 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
12133 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
12137 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Insn for the immediate form of vpermilps/vpermilpd.
;; It matches a vec_select of operand 1 (register or memory) whose
;; selector is a PARALLEL (operand 2) accepted by the predicate
;; avx_vpermilp_<mode>_operand.
;; At output time the PARALLEL is converted back to an immediate:
;; avx_vpermilp_parallel is called on it, 1 is subtracted from the
;; return value, and operands[2] is overwritten with that constant
;; before the template is printed.
;; NOTE(review): the subtraction suggests the helper returns the
;; immediate plus one (leaving 0 free as a failure value) -- confirm in
;; the helper's definition.
12140 (define_insn "*avx_vpermilp<mode>"
12141 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12142 (vec_select:AVXMODEF2P
12143 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
12144 (match_parallel 2 "avx_vpermilp_<mode>_operand"
12145 [(match_operand 3 "const_int_operand" "")])))]
12148 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
12149 operands[2] = GEN_INT (mask);
12150 return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}";
12152 [(set_attr "type" "sselog")
12153 (set_attr "prefix_extra" "1")
12154 (set_attr "length_immediate" "1")
12155 (set_attr "prefix" "vex")
12156 (set_attr "mode" "<MODE>")])
;; Variable-control form of vpermilps/vpermilpd.
;;   operand 0: destination register, mode from AVXMODEF2P.
;;   operand 1: source register of the same mode.
;;   operand 2: control vector of mode <avxpermvecmode>, register or
;;              memory.
;; Unlike the immediate form there is no length_immediate attribute,
;; because the selector is a vector operand rather than a constant.
12158 (define_insn "avx_vpermilvar<mode>3"
12159 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12161 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12162 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
12165 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12166 [(set_attr "type" "sselog")
12167 (set_attr "prefix_extra" "1")
12168 (set_attr "prefix" "vex")
12169 (set_attr "mode" "<MODE>")])
;; vperm2f128 for the AVX256MODE2P modes.
;;   operand 0: destination register.
;;   operand 1: first source, register.
;;   operand 2: second source, register or memory.
;;   operand 3: immediate accepted by const_0_to_255_operand.
;; The operation is expressed as the opaque unspec UNSPEC_VPERMIL2F128
;; rather than as explicit vec_select/vec_concat RTL.  The mode
;; attribute is fixed at V8SF regardless of the iterator mode.
12171 (define_insn "avx_vperm2f128<mode>3"
12172 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12173 (unspec:AVX256MODE2P
12174 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12175 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12176 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12177 UNSPEC_VPERMIL2F128))]
12179 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12180 [(set_attr "type" "sselog")
12181 (set_attr "prefix_extra" "1")
12182 (set_attr "length_immediate" "1")
12183 (set_attr "prefix" "vex")
12184 (set_attr "mode" "<MODE>")])
;; Scalar broadcast from memory for the AVXMODEF4P modes.
;;   operand 0: destination vector register.
;;   operand 1: scalar of mode <avxscalarmode>; must be a memory operand.
;; The result is described with nested vec_concat expressions (two
;; <avxhalfvecmode> halves joined into the full vector) rather than an
;; unspec.  The mode attribute is the scalar mode, matching the width of
;; the memory access.
12186 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
12187 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
12188 (vec_concat:AVXMODEF4P
12189 (vec_concat:<avxhalfvecmode>
12190 (match_operand:<avxscalarmode> 1 "memory_operand" "m")
12192 (vec_concat:<avxhalfvecmode>
12196 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
12197 [(set_attr "type" "ssemov")
12198 (set_attr "prefix_extra" "1")
12199 (set_attr "prefix" "vex")
12200 (set_attr "mode" "<avxscalarmode>")])
;; vbroadcastss into a 256-bit register.
;;   operand 0: V8SF destination register.
;;   operand 1: SF scalar; must be a memory operand.
;; The mode attribute is SF, matching the scalar memory source.
12202 (define_insn "avx_vbroadcastss256"
12203 [(set (match_operand:V8SF 0 "register_operand" "=x")
12207 (match_operand:SF 1 "memory_operand" "m")
12220 "vbroadcastss\t{%1, %0|%0, %1}"
12221 [(set_attr "type" "ssemov")
12222 (set_attr "prefix_extra" "1")
12223 (set_attr "prefix" "vex")
12224 (set_attr "mode" "SF")])
;; vbroadcastf128 for the AVX256MODEF2P modes.
;;   operand 0: 256-bit destination register.
;;   operand 1: half-width vector of mode <avxhalfvecmode>; must be a
;;              memory operand.
;; The result is written as a vec_concat whose first part is operand 1.
;; The mode attribute is fixed at V4SF.
12226 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
12227 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
12228 (vec_concat:AVX256MODEF2P
12229 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
12232 "vbroadcastf128\t{%1, %0|%0, %1}"
12233 [(set_attr "type" "ssemov")
12234 (set_attr "prefix_extra" "1")
12235 (set_attr "prefix" "vex")
12236 (set_attr "mode" "V4SF")])
;; Expander for inserting a 128-bit half into a 256-bit vector.
;;   operand 0: 256-bit destination register.
;;   operand 1: 256-bit source register.
;;   operand 2: half-width vector (<avxhalfvecmode>), register or memory.
;;   operand 3: selector accepted by const_0_to_1_operand.
;; The preparation code switches on INTVAL (operands[3]) and emits
;; either gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>; any other
;; value reaches gcc_unreachable.
;; NOTE(review): presumably selector 0 maps to the lo pattern and 1 to
;; the hi pattern, matching the immediates those patterns print --
;; confirm against the case labels.
12238 (define_expand "avx_vinsertf128<mode>"
12239 [(match_operand:AVX256MODE 0 "register_operand" "")
12240 (match_operand:AVX256MODE 1 "register_operand" "")
12241 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
12242 (match_operand:SI 3 "const_0_to_1_operand" "")]
12245 switch (INTVAL (operands[3]))
12248 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
12252 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
12256 gcc_unreachable ();
;; Replace the low half of a four-element 256-bit vector (AVX256MODE4P).
;; The result is the concatenation of operand 2 (the new half, register
;; or memory) followed by elements 2 and 3 selected from operand 1.
;; Emitted as vinsertf128 with immediate 0.
12261 (define_insn "vec_set_lo_<mode>"
12262 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12263 (vec_concat:AVX256MODE4P
12264 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12265 (vec_select:<avxhalfvecmode>
12266 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12267 (parallel [(const_int 2) (const_int 3)]))))]
12269 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12270 [(set_attr "type" "sselog")
12271 (set_attr "prefix_extra" "1")
12272 (set_attr "length_immediate" "1")
12273 (set_attr "prefix" "vex")
12274 (set_attr "mode" "V8SF")])
;; Replace the high half of a four-element 256-bit vector (AVX256MODE4P).
;; The result is the concatenation of elements 0 and 1 selected from
;; operand 1 followed by operand 2 (the new half, register or memory).
;; Emitted as vinsertf128 with immediate 1.
12276 (define_insn "vec_set_hi_<mode>"
12277 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12278 (vec_concat:AVX256MODE4P
12279 (vec_select:<avxhalfvecmode>
12280 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12281 (parallel [(const_int 0) (const_int 1)]))
12282 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12284 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12285 [(set_attr "type" "sselog")
12286 (set_attr "prefix_extra" "1")
12287 (set_attr "length_immediate" "1")
12288 (set_attr "prefix" "vex")
12289 (set_attr "mode" "V8SF")])
;; Replace the low half of an eight-element 256-bit vector
;; (AVX256MODE8P).  The result is the concatenation of operand 2 (the
;; new half, register or memory) followed by elements 4..7 selected from
;; operand 1.  Emitted as vinsertf128 with immediate 0.
12291 (define_insn "vec_set_lo_<mode>"
12292 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12293 (vec_concat:AVX256MODE8P
12294 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12295 (vec_select:<avxhalfvecmode>
12296 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12297 (parallel [(const_int 4) (const_int 5)
12298 (const_int 6) (const_int 7)]))))]
12300 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12301 [(set_attr "type" "sselog")
12302 (set_attr "prefix_extra" "1")
12303 (set_attr "length_immediate" "1")
12304 (set_attr "prefix" "vex")
12305 (set_attr "mode" "V8SF")])
;; Replace the high half of an eight-element 256-bit vector
;; (AVX256MODE8P).  The result is the concatenation of elements 0..3
;; selected from operand 1 followed by operand 2 (the new half, register
;; or memory).  Emitted as vinsertf128 with immediate 1.
12307 (define_insn "vec_set_hi_<mode>"
12308 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12309 (vec_concat:AVX256MODE8P
12310 (vec_select:<avxhalfvecmode>
12311 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12312 (parallel [(const_int 0) (const_int 1)
12313 (const_int 2) (const_int 3)]))
12314 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12316 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12317 [(set_attr "type" "sselog")
12318 (set_attr "prefix_extra" "1")
12319 (set_attr "length_immediate" "1")
12320 (set_attr "prefix" "vex")
12321 (set_attr "mode" "V8SF")])
;; Replace the low half of a V16HI vector.
;; Operand 2 (V8HI, register or memory) supplies the new half; elements
;; 8..15 are selected from operand 1 (V16HI register) and kept.
;; Emitted as vinsertf128 with immediate 0.
12323 (define_insn "vec_set_lo_v16hi"
12324 [(set (match_operand:V16HI 0 "register_operand" "=x")
12326 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12328 (match_operand:V16HI 1 "register_operand" "x")
12329 (parallel [(const_int 8) (const_int 9)
12330 (const_int 10) (const_int 11)
12331 (const_int 12) (const_int 13)
12332 (const_int 14) (const_int 15)]))))]
12334 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12335 [(set_attr "type" "sselog")
12336 (set_attr "prefix_extra" "1")
12337 (set_attr "length_immediate" "1")
12338 (set_attr "prefix" "vex")
12339 (set_attr "mode" "V8SF")])
;; Replace the high half of a V16HI vector.
;; Elements 0..7 are selected from operand 1 (V16HI register) and kept;
;; operand 2 (V8HI, register or memory) supplies the new half.
;; Emitted as vinsertf128 with immediate 1.
12341 (define_insn "vec_set_hi_v16hi"
12342 [(set (match_operand:V16HI 0 "register_operand" "=x")
12345 (match_operand:V16HI 1 "register_operand" "x")
12346 (parallel [(const_int 0) (const_int 1)
12347 (const_int 2) (const_int 3)
12348 (const_int 4) (const_int 5)
12349 (const_int 6) (const_int 7)]))
12350 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12352 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12353 [(set_attr "type" "sselog")
12354 (set_attr "prefix_extra" "1")
12355 (set_attr "length_immediate" "1")
12356 (set_attr "prefix" "vex")
12357 (set_attr "mode" "V8SF")])
;; Replace the low half of a V32QI vector.
;; Operand 2 (V16QI, register or memory) supplies the new half; elements
;; 16..31 are selected from operand 1 (V32QI register) and kept.
;; Emitted as vinsertf128 with immediate 0.
12359 (define_insn "vec_set_lo_v32qi"
12360 [(set (match_operand:V32QI 0 "register_operand" "=x")
12362 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12364 (match_operand:V32QI 1 "register_operand" "x")
12365 (parallel [(const_int 16) (const_int 17)
12366 (const_int 18) (const_int 19)
12367 (const_int 20) (const_int 21)
12368 (const_int 22) (const_int 23)
12369 (const_int 24) (const_int 25)
12370 (const_int 26) (const_int 27)
12371 (const_int 28) (const_int 29)
12372 (const_int 30) (const_int 31)]))))]
12374 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12375 [(set_attr "type" "sselog")
12376 (set_attr "prefix_extra" "1")
12377 (set_attr "length_immediate" "1")
12378 (set_attr "prefix" "vex")
12379 (set_attr "mode" "V8SF")])
;; Replace the high half of a V32QI vector.
;; Elements 0..15 are selected from operand 1 (V32QI register) and kept;
;; operand 2 (V16QI, register or memory) supplies the new half.
;; Emitted as vinsertf128 with immediate 1.
12381 (define_insn "vec_set_hi_v32qi"
12382 [(set (match_operand:V32QI 0 "register_operand" "=x")
12385 (match_operand:V32QI 1 "register_operand" "x")
12386 (parallel [(const_int 0) (const_int 1)
12387 (const_int 2) (const_int 3)
12388 (const_int 4) (const_int 5)
12389 (const_int 6) (const_int 7)
12390 (const_int 8) (const_int 9)
12391 (const_int 10) (const_int 11)
12392 (const_int 12) (const_int 13)
12393 (const_int 14) (const_int 15)]))
12394 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12396 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12397 [(set_attr "type" "sselog")
12398 (set_attr "prefix_extra" "1")
12399 (set_attr "length_immediate" "1")
12400 (set_attr "prefix" "vex")
12401 (set_attr "mode" "<MODE>")])
;; Masked vector load (vmaskmovps/vmaskmovpd, load direction) for the
;; AVXMODEF2P modes.
;;   operand 0: destination register.
;;   operand 1: source; must be a memory operand.
;;   operand 2: register of the same vector mode; the template prints it
;;              between the memory source and the destination.
;;              Presumably this is the mask -- confirm against the
;;              instruction reference.
12403 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
12404 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12406 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12407 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12411 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
12412 [(set_attr "type" "sselog1")
12413 (set_attr "prefix_extra" "1")
12414 (set_attr "prefix" "vex")
12415 (set_attr "mode" "<MODE>")])
;; Masked vector store (vmaskmovps/vmaskmovpd, store direction) for the
;; AVXMODEF2P modes, modelled as UNSPEC_MASKSTORE.
;;   operand 0: destination; must be a memory operand.
;;   operands 1 and 2: registers of the same vector mode.
;; The template prints operand 2 first and operand 1 second in AT&T
;; order, the opposite of their order in the RTL.
;; NOTE(review): which of the two registers is the mask and which is
;; the data is not evident from this pattern alone -- confirm against
;; the instruction reference and the builtin expansion.
12417 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
12418 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12420 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12421 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12423 UNSPEC_MASKSTORE))]
12425 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12426 [(set_attr "type" "sselog1")
12427 (set_attr "prefix_extra" "1")
12428 (set_attr "prefix" "vex")
12429 (set_attr "mode" "<MODE>")])
;; Conversion from a half-width vector (<avxhalfvecmode>) to a 256-bit
;; vector of the AVX256MODE2P modes, expressed as an unspec.
;; Two alternatives:
;;   0: operand 1 is tied to operand 0 (constraint 0);
;;   1: operand 1 is any register or memory (constraint xm).
;; The output code switches on which_alternative and, where a move is
;; needed, on the insn's mode attribute to choose vmovaps, vmovapd or
;; vmovdqa.  The move writes %x0, the x-modified form of the
;; destination.  Alternative 0 gets an explicit length attribute, while
;; alternative 1 keeps the default (*).
12431 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12432 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
12433 (unspec:AVX256MODE2P
12434 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
12438 switch (which_alternative)
12443 switch (get_attr_mode (insn))
12446 return "vmovaps\t{%1, %x0|%x0, %1}";
12448 return "vmovapd\t{%1, %x0|%x0, %1}";
12450 return "vmovdqa\t{%1, %x0|%x0, %1}";
12457 gcc_unreachable ();
12459 [(set_attr "type" "ssemov")
12460 (set_attr "prefix" "vex")
12461 (set_attr "mode" "<avxvecmode>")
12462 (set (attr "length")
12463 (if_then_else (eq_attr "alternative" "0")
12465 (const_string "*")))])
;; Conversion from a 256-bit vector of the AVX256MODE2P modes to its
;; half-width vector mode (<avxhalfvecmode>), expressed as an unspec.
;; Two alternatives:
;;   0: operand 1 is tied to operand 0 (constraint 0);
;;   1: operand 1 is any register or memory (constraint xm).
;; The output code switches on which_alternative and, where a move is
;; needed, on the insn's mode attribute to choose vmovaps, vmovapd or
;; vmovdqa.  The move reads %x1, the x-modified form of the source.
;; Alternative 0 gets an explicit length attribute, while alternative 1
;; keeps the default (*).
12467 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
12468 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
12469 (unspec:<avxhalfvecmode>
12470 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
12474 switch (which_alternative)
12479 switch (get_attr_mode (insn))
12482 return "vmovaps\t{%x1, %0|%0, %x1}";
12484 return "vmovapd\t{%x1, %0|%0, %x1}";
12486 return "vmovdqa\t{%x1, %0|%0, %x1}";
12493 gcc_unreachable ();
12495 [(set_attr "type" "ssemov")
12496 (set_attr "prefix" "vex")
12497 (set_attr "mode" "<avxvecmode>")
12498 (set (attr "length")
12499 (if_then_else (eq_attr "alternative" "0")
12501 (const_string "*")))])
;; Vector-initialization expander for every AVX256MODE mode.
;;   operand 0: destination register.
;;   operand 1: initializer, accepted without predicate or mode checks.
;; All of the work is delegated to ix86_expand_vector_init, called with
;; false as its first argument.
12503 (define_expand "vec_init<mode>"
12504 [(match_operand:AVX256MODE 0 "register_operand" "")
12505 (match_operand 1 "" "")]
12508 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Concatenate two half-width vectors into a 256-bit AVX256MODE vector.
;;   operand 1: low half, register.
;;   operand 2: high half, accepted by vector_move_operand.
;; Two alternatives, selected by operand 2's constraint:
;;   xm: emit vinsertf128 with immediate 1, inserting operand 2 and
;;       printing operand 1 with the t modifier (%t1);
;;   C:  emit a plain vmovaps, vmovapd or vmovdqa (chosen from the insn's
;;       mode attribute) of operand 1 into %x0.
;; The type, prefix_extra and length_immediate attributes are given per
;; alternative to reflect the two different encodings.
12512 (define_insn "*vec_concat<mode>_avx"
12513 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12514 (vec_concat:AVX256MODE
12515 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12516 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12519 switch (which_alternative)
12522 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12524 switch (get_attr_mode (insn))
12527 return "vmovaps\t{%1, %x0|%x0, %1}";
12529 return "vmovapd\t{%1, %x0|%x0, %1}";
12531 return "vmovdqa\t{%1, %x0|%x0, %1}";
12534 gcc_unreachable ();
12537 [(set_attr "type" "sselog,ssemov")
12538 (set_attr "prefix_extra" "1,*")
12539 (set_attr "length_immediate" "1,*")
12540 (set_attr "prefix" "vex")
12541 (set_attr "mode" "<avxvecmode>")])