1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Instruction suffix for sign and zero extensions.
;; Code attribute: <extsuffix> is "sx" for sign_extend and "zx" for
;; zero_extend.
23 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
25 ;; 16 byte integral modes handled by SSE
;; SSEMODEI iterates over the four 16-byte integer vector modes only.
26 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
28 ;; All 16-byte vector modes handled by SSE
;; SSEMODE adds the two float vector modes to the integer ones; SSEMODE16
;; is the same list plus V1TI.
29 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
30 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
32 ;; 32 byte integral vector modes handled by AVX
;; AVX256MODEI covers the four 32-byte integer vector modes.
33 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
35 ;; All 32-byte vector modes handled by AVX
;; AVX256MODE is AVX256MODEI plus the 32-byte float modes V8SF and V4DF.
36 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
38 ;; All QI vector modes handled by AVX
;; Byte-element vectors in both widths (32-byte and 16-byte).
39 (define_mode_iterator AVXMODEQI [V32QI V16QI])
41 ;; All DI vector modes handled by AVX
;; 64-bit-element integer vectors in both widths.
42 (define_mode_iterator AVXMODEDI [V4DI V2DI])
44 ;; All vector modes handled by AVX
;; AVXMODE lists the 16-byte modes followed by the 32-byte modes.
;; AVXMODE16 is the same list with V1TI added among the 16-byte modes.
45 (define_mode_iterator AVXMODE
46 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
47 (define_mode_iterator AVXMODE16
48 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 16-byte modes.  In the SSEMODE<digits> names each digit
;; matches the element size in bytes of one listed integer mode
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).  SSEMODEF4 holds the scalar
;; and vector float modes; SSEMODEF2P holds only the two packed float modes.
51 (define_mode_iterator SSEMODE12 [V16QI V8HI])
52 (define_mode_iterator SSEMODE24 [V8HI V4SI])
53 (define_mode_iterator SSEMODE14 [V16QI V4SI])
54 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
55 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
56 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
57 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
58 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; AVX-specific subsets.  The AVX256* iterators contain only 32-byte modes;
;; the AVXMODE* iterators mix 16-byte and 32-byte modes.
;; AVXMODEDCVTDQ2PS and AVXMODEDCVTPS2DQ list the SF-element and SI-element
;; vectors respectively (presumably the result modes of the cvtdq2ps and
;; cvtps2dq conversions -- confirm at the patterns that use them).
60 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
61 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
62 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
63 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
64 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
65 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
66 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
67 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
68 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
69 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
70 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
;; Scalar and packed float modes in all widths (presumably for the
;; fused multiply-add patterns, going by the name -- confirm at use sites).
72 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
74 ;; Int-float size matches
;; Each iterator pairs the float vector mode with the integer vector mode
;; that has the same element count and element width.
75 (define_mode_iterator SSEMODE4S [V4SF V4SI])
76 (define_mode_iterator SSEMODE2D [V2DF V2DI])
78 ;; Modes handled by integer vcond pattern
;; V16QI, V8HI and V4SI are unconditional; V2DI is included only when
;; TARGET_SSE4_2 holds.
79 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
80 (V2DI "TARGET_SSE4_2")])
82 ;; Modes handled by vec_extract_even/odd pattern.
;; Each entry pairs a mode with the target condition under which the
;; iterator includes it.  The list opens with the 128-bit float modes,
;; gated the same way the "sse" mode attribute maps them (V4SF -> SSE,
;; V2DF -> SSE2); these two entries also carry the opening bracket of
;; the list.
83 (define_mode_iterator SSEMODE_EO
84 [(V4SF "TARGET_SSE")
85 (V2DF "TARGET_SSE2")
86 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
87 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
88 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
90 ;; Modes handled by storent patterns.
;; Each entry pairs a mode with the target condition that enables it.
;; V4SF is gated on plain SSE, matching the "sse" mode attribute and the
;; <sse>_movnt<mode> insn, which covers V4SF through SSEMODEF2P.
91 (define_mode_iterator STORENT_MODE
92 [(SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
93 (SI "TARGET_SSE2") (V2DI "TARGET_SSE2") (V2DF "TARGET_SSE2")
94 (V4SF "TARGET_SSE")
95 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
97 ;; Modes handled by vector float patterns.
;; V4SF needs SSE, V2DF needs SSE2, and both 32-byte float modes need AVX.
98 (define_mode_iterator VEC_FLOAT_MODE
99 [(V2DF "TARGET_SSE2") (V4SF "TARGET_SSE")
100 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
102 ;; Modes handled by vector extract patterns.
;; Every 16-byte mode here is enabled by TARGET_SSE alone (including the
;; integer modes and V2DF); the two 32-byte float modes require TARGET_AVX.
103 (define_mode_iterator VEC_EXTRACT_MODE
104 [(V2DI "TARGET_SSE") (V4SI "TARGET_SSE")
105 (V8HI "TARGET_SSE") (V16QI "TARGET_SSE")
106 (V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
107 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
109 ;; Mapping from float mode to required SSE level
;; Used in pattern names below as the <sse> prefix, e.g. "<sse>_movu...".
110 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
112 ;; Mapping from integer vector mode to mnemonic suffix
;; One letter per element width: b, w, d, q.
113 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
115 ;; Mapping of the insn mnemonic suffix
;; <ssemodesuffix> gives the packed suffix ("ps"/"pd") for vector modes and
;; the scalar suffix ("ss"/"sd") for SF/DF.  The integer modes V8SI and V4DI
;; map to the float suffix of the same element width.
116 (define_mode_attr ssemodesuffix
117 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd")
118 (V8SI "ps") (V4DI "pd")])
;; <ssescalarmodesuffix> always gives a scalar suffix, for use by the
;; vm* (scalar-in-vector) patterns.  Note that V4SI maps to "d" while V8SI
;; maps to "ss".
119 (define_mode_attr ssescalarmodesuffix
120 [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V8SI "ss")
121 (V4DF "sd") (V4SI "d") (V4DI "sd")])
123 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
124 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
126 ;; Mapping of vector modes back to the scalar modes
;; Only 16-byte vector modes are listed; the 32-byte modes are covered by
;; the separate avxscalarmode attribute.
127 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
128 (V16QI "QI") (V8HI "HI")
129 (V4SI "SI") (V2DI "DI")])
131 ;; Mapping of vector modes to a vector mode of double size
;; Same element type, twice the element count.  The 32-byte inputs map to
;; 64-byte mode names (V8DF, V16SF, ...).
132 (define_mode_attr ssedoublesizemode
133 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
134 (V8HI "V16HI") (V16QI "V32QI")
135 (V4DF "V8DF") (V8SF "V16SF")
136 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
138 ;; Number of scalar elements in each vector type
;; The value is the decimal element count as a string, for 16-byte modes
;; (first line) and 32-byte modes (second line).
139 (define_mode_attr ssescalarnum
140 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
141 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Value used for the "mode" insn attribute by the AVX patterns below:
;; 16-byte integer vectors map to TI, 32-byte integer vectors map to OI,
;; and float vectors map to themselves.
144 (define_mode_attr avxvecmode
145 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
146 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
147 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; Maps each integer vector mode to the single-precision float vector mode
;; of the same total size (V4SF for 16 bytes, V8SF for 32 bytes).
148 (define_mode_attr avxvecpsmode
149 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
150 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Maps a vector mode to the mode with the same element type and half the
;; element count.  There are no entries for V2DI or V2DF.
151 (define_mode_attr avxhalfvecmode
152 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
153 (V8SF "V4SF") (V4DF "V2DF")
154 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; Maps every 16-byte and 32-byte vector mode to its element (scalar) mode.
155 (define_mode_attr avxscalarmode
156 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
157 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; avxcvtvecmode swaps between the SF-element and SI-element vector of the
;; same shape, in both directions.
158 (define_mode_attr avxcvtvecmode
159 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; avxpermvecmode maps a float vector mode to the integer vector mode with
;; the same element count and width.
160 (define_mode_attr avxpermvecmode
161 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Mnemonic suffix per mode: "pd"/"ps" for the float vectors and "si" for
;; the SI-element integer vectors.  V4DF takes "pd", the same value that
;; ssemodesuffix gives it; that entry also closes the list.
162 (define_mode_attr avxmodesuffixp
163 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
164 (V4DF "pd")])
;; Pattern-name suffix that distinguishes the vector width: empty for the
;; 16-byte modes, "256" for the 32-byte modes.  Used in names such as
;; "avx_movdqu<avxmodesuffix>".
165 (define_mode_attr avxmodesuffix
166 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
167 (V8SI "256") (V8SF "256") (V4DF "256")])
169 ;; Mapping of immediate bits for blend instructions
;; For V8SF, V4SF and V2DF the value is the all-ones mask with one bit per
;; element (255, 15, 3); V4DF also maps to 15.
170 (define_mode_attr blendbits
171 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
173 ;; Mapping of immediate bits for pinsr instructions
;; Each value is 2^(element count - 1): 2^15, 2^7 and 2^3.
174 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
176 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
178 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
179 ;;
180 ;; Move patterns
181 ;;
182 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move expander for the 32-byte AVX256MODE modes.  The expansion is
;; delegated to ix86_expand_vector_move.
;; NOTE(review): the embedded line numbers skip 187-188 and 190-191 here, so
;; the enable condition and the end of the C body are not visible in this
;; copy -- confirm against upstream.
184 (define_expand "mov<mode>"
185 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
186 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
189 ix86_expand_vector_move (<MODE>mode, operands);
;; AVX move insn for every AVXMODE16 mode.  Three alternatives:
;;   0: SSE constant (constraint "C") -> register
;;   1: register or memory -> register
;;   2: register -> memory
;; The condition requires at least one operand to be a register.
;; Alternative 0 takes its output from standard_sse_constant_opcode.  The
;; other paths pick vmovaps, vmovapd or vmovdqa from get_attr_mode (insn),
;; and fall back to vmovaps when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is
;; set.
;; NOTE(review): the embedded line numbers have several gaps inside this
;; pattern (e.g. 196, 199, 201-202, 207-209), so the first condition term,
;; the braces and the case labels are not visible -- confirm against
;; upstream.
193 (define_insn "*avx_mov<mode>_internal"
194 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
195 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
197 && (register_operand (operands[0], <MODE>mode)
198 || register_operand (operands[1], <MODE>mode))"
200 switch (which_alternative)
203 return standard_sse_constant_opcode (insn, operands[1]);
206 switch (get_attr_mode (insn))
210 return "vmovaps\t{%1, %0|%0, %1}";
213 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
214 return "vmovaps\t{%1, %0|%0, %1}";
216 return "vmovapd\t{%1, %0|%0, %1}";
218 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
219 return "vmovaps\t{%1, %0|%0, %1}";
221 return "vmovdqa\t{%1, %0|%0, %1}";
227 [(set_attr "type" "sselog1,ssemov,ssemov")
228 (set_attr "prefix" "vex")
229 (set_attr "mode" "<avxvecmode>")])
231 ;; All of these patterns are enabled for SSE1 as well as SSE2.
232 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the 16-byte SSEMODE16 modes; like the AVX256MODE
;; variant it defers to ix86_expand_vector_move.
;; NOTE(review): the embedded line numbers skip 237-238 and 240-241 here, so
;; the enable condition and the end of the C body are not visible --
;; confirm against upstream.
234 (define_expand "mov<mode>"
235 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
236 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
239 ix86_expand_vector_move (<MODE>mode, operands);
;; Non-VEX move insn for every SSEMODE16 mode, with the same three
;; alternatives as the AVX version (constant -> reg, reg/mem -> reg,
;; reg -> mem) and the same requirement that one operand be a register.
;; Output is movaps, movapd or movdqa, chosen from get_attr_mode (insn).
;; The "mode" attribute is computed as follows:
;;   - V4SF when optimizing the function for size, when SSE2 is not
;;     available, or for the store alternative (2) when
;;     TARGET_SSE_TYPELESS_STORES is set;
;;   - V4SF / V2DF when the iterator mode is V4SF / V2DF;
;;   - TI otherwise.
;; NOTE(review): the embedded line numbers have several gaps inside this
;; pattern (e.g. 246, 249, 251-252, 276, 281, 287), so the first condition
;; term, the braces, the case labels and parts of the attribute expression
;; are not visible -- confirm against upstream.
243 (define_insn "*mov<mode>_internal"
244 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
245 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
247 && (register_operand (operands[0], <MODE>mode)
248 || register_operand (operands[1], <MODE>mode))"
250 switch (which_alternative)
253 return standard_sse_constant_opcode (insn, operands[1]);
256 switch (get_attr_mode (insn))
259 return "movaps\t{%1, %0|%0, %1}";
261 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
262 return "movaps\t{%1, %0|%0, %1}";
264 return "movapd\t{%1, %0|%0, %1}";
266 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
267 return "movaps\t{%1, %0|%0, %1}";
269 return "movdqa\t{%1, %0|%0, %1}";
275 [(set_attr "type" "sselog1,ssemov,ssemov")
277 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
278 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
279 (and (eq_attr "alternative" "2")
280 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
282 (const_string "V4SF")
283 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
284 (const_string "V4SF")
285 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
286 (const_string "V2DF")
288 (const_string "TI")))])
290 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
291 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
292 ;; from memory, we'd prefer to load the memory directly into the %xmm
293 ;; register. To facilitate this happy circumstance, this pattern won't
294 ;; split until after register allocation. If the 64-bit value didn't
295 ;; come from memory, this is the best we can do. This is much better
296 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
297 ;; register from there.
;; Moves a DImode value into a V4SI register on 32-bit targets
;; (!TARGET_64BIT) with SSE2 and inter-unit moves enabled.  The insn is
;; split only after reload:
;;   - source in integer registers: each 32-bit half (SImode subregs at
;;     byte offsets 0 and 4) is loaded with gen_sse2_loadld, the low half
;;     into operand 0 and the high half into the scratch operand 2, and
;;     the two are then combined with gen_vec_interleave_lowv4si;
;;   - source in memory: a single gen_vec_concatv2di of the DImode memory
;;     with const0_rtx, writing operand 0 viewed as V2DImode.
;; The scratch is early-clobbered ("=&x") only in the register alternative.
;; NOTE(review): the embedded line numbers have gaps (300, 305, 307-308,
;; 310, 318-319, 323+), so the output template, the braces and the tail of
;; the interleave call are not visible -- confirm against upstream.
299 (define_insn_and_split "movdi_to_sse"
301 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
302 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
303 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
304 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
306 "&& reload_completed"
309 if (register_operand (operands[1], DImode))
311 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
312 Assemble the 64-bit DImode value in an xmm register. */
313 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
314 gen_rtx_SUBREG (SImode, operands[1], 0)));
315 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
316 gen_rtx_SUBREG (SImode, operands[1], 4)));
317 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
320 else if (memory_operand (operands[1], DImode))
321 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
322 operands[1], const0_rtx));
;; Post-reload rewrite of a V4SF load whose source satisfies
;; zero_extended_scalar_load_operand.  The preparation code narrows
;; operand 1 to its SFmode subreg at offset 0 and supplies a V4SF zero
;; vector as operand 2; the replacement uses (vec_duplicate:V4SF operand 1).
;; NOTE(review): the embedded line numbers skip 327, 331-332 and 334-336, so
;; the defining keyword (presumably define_split) and most of the
;; replacement template are not visible -- confirm against upstream.
328 [(set (match_operand:V4SF 0 "register_operand" "")
329 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
330 "TARGET_SSE && reload_completed"
333 (vec_duplicate:V4SF (match_dup 1))
337 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
338 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite of a V2DF load whose source satisfies
;; zero_extended_scalar_load_operand: the result becomes a vec_concat of
;; the DFmode low part of operand 1 with a DFmode zero (operand 2).
;; NOTE(review): the embedded line numbers skip 341 and 346, so the defining
;; keyword (presumably define_split) and the opening of the preparation
;; block are not visible -- confirm against upstream.
342 [(set (match_operand:V2DF 0 "register_operand" "")
343 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
344 "TARGET_SSE2 && reload_completed"
345 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
347 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
348 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a 32-byte AVX256MODE register; the work is done by
;; ix86_expand_push.
;; NOTE(review): the embedded line numbers skip 353-354 and 356-357 here, so
;; the enable condition and C-body delimiters are not visible -- confirm
;; against upstream.
351 (define_expand "push<mode>1"
352 [(match_operand:AVX256MODE 0 "register_operand" "")]
355 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for a 16-byte SSEMODE16 register; the work is done by
;; ix86_expand_push.
;; NOTE(review): the embedded line numbers skip 361-362 and 364-365 here, so
;; the enable condition and C-body delimiters are not visible -- confirm
;; against upstream.
359 (define_expand "push<mode>1"
360 [(match_operand:SSEMODE16 0 "register_operand" "")]
363 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the 32-byte AVX256MODE modes; defers to
;; ix86_expand_vector_move_misalign.
;; NOTE(review): the embedded line numbers skip 370-371 and 373-374 here, so
;; the enable condition and C-body delimiters are not visible -- confirm
;; against upstream.
367 (define_expand "movmisalign<mode>"
368 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
369 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
372 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned-move expander for the 16-byte SSEMODE16 modes; defers to
;; ix86_expand_vector_move_misalign.
;; NOTE(review): the embedded line numbers skip 379-380 and 382-383 here, so
;; the enable condition and C-body delimiters are not visible -- confirm
;; against upstream.
376 (define_expand "movmisalign<mode>"
377 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
378 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
381 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Named expander for the AVX unaligned float vector move.  If both
;; operands are memory, the source is first forced into a register so that
;; the matching insn (which rejects mem-to-mem) can be used.
;; NOTE(review): the embedded line numbers skip 387, 389 and 391, so the
;; wrapper around operand 1 (presumably an unspec) and the opening brace
;; are not visible -- confirm against upstream.
385 (define_expand "avx_movu<ssemodesuffix><avxmodesuffix>"
386 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "")
388 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "")]
390 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
392 if (MEM_P (operands[0]) && MEM_P (operands[1]))
393 operands[1] = force_reg (<MODE>mode, operands[1]);
;; AVX unaligned float vector move: emits vmovups / vmovupd (suffix from
;; <ssemodesuffix>).  Alternatives are reg/mem -> reg and reg -> mem; the
;; condition rejects memory-to-memory.  Sets the "movu" attribute.
;; NOTE(review): the embedded line numbers skip 398 and 400, so the wrapper
;; around operand 1 (presumably an unspec) is not visible -- confirm
;; against upstream.
396 (define_insn "*avx_movu<ssemodesuffix><avxmodesuffix>"
397 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
399 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
401 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
402 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
403 "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
404 [(set_attr "type" "ssemov")
405 (set_attr "movu" "1")
406 (set_attr "prefix" "vex")
407 (set_attr "mode" "<MODE>")])
;; Emits movq (vmovq under AVX, via the %v prefix) from a V2DI register or
;; memory source into a V2DI register.  The visible template selects
;; element 0 of operand 1.
;; NOTE(review): the embedded line numbers skip 411-412 and 415-416, so the
;; outer RTL operators and the enable condition are not visible -- confirm
;; against upstream.
409 (define_insn "sse2_movq128"
410 [(set (match_operand:V2DI 0 "register_operand" "=x")
413 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
414 (parallel [(const_int 0)]))
417 "%vmovq\t{%1, %0|%0, %1}"
418 [(set_attr "type" "ssemov")
419 (set_attr "prefix" "maybe_vex")
420 (set_attr "mode" "TI")])
;; Named expander for the SSE unaligned float vector move (V4SF / V2DF).
;; A memory-to-memory request is legalized by forcing the source into a
;; register.
;; NOTE(review): the embedded line numbers skip 424, 426 and 428, so the
;; wrapper around operand 1 (presumably an unspec) and the opening brace
;; are not visible -- confirm against upstream.
422 (define_expand "<sse>_movu<ssemodesuffix>"
423 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "")
425 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")]
427 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
429 if (MEM_P (operands[0]) && MEM_P (operands[1]))
430 operands[1] = force_reg (<MODE>mode, operands[1]);
;; SSE unaligned float vector move: emits movups / movupd.  Alternatives
;; are reg/mem -> reg and reg -> mem; memory-to-memory is rejected by the
;; condition.  Sets the "movu" attribute.
;; NOTE(review): the embedded line numbers skip 435 and 437, so the wrapper
;; around operand 1 (presumably an unspec) is not visible -- confirm
;; against upstream.
433 (define_insn "*<sse>_movu<ssemodesuffix>"
434 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
436 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
438 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
439 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
440 "movu<ssemodesuffix>\t{%1, %0|%0, %1}"
441 [(set_attr "type" "ssemov")
442 (set_attr "movu" "1")
443 (set_attr "mode" "<MODE>")])
;; Named expander for the AVX unaligned integer vector move on the
;; byte-element modes (V32QI / V16QI).  A memory-to-memory request is
;; legalized by forcing the source into a register.
;; NOTE(review): the embedded line numbers skip 447 and 449-451, so the
;; wrapper around operand 1, the enable condition and the opening brace
;; are not visible -- confirm against upstream.
445 (define_expand "avx_movdqu<avxmodesuffix>"
446 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "")
448 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "")]
452 if (MEM_P (operands[0]) && MEM_P (operands[1]))
453 operands[1] = force_reg (<MODE>mode, operands[1]);
;; AVX unaligned integer vector move: emits vmovdqu for V32QI / V16QI.
;; Requires TARGET_AVX and rejects memory-to-memory.  The "mode" attribute
;; comes from <avxvecmode> (TI or OI).
;; NOTE(review): the embedded line numbers skip 458 and 460, so the wrapper
;; around operand 1 (presumably an unspec) is not visible -- confirm
;; against upstream.
456 (define_insn "*avx_movdqu<avxmodesuffix>"
457 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
459 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
461 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
462 "vmovdqu\t{%1, %0|%0, %1}"
463 [(set_attr "type" "ssemov")
464 (set_attr "movu" "1")
465 (set_attr "prefix" "vex")
466 (set_attr "mode" "<avxvecmode>")])
;; Named expander for the SSE2 unaligned V16QI move, modelled as an unspec
;; of operand 1.  A memory-to-memory request is legalized by forcing the
;; source into a register.
;; NOTE(review): the embedded line numbers skip 471-473, so the unspec code,
;; the enable condition and the opening brace are not visible -- confirm
;; against upstream.
468 (define_expand "sse2_movdqu"
469 [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
470 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "")]
474 if (MEM_P (operands[0]) && MEM_P (operands[1]))
475 operands[1] = force_reg (V16QImode, operands[1]);
;; SSE2 unaligned V16QI move: emits movdqu.  Requires TARGET_SSE2 and
;; rejects memory-to-memory.  Marked with prefix_data16 and mode TI.
;; NOTE(review): the embedded line numbers skip 481, so the unspec code
;; closing the template is not visible -- confirm against upstream.
478 (define_insn "*sse2_movdqu"
479 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
480 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
482 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
483 "movdqu\t{%1, %0|%0, %1}"
484 [(set_attr "type" "ssemov")
485 (set_attr "movu" "1")
486 (set_attr "prefix_data16" "1")
487 (set_attr "mode" "TI")])
;; AVX store of a float vector register to memory: emits vmovntps /
;; vmovntpd (suffix from <ssemodesuffix>).  The destination must be memory
;; and the source a register.
;; NOTE(review): the embedded line numbers skip 491 and 493, so the wrapper
;; around operand 1 (presumably an unspec) is not visible -- confirm
;; against upstream.
489 (define_insn "avx_movnt<mode>"
490 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
492 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
494 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
495 "vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
496 [(set_attr "type" "ssemov")
497 (set_attr "prefix" "vex")
498 (set_attr "mode" "<MODE>")])
;; SSE store of a V4SF / V2DF register to memory: emits movntps / movntpd.
;; The destination must be memory and the source a register.
;; NOTE(review): the embedded line numbers skip 502 and 504, so the wrapper
;; around operand 1 (presumably an unspec) is not visible -- confirm
;; against upstream.
500 (define_insn "<sse>_movnt<mode>"
501 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
503 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
505 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
506 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
507 [(set_attr "type" "ssemov")
508 (set_attr "mode" "<MODE>")])
;; AVX store of a V4DI / V2DI register to memory: emits vmovntdq.
;; NOTE(review): "type" is "ssecvt" here, whereas sse2_movntv2di (movntdq)
;; uses "ssemov" -- confirm whether the difference is intended.
;; NOTE(review): the embedded line numbers skip 512 and 514-515, so the
;; wrapper around operand 1 and the enable condition are not visible --
;; confirm against upstream.
510 (define_insn "avx_movnt<mode>"
511 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
513 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
516 "vmovntdq\t{%1, %0|%0, %1}"
517 [(set_attr "type" "ssecvt")
518 (set_attr "prefix" "vex")
519 (set_attr "mode" "<avxvecmode>")])
;; SSE2 store of a V2DI register to memory: emits movntdq.  Modelled as an
;; unspec of the source register; marked prefix_data16, mode TI.
;; NOTE(review): the embedded line numbers skip 524-525, so the unspec code
;; and the enable condition are not visible -- confirm against upstream.
521 (define_insn "sse2_movntv2di"
522 [(set (match_operand:V2DI 0 "memory_operand" "=m")
523 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
526 "movntdq\t{%1, %0|%0, %1}"
527 [(set_attr "type" "ssemov")
528 (set_attr "prefix_data16" "1")
529 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory: emits movnti.
;; Explicitly sets prefix_data16 to 0.
;; NOTE(review): the "mode" attribute is V2DF although both operands are
;; SImode -- confirm this is intended.
;; NOTE(review): the embedded line numbers skip 534-535, so the unspec code
;; and the enable condition are not visible -- confirm against upstream.
531 (define_insn "sse2_movntsi"
532 [(set (match_operand:SI 0 "memory_operand" "=m")
533 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
536 "movnti\t{%1, %0|%0, %1}"
537 [(set_attr "type" "ssemov")
538 (set_attr "prefix_data16" "0")
539 (set_attr "mode" "V2DF")])
;; AVX load of V32QI / V16QI from memory into a register: emits vlddqu.
;; Sets the "movu" attribute.
;; NOTE(review): "type" is "ssecvt" here, whereas sse3_lddqu uses "ssemov"
;; -- confirm whether the difference is intended.
;; NOTE(review): the embedded line numbers skip 543 and 545-546, so the
;; wrapper around operand 1 and the enable condition are not visible --
;; confirm against upstream.
541 (define_insn "avx_lddqu<avxmodesuffix>"
542 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
544 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
547 "vlddqu\t{%1, %0|%0, %1}"
548 [(set_attr "type" "ssecvt")
549 (set_attr "movu" "1")
550 (set_attr "prefix" "vex")
551 (set_attr "mode" "<avxvecmode>")])
;; SSE3 load of V16QI from memory into a register: emits lddqu.  Modelled
;; as an unspec of the memory operand.  Attributes: movu, prefix_data16 = 0,
;; prefix_rep = 1, mode TI.
;; NOTE(review): the embedded line numbers skip 556-557, so the unspec code
;; and the enable condition are not visible -- confirm against upstream.
553 (define_insn "sse3_lddqu"
554 [(set (match_operand:V16QI 0 "register_operand" "=x")
555 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
558 "lddqu\t{%1, %0|%0, %1}"
559 [(set_attr "type" "ssemov")
560 (set_attr "movu" "1")
561 (set_attr "prefix_data16" "0")
562 (set_attr "prefix_rep" "1")
563 (set_attr "mode" "TI")])
565 ; Expand patterns for non-temporal stores. At the moment, only those
566 ; that directly map to insns are defined; it would be possible to
567 ; define patterns for other modes that would expand to several insns.
;; One expander per STORENT_MODE mode, storing a register operand to a
;; memory operand.  Availability per mode is governed by the conditions
;; attached to the STORENT_MODE iterator entries.
;; NOTE(review): the embedded line numbers skip 571 and 573-574, so the
;; wrapper around operand 1 (presumably an unspec) and the rest of the
;; pattern are not visible -- confirm against upstream.
569 (define_expand "storent<mode>"
570 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
572 [(match_operand:STORENT_MODE 1 "register_operand" "")]
575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
577 ;; Parallel floating point arithmetic
579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; abs / neg expander for every VEC_FLOAT_MODE mode.  Expansion is done
;; entirely by ix86_expand_fp_absneg_operator, after which the expander
;; returns via DONE.
;; NOTE(review): the embedded line numbers skip 585, presumably the enable
;; condition -- confirm against upstream.
581 (define_expand "<code><mode>2"
582 [(set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
583 (absneg:VEC_FLOAT_MODE
584 (match_operand:VEC_FLOAT_MODE 1 "register_operand" "")))]
586 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; AVX abs / neg on float vectors.  Operand 3 is the abs-or-neg operator,
;; operand 1 its input and operand 2 a mask vector supplied via (use ...).
;; After reload the insn is split into one logical operation on operand 0:
;; XOR with the mask when operand 3 is NEG, AND otherwise.  When operand 1
;; is in memory the operand order is swapped so that the mask (operand 2)
;; comes first.
;; NOTE(review): the embedded line numbers have gaps (594, 596-599, 603,
;; 607-609), so the output template, the declaration of t, the else keyword
;; and the final emit are not visible -- confirm against upstream.
588 (define_insn_and_split "*avx_absneg<mode>2"
589 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
590 (match_operator:AVXMODEF2P 3 "absneg_operator"
591 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "x,m")]))
592 (use (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm,x"))]
593 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
595 "&& reload_completed"
600 if (MEM_P (operands[1]))
601 t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND,
602 <MODE>mode, operands[2], operands[1]);
604 t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND,
605 <MODE>mode, operands[1], operands[2]);
606 t = gen_rtx_SET (VOIDmode, operands[0], t);
;; SSE abs / neg on V4SF / V2DF.  The constraints tie either the input
;; (operand 1) or the mask (operand 2) to the destination.  After reload
;; the split picks whichever of the two is NOT already in operand 0 and
;; combines it with operand 0: XOR when operand 3 is NEG, AND otherwise.
;; NOTE(review): the embedded line numbers have gaps (617, 619-622,
;; 627-629), so the output template, the declaration of t and the final
;; emit are not visible -- confirm against upstream.
611 (define_insn_and_split "*sse_absneg<mode>2"
612 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
613 (match_operator:SSEMODEF2P 3 "absneg_operator"
614 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,xm")]))
615 (use (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm,0"))]
616 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
618 "&& reload_completed"
623 t = operands[rtx_equal_p (operands[0], operands[1]) ? 2 : 1];
624 t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND,
625 <MODE>mode, operands[0], t);
626 t = gen_rtx_SET (VOIDmode, operands[0], t);
;; Add / subtract expander for the 32-byte float modes (V8SF, V4DF).
;; The preparation statement calls ix86_fixup_binary_operands_no_copy to
;; legalize the operands; the RTL template is then emitted as written.
631 (define_expand "<plusminus_insn><mode>3"
632 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
633 (plusminus:AVX256MODEF2P
634 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
635 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
636 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
637 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed float add / subtract in three-operand form:
;; v<op>ps / v<op>pd with destination %0 and sources %1, %2.  Operand 2 may
;; be memory.  <comm> on operand 1 supplies the commutativity marker per
;; code.  ix86_binary_operator_ok validates the operand combination.
639 (define_insn "*avx_<plusminus_insn><mode>3"
640 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
641 (plusminus:AVXMODEF2P
642 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
643 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
644 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
645 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
646 "v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
647 [(set_attr "type" "sseadd")
648 (set_attr "prefix" "vex")
649 (set_attr "mode" "<avxvecmode>")])
;; Add / subtract expander for the 16-byte float modes (V4SF, V2DF).
;; The preparation statement calls ix86_fixup_binary_operands_no_copy to
;; legalize the operands; the RTL template is then emitted as written.
651 (define_expand "<plusminus_insn><mode>3"
652 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
653 (plusminus:SSEMODEF2P
654 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
655 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
656 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
657 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE packed float add / subtract in two-operand form: operand 1 is tied
;; to the destination ("0"), so the output names only %0 and %2.  Operand 2
;; may be memory.  ix86_binary_operator_ok validates the operand
;; combination.
659 (define_insn "*<plusminus_insn><mode>3"
660 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
661 (plusminus:SSEMODEF2P
662 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
663 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
664 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
665 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
666 "<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}"
667 [(set_attr "type" "sseadd")
668 (set_attr "mode" "<MODE>")])
;; AVX scalar add / subtract performed inside a vector register: a
;; vec_merge of the full-vector operation with another vector.  Emits the
;; three-operand v<op>ss / v<op>sd form; the "mode" attribute is the
;; element mode.
;; NOTE(review): the embedded line numbers skip 676-677, so the remaining
;; vec_merge operands (second vector and merge mask) are not visible --
;; confirm against upstream.
670 (define_insn "*avx_vm<plusminus_insn><mode>3"
671 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
672 (vec_merge:SSEMODEF2P
673 (plusminus:SSEMODEF2P
674 (match_operand:SSEMODEF2P 1 "register_operand" "x")
675 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
678 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
679 "v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
680 [(set_attr "type" "sseadd")
681 (set_attr "prefix" "vex")
682 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar add / subtract performed inside a vector register: a
;; vec_merge of the full-vector operation with another vector.  Operand 1
;; is tied to the destination; emits the two-operand <op>ss / <op>sd form.
;; NOTE(review): the embedded line numbers skip 690-691, so the remaining
;; vec_merge operands (second vector and merge mask) are not visible --
;; confirm against upstream.
684 (define_insn "<sse>_vm<plusminus_insn><mode>3"
685 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
686 (vec_merge:SSEMODEF2P
687 (plusminus:SSEMODEF2P
688 (match_operand:SSEMODEF2P 1 "register_operand" "0")
689 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
692 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
693 "<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
694 [(set_attr "type" "sseadd")
695 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for the 32-byte float modes (V8SF, V4DF); operands are
;; legalized by ix86_fixup_binary_operands_no_copy with code MULT.
;; NOTE(review): the embedded line numbers skip 699, presumably the line
;; carrying the mult operator -- confirm against upstream.
697 (define_expand "mul<mode>3"
698 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
700 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
701 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
702 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
703 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; AVX packed float multiply in three-operand form: vmulps / vmulpd.
;; Operand 1 carries the "%" commutative marker; operand 2 may be memory.
;; NOTE(review): the embedded line numbers skip 707, presumably the line
;; carrying the mult operator -- confirm against upstream.
705 (define_insn "*avx_mul<mode>3"
706 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
708 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
709 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
710 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
711 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
712 "vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
713 [(set_attr "type" "ssemul")
714 (set_attr "prefix" "vex")
715 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for the 16-byte float modes (V4SF, V2DF); operands are
;; legalized by ix86_fixup_binary_operands_no_copy with code MULT.
;; NOTE(review): the embedded line numbers skip 719, presumably the line
;; carrying the mult operator -- confirm against upstream.
717 (define_expand "mul<mode>3"
718 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
720 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
721 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
722 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
723 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; SSE packed float multiply in two-operand form: mulps / mulpd.  Operand 1
;; is commutative ("%") and tied to the destination ("0").
;; NOTE(review): the embedded line numbers skip 727, presumably the line
;; carrying the mult operator -- confirm against upstream.
725 (define_insn "*mul<mode>3"
726 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
728 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
729 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
730 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
731 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
732 "mul<ssemodesuffix>\t{%2, %0|%0, %2}"
733 [(set_attr "type" "ssemul")
734 (set_attr "mode" "<MODE>")])
;; AVX scalar multiply performed inside a vector register (vec_merge
;; form): emits three-operand vmulss / vmulsd.
;; NOTE(review): the condition uses AVX_VEC_FLOAT_MODE_P, whereas the
;; sibling *avx_vm<plusminus_insn> and *avx_vmdiv patterns use
;; AVX128_VEC_FLOAT_MODE_P -- confirm whether the difference is intended.
;; NOTE(review): the embedded line numbers skip 739 and 742-743, so the
;; mult operator line and the remaining vec_merge operands are not
;; visible -- confirm against upstream.
736 (define_insn "*avx_vmmul<mode>3"
737 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
738 (vec_merge:SSEMODEF2P
740 (match_operand:SSEMODEF2P 1 "register_operand" "x")
741 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
744 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
745 "vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
746 [(set_attr "type" "ssemul")
747 (set_attr "prefix" "vex")
748 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar multiply performed inside a vector register (vec_merge
;; form): operand 1 is tied to the destination; emits mulss / mulsd.
;; NOTE(review): the embedded line numbers skip 753 and 756-757, so the
;; mult operator line and the remaining vec_merge operands are not
;; visible -- confirm against upstream.
750 (define_insn "<sse>_vmmul<mode>3"
751 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
752 (vec_merge:SSEMODEF2P
754 (match_operand:SSEMODEF2P 1 "register_operand" "0")
755 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
758 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
759 "mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
760 [(set_attr "type" "ssemul")
761 (set_attr "mode" "<ssescalarmode>")])
;; V8SF division expander.  Operands are first legalized with
;; ix86_fixup_binary_operands_no_copy.  The software division sequence from
;; ix86_emit_swdivsf is then used when all of these hold: SSE math,
;; TARGET_RECIP, not optimizing the insn for size, finite-math-only,
;; non-trapping math and unsafe math optimizations.
;; NOTE(review): the embedded line numbers have gaps (767-768, 770, 774,
;; 777-779), so the enable condition, the braces and the end of the
;; conditional block are not visible -- confirm against upstream.
763 (define_expand "divv8sf3"
764 [(set (match_operand:V8SF 0 "register_operand" "")
765 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
766 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
769 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
771 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
772 && flag_finite_math_only && !flag_trapping_math
773 && flag_unsafe_math_optimizations)
775 ix86_emit_swdivsf (operands[0], operands[1],
776 operands[2], V8SFmode);
;; V4DF division expander; only legalizes the operands with
;; ix86_fixup_binary_operands_no_copy (no reciprocal path).
;; NOTE(review): the embedded line numbers skip 785, presumably the enable
;; condition -- confirm against upstream.
781 (define_expand "divv4df3"
782 [(set (match_operand:V4DF 0 "register_operand" "")
783 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
784 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
786 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; AVX packed float divide in three-operand form: vdivps / vdivpd.  The
;; dividend (operand 1) must be a register; the divisor may be memory.
;; NOTE(review): the embedded line numbers skip 790, presumably the line
;; carrying the div operator -- confirm against upstream.
788 (define_insn "avx_div<mode>3"
789 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
791 (match_operand:AVXMODEF2P 1 "register_operand" "x")
792 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
793 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
794 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
795 [(set_attr "type" "ssediv")
796 (set_attr "prefix" "vex")
797 (set_attr "mode" "<MODE>")])
;; V4SF division expander.  The software division sequence from
;; ix86_emit_swdivsf is used when all of these hold: SSE math,
;; TARGET_RECIP, optimizing the insn for speed, finite-math-only,
;; non-trapping math and unsafe math optimizations.
;; NOTE(review): the embedded line numbers have gaps (803-804, 808,
;; 811-813), so the enable condition, the braces and the end of the
;; conditional block are not visible -- confirm against upstream.
799 (define_expand "divv4sf3"
800 [(set (match_operand:V4SF 0 "register_operand" "")
801 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
802 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
805 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
806 && flag_finite_math_only && !flag_trapping_math
807 && flag_unsafe_math_optimizations)
809 ix86_emit_swdivsf (operands[0], operands[1],
810 operands[2], V4SFmode);
;; V2DF division expander; the visible part is only the RTL template.
;; NOTE(review): the embedded line numbers skip 819-820, so the enable
;; condition and the closing of the pattern are not visible -- confirm
;; against upstream.
815 (define_expand "divv2df3"
816 [(set (match_operand:V2DF 0 "register_operand" "")
817 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
818 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; VEX-encoded packed divide for the 16-byte float modes (V4SF / V2DF):
;; three-operand vdivps / vdivpd, enabled by AVX128_VEC_FLOAT_MODE_P.
;; NOTE(review): the embedded line numbers skip 823, presumably the line
;; carrying the div operator -- confirm against upstream.
821 (define_insn "*avx_div<mode>3"
822 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
824 (match_operand:SSEMODEF2P 1 "register_operand" "x")
825 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
826 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
827 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
828 [(set_attr "type" "ssediv")
829 (set_attr "prefix" "vex")
830 (set_attr "mode" "<MODE>")])
;; SSE packed float divide in two-operand form: divps / divpd.  The
;; dividend (operand 1) is tied to the destination; the divisor may be
;; memory.
;; NOTE(review): the embedded line numbers skip 834, presumably the line
;; carrying the div operator -- confirm against upstream.
832 (define_insn "<sse>_div<mode>3"
833 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
835 (match_operand:SSEMODEF2P 1 "register_operand" "0")
836 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
837 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
838 "div<ssemodesuffix>\t{%2, %0|%0, %2}"
839 [(set_attr "type" "ssediv")
840 (set_attr "mode" "<MODE>")])
;; AVX scalar divide performed inside a vector register (vec_merge form):
;; emits three-operand vdivss / vdivsd.
;; NOTE(review): the embedded line numbers skip 845 and 848-849, so the div
;; operator line and the remaining vec_merge operands are not visible --
;; confirm against upstream.
842 (define_insn "*avx_vmdiv<mode>3"
843 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
844 (vec_merge:SSEMODEF2P
846 (match_operand:SSEMODEF2P 1 "register_operand" "x")
847 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
850 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
851 "vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
852 [(set_attr "type" "ssediv")
853 (set_attr "prefix" "vex")
854 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar divide performed inside a vector register (vec_merge form):
;; operand 1 is tied to the destination; emits divss / divsd.
;; NOTE(review): the embedded line numbers skip 859 and 862-863, so the div
;; operator line and the remaining vec_merge operands are not visible --
;; confirm against upstream.
856 (define_insn "<sse>_vmdiv<mode>3"
857 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
858 (vec_merge:SSEMODEF2P
860 (match_operand:SSEMODEF2P 1 "register_operand" "0")
861 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
864 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
865 "div<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
866 [(set_attr "type" "ssediv")
867 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal, modelled as UNSPEC_RCP of operand 1: emits vrcpps.
;; NOTE(review): the embedded line numbers skip 871 and 873, so the unspec
;; head and the enable condition are not visible -- confirm against
;; upstream.
869 (define_insn "avx_rcpv8sf2"
870 [(set (match_operand:V8SF 0 "register_operand" "=x")
872 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
874 "vrcpps\t{%1, %0|%0, %1}"
875 [(set_attr "type" "sse")
876 (set_attr "prefix" "vex")
877 (set_attr "mode" "V8SF")])
;; V4SF reciprocal, modelled as UNSPEC_RCP of operand 1: emits rcpps, or
;; vrcpps when the %v prefix resolves to VEX (prefix "maybe_vex").
;; Tagged with atom_sse_attr "rcp".
;; NOTE(review): the embedded line numbers skip 881 and 883, so the unspec
;; head and the enable condition are not visible -- confirm against
;; upstream.
879 (define_insn "sse_rcpv4sf2"
880 [(set (match_operand:V4SF 0 "register_operand" "=x")
882 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
884 "%vrcpps\t{%1, %0|%0, %1}"
885 [(set_attr "type" "sse")
886 (set_attr "atom_sse_attr" "rcp")
887 (set_attr "prefix" "maybe_vex")
888 (set_attr "mode" "V4SF")])
;; AVX scalar reciprocal inside a V4SF register: emits three-operand
;; vrcpss, with operand 1 as the value to invert and operand 2 as the other
;; source register.
;; NOTE(review): the embedded line numbers skip 892, 894 and 896-897, so the
;; outer operator (presumably vec_merge), the unspec code, the merge mask
;; and the enable condition are not visible -- confirm against upstream.
890 (define_insn "*avx_vmrcpv4sf2"
891 [(set (match_operand:V4SF 0 "register_operand" "=x")
893 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
895 (match_operand:V4SF 2 "register_operand" "x")
898 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
899 [(set_attr "type" "sse")
900 (set_attr "prefix" "vex")
901 (set_attr "mode" "SF")])
;; SSE scalar reciprocal inside a V4SF register: emits two-operand rcpss.
;; Operand 2 is tied to the destination ("0").  Tagged with atom_sse_attr
;; "rcp".
;; NOTE(review): the embedded line numbers skip 905, 907 and 909-910, so the
;; outer operator (presumably vec_merge), the unspec code, the merge mask
;; and the enable condition are not visible -- confirm against upstream.
903 (define_insn "sse_vmrcpv4sf2"
904 [(set (match_operand:V4SF 0 "register_operand" "=x")
906 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
908 (match_operand:V4SF 2 "register_operand" "0")
911 "rcpss\t{%1, %0|%0, %1}"
912 [(set_attr "type" "sse")
913 (set_attr "atom_sse_attr" "rcp")
914 (set_attr "mode" "SF")])
;; V8SF square-root expander.  The software sequence from
;; ix86_emit_swsqrtsf (last argument 0) is used when all of these hold:
;; SSE math, TARGET_RECIP, not optimizing the insn for size,
;; finite-math-only, non-trapping math and unsafe math optimizations.
;; NOTE(review): the embedded line numbers have gaps (919-920, 924,
;; 926-928), so the enable condition, the braces and the end of the
;; conditional block are not visible -- confirm against upstream.
916 (define_expand "sqrtv8sf2"
917 [(set (match_operand:V8SF 0 "register_operand" "")
918 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
921 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
922 && flag_finite_math_only && !flag_trapping_math
923 && flag_unsafe_math_optimizations)
925 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; V8SF packed square root: emits vsqrtps from a register or memory source.
;; NOTE(review): the embedded line numbers skip 933, presumably the enable
;; condition -- confirm against upstream.
930 (define_insn "avx_sqrtv8sf2"
931 [(set (match_operand:V8SF 0 "register_operand" "=x")
932 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
934 "vsqrtps\t{%1, %0|%0, %1}"
935 [(set_attr "type" "sse")
936 (set_attr "prefix" "vex")
937 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  The software sequence from
;; ix86_emit_swsqrtsf (last argument 0) is used when all of these hold:
;; SSE math, TARGET_RECIP, optimizing the insn for speed,
;; finite-math-only, non-trapping math and unsafe math optimizations.
;; NOTE(review): the embedded line numbers have gaps (942-943, 947,
;; 949-951), so the enable condition, the braces and the end of the
;; conditional block are not visible -- confirm against upstream.
939 (define_expand "sqrtv4sf2"
940 [(set (match_operand:V4SF 0 "register_operand" "")
941 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
944 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
945 && flag_finite_math_only && !flag_trapping_math
946 && flag_unsafe_math_optimizations)
948 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; V4SF packed square root: emits sqrtps, or vsqrtps when the %v prefix
;; resolves to VEX (prefix "maybe_vex").  Tagged with atom_sse_attr "sqrt".
;; NOTE(review): the embedded line numbers skip 956, presumably the enable
;; condition -- confirm against upstream.
953 (define_insn "sse_sqrtv4sf2"
954 [(set (match_operand:V4SF 0 "register_operand" "=x")
955 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
957 "%vsqrtps\t{%1, %0|%0, %1}"
958 [(set_attr "type" "sse")
959 (set_attr "atom_sse_attr" "sqrt")
960 (set_attr "prefix" "maybe_vex")
961 (set_attr "mode" "V4SF")])
;; V4DF packed square root: emits vsqrtpd from a register or memory source.
;; NOTE(review): the embedded line numbers skip 966, presumably the enable
;; condition -- confirm against upstream.
963 (define_insn "sqrtv4df2"
964 [(set (match_operand:V4DF 0 "register_operand" "=x")
965 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
967 "vsqrtpd\t{%1, %0|%0, %1}"
968 [(set_attr "type" "sse")
969 (set_attr "prefix" "vex")
970 (set_attr "mode" "V4DF")])
;; V2DF packed square root: emits sqrtpd, or vsqrtpd when the %v prefix
;; resolves to VEX (prefix "maybe_vex").
;; NOTE(review): the embedded line numbers skip 975, presumably the enable
;; condition -- confirm against upstream.
972 (define_insn "sqrtv2df2"
973 [(set (match_operand:V2DF 0 "register_operand" "=x")
974 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
976 "%vsqrtpd\t{%1, %0|%0, %1}"
977 [(set_attr "type" "sse")
978 (set_attr "prefix" "maybe_vex")
979 (set_attr "mode" "V2DF")])
;; AVX scalar square root inside a vector register (vec_merge form): emits
;; three-operand vsqrtss / vsqrtsd with operand 1 as the value and
;; operand 2 as the other source register.
;; NOTE(review): the condition uses AVX_VEC_FLOAT_MODE_P, whereas the
;; sibling *avx_vmdiv pattern uses AVX128_VEC_FLOAT_MODE_P -- confirm
;; whether the difference is intended.
;; NOTE(review): the embedded line numbers skip 984 and 987, so the sqrt
;; operator line and the merge mask are not visible -- confirm against
;; upstream.
981 (define_insn "*avx_vmsqrt<mode>2"
982 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
983 (vec_merge:SSEMODEF2P
985 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
986 (match_operand:SSEMODEF2P 2 "register_operand" "x")
988 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
989 "vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
990 [(set_attr "type" "sse")
991 (set_attr "prefix" "vex")
992 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar square root expressed as a vec_merge.  Operand 2 is tied
;; to the destination through the "0" constraint, so the template only
;; prints %1 and %0.
994 (define_insn "<sse>_vmsqrt<mode>2"
995 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
996 (vec_merge:SSEMODEF2P
998 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
999 (match_operand:SSEMODEF2P 2 "register_operand" "0")
1001 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1002 "sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
1003 [(set_attr "type" "sse")
1004 (set_attr "atom_sse_attr" "sqrt")
1005 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the V8SF UNSPEC_RSQRT operation, available when both
;; TARGET_AVX and TARGET_SSE_MATH hold.  It calls ix86_emit_swsqrtsf with
;; a final argument of 1.
1007 (define_expand "rsqrtv8sf2"
1008 [(set (match_operand:V8SF 0 "register_operand" "")
1010 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
1011 "TARGET_AVX && TARGET_SSE_MATH"
1013 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; UNSPEC_RSQRT on a V8SF operand (register or memory), emitted as
;; vrsqrtps with a VEX prefix.
1017 (define_insn "avx_rsqrtv8sf2"
1018 [(set (match_operand:V8SF 0 "register_operand" "=x")
1020 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1022 "vrsqrtps\t{%1, %0|%0, %1}"
1023 [(set_attr "type" "sse")
1024 (set_attr "prefix" "vex")
1025 (set_attr "mode" "V8SF")])
;; Expander for the V4SF UNSPEC_RSQRT operation.  It calls
;; ix86_emit_swsqrtsf with a final argument of 1.
1027 (define_expand "rsqrtv4sf2"
1028 [(set (match_operand:V4SF 0 "register_operand" "")
1030 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
1033 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; UNSPEC_RSQRT on a V4SF operand.  The template uses the %v marker and
;; the prefix attribute is maybe_vex.
1037 (define_insn "sse_rsqrtv4sf2"
1038 [(set (match_operand:V4SF 0 "register_operand" "=x")
1040 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1042 "%vrsqrtps\t{%1, %0|%0, %1}"
1043 [(set_attr "type" "sse")
1044 (set_attr "prefix" "maybe_vex")
1045 (set_attr "mode" "V4SF")])
;; AVX scalar form built on an unspec of operand 1, emitted as the
;; three-operand vrsqrtss.  Operand 2 is an independent register and the
;; mode attribute is SF.
1047 (define_insn "*avx_vmrsqrtv4sf2"
1048 [(set (match_operand:V4SF 0 "register_operand" "=x")
1050 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1052 (match_operand:V4SF 2 "register_operand" "x")
1055 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
1056 [(set_attr "type" "sse")
1057 (set_attr "prefix" "vex")
1058 (set_attr "mode" "SF")])
;; Non-AVX scalar form built on an unspec of operand 1, emitted as
;; rsqrtss.  Operand 2 is tied to the destination through the "0"
;; constraint, so the template only prints %1 and %0.
1060 (define_insn "sse_vmrsqrtv4sf2"
1061 [(set (match_operand:V4SF 0 "register_operand" "=x")
1063 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1065 (match_operand:V4SF 2 "register_operand" "0")
1068 "rsqrtss\t{%1, %0|%0, %1}"
1069 [(set_attr "type" "sse")
1070 (set_attr "mode" "SF")])
1072 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1073 ;; isn't really correct, as those rtl operators aren't defined when
1074 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Min/max expander (smaxmin code iterator) for the 256-bit AVX float
;; vector modes.  Unless flag_finite_math_only is set, operand 1 is forced
;; into a register before ix86_fixup_binary_operands_no_copy runs.
1076 (define_expand "<code><mode>3"
1077 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1078 (smaxmin:AVX256MODEF2P
1079 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1080 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1081 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1083 if (!flag_finite_math_only)
1084 operands[1] = force_reg (<MODE>mode, operands[1]);
1085 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Expander counterpart for the SSEMODEF2P modes.  Unless
;; flag_finite_math_only is set, operand 1 is forced into a register
;; before ix86_fixup_binary_operands_no_copy runs.
1088 (define_expand "<code><mode>3"
1089 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1091 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1092 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1093 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1095 if (!flag_finite_math_only)
1096 operands[1] = force_reg (<MODE>mode, operands[1]);
1097 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX min/max that is only enabled with flag_finite_math_only.  Operand 1
;; carries the "%" commutative marker, and the condition also requires
;; ix86_binary_operator_ok.  Three-operand v<maxmin_float> template.
1100 (define_insn "*avx_<code><mode>3_finite"
1101 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1103 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1104 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1105 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1106 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1107 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1108 [(set_attr "type" "sseadd")
1109 (set_attr "prefix" "vex")
1110 (set_attr "mode" "<MODE>")])
;; Non-AVX min/max that is only enabled with flag_finite_math_only.
;; Operand 1 is commutative ("%") and tied to the destination ("0"), so the
;; two-operand template prints only %2 and %0.
1112 (define_insn "*<code><mode>3_finite"
1113 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1115 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1116 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1117 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1118 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1119 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1120 [(set_attr "type" "sseadd")
1121 (set_attr "mode" "<MODE>")])
;; AVX min/max whose condition does not require flag_finite_math_only.
;; Operand 1 is a plain register with no "%" commutative marker, matching
;; the non-AVX *<code><mode>3 pattern.  The expanders force operand 1 into
;; a register when !flag_finite_math_only, and the operand order must be
;; preserved here: letting the operands be swapped would change the result
;; in the presence of NaN or -0.0.
1123 (define_insn "*avx_<code><mode>3"
1124 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1126 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1127 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1128 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1129 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1130 [(set_attr "type" "sseadd")
1131 (set_attr "prefix" "vex")
1132 (set_attr "mode" "<avxvecmode>")])
;; Non-AVX min/max whose condition does not require
;; flag_finite_math_only.  Operand 1 must be a register tied to the
;; destination ("0") and has no commutative marker.
1134 (define_insn "*<code><mode>3"
1135 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1137 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1138 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1139 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1140 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1141 [(set_attr "type" "sseadd")
1142 (set_attr "mode" "<MODE>")])
;; AVX scalar min/max expressed as a vec_merge, restricted to
;; AVX128_VEC_FLOAT_MODE_P modes.  Three-operand template with the scalar
;; mode suffix; the mode attribute is the scalar mode.
1144 (define_insn "*avx_vm<code><mode>3"
1145 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1146 (vec_merge:SSEMODEF2P
1148 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1149 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1152 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1153 "v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1154 [(set_attr "type" "sse")
1155 (set_attr "prefix" "vex")
1156 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar min/max expressed as a vec_merge.  Operand 1 is tied to
;; the destination ("0"); two-operand template with the scalar mode suffix.
1158 (define_insn "<sse>_vm<code><mode>3"
1159 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1160 (vec_merge:SSEMODEF2P
1162 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1163 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1166 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1167 "<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1168 [(set_attr "type" "sseadd")
1169 (set_attr "mode" "<ssescalarmode>")])
1171 ;; These versions of the min/max patterns implement exactly the operations
1172 ;; min = (op1 < op2 ? op1 : op2)
1173 ;; max = (!(op1 < op2) ? op1 : op2)
1174 ;; Their operands are not commutative, and thus they may be used in the
1175 ;; presence of -0.0 and NaN.
;; AVX form of the IEEE-exact min described above.  Operand 1 is a
;; register with no commutative marker; emitted as three-operand vmin.
1177 (define_insn "*avx_ieee_smin<mode>3"
1178 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1180 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1181 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1183 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1184 "vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1185 [(set_attr "type" "sseadd")
1186 (set_attr "prefix" "vex")
1187 (set_attr "mode" "<avxvecmode>")])
;; AVX form of the IEEE-exact max described above.  Operand 1 is a
;; register with no commutative marker; emitted as three-operand vmax.
1189 (define_insn "*avx_ieee_smax<mode>3"
1190 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1192 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1193 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1195 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1196 "vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1197 [(set_attr "type" "sseadd")
1198 (set_attr "prefix" "vex")
1199 (set_attr "mode" "<avxvecmode>")])
;; Non-AVX form of the IEEE-exact min described above.  Operand 1 is tied
;; to the destination ("0") with no commutative marker; emitted as min.
1201 (define_insn "*ieee_smin<mode>3"
1202 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1204 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1205 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1207 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1208 "min<ssemodesuffix>\t{%2, %0|%0, %2}"
1209 [(set_attr "type" "sseadd")
1210 (set_attr "mode" "<MODE>")])
;; Non-AVX form of the IEEE-exact max described above.  Operand 1 is tied
;; to the destination ("0") with no commutative marker; emitted as max.
1212 (define_insn "*ieee_smax<mode>3"
1213 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1215 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1216 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1218 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1219 "max<ssemodesuffix>\t{%2, %0|%0, %2}"
1220 [(set_attr "type" "sseadd")
1221 (set_attr "mode" "<MODE>")])
;; V8SF add/subtract, emitted as vaddsubps.  Operands 1 and 2 are reused
;; through match_dup for the subtraction part of the pattern.
1223 (define_insn "avx_addsubv8sf3"
1224 [(set (match_operand:V8SF 0 "register_operand" "=x")
1227 (match_operand:V8SF 1 "register_operand" "x")
1228 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1229 (minus:V8SF (match_dup 1) (match_dup 2))
1232 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1233 [(set_attr "type" "sseadd")
1234 (set_attr "prefix" "vex")
1235 (set_attr "mode" "V8SF")])
;; V4DF add/subtract, emitted as vaddsubpd.  Operands 1 and 2 are reused
;; through match_dup for the subtraction part of the pattern.
1237 (define_insn "avx_addsubv4df3"
1238 [(set (match_operand:V4DF 0 "register_operand" "=x")
1241 (match_operand:V4DF 1 "register_operand" "x")
1242 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1243 (minus:V4DF (match_dup 1) (match_dup 2))
1246 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1247 [(set_attr "type" "sseadd")
1248 (set_attr "prefix" "vex")
1249 (set_attr "mode" "V4DF")])
;; V4SF add/subtract in its VEX three-operand form (vaddsubps).
1251 (define_insn "*avx_addsubv4sf3"
1252 [(set (match_operand:V4SF 0 "register_operand" "=x")
1255 (match_operand:V4SF 1 "register_operand" "x")
1256 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1257 (minus:V4SF (match_dup 1) (match_dup 2))
1260 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1261 [(set_attr "type" "sseadd")
1262 (set_attr "prefix" "vex")
1263 (set_attr "mode" "V4SF")])
;; V4SF add/subtract in its two-operand form (addsubps).  Operand 1 is
;; tied to the destination ("0"); prefix_rep is set to 1.
1265 (define_insn "sse3_addsubv4sf3"
1266 [(set (match_operand:V4SF 0 "register_operand" "=x")
1269 (match_operand:V4SF 1 "register_operand" "0")
1270 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1271 (minus:V4SF (match_dup 1) (match_dup 2))
1274 "addsubps\t{%2, %0|%0, %2}"
1275 [(set_attr "type" "sseadd")
1276 (set_attr "prefix_rep" "1")
1277 (set_attr "mode" "V4SF")])
;; V2DF add/subtract in its VEX three-operand form (vaddsubpd).
1279 (define_insn "*avx_addsubv2df3"
1280 [(set (match_operand:V2DF 0 "register_operand" "=x")
1283 (match_operand:V2DF 1 "register_operand" "x")
1284 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1285 (minus:V2DF (match_dup 1) (match_dup 2))
1288 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1289 [(set_attr "type" "sseadd")
1290 (set_attr "prefix" "vex")
1291 (set_attr "mode" "V2DF")])
;; V2DF add/subtract in its two-operand form (addsubpd).  Operand 1 is
;; tied to the destination ("0"); atom_unit is "complex".
1293 (define_insn "sse3_addsubv2df3"
1294 [(set (match_operand:V2DF 0 "register_operand" "=x")
1297 (match_operand:V2DF 1 "register_operand" "0")
1298 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1299 (minus:V2DF (match_dup 1) (match_dup 2))
1302 "addsubpd\t{%2, %0|%0, %2}"
1303 [(set_attr "type" "sseadd")
1304 (set_attr "atom_unit" "complex")
1305 (set_attr "mode" "V2DF")])
;; V4DF horizontal add/subtract; the insn name and mnemonic come from the
;; plusminus iterator attributes.  The RTL pairs elements (0,1) and (2,3)
;; of operand 1, then the same element pairs of operand 2.
1307 (define_insn "avx_h<plusminus_insn>v4df3"
1308 [(set (match_operand:V4DF 0 "register_operand" "=x")
1313 (match_operand:V4DF 1 "register_operand" "x")
1314 (parallel [(const_int 0)]))
1315 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1317 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1318 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1322 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1323 (parallel [(const_int 0)]))
1324 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1326 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1327 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1329 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1330 [(set_attr "type" "sseadd")
1331 (set_attr "prefix" "vex")
1332 (set_attr "mode" "V4DF")])
;; V8SF horizontal add/subtract.  Elements are visited in the order
;; 0-3 of operand 1, 0-3 of operand 2, 4-7 of operand 1, 4-7 of operand 2,
;; always as adjacent pairs.
1334 (define_insn "avx_h<plusminus_insn>v8sf3"
1335 [(set (match_operand:V8SF 0 "register_operand" "=x")
1341 (match_operand:V8SF 1 "register_operand" "x")
1342 (parallel [(const_int 0)]))
1343 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1345 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1346 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1350 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1351 (parallel [(const_int 0)]))
1352 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1354 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1355 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1359 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1360 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1362 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1363 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1366 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1367 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1369 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1370 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1372 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1373 [(set_attr "type" "sseadd")
1374 (set_attr "prefix" "vex")
1375 (set_attr "mode" "V8SF")])
;; V4SF horizontal add/subtract in its VEX three-operand form.  The RTL
;; pairs elements (0,1) and (2,3) of operand 1, then of operand 2.
1377 (define_insn "*avx_h<plusminus_insn>v4sf3"
1378 [(set (match_operand:V4SF 0 "register_operand" "=x")
1383 (match_operand:V4SF 1 "register_operand" "x")
1384 (parallel [(const_int 0)]))
1385 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1387 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1388 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1392 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1393 (parallel [(const_int 0)]))
1394 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1396 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1397 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1399 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1400 [(set_attr "type" "sseadd")
1401 (set_attr "prefix" "vex")
1402 (set_attr "mode" "V4SF")])
;; V4SF horizontal add/subtract in its two-operand form.  Operand 1 is
;; tied to the destination ("0"); atom_unit is "complex" and prefix_rep
;; is 1.
1404 (define_insn "sse3_h<plusminus_insn>v4sf3"
1405 [(set (match_operand:V4SF 0 "register_operand" "=x")
1410 (match_operand:V4SF 1 "register_operand" "0")
1411 (parallel [(const_int 0)]))
1412 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1414 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1415 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1419 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1420 (parallel [(const_int 0)]))
1421 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1423 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1424 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1426 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1427 [(set_attr "type" "sseadd")
1428 (set_attr "atom_unit" "complex")
1429 (set_attr "prefix_rep" "1")
1430 (set_attr "mode" "V4SF")])
;; V2DF horizontal add/subtract in its VEX three-operand form.  The RTL
;; pairs elements 0 and 1 of operand 1, then elements 0 and 1 of operand 2.
1432 (define_insn "*avx_h<plusminus_insn>v2df3"
1433 [(set (match_operand:V2DF 0 "register_operand" "=x")
1437 (match_operand:V2DF 1 "register_operand" "x")
1438 (parallel [(const_int 0)]))
1439 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1442 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1443 (parallel [(const_int 0)]))
1444 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1446 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1447 [(set_attr "type" "sseadd")
1448 (set_attr "prefix" "vex")
1449 (set_attr "mode" "V2DF")])
;; V2DF horizontal add/subtract in its two-operand form.  Operand 1 is
;; tied to the destination ("0").
1451 (define_insn "sse3_h<plusminus_insn>v2df3"
1452 [(set (match_operand:V2DF 0 "register_operand" "=x")
1456 (match_operand:V2DF 1 "register_operand" "0")
1457 (parallel [(const_int 0)]))
1458 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1461 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1462 (parallel [(const_int 0)]))
1463 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1465 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1466 [(set_attr "type" "sseadd")
1467 (set_attr "mode" "V2DF")])
;; Sum reduction of a V8SF vector.  Sequence: hadd of operand 1 with
;; itself into tmp, hadd of tmp with itself into tmp2, vperm2f128 of tmp2
;; with immediate 1 back into tmp, then operand 0 = tmp + tmp2.
1469 (define_expand "reduc_splus_v8sf"
1470 [(match_operand:V8SF 0 "register_operand" "")
1471 (match_operand:V8SF 1 "register_operand" "")]
1474 rtx tmp = gen_reg_rtx (V8SFmode);
1475 rtx tmp2 = gen_reg_rtx (V8SFmode);
1476 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1477 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1478 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1479 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF vector.  The body contains two strategies: a
;; pair of sse3_haddv4sf3 insns (operand 1 with itself, then the temporary
;; with itself), and a call to ix86_expand_reduc_v4sf with gen_addv4sf3.
1483 (define_expand "reduc_splus_v4sf"
1484 [(match_operand:V4SF 0 "register_operand" "")
1485 (match_operand:V4SF 1 "register_operand" "")]
1490 rtx tmp = gen_reg_rtx (V4SFmode);
1491 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1492 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1495 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V4DF vector.  Sequence: hadd of operand 1 with
;; itself into tmp, vperm2f128 of tmp with immediate 1 into tmp2, then
;; operand 0 = tmp + tmp2.
1499 (define_expand "reduc_splus_v4df"
1500 [(match_operand:V4DF 0 "register_operand" "")
1501 (match_operand:V4DF 1 "register_operand" "")]
1504 rtx tmp = gen_reg_rtx (V4DFmode);
1505 rtx tmp2 = gen_reg_rtx (V4DFmode);
1506 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1507 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1508 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF vector: a single sse3_haddv2df3 with operand 1
;; supplied as both sources.
1512 (define_expand "reduc_splus_v2df"
1513 [(match_operand:V2DF 0 "register_operand" "")
1514 (match_operand:V2DF 1 "register_operand" "")]
1517 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Max reduction of a V4SF vector, delegated to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining operation.
1521 (define_expand "reduc_smax_v4sf"
1522 [(match_operand:V4SF 0 "register_operand" "")
1523 (match_operand:V4SF 1 "register_operand" "")]
1526 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Min reduction of a V4SF vector, delegated to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining operation.
1530 (define_expand "reduc_smin_v4sf"
1531 [(match_operand:V4SF 0 "register_operand" "")
1532 (match_operand:V4SF 1 "register_operand" "")]
1535 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1539 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1541 ;; Parallel floating point comparisons
1543 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an explicit immediate.  Operand 3 is an SI
;; constant accepted by const_0_to_31_operand and is printed as the first
;; AT&T operand of vcmp; length_immediate accounts for it.
1545 (define_insn "avx_cmp<ssemodesuffix><mode>3"
1546 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1548 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1549 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1550 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1553 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1554 [(set_attr "type" "ssecmp")
1555 (set_attr "length_immediate" "1")
1556 (set_attr "prefix" "vex")
1557 (set_attr "mode" "<MODE>")])
;; AVX scalar compare with an explicit immediate, expressed as a
;; vec_merge.  Operand 3 is an SI constant accepted by
;; const_0_to_31_operand.  Operand 0 is the output, so it carries the "=x"
;; constraint like every other destination in this family; an empty
;; constraint would leave it unconstrained and not marked as written.
1559 (define_insn "avx_cmp<ssescalarmodesuffix><mode>3"
1560 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1561 (vec_merge:SSEMODEF2P
1563 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1564 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1565 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1570 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1571 [(set_attr "type" "ssecmp")
1572 (set_attr "length_immediate" "1")
1573 (set_attr "prefix" "vex")
1574 (set_attr "mode" "<ssescalarmode>")])
1576 ;; We don't promote 128-bit vector compare intrinsics, but the vectorizer
1577 ;; may generate 256-bit vector compare instructions.
;; AVX packed compare written with an RTL comparison operator (operand 3,
;; accepted by avx_comparison_float_operator).  The template applies %D3
;; to that operator to build the mnemonic.
1578 (define_insn "*avx_maskcmp<mode>3"
1579 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1580 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1581 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1582 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1583 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1584 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1585 [(set_attr "type" "ssecmp")
1586 (set_attr "prefix" "vex")
1587 (set_attr "length_immediate" "1")
1588 (set_attr "mode" "<avxvecmode>")])
;; Non-AVX compare over the SSEMODEF4 iterator; the visible part of the
;; condition accepts both SSE_FLOAT_MODE_P and SSE_VEC_FLOAT_MODE_P modes.
;; Operand 1 is tied to the destination ("0"), and %D3 is applied to the
;; comparison operator to build the mnemonic.
1590 (define_insn "<sse>_maskcmp<mode>3"
1591 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1592 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1593 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1594 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1596 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1597 "cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}"
1598 [(set_attr "type" "ssecmp")
1599 (set_attr "length_immediate" "1")
1600 (set_attr "mode" "<MODE>")])
;; AVX scalar compare written with an RTL comparison operator (operand 3,
;; accepted by sse_comparison_operator) inside a vec_merge.  The template
;; applies %D3 to the operator to build the mnemonic.  length_immediate is
;; set to 1 to agree with the other compare patterns in this group
;; (*avx_maskcmp<mode>3, <sse>_maskcmp<mode>3, <sse>_vmmaskcmp<mode>3).
1602 (define_insn "*avx_vmmaskcmp<mode>3"
1603 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1604 (vec_merge:SSEMODEF2P
1605 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1606 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1607 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1610 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1611 "vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1612 [(set_attr "type" "ssecmp")
1613 (set_attr "prefix" "vex")
(set_attr "length_immediate" "1")
1614 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar compare written with an RTL comparison operator inside
;; a vec_merge.  Operand 1 is tied to the destination ("0"); %D3 is
;; applied to the operator to build the mnemonic.
1616 (define_insn "<sse>_vmmaskcmp<mode>3"
1617 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1618 (vec_merge:SSEMODEF2P
1619 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1620 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1621 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1624 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1625 "cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1626 [(set_attr "type" "ssecmp")
1627 (set_attr "length_immediate" "1")
1628 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from element 0 of operand 0 and element 0
;; of operand 1, emitted as comis with the %v marker.  prefix_data16 is
;; selected by whether the mode attribute is DF.
1630 (define_insn "<sse>_comi"
1631 [(set (reg:CCFP FLAGS_REG)
1634 (match_operand:<ssevecmode> 0 "register_operand" "x")
1635 (parallel [(const_int 0)]))
1637 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1638 (parallel [(const_int 0)]))))]
1639 "SSE_FLOAT_MODE_P (<MODE>mode)"
1640 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1641 [(set_attr "type" "ssecomi")
1642 (set_attr "prefix" "maybe_vex")
1643 (set_attr "prefix_rep" "0")
1644 (set (attr "prefix_data16")
1645 (if_then_else (eq_attr "mode" "DF")
1647 (const_string "0")))
1648 (set_attr "mode" "<MODE>")])
;; Sets FLAGS_REG in CCFPU mode from element 0 of operand 0 and element 0
;; of operand 1, emitted as ucomis with the %v marker.  prefix_data16 is
;; selected by whether the mode attribute is DF.
1650 (define_insn "<sse>_ucomi"
1651 [(set (reg:CCFPU FLAGS_REG)
1654 (match_operand:<ssevecmode> 0 "register_operand" "x")
1655 (parallel [(const_int 0)]))
1657 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1658 (parallel [(const_int 0)]))))]
1659 "SSE_FLOAT_MODE_P (<MODE>mode)"
1660 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1661 [(set_attr "type" "ssecomi")
1662 (set_attr "prefix" "maybe_vex")
1663 (set_attr "prefix_rep" "0")
1664 (set (attr "prefix_data16")
1665 (if_then_else (eq_attr "mode" "DF")
1667 (const_string "0")))
1668 (set_attr "mode" "<MODE>")])
;; Vector conditional select for float vector modes: operand 0 receives
;; operand 1 or operand 2 according to comparison operator 3 applied to
;; operands 4 and 5.  Expansion is delegated to ix86_expand_fp_vcond,
;; whose boolean result is captured in "ok".
1670 (define_expand "vcond<mode>"
1671 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1672 (if_then_else:AVXMODEF2P
1673 (match_operator 3 ""
1674 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1675 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1676 (match_operand:AVXMODEF2P 1 "general_operand" "")
1677 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1678 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1679 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1681 bool ok = ix86_expand_fp_vcond (operands);
1686 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1688 ;; Parallel floating point logical operations
1690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX and-not on float vector modes, emitted as three-operand vandn.
;; Operand 1 must be a register; operand 2 may be a register or memory.
1692 (define_insn "avx_andnot<mode>3"
1693 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1696 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1697 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1698 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1699 "vandn<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1700 [(set_attr "type" "sselog")
1701 (set_attr "prefix" "vex")
1702 (set_attr "mode" "<avxvecmode>")])
;; Non-AVX and-not on float vector modes, emitted as two-operand andn.
;; Operand 1 is tied to the destination ("0").
1704 (define_insn "<sse>_andnot<mode>3"
1705 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1708 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1709 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1710 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1711 "andn<ssemodesuffix>\t{%2, %0|%0, %2}"
1712 [(set_attr "type" "sselog")
1713 (set_attr "mode" "<MODE>")])
;; Expander for the any_logic codes on 256-bit AVX float vector modes; the
;; preparation statement only calls ix86_fixup_binary_operands_no_copy.
1715 (define_expand "<code><mode>3"
1716 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1717 (any_logic:AVX256MODEF2P
1718 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1719 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1720 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1721 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX logical operation (any_logic) on float vector modes; operand 1 is
;; commutative ("%").  When TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set
;; the "ps" mnemonic is returned regardless of mode; otherwise the
;; mode-specific suffix is used.
1723 (define_insn "*avx_<code><mode>3"
1724 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1725 (any_logic:AVXMODEF2P
1726 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1727 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1728 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1729 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1731 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1732 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1734 return "v<logic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1736 [(set_attr "type" "sselog")
1737 (set_attr "prefix" "vex")
1738 (set_attr "mode" "<avxvecmode>")])
;; Expander for the any_logic codes on the SSEMODEF2P modes; the
;; preparation statement only calls ix86_fixup_binary_operands_no_copy.
1740 (define_expand "<code><mode>3"
1741 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1742 (any_logic:SSEMODEF2P
1743 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1744 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1745 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1746 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Non-AVX logical operation (any_logic) on float vector modes; operand 1
;; is commutative and tied to the destination ("%0").  When
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" mnemonic is
;; returned regardless of mode; otherwise the mode-specific suffix is used.
1748 (define_insn "*<code><mode>3"
1749 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1750 (any_logic:SSEMODEF2P
1751 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1752 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1753 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1754 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1756 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1757 return "<logic>ps\t{%2, %0|%0, %2}";
1759 return "<logic><ssemodesuffix>\t{%2, %0|%0, %2}";
1761 [(set_attr "type" "sselog")
1762 (set_attr "mode" "<MODE>")])
;; Vector copysign built from logical operations.  operands[3] is the mask
;; returned by ix86_build_signbit_mask (<MODE>mode, 1, 0); the pattern
;; pairs (not mask) with operand 1 and mask with operand 2, and operand 0
;; is the IOR of the fresh temporaries in operands[4] and operands[5].
1764 (define_expand "copysign<mode>3"
1767 (not:VEC_FLOAT_MODE (match_dup 3))
1768 (match_operand:VEC_FLOAT_MODE 1 "nonimmediate_operand" "")))
1770 (and:VEC_FLOAT_MODE (match_dup 3)
1771 (match_operand:VEC_FLOAT_MODE 2 "nonimmediate_operand" "")))
1772 (set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
1773 (ior:VEC_FLOAT_MODE (match_dup 4) (match_dup 5)))]
1776 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1778 operands[4] = gen_reg_rtx (<MODE>mode);
1779 operands[5] = gen_reg_rtx (<MODE>mode);
1782 ;; Also define scalar versions. These are used for abs, neg, and
1783 ;; conditional move. Using subregs into vector modes causes register
1784 ;; allocation lossage. These patterns do not allow memory operands
1785 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) AVX and-not.  All operands are registers, and the insn
;; is emitted as the packed vandnp instruction with the vector mode as
;; its mode attribute.
1787 (define_insn "*avx_andnot<mode>3"
1788 [(set (match_operand:MODEF 0 "register_operand" "=x")
1791 (match_operand:MODEF 1 "register_operand" "x"))
1792 (match_operand:MODEF 2 "register_operand" "x")))]
1793 "AVX_FLOAT_MODE_P (<MODE>mode)"
1794 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1795 [(set_attr "type" "sselog")
1796 (set_attr "prefix" "vex")
1797 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) non-AVX and-not.  All operands are registers, operand 1
;; is tied to the destination ("0"), and the insn is emitted as the packed
;; andnp instruction.
1799 (define_insn "*andnot<mode>3"
1800 [(set (match_operand:MODEF 0 "register_operand" "=x")
1803 (match_operand:MODEF 1 "register_operand" "0"))
1804 (match_operand:MODEF 2 "register_operand" "x")))]
1805 "SSE_FLOAT_MODE_P (<MODE>mode)"
1806 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1807 [(set_attr "type" "sselog")
1808 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) AVX logical operation on registers only.  When
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" mnemonic is
;; returned; otherwise the packed mnemonic with the mode-specific suffix.
1810 (define_insn "*avx_<code><mode>3"
1811 [(set (match_operand:MODEF 0 "register_operand" "=x")
1813 (match_operand:MODEF 1 "register_operand" "x")
1814 (match_operand:MODEF 2 "register_operand" "x")))]
1815 "AVX_FLOAT_MODE_P (<MODE>mode)"
1817 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1818 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1820 return "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}";
1822 [(set_attr "type" "sselog")
1823 (set_attr "prefix" "vex")
1824 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) non-AVX logical operation on registers only; operand 1
;; is tied to the destination ("0").  When
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" mnemonic is
;; returned; otherwise the packed mnemonic with the mode-specific suffix.
1826 (define_insn "*<code><mode>3"
1827 [(set (match_operand:MODEF 0 "register_operand" "=x")
1829 (match_operand:MODEF 1 "register_operand" "0")
1830 (match_operand:MODEF 2 "register_operand" "x")))]
1831 "SSE_FLOAT_MODE_P (<MODE>mode)"
1833 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1834 return "<logic>ps\t{%2, %0|%0, %2}";
1836 return "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}";
1838 [(set_attr "type" "sselog")
1839 (set_attr "mode" "<ssevecmode>")])
1841 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1843 ;; FMA4 floating point multiply/accumulate instructions. This
1844 ;; includes the scalar versions of the instructions as well as the vector ones.
1847 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1849 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1850 ;; combine to generate a multiply/add with two memory references. We then
1851 ;; split this insn by first loading the destination register with one of the
1852 ;; memory operands. If we don't manage to split the insn, reload will
1853 ;; generate the appropriate moves. This is needed because combine
1854 ;; has already folded one of the memory references into both the multiply and
1855 ;; add insns, and it can't generate a new pseudo. I.e.:
1856 ;; (set (reg1) (mem (addr1)))
1857 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1858 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1860 ;; Intrinsic FMA operations.
1862 ;; The standard name for fma is only available with SSE math enabled.
;; Standard-name fused multiply-add expander over FMAMODE with three
;; nonimmediate inputs.  Enabled when TARGET_FMA or TARGET_FMA4 holds
;; together with TARGET_SSE_MATH.
1863 (define_expand "fma<mode>4"
1864 [(set (match_operand:FMAMODE 0 "register_operand")
1866 (match_operand:FMAMODE 1 "nonimmediate_operand")
1867 (match_operand:FMAMODE 2 "nonimmediate_operand")
1868 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1869 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
1872 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Builtin-facing fused multiply-add expander over FMAMODE.  Same operand
;; shape as fma<mode>4, but the condition omits TARGET_SSE_MATH.
1873 (define_expand "fma4i_fmadd_<mode>"
1874 [(set (match_operand:FMAMODE 0 "register_operand")
1876 (match_operand:FMAMODE 1 "nonimmediate_operand")
1877 (match_operand:FMAMODE 2 "nonimmediate_operand")
1878 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1879 "TARGET_FMA || TARGET_FMA4"
;; Four-operand vfmadd.  Two alternatives: operands 1 and 2 in registers
;; with operand 3 in a register or memory, or operand 2 in memory with
;; operand 3 in a register.  Operand 1 is marked commutative ("%").
1882 (define_insn "*fma4i_fmadd_<mode>"
1883 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1885 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1886 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1887 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1889 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1890 [(set_attr "type" "ssemuladd")
1891 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub with the same two register/memory alternatives as
;; the vfmadd pattern; operand 3 sits inside an extra unary wrapper in the
;; RTL.
1893 (define_insn "*fma4i_fmsub_<mode>"
1894 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1896 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1897 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1899 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1901 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1902 [(set_attr "type" "ssemuladd")
1903 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd with the same two register/memory alternatives;
;; operand 1 sits inside an extra unary wrapper in the RTL.
1905 (define_insn "*fma4i_fnmadd_<mode>"
1906 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1909 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1910 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1911 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1913 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1914 [(set_attr "type" "ssemuladd")
1915 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub with the same two register/memory alternatives;
;; both operand 1 and operand 3 sit inside extra unary wrappers in the RTL.
1917 (define_insn "*fma4i_fnmsub_<mode>"
1918 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1921 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1922 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1924 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1926 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1927 [(set_attr "type" "ssemuladd")
1928 (set_attr "mode" "<MODE>")])
1930 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1931 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar ("vm") fused multiply-add, expressed as a
;; vec_merge.  The preparation code supplies operands[4] as the zero
;; constant of the vector mode.
1933 (define_expand "fma4i_vmfmadd_<mode>"
1934 [(set (match_operand:SSEMODEF2P 0 "register_operand")
1935 (vec_merge:SSEMODEF2P
1937 (match_operand:SSEMODEF2P 1 "nonimmediate_operand")
1938 (match_operand:SSEMODEF2P 2 "nonimmediate_operand")
1939 (match_operand:SSEMODEF2P 3 "nonimmediate_operand"))
1944 operands[4] = CONST0_RTX (<MODE>mode);
;; Scalar vfmadd: the fused operation is vec_merged with operand 4, which
;; must satisfy const0_operand.  Two register/memory alternatives as in
;; the packed patterns; the template uses the scalar mode suffix.
1947 (define_insn "*fma4i_vmfmadd_<mode>"
1948 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1949 (vec_merge:SSEMODEF2P
1951 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1952 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1953 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1954 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1957 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1958 [(set_attr "type" "ssemuladd")
1959 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 multiply-subtract: a vec_merge whose operand 4 must satisfy
;; const0_operand.  Emits vfmsub<ssescalarmodesuffix>; memory is allowed in
;; operand 3 (first alternative) or operand 2 (second alternative).
1961 (define_insn "*fma4i_vmfmsub_<mode>"
1962 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1963 (vec_merge:SSEMODEF2P
1965 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1966 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1968 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
1969 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1972 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1973 [(set_attr "type" "ssemuladd")
1974 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 negated multiply-add: a vec_merge whose operand 4 must satisfy
;; const0_operand.  Emits vfnmadd<ssescalarmodesuffix>; memory is allowed in
;; operand 3 (first alternative) or operand 2 (second alternative).
1976 (define_insn "*fma4i_vmfnmadd_<mode>"
1977 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1978 (vec_merge:SSEMODEF2P
1981 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1982 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1983 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1984 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1987 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1988 [(set_attr "type" "ssemuladd")
1989 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 negated multiply-subtract: a vec_merge whose operand 4 must
;; satisfy const0_operand.  Emits vfnmsub<ssescalarmodesuffix>; memory is
;; allowed in operand 3 (first alternative) or operand 2 (second alternative).
1991 (define_insn "*fma4i_vmfnmsub_<mode>"
1992 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1993 (vec_merge:SSEMODEF2P
1996 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1997 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1999 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
2000 (match_operand:SSEMODEF2P 4 "const0_operand" "")
2003 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2004 [(set_attr "type" "ssemuladd")
2005 (set_attr "mode" "<MODE>")])
2007 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2009 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2011 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2013 ;; It would be possible to represent these without the UNSPEC as
2016 ;; (fma op1 op2 op3)
2017 ;; (fma op1 op2 (neg op3))
2020 ;; But this doesn't seem useful in practice.
;; Named expander for the alternating multiply add/subtract over AVXMODEF2P.
;; It carries predicates only (no constraints) and is enabled when either
;; TARGET_FMA or TARGET_FMA4 is set.
2022 (define_expand "fmaddsub_<mode>"
2023 [(set (match_operand:AVXMODEF2P 0 "register_operand")
2025 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand")
2026 (match_operand:AVXMODEF2P 2 "nonimmediate_operand")
2027 (match_operand:AVXMODEF2P 3 "nonimmediate_operand")]
2029 "TARGET_FMA || TARGET_FMA4"
;; FMA4 alternating multiply add/subtract over AVXMODEF2P (four-operand form).
;; The mnemonic suffix comes from <ssemodesuffix>, as in the FMA3
;; *fma_fmaddsub_<mode> pattern, so it follows the mode of the iterator; a
;; hard-coded "ps" would be emitted unchanged for every mode.
;; Operand 1 is commutative ("%"); memory is allowed in operand 3 (first
;; alternative) or operand 2 (second alternative).
2032 (define_insn "*fma4_fmaddsub_<mode>"
2033 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
2035 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
2036 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
2037 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x")]
2040 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2041 [(set_attr "type" "ssemuladd")
2042 (set_attr "mode" "<MODE>")])
;; FMA4 alternating multiply subtract/add over AVXMODEF2P (four-operand form).
;; The mnemonic suffix comes from <ssemodesuffix>, as in the FMA3
;; *fma_fmsubadd_<mode> pattern, so it follows the mode of the iterator; a
;; hard-coded "ps" would be emitted unchanged for every mode.
;; Operand 1 is commutative ("%"); memory is allowed in operand 3 (first
;; alternative) or operand 2 (second alternative).
2044 (define_insn "*fma4_fmsubadd_<mode>"
2045 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
2047 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
2048 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
2050 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2053 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2054 [(set_attr "type" "ssemuladd")
2055 (set_attr "mode" "<MODE>")])
2057 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2059 ;; FMA3 floating point multiply/accumulate instructions.
2061 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA3 multiply-add, three-operand destructive form.  The alternatives pick
;; the operand-order variant of the instruction:
;;   alt 0: operand 1 tied to the destination, operand 2 reg/mem -> 132
;;   alt 1: operand 1 tied to the destination, operand 3 reg/mem -> 213
;;   alt 2: operand 3 tied to the destination, operand 2 reg/mem -> 231
;; FMA3 only defines the 132, 213 and 231 orderings, so the middle
;; alternative uses 213 (as the fmaddsub/fmsubadd patterns already do).
2063 (define_insn "*fma_fmadd_<mode>"
2064 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2066 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2067 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2068 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2071 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2072 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2073 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2074 [(set_attr "type" "ssemuladd")
2075 (set_attr "mode" "<MODE>")])
;; FMA3 multiply-subtract, three-operand destructive form.  The alternatives
;; pick the operand-order variant of the instruction:
;;   alt 0: operand 1 tied to the destination, operand 2 reg/mem -> 132
;;   alt 1: operand 1 tied to the destination, operand 3 reg/mem -> 213
;;   alt 2: operand 3 tied to the destination, operand 2 reg/mem -> 231
;; FMA3 only defines the 132, 213 and 231 orderings, so the middle
;; alternative uses 213 (as the fmaddsub/fmsubadd patterns already do).
2077 (define_insn "*fma_fmsub_<mode>"
2078 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2080 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2081 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2083 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2086 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2087 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2088 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2089 [(set_attr "type" "ssemuladd")
2090 (set_attr "mode" "<MODE>")])
;; FMA3 negated multiply-add, three-operand destructive form.  Named
;; *fma_fnmadd_<mode> to match the vfnmadd mnemonics it emits and to avoid
;; sharing a name with the plain multiply-add pattern.  Alternatives:
;;   alt 0: operand 1 tied to the destination, operand 2 reg/mem -> 132
;;   alt 1: operand 1 tied to the destination, operand 3 reg/mem -> 213
;;   alt 2: operand 3 tied to the destination, operand 2 reg/mem -> 231
;; FMA3 only defines the 132, 213 and 231 orderings, so the middle
;; alternative uses 213 (as the fmaddsub/fmsubadd patterns already do).
2092 (define_insn "*fma_fnmadd_<mode>"
2093 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2096 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2097 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2098 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2101 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2102 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2103 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2104 [(set_attr "type" "ssemuladd")
2105 (set_attr "mode" "<MODE>")])
;; FMA3 negated multiply-subtract, three-operand destructive form.  Named
;; *fma_fnmsub_<mode> to match the vfnmsub mnemonics it emits and to avoid
;; sharing a name with the plain multiply-subtract pattern.  Alternatives:
;;   alt 0: operand 1 tied to the destination, operand 2 reg/mem -> 132
;;   alt 1: operand 1 tied to the destination, operand 3 reg/mem -> 213
;;   alt 2: operand 3 tied to the destination, operand 2 reg/mem -> 231
;; FMA3 only defines the 132, 213 and 231 orderings, so the middle
;; alternative uses 213 (as the fmaddsub/fmsubadd patterns already do).
2107 (define_insn "*fma_fnmsub_<mode>"
2108 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2111 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2112 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2114 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2117 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2118 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2119 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2120 [(set_attr "type" "ssemuladd")
2121 (set_attr "mode" "<MODE>")])
;; FMA3 alternating multiply add/subtract over AVXMODEF2P, three-operand
;; destructive form.  Alternatives:
;;   alt 0: operand 1 tied to the destination, operand 2 reg/mem -> 132
;;   alt 1: operand 1 tied to the destination, operand 3 reg/mem -> 213
;;   alt 2: operand 3 tied to the destination, operand 2 reg/mem -> 231
2123 (define_insn "*fma_fmaddsub_<mode>"
2124 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
2126 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
2127 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
2128 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0")]
2132 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2133 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2134 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2135 [(set_attr "type" "ssemuladd")
2136 (set_attr "mode" "<MODE>")])
;; FMA3 alternating multiply subtract/add over AVXMODEF2P, three-operand
;; destructive form.  Alternatives:
;;   alt 0: operand 1 tied to the destination, operand 2 reg/mem -> 132
;;   alt 1: operand 1 tied to the destination, operand 3 reg/mem -> 213
;;   alt 2: operand 3 tied to the destination, operand 2 reg/mem -> 231
2138 (define_insn "*fma_fmsubadd_<mode>"
2139 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
2141 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
2142 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
2144 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0"))]
2148 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2149 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2150 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2151 [(set_attr "type" "ssemuladd")
2152 (set_attr "mode" "<MODE>")])
2154 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2156 ;; Non-intrinsic versions, matched when fused-multiply-add is allowed.
2158 ;; ??? If fused-madd were a generic flag, combine could do this without
2159 ;; needing splitters here in the backend. Irritatingly, combine won't
2160 ;; recognize many of these with mere splits, since only 3 or more insns
2161 ;; are allowed to split during combine. Thankfully, there's always a
2162 ;; split_all_insns pass that runs before reload.
2164 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating multiply and add.
;; Matches a separate multiply (operands 1 and 2) combined with operand 3.
;; Only valid with SSE math, fused-madd enabled and FMA or FMA4 available,
;; and only before reload.  The output code is gcc_unreachable (): the insn
;; is expected to be split and never emitted as-is.
2166 (define_insn_and_split "*split_fma"
2167 [(set (match_operand:FMAMODE 0 "register_operand")
2170 (match_operand:FMAMODE 1 "nonimmediate_operand")
2171 (match_operand:FMAMODE 2 "nonimmediate_operand"))
2172 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2173 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2174 && (TARGET_FMA || TARGET_FMA4)
2175 && !(reload_in_progress || reload_completed)"
2176 { gcc_unreachable (); }
2185 ;; Floating multiply and subtract.
;; Same enabling condition as the other splitters in this group (SSE math,
;; fused-madd, FMA or FMA4, before reload).  The replacement pattern negates
;; operand 3; the output code is gcc_unreachable () because the insn is
;; expected to be split and never emitted as-is.
2186 (define_insn_and_split "*split_fms"
2187 [(set (match_operand:FMAMODE 0 "register_operand")
2190 (match_operand:FMAMODE 1 "nonimmediate_operand")
2191 (match_operand:FMAMODE 2 "nonimmediate_operand"))
2192 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2193 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2194 && (TARGET_FMA || TARGET_FMA4)
2195 && !(reload_in_progress || reload_completed)"
2196 { gcc_unreachable (); }
2202 (neg:FMAMODE (match_dup 3))))]
2205 ;; Floating point negative multiply and add.
2206 ;; Recognize (-a * b + c) via the canonical form: c - (a * b).
;; Operand 3 (c) therefore appears first in the matched pattern, followed by
;; the product of operands 1 and 2.  The replacement pattern negates
;; operand 1.  Enabled only before reload; the output code is
;; gcc_unreachable () because the insn is expected to be split.
2207 (define_insn_and_split "*split_fnma"
2208 [(set (match_operand:FMAMODE 0 "register_operand")
2210 (match_operand:FMAMODE 3 "nonimmediate_operand")
2212 (match_operand:FMAMODE 1 "nonimmediate_operand")
2213 (match_operand:FMAMODE 2 "nonimmediate_operand"))))]
2214 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2215 && (TARGET_FMA || TARGET_FMA4)
2216 && !(reload_in_progress || reload_completed)"
2217 { gcc_unreachable (); }
2221 (neg:FMAMODE (match_dup 1))
2226 ;; Floating point negative multiply and subtract.
2227 ;; Recognize (-a * b - c) via the canonical form: (-a * b) - c.
;; Operand 1 (a) is wrapped one level deeper than operand 2, and operand 3
;; (c) comes last.  The replacement pattern negates both operand 1 and
;; operand 3.  Enabled only before reload; the output code is
;; gcc_unreachable () because the insn is expected to be split.
2228 (define_insn_and_split "*split_fnms"
2229 [(set (match_operand:FMAMODE 0 "register_operand")
2233 (match_operand:FMAMODE 1 "nonimmediate_operand"))
2234 (match_operand:FMAMODE 2 "nonimmediate_operand"))
2235 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2236 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2237 && (TARGET_FMA || TARGET_FMA4)
2238 && !(reload_in_progress || reload_completed)"
2239 { gcc_unreachable (); }
2243 (neg:FMAMODE (match_dup 1))
2245 (neg:FMAMODE (match_dup 3))))]
2248 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2250 ;; Parallel single-precision floating point conversion operations
2252 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (MMX register or memory, "ym") to
;; V2SF.  Operand 1 is tied to the V4SF destination ("0"), so the
;; instruction is emitted with two operands.
2254 (define_insn "sse_cvtpi2ps"
2255 [(set (match_operand:V4SF 0 "register_operand" "=x")
2258 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2259 (match_operand:V4SF 1 "register_operand" "0")
2262 "cvtpi2ps\t{%2, %0|%0, %2}"
2263 [(set_attr "type" "ssecvt")
2264 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF source (SSE register or memory) to a V2SI result in an MMX
;; register ("=y").  The RTL selects elements 0 and 1 of a V4SI unspec of
;; the source.  Marked as using the "mmx" unit.
2266 (define_insn "sse_cvtps2pi"
2267 [(set (match_operand:V2SI 0 "register_operand" "=y")
2269 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2271 (parallel [(const_int 0) (const_int 1)])))]
2273 "cvtps2pi\t{%1, %0|%0, %1}"
2274 [(set_attr "type" "ssecvt")
2275 (set_attr "unit" "mmx")
2276 (set_attr "mode" "DI")])
;; cvttps2pi: like cvtps2pi but expressed with fix: (truncating) instead of
;; an unspec.  Selects elements 0 and 1 of the V4SI conversion into an MMX
;; register.  prefix_rep is explicitly forced to 0.
2278 (define_insn "sse_cvttps2pi"
2279 [(set (match_operand:V2SI 0 "register_operand" "=y")
2281 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2282 (parallel [(const_int 0) (const_int 1)])))]
2284 "cvttps2pi\t{%1, %0|%0, %1}"
2285 [(set_attr "type" "ssecvt")
2286 (set_attr "unit" "mmx")
2287 (set_attr "prefix_rep" "0")
2288 (set_attr "mode" "SF")])
;; VEX form of cvtsi2ss: converts the SI operand 2 (register or memory) to
;; SF.  Operand 1 is an independent source register (not tied to the
;; destination), so three operands are printed.
2290 (define_insn "*avx_cvtsi2ss"
2291 [(set (match_operand:V4SF 0 "register_operand" "=x")
2294 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2295 (match_operand:V4SF 1 "register_operand" "x")
2298 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2299 [(set_attr "type" "sseicvt")
2300 (set_attr "prefix" "vex")
2301 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SI operand 2 to SF; operand 1 is tied to the
;; destination.  The register and memory sources are separate alternatives
;; so that the per-CPU *_decode attributes can differ between them.
2303 (define_insn "sse_cvtsi2ss"
2304 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2307 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2308 (match_operand:V4SF 1 "register_operand" "0,0")
2311 "cvtsi2ss\t{%2, %0|%0, %2}"
2312 [(set_attr "type" "sseicvt")
2313 (set_attr "athlon_decode" "vector,double")
2314 (set_attr "amdfam10_decode" "vector,double")
2315 (set_attr "bdver1_decode" "double,direct")
2316 (set_attr "mode" "SF")])
;; VEX form of cvtsi2ssq: DI source (register or memory) converted to SF.
;; Requires both TARGET_AVX and TARGET_64BIT; operand 1 is an independent
;; source register.  length_vex is fixed at 4.
2318 (define_insn "*avx_cvtsi2ssq"
2319 [(set (match_operand:V4SF 0 "register_operand" "=x")
2322 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2323 (match_operand:V4SF 1 "register_operand" "x")
2325 "TARGET_AVX && TARGET_64BIT"
2326 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2327 [(set_attr "type" "sseicvt")
2328 (set_attr "length_vex" "4")
2329 (set_attr "prefix" "vex")
2330 (set_attr "mode" "SF")])
;; cvtsi2ssq: DI source converted to SF, 64-bit targets only.  Operand 1 is
;; tied to the destination.  Two alternatives ("r" and "rm") carry distinct
;; *_decode attribute values; a REX prefix is always required.
2332 (define_insn "sse_cvtsi2ssq"
2333 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2336 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2337 (match_operand:V4SF 1 "register_operand" "0,0")
2339 "TARGET_SSE && TARGET_64BIT"
2340 "cvtsi2ssq\t{%2, %0|%0, %2}"
2341 [(set_attr "type" "sseicvt")
2342 (set_attr "prefix_rex" "1")
2343 (set_attr "athlon_decode" "vector,double")
2344 (set_attr "amdfam10_decode" "vector,double")
2345 (set_attr "bdver1_decode" "double,direct")
2346 (set_attr "mode" "SF")])
;; cvtss2si on a vector source: element 0 of the V4SF operand goes through
;; UNSPEC_FIX_NOTRUNC into an SI register.  The template carries the %v
;; prefix and the insn is marked "maybe_vex".
2348 (define_insn "sse_cvtss2si"
2349 [(set (match_operand:SI 0 "register_operand" "=r,r")
2352 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2353 (parallel [(const_int 0)]))]
2354 UNSPEC_FIX_NOTRUNC))]
2356 "%vcvtss2si\t{%1, %0|%0, %1}"
2357 [(set_attr "type" "sseicvt")
2358 (set_attr "athlon_decode" "double,vector")
2359 (set_attr "bdver1_decode" "double,double")
2360 (set_attr "prefix_rep" "1")
2361 (set_attr "prefix" "maybe_vex")
2362 (set_attr "mode" "SI")])
;; cvtss2si on a scalar source: the SF operand (SSE register or memory) goes
;; through UNSPEC_FIX_NOTRUNC into an SI register.  Same instruction as the
;; vector-source pattern, but without the vec_select wrapper.
2364 (define_insn "sse_cvtss2si_2"
2365 [(set (match_operand:SI 0 "register_operand" "=r,r")
2366 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2367 UNSPEC_FIX_NOTRUNC))]
2369 "%vcvtss2si\t{%1, %0|%0, %1}"
2370 [(set_attr "type" "sseicvt")
2371 (set_attr "athlon_decode" "double,vector")
2372 (set_attr "amdfam10_decode" "double,double")
2373 (set_attr "bdver1_decode" "double,double")
2374 (set_attr "prefix_rep" "1")
2375 (set_attr "prefix" "maybe_vex")
2376 (set_attr "mode" "SI")])
;; 64-bit cvtss2si on a vector source: element 0 of the V4SF operand goes
;; through UNSPEC_FIX_NOTRUNC into a DI register.  64-bit targets only.
;; The "{q}" in the template adds the size suffix in AT&T syntax only.
2378 (define_insn "sse_cvtss2siq"
2379 [(set (match_operand:DI 0 "register_operand" "=r,r")
2382 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2383 (parallel [(const_int 0)]))]
2384 UNSPEC_FIX_NOTRUNC))]
2385 "TARGET_SSE && TARGET_64BIT"
2386 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2387 [(set_attr "type" "sseicvt")
2388 (set_attr "athlon_decode" "double,vector")
2389 (set_attr "bdver1_decode" "double,double")
2390 (set_attr "prefix_rep" "1")
2391 (set_attr "prefix" "maybe_vex")
2392 (set_attr "mode" "DI")])
;; 64-bit cvtss2si on a scalar source: the SF operand goes through
;; UNSPEC_FIX_NOTRUNC into a DI register.  64-bit targets only.
;; The "{q}" in the template adds the size suffix in AT&T syntax only.
2394 (define_insn "sse_cvtss2siq_2"
2395 [(set (match_operand:DI 0 "register_operand" "=r,r")
2396 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2397 UNSPEC_FIX_NOTRUNC))]
2398 "TARGET_SSE && TARGET_64BIT"
2399 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2400 [(set_attr "type" "sseicvt")
2401 (set_attr "athlon_decode" "double,vector")
2402 (set_attr "amdfam10_decode" "double,double")
2403 (set_attr "bdver1_decode" "double,double")
2404 (set_attr "prefix_rep" "1")
2405 (set_attr "prefix" "maybe_vex")
2406 (set_attr "mode" "DI")])
;; cvttss2si: conversion of element 0 of the V4SF operand (SSE register or
;; memory) into an SI register; emits the "cvtt" (truncating) mnemonic.
2408 (define_insn "sse_cvttss2si"
2409 [(set (match_operand:SI 0 "register_operand" "=r,r")
2412 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2413 (parallel [(const_int 0)]))))]
2415 "%vcvttss2si\t{%1, %0|%0, %1}"
2416 [(set_attr "type" "sseicvt")
2417 (set_attr "athlon_decode" "double,vector")
2418 (set_attr "amdfam10_decode" "double,double")
2419 (set_attr "bdver1_decode" "double,double")
2420 (set_attr "prefix_rep" "1")
2421 (set_attr "prefix" "maybe_vex")
2422 (set_attr "mode" "SI")])
;; 64-bit cvttss2si: conversion of element 0 of the V4SF operand into a DI
;; register; 64-bit targets only.  The "{q}" in the template adds the size
;; suffix in AT&T syntax only.
2424 (define_insn "sse_cvttss2siq"
2425 [(set (match_operand:DI 0 "register_operand" "=r,r")
2428 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2429 (parallel [(const_int 0)]))))]
2430 "TARGET_SSE && TARGET_64BIT"
2431 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2432 [(set_attr "type" "sseicvt")
2433 (set_attr "athlon_decode" "double,vector")
2434 (set_attr "amdfam10_decode" "double,double")
2435 (set_attr "bdver1_decode" "double,double")
2436 (set_attr "prefix_rep" "1")
2437 (set_attr "prefix" "maybe_vex")
2438 (set_attr "mode" "DI")])
;; AVX vcvtdq2ps: signed integer vector to float vector, for each mode of
;; AVXMODEDCVTDQ2PS.  The source mode is derived through <avxcvtvecmode>.
2440 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2441 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2442 (float:AVXMODEDCVTDQ2PS
2443 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2445 "vcvtdq2ps\t{%1, %0|%0, %1}"
2446 [(set_attr "type" "ssecvt")
2447 (set_attr "prefix" "vex")
2448 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: signed V4SI (SSE register or memory) to V4SF.
2450 (define_insn "sse2_cvtdq2ps"
2451 [(set (match_operand:V4SF 0 "register_operand" "=x")
2452 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2454 "cvtdq2ps\t{%1, %0|%0, %1}"
2455 [(set_attr "type" "ssecvt")
2456 (set_attr "mode" "V4SF")])
;; Unsigned V4SI -> V4SF conversion built on top of the signed conversion.
;; Visible steps: convert operand 1 as signed; compare that result
;; (operands[5]) against zero (operands[3]) with lt; AND the comparison
;; result (operands[6]) with operands[4]; add the masked value
;; (operands[7]) to operands[5] to form the final result.
;; Preparation code: operands[3] is a zero vector in a register,
;; operands[4] is built by ix86_build_const_vector from the SFmode constant
;; 2**32 (real_ldexp of dconst1 by 32), and operands[5..7] are fresh V4SF
;; temporaries.
2458 (define_expand "sse2_cvtudq2ps"
2460 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2462 (lt:V4SF (match_dup 5) (match_dup 3)))
2464 (and:V4SF (match_dup 6) (match_dup 4)))
2465 (set (match_operand:V4SF 0 "register_operand" "")
2466 (plus:V4SF (match_dup 5) (match_dup 7)))]
2469 REAL_VALUE_TYPE TWO32r;
2473 real_ldexp (&TWO32r, &dconst1, 32);
2474 x = const_double_from_real_value (TWO32r, SFmode);
2476 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2477 operands[4] = force_reg (V4SFmode,
2478 ix86_build_const_vector (V4SFmode, 1, x));
2480 for (i = 5; i < 8; i++)
2481 operands[i] = gen_reg_rtx (V4SFmode);
;; AVX vcvtps2dq: float vector to integer vector via UNSPEC_FIX_NOTRUNC, for
;; each mode of AVXMODEDCVTPS2DQ.  The source mode is derived through
;; <avxcvtvecmode>.
2484 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2485 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2486 (unspec:AVXMODEDCVTPS2DQ
2487 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2488 UNSPEC_FIX_NOTRUNC))]
2490 "vcvtps2dq\t{%1, %0|%0, %1}"
2491 [(set_attr "type" "ssecvt")
2492 (set_attr "prefix" "vex")
2493 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: V4SF (SSE register or memory) to V4SI via UNSPEC_FIX_NOTRUNC.
;; Encoded with the data16 prefix.
2495 (define_insn "sse2_cvtps2dq"
2496 [(set (match_operand:V4SI 0 "register_operand" "=x")
2497 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2498 UNSPEC_FIX_NOTRUNC))]
2500 "cvtps2dq\t{%1, %0|%0, %1}"
2501 [(set_attr "type" "ssecvt")
2502 (set_attr "prefix_data16" "1")
2503 (set_attr "mode" "TI")])
;; AVX vcvttps2dq: float vector to integer vector expressed with fix:
;; (truncating), for each mode of AVXMODEDCVTPS2DQ.
2505 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2506 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2507 (fix:AVXMODEDCVTPS2DQ
2508 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2510 "vcvttps2dq\t{%1, %0|%0, %1}"
2511 [(set_attr "type" "ssecvt")
2512 (set_attr "prefix" "vex")
2513 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: V4SF (SSE register or memory) to V4SI expressed with fix:
;; (truncating).  Encoded with the rep prefix and explicitly without data16.
2515 (define_insn "sse2_cvttps2dq"
2516 [(set (match_operand:V4SI 0 "register_operand" "=x")
2517 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2519 "cvttps2dq\t{%1, %0|%0, %1}"
2520 [(set_attr "type" "ssecvt")
2521 (set_attr "prefix_rep" "1")
2522 (set_attr "prefix_data16" "0")
2523 (set_attr "mode" "TI")])
2525 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2527 ;; Parallel double-precision floating point conversion operations
2529 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI to V2DF.  The source is either an MMX register (first
;; alternative) or memory (second alternative); the "unit" and
;; "prefix_data16" attributes are set only for the MMX-register alternative.
2531 (define_insn "sse2_cvtpi2pd"
2532 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2533 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2535 "cvtpi2pd\t{%1, %0|%0, %1}"
2536 [(set_attr "type" "ssecvt")
2537 (set_attr "unit" "mmx,*")
2538 (set_attr "prefix_data16" "1,*")
2539 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF (SSE register or memory) to V2SI in an MMX register via
;; UNSPEC_FIX_NOTRUNC.
2541 (define_insn "sse2_cvtpd2pi"
2542 [(set (match_operand:V2SI 0 "register_operand" "=y")
2543 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2544 UNSPEC_FIX_NOTRUNC))]
2546 "cvtpd2pi\t{%1, %0|%0, %1}"
2547 [(set_attr "type" "ssecvt")
2548 (set_attr "unit" "mmx")
2549 (set_attr "prefix_data16" "1")
2550 (set_attr "mode" "DI")
2551 (set_attr "bdver1_decode" "double")])
;; cvttpd2pi: V2DF (SSE register or memory) to V2SI in an MMX register,
;; expressed with fix: (truncating).
2553 (define_insn "sse2_cvttpd2pi"
2554 [(set (match_operand:V2SI 0 "register_operand" "=y")
2555 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2557 "cvttpd2pi\t{%1, %0|%0, %1}"
2558 [(set_attr "type" "ssecvt")
2559 (set_attr "unit" "mmx")
2560 (set_attr "prefix_data16" "1")
2561 (set_attr "mode" "TI")
2562 (set_attr "bdver1_decode" "double")])
;; VEX form of cvtsi2sd: SI source (register or memory) converted to DF.
;; Operand 1 is an independent source register, so three operands are
;; printed.
2564 (define_insn "*avx_cvtsi2sd"
2565 [(set (match_operand:V2DF 0 "register_operand" "=x")
2568 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2569 (match_operand:V2DF 1 "register_operand" "x")
2572 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2573 [(set_attr "type" "sseicvt")
2574 (set_attr "prefix" "vex")
2575 (set_attr "mode" "DF")])
;; cvtsi2sd: SI source converted to DF; operand 1 is tied to the
;; destination.  Register and memory sources are separate alternatives so
;; the per-CPU *_decode attributes can differ between them.
2577 (define_insn "sse2_cvtsi2sd"
2578 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2581 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2582 (match_operand:V2DF 1 "register_operand" "0,0")
2585 "cvtsi2sd\t{%2, %0|%0, %2}"
2586 [(set_attr "type" "sseicvt")
2587 (set_attr "mode" "DF")
2588 (set_attr "athlon_decode" "double,direct")
2589 (set_attr "amdfam10_decode" "vector,double")
2590 (set_attr "bdver1_decode" "double,direct")])
;; VEX form of cvtsi2sdq: DI source (register or memory) converted to DF.
;; Requires both TARGET_AVX and TARGET_64BIT; length_vex is fixed at 4.
2592 (define_insn "*avx_cvtsi2sdq"
2593 [(set (match_operand:V2DF 0 "register_operand" "=x")
2596 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2597 (match_operand:V2DF 1 "register_operand" "x")
2599 "TARGET_AVX && TARGET_64BIT"
2600 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2601 [(set_attr "type" "sseicvt")
2602 (set_attr "length_vex" "4")
2603 (set_attr "prefix" "vex")
2604 (set_attr "mode" "DF")])
;; cvtsi2sdq: DI source converted to DF, 64-bit targets only.  Operand 1 is
;; tied to the destination; a REX prefix is always required.  Register and
;; memory sources are separate alternatives for the *_decode attributes.
2606 (define_insn "sse2_cvtsi2sdq"
2607 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2610 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2611 (match_operand:V2DF 1 "register_operand" "0,0")
2613 "TARGET_SSE2 && TARGET_64BIT"
2614 "cvtsi2sdq\t{%2, %0|%0, %2}"
2615 [(set_attr "type" "sseicvt")
2616 (set_attr "prefix_rex" "1")
2617 (set_attr "mode" "DF")
2618 (set_attr "athlon_decode" "double,direct")
2619 (set_attr "amdfam10_decode" "vector,double")
2620 (set_attr "bdver1_decode" "double,direct")])
;; cvtsd2si on a vector source: element 0 of the V2DF operand goes through
;; UNSPEC_FIX_NOTRUNC into an SI register.  The template carries the %v
;; prefix and the insn is marked "maybe_vex".
2622 (define_insn "sse2_cvtsd2si"
2623 [(set (match_operand:SI 0 "register_operand" "=r,r")
2626 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2627 (parallel [(const_int 0)]))]
2628 UNSPEC_FIX_NOTRUNC))]
2630 "%vcvtsd2si\t{%1, %0|%0, %1}"
2631 [(set_attr "type" "sseicvt")
2632 (set_attr "athlon_decode" "double,vector")
2633 (set_attr "bdver1_decode" "double,double")
2634 (set_attr "prefix_rep" "1")
2635 (set_attr "prefix" "maybe_vex")
2636 (set_attr "mode" "SI")])
;; cvtsd2si on a scalar source: the DF operand (SSE register or memory) goes
;; through UNSPEC_FIX_NOTRUNC into an SI register.
2638 (define_insn "sse2_cvtsd2si_2"
2639 [(set (match_operand:SI 0 "register_operand" "=r,r")
2640 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2641 UNSPEC_FIX_NOTRUNC))]
2643 "%vcvtsd2si\t{%1, %0|%0, %1}"
2644 [(set_attr "type" "sseicvt")
2645 (set_attr "athlon_decode" "double,vector")
2646 (set_attr "amdfam10_decode" "double,double")
2647 (set_attr "bdver1_decode" "double,double")
2648 (set_attr "prefix_rep" "1")
2649 (set_attr "prefix" "maybe_vex")
2650 (set_attr "mode" "SI")])
;; 64-bit cvtsd2si on a vector source: element 0 of the V2DF operand goes
;; through UNSPEC_FIX_NOTRUNC into a DI register.  64-bit targets only.
;; The size suffix is written as "{q}" so that it is printed in AT&T syntax
;; only, matching sse_cvtss2siq; the AT&T output is unchanged.
2652 (define_insn "sse2_cvtsd2siq"
2653 [(set (match_operand:DI 0 "register_operand" "=r,r")
2656 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2657 (parallel [(const_int 0)]))]
2658 UNSPEC_FIX_NOTRUNC))]
2659 "TARGET_SSE2 && TARGET_64BIT"
2660 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2661 [(set_attr "type" "sseicvt")
2662 (set_attr "athlon_decode" "double,vector")
2663 (set_attr "bdver1_decode" "double,double")
2664 (set_attr "prefix_rep" "1")
2665 (set_attr "prefix" "maybe_vex")
2666 (set_attr "mode" "DI")])
;; 64-bit cvtsd2si on a scalar source: the DF operand goes through
;; UNSPEC_FIX_NOTRUNC into a DI register.  64-bit targets only.
;; The size suffix is written as "{q}" so that it is printed in AT&T syntax
;; only, matching sse_cvtss2siq_2; the AT&T output is unchanged.
2668 (define_insn "sse2_cvtsd2siq_2"
2669 [(set (match_operand:DI 0 "register_operand" "=r,r")
2670 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2671 UNSPEC_FIX_NOTRUNC))]
2672 "TARGET_SSE2 && TARGET_64BIT"
2673 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2674 [(set_attr "type" "sseicvt")
2675 (set_attr "athlon_decode" "double,vector")
2676 (set_attr "amdfam10_decode" "double,double")
2677 (set_attr "bdver1_decode" "double,double")
2678 (set_attr "prefix_rep" "1")
2679 (set_attr "prefix" "maybe_vex")
2680 (set_attr "mode" "DI")])
;; cvttsd2si: conversion of element 0 of the V2DF operand (SSE register or
;; memory) into an SI register; emits the "cvtt" (truncating) mnemonic.
2682 (define_insn "sse2_cvttsd2si"
2683 [(set (match_operand:SI 0 "register_operand" "=r,r")
2686 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2687 (parallel [(const_int 0)]))))]
2689 "%vcvttsd2si\t{%1, %0|%0, %1}"
2690 [(set_attr "type" "sseicvt")
2691 (set_attr "prefix_rep" "1")
2692 (set_attr "prefix" "maybe_vex")
2693 (set_attr "mode" "SI")
2694 (set_attr "athlon_decode" "double,vector")
2695 (set_attr "amdfam10_decode" "double,double")
2696 (set_attr "bdver1_decode" "double,double")])
;; 64-bit cvttsd2si: conversion of element 0 of the V2DF operand into a DI
;; register; 64-bit targets only.
;; The size suffix is written as "{q}" so that it is printed in AT&T syntax
;; only, matching sse_cvttss2siq; the AT&T output is unchanged.
2698 (define_insn "sse2_cvttsd2siq"
2699 [(set (match_operand:DI 0 "register_operand" "=r,r")
2702 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2703 (parallel [(const_int 0)]))))]
2704 "TARGET_SSE2 && TARGET_64BIT"
2705 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2706 [(set_attr "type" "sseicvt")
2707 (set_attr "prefix_rep" "1")
2708 (set_attr "prefix" "maybe_vex")
2709 (set_attr "mode" "DI")
2710 (set_attr "athlon_decode" "double,vector")
2711 (set_attr "amdfam10_decode" "double,double")
2712 (set_attr "bdver1_decode" "double,double")])
;; AVX vcvtdq2pd: signed V4SI (register or memory) to V4DF.
2714 (define_insn "avx_cvtdq2pd256"
2715 [(set (match_operand:V4DF 0 "register_operand" "=x")
2716 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2718 "vcvtdq2pd\t{%1, %0|%0, %1}"
2719 [(set_attr "type" "ssecvt")
2720 (set_attr "prefix" "vex")
2721 (set_attr "mode" "V4DF")])
;; AVX vcvtdq2pd applied to elements 0..3 of a V8SI operand, producing V4DF.
;; The operand is printed with the %x modifier.
2723 (define_insn "*avx_cvtdq2pd256_2"
2724 [(set (match_operand:V4DF 0 "register_operand" "=x")
2727 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2728 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
2730 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2731 [(set_attr "type" "ssecvt")
2732 (set_attr "prefix" "vex")
2733 (set_attr "mode" "V4DF")])
;; cvtdq2pd: elements 0 and 1 of the V4SI operand converted to V2DF.
;; Template carries the %v prefix ("maybe_vex").
2735 (define_insn "sse2_cvtdq2pd"
2736 [(set (match_operand:V2DF 0 "register_operand" "=x")
2739 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2740 (parallel [(const_int 0) (const_int 1)]))))]
2742 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2743 [(set_attr "type" "ssecvt")
2744 (set_attr "prefix" "maybe_vex")
2745 (set_attr "mode" "V2DF")])
;; AVX vcvtpd2dq, 256-bit source: V4DF to V4SI via UNSPEC_FIX_NOTRUNC.
;; The "{y}" suffix is printed in AT&T syntax only.
2747 (define_insn "avx_cvtpd2dq256"
2748 [(set (match_operand:V4SI 0 "register_operand" "=x")
2749 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2750 UNSPEC_FIX_NOTRUNC))]
2752 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2753 [(set_attr "type" "ssecvt")
2754 (set_attr "prefix" "vex")
2755 (set_attr "mode" "OI")])
;; Named expander for cvtpd2dq: V2DF source converted to V2SI via an unspec,
;; stored into a V4SI destination.  The preparation code sets operands[2]
;; to the V2SI zero vector, which the matching insn pattern accepts as its
;; const0_operand.
2757 (define_expand "sse2_cvtpd2dq"
2758 [(set (match_operand:V4SI 0 "register_operand" "")
2760 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2764 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq insn: V2DF source converted to V2SI via an unspec, combined with
;; a V2SI operand 2 that must be zero (const0_operand), into a V4SI
;; destination.  The output code picks "vcvtpd2dq{x}" when TARGET_AVX is
;; set and "cvtpd2dq" otherwise.
2766 (define_insn "*sse2_cvtpd2dq"
2767 [(set (match_operand:V4SI 0 "register_operand" "=x")
2769 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2771 (match_operand:V2SI 2 "const0_operand" "")))]
2773 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2774 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2775 [(set_attr "type" "ssecvt")
2776 (set_attr "prefix_rep" "1")
2777 (set_attr "prefix_data16" "0")
2778 (set_attr "prefix" "maybe_vex")
2779 (set_attr "mode" "TI")
2780 (set_attr "amdfam10_decode" "double")
2781 (set_attr "bdver1_decode" "double")])
;; AVX vcvttpd2dq, 256-bit source: V4DF to V4SI expressed with fix:
;; (truncating).  The "{y}" suffix is printed in AT&T syntax only.
2783 (define_insn "avx_cvttpd2dq256"
2784 [(set (match_operand:V4SI 0 "register_operand" "=x")
2785 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2787 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2788 [(set_attr "type" "ssecvt")
2789 (set_attr "prefix" "vex")
2790 (set_attr "mode" "OI")])
;; Named expander for cvttpd2dq: V2DF source converted to V2SI with fix:,
;; stored into a V4SI destination.  The preparation code sets operands[2]
;; to the V2SI zero vector, which the matching insn pattern accepts as its
;; const0_operand.
2792 (define_expand "sse2_cvttpd2dq"
2793 [(set (match_operand:V4SI 0 "register_operand" "")
2795 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2798 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq insn: V2DF source converted to V2SI with fix:, combined with a
;; V2SI operand 2 that must be zero (const0_operand), into a V4SI
;; destination.  The output code picks "vcvttpd2dq{x}" when TARGET_AVX is
;; set and "cvttpd2dq" otherwise.
2800 (define_insn "*sse2_cvttpd2dq"
2801 [(set (match_operand:V4SI 0 "register_operand" "=x")
2803 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2804 (match_operand:V2SI 2 "const0_operand" "")))]
2806 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2807 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2808 [(set_attr "type" "ssecvt")
2809 (set_attr "prefix" "maybe_vex")
2810 (set_attr "mode" "TI")
2811 (set_attr "amdfam10_decode" "double")
2812 (set_attr "bdver1_decode" "double")])
;; VEX form of cvtsd2ss: float_truncate of the V2DF operand 2 (register or
;; memory).  Operand 1 is an independent V4SF source register, so three
;; operands are printed.
2814 (define_insn "*avx_cvtsd2ss"
2815 [(set (match_operand:V4SF 0 "register_operand" "=x")
2818 (float_truncate:V2SF
2819 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2820 (match_operand:V4SF 1 "register_operand" "x")
2823 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2824 [(set_attr "type" "ssecvt")
2825 (set_attr "prefix" "vex")
2826 (set_attr "mode" "SF")])
;; cvtsd2ss: float_truncate of the V2DF operand 2; operand 1 is tied to the
;; V4SF destination.  Register and memory sources are separate alternatives
;; so the per-CPU *_decode attributes can differ between them.
2828 (define_insn "sse2_cvtsd2ss"
2829 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2832 (float_truncate:V2SF
2833 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2834 (match_operand:V4SF 1 "register_operand" "0,0")
2837 "cvtsd2ss\t{%2, %0|%0, %2}"
2838 [(set_attr "type" "ssecvt")
2839 (set_attr "athlon_decode" "vector,double")
2840 (set_attr "amdfam10_decode" "vector,double")
2841 (set_attr "bdver1_decode" "direct,direct")
2842 (set_attr "mode" "SF")])
;; VEX form of cvtss2sd: works on elements 0 and 1 selected from the V4SF
;; operand 2 (register or memory).  Operand 1 is an independent V2DF source
;; register, so three operands are printed.
2844 (define_insn "*avx_cvtss2sd"
2845 [(set (match_operand:V2DF 0 "register_operand" "=x")
2849 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2850 (parallel [(const_int 0) (const_int 1)])))
2851 (match_operand:V2DF 1 "register_operand" "x")
2854 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2855 [(set_attr "type" "ssecvt")
2856 (set_attr "prefix" "vex")
2857 (set_attr "mode" "DF")])
;; cvtss2sd: works on elements 0 and 1 selected from the V4SF operand 2;
;; operand 1 is tied to the V2DF destination.  Register and memory sources
;; are separate alternatives for the *_decode attributes.
2859 (define_insn "sse2_cvtss2sd"
2860 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2864 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2865 (parallel [(const_int 0) (const_int 1)])))
2866 (match_operand:V2DF 1 "register_operand" "0,0")
2869 "cvtss2sd\t{%2, %0|%0, %2}"
2870 [(set_attr "type" "ssecvt")
2871 (set_attr "amdfam10_decode" "vector,double")
2872 (set_attr "bdver1_decode" "direct,direct")
2873 (set_attr "mode" "DF")])
;; AVX vcvtpd2ps, 256-bit source: float_truncate of V4DF to V4SF.
;; The "{y}" suffix is printed in AT&T syntax only.
2875 (define_insn "avx_cvtpd2ps256"
2876 [(set (match_operand:V4SF 0 "register_operand" "=x")
2877 (float_truncate:V4SF
2878 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2880 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2881 [(set_attr "type" "ssecvt")
2882 (set_attr "prefix" "vex")
2883 (set_attr "mode" "V4SF")])
;; Named expander for cvtpd2ps: float_truncate of the V2DF source to V2SF,
;; stored into a V4SF destination.  The preparation code sets operands[2]
;; to the V2SF zero vector, which the matching insn pattern accepts as its
;; const0_operand.
2885 (define_expand "sse2_cvtpd2ps"
2886 [(set (match_operand:V4SF 0 "register_operand" "")
2888 (float_truncate:V2SF
2889 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2892 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps insn: float_truncate of the V2DF source to V2SF, combined with a
;; V2SF operand 2 that must be zero (const0_operand), into a V4SF
;; destination.  The output code picks "vcvtpd2ps{x}" when TARGET_AVX is
;; set and "cvtpd2ps" otherwise.
2894 (define_insn "*sse2_cvtpd2ps"
2895 [(set (match_operand:V4SF 0 "register_operand" "=x")
2897 (float_truncate:V2SF
2898 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2899 (match_operand:V2SF 2 "const0_operand" "")))]
2901 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
2902 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
2903 [(set_attr "type" "ssecvt")
2904 (set_attr "prefix_data16" "1")
2905 (set_attr "prefix" "maybe_vex")
2906 (set_attr "mode" "V4SF")
2907 (set_attr "amdfam10_decode" "double")
2908 (set_attr "bdver1_decode" "double")])
;; AVX vcvtps2pd: V4SF (register or memory) to V4DF.
2910 (define_insn "avx_cvtps2pd256"
2911 [(set (match_operand:V4DF 0 "register_operand" "=x")
2913 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2915 "vcvtps2pd\t{%1, %0|%0, %1}"
2916 [(set_attr "type" "ssecvt")
2917 (set_attr "prefix" "vex")
2918 (set_attr "mode" "V4DF")])
;; AVX vcvtps2pd applied to elements 0..3 of a V8SF operand, producing V4DF.
;; The operand is printed with the %x modifier.
2920 (define_insn "*avx_cvtps2pd256_2"
2921 [(set (match_operand:V4DF 0 "register_operand" "=x")
2924 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2925 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
2927 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2928 [(set_attr "type" "ssecvt")
2929 (set_attr "prefix" "vex")
2930 (set_attr "mode" "V4DF")])
;; cvtps2pd: elements 0 and 1 of the V4SF operand converted to V2DF.
;; Template carries the %v prefix ("maybe_vex"); data16 is explicitly off.
2932 (define_insn "sse2_cvtps2pd"
2933 [(set (match_operand:V2DF 0 "register_operand" "=x")
2936 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2937 (parallel [(const_int 0) (const_int 1)]))))]
2939 "%vcvtps2pd\t{%1, %0|%0, %1}"
2940 [(set_attr "type" "ssecvt")
2941 (set_attr "prefix" "maybe_vex")
2942 (set_attr "mode" "V2DF")
2943 (set_attr "prefix_data16" "0")
2944 (set_attr "amdfam10_decode" "direct")
2945 (set_attr "bdver1_decode" "double")])
;; Unpack the high half of a V4SF into V2DF in two steps: a selection
;; involving operand 1 (selector list starting at index 6), then a set of
;; the V2DF result that uses elements 0 and 1.  operands[2] is a fresh V4SF
;; temporary -- presumably the intermediate between the two steps; confirm
;; against the full pattern.
2947 (define_expand "vec_unpacks_hi_v4sf"
2952 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2953 (parallel [(const_int 6)
2957 (set (match_operand:V2DF 0 "register_operand" "")
2961 (parallel [(const_int 0) (const_int 1)]))))]
2963 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Unpack the high half of a V8SF into V4DF: a selection from operand 1
;; whose selector list starts at index 4, followed by a set of the V4DF
;; result.  operands[2] is a fresh V4SF temporary -- presumably the
;; intermediate holding the selected half; confirm against the full pattern.
2965 (define_expand "vec_unpacks_hi_v8sf"
2968 (match_operand:V8SF 1 "nonimmediate_operand" "")
2969 (parallel [(const_int 4)
2973 (set (match_operand:V4DF 0 "register_operand" "")
2978 operands[2] = gen_reg_rtx (V4SFmode);
;; Unpack the low half of a V4SF into V2DF: a single set that uses
;; elements 0 and 1 of operand 1.
2981 (define_expand "vec_unpacks_lo_v4sf"
2982 [(set (match_operand:V2DF 0 "register_operand" "")
2985 (match_operand:V4SF 1 "nonimmediate_operand" "")
2986 (parallel [(const_int 0) (const_int 1)]))))]
;; Low-half unpack: V4DF operand 0 is computed from elements 0-3 of the
;; V8SF operand 1.
2989 (define_expand "vec_unpacks_lo_v8sf"
2990 [(set (match_operand:V4DF 0 "register_operand" "")
2993 (match_operand:V8SF 1 "nonimmediate_operand" "")
2994 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
;; V8HI -> V4SF in two emitted insns: vec_unpacks_hi_v8hi widens into a
;; V4SI temporary, then sse2_cvtdq2ps converts that into operand 0.
2997 (define_expand "vec_unpacks_float_hi_v8hi"
2998 [(match_operand:V4SF 0 "register_operand" "")
2999 (match_operand:V8HI 1 "register_operand" "")]
3002 rtx tmp = gen_reg_rtx (V4SImode);
3004 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3005 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: vec_unpacks_lo_v8hi widens into a
;; V4SI temporary, then sse2_cvtdq2ps converts that into operand 0.
3009 (define_expand "vec_unpacks_float_lo_v8hi"
3010 [(match_operand:V4SF 0 "register_operand" "")
3011 (match_operand:V8HI 1 "register_operand" "")]
3014 rtx tmp = gen_reg_rtx (V4SImode);
3016 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3017 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: vec_unpacku_hi_v8hi widens into a
;; V4SI temporary, then sse2_cvtdq2ps converts that into operand 0.
3021 (define_expand "vec_unpacku_float_hi_v8hi"
3022 [(match_operand:V4SF 0 "register_operand" "")
3023 (match_operand:V8HI 1 "register_operand" "")]
3026 rtx tmp = gen_reg_rtx (V4SImode);
3028 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3029 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two emitted insns: vec_unpacku_lo_v8hi widens into a
;; V4SI temporary, then sse2_cvtdq2ps converts that into operand 0.
3033 (define_expand "vec_unpacku_float_lo_v8hi"
3034 [(match_operand:V4SF 0 "register_operand" "")
3035 (match_operand:V8HI 1 "register_operand" "")]
3038 rtx tmp = gen_reg_rtx (V4SImode);
3040 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3041 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expander from a V4SI input (operand 1) to a V2DF result
;; (operand 0).  The first step selects from operand 1 starting at
;; element 2; the second reads elements 0 and 1.  operands[2] is a fresh
;; V4SI pseudo for the intermediate value.
3045 (define_expand "vec_unpacks_float_hi_v4si"
3048 (match_operand:V4SI 1 "nonimmediate_operand" "")
3049 (parallel [(const_int 2)
3053 (set (match_operand:V2DF 0 "register_operand" "")
3057 (parallel [(const_int 0) (const_int 1)]))))]
3059 "operands[2] = gen_reg_rtx (V4SImode);")
;; V2DF operand 0 is computed from elements 0 and 1 of the V4SI operand 1.
3061 (define_expand "vec_unpacks_float_lo_v4si"
3062 [(set (match_operand:V2DF 0 "register_operand" "")
3065 (match_operand:V4SI 1 "nonimmediate_operand" "")
3066 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander from a V8SI input (operand 1) to a V4DF result (operand 0).
;; The first step selects from operand 1 starting at element 4;
;; operands[2] is a fresh V4SI pseudo for the intermediate value.
3069 (define_expand "vec_unpacks_float_hi_v8si"
3072 (match_operand:V8SI 1 "nonimmediate_operand" "")
3073 (parallel [(const_int 4)
3077 (set (match_operand:V4DF 0 "register_operand" "")
3081 "operands[2] = gen_reg_rtx (V4SImode);")
;; V4DF operand 0 is computed from elements 0-3 of the V8SI operand 1.
3083 (define_expand "vec_unpacks_float_lo_v8si"
3084 [(set (match_operand:V4DF 0 "register_operand" "")
3087 (match_operand:V8SI 1 "nonimmediate_operand" "")
3088 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
;; Multi-step expander from a V4SI input (operand 1) to a V2DF result
;; (operand 0).  The preparation code sets up:
;;   operands[3]    - an all-zero V2DF vector in a register
;;   operands[4]    - a V2DF vector built from the constant 2**32
;;   operands[5]    - a V4SI temporary
;;   operands[6..8] - V2DF temporaries
;; The visible steps compare operand 6 against zero (lt), AND operand 7
;; with the 2**32 vector, and add operand 8 to operand 6.
;; NOTE(review): presumably this adds 2**32 to lanes that converted as
;; negative, to obtain the unsigned value -- confirm.
3091 (define_expand "vec_unpacku_float_hi_v4si"
3094 (match_operand:V4SI 1 "nonimmediate_operand" "")
3095 (parallel [(const_int 2)
3103 (parallel [(const_int 0) (const_int 1)]))))
3105 (lt:V2DF (match_dup 6) (match_dup 3)))
3107 (and:V2DF (match_dup 7) (match_dup 4)))
3108 (set (match_operand:V2DF 0 "register_operand" "")
3109 (plus:V2DF (match_dup 6) (match_dup 8)))]
3112 REAL_VALUE_TYPE TWO32r;
3116 real_ldexp (&TWO32r, &dconst1, 32);
3117 x = const_double_from_real_value (TWO32r, DFmode);
3119 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3120 operands[4] = force_reg (V2DFmode,
3121 ix86_build_const_vector (V2DFmode, 1, x));
3123 operands[5] = gen_reg_rtx (V4SImode);
3125 for (i = 6; i < 9; i++)
3126 operands[i] = gen_reg_rtx (V2DFmode);
;; Multi-step expander from elements 0 and 1 of a V4SI input (operand 1)
;; to a V2DF result (operand 0).  The preparation code sets up:
;;   operands[3]    - an all-zero V2DF vector in a register
;;   operands[4]    - a V2DF vector built from the constant 2**32
;;   operands[5..7] - V2DF temporaries
;; The visible steps compare operand 5 against zero (lt), AND operand 6
;; with the 2**32 vector, and add operand 7 to operand 5.
;; NOTE(review): presumably this adds 2**32 to lanes that converted as
;; negative, to obtain the unsigned value -- confirm.
3129 (define_expand "vec_unpacku_float_lo_v4si"
3133 (match_operand:V4SI 1 "nonimmediate_operand" "")
3134 (parallel [(const_int 0) (const_int 1)]))))
3136 (lt:V2DF (match_dup 5) (match_dup 3)))
3138 (and:V2DF (match_dup 6) (match_dup 4)))
3139 (set (match_operand:V2DF 0 "register_operand" "")
3140 (plus:V2DF (match_dup 5) (match_dup 7)))]
3143 REAL_VALUE_TYPE TWO32r;
3147 real_ldexp (&TWO32r, &dconst1, 32);
3148 x = const_double_from_real_value (TWO32r, DFmode);
3150 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3151 operands[4] = force_reg (V2DFmode,
3152 ix86_build_const_vector (V2DFmode, 1, x));
3154 for (i = 5; i < 8; i++)
3155 operands[i] = gen_reg_rtx (V2DFmode);
;; Packs two V4DF inputs (operands 1 and 2) into one V8SF result
;; (operand 0).  Each input is float_truncate'd to V4SF; operands[3] and
;; operands[4] are the V4SF temporaries allocated for those results.
3158 (define_expand "vec_pack_trunc_v4df"
3160 (float_truncate:V4SF
3161 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3163 (float_truncate:V4SF
3164 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3165 (set (match_operand:V8SF 0 "register_operand" "")
3171 operands[3] = gen_reg_rtx (V4SFmode);
3172 operands[4] = gen_reg_rtx (V4SFmode);
;; Packs two V2DF inputs into one V4SF result: each input goes through
;; sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and sse_movlhps
;; then combines r1 and r2 into operand 0.
3175 (define_expand "vec_pack_trunc_v2df"
3176 [(match_operand:V4SF 0 "register_operand" "")
3177 (match_operand:V2DF 1 "nonimmediate_operand" "")
3178 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3183 r1 = gen_reg_rtx (V4SFmode);
3184 r2 = gen_reg_rtx (V4SFmode);
3186 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3187 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3188 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs into one V4SI result: each input goes through
;; sse2_cvttpd2dq into a V4SI temporary (r1, r2).  The temporaries are
;; then combined with vec_interleave_lowv2di, with all three operands
;; viewed as V2DI via gen_lowpart.
3192 (define_expand "vec_pack_sfix_trunc_v2df"
3193 [(match_operand:V4SI 0 "register_operand" "")
3194 (match_operand:V2DF 1 "nonimmediate_operand" "")
3195 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3200 r1 = gen_reg_rtx (V4SImode);
3201 r2 = gen_reg_rtx (V4SImode);
3203 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3204 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3205 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3206 gen_lowpart (V2DImode, r1),
3207 gen_lowpart (V2DImode, r2)));
;; Same shape as vec_pack_sfix_trunc_v2df, but each V2DF input goes
;; through sse2_cvtpd2dq instead of sse2_cvttpd2dq.  The two V4SI
;; temporaries are combined with vec_interleave_lowv2di on V2DI lowparts.
3211 (define_expand "vec_pack_sfix_v2df"
3212 [(match_operand:V4SI 0 "register_operand" "")
3213 (match_operand:V2DF 1 "nonimmediate_operand" "")
3214 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3219 r1 = gen_reg_rtx (V4SImode);
3220 r2 = gen_reg_rtx (V4SImode);
3222 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3223 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3224 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3225 gen_lowpart (V2DImode, r1),
3226 gen_lowpart (V2DImode, r2)));
3230 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3232 ;; Parallel single-precision floating point element swizzling
3234 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps.  The operands are first passed
;; through ix86_fixup_binary_operands, which returns the destination to
;; use; the insn is emitted into it, and the result is copied back to
;; operand 0 when the two differ.
3236 (define_expand "sse_movhlps_exp"
3237 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3240 (match_operand:V4SF 1 "nonimmediate_operand" "")
3241 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3242 (parallel [(const_int 6)
3248 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3250 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3252 /* Fix up the destination if needed. */
3253 if (dst != operands[0])
3254 emit_move_insn (operands[0], dst);
;; AVX movhlps pattern; rejected when operands 1 and 2 are both memory.
;; Alternatives:
;;   0: all registers                     -> vmovhlps
;;   1: operand 2 in offsettable memory   -> vmovlps, printed with %H2
;;   2: memory destination tied to op 1   -> vmovhps store of operand 2
3259 (define_insn "*avx_movhlps"
3260 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3263 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3264 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3265 (parallel [(const_int 6)
3269 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3271 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3272 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3273 vmovhps\t{%2, %0|%0, %2}"
3274 [(set_attr "type" "ssemov")
3275 (set_attr "prefix" "vex")
3276 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Two-operand SSE form of the movhlps pattern: operand 1 is tied to
;; operand 0 in every alternative.  Rejected when operands 1 and 2 are
;; both memory.  Alternatives:
;;   0: register source                   -> movhlps
;;   1: operand 2 in offsettable memory   -> movlps, printed with %H2
;;   2: memory destination                -> movhps store of operand 2
3278 (define_insn "sse_movhlps"
3279 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3282 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3283 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3284 (parallel [(const_int 6)
3288 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3290 movhlps\t{%2, %0|%0, %2}
3291 movlps\t{%H2, %0|%0, %H2}
3292 movhps\t{%2, %0|%0, %2}"
3293 [(set_attr "type" "ssemov")
3294 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps.  The operands are first passed
;; through ix86_fixup_binary_operands, which returns the destination to
;; use; the insn is emitted into it, and the result is copied back to
;; operand 0 when the two differ.
3296 (define_expand "sse_movlhps_exp"
3297 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3300 (match_operand:V4SF 1 "nonimmediate_operand" "")
3301 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3302 (parallel [(const_int 0)
3308 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3310 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3312 /* Fix up the destination if needed. */
3313 if (dst != operands[0])
3314 emit_move_insn (operands[0], dst);
;; AVX movlhps pattern, gated additionally on ix86_binary_operator_ok.
;; Alternatives:
;;   0: all registers                     -> vmovlhps
;;   1: operand 2 in memory               -> vmovhps load
;;   2: offsettable memory destination tied to operand 1
;;                                        -> vmovlps store to %H0
3319 (define_insn "*avx_movlhps"
3320 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3323 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3324 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3325 (parallel [(const_int 0)
3329 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3331 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3332 vmovhps\t{%2, %1, %0|%0, %1, %2}
3333 vmovlps\t{%2, %H0|%H0, %2}"
3334 [(set_attr "type" "ssemov")
3335 (set_attr "prefix" "vex")
3336 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Two-operand SSE form of the movlhps pattern: operand 1 is tied to
;; operand 0 in every alternative.  Gated additionally on
;; ix86_binary_operator_ok.  Alternatives:
;;   0: register source                   -> movlhps
;;   1: operand 2 in memory               -> movhps load
;;   2: offsettable memory destination    -> movlps store to %H0
3338 (define_insn "sse_movlhps"
3339 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3342 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3343 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3344 (parallel [(const_int 0)
3348 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3350 movlhps\t{%2, %0|%0, %2}
3351 movhps\t{%2, %0|%0, %2}
3352 movlps\t{%2, %H0|%H0, %2}"
3353 [(set_attr "type" "ssemov")
3354 (set_attr "mode" "V4SF,V2SF,V2SF")])
3356 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on V8SF.  With operand 1 at indices 0-7 and operand 2 at
;; indices 8-15, the selector takes pairs (2,10) (3,11) from the low four
;; elements of each input and (6,14) (7,15) from the high four.
3357 (define_insn "avx_unpckhps256"
3358 [(set (match_operand:V8SF 0 "register_operand" "=x")
3361 (match_operand:V8SF 1 "register_operand" "x")
3362 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3363 (parallel [(const_int 2) (const_int 10)
3364 (const_int 3) (const_int 11)
3365 (const_int 6) (const_int 14)
3366 (const_int 7) (const_int 15)])))]
3368 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3369 [(set_attr "type" "sselog")
3370 (set_attr "prefix" "vex")
3371 (set_attr "mode" "V8SF")])
;; Three-operand vunpckhps on V4SF: the selector picks elements
;; 2, 6, 3, 7 from operands 1 and 2 (operand 2 starts at index 4).
3373 (define_insn "*avx_interleave_highv4sf"
3374 [(set (match_operand:V4SF 0 "register_operand" "=x")
3377 (match_operand:V4SF 1 "register_operand" "x")
3378 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3379 (parallel [(const_int 2) (const_int 6)
3380 (const_int 3) (const_int 7)])))]
3382 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3383 [(set_attr "type" "sselog")
3384 (set_attr "prefix" "vex")
3385 (set_attr "mode" "V4SF")])
;; Three-step V8SF expander.  The first two selectors are the ones used
;; by avx_unpcklps256 and avx_unpckhps256.  The third takes elements 4-7
;; and 12-15 into operand 0.  operands[3] and operands[4] are fresh V8SF
;; pseudos.
;; NOTE(review): presumably they hold the two intermediate results that
;; feed the final selection -- confirm.
3387 (define_expand "vec_interleave_highv8sf"
3391 (match_operand:V8SF 1 "register_operand" "x")
3392 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3393 (parallel [(const_int 0) (const_int 8)
3394 (const_int 1) (const_int 9)
3395 (const_int 4) (const_int 12)
3396 (const_int 5) (const_int 13)])))
3402 (parallel [(const_int 2) (const_int 10)
3403 (const_int 3) (const_int 11)
3404 (const_int 6) (const_int 14)
3405 (const_int 7) (const_int 15)])))
3406 (set (match_operand:V8SF 0 "register_operand" "")
3411 (parallel [(const_int 4) (const_int 5)
3412 (const_int 6) (const_int 7)
3413 (const_int 12) (const_int 13)
3414 (const_int 14) (const_int 15)])))]
3417 operands[3] = gen_reg_rtx (V8SFmode);
3418 operands[4] = gen_reg_rtx (V8SFmode);
;; Two-operand unpckhps on V4SF: operand 1 is tied to the destination and
;; the selector picks elements 2, 6, 3, 7 from operands 1 and 2.
3421 (define_insn "vec_interleave_highv4sf"
3422 [(set (match_operand:V4SF 0 "register_operand" "=x")
3425 (match_operand:V4SF 1 "register_operand" "0")
3426 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3427 (parallel [(const_int 2) (const_int 6)
3428 (const_int 3) (const_int 7)])))]
3430 "unpckhps\t{%2, %0|%0, %2}"
3431 [(set_attr "type" "sselog")
3432 (set_attr "mode" "V4SF")])
3434 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on V8SF.  With operand 1 at indices 0-7 and operand 2 at
;; indices 8-15, the selector takes pairs (0,8) (1,9) from the low four
;; elements of each input and (4,12) (5,13) from the high four.
3435 (define_insn "avx_unpcklps256"
3436 [(set (match_operand:V8SF 0 "register_operand" "=x")
3439 (match_operand:V8SF 1 "register_operand" "x")
3440 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3441 (parallel [(const_int 0) (const_int 8)
3442 (const_int 1) (const_int 9)
3443 (const_int 4) (const_int 12)
3444 (const_int 5) (const_int 13)])))]
3446 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3447 [(set_attr "type" "sselog")
3448 (set_attr "prefix" "vex")
3449 (set_attr "mode" "V8SF")])
;; Three-operand vunpcklps on V4SF: the selector picks elements
;; 0, 4, 1, 5 from operands 1 and 2 (operand 2 starts at index 4).
3451 (define_insn "*avx_interleave_lowv4sf"
3452 [(set (match_operand:V4SF 0 "register_operand" "=x")
3455 (match_operand:V4SF 1 "register_operand" "x")
3456 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3457 (parallel [(const_int 0) (const_int 4)
3458 (const_int 1) (const_int 5)])))]
3460 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3461 [(set_attr "type" "sselog")
3462 (set_attr "prefix" "vex")
3463 (set_attr "mode" "V4SF")])
;; Three-step V8SF expander.  The first two selectors are the ones used
;; by avx_unpcklps256 and avx_unpckhps256.  The third takes elements 0-3
;; and 8-11 into operand 0.  operands[3] and operands[4] are fresh V8SF
;; pseudos.
;; NOTE(review): presumably they hold the two intermediate results that
;; feed the final selection -- confirm.
3465 (define_expand "vec_interleave_lowv8sf"
3469 (match_operand:V8SF 1 "register_operand" "x")
3470 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3471 (parallel [(const_int 0) (const_int 8)
3472 (const_int 1) (const_int 9)
3473 (const_int 4) (const_int 12)
3474 (const_int 5) (const_int 13)])))
3480 (parallel [(const_int 2) (const_int 10)
3481 (const_int 3) (const_int 11)
3482 (const_int 6) (const_int 14)
3483 (const_int 7) (const_int 15)])))
3484 (set (match_operand:V8SF 0 "register_operand" "")
3489 (parallel [(const_int 0) (const_int 1)
3490 (const_int 2) (const_int 3)
3491 (const_int 8) (const_int 9)
3492 (const_int 10) (const_int 11)])))]
3495 operands[3] = gen_reg_rtx (V8SFmode);
3496 operands[4] = gen_reg_rtx (V8SFmode);
;; Two-operand unpcklps on V4SF: operand 1 is tied to the destination and
;; the selector picks elements 0, 4, 1, 5 from operands 1 and 2.
3499 (define_insn "vec_interleave_lowv4sf"
3500 [(set (match_operand:V4SF 0 "register_operand" "=x")
3503 (match_operand:V4SF 1 "register_operand" "0")
3504 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3505 (parallel [(const_int 0) (const_int 4)
3506 (const_int 1) (const_int 5)])))]
3508 "unpcklps\t{%2, %0|%0, %2}"
3509 [(set_attr "type" "sselog")
3510 (set_attr "mode" "V4SF")])
3512 ;; These are modeled with the same vec_concat as the others so that we
3513 ;; capture users of shufps that can use the new instructions
;; vmovshdup on V8SF: the selector duplicates each odd-indexed element
;; (1,1,3,3,5,5,7,7).
3514 (define_insn "avx_movshdup256"
3515 [(set (match_operand:V8SF 0 "register_operand" "=x")
3518 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3520 (parallel [(const_int 1) (const_int 1)
3521 (const_int 3) (const_int 3)
3522 (const_int 5) (const_int 5)
3523 (const_int 7) (const_int 7)])))]
3525 "vmovshdup\t{%1, %0|%0, %1}"
3526 [(set_attr "type" "sse")
3527 (set_attr "prefix" "vex")
3528 (set_attr "mode" "V8SF")])
;; movshdup on V4SF; the selector starts at element 1.  The %v template
;; prefix and "maybe_vex" let one pattern serve both encodings; a rep
;; prefix is counted (prefix_rep "1").
3530 (define_insn "sse3_movshdup"
3531 [(set (match_operand:V4SF 0 "register_operand" "=x")
3534 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3536 (parallel [(const_int 1)
3541 "%vmovshdup\t{%1, %0|%0, %1}"
3542 [(set_attr "type" "sse")
3543 (set_attr "prefix_rep" "1")
3544 (set_attr "prefix" "maybe_vex")
3545 (set_attr "mode" "V4SF")])
;; vmovsldup on V8SF: the selector duplicates each even-indexed element
;; (0,0,2,2,4,4,6,6).
3547 (define_insn "avx_movsldup256"
3548 [(set (match_operand:V8SF 0 "register_operand" "=x")
3551 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3553 (parallel [(const_int 0) (const_int 0)
3554 (const_int 2) (const_int 2)
3555 (const_int 4) (const_int 4)
3556 (const_int 6) (const_int 6)])))]
3558 "vmovsldup\t{%1, %0|%0, %1}"
3559 [(set_attr "type" "sse")
3560 (set_attr "prefix" "vex")
3561 (set_attr "mode" "V8SF")])
;; movsldup on V4SF; the selector starts at element 0.  The %v template
;; prefix and "maybe_vex" let one pattern serve both encodings; a rep
;; prefix is counted (prefix_rep "1").
3563 (define_insn "sse3_movsldup"
3564 [(set (match_operand:V4SF 0 "register_operand" "=x")
3567 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3569 (parallel [(const_int 0)
3574 "%vmovsldup\t{%1, %0|%0, %1}"
3575 [(set_attr "type" "sse")
3576 (set_attr "prefix_rep" "1")
3577 (set_attr "prefix" "maybe_vex")
3578 (set_attr "mode" "V4SF")])
;; Expands an 8-bit shufps immediate (operand 3) into the eight explicit
;; element indices that avx_shufps256_1 expects.  Each 2-bit field of the
;; mask is used twice: fields 0 and 1 index operand 1 (offsets +0 and +4),
;; fields 2 and 3 index operand 2 (offsets +8 and +12).
3580 (define_expand "avx_shufps256"
3581 [(match_operand:V8SF 0 "register_operand" "")
3582 (match_operand:V8SF 1 "register_operand" "")
3583 (match_operand:V8SF 2 "nonimmediate_operand" "")
3584 (match_operand:SI 3 "const_int_operand" "")]
3587 int mask = INTVAL (operands[3]);
3588 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3589 GEN_INT ((mask >> 0) & 3),
3590 GEN_INT ((mask >> 2) & 3),
3591 GEN_INT (((mask >> 4) & 3) + 8),
3592 GEN_INT (((mask >> 6) & 3) + 8),
3593 GEN_INT (((mask >> 0) & 3) + 4),
3594 GEN_INT (((mask >> 2) & 3) + 4),
3595 GEN_INT (((mask >> 4) & 3) + 12),
3596 GEN_INT (((mask >> 6) & 3) + 12)));
3600 ;; Each two-bit field of the mask selects 2 elements, one per 128-bit half.
;; vshufps on V8SF with the selection given as eight explicit indices
;; (operands 3-10).  The insn condition requires operands 7-10 to equal
;; operands 3-6 plus 4.  The output code rebuilds the 8-bit immediate from
;; operands 3-6 (subtracting the +8 bias of operands 5 and 6) and stores
;; it back into operands[3] for printing.
3601 (define_insn "avx_shufps256_1"
3602 [(set (match_operand:V8SF 0 "register_operand" "=x")
3605 (match_operand:V8SF 1 "register_operand" "x")
3606 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3607 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3608 (match_operand 4 "const_0_to_3_operand" "")
3609 (match_operand 5 "const_8_to_11_operand" "")
3610 (match_operand 6 "const_8_to_11_operand" "")
3611 (match_operand 7 "const_4_to_7_operand" "")
3612 (match_operand 8 "const_4_to_7_operand" "")
3613 (match_operand 9 "const_12_to_15_operand" "")
3614 (match_operand 10 "const_12_to_15_operand" "")])))]
3616 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3617 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3618 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3619 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3622 mask = INTVAL (operands[3]);
3623 mask |= INTVAL (operands[4]) << 2;
3624 mask |= (INTVAL (operands[5]) - 8) << 4;
3625 mask |= (INTVAL (operands[6]) - 8) << 6;
3626 operands[3] = GEN_INT (mask);
3628 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3630 [(set_attr "type" "sselog")
3631 (set_attr "length_immediate" "1")
3632 (set_attr "prefix" "vex")
3633 (set_attr "mode" "V8SF")])
;; Expands an 8-bit shufps immediate (operand 3) into the four explicit
;; element indices that sse_shufps_v4sf expects: fields 0 and 1 index
;; operand 1 (0-3), fields 2 and 3 index operand 2 (biased by +4).
3635 (define_expand "sse_shufps"
3636 [(match_operand:V4SF 0 "register_operand" "")
3637 (match_operand:V4SF 1 "register_operand" "")
3638 (match_operand:V4SF 2 "nonimmediate_operand" "")
3639 (match_operand:SI 3 "const_int_operand" "")]
3642 int mask = INTVAL (operands[3]);
3643 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3644 GEN_INT ((mask >> 0) & 3),
3645 GEN_INT ((mask >> 2) & 3),
3646 GEN_INT (((mask >> 4) & 3) + 4),
3647 GEN_INT (((mask >> 6) & 3) + 4)));
;; Three-operand vshufps for each mode of the SSEMODE4S iterator.
;; Operands 3-6 are explicit element indices into the concatenation of
;; operands 1 and 2.  The output code folds them back into one 8-bit
;; immediate (removing the +4 bias of operands 5 and 6) and stores it in
;; operands[3] for printing.
3651 (define_insn "*avx_shufps_<mode>"
3652 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3653 (vec_select:SSEMODE4S
3654 (vec_concat:<ssedoublesizemode>
3655 (match_operand:SSEMODE4S 1 "register_operand" "x")
3656 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3657 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3658 (match_operand 4 "const_0_to_3_operand" "")
3659 (match_operand 5 "const_4_to_7_operand" "")
3660 (match_operand 6 "const_4_to_7_operand" "")])))]
3664 mask |= INTVAL (operands[3]) << 0;
3665 mask |= INTVAL (operands[4]) << 2;
3666 mask |= (INTVAL (operands[5]) - 4) << 4;
3667 mask |= (INTVAL (operands[6]) - 4) << 6;
3668 operands[3] = GEN_INT (mask);
3670 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3672 [(set_attr "type" "sselog")
3673 (set_attr "length_immediate" "1")
3674 (set_attr "prefix" "vex")
3675 (set_attr "mode" "V4SF")])
;; Two-operand shufps for each mode of the SSEMODE4S iterator; operand 1
;; is tied to the destination.  Operands 3-6 are explicit element
;; indices.  The output code folds them back into one 8-bit immediate
;; (removing the +4 bias of operands 5 and 6) and stores it in
;; operands[3] for printing.
3677 (define_insn "sse_shufps_<mode>"
3678 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3679 (vec_select:SSEMODE4S
3680 (vec_concat:<ssedoublesizemode>
3681 (match_operand:SSEMODE4S 1 "register_operand" "0")
3682 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3683 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3684 (match_operand 4 "const_0_to_3_operand" "")
3685 (match_operand 5 "const_4_to_7_operand" "")
3686 (match_operand 6 "const_4_to_7_operand" "")])))]
3690 mask |= INTVAL (operands[3]) << 0;
3691 mask |= INTVAL (operands[4]) << 2;
3692 mask |= (INTVAL (operands[5]) - 4) << 4;
3693 mask |= (INTVAL (operands[6]) - 4) << 6;
3694 operands[3] = GEN_INT (mask);
3696 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3698 [(set_attr "type" "sselog")
3699 (set_attr "length_immediate" "1")
3700 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of a V4SF operand into a V2SF destination.
;; Alternatives:
;;   0: memory destination                -> movhps store
;;   1: register to register              -> movhlps
;;   2: offsettable memory source         -> movlps load from %H1
;; All templates carry the %v prefix ("maybe_vex").
3702 (define_insn "sse_storehps"
3703 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3705 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3706 (parallel [(const_int 2) (const_int 3)])))]
3709 %vmovhps\t{%1, %0|%0, %1}
3710 %vmovhlps\t{%1, %d0|%d0, %1}
3711 %vmovlps\t{%H1, %d0|%d0, %H1}"
3712 [(set_attr "type" "ssemov")
3713 (set_attr "prefix" "maybe_vex")
3714 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadhps.  The operands are first passed
;; through ix86_fixup_binary_operands, which returns the destination to
;; use; the insn is emitted into it, and the result is copied back to
;; operand 0 when the two differ.
3716 (define_expand "sse_loadhps_exp"
3717 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3720 (match_operand:V4SF 1 "nonimmediate_operand" "")
3721 (parallel [(const_int 0) (const_int 1)]))
3722 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3725 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3727 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3729 /* Fix up the destination if needed. */
3730 if (dst != operands[0])
3731 emit_move_insn (operands[0], dst);
;; AVX pattern combining elements 0 and 1 of V4SF operand 1 with the V2SF
;; operand 2.  Alternatives:
;;   0: operand 2 in memory               -> vmovhps load
;;   1: all registers                     -> vmovlhps
;;   2: offsettable memory destination tied to operand 1
;;                                        -> vmovlps store to %H0
3736 (define_insn "*avx_loadhps"
3737 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3740 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3741 (parallel [(const_int 0) (const_int 1)]))
3742 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3745 vmovhps\t{%2, %1, %0|%0, %1, %2}
3746 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3747 vmovlps\t{%2, %H0|%H0, %2}"
3748 [(set_attr "type" "ssemov")
3749 (set_attr "prefix" "vex")
3750 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Two-operand SSE form of the loadhps pattern: operand 1 is tied to
;; operand 0 in every alternative.  Alternatives:
;;   0: operand 2 in memory               -> movhps load
;;   1: register source                   -> movlhps
;;   2: offsettable memory destination    -> movlps store to %H0
3752 (define_insn "sse_loadhps"
3753 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3756 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3757 (parallel [(const_int 0) (const_int 1)]))
3758 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3761 movhps\t{%2, %0|%0, %2}
3762 movlhps\t{%2, %0|%0, %2}
3763 movlps\t{%2, %H0|%H0, %2}"
3764 [(set_attr "type" "ssemov")
3765 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX pattern extracting elements 0 and 1 of a V4SF operand into a V2SF
;; destination.  Alternatives:
;;   0: memory destination                -> vmovlps store
;;   1: register to register              -> vmovaps
;;   2: memory source                     -> vmovlps load (operand 0 is
;;                                           also printed as a source)
;; The "mode" attribute of the vmovaps alternative is V4SF, matching the
;; non-AVX twin sse_storelps (it previously read V2DF).
3767 (define_insn "*avx_storelps"
3768 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3770 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3771 (parallel [(const_int 0) (const_int 1)])))]
3774 vmovlps\t{%1, %0|%0, %1}
3775 vmovaps\t{%1, %0|%0, %1}
3776 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3777 [(set_attr "type" "ssemov")
3778 (set_attr "prefix" "vex")
3779 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE pattern extracting elements 0 and 1 of a V4SF operand into a V2SF
;; destination.  Alternatives:
;;   0: memory destination                -> movlps store
;;   1: register to register              -> movaps
;;   2: memory source                     -> movlps load
3781 (define_insn "sse_storelps"
3782 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3784 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3785 (parallel [(const_int 0) (const_int 1)])))]
3788 movlps\t{%1, %0|%0, %1}
3789 movaps\t{%1, %0|%0, %1}
3790 movlps\t{%1, %0|%0, %1}"
3791 [(set_attr "type" "ssemov")
3792 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadlps.  The operands are first passed
;; through ix86_fixup_binary_operands, which returns the destination to
;; use; the insn is emitted into it, and the result is copied back to
;; operand 0 when the two differ.
3794 (define_expand "sse_loadlps_exp"
3795 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3797 (match_operand:V2SF 2 "nonimmediate_operand" "")
3799 (match_operand:V4SF 1 "nonimmediate_operand" "")
3800 (parallel [(const_int 2) (const_int 3)]))))]
3803 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3805 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3807 /* Fix up the destination if needed. */
3808 if (dst != operands[0])
3809 emit_move_insn (operands[0], dst);
;; AVX pattern combining the V2SF operand 2 with elements 2 and 3 of the
;; V4SF operand 1.  Alternatives:
;;   0: all registers                     -> vshufps with immediate 0xe4
;;   1: operand 2 in memory               -> vmovlps load
;;   2: memory destination tied to op 1   -> vmovlps store
;; Alternative 0 must use the VEX mnemonic: the template prints three
;; register operands plus the immediate, which the legacy two-operand
;; "shufps" does not accept, and the insn is marked prefix "vex".
3814 (define_insn "*avx_loadlps"
3815 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3817 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3819 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3820 (parallel [(const_int 2) (const_int 3)]))))]
3823 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3824 vmovlps\t{%2, %1, %0|%0, %1, %2}
3825 vmovlps\t{%2, %0|%0, %2}"
3826 [(set_attr "type" "sselog,ssemov,ssemov")
3827 (set_attr "length_immediate" "1,*,*")
3828 (set_attr "prefix" "vex")
3829 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE pattern combining the V2SF operand 2 with elements 2 and 3 of the
;; V4SF operand 1.  Alternatives:
;;   0: operand 2 tied to the destination -> shufps with immediate 0xe4,
;;                                           taking operand 1 as source
;;   1: operand 2 in memory, op 1 tied    -> movlps load
;;   2: memory destination, op 1 tied     -> movlps store
3831 (define_insn "sse_loadlps"
3832 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3834 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3836 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3837 (parallel [(const_int 2) (const_int 3)]))))]
3840 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3841 movlps\t{%2, %0|%0, %2}
3842 movlps\t{%2, %0|%0, %2}"
3843 [(set_attr "type" "sselog,ssemov,ssemov")
3844 (set_attr "length_immediate" "1,*,*")
3845 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three-operand vmovss on V4SF registers: operands 1 and 2 are both
;; register sources and operand 0 is a separate destination.
3847 (define_insn "*avx_movss"
3848 [(set (match_operand:V4SF 0 "register_operand" "=x")
3850 (match_operand:V4SF 2 "register_operand" "x")
3851 (match_operand:V4SF 1 "register_operand" "x")
3854 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3855 [(set_attr "type" "ssemov")
3856 (set_attr "prefix" "vex")
3857 (set_attr "mode" "SF")])
;; Two-operand movss on V4SF registers: operand 1 is tied to the
;; destination and operand 2 is the register source.
3859 (define_insn "sse_movss"
3860 [(set (match_operand:V4SF 0 "register_operand" "=x")
3862 (match_operand:V4SF 2 "register_operand" "x")
3863 (match_operand:V4SF 1 "register_operand" "0")
3866 "movss\t{%2, %0|%0, %2}"
3867 [(set_attr "type" "ssemov")
3868 (set_attr "mode" "SF")])
;; Expander that broadcasts the SFmode scalar operand 1 into the V4SF
;; register operand 0.  The preparation code may force operand 1 into a
;; register.  The mode passed to force_reg must be the operand's own mode
;; (SFmode), not the V4SF mode of the result.
3870 (define_expand "vec_dupv4sf"
3871 [(set (match_operand:V4SF 0 "register_operand" "")
3873 (match_operand:SF 1 "nonimmediate_operand" "")))]
3877 operands[1] = force_reg (SFmode, operands[1]);
;; AVX broadcast of an SF scalar into V4SF.  Alternatives:
;;   0: register source -> vshufps with immediate 0, operand 1 as both
;;                         sources
;;   1: memory source   -> vbroadcastss
3880 (define_insn "*vec_dupv4sf_avx"
3881 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3883 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3886 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3887 vbroadcastss\t{%1, %0|%0, %1}"
3888 [(set_attr "type" "sselog1,ssemov")
3889 (set_attr "length_immediate" "1,0")
3890 (set_attr "prefix_extra" "0,1")
3891 (set_attr "prefix" "vex")
3892 (set_attr "mode" "V4SF")])
;; SSE broadcast of an SF scalar into V4SF.  The source register is tied
;; to the destination, and the insn is shufps of operand 0 with itself
;; using immediate 0.
3894 (define_insn "*vec_dupv4sf"
3895 [(set (match_operand:V4SF 0 "register_operand" "=x")
3897 (match_operand:SF 1 "register_operand" "0")))]
3899 "shufps\t{$0, %0, %0|%0, %0, 0}"
3900 [(set_attr "type" "sselog1")
3901 (set_attr "length_immediate" "1")
3902 (set_attr "mode" "V4SF")])
;; Builds a V2SF from two SF operands, five alternatives:
;;   0: vunpcklps  (both operands "x")
;;   1: vinsertps with immediate 0x10  (operand 2 in memory)
;;   2: vmovss load  (operand 1 in memory, operand 2 constraint "C")
;;   3: punpckldq  ("y" destination)
;;   4: movd load  ("y" destination, operand 2 constraint "C")
;; Alternatives 3 and 4 are given prefix "orig"; the others "vex".
3904 (define_insn "*vec_concatv2sf_avx"
3905 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3907 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3908 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3911 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3912 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3913 vmovss\t{%1, %0|%0, %1}
3914 punpckldq\t{%2, %0|%0, %2}
3915 movd\t{%1, %0|%0, %1}"
3916 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3917 (set_attr "length_immediate" "*,1,*,*,*")
3918 (set_attr "prefix_extra" "*,1,*,*,*")
3919 (set (attr "prefix")
3920 (if_then_else (eq_attr "alternative" "3,4")
3921 (const_string "orig")
3922 (const_string "vex")))
3923 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3925 ;; Although insertps takes register source, we prefer
3926 ;; unpcklps with register source since it is shorter.
;; Builds a V2SF from two SF operands; operand 1 is tied to the
;; destination in the alternatives that do not load it from memory.
;;   0: unpcklps   1: insertps with immediate 0x10 (operand 2 in memory)
;;   2: movss load 3: punpckldq ("y" destination)  4: movd load
3927 (define_insn "*vec_concatv2sf_sse4_1"
3928 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3930 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3931 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3934 unpcklps\t{%2, %0|%0, %2}
3935 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3936 movss\t{%1, %0|%0, %1}
3937 punpckldq\t{%2, %0|%0, %2}
3938 movd\t{%1, %0|%0, %1}"
3939 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3940 (set_attr "prefix_data16" "*,1,*,*,*")
3941 (set_attr "prefix_extra" "*,1,*,*,*")
3942 (set_attr "length_immediate" "*,1,*,*,*")
3943 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3945 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3946 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3947 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF operands, four alternatives:
;;   0: unpcklps (operand 1 tied)   1: movss load (operand 2 "C")
;;   2: punpckldq ("y" registers)   3: movd load (operand 2 "C")
;; Operand 2 uses reg_or_0_operand, so it is never a memory operand here.
3948 (define_insn "*vec_concatv2sf_sse"
3949 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3951 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3952 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3955 unpcklps\t{%2, %0|%0, %2}
3956 movss\t{%1, %0|%0, %1}
3957 punpckldq\t{%2, %0|%0, %2}
3958 movd\t{%1, %0|%0, %1}"
3959 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3960 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands (AVX, three-operand forms):
;;   0: operand 2 in a register -> vmovlhps
;;   1: operand 2 in memory     -> vmovhps load
3962 (define_insn "*vec_concatv4sf_avx"
3963 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3965 (match_operand:V2SF 1 "register_operand" " x,x")
3966 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3969 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3970 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3971 [(set_attr "type" "ssemov")
3972 (set_attr "prefix" "vex")
3973 (set_attr "mode" "V4SF,V2SF")])
;; Builds a V4SF from two V2SF operands (SSE; operand 1 tied to the
;; destination):
;;   0: operand 2 in a register -> movlhps
;;   1: operand 2 in memory     -> movhps load
3975 (define_insn "*vec_concatv4sf_sse"
3976 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3978 (match_operand:V2SF 1 "register_operand" " 0,0")
3979 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3982 movlhps\t{%2, %0|%0, %2}
3983 movhps\t{%2, %0|%0, %2}"
3984 [(set_attr "type" "ssemov")
3985 (set_attr "mode" "V4SF,V2SF")])
;; Vector initialisation for every mode of the SSEMODE iterator; all the
;; work is delegated to ix86_expand_vector_init, called with false as its
;; first argument.
3987 (define_expand "vec_init<mode>"
3988 [(match_operand:SSEMODE 0 "register_operand" "")
3989 (match_operand 1 "" "")]
3992 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX vec_merge of a duplicated scalar (operand 2) into the SSEMODE4S
;; vector operand 1, six alternatives.  In the first three operand 1 has
;; constraint "C"; in the next two it is a register; in the last the
;; destination is memory tied to operand 1.  Visible templates, in order:
;; vinsertps $0xe, vmov<suffix>, vmovd, vmovss, vpinsrd $0.  The last
;; alternative has type and mode "*".
;; NOTE(review): presumably the last alternative is emitted as "#" and
;; split after reload -- confirm.
3996 (define_insn "*vec_set<mode>_0_avx"
3997 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3998 (vec_merge:SSEMODE4S
3999 (vec_duplicate:SSEMODE4S
4000 (match_operand:<ssescalarmode> 2
4001 "general_operand" " x,m,*r,x,*rm,x*rfF"))
4002 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
4006 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
4007 vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4008 vmovd\t{%2, %0|%0, %2}
4009 vmovss\t{%2, %1, %0|%0, %1, %2}
4010 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
4012 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
4013 (set_attr "prefix_extra" "*,*,*,*,1,*")
4014 (set_attr "length_immediate" "*,*,*,*,1,*")
4015 (set_attr "prefix" "vex")
4016 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; SSE4.1 counterpart of the AVX vec_set-element-0 pattern, in
;; two-operand form: operand 1 is "C" in the first three alternatives and
;; tied to the destination in the rest.  Visible templates, in order:
;; insertps $0xe, mov<suffix>, movd, movss, pinsrd $0.  The last
;; alternative (memory destination) has type and mode "*".
;; NOTE(review): its operand 2 constraint is "*rfF" whereas the sibling
;; patterns use "x*rfF" -- confirm whether "x" was intentionally omitted.
4018 (define_insn "*vec_set<mode>_0_sse4_1"
4019 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
4020 (vec_merge:SSEMODE4S
4021 (vec_duplicate:SSEMODE4S
4022 (match_operand:<ssescalarmode> 2
4023 "general_operand" " x,m,*r,x,*rm,*rfF"))
4024 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
4028 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
4029 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4030 movd\t{%2, %0|%0, %2}
4031 movss\t{%2, %0|%0, %2}
4032 pinsrd\t{$0, %2, %0|%0, %2, 0}
4034 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
4035 (set_attr "prefix_extra" "*,*,*,*,1,*")
4036 (set_attr "length_immediate" "*,*,*,*,1,*")
4037 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; Four-alternative form of the vec_set-element-0 pattern.  Visible
;; templates, in order: mov<suffix> (memory scalar), movd ("r" scalar),
;; movss (register scalar, operand 1 tied).  The memory-destination
;; alternative has mode "*".
4039 (define_insn "*vec_set<mode>_0_sse2"
4040 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
4041 (vec_merge:SSEMODE4S
4042 (vec_duplicate:SSEMODE4S
4043 (match_operand:<ssescalarmode> 2
4044 "general_operand" " m,*r,x,x*rfF"))
4045 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
4049 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4050 movd\t{%2, %0|%0, %2}
4051 movss\t{%2, %0|%0, %2}
4053 [(set_attr "type" "ssemov")
4054 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
;; Three-alternative form of the vec_set-element-0 pattern.  Both visible
;; templates are movss: one for a memory scalar with operand 1 "C", one
;; for a register scalar with operand 1 tied.  The memory-destination
;; alternative has mode "*".
4056 (define_insn "vec_set<mode>_0"
4057 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
4058 (vec_merge:SSEMODE4S
4059 (vec_duplicate:SSEMODE4S
4060 (match_operand:<ssescalarmode> 2
4061 "general_operand" " m,x,x*rfF"))
4062 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
4066 movss\t{%2, %0|%0, %2}
4067 movss\t{%2, %0|%0, %2}
4069 [(set_attr "type" "ssemov")
4070 (set_attr "mode" "SF,SF,*")])
4072 ;; A subset is vec_setv4sf.
4073 (define_insn "*vec_setv4sf_avx"
4074 [(set (match_operand:V4SF 0 "register_operand" "=x")
4077 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4078 (match_operand:V4SF 1 "register_operand" "x")
4079 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4082 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4083 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4085 [(set_attr "type" "sselog")
4086 (set_attr "prefix_extra" "1")
4087 (set_attr "length_immediate" "1")
4088 (set_attr "prefix" "vex")
4089 (set_attr "mode" "V4SF")])
;; Two-operand insertps of the SF operand 2 into the V4SF operand 1 (tied
;; to the destination).  Operand 3 arrives as a power of two (1, 2, 4 or
;; 8); the output code converts it to log2 shifted left by 4 before
;; printing it as the immediate.
4091 (define_insn "*vec_setv4sf_sse4_1"
4092 [(set (match_operand:V4SF 0 "register_operand" "=x")
4095 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4096 (match_operand:V4SF 1 "register_operand" "0")
4097 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4100 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4101 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4103 [(set_attr "type" "sselog")
4104 (set_attr "prefix_data16" "1")
4105 (set_attr "prefix_extra" "1")
4106 (set_attr "length_immediate" "1")
4107 (set_attr "mode" "V4SF")])
;; vinsertps expressed as an unspec: operand 2 (V4SF, register or
;; memory), operand 1 (V4SF register) and an immediate in 0..255 that is
;; passed through unchanged.  The ";" after the template string begins a
;; comment in this file format.
4109 (define_insn "*avx_insertps"
4110 [(set (match_operand:V4SF 0 "register_operand" "=x")
4111 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4112 (match_operand:V4SF 1 "register_operand" "x")
4113 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4116 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4117 [(set_attr "type" "sselog")
4118 (set_attr "prefix" "vex")
4119 (set_attr "prefix_extra" "1")
4120 (set_attr "length_immediate" "1")
4121 (set_attr "mode" "V4SF")])
;; SSE4.1 insertps as an unspec.  Operand 1 is tied to the destination and
;; operand 2 must be an SSE register here (register_operand "x"); the
;; 0..255 immediate operand 3 is passed to insertps unchanged.
;; The ';' after the template string begins an (empty) .md comment.
;; NOTE(review): the unspec code and the insn condition are not visible
;; in this excerpt.
4123 (define_insn "sse4_1_insertps"
4124 [(set (match_operand:V4SF 0 "register_operand" "=x")
4125 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4126 (match_operand:V4SF 1 "register_operand" "0")
4127 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4130 "insertps\t{%3, %2, %0|%0, %2, %3}";
4131 [(set_attr "type" "sselog")
4132 (set_attr "prefix_data16" "1")
4133 (set_attr "prefix_extra" "1")
4134 (set_attr "length_immediate" "1")
4135 (set_attr "mode" "V4SF")])
;; Splitter (its header line is not visible in this excerpt): once reload
;; has completed, a vec_merge of a duplicated non-memory scalar into a
;; memory destination becomes a scalar-mode move to the destination
;; address at byte offset 0.
;; NOTE(review): the remaining vec_merge operands and the move's source
;; argument are not visible -- presumably operand 1; confirm.
4138 [(set (match_operand:SSEMODE4S 0 "memory_operand" "")
4139 (vec_merge:SSEMODE4S
4140 (vec_duplicate:SSEMODE4S
4141 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
4144 "TARGET_SSE && reload_completed"
4147 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Expander for setting one element of an SSEMODE vector.  Operand 0 is
;; the vector register, operand 1 the scalar register and operand 2 a
;; constant integer; all work is delegated to ix86_expand_vector_set.
;; NOTE(review): the meaning of the leading "false" argument is defined by
;; ix86_expand_vector_set, which is not part of this file.
4152 (define_expand "vec_set<mode>"
4153 [(match_operand:SSEMODE 0 "register_operand" "")
4154 (match_operand:<ssescalarmode> 1 "register_operand" "")
4155 (match_operand 2 "const_int_operand" "")]
4158 ix86_expand_vector_set (false, operands[0], operands[1],
4159 INTVAL (operands[2]));
;; Extract element 0 of a V4SF into an SF destination (SSE register,
;; memory, x87 register or general register alternatives).  Memory-to-
;; memory is rejected by the condition.  After reload the insn is split
;; into a plain SFmode move: the source is either re-created as an SFmode
;; register with the same register number or narrowed with gen_lowpart.
;; NOTE(review): the test choosing between those two forms is not visible
;; here -- presumably REG_P (op1); confirm.
4163 (define_insn_and_split "*vec_extractv4sf_0"
4164 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4166 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4167 (parallel [(const_int 0)])))]
4168 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4170 "&& reload_completed"
4173 rtx op1 = operands[1];
4175 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4177 op1 = gen_lowpart (SFmode, op1);
4178 emit_move_insn (operands[0], op1);
;; Expander for extracting one 128-bit half of a 256-bit AVX vector.
;; Operand 2 is a constant 0 or 1 (const_0_to_1_operand); the switch on it
;; emits either vec_extract_lo_<mode> or vec_extract_hi_<mode>.
;; NOTE(review): the case labels are not visible in this excerpt --
;; presumably 0 selects the low half and 1 the high half; confirm.
4182 (define_expand "avx_vextractf128<mode>"
4183 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4184 (match_operand:AVX256MODE 1 "register_operand" "")
4185 (match_operand:SI 2 "const_0_to_1_operand" "")]
4188 switch (INTVAL (operands[2]))
4191 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4194 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Select elements 0 and 1 of an AVX256MODE4P vector into the half-width
;; vector mode.  After reload the insn is split into an ordinary move of
;; the low part of operand 1: either a register re-created in the
;; half-width mode with the same register number, or gen_lowpart.
;; NOTE(review): the insn condition and the test choosing between the two
;; forms are not visible -- presumably REG_P (op1); confirm.
4202 (define_insn_and_split "vec_extract_lo_<mode>"
4203 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4204 (vec_select:<avxhalfvecmode>
4205 (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x")
4206 (parallel [(const_int 0) (const_int 1)])))]
4209 "&& reload_completed"
4212 rtx op1 = operands[1];
4214 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4216 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4217 emit_move_insn (operands[0], op1);
;; Select elements 2 and 3 of an AVX256MODE4P register into the half-width
;; vector mode using vextractf128 with immediate 1.  The destination is an
;; SSE register or memory; the "memory" attribute marks the second
;; alternative as a store.
4221 (define_insn "vec_extract_hi_<mode>"
4222 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4223 (vec_select:<avxhalfvecmode>
4224 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4225 (parallel [(const_int 2) (const_int 3)])))]
4227 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4228 [(set_attr "type" "sselog")
4229 (set_attr "prefix_extra" "1")
4230 (set_attr "length_immediate" "1")
4231 (set_attr "memory" "none,store")
4232 (set_attr "prefix" "vex")
4233 (set_attr "mode" "V8SF")])
;; Select elements 0..3 of an AVX256MODE8P vector into the half-width
;; vector mode.  After reload the insn is split into an ordinary move of
;; the low part of operand 1: either a register re-created in the
;; half-width mode with the same register number, or gen_lowpart.
;; NOTE(review): the insn condition and the test choosing between the two
;; forms are not visible -- presumably REG_P (op1); confirm.
4235 (define_insn_and_split "vec_extract_lo_<mode>"
4236 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4237 (vec_select:<avxhalfvecmode>
4238 (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x")
4239 (parallel [(const_int 0) (const_int 1)
4240 (const_int 2) (const_int 3)])))]
4243 "&& reload_completed"
4246 rtx op1 = operands[1];
4248 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4250 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4251 emit_move_insn (operands[0], op1);
;; Select elements 4..7 of an AVX256MODE8P register into the half-width
;; vector mode using vextractf128 with immediate 1.  The destination is an
;; SSE register or memory; the "memory" attribute marks the second
;; alternative as a store.
4255 (define_insn "vec_extract_hi_<mode>"
4256 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4257 (vec_select:<avxhalfvecmode>
4258 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4259 (parallel [(const_int 4) (const_int 5)
4260 (const_int 6) (const_int 7)])))]
4262 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4263 [(set_attr "type" "sselog")
4264 (set_attr "prefix_extra" "1")
4265 (set_attr "length_immediate" "1")
4266 (set_attr "memory" "none,store")
4267 (set_attr "prefix" "vex")
4268 (set_attr "mode" "V8SF")])
;; Select elements 0..7 of a V16HI vector as a V8HI.  After reload the
;; insn is split into an ordinary V8HI move of the low part of operand 1:
;; either a V8HI register with the same register number, or gen_lowpart.
;; NOTE(review): the insn condition and the test choosing between the two
;; forms are not visible -- presumably REG_P (op1); confirm.
4270 (define_insn_and_split "vec_extract_lo_v16hi"
4271 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4273 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4274 (parallel [(const_int 0) (const_int 1)
4275 (const_int 2) (const_int 3)
4276 (const_int 4) (const_int 5)
4277 (const_int 6) (const_int 7)])))]
4280 "&& reload_completed"
4283 rtx op1 = operands[1];
4285 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4287 op1 = gen_lowpart (V8HImode, op1);
4288 emit_move_insn (operands[0], op1);
;; Select elements 8..15 of a V16HI register as a V8HI using vextractf128
;; with immediate 1.  The destination is an SSE register or memory; the
;; "memory" attribute marks the second alternative as a store.
4292 (define_insn "vec_extract_hi_v16hi"
4293 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4295 (match_operand:V16HI 1 "register_operand" "x,x")
4296 (parallel [(const_int 8) (const_int 9)
4297 (const_int 10) (const_int 11)
4298 (const_int 12) (const_int 13)
4299 (const_int 14) (const_int 15)])))]
4301 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4302 [(set_attr "type" "sselog")
4303 (set_attr "prefix_extra" "1")
4304 (set_attr "length_immediate" "1")
4305 (set_attr "memory" "none,store")
4306 (set_attr "prefix" "vex")
4307 (set_attr "mode" "V8SF")])
;; Select elements 0..15 of a V32QI vector as a V16QI.  After reload the
;; insn is split into an ordinary V16QI move of the low part of operand 1:
;; either a V16QI register with the same register number, or gen_lowpart.
;; NOTE(review): the insn condition and the test choosing between the two
;; forms are not visible -- presumably REG_P (op1); confirm.
4309 (define_insn_and_split "vec_extract_lo_v32qi"
4310 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4312 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4313 (parallel [(const_int 0) (const_int 1)
4314 (const_int 2) (const_int 3)
4315 (const_int 4) (const_int 5)
4316 (const_int 6) (const_int 7)
4317 (const_int 8) (const_int 9)
4318 (const_int 10) (const_int 11)
4319 (const_int 12) (const_int 13)
4320 (const_int 14) (const_int 15)])))]
4323 "&& reload_completed"
4326 rtx op1 = operands[1];
4328 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4330 op1 = gen_lowpart (V16QImode, op1);
4331 emit_move_insn (operands[0], op1);
;; Select elements 16..31 of a V32QI register as a V16QI using
;; vextractf128 with immediate 1.  The destination is an SSE register or
;; memory; the "memory" attribute marks the second alternative as a store.
4335 (define_insn "vec_extract_hi_v32qi"
4336 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4338 (match_operand:V32QI 1 "register_operand" "x,x")
4339 (parallel [(const_int 16) (const_int 17)
4340 (const_int 18) (const_int 19)
4341 (const_int 20) (const_int 21)
4342 (const_int 22) (const_int 23)
4343 (const_int 24) (const_int 25)
4344 (const_int 26) (const_int 27)
4345 (const_int 28) (const_int 29)
4346 (const_int 30) (const_int 31)])))]
4348 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4349 [(set_attr "type" "sselog")
4350 (set_attr "prefix_extra" "1")
4351 (set_attr "length_immediate" "1")
4352 (set_attr "memory" "none,store")
4353 (set_attr "prefix" "vex")
4354 (set_attr "mode" "V8SF")])
;; Extract the V4SF element selected by constant operand 2 (0..3) from an
;; SSE register into a general register or memory ("=rm") with extractps.
;; The "%v" template prefix and the "maybe_vex" prefix attribute mean the
;; mnemonic can be emitted in its VEX form -- presumably when AVX is
;; enabled; confirm in i386.c.
;; NOTE(review): the insn condition is not visible in this excerpt.
4356 (define_insn "*sse4_1_extractps"
4357 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4359 (match_operand:V4SF 1 "register_operand" "x")
4360 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4362 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4363 [(set_attr "type" "sselog")
4364 (set_attr "prefix_data16" "1")
4365 (set_attr "prefix_extra" "1")
4366 (set_attr "length_immediate" "1")
4367 (set_attr "prefix" "maybe_vex")
4368 (set_attr "mode" "V4SF")])
;; Extract element i (constant operand 2, 0..3) of a V4SF that lives in
;; offsettable memory ("o") into an SSE, general or x87 register.  The
;; split replaces the insn with an SFmode load from byte offset i*4 of
;; operand 1.
;; NOTE(review): the insn and split conditions are not visible here.
4370 (define_insn_and_split "*vec_extract_v4sf_mem"
4371 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4373 (match_operand:V4SF 1 "memory_operand" "o")
4374 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4380 int i = INTVAL (operands[2]);
4382 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for extracting one element of a VEC_EXTRACT_MODE vector.
;; Operand 0 is the scalar destination register, operand 1 the vector
;; register and operand 2 a constant integer; all work is delegated to
;; ix86_expand_vector_extract.
;; NOTE(review): the meaning of the leading "false" argument is defined by
;; ix86_expand_vector_extract, which is not part of this file.
4386 (define_expand "vec_extract<mode>"
4387 [(match_operand:<avxscalarmode> 0 "register_operand" "")
4388 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4389 (match_operand 2 "const_int_operand" "")]
4392 ix86_expand_vector_extract (false, operands[0], operands[1],
4393 INTVAL (operands[2]));
4397 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4399 ;; Parallel double-precision floating point element swizzling
4401 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4403 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: the result takes indices 1, 5, 3, 7 of operands 1
;; and 2, i.e. the odd element of each operand, pairwise per 128-bit lane.
;; NOTE(review): the vec_select/vec_concat wrapper lines and the insn
;; condition are not visible in this excerpt.
4404 (define_insn "avx_unpckhpd256"
4405 [(set (match_operand:V4DF 0 "register_operand" "=x")
4408 (match_operand:V4DF 1 "register_operand" "x")
4409 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4410 (parallel [(const_int 1) (const_int 5)
4411 (const_int 3) (const_int 7)])))]
4413 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4414 [(set_attr "type" "sselog")
4415 (set_attr "prefix" "vex")
4416 (set_attr "mode" "V4DF")])
;; Three-step expansion of a V4DF high interleave.  The visible index sets
;; are {0,4,2,6} and {1,5,3,7} for the two intermediate selections and
;; {2,3,6,7} for the final selection written to operand 0.  operands[3]
;; and operands[4] are fresh V4DF pseudo registers.
;; NOTE(review): the destinations and inputs of the individual steps are
;; on lines not visible here -- presumably the temporaries hold the two
;; intermediate results and feed the final selection; confirm.
4418 (define_expand "vec_interleave_highv4df"
4422 (match_operand:V4DF 1 "register_operand" "x")
4423 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4424 (parallel [(const_int 0) (const_int 4)
4425 (const_int 2) (const_int 6)])))
4431 (parallel [(const_int 1) (const_int 5)
4432 (const_int 3) (const_int 7)])))
4433 (set (match_operand:V4DF 0 "register_operand" "")
4438 (parallel [(const_int 2) (const_int 3)
4439 (const_int 6) (const_int 7)])))]
4442 operands[3] = gen_reg_rtx (V4DFmode);
4443 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  When
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a register so that one of the
;; insn patterns below can match.
;; NOTE(review): the second selector index and the expander condition are
;; not visible in this excerpt.
4447 (define_expand "vec_interleave_highv2df"
4448 [(set (match_operand:V2DF 0 "register_operand" "")
4451 (match_operand:V2DF 1 "nonimmediate_operand" "")
4452 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4453 (parallel [(const_int 1)
4457 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4458 operands[2] = force_reg (V2DFmode, operands[2]);
;; AVX V2DF high interleave.  Alternatives, in order:
;;   0: register, register -> vunpckhpd
;;   1: both inputs are the same offsettable memory -> vmovddup from %H1
;;   2: operand 1 in memory, operand 2 in a register -> vmovlpd from %H1
;;   3: memory destination tied to operand 2 -> vmovhpd store of operand 1
;; NOTE(review): %H is the i386 operand modifier that presumably addresses
;; the upper 8 bytes of a memory operand; confirm in i386.c.
4461 (define_insn "*avx_interleave_highv2df"
4462 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4465 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4466 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4467 (parallel [(const_int 1)
4469 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4471 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4472 vmovddup\t{%H1, %0|%0, %H1}
4473 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4474 vmovhpd\t{%1, %0|%0, %1}"
4475 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4476 (set_attr "prefix" "vex")
4477 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 V2DF high interleave, two-operand forms.  Alternatives, in order:
;;   0: operand 1 tied to the destination -> unpckhpd
;;   1: both inputs are the same offsettable memory -> movddup from %H1
;;   2: operand 2 tied to the destination, operand 1 in memory -> movlpd
;;   3: memory destination tied to operand 2 -> movhpd store of operand 1
;; NOTE(review): %H is the i386 operand modifier that presumably addresses
;; the upper 8 bytes of a memory operand; confirm in i386.c.
4479 (define_insn "*sse3_interleave_highv2df"
4480 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4483 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4484 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4485 (parallel [(const_int 1)
4487 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4489 unpckhpd\t{%2, %0|%0, %2}
4490 movddup\t{%H1, %0|%0, %H1}
4491 movlpd\t{%H1, %0|%0, %H1}
4492 movhpd\t{%1, %0|%0, %1}"
4493 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4494 (set_attr "prefix_data16" "*,*,1,1")
4495 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 V2DF high interleave; same as the SSE3 pattern minus the movddup
;; alternative.  Alternatives, in order:
;;   0: operand 1 tied to the destination -> unpckhpd
;;   1: operand 2 tied to the destination, operand 1 in memory -> movlpd
;;   2: memory destination tied to operand 2 -> movhpd store of operand 1
4497 (define_insn "*sse2_interleave_highv2df"
4498 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4501 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4502 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4503 (parallel [(const_int 1)
4505 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4507 unpckhpd\t{%2, %0|%0, %2}
4508 movlpd\t{%H1, %0|%0, %H1}
4509 movhpd\t{%1, %0|%0, %1}"
4510 [(set_attr "type" "sselog,ssemov,ssemov")
4511 (set_attr "prefix_data16" "*,1,1")
4512 (set_attr "mode" "V2DF,V1DF,V1DF")])
4514 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit movddup: a selection with indices 0, 4, 2, 6
;; whose only explicit input is operand 1 (register or memory).
;; NOTE(review): the second input of the concatenation is on a line not
;; visible here -- presumably (match_dup 1), which would let the
;; *avx_unpcklpd256 insn's vmovddup alternative match; confirm.
4515 (define_expand "avx_movddup256"
4516 [(set (match_operand:V4DF 0 "register_operand" "")
4519 (match_operand:V4DF 1 "nonimmediate_operand" "")
4521 (parallel [(const_int 0) (const_int 4)
4522 (const_int 2) (const_int 6)])))]
;; Expander for the 256-bit vunpcklpd: selects indices 0, 4, 2, 6 of
;; operands 1 (register) and 2 (register or memory), i.e. the even element
;; of each operand, pairwise per 128-bit lane.
;; NOTE(review): the expander condition is not visible in this excerpt.
4525 (define_expand "avx_unpcklpd256"
4526 [(set (match_operand:V4DF 0 "register_operand" "")
4529 (match_operand:V4DF 1 "register_operand" "")
4530 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4531 (parallel [(const_int 0) (const_int 4)
4532 (const_int 2) (const_int 6)])))]
;; Insn matching both 256-bit expanders above.  Alternative 0: operand 2
;; is the same as operand 1 (constraint "1", register or memory) ->
;; vmovddup.  Alternative 1: distinct operands -> vunpcklpd.  The visible
;; part of the condition allows operand 1 to be in memory only when it is
;; rtx_equal_p to operand 2.
;; NOTE(review): the first line of the condition is not visible here.
4535 (define_insn "*avx_unpcklpd256"
4536 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4539 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4540 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4541 (parallel [(const_int 0) (const_int 4)
4542 (const_int 2) (const_int 6)])))]
4544 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4546 vmovddup\t{%1, %0|%0, %1}
4547 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4548 [(set_attr "type" "sselog")
4549 (set_attr "prefix" "vex")
4550 (set_attr "mode" "V4DF")])
;; Three-step expansion of a V4DF low interleave.  The visible index sets
;; are {0,4,2,6} and {1,5,3,7} for the two intermediate selections and
;; {0,1,4,5} for the final selection written to operand 0.  operands[3]
;; and operands[4] are fresh V4DF pseudo registers.
;; NOTE(review): the destinations and inputs of the individual steps are
;; on lines not visible here -- presumably the temporaries hold the two
;; intermediate results and feed the final selection; confirm.
4552 (define_expand "vec_interleave_lowv4df"
4556 (match_operand:V4DF 1 "register_operand" "x")
4557 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4558 (parallel [(const_int 0) (const_int 4)
4559 (const_int 2) (const_int 6)])))
4565 (parallel [(const_int 1) (const_int 5)
4566 (const_int 3) (const_int 7)])))
4567 (set (match_operand:V4DF 0 "register_operand" "")
4572 (parallel [(const_int 0) (const_int 1)
4573 (const_int 4) (const_int 5)])))]
4576 operands[3] = gen_reg_rtx (V4DFmode);
4577 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF low interleave.  When
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a register so that one of the
;; insn patterns below can match.
;; NOTE(review): the second selector index and the expander condition are
;; not visible in this excerpt.
4580 (define_expand "vec_interleave_lowv2df"
4581 [(set (match_operand:V2DF 0 "register_operand" "")
4584 (match_operand:V2DF 1 "nonimmediate_operand" "")
4585 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4586 (parallel [(const_int 0)
4590 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4591 operands[1] = force_reg (V2DFmode, operands[1]);
;; AVX V2DF low interleave.  Alternatives, in order:
;;   0: register, register -> vunpcklpd
;;   1: both inputs are the same memory operand -> vmovddup
;;   2: operand 1 in a register, operand 2 in memory -> vmovhpd load
;;   3: offsettable memory destination tied to operand 1 -> vmovlpd store
;;      of operand 2 to %H0
;; NOTE(review): %H is the i386 operand modifier that presumably addresses
;; the upper 8 bytes of a memory operand; confirm in i386.c.
4594 (define_insn "*avx_interleave_lowv2df"
4595 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4598 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4599 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4600 (parallel [(const_int 0)
4602 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4604 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4605 vmovddup\t{%1, %0|%0, %1}
4606 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4607 vmovlpd\t{%2, %H0|%H0, %2}"
4608 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4609 (set_attr "prefix" "vex")
4610 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 V2DF low interleave, two-operand forms.  Alternatives, in order:
;;   0: operand 1 tied to the destination -> unpcklpd
;;   1: both inputs are the same memory operand -> movddup
;;   2: operand 1 tied to the destination, operand 2 in memory -> movhpd
;;   3: offsettable memory destination tied to operand 1 -> movlpd store
;;      of operand 2 to %H0
4612 (define_insn "*sse3_interleave_lowv2df"
4613 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4616 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4617 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4618 (parallel [(const_int 0)
4620 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4622 unpcklpd\t{%2, %0|%0, %2}
4623 movddup\t{%1, %0|%0, %1}
4624 movhpd\t{%2, %0|%0, %2}
4625 movlpd\t{%2, %H0|%H0, %2}"
4626 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4627 (set_attr "prefix_data16" "*,*,1,1")
4628 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 V2DF low interleave; same as the SSE3 pattern minus the movddup
;; alternative.  Operand 1 is tied to the destination in every
;; alternative: unpcklpd (register operand 2), movhpd (memory operand 2),
;; and a movlpd store of operand 2 to %H0 for a memory destination.
4630 (define_insn "*sse2_interleave_lowv2df"
4631 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4634 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4635 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4636 (parallel [(const_int 0)
4638 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4640 unpcklpd\t{%2, %0|%0, %2}
4641 movhpd\t{%2, %0|%0, %2}
4642 movlpd\t{%2, %H0|%H0, %2}"
4643 [(set_attr "type" "sselog,ssemov,ssemov")
4644 (set_attr "prefix_data16" "*,1,1")
4645 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Splitter (its header line is not visible in this excerpt): under SSE3,
;; after reload, a V2DF memory destination that receives a selection of
;; register operand 1 is written with two DFmode stores of the register's
;; low DF part, at byte offsets 0 and 8.
;; NOTE(review): the full selector is not visible -- presumably it picks
;; element 0 of operand 1 twice; confirm.
4648 [(set (match_operand:V2DF 0 "memory_operand" "")
4651 (match_operand:V2DF 1 "register_operand" "")
4653 (parallel [(const_int 0)
4655 "TARGET_SSE3 && reload_completed"
4658 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4659 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4660 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Splitter (its header line is not visible in this excerpt): under SSE3,
;; a selection from memory operand 1 whose two indices are operand 2
;; (0 or 1) and operand 2 + 2 is rewritten as a vec_duplicate of the DF
;; found at byte offset operand 2 * 8 of that memory operand.
;; NOTE(review): the second input of the concatenation is not visible --
;; presumably (match_dup 1), so both indices name the same element.
4665 [(set (match_operand:V2DF 0 "register_operand" "")
4668 (match_operand:V2DF 1 "memory_operand" "")
4670 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4671 (match_operand:SI 3 "const_int_operand" "")])))]
4672 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4673 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4675 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander for the 256-bit shufpd.  The integer mask in operand 3 is
;; decoded into explicit element selectors for avx_shufpd256_1: bit 1
;; chooses 5 or 4, bit 2 chooses 3 or 2, bit 3 chooses 7 or 6.
;; NOTE(review): the selector derived from bit 0 is on a line not visible
;; here -- presumably "mask & 1 ? 1 : 0"; confirm.
4678 (define_expand "avx_shufpd256"
4679 [(match_operand:V4DF 0 "register_operand" "")
4680 (match_operand:V4DF 1 "register_operand" "")
4681 (match_operand:V4DF 2 "nonimmediate_operand" "")
4682 (match_operand:SI 3 "const_int_operand" "")]
4685 int mask = INTVAL (operands[3]);
4686 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4688 GEN_INT (mask & 2 ? 5 : 4),
4689 GEN_INT (mask & 4 ? 3 : 2),
4690 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd with explicit selectors.  Operands 3..6 are limited by
;; their predicates to {0,1}, {4,5}, {2,3} and {6,7}.  The output code
;; packs them back into a 4-bit immediate: selector k contributes bit k,
;; set when it names the larger of its two permitted indices.  The result
;; replaces operands[3] before the template is returned.
4694 (define_insn "avx_shufpd256_1"
4695 [(set (match_operand:V4DF 0 "register_operand" "=x")
4698 (match_operand:V4DF 1 "register_operand" "x")
4699 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4700 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4701 (match_operand 4 "const_4_to_5_operand" "")
4702 (match_operand 5 "const_2_to_3_operand" "")
4703 (match_operand 6 "const_6_to_7_operand" "")])))]
4707 mask = INTVAL (operands[3]);
4708 mask |= (INTVAL (operands[4]) - 4) << 1;
4709 mask |= (INTVAL (operands[5]) - 2) << 2;
4710 mask |= (INTVAL (operands[6]) - 6) << 3;
4711 operands[3] = GEN_INT (mask);
4713 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4715 [(set_attr "type" "sselog")
4716 (set_attr "length_immediate" "1")
4717 (set_attr "prefix" "vex")
4718 (set_attr "mode" "V4DF")])
;; Expander for the 128-bit shufpd.  The integer mask in operand 3 is
;; decoded into explicit element selectors for sse2_shufpd_v2df: bit 1
;; chooses 3 or 2.
;; NOTE(review): the selector derived from bit 0 is on a line not visible
;; here -- presumably "mask & 1 ? 1 : 0"; confirm.
4720 (define_expand "sse2_shufpd"
4721 [(match_operand:V2DF 0 "register_operand" "")
4722 (match_operand:V2DF 1 "register_operand" "")
4723 (match_operand:V2DF 2 "nonimmediate_operand" "")
4724 (match_operand:SI 3 "const_int_operand" "")]
4727 int mask = INTVAL (operands[3]);
4728 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4730 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander taking a destination and two source vector registers of mode
;; SSEMODE_EO; delegates to ix86_expand_vec_extract_even_odd with a final
;; argument of 0.
;; NOTE(review): by the pattern name, 0 presumably selects the even
;; elements; confirm in i386.c.
4734 (define_expand "vec_extract_even<mode>"
4735 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4736 (match_operand:SSEMODE_EO 1 "register_operand" "")
4737 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4740 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Expander taking a destination and two source vector registers of mode
;; SSEMODE_EO; delegates to ix86_expand_vec_extract_even_odd with a final
;; argument of 1.
;; NOTE(review): by the pattern name, 1 presumably selects the odd
;; elements; confirm in i386.c.
4744 (define_expand "vec_extract_odd<mode>"
4745 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4746 (match_operand:SSEMODE_EO 1 "register_operand" "")
4747 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4750 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4754 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX V2DI high interleave: three-operand vpunpckhqdq with operand 1 in
;; an SSE register and operand 2 in a register or memory.
;; NOTE(review): the second selector index and the insn condition are not
;; visible in this excerpt.
4755 (define_insn "*avx_interleave_highv2di"
4756 [(set (match_operand:V2DI 0 "register_operand" "=x")
4759 (match_operand:V2DI 1 "register_operand" "x")
4760 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4761 (parallel [(const_int 1)
4764 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4765 [(set_attr "type" "sselog")
4766 (set_attr "prefix" "vex")
4767 (set_attr "mode" "TI")])
;; V2DI high interleave: two-operand punpckhqdq with operand 1 tied to the
;; destination and operand 2 in a register or memory.
;; NOTE(review): the second selector index and the insn condition are not
;; visible in this excerpt.
4769 (define_insn "vec_interleave_highv2di"
4770 [(set (match_operand:V2DI 0 "register_operand" "=x")
4773 (match_operand:V2DI 1 "register_operand" "0")
4774 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4775 (parallel [(const_int 1)
4778 "punpckhqdq\t{%2, %0|%0, %2}"
4779 [(set_attr "type" "sselog")
4780 (set_attr "prefix_data16" "1")
4781 (set_attr "mode" "TI")])
;; AVX V2DI low interleave: three-operand vpunpcklqdq with operand 1 in an
;; SSE register and operand 2 in a register or memory.
;; NOTE(review): the second selector index and the insn condition are not
;; visible in this excerpt.
4783 (define_insn "*avx_interleave_lowv2di"
4784 [(set (match_operand:V2DI 0 "register_operand" "=x")
4787 (match_operand:V2DI 1 "register_operand" "x")
4788 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4789 (parallel [(const_int 0)
4792 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4793 [(set_attr "type" "sselog")
4794 (set_attr "prefix" "vex")
4795 (set_attr "mode" "TI")])
;; V2DI low interleave: two-operand punpcklqdq with operand 1 tied to the
;; destination and operand 2 in a register or memory.
;; NOTE(review): the second selector index and the insn condition are not
;; visible in this excerpt.
4797 (define_insn "vec_interleave_lowv2di"
4798 [(set (match_operand:V2DI 0 "register_operand" "=x")
4801 (match_operand:V2DI 1 "register_operand" "0")
4802 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4803 (parallel [(const_int 0)
4806 "punpcklqdq\t{%2, %0|%0, %2}"
4807 [(set_attr "type" "sselog")
4808 (set_attr "prefix_data16" "1")
4809 (set_attr "mode" "TI")])
;; AVX 128-bit vshufpd over the SSEMODE2D modes.  Operand 3 selects index
;; 0 or 1 (from operand 1) and operand 4 selects index 2 or 3 (from
;; operand 2) of the concatenation.  The output code rebuilds the
;; immediate as operand 3 | ((operand 4 - 2) << 1) and stores it back in
;; operands[3].
;; NOTE(review): the insn condition is not visible in this excerpt.
4811 (define_insn "*avx_shufpd_<mode>"
4812 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4813 (vec_select:SSEMODE2D
4814 (vec_concat:<ssedoublesizemode>
4815 (match_operand:SSEMODE2D 1 "register_operand" "x")
4816 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4817 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4818 (match_operand 4 "const_2_to_3_operand" "")])))]
4822 mask = INTVAL (operands[3]);
4823 mask |= (INTVAL (operands[4]) - 2) << 1;
4824 operands[3] = GEN_INT (mask);
4826 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4828 [(set_attr "type" "sselog")
4829 (set_attr "length_immediate" "1")
4830 (set_attr "prefix" "vex")
4831 (set_attr "mode" "V2DF")])
;; SSE2 shufpd over the SSEMODE2D modes; operand 1 is tied to the
;; destination.  Operand 3 selects index 0 or 1 (from operand 1) and
;; operand 4 selects index 2 or 3 (from operand 2) of the concatenation.
;; The output code rebuilds the immediate as
;; operand 3 | ((operand 4 - 2) << 1) and stores it back in operands[3].
;; NOTE(review): the insn condition is not visible in this excerpt.
4833 (define_insn "sse2_shufpd_<mode>"
4834 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4835 (vec_select:SSEMODE2D
4836 (vec_concat:<ssedoublesizemode>
4837 (match_operand:SSEMODE2D 1 "register_operand" "0")
4838 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4839 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4840 (match_operand 4 "const_2_to_3_operand" "")])))]
4844 mask = INTVAL (operands[3]);
4845 mask |= (INTVAL (operands[4]) - 2) << 1;
4846 operands[3] = GEN_INT (mask);
4848 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4850 [(set_attr "type" "sselog")
4851 (set_attr "length_immediate" "1")
4852 (set_attr "mode" "V2DF")])
4854 ;; Avoid combining registers from different units in a single alternative,
4855 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX extraction of element 1 of a V2DF into a DF destination.
;; Alternatives: store to memory (vmovhpd), SSE register to SSE register
;; (vunpckhpd of operand 1 with itself), and three loads from offsettable
;; memory into an SSE, x87 or general register.  Memory-to-memory is
;; rejected by the condition.
;; NOTE(review): the templates of the three memory-source alternatives
;; are not visible -- presumably "#", handled by the splitter that follows
;; sse2_storehpd; confirm.
4856 (define_insn "*avx_storehpd"
4857 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4859 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4860 (parallel [(const_int 1)])))]
4861 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4863 vmovhpd\t{%1, %0|%0, %1}
4864 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4868 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4869 (set_attr "prefix" "vex")
4870 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 extraction of element 1 of a V2DF into a DF destination.
;; Alternatives: store to memory (movhpd), SSE register tied to the
;; destination, and three loads from offsettable memory into an SSE, x87
;; or general register.  Memory-to-memory is rejected by the condition.
;; NOTE(review): only the first template is visible in this excerpt; the
;; remaining ones must be checked against the full file.
4872 (define_insn "sse2_storehpd"
4873 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4875 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4876 (parallel [(const_int 1)])))]
4877 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4879 movhpd\t{%1, %0|%0, %1}
4884 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4885 (set_attr "prefix_data16" "1,*,*,*,*")
4886 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Splitter (its header line is not visible in this excerpt): after
;; reload, extracting element 1 of a V2DF in memory into a DF register
;; becomes a plain DFmode load from byte offset 8 of that memory operand.
4889 [(set (match_operand:DF 0 "register_operand" "")
4891 (match_operand:V2DF 1 "memory_operand" "")
4892 (parallel [(const_int 1)])))]
4893 "TARGET_SSE2 && reload_completed"
4894 [(set (match_dup 0) (match_dup 1))]
4895 "operands[1] = adjust_address (operands[1], DFmode, 8);")
4897 ;; Avoid combining registers from different units in a single alternative,
4898 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extraction of element 0 of a V2DF into a DF destination.  Alternatives:
;; store to memory (movlpd, VEX form allowed via "%v" / "maybe_vex"), SSE
;; register to SSE register, and three loads from memory into an SSE, x87
;; or general register.  Memory-to-memory is rejected by the condition.
;; NOTE(review): only the first template is visible -- the others are
;; presumably "#", handled by the splitter below; confirm.
4899 (define_insn "sse2_storelpd"
4900 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4902 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4903 (parallel [(const_int 0)])))]
4904 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4906 %vmovlpd\t{%1, %0|%0, %1}
4911 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4912 (set_attr "prefix_data16" "1,*,*,*,*")
4913 (set_attr "prefix" "maybe_vex")
4914 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Splitter (its header line is not visible in this excerpt): after
;; reload, extracting element 0 of a V2DF into a DF register becomes a
;; plain DFmode move from the low part of operand 1 -- either a DFmode
;; register with the same register number or gen_lowpart.
;; NOTE(review): the test choosing between the two forms is not visible
;; -- presumably REG_P (op1); confirm.
4917 [(set (match_operand:DF 0 "register_operand" "")
4919 (match_operand:V2DF 1 "nonimmediate_operand" "")
4920 (parallel [(const_int 0)])))]
4921 "TARGET_SSE2 && reload_completed"
4924 rtx op1 = operands[1];
4926 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4928 op1 = gen_lowpart (DFmode, op1);
4929 emit_move_insn (operands[0], op1);
;; Expander wrapper around sse2_loadhpd (element 0 of operand 1 combined
;; with DF operand 2).  ix86_fixup_binary_operands legitimizes the
;; operands and returns the destination to use; sse2_loadhpd is emitted
;; into it and, if it differs from operand 0, the result is then copied
;; to operand 0.
4933 (define_expand "sse2_loadhpd_exp"
4934 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4937 (match_operand:V2DF 1 "nonimmediate_operand" "")
4938 (parallel [(const_int 0)]))
4939 (match_operand:DF 2 "nonimmediate_operand" "")))]
4942 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4944 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4946 /* Fix up the destination if needed. */
4947 if (dst != operands[0])
4948 emit_move_insn (operands[0], dst);
4953 ;; Avoid combining registers from different units in a single alternative,
4954 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX replacement of the high element of a V2DF: element 0 of operand 1
;; is kept and DF operand 2 becomes the other half.  Alternatives:
;; vmovhpd with operand 2 in memory, vunpcklpd with operand 2 in an SSE
;; register, and three forms with an offsettable memory destination tied
;; to operand 1 and operand 2 in an SSE, x87 or general register.
;; NOTE(review): the templates of the three memory-destination
;; alternatives are not visible -- presumably "#", handled by the splitter
;; that follows sse2_loadhpd; confirm.
4955 (define_insn "*avx_loadhpd"
4956 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4959 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4960 (parallel [(const_int 0)]))
4961 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4962 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4964 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4965 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4969 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4970 (set_attr "prefix" "vex")
4971 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 replacement of the high element of a V2DF: element 0 of operand 1
;; is kept as the low half and DF operand 2 becomes the high half.
;; Alternatives: movhpd with operand 2 in memory, unpcklpd with operand 2
;; in an SSE register, and three forms with an offsettable memory
;; destination tied to operand 1 and operand 2 in an SSE, x87 or general
;; register.
;;
;; The former third alternative (destination tied to operand 2, emitting
;; "shufpd $1, %1, %0") has been removed.  With immediate 1 shufpd puts
;; the destination's own high element -- undefined for a DF value -- in
;; the low half and operand 1's low element in the high half, the reverse
;; of what this pattern describes.  Because shufpd always takes the low
;; result from its destination, no immediate can produce the required
;; value while the destination is tied to operand 2.  The remaining
;; alternatives have the same shape as *avx_loadhpd.
4973 (define_insn "sse2_loadhpd"
4974 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4977 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
4978 (parallel [(const_int 0)]))
4979 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4980 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4982 movhpd\t{%2, %0|%0, %2}
4983 unpcklpd\t{%2, %0|%0, %2}
4988 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4989 (set_attr "prefix_data16" "1,*,*,*,*")
4991 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Splitter (its header line is not visible in this excerpt): after
;; reload, a V2DF in memory that keeps its own element 0 and takes DF
;; register operand 1 as the other half is updated with a single DFmode
;; store of operand 1 at byte offset 8.
4994 [(set (match_operand:V2DF 0 "memory_operand" "")
4996 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4997 (match_operand:DF 1 "register_operand" "")))]
4998 "TARGET_SSE2 && reload_completed"
4999 [(set (match_dup 0) (match_dup 1))]
5000 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd (DF operand 2 combined with
;; element 1 of operand 1).  ix86_fixup_binary_operands legitimizes the
;; operands and returns the destination to use; sse2_loadlpd is emitted
;; into it and, if it differs from operand 0, the result is then copied
;; to operand 0.
5002 (define_expand "sse2_loadlpd_exp"
5003 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
5005 (match_operand:DF 2 "nonimmediate_operand" "")
5007 (match_operand:V2DF 1 "nonimmediate_operand" "")
5008 (parallel [(const_int 1)]))))]
5011 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
5013 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
5015 /* Fix up the destination if needed. */
5016 if (dst != operands[0])
5017 emit_move_insn (operands[0], dst);
5022 ;; Avoid combining registers from different units in a single alternative,
5023 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX replacement of the low element of a V2DF: DF operand 2 is combined
;; with element 1 of operand 1.  Visible templates, in alternative order:
;;   0: operand 1 matches "C", operand 2 in memory -> vmovsd load
;;   1: operand 1 in a register, operand 2 in memory -> vmovlpd
;;   2: both in registers -> three-operand vmovsd
;;   3: operand 1 in offsettable memory -> vmovhpd from %H1
;; Alternatives 4..6 have a memory destination tied to operand 1 with
;; operand 2 in an SSE, x87 or general register.
;; NOTE(review): the templates of alternatives 4..6 are not visible in
;; this excerpt; confirm them against the full file.
5024 (define_insn "*avx_loadlpd"
5025 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
5027 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
5029 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
5030 (parallel [(const_int 1)]))))]
5031 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5033 vmovsd\t{%2, %0|%0, %2}
5034 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5035 vmovsd\t{%2, %1, %0|%0, %1, %2}
5036 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
5040 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
5041 (set_attr "prefix" "vex")
5042 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 replacement of the low element of a V2DF: DF operand 2 becomes the
;; low half and element 1 of operand 1 the high half.  Visible templates,
;; in alternative order:
;;   0: operand 1 matches "C", operand 2 in memory -> movsd load
;;   1: operand 1 tied to the destination, operand 2 in memory -> movlpd
;;   2: operand 1 tied to the destination, operand 2 in a register -> movsd
;;   3: operand 2 tied to the destination, operand 1 in a register -> shufpd
;;   4: operand 2 tied to the destination, operand 1 in memory -> movhpd
;; Alternatives 5..7 have a memory destination tied to operand 1.
;;
;; In alternative 3 the shufpd source must be operand 1: with immediate 2
;; the instruction keeps the destination's low element (operand 2) and
;; takes the high element of its source.  Naming %2 there -- the register
;; tied to %0 -- would shuffle the destination with itself and never read
;; operand 1.
5044 (define_insn "sse2_loadlpd"
5045 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
5047 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
5049 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
5050 (parallel [(const_int 1)]))))]
5051 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5053 movsd\t{%2, %0|%0, %2}
5054 movlpd\t{%2, %0|%0, %2}
5055 movsd\t{%2, %0|%0, %2}
5056 shufpd\t{$2, %1, %0|%0, %1, 2}
5057 movhpd\t{%H1, %0|%0, %H1}
5061 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
5062 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
5063 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
5064 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Splitter (its header line is not visible in this excerpt): after
;; reload, a V2DF in memory that takes DF register operand 1 as its new
;; low half and keeps its own element 1 is updated with a single DFmode
;; store of operand 1.  The store goes to byte offset 0, the location of
;; element 0; offset 8 would overwrite the element 1 this pattern keeps
;; and leave the low element unchanged.
5067 [(set (match_operand:V2DF 0 "memory_operand" "")
5069 (match_operand:DF 1 "register_operand" "")
5070 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
5071 "TARGET_SSE2 && reload_completed"
5072 [(set (match_dup 0) (match_dup 1))]
5073 "operands[0] = adjust_address (operands[0], DFmode, 0);")
5075 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE1-only (no SSE2) extraction of element 1 of a V2DF into a DF.
;; Alternatives: movhps store to memory, movhlps register to register,
;; and movlps load from %H1 of an offsettable memory operand.
;; Memory-to-memory is rejected by the condition.
5077 (define_insn "*vec_extractv2df_1_sse"
5078 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5080 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
5081 (parallel [(const_int 1)])))]
5082 "!TARGET_SSE2 && TARGET_SSE
5083 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5085 movhps\t{%1, %0|%0, %1}
5086 movhlps\t{%1, %0|%0, %1}
5087 movlps\t{%H1, %0|%0, %H1}"
5088 [(set_attr "type" "ssemov")
5089 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE1-only (no SSE2) extraction of element 0 of a V2DF into a DF.
;; Alternatives: movlps store to memory, movaps register to register, and
;; movlps load from memory.  Memory-to-memory is rejected by the
;; condition.
5091 (define_insn "*vec_extractv2df_0_sse"
5092 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5094 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
5095 (parallel [(const_int 0)])))]
5096 "!TARGET_SSE2 && TARGET_SSE
5097 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5099 movlps\t{%1, %0|%0, %1}
5100 movaps\t{%1, %0|%0, %1}
5101 movlps\t{%1, %0|%0, %1}"
5102 [(set_attr "type" "ssemov")
5103 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX movsd-style combination of V2DF operands 2 and 1.  Alternatives,
;; in order:
;;   0: both in registers -> three-operand vmovsd
;;   1: operand 2 in memory -> vmovlpd load
;;   2: memory destination tied to operand 1 -> vmovlpd store of operand 2
;;   3: operand 1 in offsettable memory -> vmovhps from %H1
;;   4: offsettable memory destination tied to operand 2 -> vmovhps store
;;      of operand 1 to %H0
;; NOTE(review): the vec_merge wrapper, its mask and the insn condition
;; are not visible in this excerpt; confirm them against the full file.
5105 (define_insn "*avx_movsd"
5106 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
5108 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
5109 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5113 vmovsd\t{%2, %1, %0|%0, %1, %2}
5114 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5115 vmovlpd\t{%2, %0|%0, %2}
5116 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5117 vmovhps\t{%1, %H0|%H0, %1}"
5118 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5119 (set_attr "prefix" "vex")
5120 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 movsd-style combination of V2DF operands 2 and 1.  Alternatives,
;; in order:
;;   0: operand 1 tied to the destination, operand 2 in a register -> movsd
;;   1: operand 1 tied to the destination, operand 2 in memory -> movlpd
;;   2: memory destination tied to operand 1 -> movlpd store of operand 2
;;   3: operand 2 tied to the destination, operand 1 in a register -> shufpd
;;   4: operand 2 tied to the destination, operand 1 in memory -> movhps
;;      from %H1
;;   5: offsettable memory destination tied to operand 2 -> movhps store
;;      of operand 1 to %H0
;;
;; In alternative 3 the shufpd source must be operand 1: with immediate 2
;; the instruction keeps the destination's low element and takes the high
;; element of its source.  Naming %2 there -- the register tied to %0 --
;; would shuffle the destination with itself, which leaves it unchanged
;; and never reads operand 1.
;; NOTE(review): the vec_merge wrapper, its mask and the insn condition
;; are not visible in this excerpt; confirm them against the full file.
5122 (define_insn "sse2_movsd"
5123 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5125 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5126 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5130 movsd\t{%2, %0|%0, %2}
5131 movlpd\t{%2, %0|%0, %2}
5132 movlpd\t{%2, %0|%0, %2}
5133 shufpd\t{$2, %1, %0|%0, %1, 2}
5134 movhps\t{%H1, %0|%0, %H1}
5135 movhps\t{%1, %H0|%H0, %1}"
5136 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5137 (set_attr "prefix_data16" "*,1,1,*,*,*")
5138 (set_attr "length_immediate" "*,*,*,1,*,*")
5139 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Broadcast of a DF value (SSE register or memory) into a V2DF register
;; with movddup; "%v" / "maybe_vex" allow the VEX-encoded form.
;; NOTE(review): the vec_duplicate wrapper and the insn condition are not
;; visible in this excerpt -- presumably TARGET_SSE3 by the name; confirm.
5141 (define_insn "*vec_dupv2df_sse3"
5142 [(set (match_operand:V2DF 0 "register_operand" "=x")
5144 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5146 "%vmovddup\t{%1, %0|%0, %1}"
5147 [(set_attr "type" "sselog1")
5148 (set_attr "prefix" "maybe_vex")
5149 (set_attr "mode" "DF")])
;; Broadcast of a DF register into a V2DF register; the input is tied to
;; the destination ("0").
;; NOTE(review): the RTL wrapper, the insn condition and the output
;; template are all on lines not visible in this excerpt; confirm them
;; against the full file.
5151 (define_insn "vec_dupv2df"
5152 [(set (match_operand:V2DF 0 "register_operand" "=x")
5154 (match_operand:DF 1 "register_operand" "0")))]
5157 [(set_attr "type" "sselog1")
5158 (set_attr "mode" "V2DF")])
;; V2DF built from DF operand 1 (SSE register or memory) with a single
;; movddup; "%v" / "maybe_vex" allow the VEX-encoded form.
;; NOTE(review): the second half of the concatenation is on a line not
;; visible here -- presumably (match_dup 1), i.e. both halves are the same
;; value; the insn condition is not visible either.
5160 (define_insn "*vec_concatv2df_sse3"
5161 [(set (match_operand:V2DF 0 "register_operand" "=x")
5163 (match_operand:DF 1 "nonimmediate_operand" "xm")
5166 "%vmovddup\t{%1, %0|%0, %1}"
5167 [(set_attr "type" "sselog1")
5168 (set_attr "prefix" "maybe_vex")
5169 (set_attr "mode" "DF")])
;; AVX construction of a V2DF from DF operands 1 and 2.  Alternatives:
;; register + register -> vunpcklpd; register + memory -> vmovhpd;
;; memory operand 1 with operand 2 matching "C" -> vmovsd load.
;; NOTE(review): "C" is presumably the all-zero constant constraint
;; (confirm in constraints.md); the insn condition is not visible here.
5171 (define_insn "*vec_concatv2df_avx"
5172 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5174 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5175 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5178 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5179 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5180 vmovsd\t{%1, %0|%0, %1}"
5181 [(set_attr "type" "ssemov")
5182 (set_attr "prefix" "vex")
5183 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX construction of a V2DF from DF operands 1 and 2.  The first
;; three alternatives use the "Y2" register constraint and emit unpcklpd,
;; movhpd and a movsd load (operand 2 matching "C"); the last two use
;; plain "x" registers and emit movlhps and movhps.
;; NOTE(review): "Y2" presumably restricts the alternative to SSE2-capable
;; targets and "C" is presumably the all-zero constant (confirm in
;; constraints.md); the insn condition is not visible here.
5185 (define_insn "*vec_concatv2df"
5186 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5188 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5189 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5192 unpcklpd\t{%2, %0|%0, %2}
5193 movhpd\t{%2, %0|%0, %2}
5194 movsd\t{%1, %0|%0, %1}
5195 movlhps\t{%2, %0|%0, %2}
5196 movhps\t{%2, %0|%0, %2}"
5197 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5198 (set_attr "prefix_data16" "*,1,*,*,*")
5199 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5201 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5203 ;; Parallel integral arithmetic
5205 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for integer vector negation over the SSEMODEI modes.  The
;; preparation statement forces a zero vector of the same mode into a
;; register and records it as operands[2].
;; NOTE(review): the RTL operation using operands[2] is on lines not
;; visible here -- presumably a subtraction of operand 1 from zero.
5207 (define_expand "neg<mode>2"
5208 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5211 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5213 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for integer vector add/subtract over the SSEMODEI modes.  Both
;; inputs may be registers or memory; the operands are legitimized by
;; ix86_fixup_binary_operands_no_copy before the insn patterns below
;; are matched.
;; NOTE(review): the RTL code line and the condition are not visible here.
5215 (define_expand "<plusminus_insn><mode>3"
5216 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5218 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5219 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5221 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; *avx_<plusminus_insn><mode>3: three-operand VEX form for the SSEMODEI
;; modes.  The destination is not tied to operand 1, and operand 2 may be
;; memory.  Emits vp<plusminus_mnemonic><ssevecsize>.  Enabled when
;; TARGET_AVX holds and ix86_binary_operator_ok accepts the operands.
5223 (define_insn "*avx_<plusminus_insn><mode>3"
5224 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5226 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5227 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5228 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5229 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5230 [(set_attr "type" "sseiadd")
5231 (set_attr "prefix" "vex")
5232 (set_attr "mode" "TI")])
;; *<plusminus_insn><mode>3: two-operand SSE2 form for the SSEMODEI modes.
;; Operand 1 is tied to the destination, so the template names only %0
;; and %2.  Enabled when TARGET_SSE2 holds and ix86_binary_operator_ok
;; accepts the operands.
5234 (define_insn "*<plusminus_insn><mode>3"
5235 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5237 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5238 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5239 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5240 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5241 [(set_attr "type" "sseiadd")
5242 (set_attr "prefix_data16" "1")
5243 (set_attr "mode" "TI")])
;; sse2_<plusminus_insn><mode>3: expander for the sat_plusminus codes on
;; the SSEMODE12 modes (V16QI and V8HI).  The preparation statement hands
;; the operand array to ix86_fixup_binary_operands_no_copy.
5245 (define_expand "sse2_<plusminus_insn><mode>3"
5246 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5247 (sat_plusminus:SSEMODE12
5248 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5249 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5251 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; *avx_<plusminus_insn><mode>3 (sat_plusminus variant): three-operand VEX
;; form for the SSEMODE12 modes.  The destination is not tied to
;; operand 1, and operand 2 may be memory.  Enabled when TARGET_AVX holds
;; and ix86_binary_operator_ok accepts the operands.
5253 (define_insn "*avx_<plusminus_insn><mode>3"
5254 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5255 (sat_plusminus:SSEMODE12
5256 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5257 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5258 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5259 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5260 [(set_attr "type" "sseiadd")
5261 (set_attr "prefix" "vex")
5262 (set_attr "mode" "TI")])
;; *sse2_<plusminus_insn><mode>3: two-operand SSE2 form of the
;; sat_plusminus codes on the SSEMODE12 modes.  Operand 1 is tied to the
;; destination, so the template names only %0 and %2.
5264 (define_insn "*sse2_<plusminus_insn><mode>3"
5265 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5266 (sat_plusminus:SSEMODE12
5267 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5268 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5269 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5270 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5271 [(set_attr "type" "sseiadd")
5272 (set_attr "prefix_data16" "1")
5273 (set_attr "mode" "TI")])
;; mulv16qi3: V16QI multiply of two register operands, written as an
;; insn-and-split.  The visible split code synthesizes the result from
;; V8HI multiplies:
;;   1. interleave each input with itself (high halves into t[0]/t[1],
;;      low halves into t[2]/t[3]) so each 16-bit word holds a source
;;      byte in its low byte;
;;   2. multiply the pairs as V8HI with gen_mulv8hi3 into t[4] and t[5];
;;   3. call ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0)
;;      to collect the even bytes of both products.
;; Six V16QI pseudos are allocated, so the condition requires
;; can_create_pseudo_p ().
5275 (define_insn_and_split "mulv16qi3"
5276 [(set (match_operand:V16QI 0 "register_operand" "")
5277 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5278 (match_operand:V16QI 2 "register_operand" "")))]
5280 && can_create_pseudo_p ()"
5288 for (i = 0; i < 6; ++i)
5289 t[i] = gen_reg_rtx (V16QImode);
5291 /* Unpack data such that we've got a source byte in each low byte of
5292 each word. We don't care what goes into the high byte of each word.
5293 Rather than trying to get zero in there, most convenient is to let
5294 it be a copy of the low byte. */
5295 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5296 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5297 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5298 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5300 /* Multiply words. The end-of-line annotations here give a picture of what
5301 the output of that instruction looks like. Dot means don't care; the
5302 letters are the bytes of the result with A being the most significant. */
5303 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5304 gen_lowpart (V8HImode, t[0]),
5305 gen_lowpart (V8HImode, t[1])));
5306 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5307 gen_lowpart (V8HImode, t[2]),
5308 gen_lowpart (V8HImode, t[3])));
5310 /* Extract the even bytes and merge them back together. */
5311 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; mulv8hi3: expander for the V8HI element-wise multiply.  Both inputs may
;; be registers or memory; the preparation statement hands the operand
;; array to ix86_fixup_binary_operands_no_copy with code MULT.
5315 (define_expand "mulv8hi3"
5316 [(set (match_operand:V8HI 0 "register_operand" "")
5317 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5318 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5320 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *avx_mulv8hi3: three-operand VEX form of the V8HI multiply (vpmullw).
;; Operands 1 and 2 are marked commutative (%); operand 2 may be memory.
5322 (define_insn "*avx_mulv8hi3"
5323 [(set (match_operand:V8HI 0 "register_operand" "=x")
5324 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5325 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5326 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5327 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5328 [(set_attr "type" "sseimul")
5329 (set_attr "prefix" "vex")
5330 (set_attr "mode" "TI")])
;; *mulv8hi3: two-operand SSE2 form of the V8HI multiply (pmullw).
;; Operand 1 is tied to the destination and is commutative with
;; operand 2, which may be memory.
5332 (define_insn "*mulv8hi3"
5333 [(set (match_operand:V8HI 0 "register_operand" "=x")
5334 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5335 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5336 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5337 "pmullw\t{%2, %0|%0, %2}"
5338 [(set_attr "type" "sseimul")
5339 (set_attr "prefix_data16" "1")
5340 (set_attr "mode" "TI")])
;; <s>mulv8hi3_highpart: expander taking two V8HI inputs (register or
;; memory) and producing a V8HI register result.  The preparation
;; statement hands the operand array to
;; ix86_fixup_binary_operands_no_copy with code MULT.
;; NOTE(review): the extension, multiply and shift RTL lines are not
;; visible in this excerpt.
5342 (define_expand "<s>mulv8hi3_highpart"
5343 [(set (match_operand:V8HI 0 "register_operand" "")
5348 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5350 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5353 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *avx_<s>mulv8hi3_highpart: three-operand VEX form emitting
;; vpmulh<u>w.  Operands 1 and 2 are commutative; operand 2 may be memory.
5355 (define_insn "*avx_<s>mulv8hi3_highpart"
5356 [(set (match_operand:V8HI 0 "register_operand" "=x")
5361 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5363 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5365 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5366 "vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5367 [(set_attr "type" "sseimul")
5368 (set_attr "prefix" "vex")
5369 (set_attr "mode" "TI")])
;; *<s>mulv8hi3_highpart: two-operand SSE2 form emitting pmulh<u>w.
;; Operand 1 is tied to the destination; operand 2 may be memory.
5371 (define_insn "*<s>mulv8hi3_highpart"
5372 [(set (match_operand:V8HI 0 "register_operand" "=x")
5377 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5379 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5381 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5382 "pmulh<u>w\t{%2, %0|%0, %2}"
5383 [(set_attr "type" "sseimul")
5384 (set_attr "prefix_data16" "1")
5385 (set_attr "mode" "TI")])
;; sse2_umulv2siv2di3: expander producing a V2DI result from two V4SI
;; inputs.  Elements 0 and 2 are selected from each input (the two
;; visible parallel [0 2] selectors).  The preparation statement hands
;; the operand array to ix86_fixup_binary_operands_no_copy with code MULT.
5387 (define_expand "sse2_umulv2siv2di3"
5388 [(set (match_operand:V2DI 0 "register_operand" "")
5392 (match_operand:V4SI 1 "nonimmediate_operand" "")
5393 (parallel [(const_int 0) (const_int 2)])))
5396 (match_operand:V4SI 2 "nonimmediate_operand" "")
5397 (parallel [(const_int 0) (const_int 2)])))))]
5399 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; *avx_umulv2siv2di3: three-operand VEX form emitting vpmuludq.  Uses
;; elements 0 and 2 of each V4SI input and writes a V2DI register.
5401 (define_insn "*avx_umulv2siv2di3"
5402 [(set (match_operand:V2DI 0 "register_operand" "=x")
5406 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5407 (parallel [(const_int 0) (const_int 2)])))
5410 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5411 (parallel [(const_int 0) (const_int 2)])))))]
5412 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5413 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5414 [(set_attr "type" "sseimul")
5415 (set_attr "prefix" "vex")
5416 (set_attr "mode" "TI")])
;; *sse2_umulv2siv2di3: two-operand SSE2 form emitting pmuludq.
;; Operand 1 is tied to the destination; elements 0 and 2 of each V4SI
;; input are used.
5418 (define_insn "*sse2_umulv2siv2di3"
5419 [(set (match_operand:V2DI 0 "register_operand" "=x")
5423 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5424 (parallel [(const_int 0) (const_int 2)])))
5427 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5428 (parallel [(const_int 0) (const_int 2)])))))]
5429 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5430 "pmuludq\t{%2, %0|%0, %2}"
5431 [(set_attr "type" "sseimul")
5432 (set_attr "prefix_data16" "1")
5433 (set_attr "mode" "TI")])
;; sse4_1_mulv2siv2di3: expander with the same operand shape as
;; sse2_umulv2siv2di3 (V2DI result, elements 0 and 2 of two V4SI inputs).
;; The matching insns below emit pmuldq / vpmuldq.  The preparation
;; statement hands the operand array to
;; ix86_fixup_binary_operands_no_copy with code MULT.
5435 (define_expand "sse4_1_mulv2siv2di3"
5436 [(set (match_operand:V2DI 0 "register_operand" "")
5440 (match_operand:V4SI 1 "nonimmediate_operand" "")
5441 (parallel [(const_int 0) (const_int 2)])))
5444 (match_operand:V4SI 2 "nonimmediate_operand" "")
5445 (parallel [(const_int 0) (const_int 2)])))))]
5447 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; *avx_mulv2siv2di3: three-operand VEX form emitting vpmuldq on
;; elements 0 and 2 of each V4SI input.  Sets prefix_extra to 1 in
;; addition to prefix vex.
5449 (define_insn "*avx_mulv2siv2di3"
5450 [(set (match_operand:V2DI 0 "register_operand" "=x")
5454 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5455 (parallel [(const_int 0) (const_int 2)])))
5458 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5459 (parallel [(const_int 0) (const_int 2)])))))]
5460 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5461 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5462 [(set_attr "type" "sseimul")
5463 (set_attr "prefix_extra" "1")
5464 (set_attr "prefix" "vex")
5465 (set_attr "mode" "TI")])
;; *sse4_1_mulv2siv2di3: two-operand form emitting pmuldq, enabled under
;; TARGET_SSE4_1.  Operand 1 is tied to the destination.
5467 (define_insn "*sse4_1_mulv2siv2di3"
5468 [(set (match_operand:V2DI 0 "register_operand" "=x")
5472 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5473 (parallel [(const_int 0) (const_int 2)])))
5476 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5477 (parallel [(const_int 0) (const_int 2)])))))]
5478 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5479 "pmuldq\t{%2, %0|%0, %2}"
5480 [(set_attr "type" "sseimul")
5481 (set_attr "prefix_extra" "1")
5482 (set_attr "mode" "TI")])
;; sse2_pmaddwd: expander producing a V4SI result from two V8HI inputs.
;; The pattern takes four V4HI selections: one from each input starting
;; at element 0 and one from each input starting at element 1 (the
;; latter via match_dup, ending at element 7).
;; NOTE(review): the intermediate selector indices and the operators that
;; combine the selections are not visible in this excerpt.
5484 (define_expand "sse2_pmaddwd"
5485 [(set (match_operand:V4SI 0 "register_operand" "")
5490 (match_operand:V8HI 1 "nonimmediate_operand" "")
5491 (parallel [(const_int 0)
5497 (match_operand:V8HI 2 "nonimmediate_operand" "")
5498 (parallel [(const_int 0)
5504 (vec_select:V4HI (match_dup 1)
5505 (parallel [(const_int 1)
5510 (vec_select:V4HI (match_dup 2)
5511 (parallel [(const_int 1)
5514 (const_int 7)]))))))]
5516 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *avx_pmaddwd: three-operand VEX form emitting vpmaddwd.  Same operand
;; shape as the sse2_pmaddwd expander; operands 1 and 2 are commutative
;; and operand 2 may be memory.
5518 (define_insn "*avx_pmaddwd"
5519 [(set (match_operand:V4SI 0 "register_operand" "=x")
5524 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5525 (parallel [(const_int 0)
5531 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5532 (parallel [(const_int 0)
5538 (vec_select:V4HI (match_dup 1)
5539 (parallel [(const_int 1)
5544 (vec_select:V4HI (match_dup 2)
5545 (parallel [(const_int 1)
5548 (const_int 7)]))))))]
5549 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5550 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5551 [(set_attr "type" "sseiadd")
5552 (set_attr "prefix" "vex")
5553 (set_attr "mode" "TI")])
;; *sse2_pmaddwd: two-operand SSE2 form emitting pmaddwd.  Operand 1 is
;; tied to the destination.  Unlike the AVX variant it also sets the
;; atom_unit attribute (simul).
5555 (define_insn "*sse2_pmaddwd"
5556 [(set (match_operand:V4SI 0 "register_operand" "=x")
5561 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5562 (parallel [(const_int 0)
5568 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5569 (parallel [(const_int 0)
5575 (vec_select:V4HI (match_dup 1)
5576 (parallel [(const_int 1)
5581 (vec_select:V4HI (match_dup 2)
5582 (parallel [(const_int 1)
5585 (const_int 7)]))))))]
5586 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5587 "pmaddwd\t{%2, %0|%0, %2}"
5588 [(set_attr "type" "sseiadd")
5589 (set_attr "atom_unit" "simul")
5590 (set_attr "prefix_data16" "1")
5591 (set_attr "mode" "TI")])
;; mulv4si3: expander for the V4SI element-wise multiply on register
;; operands.  Only when TARGET_SSE4_1 or TARGET_AVX is set does the
;; preparation code call ix86_fixup_binary_operands_no_copy; the other
;; targets are covered by the *sse2_mulv4si3 insn-and-split in this file.
5593 (define_expand "mulv4si3"
5594 [(set (match_operand:V4SI 0 "register_operand" "")
5595 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5596 (match_operand:V4SI 2 "register_operand" "")))]
5599 if (TARGET_SSE4_1 || TARGET_AVX)
5600 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; *avx_mulv4si3: three-operand VEX form of the V4SI multiply (vpmulld).
;; Operands 1 and 2 are commutative; operand 2 may be memory.
5603 (define_insn "*avx_mulv4si3"
5604 [(set (match_operand:V4SI 0 "register_operand" "=x")
5605 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5606 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5607 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5608 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5609 [(set_attr "type" "sseimul")
5610 (set_attr "prefix_extra" "1")
5611 (set_attr "prefix" "vex")
5612 (set_attr "mode" "TI")])
;; *sse4_1_mulv4si3: two-operand form of the V4SI multiply (pmulld),
;; enabled under TARGET_SSE4_1.  Operand 1 is tied to the destination.
5614 (define_insn "*sse4_1_mulv4si3"
5615 [(set (match_operand:V4SI 0 "register_operand" "=x")
5616 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5617 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5618 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5619 "pmulld\t{%2, %0|%0, %2}"
5620 [(set_attr "type" "sseimul")
5621 (set_attr "prefix_extra" "1")
5622 (set_attr "mode" "TI")])
;; *sse2_mulv4si3: V4SI multiply for SSE2 targets without SSE4.1 or AVX
;; (see the condition), split while pseudos can still be created.  The
;; visible split code works as follows:
;;   1. gen_sse2_umulv2siv2di3 multiplies elements 0 and 2 into t1;
;;   2. both inputs are shifted right as V1TI values with
;;      gen_sse2_lshrv1ti3 (into t2 and t3) so elements 1 and 3 land in
;;      the slots of elements 0 and 2;
;;   3. gen_sse2_umulv2siv2di3 multiplies those into t4;
;;   4. gen_sse2_pshufd_1 with selectors (0, 2, 0, 0) compacts t1 and t4
;;      into t5 and t6;
;;   5. gen_vec_interleave_lowv4si merges t5 and t6 into op0.
;; NOTE(review): some call arguments, including the V1TI shift amounts,
;; are not visible in this excerpt; thirtytwo is set to GEN_INT (32).
5624 (define_insn_and_split "*sse2_mulv4si3"
5625 [(set (match_operand:V4SI 0 "register_operand" "")
5626 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5627 (match_operand:V4SI 2 "register_operand" "")))]
5628 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5629 && can_create_pseudo_p ()"
5634 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5640 t1 = gen_reg_rtx (V4SImode);
5641 t2 = gen_reg_rtx (V4SImode);
5642 t3 = gen_reg_rtx (V4SImode);
5643 t4 = gen_reg_rtx (V4SImode);
5644 t5 = gen_reg_rtx (V4SImode);
5645 t6 = gen_reg_rtx (V4SImode);
5646 thirtytwo = GEN_INT (32);
5648 /* Multiply elements 2 and 0. */
5649 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5652 /* Shift both input vectors down one element, so that elements 3
5653 and 1 are now in the slots for elements 2 and 0. For K8, at
5654 least, this is faster than using a shuffle. */
5655 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5656 gen_lowpart (V1TImode, op1),
5658 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5659 gen_lowpart (V1TImode, op2),
5661 /* Multiply elements 3 and 1. */
5662 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5665 /* Move the results in element 2 down to element 1; we don't care
5666 what goes in elements 2 and 3. */
5667 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5668 const0_rtx, const0_rtx));
5669 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5670 const0_rtx, const0_rtx));
5672 /* Merge the parts back together. */
5673 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; mulv2di3: V2DI multiply of two register operands, written as an
;; insn-and-split.  Two code sequences are visible in the split body.
;;
;; XOP-based sequence (V4SI views of the inputs):
;;   t1 = pshufd of op1; t2 = mulv4si3 (t1, op2); t3 = xop_phadddq (t2);
;;   t4 = t3 << 32; op0 = xop_pmacsdql (op1, op2, t4).
;;
;; Generic sequence:
;;   t1 = umul of the low 32-bit halves of op1 and op2;
;;   t2 = op1 >> 32, t3 = op2 >> 32 (gen_lshrv2di3);
;;   t4 = umul (op1, t3), t5 = umul (op2, t2), each then shifted left 32;
;;   op0 = t1 + t4 + t5.
;;
;; NOTE(review): the condition selecting between the two sequences is not
;; visible in this excerpt.
;; NOTE(review): three in-body comments disagree with the adjacent code.
;; The t4: and t5: labels in the XOP sequence sit above statements that
;; write t3 and t4 respectively, and the comment saying the inputs are
;; shifted left by 32 bits precedes gen_lshrv2di3 calls, which are right
;; shifts (lshr<mode>3 in this file emits psrl).
5677 (define_insn_and_split "mulv2di3"
5678 [(set (match_operand:V2DI 0 "register_operand" "")
5679 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5680 (match_operand:V2DI 2 "register_operand" "")))]
5682 && can_create_pseudo_p ()"
5687 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5696 /* op1: A,B,C,D, op2: E,F,G,H */
5697 op1 = gen_lowpart (V4SImode, op1);
5698 op2 = gen_lowpart (V4SImode, op2);
5700 t1 = gen_reg_rtx (V4SImode);
5701 t2 = gen_reg_rtx (V4SImode);
5702 t3 = gen_reg_rtx (V2DImode);
5703 t4 = gen_reg_rtx (V2DImode);
5706 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5712 /* t2: (B*E),(A*F),(D*G),(C*H) */
5713 emit_insn (gen_mulv4si3 (t2, t1, op2));
5715 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5716 emit_insn (gen_xop_phadddq (t3, t2));
5718 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5719 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5721 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5722 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5726 t1 = gen_reg_rtx (V2DImode);
5727 t2 = gen_reg_rtx (V2DImode);
5728 t3 = gen_reg_rtx (V2DImode);
5729 t4 = gen_reg_rtx (V2DImode);
5730 t5 = gen_reg_rtx (V2DImode);
5731 t6 = gen_reg_rtx (V2DImode);
5732 thirtytwo = GEN_INT (32);
5734 /* Multiply low parts. */
5735 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5736 gen_lowpart (V4SImode, op2)));
5738 /* Shift input vectors left 32 bits so we can multiply high parts. */
5739 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5740 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5742 /* Multiply high parts by low parts. */
5743 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5744 gen_lowpart (V4SImode, t3)));
5745 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5746 gen_lowpart (V4SImode, t2)));
5748 /* Shift them back. */
5749 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5750 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5752 /* Add the three parts together. */
5753 emit_insn (gen_addv2di3 (t6, t1, t4));
5754 emit_insn (gen_addv2di3 (op0, t6, t5));
;; vec_widen_smult_hi_v8hi: expander from two V8HI registers to a V4SI
;; register.  t1 gets the mulv8hi3 product and t2 the
;; smulv8hi3_highpart product of the same inputs; the two are then
;; combined with gen_vec_interleave_highv8hi into operands[0] viewed as
;; V8HI.
;; NOTE(review): the assignments of op1 and op2 are not visible here.
5759 (define_expand "vec_widen_smult_hi_v8hi"
5760 [(match_operand:V4SI 0 "register_operand" "")
5761 (match_operand:V8HI 1 "register_operand" "")
5762 (match_operand:V8HI 2 "register_operand" "")]
5765 rtx op1, op2, t1, t2, dest;
5769 t1 = gen_reg_rtx (V8HImode);
5770 t2 = gen_reg_rtx (V8HImode);
5771 dest = gen_lowpart (V8HImode, operands[0]);
5773 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5774 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5775 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; vec_widen_smult_lo_v8hi: same construction as the _hi_ variant
;; (mulv8hi3 into t1, smulv8hi3_highpart into t2), but the final merge
;; uses gen_vec_interleave_lowv8hi.
;; NOTE(review): the assignments of op1 and op2 are not visible here.
5779 (define_expand "vec_widen_smult_lo_v8hi"
5780 [(match_operand:V4SI 0 "register_operand" "")
5781 (match_operand:V8HI 1 "register_operand" "")
5782 (match_operand:V8HI 2 "register_operand" "")]
5785 rtx op1, op2, t1, t2, dest;
5789 t1 = gen_reg_rtx (V8HImode);
5790 t2 = gen_reg_rtx (V8HImode);
5791 dest = gen_lowpart (V8HImode, operands[0]);
5793 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5794 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5795 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; vec_widen_umult_hi_v8hi: expander from two V8HI registers to a V4SI
;; register.  t1 gets the mulv8hi3 product and t2 the
;; umulv8hi3_highpart product; they are merged with
;; gen_vec_interleave_highv8hi into operands[0] viewed as V8HI.
;; NOTE(review): the assignments of op1 and op2 are not visible here.
5799 (define_expand "vec_widen_umult_hi_v8hi"
5800 [(match_operand:V4SI 0 "register_operand" "")
5801 (match_operand:V8HI 1 "register_operand" "")
5802 (match_operand:V8HI 2 "register_operand" "")]
5805 rtx op1, op2, t1, t2, dest;
5809 t1 = gen_reg_rtx (V8HImode);
5810 t2 = gen_reg_rtx (V8HImode);
5811 dest = gen_lowpart (V8HImode, operands[0]);
5813 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5814 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5815 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; vec_widen_umult_lo_v8hi: same construction as the _hi_ variant
;; (mulv8hi3 into t1, umulv8hi3_highpart into t2), but the final merge
;; uses gen_vec_interleave_lowv8hi.
;; NOTE(review): the assignments of op1 and op2 are not visible here.
5819 (define_expand "vec_widen_umult_lo_v8hi"
5820 [(match_operand:V4SI 0 "register_operand" "")
5821 (match_operand:V8HI 1 "register_operand" "")
5822 (match_operand:V8HI 2 "register_operand" "")]
5825 rtx op1, op2, t1, t2, dest;
5829 t1 = gen_reg_rtx (V8HImode);
5830 t2 = gen_reg_rtx (V8HImode);
5831 dest = gen_lowpart (V8HImode, operands[0]);
5833 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5834 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5835 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; vec_widen_smult_hi_v4si: expander from two V4SI registers to a V2DI
;; register.  Each input is first shuffled with gen_sse2_pshufd_1 into a
;; fresh V4SI temporary, and the temporaries are passed to
;; gen_xop_mulv2div2di3_high.
;; NOTE(review): the pshufd selector arguments and the enabling condition
;; are not visible in this excerpt.
5839 (define_expand "vec_widen_smult_hi_v4si"
5840 [(match_operand:V2DI 0 "register_operand" "")
5841 (match_operand:V4SI 1 "register_operand" "")
5842 (match_operand:V4SI 2 "register_operand" "")]
5847 t1 = gen_reg_rtx (V4SImode);
5848 t2 = gen_reg_rtx (V4SImode);
5850 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5855 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5860 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; vec_widen_smult_lo_v4si: same construction as the _hi_ variant, but
;; the shuffled temporaries are passed to gen_xop_mulv2div2di3_low.
;; NOTE(review): the pshufd selector arguments and the enabling condition
;; are not visible in this excerpt.
5864 (define_expand "vec_widen_smult_lo_v4si"
5865 [(match_operand:V2DI 0 "register_operand" "")
5866 (match_operand:V4SI 1 "register_operand" "")
5867 (match_operand:V4SI 2 "register_operand" "")]
5872 t1 = gen_reg_rtx (V4SImode);
5873 t2 = gen_reg_rtx (V4SImode);
5875 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5880 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5885 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; vec_widen_umult_hi_v4si: expander from two V4SI registers to a V2DI
;; register.  Each input is interleaved with itself using
;; gen_vec_interleave_highv4si, and the two temporaries are multiplied
;; with gen_sse2_umulv2siv2di3 (which uses elements 0 and 2).
;; NOTE(review): the assignments of op1 and op2 are not visible here.
5889 (define_expand "vec_widen_umult_hi_v4si"
5890 [(match_operand:V2DI 0 "register_operand" "")
5891 (match_operand:V4SI 1 "register_operand" "")
5892 (match_operand:V4SI 2 "register_operand" "")]
5895 rtx op1, op2, t1, t2;
5899 t1 = gen_reg_rtx (V4SImode);
5900 t2 = gen_reg_rtx (V4SImode);
5902 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5903 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5904 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; vec_widen_umult_lo_v4si: same construction as the _hi_ variant, but
;; each input is interleaved with itself using
;; gen_vec_interleave_lowv4si before the gen_sse2_umulv2siv2di3 multiply.
;; NOTE(review): the assignments of op1 and op2 are not visible here.
5908 (define_expand "vec_widen_umult_lo_v4si"
5909 [(match_operand:V2DI 0 "register_operand" "")
5910 (match_operand:V4SI 1 "register_operand" "")
5911 (match_operand:V4SI 2 "register_operand" "")]
5914 rtx op1, op2, t1, t2;
5918 t1 = gen_reg_rtx (V4SImode);
5919 t2 = gen_reg_rtx (V4SImode);
5921 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5922 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5923 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; sdot_prodv8hi: expander taking two V8HI inputs (operands 1 and 2) and
;; a V4SI accumulator (operand 3), producing a V4SI result.  It emits
;; gen_sse2_pmaddwd into a fresh V4SI temporary and then adds that
;; temporary to operand 3 with gen_addv4si3.
5927 (define_expand "sdot_prodv8hi"
5928 [(match_operand:V4SI 0 "register_operand" "")
5929 (match_operand:V8HI 1 "register_operand" "")
5930 (match_operand:V8HI 2 "register_operand" "")
5931 (match_operand:V4SI 3 "register_operand" "")]
5934 rtx t = gen_reg_rtx (V4SImode);
5935 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5936 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; udot_prodv4si: expander taking two V4SI inputs (operands 1 and 2) and
;; a V2DI accumulator (operand 3), producing a V2DI result.  Steps:
;;   1. t1 = gen_sse2_umulv2siv2di3 of the inputs, then t1 += operand 3;
;;   2. t2 and t3 = the inputs shifted right as V1TI values
;;      (gen_sse2_lshrv1ti3);
;;   3. t4 = gen_sse2_umulv2siv2di3 (t2, t3);
;;   4. operands[0] = t1 + t4.
;; NOTE(review): the V1TI shift amounts are not visible in this excerpt.
5940 (define_expand "udot_prodv4si"
5941 [(match_operand:V2DI 0 "register_operand" "")
5942 (match_operand:V4SI 1 "register_operand" "")
5943 (match_operand:V4SI 2 "register_operand" "")
5944 (match_operand:V2DI 3 "register_operand" "")]
5949 t1 = gen_reg_rtx (V2DImode);
5950 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5951 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5953 t2 = gen_reg_rtx (V4SImode);
5954 t3 = gen_reg_rtx (V4SImode);
5955 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5956 gen_lowpart (V1TImode, operands[1]),
5958 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5959 gen_lowpart (V1TImode, operands[2]),
5962 t4 = gen_reg_rtx (V2DImode);
5963 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5965 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; *avx_ashr<mode>3: three-operand VEX form emitting vpsra<ssevecsize>
;; for the SSEMODE24 modes (V8HI and V4SI).  The SI shift count
;; (operand 2) may be a register or an N-constrained immediate; the
;; length_immediate attribute depends on whether it is a const_int.
5969 (define_insn "*avx_ashr<mode>3"
5970 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5972 (match_operand:SSEMODE24 1 "register_operand" "x")
5973 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5975 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5976 [(set_attr "type" "sseishft")
5977 (set_attr "prefix" "vex")
5978 (set (attr "length_immediate")
5979 (if_then_else (match_operand 2 "const_int_operand" "")
5981 (const_string "0")))
5982 (set_attr "mode" "TI")])
;; ashr<mode>3: named two-operand form emitting psra<ssevecsize> for the
;; SSEMODE24 modes.  Operand 1 is tied to the destination; the SI shift
;; count may be a register or an N-constrained immediate, and
;; length_immediate depends on whether it is a const_int.
5984 (define_insn "ashr<mode>3"
5985 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5987 (match_operand:SSEMODE24 1 "register_operand" "0")
5988 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5990 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5991 [(set_attr "type" "sseishft")
5992 (set_attr "prefix_data16" "1")
5993 (set (attr "length_immediate")
5994 (if_then_else (match_operand 2 "const_int_operand" "")
5996 (const_string "0")))
5997 (set_attr "mode" "TI")])
;; *avx_lshrv1ti3: VEX form of the V1TI shift, emitting vpsrldq.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing, so the instruction receives a byte
;; count.
5999 (define_insn "*avx_lshrv1ti3"
6000 [(set (match_operand:V1TI 0 "register_operand" "=x")
6002 (match_operand:V1TI 1 "register_operand" "x")
6003 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6006 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6007 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
6009 [(set_attr "type" "sseishft")
6010 (set_attr "prefix" "vex")
6011 (set_attr "length_immediate" "1")
6012 (set_attr "mode" "TI")])
;; *avx_lshr<mode>3: three-operand VEX form of the lshiftrt operation
;; for the SSEMODE248 modes, emitting vpsrl<ssevecsize>.  The SI shift
;; count may be a register or an N-constrained immediate, and
;; length_immediate depends on whether it is a const_int.
6014 (define_insn "*avx_lshr<mode>3"
6015 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6016 (lshiftrt:SSEMODE248
6017 (match_operand:SSEMODE248 1 "register_operand" "x")
6018 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6020 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6021 [(set_attr "type" "sseishft")
6022 (set_attr "prefix" "vex")
6023 (set (attr "length_immediate")
6024 (if_then_else (match_operand 2 "const_int_operand" "")
6026 (const_string "0")))
6027 (set_attr "mode" "TI")])
;; sse2_lshrv1ti3: named two-operand form of the V1TI shift, emitting
;; psrldq.  Operand 1 is tied to the destination.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, so the instruction receives a byte count.
6029 (define_insn "sse2_lshrv1ti3"
6030 [(set (match_operand:V1TI 0 "register_operand" "=x")
6032 (match_operand:V1TI 1 "register_operand" "0")
6033 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6036 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6037 return "psrldq\t{%2, %0|%0, %2}";
6039 [(set_attr "type" "sseishft")
6040 (set_attr "prefix_data16" "1")
6041 (set_attr "length_immediate" "1")
6042 (set_attr "atom_unit" "sishuf")
6043 (set_attr "mode" "TI")])
;; lshr<mode>3: named two-operand form of the lshiftrt operation for the
;; SSEMODE248 modes, emitting psrl<ssevecsize>.  Operand 1 is tied to
;; the destination; the SI shift count may be a register or an
;; N-constrained immediate.
6045 (define_insn "lshr<mode>3"
6046 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6047 (lshiftrt:SSEMODE248
6048 (match_operand:SSEMODE248 1 "register_operand" "0")
6049 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6051 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
6052 [(set_attr "type" "sseishft")
6053 (set_attr "prefix_data16" "1")
6054 (set (attr "length_immediate")
6055 (if_then_else (match_operand 2 "const_int_operand" "")
6057 (const_string "0")))
6058 (set_attr "mode" "TI")])
;; *avx_ashlv1ti3: VEX form of the V1TI left shift (ashift), emitting
;; vpslldq.  Operand 2 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing, so the instruction
;; receives a byte count.
6060 (define_insn "*avx_ashlv1ti3"
6061 [(set (match_operand:V1TI 0 "register_operand" "=x")
6062 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
6063 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6066 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6067 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
6069 [(set_attr "type" "sseishft")
6070 (set_attr "prefix" "vex")
6071 (set_attr "length_immediate" "1")
6072 (set_attr "mode" "TI")])
;; *avx_ashl<mode>3: three-operand VEX form emitting vpsll<ssevecsize>
;; for the SSEMODE248 modes.  The SI shift count may be a register or an
;; N-constrained immediate, and length_immediate depends on whether it
;; is a const_int.
6074 (define_insn "*avx_ashl<mode>3"
6075 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6077 (match_operand:SSEMODE248 1 "register_operand" "x")
6078 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6080 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6081 [(set_attr "type" "sseishft")
6082 (set_attr "prefix" "vex")
6083 (set (attr "length_immediate")
6084 (if_then_else (match_operand 2 "const_int_operand" "")
6086 (const_string "0")))
6087 (set_attr "mode" "TI")])
;; sse2_ashlv1ti3: named two-operand form of the V1TI left shift
;; (ashift), emitting pslldq.  Operand 1 is tied to the destination.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing, so the instruction receives a byte
;; count.
6089 (define_insn "sse2_ashlv1ti3"
6090 [(set (match_operand:V1TI 0 "register_operand" "=x")
6091 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
6092 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6095 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6096 return "pslldq\t{%2, %0|%0, %2}";
6098 [(set_attr "type" "sseishft")
6099 (set_attr "prefix_data16" "1")
6100 (set_attr "length_immediate" "1")
6101 (set_attr "mode" "TI")])
;; ashl<mode>3: named two-operand form emitting psll<ssevecsize> for the
;; SSEMODE248 modes.  Operand 1 is tied to the destination; the SI shift
;; count may be a register or an N-constrained immediate, and
;; length_immediate depends on whether it is a const_int.
6103 (define_insn "ashl<mode>3"
6104 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6106 (match_operand:SSEMODE248 1 "register_operand" "0")
6107 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6109 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6110 [(set_attr "type" "sseishft")
6111 (set_attr "prefix_data16" "1")
6112 (set (attr "length_immediate")
6113 (if_then_else (match_operand 2 "const_int_operand" "")
6115 (const_string "0")))
6116 (set_attr "mode" "TI")])
;; vec_shl_<mode>: expander for the SSEMODEI modes whose shift amount
;; must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; replaces operands 0 and 1 with their V1TI views (gen_lowpart) so the
;; V1TI shift insns can match.
6118 (define_expand "vec_shl_<mode>"
6119 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6121 (match_operand:SSEMODEI 1 "register_operand" "")
6122 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6125 operands[0] = gen_lowpart (V1TImode, operands[0]);
6126 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; vec_shr_<mode>: counterpart of vec_shl_<mode>.  The shift amount must
;; satisfy const_0_to_255_mul_8_operand, and the preparation code
;; replaces operands 0 and 1 with their V1TI views (gen_lowpart).
6129 (define_expand "vec_shr_<mode>"
6130 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6132 (match_operand:SSEMODEI 1 "register_operand" "")
6133 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6136 operands[0] = gen_lowpart (V1TImode, operands[0]);
6137 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; *avx_<code><mode>3: three-operand VEX form emitting
;; vp<maxmin_int><ssevecsize> for the SSEMODE124 modes.  Operands 1 and 2
;; are commutative and operand 2 may be memory.  The prefix_extra
;; attribute is chosen by testing whether operand 0 is V16QI; the else
;; branch yields 1.
6140 (define_insn "*avx_<code><mode>3"
6141 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6143 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6144 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6145 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6146 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6147 [(set_attr "type" "sseiadd")
6148 (set (attr "prefix_extra")
6149 (if_then_else (match_operand:V16QI 0 "" "")
6151 (const_string "1")))
6152 (set_attr "prefix" "vex")
6153 (set_attr "mode" "TI")])
;; <code>v16qi3: expander for a V16QI binary operation selected by the
;; <code> iterator.  The preparation statement hands the operand array
;; to ix86_fixup_binary_operands_no_copy with <CODE> and V16QImode.
6155 (define_expand "<code>v16qi3"
6156 [(set (match_operand:V16QI 0 "register_operand" "")
6158 (match_operand:V16QI 1 "nonimmediate_operand" "")
6159 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6161 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; *<code>v16qi3: two-operand SSE2 form emitting p<maxmin_int>b.
;; Operand 1 is tied to the destination and is commutative with
;; operand 2, which may be memory.
6163 (define_insn "*<code>v16qi3"
6164 [(set (match_operand:V16QI 0 "register_operand" "=x")
6166 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6167 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6168 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6169 "p<maxmin_int>b\t{%2, %0|%0, %2}"
6170 [(set_attr "type" "sseiadd")
6171 (set_attr "prefix_data16" "1")
6172 (set_attr "mode" "TI")])
;; *avx_<code><mode>3: second VEX min/max pattern over the SSEMODE124
;; modes, emitting vp<maxmin_int><ssevecsize>.  It differs from the
;; earlier pattern of the same name in that prefix_extra is chosen by
;; testing whether operand 0 is V8HI; the else branch yields 1.
6174 (define_insn "*avx_<code><mode>3"
6175 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6177 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6178 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6179 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6180 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6181 [(set_attr "type" "sseiadd")
6182 (set (attr "prefix_extra")
6183 (if_then_else (match_operand:V8HI 0 "" "")
6185 (const_string "1")))
6186 (set_attr "prefix" "vex")
6187 (set_attr "mode" "TI")])
;; <code>v8hi3: expander for a V8HI binary operation selected by the
;; <code> iterator.  The preparation statement hands the operand array
;; to ix86_fixup_binary_operands_no_copy with <CODE> and V8HImode.
6189 (define_expand "<code>v8hi3"
6190 [(set (match_operand:V8HI 0 "register_operand" "")
6192 (match_operand:V8HI 1 "nonimmediate_operand" "")
6193 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6195 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; *<code>v8hi3: two-operand SSE2 form emitting p<maxmin_int>w.
;; Operand 1 is tied to the destination and is commutative with
;; operand 2, which may be memory.
6197 (define_insn "*<code>v8hi3"
6198 [(set (match_operand:V8HI 0 "register_operand" "=x")
6200 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6201 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6202 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6203 "p<maxmin_int>w\t{%2, %0|%0, %2}"
6204 [(set_attr "type" "sseiadd")
6205 (set_attr "prefix_data16" "1")
6206 (set_attr "mode" "TI")])
;; umaxv8hi3: expander for the unsigned V8HI maximum.  Two sequences are
;; visible in the body:
;;   * a call to ix86_fixup_binary_operands_no_copy with code UMAX;
;;   * a synthesized form: op3 = sse2_ussubv8hi3 (operand 1, op2), then
;;     op0 = op3 + op2.  op3 defaults to op0, but a fresh register is
;;     used when op0 and op2 are the same rtx, so op2 is still intact
;;     for the final add.
;; NOTE(review): the condition selecting between the two sequences is not
;; visible in this excerpt.
6208 (define_expand "umaxv8hi3"
6209 [(set (match_operand:V8HI 0 "register_operand" "")
6210 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6211 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6215 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6218 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6219 if (rtx_equal_p (op3, op2))
6220 op3 = gen_reg_rtx (V8HImode);
6221 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6222 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; smax<mode>3: expander for the signed maximum on the SSEMODE14 modes
;; (V16QI and V4SI).  Two sequences are visible in the body:
;;   * a call to ix86_fixup_binary_operands_no_copy with code SMAX;
;;   * a conditional-select form built through ix86_expand_int_vcond,
;;     with xops = { dest, op1, op2, (GT op1 op2), op1, op2 }.
;; NOTE(review): the condition selecting between the two sequences is not
;; visible in this excerpt.
6227 (define_expand "smax<mode>3"
6228 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6229 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6230 (match_operand:SSEMODE14 2 "register_operand" "")))]
6234 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6240 xops[0] = operands[0];
6241 xops[1] = operands[1];
6242 xops[2] = operands[2];
6243 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6244 xops[4] = operands[1];
6245 xops[5] = operands[2];
6246 ok = ix86_expand_int_vcond (xops);
;; *sse4_1_<code><mode>3: two-operand SSE4.1 min/max insn for the
;; SSEMODE14 modes, emitting p<maxmin_int><ssevecsize>.  Operand 1 is
;; tied to the destination and is commutative with operand 2.
6252 (define_insn "*sse4_1_<code><mode>3"
6253 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6255 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6256 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6257 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6258 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6259 [(set_attr "type" "sseiadd")
6260 (set_attr "prefix_extra" "1")
6261 (set_attr "mode" "TI")])
;; smaxv2di3: expander for the signed V2DI maximum.  The visible body
;; builds it through ix86_expand_int_vcond with
;; xops = { dest, op1, op2, (GT op1 op2), op1, op2 }.
6263 (define_expand "smaxv2di3"
6264 [(set (match_operand:V2DI 0 "register_operand" "")
6265 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
6266 (match_operand:V2DI 2 "register_operand" "")))]
6272 xops[0] = operands[0];
6273 xops[1] = operands[1];
6274 xops[2] = operands[2];
6275 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6276 xops[4] = operands[1];
6277 xops[5] = operands[2];
6278 ok = ix86_expand_int_vcond (xops);
;; umaxv4si3: expander for the unsigned V4SI maximum.  Two sequences are
;; visible in the body:
;;   * a call to ix86_fixup_binary_operands_no_copy with code UMAX;
;;   * a conditional-select form built through ix86_expand_int_vcond,
;;     with xops = { dest, op1, op2, (GTU op1 op2), op1, op2 }.
;; NOTE(review): the condition selecting between the two sequences is not
;; visible in this excerpt.
6283 (define_expand "umaxv4si3"
6284 [(set (match_operand:V4SI 0 "register_operand" "")
6285 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6286 (match_operand:V4SI 2 "register_operand" "")))]
6290 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6296 xops[0] = operands[0];
6297 xops[1] = operands[1];
6298 xops[2] = operands[2];
6299 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6300 xops[4] = operands[1];
6301 xops[5] = operands[2];
6302 ok = ix86_expand_int_vcond (xops);
;; *sse4_1_<code><mode>3: two-operand SSE4.1 min/max insn for the
;; SSEMODE24 modes (V8HI and V4SI), emitting
;; p<maxmin_int><ssevecsize>.  Operand 1 is tied to the destination and
;; is commutative with operand 2.
6308 (define_insn "*sse4_1_<code><mode>3"
6309 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6311 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6312 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6313 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6314 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6315 [(set_attr "type" "sseiadd")
6316 (set_attr "prefix_extra" "1")
6317 (set_attr "mode" "TI")])
;; umaxv2di3: expander for the unsigned V2DI maximum.  The visible body
;; builds it through ix86_expand_int_vcond with
;; xops = { dest, op1, op2, (GTU op1 op2), op1, op2 }.
6319 (define_expand "umaxv2di3"
6320 [(set (match_operand:V2DI 0 "register_operand" "")
6321 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6322 (match_operand:V2DI 2 "register_operand" "")))]
6328 xops[0] = operands[0];
6329 xops[1] = operands[1];
6330 xops[2] = operands[2];
6331 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6332 xops[4] = operands[1];
6333 xops[5] = operands[2];
6334 ok = ix86_expand_int_vcond (xops);
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI).  The body shows
;; two strategies: canonicalising operands for the smin pattern with
;; ix86_fixup_binary_operands_no_copy, and a compare-and-select expansion.
;; In the latter the true/false values are swapped relative to the max
;; expanders, so op0 = (op1 > op2) ? op2 : op1.
;; NOTE(review): the condition selecting the strategy is not visible here.
6339 (define_expand "smin<mode>3"
6340 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6341 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6342 (match_operand:SSEMODE14 2 "register_operand" "")))]
6346 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6352 xops[0] = operands[0];
6353 xops[1] = operands[2];
6354 xops[2] = operands[1];
6355 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6356 xops[4] = operands[1];
6357 xops[5] = operands[2];
6358 ok = ix86_expand_int_vcond (xops);
;; Signed minimum of two V2DI registers as a compare-and-select.  xops[]
;; follows the vcond operand layout; the true/false values (xops[1],
;; xops[2]) are op2/op1, so op0 = (op1 > op2) ? op2 : op1.
6364 (define_expand "sminv2di3"
6365 [(set (match_operand:V2DI 0 "register_operand" "")
6366 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
6367 (match_operand:V2DI 2 "register_operand" "")))]
6373 xops[0] = operands[0];
6374 xops[1] = operands[2];
6375 xops[2] = operands[1];
6376 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6377 xops[4] = operands[1];
6378 xops[5] = operands[2];
6379 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI).  The body shows
;; two strategies: canonicalising operands for the umin pattern with
;; ix86_fixup_binary_operands_no_copy, and an unsigned compare-and-select
;; op0 = (op1 >u op2) ? op2 : op1 through ix86_expand_int_vcond.
;; NOTE(review): the condition selecting the strategy is not visible here.
6384 (define_expand "umin<mode>3"
6385 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6386 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6387 (match_operand:SSEMODE24 2 "register_operand" "")))]
6391 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6397 xops[0] = operands[0];
6398 xops[1] = operands[2];
6399 xops[2] = operands[1];
6400 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6401 xops[4] = operands[1];
6402 xops[5] = operands[2];
6403 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum of two V2DI registers as a compare-and-select.  With
;; the vcond operand layout and swapped true/false values this computes
;; op0 = (op1 >u op2) ? op2 : op1 through ix86_expand_int_vcond.
6409 (define_expand "uminv2di3"
6410 [(set (match_operand:V2DI 0 "register_operand" "")
6411 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6412 (match_operand:V2DI 2 "register_operand" "")))]
6418 xops[0] = operands[0];
6419 xops[1] = operands[2];
6420 xops[2] = operands[1];
6421 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6422 xops[4] = operands[1];
6423 xops[5] = operands[2];
6424 ok = ix86_expand_int_vcond (xops);
6429 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6431 ;; Parallel integral comparisons
6433 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality on the SSEMODE124 modes.  Enabled for
;; SSE2 when XOP is not in use; the preparation statement only
;; canonicalises the two inputs for an EQ binary operation.
6435 (define_expand "sse2_eq<mode>3"
6436 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6438 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6439 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6440 "TARGET_SSE2 && !TARGET_XOP "
6441 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX (VEX-encoded, three-operand) equality compare, vpcmpeq<ssevecsize>,
;; for the SSEMODE1248 modes.  Operands 1 and 2 are commutative ('%');
;; operand 2 may be memory.  The "prefix_extra" attribute is computed by
;; testing whether operand 0 is V2DI; the non-V2DI value is "*".
6443 (define_insn "*avx_eq<mode>3"
6444 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6446 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6447 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6448 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6449 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6450 [(set_attr "type" "ssecmp")
6451 (set (attr "prefix_extra")
6452 (if_then_else (match_operand:V2DI 0 "" "")
6454 (const_string "*")))
6455 (set_attr "prefix" "vex")
6456 (set_attr "mode" "TI")])
;; SSE2 two-operand equality compare, pcmpeq<ssevecsize>, for the
;; SSEMODE124 modes.  Operand 1 is tied to the destination and is
;; commutative with operand 2 ("%0").  Not used when XOP is enabled.
6458 (define_insn "*sse2_eq<mode>3"
6459 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6461 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6462 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6463 "TARGET_SSE2 && !TARGET_XOP
6464 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6465 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6466 [(set_attr "type" "ssecmp")
6467 (set_attr "prefix_data16" "1")
6468 (set_attr "mode" "TI")])
;; Expander for V2DI element-wise equality.  The preparation statement
;; canonicalises the two inputs for an EQ binary operation in V2DImode.
6470 (define_expand "sse4_1_eqv2di3"
6471 [(set (match_operand:V2DI 0 "register_operand" "")
6473 (match_operand:V2DI 1 "nonimmediate_operand" "")
6474 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6476 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 two-operand V2DI equality compare (pcmpeqq).  Operand 1 is tied
;; to the destination and commutative with operand 2, which may be memory.
6478 (define_insn "*sse4_1_eqv2di3"
6479 [(set (match_operand:V2DI 0 "register_operand" "=x")
6481 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6482 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6483 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6484 "pcmpeqq\t{%2, %0|%0, %2}"
6485 [(set_attr "type" "ssecmp")
6486 (set_attr "prefix_extra" "1")
6487 (set_attr "mode" "TI")])
;; AVX (VEX-encoded, three-operand) greater-than compare,
;; vpcmpgt<ssevecsize>, for the SSEMODE1248 modes.  Unlike the equality
;; patterns the inputs are not marked commutative: operand 1 must be a
;; register and only operand 2 may be memory.  "prefix_extra" depends on
;; whether operand 0 is V2DI; the non-V2DI value is "*".
6489 (define_insn "*avx_gt<mode>3"
6490 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6492 (match_operand:SSEMODE1248 1 "register_operand" "x")
6493 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6495 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6496 [(set_attr "type" "ssecmp")
6497 (set (attr "prefix_extra")
6498 (if_then_else (match_operand:V2DI 0 "" "")
6500 (const_string "*")))
6501 (set_attr "prefix" "vex")
6502 (set_attr "mode" "TI")])
;; SSE2 two-operand greater-than compare, pcmpgt<ssevecsize>, for the
;; SSEMODE124 modes.  Operand 1 must be the destination register ("0");
;; operand 2 may be a register or memory.  Not used when XOP is enabled.
6504 (define_insn "sse2_gt<mode>3"
6505 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6507 (match_operand:SSEMODE124 1 "register_operand" "0")
6508 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6509 "TARGET_SSE2 && !TARGET_XOP"
6510 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6511 [(set_attr "type" "ssecmp")
6512 (set_attr "prefix_data16" "1")
6513 (set_attr "mode" "TI")])
;; Two-operand V2DI greater-than compare (pcmpgtq).  Operand 1 must be the
;; destination register ("0"); operand 2 may be a register or memory.
6515 (define_insn "sse4_2_gtv2di3"
6516 [(set (match_operand:V2DI 0 "register_operand" "=x")
6518 (match_operand:V2DI 1 "register_operand" "0")
6519 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6521 "pcmpgtq\t{%2, %0|%0, %2}"
6522 [(set_attr "type" "ssecmp")
6523 (set_attr "prefix_extra" "1")
6524 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODE124C8 modes:
;;   op0 = (op4 <cmp3> op5) ? op1 : op2
;; where operator 3 is the comparison applied to operands 4 and 5.  The
;; operand array is handed unchanged to ix86_expand_int_vcond, whose
;; boolean result is captured in OK.
6526 (define_expand "vcond<mode>"
6527 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6528 (if_then_else:SSEMODE124C8
6529 (match_operator 3 ""
6530 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6531 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6532 (match_operand:SSEMODE124C8 1 "general_operand" "")
6533 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6536 bool ok = ix86_expand_int_vcond (operands);
;; Counterpart of vcond<mode> under the "vcondu" name, with the same
;; operand layout: op0 = (op4 <cmp3> op5) ? op1 : op2.  As there, the
;; operands go unchanged to ix86_expand_int_vcond.
6541 (define_expand "vcondu<mode>"
6542 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6543 (if_then_else:SSEMODE124C8
6544 (match_operator 3 ""
6545 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6546 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6547 (match_operand:SSEMODE124C8 1 "general_operand" "")
6548 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6551 bool ok = ix86_expand_int_vcond (operands);
6556 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6558 ;; Parallel bitwise logical operations
6560 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the 16-byte integer vector modes (SSEMODEI),
;; expressed as an XOR.  The preparation code builds a CONST_VECTOR whose
;; GET_MODE_NUNITS elements are all constm1_rtx (all bits set), forces it
;; into a register and stores it as operands[2].
6562 (define_expand "one_cmpl<mode>2"
6563 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6564 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6568 int i, n = GET_MODE_NUNITS (<MODE>mode);
6569 rtvec v = rtvec_alloc (n);
6571 for (i = 0; i < n; ++i)
6572 RTVEC_ELT (v, i) = constm1_rtx;
6574 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; 256-bit and-not for the AVX integer vector modes (AVX256MODEI): the
;; complement of operand 1 is combined with operand 2.  Although the modes
;; are integer, the instruction emitted is the packed-single form vandnps
;; and the "mode" attribute is taken from <avxvecpsmode>.
6577 (define_insn "*avx_andnot<mode>3"
6578 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6580 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6581 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6583 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6584 [(set_attr "type" "sselog")
6585 (set_attr "prefix" "vex")
6586 (set_attr "mode" "<avxvecpsmode>")])
;; And-not on the 16-byte integer vector modes for targets with SSE but
;; without SSE2: emits the packed-single instruction andnps (mode V4SF).
;; Operand 1, the complemented input, is tied to the destination.
6588 (define_insn "*sse_andnot<mode>3"
6589 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6591 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6592 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6593 "(TARGET_SSE && !TARGET_SSE2)"
6594 "andnps\t{%2, %0|%0, %2}"
6595 [(set_attr "type" "sselog")
6596 (set_attr "mode" "V4SF")])
;; VEX-encoded three-operand and-not (vpandn) on the 16-byte integer
;; vector modes.  Operand 1 is the complemented input and must be a
;; register; operand 2 may be a register or memory.
6598 (define_insn "*avx_andnot<mode>3"
6599 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6601 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6602 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6604 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6605 [(set_attr "type" "sselog")
6606 (set_attr "prefix" "vex")
6607 (set_attr "mode" "TI")])
;; Two-operand and-not (pandn) on the 16-byte integer vector modes.
;; Operand 1, the complemented input, is tied to the destination; operand
;; 2 may be a register or memory.
6609 (define_insn "sse2_andnot<mode>3"
6610 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6612 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6613 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6615 "pandn\t{%2, %0|%0, %2}"
6616 [(set_attr "type" "sselog")
6617 (set_attr "prefix_data16" "1")
6618 (set_attr "mode" "TI")])
;; And-not on TFmode values held in SSE registers, implemented with the
;; integer instruction pandn.  Operand 1, the complemented input, is tied
;; to the destination.
6620 (define_insn "*andnottf3"
6621 [(set (match_operand:TF 0 "register_operand" "=x")
6623 (not:TF (match_operand:TF 1 "register_operand" "0"))
6624 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6626 "pandn\t{%2, %0|%0, %2}"
6627 [(set_attr "type" "sselog")
6628 (set_attr "prefix_data16" "1")
6629 (set_attr "mode" "TI")])
;; Expander for the binary operations iterated over by <code> on the
;; 16-byte integer vector modes.  The preparation statement canonicalises
;; the two inputs for the binary operator <CODE>.
6631 (define_expand "<code><mode>3"
6632 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6634 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6635 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6637 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 256-bit bitwise logic (any_logic) on the AVX integer vector modes.
;; Emitted with the packed-single mnemonic v<logic>ps, three-operand VEX
;; form; operands 1 and 2 are commutative and operand 2 may be memory.
;; The "mode" attribute comes from <avxvecpsmode>.
6639 (define_insn "*avx_<code><mode>3"
6640 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6641 (any_logic:AVX256MODEI
6642 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6643 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6645 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6646 "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
6647 [(set_attr "type" "sselog")
6648 (set_attr "prefix" "vex")
6649 (set_attr "mode" "<avxvecpsmode>")])
;; Bitwise logic on the 16-byte integer vector modes for targets with SSE
;; but without SSE2: uses the packed-single mnemonic <logic>ps (mode V4SF).
;; Operand 1 is tied to the destination and commutative with operand 2.
6651 (define_insn "*sse_<code><mode>3"
6652 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6654 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6655 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6656 "(TARGET_SSE && !TARGET_SSE2)
6657 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6658 "<logic>ps\t{%2, %0|%0, %2}"
6659 [(set_attr "type" "sselog")
6660 (set_attr "mode" "V4SF")])
;; VEX-encoded three-operand bitwise logic (vp<logic>) on the 16-byte
;; integer vector modes.  Operands 1 and 2 are commutative; operand 2 may
;; be a register or memory.
6662 (define_insn "*avx_<code><mode>3"
6663 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6665 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6666 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6668 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6669 "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6670 [(set_attr "type" "sselog")
6671 (set_attr "prefix" "vex")
6672 (set_attr "mode" "TI")])
;; SSE2 two-operand bitwise logic (p<logic>) on the 16-byte integer vector
;; modes.  Operand 1 is tied to the destination and commutative with
;; operand 2, which may be a register or memory.
6674 (define_insn "*sse2_<code><mode>3"
6675 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6677 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6678 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6679 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6680 "p<logic>\t{%2, %0|%0, %2}"
6681 [(set_attr "type" "sselog")
6682 (set_attr "prefix_data16" "1")
6683 (set_attr "mode" "TI")])
;; Expander for the binary operations iterated over by <code> on TFmode.
;; The preparation statement canonicalises the two inputs for the binary
;; operator <CODE> in TFmode.
6685 (define_expand "<code>tf3"
6686 [(set (match_operand:TF 0 "register_operand" "")
6688 (match_operand:TF 1 "nonimmediate_operand" "")
6689 (match_operand:TF 2 "nonimmediate_operand" "")))]
6691 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 bitwise logic on TFmode values held in SSE registers, emitted as
;; the integer instruction p<logic>.  Operand 1 is tied to the destination
;; and commutative with operand 2.
6693 (define_insn "*<code>tf3"
6694 [(set (match_operand:TF 0 "register_operand" "=x")
6696 (match_operand:TF 1 "nonimmediate_operand" "%0")
6697 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6698 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6699 "p<logic>\t{%2, %0|%0, %2}"
6700 [(set_attr "type" "sselog")
6701 (set_attr "prefix_data16" "1")
6702 (set_attr "mode" "TI")])
6704 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6706 ;; Parallel integral element swizzling
6708 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two V8HI vectors into one V16QI vector by truncation.  Both inputs
;; are reinterpreted as V16QI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the even-numbered
;; bytes, i.e. the low byte of each halfword -- confirm against the helper.
6710 (define_expand "vec_pack_trunc_v8hi"
6711 [(match_operand:V16QI 0 "register_operand" "")
6712 (match_operand:V8HI 1 "register_operand" "")
6713 (match_operand:V8HI 2 "register_operand" "")]
6716 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6717 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6718 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two V4SI vectors into one V8HI vector by truncation.  Both inputs
;; are reinterpreted as V8HI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the even-numbered
;; halfwords, i.e. the low half of each word -- confirm against the helper.
6722 (define_expand "vec_pack_trunc_v4si"
6723 [(match_operand:V8HI 0 "register_operand" "")
6724 (match_operand:V4SI 1 "register_operand" "")
6725 (match_operand:V4SI 2 "register_operand" "")]
6728 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6729 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6730 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two V2DI vectors into one V4SI vector by truncation.  Both inputs
;; are reinterpreted as V4SI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the even-numbered
;; words, i.e. the low half of each doubleword -- confirm against the helper.
6734 (define_expand "vec_pack_trunc_v2di"
6735 [(match_operand:V4SI 0 "register_operand" "")
6736 (match_operand:V2DI 1 "register_operand" "")
6737 (match_operand:V2DI 2 "register_operand" "")]
6740 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6741 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6742 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; VEX-encoded three-operand vpacksswb: two V8HI inputs (operand 1 a
;; register, operand 2 a register or memory) produce one V16QI result.
6746 (define_insn "*avx_packsswb"
6747 [(set (match_operand:V16QI 0 "register_operand" "=x")
6750 (match_operand:V8HI 1 "register_operand" "x"))
6752 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6754 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6755 [(set_attr "type" "sselog")
6756 (set_attr "prefix" "vex")
6757 (set_attr "mode" "TI")])
;; Two-operand packsswb: two V8HI inputs produce one V16QI result.
;; Operand 1 is tied to the destination; operand 2 may be memory.
6759 (define_insn "sse2_packsswb"
6760 [(set (match_operand:V16QI 0 "register_operand" "=x")
6763 (match_operand:V8HI 1 "register_operand" "0"))
6765 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6767 "packsswb\t{%2, %0|%0, %2}"
6768 [(set_attr "type" "sselog")
6769 (set_attr "prefix_data16" "1")
6770 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vpackssdw: two V4SI inputs (operand 1 a
;; register, operand 2 a register or memory) produce one V8HI result.
6772 (define_insn "*avx_packssdw"
6773 [(set (match_operand:V8HI 0 "register_operand" "=x")
6776 (match_operand:V4SI 1 "register_operand" "x"))
6778 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6780 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6781 [(set_attr "type" "sselog")
6782 (set_attr "prefix" "vex")
6783 (set_attr "mode" "TI")])
;; Two-operand packssdw: two V4SI inputs produce one V8HI result.
;; Operand 1 is tied to the destination; operand 2 may be memory.
6785 (define_insn "sse2_packssdw"
6786 [(set (match_operand:V8HI 0 "register_operand" "=x")
6789 (match_operand:V4SI 1 "register_operand" "0"))
6791 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6793 "packssdw\t{%2, %0|%0, %2}"
6794 [(set_attr "type" "sselog")
6795 (set_attr "prefix_data16" "1")
6796 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vpackuswb: two V8HI inputs (operand 1 a
;; register, operand 2 a register or memory) produce one V16QI result.
6798 (define_insn "*avx_packuswb"
6799 [(set (match_operand:V16QI 0 "register_operand" "=x")
6802 (match_operand:V8HI 1 "register_operand" "x"))
6804 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6806 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6807 [(set_attr "type" "sselog")
6808 (set_attr "prefix" "vex")
6809 (set_attr "mode" "TI")])
;; Two-operand packuswb: two V8HI inputs produce one V16QI result.
;; Operand 1 is tied to the destination; operand 2 may be memory.
6811 (define_insn "sse2_packuswb"
6812 [(set (match_operand:V16QI 0 "register_operand" "=x")
6815 (match_operand:V8HI 1 "register_operand" "0"))
6817 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6819 "packuswb\t{%2, %0|%0, %2}"
6820 [(set_attr "type" "sselog")
6821 (set_attr "prefix_data16" "1")
6822 (set_attr "mode" "TI")])
;; VEX-encoded vpunpckhbw.  The selection vector alternates indices 8..15
;; with 24..31, i.e. it interleaves the high eight bytes of operand 1 with
;; the high eight bytes of operand 2 (indices 16..31 presumably address
;; operand 2 within the combined 32-element input).
6824 (define_insn "*avx_interleave_highv16qi"
6825 [(set (match_operand:V16QI 0 "register_operand" "=x")
6828 (match_operand:V16QI 1 "register_operand" "x")
6829 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6830 (parallel [(const_int 8) (const_int 24)
6831 (const_int 9) (const_int 25)
6832 (const_int 10) (const_int 26)
6833 (const_int 11) (const_int 27)
6834 (const_int 12) (const_int 28)
6835 (const_int 13) (const_int 29)
6836 (const_int 14) (const_int 30)
6837 (const_int 15) (const_int 31)])))]
6839 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6840 [(set_attr "type" "sselog")
6841 (set_attr "prefix" "vex")
6842 (set_attr "mode" "TI")])
;; Two-operand punpckhbw with operand 1 tied to the destination.  The
;; selection vector alternates indices 8..15 with 24..31, interleaving the
;; high eight bytes of operand 1 with the high eight bytes of operand 2
;; (indices 16..31 presumably address operand 2).
6844 (define_insn "vec_interleave_highv16qi"
6845 [(set (match_operand:V16QI 0 "register_operand" "=x")
6848 (match_operand:V16QI 1 "register_operand" "0")
6849 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6850 (parallel [(const_int 8) (const_int 24)
6851 (const_int 9) (const_int 25)
6852 (const_int 10) (const_int 26)
6853 (const_int 11) (const_int 27)
6854 (const_int 12) (const_int 28)
6855 (const_int 13) (const_int 29)
6856 (const_int 14) (const_int 30)
6857 (const_int 15) (const_int 31)])))]
6859 "punpckhbw\t{%2, %0|%0, %2}"
6860 [(set_attr "type" "sselog")
6861 (set_attr "prefix_data16" "1")
6862 (set_attr "mode" "TI")])
;; VEX-encoded vpunpcklbw.  The selection vector alternates indices 0..7
;; with 16..23, i.e. it interleaves the low eight bytes of operand 1 with
;; the low eight bytes of operand 2 (indices 16..31 presumably address
;; operand 2 within the combined 32-element input).
6864 (define_insn "*avx_interleave_lowv16qi"
6865 [(set (match_operand:V16QI 0 "register_operand" "=x")
6868 (match_operand:V16QI 1 "register_operand" "x")
6869 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6870 (parallel [(const_int 0) (const_int 16)
6871 (const_int 1) (const_int 17)
6872 (const_int 2) (const_int 18)
6873 (const_int 3) (const_int 19)
6874 (const_int 4) (const_int 20)
6875 (const_int 5) (const_int 21)
6876 (const_int 6) (const_int 22)
6877 (const_int 7) (const_int 23)])))]
6879 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6880 [(set_attr "type" "sselog")
6881 (set_attr "prefix" "vex")
6882 (set_attr "mode" "TI")])
;; Two-operand punpcklbw with operand 1 tied to the destination.  The
;; selection vector alternates indices 0..7 with 16..23, interleaving the
;; low eight bytes of operand 1 with the low eight bytes of operand 2
;; (indices 16..31 presumably address operand 2).
6884 (define_insn "vec_interleave_lowv16qi"
6885 [(set (match_operand:V16QI 0 "register_operand" "=x")
6888 (match_operand:V16QI 1 "register_operand" "0")
6889 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6890 (parallel [(const_int 0) (const_int 16)
6891 (const_int 1) (const_int 17)
6892 (const_int 2) (const_int 18)
6893 (const_int 3) (const_int 19)
6894 (const_int 4) (const_int 20)
6895 (const_int 5) (const_int 21)
6896 (const_int 6) (const_int 22)
6897 (const_int 7) (const_int 23)])))]
6899 "punpcklbw\t{%2, %0|%0, %2}"
6900 [(set_attr "type" "sselog")
6901 (set_attr "prefix_data16" "1")
6902 (set_attr "mode" "TI")])
;; VEX-encoded vpunpckhwd.  The selection vector alternates indices 4..7
;; with 12..15, interleaving the high four halfwords of operand 1 with the
;; high four of operand 2 (indices 8..15 presumably address operand 2).
6904 (define_insn "*avx_interleave_highv8hi"
6905 [(set (match_operand:V8HI 0 "register_operand" "=x")
6908 (match_operand:V8HI 1 "register_operand" "x")
6909 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6910 (parallel [(const_int 4) (const_int 12)
6911 (const_int 5) (const_int 13)
6912 (const_int 6) (const_int 14)
6913 (const_int 7) (const_int 15)])))]
6915 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6916 [(set_attr "type" "sselog")
6917 (set_attr "prefix" "vex")
6918 (set_attr "mode" "TI")])
;; Two-operand punpckhwd with operand 1 tied to the destination.  The
;; selection vector alternates indices 4..7 with 12..15, interleaving the
;; high four halfwords of operand 1 with the high four of operand 2
;; (indices 8..15 presumably address operand 2).
6920 (define_insn "vec_interleave_highv8hi"
6921 [(set (match_operand:V8HI 0 "register_operand" "=x")
6924 (match_operand:V8HI 1 "register_operand" "0")
6925 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6926 (parallel [(const_int 4) (const_int 12)
6927 (const_int 5) (const_int 13)
6928 (const_int 6) (const_int 14)
6929 (const_int 7) (const_int 15)])))]
6931 "punpckhwd\t{%2, %0|%0, %2}"
6932 [(set_attr "type" "sselog")
6933 (set_attr "prefix_data16" "1")
6934 (set_attr "mode" "TI")])
;; VEX-encoded vpunpcklwd.  The selection vector alternates indices 0..3
;; with 8..11, interleaving the low four halfwords of operand 1 with the
;; low four of operand 2 (indices 8..15 presumably address operand 2).
6936 (define_insn "*avx_interleave_lowv8hi"
6937 [(set (match_operand:V8HI 0 "register_operand" "=x")
6940 (match_operand:V8HI 1 "register_operand" "x")
6941 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6942 (parallel [(const_int 0) (const_int 8)
6943 (const_int 1) (const_int 9)
6944 (const_int 2) (const_int 10)
6945 (const_int 3) (const_int 11)])))]
6947 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6948 [(set_attr "type" "sselog")
6949 (set_attr "prefix" "vex")
6950 (set_attr "mode" "TI")])
;; Two-operand punpcklwd with operand 1 tied to the destination.  The
;; selection vector alternates indices 0..3 with 8..11, interleaving the
;; low four halfwords of operand 1 with the low four of operand 2
;; (indices 8..15 presumably address operand 2).
6952 (define_insn "vec_interleave_lowv8hi"
6953 [(set (match_operand:V8HI 0 "register_operand" "=x")
6956 (match_operand:V8HI 1 "register_operand" "0")
6957 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6958 (parallel [(const_int 0) (const_int 8)
6959 (const_int 1) (const_int 9)
6960 (const_int 2) (const_int 10)
6961 (const_int 3) (const_int 11)])))]
6963 "punpcklwd\t{%2, %0|%0, %2}"
6964 [(set_attr "type" "sselog")
6965 (set_attr "prefix_data16" "1")
6966 (set_attr "mode" "TI")])
;; VEX-encoded vpunpckhdq.  The selection vector is 2, 6, 3, 7: the two
;; high words of operand 1 interleaved with the two high words of operand
;; 2 (indices 4..7 presumably address operand 2).
6968 (define_insn "*avx_interleave_highv4si"
6969 [(set (match_operand:V4SI 0 "register_operand" "=x")
6972 (match_operand:V4SI 1 "register_operand" "x")
6973 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6974 (parallel [(const_int 2) (const_int 6)
6975 (const_int 3) (const_int 7)])))]
6977 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6978 [(set_attr "type" "sselog")
6979 (set_attr "prefix" "vex")
6980 (set_attr "mode" "TI")])
;; Two-operand punpckhdq with operand 1 tied to the destination.  The
;; selection vector is 2, 6, 3, 7: the two high words of operand 1
;; interleaved with the two high words of operand 2 (indices 4..7
;; presumably address operand 2).
6982 (define_insn "vec_interleave_highv4si"
6983 [(set (match_operand:V4SI 0 "register_operand" "=x")
6986 (match_operand:V4SI 1 "register_operand" "0")
6987 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6988 (parallel [(const_int 2) (const_int 6)
6989 (const_int 3) (const_int 7)])))]
6991 "punpckhdq\t{%2, %0|%0, %2}"
6992 [(set_attr "type" "sselog")
6993 (set_attr "prefix_data16" "1")
6994 (set_attr "mode" "TI")])
;; VEX-encoded vpunpckldq.  The selection vector is 0, 4, 1, 5: the two
;; low words of operand 1 interleaved with the two low words of operand 2
;; (indices 4..7 presumably address operand 2).
6996 (define_insn "*avx_interleave_lowv4si"
6997 [(set (match_operand:V4SI 0 "register_operand" "=x")
7000 (match_operand:V4SI 1 "register_operand" "x")
7001 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7002 (parallel [(const_int 0) (const_int 4)
7003 (const_int 1) (const_int 5)])))]
7005 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
7006 [(set_attr "type" "sselog")
7007 (set_attr "prefix" "vex")
7008 (set_attr "mode" "TI")])
;; Two-operand punpckldq with operand 1 tied to the destination.  The
;; selection vector is 0, 4, 1, 5: the two low words of operand 1
;; interleaved with the two low words of operand 2 (indices 4..7
;; presumably address operand 2).
7010 (define_insn "vec_interleave_lowv4si"
7011 [(set (match_operand:V4SI 0 "register_operand" "=x")
7014 (match_operand:V4SI 1 "register_operand" "0")
7015 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7016 (parallel [(const_int 0) (const_int 4)
7017 (const_int 1) (const_int 5)])))]
7019 "punpckldq\t{%2, %0|%0, %2}"
7020 [(set_attr "type" "sselog")
7021 (set_attr "prefix_data16" "1")
7022 (set_attr "mode" "TI")])
;; VEX-encoded element insert (vpinsr<ssevecsize>) for the SSEMODE124
;; modes, modelled as a vec_merge of the broadcast scalar (operand 2) into
;; vector operand 1.  Operand 3 is the vec_merge mask, constrained by its
;; predicate to a single power of two; the output code converts it with
;; exact_log2 into the element index used as the immediate.  A memory
;; source is printed as %2, a register source as %k2 (presumably the
;; 32-bit register name -- see the target's operand-printing code).
;; "prefix_extra" is computed by testing for V8HI; the other value is "1".
7024 (define_insn "*avx_pinsr<ssevecsize>"
7025 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
7026 (vec_merge:SSEMODE124
7027 (vec_duplicate:SSEMODE124
7028 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
7029 (match_operand:SSEMODE124 1 "register_operand" "x")
7030 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
7033 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7034 if (MEM_P (operands[2]))
7035 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7037 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
7039 [(set_attr "type" "sselog")
7040 (set (attr "prefix_extra")
7041 (if_then_else (match_operand:V8HI 0 "" "")
7043 (const_string "1")))
7044 (set_attr "length_immediate" "1")
7045 (set_attr "prefix" "vex")
7046 (set_attr "mode" "TI")])
;; Byte insert (pinsrb) into a V16QI register tied to the destination.
;; Operand 3 is a one-bit mask (power of two up to 32768) that the output
;; code turns into the byte index with exact_log2.  A memory source is
;; printed as %2, a register source as %k2.
7048 (define_insn "*sse4_1_pinsrb"
7049 [(set (match_operand:V16QI 0 "register_operand" "=x")
7051 (vec_duplicate:V16QI
7052 (match_operand:QI 2 "nonimmediate_operand" "rm"))
7053 (match_operand:V16QI 1 "register_operand" "0")
7054 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
7057 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7058 if (MEM_P (operands[2]))
7059 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
7061 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
7063 [(set_attr "type" "sselog")
7064 (set_attr "prefix_extra" "1")
7065 (set_attr "length_immediate" "1")
7066 (set_attr "mode" "TI")])
;; Halfword insert (pinsrw) into a V8HI register tied to the destination.
;; Operand 3 is a one-bit mask (power of two up to 128) that the output
;; code turns into the element index with exact_log2.  A memory source is
;; printed as %2, a register source as %k2.
7068 (define_insn "*sse2_pinsrw"
7069 [(set (match_operand:V8HI 0 "register_operand" "=x")
7072 (match_operand:HI 2 "nonimmediate_operand" "rm"))
7073 (match_operand:V8HI 1 "register_operand" "0")
7074 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
7077 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7078 if (MEM_P (operands[2]))
7079 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
7081 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
7083 [(set_attr "type" "sselog")
7084 (set_attr "prefix_data16" "1")
7085 (set_attr "length_immediate" "1")
7086 (set_attr "mode" "TI")])
;; Word insert (pinsrd) into a V4SI register tied to the destination.
;; Operand 3 is a one-bit mask (power of two up to 8) that the output
;; code turns into the element index with exact_log2.
7088 ;; It must come before sse2_loadld since it is preferred.
7089 (define_insn "*sse4_1_pinsrd"
7090 [(set (match_operand:V4SI 0 "register_operand" "=x")
7093 (match_operand:SI 2 "nonimmediate_operand" "rm"))
7094 (match_operand:V4SI 1 "register_operand" "0")
7095 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
7098 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7099 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
7101 [(set_attr "type" "sselog")
7102 (set_attr "prefix_extra" "1")
7103 (set_attr "length_immediate" "1")
7104 (set_attr "mode" "TI")])
;; VEX-encoded doubleword insert (vpinsrq) into a V2DI vector; available
;; only with AVX on 64-bit targets.  Operand 3 is a one-bit mask (1 or 2)
;; that the output code turns into the element index with exact_log2.
7106 (define_insn "*avx_pinsrq"
7107 [(set (match_operand:V2DI 0 "register_operand" "=x")
7110 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7111 (match_operand:V2DI 1 "register_operand" "x")
7112 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7113 "TARGET_AVX && TARGET_64BIT"
7115 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7116 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7118 [(set_attr "type" "sselog")
7119 (set_attr "prefix_extra" "1")
7120 (set_attr "length_immediate" "1")
7121 (set_attr "prefix" "vex")
7122 (set_attr "mode" "TI")])
;; Doubleword insert (pinsrq) into a V2DI register tied to the
;; destination; available only with SSE4.1 on 64-bit targets.  Operand 3
;; is a one-bit mask (1 or 2) converted to the element index with
;; exact_log2.  The insn is marked as needing a REX prefix.
7124 (define_insn "*sse4_1_pinsrq"
7125 [(set (match_operand:V2DI 0 "register_operand" "=x")
7128 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7129 (match_operand:V2DI 1 "register_operand" "0")
7130 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7131 "TARGET_SSE4_1 && TARGET_64BIT"
7133 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7134 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7136 [(set_attr "type" "sselog")
7137 (set_attr "prefix_rex" "1")
7138 (set_attr "prefix_extra" "1")
7139 (set_attr "length_immediate" "1")
7140 (set_attr "mode" "TI")])
;; Extract byte element 0..15 (operand 2) of a V16QI register into a
;; general register of an SWI48 mode.  The destination is printed with
;; %k0; the %v template prefix together with prefix "maybe_vex" lets the
;; same pattern serve the VEX encoding (presumably chosen when AVX is
;; enabled).
7142 (define_insn "*sse4_1_pextrb_<mode>"
7143 [(set (match_operand:SWI48 0 "register_operand" "=r")
7146 (match_operand:V16QI 1 "register_operand" "x")
7147 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7149 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7150 [(set_attr "type" "sselog")
7151 (set_attr "prefix_extra" "1")
7152 (set_attr "length_immediate" "1")
7153 (set_attr "prefix" "maybe_vex")
7154 (set_attr "mode" "TI")])
;; Extract byte element 0..15 (operand 2) of a V16QI register and store
;; it directly to a QImode memory location.
7156 (define_insn "*sse4_1_pextrb_memory"
7157 [(set (match_operand:QI 0 "memory_operand" "=m")
7159 (match_operand:V16QI 1 "register_operand" "x")
7160 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7162 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7163 [(set_attr "type" "sselog")
7164 (set_attr "prefix_extra" "1")
7165 (set_attr "length_immediate" "1")
7166 (set_attr "prefix" "maybe_vex")
7167 (set_attr "mode" "TI")])
;; Extract halfword element 0..7 (operand 2) of a V8HI register into a
;; general register of an SWI48 mode; the destination is printed with %k0.
7169 (define_insn "*sse2_pextrw_<mode>"
7170 [(set (match_operand:SWI48 0 "register_operand" "=r")
7173 (match_operand:V8HI 1 "register_operand" "x")
7174 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7176 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7177 [(set_attr "type" "sselog")
7178 (set_attr "prefix_data16" "1")
7179 (set_attr "length_immediate" "1")
7180 (set_attr "prefix" "maybe_vex")
7181 (set_attr "mode" "TI")])
;; Extract halfword element 0..7 (operand 2) of a V8HI register and store
;; it directly to an HImode memory location.
7183 (define_insn "*sse4_1_pextrw_memory"
7184 [(set (match_operand:HI 0 "memory_operand" "=m")
7186 (match_operand:V8HI 1 "register_operand" "x")
7187 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7189 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7190 [(set_attr "type" "sselog")
7191 (set_attr "prefix_extra" "1")
7192 (set_attr "length_immediate" "1")
7193 (set_attr "prefix" "maybe_vex")
7194 (set_attr "mode" "TI")])
;; Extract word element 0..3 (operand 2) of a V4SI register into an
;; SImode general register or memory location.
7196 (define_insn "*sse4_1_pextrd"
7197 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7199 (match_operand:V4SI 1 "register_operand" "x")
7200 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7202 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7203 [(set_attr "type" "sselog")
7204 (set_attr "prefix_extra" "1")
7205 (set_attr "length_immediate" "1")
7206 (set_attr "prefix" "maybe_vex")
7207 (set_attr "mode" "TI")])
;; 64-bit-only variant of the V4SI element extract whose destination is a
;; DImode general register (the "_zext" in the name indicates the result
;; is zero-extended).  The instruction itself names the destination with
;; %k0.
7209 (define_insn "*sse4_1_pextrd_zext"
7210 [(set (match_operand:DI 0 "register_operand" "=r")
7213 (match_operand:V4SI 1 "register_operand" "x")
7214 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7215 "TARGET_64BIT && TARGET_SSE4_1"
7216 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7217 [(set_attr "type" "sselog")
7218 (set_attr "prefix_extra" "1")
7219 (set_attr "length_immediate" "1")
7220 (set_attr "prefix" "maybe_vex")
7221 (set_attr "mode" "TI")])
;; Extract doubleword element 0..1 (operand 2) of a V2DI register into a
;; DImode general register or memory location; SSE4.1 on 64-bit targets
;; only.  The insn is marked as needing a REX prefix.
7223 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
7224 (define_insn "*sse4_1_pextrq"
7225 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7227 (match_operand:V2DI 1 "register_operand" "x")
7228 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7229 "TARGET_SSE4_1 && TARGET_64BIT"
7230 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7231 [(set_attr "type" "sselog")
7232 (set_attr "prefix_rex" "1")
7233 (set_attr "prefix_extra" "1")
7234 (set_attr "length_immediate" "1")
7235 (set_attr "prefix" "maybe_vex")
7236 (set_attr "mode" "TI")])
;; Expander taking the pshufd control byte as a single integer (operand
;; 2).  It splits the byte into four 2-bit element selectors, lowest bits
;; first, and emits sse2_pshufd_1 with one selector per result element.
7238 (define_expand "sse2_pshufd"
7239 [(match_operand:V4SI 0 "register_operand" "")
7240 (match_operand:V4SI 1 "nonimmediate_operand" "")
7241 (match_operand:SI 2 "const_int_operand" "")]
7244 int mask = INTVAL (operands[2]);
7245 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7246 GEN_INT ((mask >> 0) & 3),
7247 GEN_INT ((mask >> 2) & 3),
7248 GEN_INT ((mask >> 4) & 3),
7249 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the shuffle expressed as four explicit element selectors
;; (operands 2..5, each 0..3).  The output code packs the selectors back
;; into the 8-bit immediate, two bits per element with operand 2 in the
;; low bits, and overwrites operands[2] with it before printing.
7253 (define_insn "sse2_pshufd_1"
7254 [(set (match_operand:V4SI 0 "register_operand" "=x")
7256 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7257 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7258 (match_operand 3 "const_0_to_3_operand" "")
7259 (match_operand 4 "const_0_to_3_operand" "")
7260 (match_operand 5 "const_0_to_3_operand" "")])))]
7264 mask |= INTVAL (operands[2]) << 0;
7265 mask |= INTVAL (operands[3]) << 2;
7266 mask |= INTVAL (operands[4]) << 4;
7267 mask |= INTVAL (operands[5]) << 6;
7268 operands[2] = GEN_INT (mask);
7270 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7272 [(set_attr "type" "sselog1")
7273 (set_attr "prefix_data16" "1")
7274 (set_attr "prefix" "maybe_vex")
7275 (set_attr "length_immediate" "1")
7276 (set_attr "mode" "TI")])
;; Expander taking the pshuflw control byte as a single integer (operand
;; 2).  It splits the byte into four 2-bit selectors (values 0..3),
;; lowest bits first, and emits sse2_pshuflw_1 with them.
7278 (define_expand "sse2_pshuflw"
7279 [(match_operand:V8HI 0 "register_operand" "")
7280 (match_operand:V8HI 1 "nonimmediate_operand" "")
7281 (match_operand:SI 2 "const_int_operand" "")]
7284 int mask = INTVAL (operands[2]);
7285 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7286 GEN_INT ((mask >> 0) & 3),
7287 GEN_INT ((mask >> 2) & 3),
7288 GEN_INT ((mask >> 4) & 3),
7289 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with the shuffle of the low halfwords expressed as four
;; explicit selectors (operands 2..5, each 0..3).  The output code packs
;; them back into the 8-bit immediate, two bits per element with operand
;; 2 in the low bits, and overwrites operands[2] with it before printing.
;; The attributes give "prefix_data16" as "0" and "prefix_rep" as "1".
7293 (define_insn "sse2_pshuflw_1"
7294 [(set (match_operand:V8HI 0 "register_operand" "=x")
7296 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7297 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7298 (match_operand 3 "const_0_to_3_operand" "")
7299 (match_operand 4 "const_0_to_3_operand" "")
7300 (match_operand 5 "const_0_to_3_operand" "")
7308 mask |= INTVAL (operands[2]) << 0;
7309 mask |= INTVAL (operands[3]) << 2;
7310 mask |= INTVAL (operands[4]) << 4;
7311 mask |= INTVAL (operands[5]) << 6;
7312 operands[2] = GEN_INT (mask);
7314 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7316 [(set_attr "type" "sselog")
7317 (set_attr "prefix_data16" "0")
7318 (set_attr "prefix_rep" "1")
7319 (set_attr "prefix" "maybe_vex")
7320 (set_attr "length_immediate" "1")
7321 (set_attr "mode" "TI")])
;; Expander taking the pshufhw control byte as a single integer (operand
;; 2).  Each 2-bit field is biased by 4 so that the selectors passed to
;; sse2_pshufhw_1 are element numbers in the range 4..7.
7323 (define_expand "sse2_pshufhw"
7324 [(match_operand:V8HI 0 "register_operand" "")
7325 (match_operand:V8HI 1 "nonimmediate_operand" "")
7326 (match_operand:SI 2 "const_int_operand" "")]
7329 int mask = INTVAL (operands[2]);
7330 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7331 GEN_INT (((mask >> 0) & 3) + 4),
7332 GEN_INT (((mask >> 2) & 3) + 4),
7333 GEN_INT (((mask >> 4) & 3) + 4),
7334 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw with the shuffle of the high halfwords expressed as four
;; explicit selectors (operands 2..5, each an element number 4..7).  The
;; output code removes the bias of 4 from each selector, packs the results
;; into the 8-bit immediate (operand 2 in the low bits) and overwrites
;; operands[2] with it before printing.
7338 (define_insn "sse2_pshufhw_1"
7339 [(set (match_operand:V8HI 0 "register_operand" "=x")
7341 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7342 (parallel [(const_int 0)
7346 (match_operand 2 "const_4_to_7_operand" "")
7347 (match_operand 3 "const_4_to_7_operand" "")
7348 (match_operand 4 "const_4_to_7_operand" "")
7349 (match_operand 5 "const_4_to_7_operand" "")])))]
7353 mask |= (INTVAL (operands[2]) - 4) << 0;
7354 mask |= (INTVAL (operands[3]) - 4) << 2;
7355 mask |= (INTVAL (operands[4]) - 4) << 4;
7356 mask |= (INTVAL (operands[5]) - 4) << 6;
7357 operands[2] = GEN_INT (mask);
7359 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7361 [(set_attr "type" "sselog")
7362 (set_attr "prefix_rep" "1")
7363 (set_attr "prefix_data16" "0")
7364 (set_attr "prefix" "maybe_vex")
7365 (set_attr "length_immediate" "1")
7366 (set_attr "mode" "TI")])
;; Expander that places an SImode value (operand 1) into a V4SI register.
;; The preparation statement supplies an all-zero V4SI constant as
;; operands[2].
7368 (define_expand "sse2_loadd"
7369 [(set (match_operand:V4SI 0 "register_operand" "")
7372 (match_operand:SI 1 "nonimmediate_operand" ""))
7376 "operands[2] = CONST0_RTX (V4SImode);")
;; AVX forms of inserting an SImode scalar (operand 2) into a V4SI value
;; (operand 1).  Three alternatives:
;;   1. scalar in memory, operand 1 constant zero ("C")  -> vmovd
;;   2. scalar in a general register, operand 1 zero     -> vmovd
;;   3. scalar in an SSE register, operand 1 a register  -> vmovss
7378 (define_insn "*avx_loadld"
7379 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7382 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7383 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7387 vmovd\t{%2, %0|%0, %2}
7388 vmovd\t{%2, %0|%0, %2}
7389 vmovss\t{%2, %1, %0|%0, %1, %2}"
7390 [(set_attr "type" "ssemov")
7391 (set_attr "prefix" "vex")
7392 (set_attr "mode" "TI,TI,V4SF")])
;; Non-VEX counterpart: insert SImode operand 2 into a V4SI whose other
;; input is operand 1 (a register or zero, per reg_or_0_operand).
;;   alt 0/1: operand 2 from memory / general register, operand 1 "C" -> movd
;;   alt 2:   operand 2 from memory, operand 1 "C"                    -> movss
;;   alt 3:   operand 1 tied to the destination ("0"), operand 2 in an
;;            SSE register                                            -> movss
7394 (define_insn "sse2_loadld"
7395 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7398 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7399 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7403 movd\t{%2, %0|%0, %2}
7404 movd\t{%2, %0|%0, %2}
7405 movss\t{%2, %0|%0, %2}
7406 movss\t{%2, %0|%0, %2}"
7407 [(set_attr "type" "ssemov")
7408 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of V4SI operand 1 into SImode operand 0 (memory, SSE
;; register, or general register).
;; The split fires after reload when inter-unit moves are enabled, or the
;; destination is memory, or the destination is not a general register.
;; It rewrites the insn as a plain move, re-creating operand 1 as an
;; SImode REG with the same register number.
7410 (define_insn_and_split "sse2_stored"
7411 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7413 (match_operand:V4SI 1 "register_operand" "x,Yi")
7414 (parallel [(const_int 0)])))]
7417 "&& reload_completed
7418 && (TARGET_INTER_UNIT_MOVES
7419 || MEM_P (operands [0])
7420 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7421 [(set (match_dup 0) (match_dup 1))]
7422 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element N (operand 2, a constant in 0..3) of a V4SI held in
;; offsettable memory into a general register.  The split code emits an
;; SImode move from the memory operand re-addressed at byte offset N*4.
7424 (define_insn_and_split "*vec_ext_v4si_mem"
7425 [(set (match_operand:SI 0 "register_operand" "=r")
7427 (match_operand:V4SI 1 "memory_operand" "o")
7428 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7434 int i = INTVAL (operands[2]);
7436 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander: store element 0 of V2DI register operand 1 into DImode
;; operand 0 (register or memory).
7440 (define_expand "sse_storeq"
7441 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7443 (match_operand:V2DI 1 "register_operand" "")
7444 (parallel [(const_int 0)])))]
;; 64-bit-only form of "element 0 of a V2DI -> DImode destination".
;; Memory-to-memory is rejected by the insn condition.
;; Three alternatives are declared; the third (general register loaded
;; from offsettable memory) is typed as an integer move in DImode.
7447 (define_insn "*sse2_storeq_rex64"
7448 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7450 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7451 (parallel [(const_int 0)])))]
7452 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7456 %vmov{q}\t{%1, %0|%0, %1}"
7457 [(set_attr "type" "*,*,imov")
7458 (set_attr "prefix" "*,*,maybe_vex")
7459 (set_attr "mode" "*,*,DI")])
;; Element 0 of V2DI register operand 1 -> DImode operand 0 (memory or
;; SSE register).
;; NOTE(review): the second pattern below has the same shape, a condition
;; of the same form as the sse2_stored split (inter-unit moves enabled,
;; memory destination, or non-general-register destination), and a
;; replacement that is a plain DImode move from the same register number.
;; It is presumably the split paired with this insn -- confirm.
7461 (define_insn "*sse2_storeq"
7462 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7464 (match_operand:V2DI 1 "register_operand" "x")
7465 (parallel [(const_int 0)])))]
7470 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7472 (match_operand:V2DI 1 "register_operand" "")
7473 (parallel [(const_int 0)])))]
7476 && (TARGET_INTER_UNIT_MOVES
7477 || MEM_P (operands [0])
7478 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7479 [(set (match_dup 0) (match_dup 1))]
7480 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; Extract element 1 of a V2DI into a DImode destination (VEX encodings).
;; Memory-to-memory is rejected by the insn condition.
;;   alt 0: SSE register -> memory              vmovhps
;;   alt 1: SSE register -> SSE register        vpsrldq by 8 bytes
;;   alt 2: offsettable memory -> SSE register  vmovq from %H1
;;   alt 3: offsettable memory -> general reg   integer mov from %H1
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the operand-modifier documentation.
7482 (define_insn "*vec_extractv2di_1_rex64_avx"
7483 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7485 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7486 (parallel [(const_int 1)])))]
7489 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7491 vmovhps\t{%1, %0|%0, %1}
7492 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7493 vmovq\t{%H1, %0|%0, %H1}
7494 vmov{q}\t{%H1, %0|%0, %H1}"
7495 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7496 (set_attr "length_immediate" "*,1,*,*")
7497 (set_attr "memory" "*,none,*,*")
7498 (set_attr "prefix" "vex")
7499 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extract element 1 of a V2DI into a DImode destination on 64-bit
;; targets (legacy encodings).  Memory-to-memory is rejected.
;;   alt 0: SSE register -> memory              movhps
;;   alt 1: in-place (operand 1 tied to dest)   psrldq by 8 bytes
;;   alt 2: offsettable memory -> SSE register  movq from %H1
;;   alt 3: offsettable memory -> general reg   integer mov from %H1
7501 (define_insn "*vec_extractv2di_1_rex64"
7502 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7504 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7505 (parallel [(const_int 1)])))]
7506 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7508 movhps\t{%1, %0|%0, %1}
7509 psrldq\t{$8, %0|%0, 8}
7510 movq\t{%H1, %0|%0, %H1}
7511 mov{q}\t{%H1, %0|%0, %H1}"
7512 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7513 (set_attr "length_immediate" "*,1,*,*")
7514 (set_attr "memory" "*,none,*,*")
7515 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extract element 1 of a V2DI into a DImode memory or SSE-register
;; destination (VEX encodings, no general-register alternative).
;; Memory-to-memory is rejected by the insn condition.
;;   alt 0: SSE register -> memory              vmovhps
;;   alt 1: SSE register -> SSE register        vpsrldq by 8 bytes
;;   alt 2: offsettable memory -> SSE register  vmovq from %H1
7517 (define_insn "*vec_extractv2di_1_avx"
7518 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7520 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7521 (parallel [(const_int 1)])))]
7524 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7526 vmovhps\t{%1, %0|%0, %1}
7527 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7528 vmovq\t{%H1, %0|%0, %H1}"
7529 [(set_attr "type" "ssemov,sseishft1,ssemov")
7530 (set_attr "length_immediate" "*,1,*")
7531 (set_attr "memory" "*,none,*")
7532 (set_attr "prefix" "vex")
7533 (set_attr "mode" "V2SF,TI,TI")])
;; Extract element 1 of a V2DI into a DImode memory or SSE-register
;; destination; requires SSE2 and rejects memory-to-memory.
;;   alt 0: SSE register -> memory              movhps
;;   alt 1: in-place (operand 1 tied to dest)   psrldq by 8 bytes
;;   alt 2: offsettable memory -> SSE register  movq from %H1
7535 (define_insn "*vec_extractv2di_1_sse2"
7536 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7538 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7539 (parallel [(const_int 1)])))]
7541 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7543 movhps\t{%1, %0|%0, %1}
7544 psrldq\t{$8, %0|%0, 8}
7545 movq\t{%H1, %0|%0, %H1}"
7546 [(set_attr "type" "ssemov,sseishft1,ssemov")
7547 (set_attr "length_immediate" "*,1,*")
7548 (set_attr "memory" "*,none,*")
7549 (set_attr "mode" "V2SF,TI,TI")])
7551 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-without-SSE2 fallback for extracting element 1 of a V2DI; uses
;; only single-precision move instructions.  Memory-to-memory is rejected.
;;   alt 0: SSE register -> memory              movhps
;;   alt 1: SSE register -> SSE register        movhlps
;;   alt 2: offsettable memory -> SSE register  movlps from %H1
7552 (define_insn "*vec_extractv2di_1_sse"
7553 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7555 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7556 (parallel [(const_int 1)])))]
7557 "!TARGET_SSE2 && TARGET_SSE
7558 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7560 movhps\t{%1, %0|%0, %1}
7561 movhlps\t{%1, %0|%0, %1}
7562 movlps\t{%H1, %0|%0, %H1}"
7563 [(set_attr "type" "ssemov")
7564 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; VEX forms for replicating an SImode value across a V4SI register:
;;   alt 0: source in an SSE register -> vpshufd with immediate 0
;;   alt 1: source in memory          -> vbroadcastss
;; NOTE(review): operand 1 uses the register_operand predicate while its
;; second constraint alternative is "m"; the predicate and constraint
;; disagree about memory sources -- confirm whether nonimmediate_operand
;; was intended.
7566 (define_insn "*vec_dupv4si_avx"
7567 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7569 (match_operand:SI 1 "register_operand" "x,m")))]
7572 vpshufd\t{$0, %1, %0|%0, %1, 0}
7573 vbroadcastss\t{%1, %0|%0, %1}"
7574 [(set_attr "type" "sselog1,ssemov")
7575 (set_attr "length_immediate" "1,0")
7576 (set_attr "prefix_extra" "0,1")
7577 (set_attr "prefix" "vex")
7578 (set_attr "mode" "TI,V4SF")])
;; Replicate an SImode register value across a V4SI register.
;;   alt 0: "Y2" registers              -> pshufd with immediate 0
;;   alt 1: source tied to destination  -> shufps with immediate 0, in place
7580 (define_insn "*vec_dupv4si"
7581 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7583 (match_operand:SI 1 "register_operand" " Y2,0")))]
7586 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7587 shufps\t{$0, %0, %0|%0, %0, 0}"
7588 [(set_attr "type" "sselog1")
7589 (set_attr "length_immediate" "1")
7590 (set_attr "mode" "TI,V4SF")])
;; VEX forms for replicating a DImode value across a V2DI register:
;;   alt 0: source in an SSE register -> vpunpcklqdq of the source with itself
;;   alt 1: source in memory          -> vmovddup
7592 (define_insn "*vec_dupv2di_avx"
7593 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7595 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7598 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7599 vmovddup\t{%1, %0|%0, %1}"
7600 [(set_attr "type" "sselog1")
7601 (set_attr "prefix" "vex")
7602 (set_attr "mode" "TI,DF")])
;; Two-alternative DImode -> V2DI duplication pattern: alternative 0 ties
;; the source to the destination register, alternative 1 takes the source
;; from memory and is emitted as movddup.
7604 (define_insn "*vec_dupv2di_sse3"
7605 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7607 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7611 movddup\t{%1, %0|%0, %1}"
7612 [(set_attr "type" "sselog1")
7613 (set_attr "mode" "TI,DF")])
;; Register-only DImode -> V2DI duplication pattern.  Both alternatives
;; tie the source to the destination register ("0"); they differ in the
;; destination register class ("Y2" vs "x") and in type/mode attributes
;; (sselog1/TI vs ssemov/V4SF).
7615 (define_insn "*vec_dupv2di"
7616 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7618 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7623 [(set_attr "type" "sselog1,ssemov")
7624 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI from two SImode inputs (operands 1 and 2).
;;   alt 0: SSE reg + reg/mem       vpinsrd at index 1
;;   alt 1: SSE reg + SSE reg       vpunpckldq
;;   alt 2: operand 2 matches "C"   vmovd of operand 1
;;   alt 3: MMX registers           punpckldq (operand 1 tied to dest)
;;   alt 4: MMX, operand 2 "C"      movd of operand 1
;; The prefix attribute is "orig" for the two MMX alternatives (3 and 4)
;; and "vex" for the rest.
7626 (define_insn "*vec_concatv2si_avx"
7627 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7629 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7630 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7633 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7634 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7635 vmovd\t{%1, %0|%0, %1}
7636 punpckldq\t{%2, %0|%0, %2}
7637 movd\t{%1, %0|%0, %1}"
7638 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7639 (set_attr "prefix_extra" "1,*,*,*,*")
7640 (set_attr "length_immediate" "1,*,*,*,*")
7641 (set (attr "prefix")
7642 (if_then_else (eq_attr "alternative" "3,4")
7643 (const_string "orig")
7644 (const_string "vex")))
7645 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Build a V2SI from two SImode inputs using two-operand encodings.
;;   alt 0: operand 1 tied to dest, operand 2 reg/mem  pinsrd at index 1
;;   alt 1: operand 1 tied to dest, operand 2 SSE reg  punpckldq
;;   alt 2: operand 2 matches "C"                      movd of operand 1
;;   alt 3: MMX registers                              punpckldq
;;   alt 4: MMX, operand 2 "C"                         movd of operand 1
7647 (define_insn "*vec_concatv2si_sse4_1"
7648 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7650 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7651 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7654 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7655 punpckldq\t{%2, %0|%0, %2}
7656 movd\t{%1, %0|%0, %1}
7657 punpckldq\t{%2, %0|%0, %2}
7658 movd\t{%1, %0|%0, %1}"
7659 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7660 (set_attr "prefix_extra" "1,*,*,*,*")
7661 (set_attr "length_immediate" "1,*,*,*,*")
7662 (set_attr "mode" "TI,TI,TI,DI,DI")])
7664 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7665 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7666 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI from two SImode inputs; operand 2 is restricted to a
;; register or zero (reg_or_0_operand), per the remark above.
;;   alt 0: SSE, operand 1 tied to dest   punpckldq
;;   alt 1: SSE, operand 2 "C"            movd of operand 1
;;   alt 2: MMX, operand 1 tied to dest   punpckldq
;;   alt 3: MMX, operand 2 "C"            movd of operand 1
7667 (define_insn "*vec_concatv2si_sse2"
7668 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7670 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7671 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7674 punpckldq\t{%2, %0|%0, %2}
7675 movd\t{%1, %0|%0, %1}
7676 punpckldq\t{%2, %0|%0, %2}
7677 movd\t{%1, %0|%0, %1}"
7678 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7679 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI from two SImode inputs; the SSE alternatives use
;; single-precision instructions.
;;   alt 0: SSE, operand 1 tied to dest     unpcklps
;;   alt 1: SSE, operand 1 in memory, "C"   movss of operand 1
;;   alt 2: MMX, operand 1 tied to dest     punpckldq
;;   alt 3: MMX, operand 2 "C"              movd of operand 1
7681 (define_insn "*vec_concatv2si_sse"
7682 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7684 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7685 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7688 unpcklps\t{%2, %0|%0, %2}
7689 movss\t{%1, %0|%0, %1}
7690 punpckldq\t{%2, %0|%0, %2}
7691 movd\t{%1, %0|%0, %1}"
7692 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7693 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI from two V2SI halves (VEX, three-operand forms).
;;   alt 0: operand 2 in an SSE register -> vpunpcklqdq
;;   alt 1: operand 2 in memory          -> vmovhps
7695 (define_insn "*vec_concatv4si_1_avx"
7696 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7698 (match_operand:V2SI 1 "register_operand" " x,x")
7699 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7702 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7703 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7704 [(set_attr "type" "sselog,ssemov")
7705 (set_attr "prefix" "vex")
7706 (set_attr "mode" "TI,V2SF")])
;; Build a V4SI from two V2SI halves; operand 1 is always tied to the
;; destination register.
;;   alt 0: operand 2 in a "Y2" register -> punpcklqdq
;;   alt 1: operand 2 in an SSE register -> movlhps
;;   alt 2: operand 2 in memory          -> movhps
7708 (define_insn "*vec_concatv4si_1"
7709 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7711 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7712 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7715 punpcklqdq\t{%2, %0|%0, %2}
7716 movlhps\t{%2, %0|%0, %2}
7717 movhps\t{%2, %0|%0, %2}"
7718 [(set_attr "type" "sselog,ssemov,ssemov")
7719 (set_attr "mode" "TI,V4SF,V2SF")])
;; Build a V2DI from two DImode inputs on 32-bit AVX targets.
;;   alt 0: operand 1 in memory, operand 2 "C"   vmovq
;;   alt 1: operand 1 in an MMX register, "C"    movq2dq
;;   alt 2: both in SSE registers                vpunpcklqdq
;;   alt 3: operand 2 in memory                  vmovhps
;; The prefix attribute is "orig" for the movq2dq alternative and "vex"
;; for all others.
7721 (define_insn "*vec_concatv2di_avx"
7722 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7724 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7725 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7726 "!TARGET_64BIT && TARGET_AVX"
7728 vmovq\t{%1, %0|%0, %1}
7729 movq2dq\t{%1, %0|%0, %1}
7730 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7731 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7732 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7733 (set (attr "prefix")
7734 (if_then_else (eq_attr "alternative" "1")
7735 (const_string "orig")
7736 (const_string "vex")))
7737 (set_attr "mode" "TI,TI,TI,V2SF")])
;; Build a V2DI from two DImode inputs on 32-bit SSE targets.
;;   alt 0: operand 1 in memory or a "Y2" register, operand 2 "C"   movq
;;   alt 1: operand 1 in an MMX register, operand 2 "C"             movq2dq
;;   alt 2: operand 1 tied to dest, operand 2 "Y2"                  punpcklqdq
;;   alt 3: operand 1 tied to dest, operand 2 SSE register          movlhps
;;   alt 4: operand 1 tied to dest, operand 2 memory                movhps
7739 (define_insn "vec_concatv2di"
7740 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7742 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7743 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7744 "!TARGET_64BIT && TARGET_SSE"
7746 movq\t{%1, %0|%0, %1}
7747 movq2dq\t{%1, %0|%0, %1}
7748 punpcklqdq\t{%2, %0|%0, %2}
7749 movlhps\t{%2, %0|%0, %2}
7750 movhps\t{%2, %0|%0, %2}"
7751 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7752 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; Build a V2DI from two DImode inputs on 64-bit AVX targets.
;;   alt 0: operand 2 in a general register or memory   vpinsrq at index 1
;;   alt 1: operand 1 in memory, operand 2 "C"          vmovq
;;   alt 2: operand 1 in a general register, "C"        vmovq
;;   alt 3: operand 1 in an MMX register, "C"           movq2dq
;;   alt 4: both in SSE registers                       vpunpcklqdq
;;   alt 5: operand 2 in memory                         vmovhps
;; The prefix attribute is "orig" for the movq2dq alternative and "vex"
;; for all others.
7754 (define_insn "*vec_concatv2di_rex64_avx"
7755 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7757 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7758 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7759 "TARGET_64BIT && TARGET_AVX"
7761 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7762 vmovq\t{%1, %0|%0, %1}
7763 vmovq\t{%1, %0|%0, %1}
7764 movq2dq\t{%1, %0|%0, %1}
7765 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7766 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7767 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7768 (set_attr "prefix_extra" "1,*,*,*,*,*")
7769 (set_attr "length_immediate" "1,*,*,*,*,*")
7770 (set (attr "prefix")
7771 (if_then_else (eq_attr "alternative" "3")
7772 (const_string "orig")
7773 (const_string "vex")))
7774 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; Build a V2DI from two DImode inputs on 64-bit SSE4.1 targets
;; (two-operand encodings).
;;   alt 0: operand 1 tied to dest, operand 2 reg/mem   pinsrq at index 1
;;   alt 1: operand 1 in memory or SSE reg, "C"         movq
;;   alt 2: operand 1 in a general register, "C"        movq
;;   alt 3: operand 1 in an MMX register, "C"           movq2dq
;;   alt 4: operand 1 tied to dest, operand 2 SSE reg   punpcklqdq
;;   alt 5: operand 1 tied to dest, operand 2 SSE reg   movlhps
;;   alt 6: operand 1 tied to dest, operand 2 memory    movhps
;; prefix_rex is forced to 1 for alternatives 0 and 2, the two whose
;; visible constraints involve a general register.
7776 (define_insn "*vec_concatv2di_rex64_sse4_1"
7777 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7779 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7780 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7781 "TARGET_64BIT && TARGET_SSE4_1"
7783 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7784 movq\t{%1, %0|%0, %1}
7785 movq\t{%1, %0|%0, %1}
7786 movq2dq\t{%1, %0|%0, %1}
7787 punpcklqdq\t{%2, %0|%0, %2}
7788 movlhps\t{%2, %0|%0, %2}
7789 movhps\t{%2, %0|%0, %2}"
7790 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7791 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7792 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7793 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7794 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; Build a V2DI from two DImode inputs on 64-bit SSE targets.
;;   alt 0: operand 1 in memory or "Y2" register, "C"   movq
;;   alt 1: operand 1 in a general register, "C"        movq (prefix_rex 1)
;;   alt 2: operand 1 in an MMX register, "C"           movq2dq
;;   alt 3: operand 1 tied to dest, operand 2 "Y2"      punpcklqdq
;;   alt 4: operand 1 tied to dest, operand 2 SSE reg   movlhps
;;   alt 5: operand 1 tied to dest, operand 2 memory    movhps
7796 (define_insn "*vec_concatv2di_rex64_sse"
7797 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7799 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7800 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7801 "TARGET_64BIT && TARGET_SSE"
7803 movq\t{%1, %0|%0, %1}
7804 movq\t{%1, %0|%0, %1}
7805 movq2dq\t{%1, %0|%0, %1}
7806 punpcklqdq\t{%2, %0|%0, %2}
7807 movlhps\t{%2, %0|%0, %2}
7808 movhps\t{%2, %0|%0, %2}"
7809 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7810 (set_attr "prefix_rex" "*,1,*,*,*,*")
7811 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Widening unpack expander: V16QI operand 1 -> V8HI operand 0.
;; Expands through ix86_expand_sse4_unpack or ix86_expand_sse_unpack,
;; passing (operands, true, true).
;; NOTE(review): the two flags are presumably unsigned_p and high_p, as
;; the "u"/"hi" in the name suggest -- confirm against the helpers.
7813 (define_expand "vec_unpacku_hi_v16qi"
7814 [(match_operand:V8HI 0 "register_operand" "")
7815 (match_operand:V16QI 1 "register_operand" "")]
7819 ix86_expand_sse4_unpack (operands, true, true);
7821 ix86_expand_sse_unpack (operands, true, true);
;; Widening unpack expander: V16QI operand 1 -> V8HI operand 0.
;; Expands through ix86_expand_sse4_unpack or ix86_expand_sse_unpack,
;; passing (operands, false, true).
;; NOTE(review): the two flags are presumably unsigned_p and high_p, as
;; the "s"/"hi" in the name suggest -- confirm against the helpers.
7825 (define_expand "vec_unpacks_hi_v16qi"
7826 [(match_operand:V8HI 0 "register_operand" "")
7827 (match_operand:V16QI 1 "register_operand" "")]
7831 ix86_expand_sse4_unpack (operands, false, true);
7833 ix86_expand_sse_unpack (operands, false, true);
;; Widening unpack expander: V16QI operand 1 -> V8HI operand 0.
;; Expands through ix86_expand_sse4_unpack or ix86_expand_sse_unpack,
;; passing (operands, true, false).
;; NOTE(review): the two flags are presumably unsigned_p and high_p, as
;; the "u"/"lo" in the name suggest -- confirm against the helpers.
7837 (define_expand "vec_unpacku_lo_v16qi"
7838 [(match_operand:V8HI 0 "register_operand" "")
7839 (match_operand:V16QI 1 "register_operand" "")]
7843 ix86_expand_sse4_unpack (operands, true, false);
7845 ix86_expand_sse_unpack (operands, true, false);
;; Widening unpack expander: V16QI operand 1 -> V8HI operand 0.
;; Expands through ix86_expand_sse4_unpack or ix86_expand_sse_unpack,
;; passing (operands, false, false).
;; NOTE(review): the two flags are presumably unsigned_p and high_p, as
;; the "s"/"lo" in the name suggest -- confirm against the helpers.
7849 (define_expand "vec_unpacks_lo_v16qi"
7850 [(match_operand:V8HI 0 "register_operand" "")
7851 (match_operand:V16QI 1 "register_operand" "")]
7855 ix86_expand_sse4_unpack (operands, false, false);
7857 ix86_expand_sse_unpack (operands, false, false);
;; Widening unpack expander: V8HI operand 1 -> V4SI operand 0.
;; Expands through ix86_expand_sse4_unpack or ix86_expand_sse_unpack,
;; passing (operands, true, true).
;; NOTE(review): the two flags are presumably unsigned_p and high_p, as
;; the "u"/"hi" in the name suggest -- confirm against the helpers.
7861 (define_expand "vec_unpacku_hi_v8hi"
7862 [(match_operand:V4SI 0 "register_operand" "")
7863 (match_operand:V8HI 1 "register_operand" "")]
7867 ix86_expand_sse4_unpack (operands, true, true);
7869 ix86_expand_sse_unpack (operands, true, true);
;; Widening unpack expander: V8HI operand 1 -> V4SI operand 0.
;; Expands through ix86_expand_sse4_unpack or ix86_expand_sse_unpack,
;; passing (operands, false, true).
;; NOTE(review): the two flags are presumably unsigned_p and high_p, as
;; the "s"/"hi" in the name suggest -- confirm against the helpers.
7873 (define_expand "vec_unpacks_hi_v8hi"
7874 [(match_operand:V4SI 0 "register_operand" "")
7875 (match_operand:V8HI 1 "register_operand" "")]
7879 ix86_expand_sse4_unpack (operands, false, true);
7881 ix86_expand_sse_unpack (operands, false, true);
;; Widening unpack expander: V8HI operand 1 -> V4SI operand 0.
;; Expands through ix86_expand_sse4_unpack or ix86_expand_sse_unpack,
;; passing (operands, true, false).
;; NOTE(review): the two flags are presumably unsigned_p and high_p, as
;; the "u"/"lo" in the name suggest -- confirm against the helpers.
7885 (define_expand "vec_unpacku_lo_v8hi"
7886 [(match_operand:V4SI 0 "register_operand" "")
7887 (match_operand:V8HI 1 "register_operand" "")]
7891 ix86_expand_sse4_unpack (operands, true, false);
7893 ix86_expand_sse_unpack (operands, true, false);
;; Widening unpack expander: V8HI operand 1 -> V4SI operand 0.
;; Expands through ix86_expand_sse4_unpack or ix86_expand_sse_unpack,
;; passing (operands, false, false).
;; NOTE(review): the two flags are presumably unsigned_p and high_p, as
;; the "s"/"lo" in the name suggest -- confirm against the helpers.
7897 (define_expand "vec_unpacks_lo_v8hi"
7898 [(match_operand:V4SI 0 "register_operand" "")
7899 (match_operand:V8HI 1 "register_operand" "")]
7903 ix86_expand_sse4_unpack (operands, false, false);
7905 ix86_expand_sse_unpack (operands, false, false);
;; Widening unpack expander: V4SI operand 1 -> V2DI operand 0.
;; Expands through ix86_expand_sse4_unpack or ix86_expand_sse_unpack,
;; passing (operands, true, true).
;; NOTE(review): the two flags are presumably unsigned_p and high_p, as
;; the "u"/"hi" in the name suggest -- confirm against the helpers.
7909 (define_expand "vec_unpacku_hi_v4si"
7910 [(match_operand:V2DI 0 "register_operand" "")
7911 (match_operand:V4SI 1 "register_operand" "")]
7915 ix86_expand_sse4_unpack (operands, true, true);
7917 ix86_expand_sse_unpack (operands, true, true);
;; Widening unpack expander: V4SI operand 1 -> V2DI operand 0.
;; Expands through ix86_expand_sse4_unpack or ix86_expand_sse_unpack,
;; passing (operands, false, true).
;; NOTE(review): the two flags are presumably unsigned_p and high_p, as
;; the "s"/"hi" in the name suggest -- confirm against the helpers.
7921 (define_expand "vec_unpacks_hi_v4si"
7922 [(match_operand:V2DI 0 "register_operand" "")
7923 (match_operand:V4SI 1 "register_operand" "")]
7927 ix86_expand_sse4_unpack (operands, false, true);
7929 ix86_expand_sse_unpack (operands, false, true);
;; Widening unpack expander: V4SI operand 1 -> V2DI operand 0.
;; Expands through ix86_expand_sse4_unpack or ix86_expand_sse_unpack,
;; passing (operands, true, false).
;; NOTE(review): the two flags are presumably unsigned_p and high_p, as
;; the "u"/"lo" in the name suggest -- confirm against the helpers.
7933 (define_expand "vec_unpacku_lo_v4si"
7934 [(match_operand:V2DI 0 "register_operand" "")
7935 (match_operand:V4SI 1 "register_operand" "")]
7939 ix86_expand_sse4_unpack (operands, true, false);
7941 ix86_expand_sse_unpack (operands, true, false);
;; Widening unpack expander: V4SI operand 1 -> V2DI operand 0.
;; Expands through ix86_expand_sse4_unpack or ix86_expand_sse_unpack,
;; passing (operands, false, false).
;; NOTE(review): the two flags are presumably unsigned_p and high_p, as
;; the "s"/"lo" in the name suggest -- confirm against the helpers.
7945 (define_expand "vec_unpacks_lo_v4si"
7946 [(match_operand:V2DI 0 "register_operand" "")
7947 (match_operand:V4SI 1 "register_operand" "")]
7951 ix86_expand_sse4_unpack (operands, false, false);
7953 ix86_expand_sse_unpack (operands, false, false);
7957 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7961 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI average pattern on operands 1 and 2.  The
;; pattern carries an all-ones V16QI constant vector.  Before matching,
;; the operands are canonicalized for a commutative PLUS via
;; ix86_fixup_binary_operands_no_copy.
7963 (define_expand "sse2_uavgv16qi3"
7964 [(set (match_operand:V16QI 0 "register_operand" "")
7970 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7972 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7973 (const_vector:V16QI [(const_int 1) (const_int 1)
7974 (const_int 1) (const_int 1)
7975 (const_int 1) (const_int 1)
7976 (const_int 1) (const_int 1)
7977 (const_int 1) (const_int 1)
7978 (const_int 1) (const_int 1)
7979 (const_int 1) (const_int 1)
7980 (const_int 1) (const_int 1)]))
7983 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX form of the V16QI average pattern: three-operand vpavgb.
;; Operand 1 is marked commutative ("%") with operand 2, and operand 2
;; may be a register or memory.  Requires TARGET_AVX and operands that
;; pass ix86_binary_operator_ok for PLUS.
7985 (define_insn "*avx_uavgv16qi3"
7986 [(set (match_operand:V16QI 0 "register_operand" "=x")
7992 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7994 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7995 (const_vector:V16QI [(const_int 1) (const_int 1)
7996 (const_int 1) (const_int 1)
7997 (const_int 1) (const_int 1)
7998 (const_int 1) (const_int 1)
7999 (const_int 1) (const_int 1)
8000 (const_int 1) (const_int 1)
8001 (const_int 1) (const_int 1)
8002 (const_int 1) (const_int 1)]))
8004 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
8005 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
8006 [(set_attr "type" "sseiadd")
8007 (set_attr "prefix" "vex")
8008 (set_attr "mode" "TI")])
;; SSE2 form of the V16QI average pattern: two-operand pavgb with
;; operand 1 tied to the destination (and commutative with operand 2).
;; Requires TARGET_SSE2 and operands that pass ix86_binary_operator_ok
;; for PLUS.
8010 (define_insn "*sse2_uavgv16qi3"
8011 [(set (match_operand:V16QI 0 "register_operand" "=x")
8017 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
8019 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
8020 (const_vector:V16QI [(const_int 1) (const_int 1)
8021 (const_int 1) (const_int 1)
8022 (const_int 1) (const_int 1)
8023 (const_int 1) (const_int 1)
8024 (const_int 1) (const_int 1)
8025 (const_int 1) (const_int 1)
8026 (const_int 1) (const_int 1)
8027 (const_int 1) (const_int 1)]))
8029 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
8030 "pavgb\t{%2, %0|%0, %2}"
8031 [(set_attr "type" "sseiadd")
8032 (set_attr "prefix_data16" "1")
8033 (set_attr "mode" "TI")])
;; Expander for the V8HI average pattern on operands 1 and 2.  The
;; pattern carries an all-ones V8HI constant vector.  Before matching,
;; the operands are canonicalized for a commutative PLUS via
;; ix86_fixup_binary_operands_no_copy.
8035 (define_expand "sse2_uavgv8hi3"
8036 [(set (match_operand:V8HI 0 "register_operand" "")
8042 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8044 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8045 (const_vector:V8HI [(const_int 1) (const_int 1)
8046 (const_int 1) (const_int 1)
8047 (const_int 1) (const_int 1)
8048 (const_int 1) (const_int 1)]))
8051 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX form of the V8HI average pattern: three-operand vpavgw.
;; Operand 1 is marked commutative ("%") with operand 2, and operand 2
;; may be a register or memory.  Requires TARGET_AVX and operands that
;; pass ix86_binary_operator_ok for PLUS.
8053 (define_insn "*avx_uavgv8hi3"
8054 [(set (match_operand:V8HI 0 "register_operand" "=x")
8060 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8062 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8063 (const_vector:V8HI [(const_int 1) (const_int 1)
8064 (const_int 1) (const_int 1)
8065 (const_int 1) (const_int 1)
8066 (const_int 1) (const_int 1)]))
8068 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8069 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
8070 [(set_attr "type" "sseiadd")
8071 (set_attr "prefix" "vex")
8072 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI average pattern: two-operand pavgw with
;; operand 1 tied to the destination (and commutative with operand 2).
;; Requires TARGET_SSE2 and operands that pass ix86_binary_operator_ok
;; for PLUS.
8074 (define_insn "*sse2_uavgv8hi3"
8075 [(set (match_operand:V8HI 0 "register_operand" "=x")
8081 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8083 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8084 (const_vector:V8HI [(const_int 1) (const_int 1)
8085 (const_int 1) (const_int 1)
8086 (const_int 1) (const_int 1)
8087 (const_int 1) (const_int 1)]))
8089 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8090 "pavgw\t{%2, %0|%0, %2}"
8091 [(set_attr "type" "sseiadd")
8092 (set_attr "prefix_data16" "1")
8093 (set_attr "mode" "TI")])
8095 ;; The correct representation for this is absolutely enormous, and
8096 ;; surely not generally useful.
;; Hence the operation is modelled as an opaque unspec of the two V16QI
;; inputs that produces a V2DI result.  AVX three-operand form (vpsadbw);
;; operand 2 may be a register or memory.
8097 (define_insn "*avx_psadbw"
8098 [(set (match_operand:V2DI 0 "register_operand" "=x")
8099 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
8100 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8103 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
8104 [(set_attr "type" "sseiadd")
8105 (set_attr "prefix" "vex")
8106 (set_attr "mode" "TI")])
;; psadbw modelled as an unspec of two V16QI inputs producing a V2DI
;; result.  Two-operand form: operand 1 is tied to the destination, and
;; operand 2 may be a register or memory.
8108 (define_insn "sse2_psadbw"
8109 [(set (match_operand:V2DI 0 "register_operand" "=x")
8110 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
8111 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8114 "psadbw\t{%2, %0|%0, %2}"
8115 [(set_attr "type" "sseiadd")
8116 (set_attr "atom_unit" "simul")
8117 (set_attr "prefix_data16" "1")
8118 (set_attr "mode" "TI")])
;; 256-bit vmovmskps/vmovmskpd: produces an SImode result in a general
;; register from a 256-bit floating-point vector register (mode iterator
;; AVX256MODEF2P).  Enabled when the mode satisfies
;; AVX256_VEC_FLOAT_MODE_P.
8120 (define_insn "avx_movmsk<ssemodesuffix>256"
8121 [(set (match_operand:SI 0 "register_operand" "=r")
8123 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8125 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8126 "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8127 [(set_attr "type" "ssecvt")
8128 (set_attr "prefix" "vex")
8129 (set_attr "mode" "<MODE>")])
;; 128-bit movmskps/movmskpd: produces an SImode result in a general
;; register from an SSE floating-point vector register (mode iterator
;; SSEMODEF2P).  Enabled when the mode satisfies SSE_VEC_FLOAT_MODE_P;
;; the prefix attribute is maybe_vex.
8131 (define_insn "<sse>_movmsk<ssemodesuffix>"
8132 [(set (match_operand:SI 0 "register_operand" "=r")
8134 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8136 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8137 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8138 [(set_attr "type" "ssemov")
8139 (set_attr "prefix" "maybe_vex")
8140 (set_attr "mode" "<MODE>")])
;; pmovmskb: unspec taking a V16QI register and producing an SImode
;; result in a general register.  The prefix attribute is maybe_vex.
8142 (define_insn "sse2_pmovmskb"
8143 [(set (match_operand:SI 0 "register_operand" "=r")
8144 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8147 "%vpmovmskb\t{%1, %0|%0, %1}"
8148 [(set_attr "type" "ssemov")
8149 (set_attr "prefix_data16" "1")
8150 (set_attr "prefix" "maybe_vex")
8151 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination (operand 0) set
;; from an unspec of two V16QI register operands (1 and 2).
8153 (define_expand "sse2_maskmovdqu"
8154 [(set (match_operand:V16QI 0 "memory_operand" "")
8155 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8156 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu.  The destination is the V16QI memory addressed by
;; SImode operand 0, which must be in the register selected by
;; constraint "D".  The unspec takes the two V16QI source registers plus
;; the prior contents of that same memory (mem of match_dup 0).  The
;; address register is implicit: only operands 1 and 2 are printed.
8161 (define_insn "*sse2_maskmovdqu"
8162 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8163 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8164 (match_operand:V16QI 2 "register_operand" "x")
8165 (mem:V16QI (match_dup 0))]
8167 "TARGET_SSE2 && !TARGET_64BIT"
8168 ;; @@@ check ordering of operands in intel/nonintel syntax
8169 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8170 [(set_attr "type" "ssemov")
8171 (set_attr "prefix_data16" "1")
8172 ;; The implicit %rdi operand confuses default length_vex computation.
8173 (set_attr "length_vex" "3")
8174 (set_attr "prefix" "maybe_vex")
8175 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu.  Same shape as the 32-bit pattern, but the address
;; operand 0 is DImode.  length_vex is computed by hand: 3 + 1 when
;; operand 2 is one of the REX-only SSE registers (REGNO >=
;; FIRST_REX_SSE_REG), otherwise 2 + 1.
8177 (define_insn "*sse2_maskmovdqu_rex64"
8178 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8179 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8180 (match_operand:V16QI 2 "register_operand" "x")
8181 (mem:V16QI (match_dup 0))]
8183 "TARGET_SSE2 && TARGET_64BIT"
8184 ;; @@@ check ordering of operands in intel/nonintel syntax
8185 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8186 [(set_attr "type" "ssemov")
8187 (set_attr "prefix_data16" "1")
8188 ;; The implicit %rdi operand confuses default length_vex computation.
8189 (set (attr "length_vex")
8190 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8191 (set_attr "prefix" "maybe_vex")
8192 (set_attr "mode" "TI")])
;; ldmxcsr: unspec_volatile that reads an SImode memory operand.
;; Attributes mark it as a memory load in the Atom "mxcsr" class.
8194 (define_insn "sse_ldmxcsr"
8195 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8199 [(set_attr "type" "sse")
8200 (set_attr "atom_sse_attr" "mxcsr")
8201 (set_attr "prefix" "maybe_vex")
8202 (set_attr "memory" "load")])
;; stmxcsr: writes an SImode memory operand from an unspec_volatile
;; (UNSPECV_STMXCSR) that has no real inputs.  Attributes mark it as a
;; memory store in the Atom "mxcsr" class.
8204 (define_insn "sse_stmxcsr"
8205 [(set (match_operand:SI 0 "memory_operand" "=m")
8206 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8209 [(set_attr "type" "sse")
8210 (set_attr "atom_sse_attr" "mxcsr")
8211 (set_attr "prefix" "maybe_vex")
8212 (set_attr "memory" "store")])
;; Expander for sfence (available with SSE or 3DNow!-A).  Operand 0 is
;; created here as a volatile BLKmode MEM with a scratch address; the
;; fence is expressed as an UNSPEC_SFENCE of that MEM.
8214 (define_expand "sse_sfence"
8216 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8217 "TARGET_SSE || TARGET_3DNOW_A"
8219 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8220 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the sfence pattern: a BLKmode operand set to
;; UNSPEC_SFENCE of itself.  No address bytes are encoded
;; (length_address 0) and the memory effect is declared "unknown".
8223 (define_insn "*sse_sfence"
8224 [(set (match_operand:BLK 0 "" "")
8225 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8226 "TARGET_SSE || TARGET_3DNOW_A"
8228 [(set_attr "type" "sse")
8229 (set_attr "length_address" "0")
8230 (set_attr "atom_sse_attr" "fence")
8231 (set_attr "memory" "unknown")])
;; clflush: unspec_volatile over an address operand (constraint "p").
;; Scheduled in the Atom "fence" class with memory effect "unknown".
8233 (define_insn "sse2_clflush"
8234 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8238 [(set_attr "type" "sse")
8239 (set_attr "atom_sse_attr" "fence")
8240 (set_attr "memory" "unknown")])
;; Expander for mfence.  Operand 0 is created here as a volatile BLKmode
;; MEM with a scratch address; the fence is expressed as an
;; UNSPEC_MFENCE of that MEM.
8242 (define_expand "sse2_mfence"
8244 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8247 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8248 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the mfence pattern: a BLKmode operand set to
;; UNSPEC_MFENCE of itself.  Enabled on any 64-bit target or with SSE2.
;; No address bytes are encoded (length_address 0).
8251 (define_insn "*sse2_mfence"
8252 [(set (match_operand:BLK 0 "" "")
8253 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8254 "TARGET_64BIT || TARGET_SSE2"
8256 [(set_attr "type" "sse")
8257 (set_attr "length_address" "0")
8258 (set_attr "atom_sse_attr" "fence")
8259 (set_attr "memory" "unknown")])
;; Expander for lfence.  Operand 0 is created here as a volatile BLKmode
;; MEM with a scratch address; the fence is expressed as an
;; UNSPEC_LFENCE of that MEM.
8261 (define_expand "sse2_lfence"
8263 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8266 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8267 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the lfence pattern: a BLKmode operand set to
;; UNSPEC_LFENCE of itself.  Unlike the sfence/mfence patterns, its Atom
;; scheduling class is "lfence" rather than "fence".
8270 (define_insn "*sse2_lfence"
8271 [(set (match_operand:BLK 0 "" "")
8272 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8275 [(set_attr "type" "sse")
8276 (set_attr "length_address" "0")
8277 (set_attr "atom_sse_attr" "lfence")
8278 (set_attr "memory" "unknown")])
;; mwait: unspec_volatile over two SImode operands pinned by constraints
;; "a" and "c".  One pattern serves both 32- and 64-bit targets, for the
;; reason given in the comment inside the pattern.  Length is fixed at
;; 3 bytes.
8280 (define_insn "sse3_mwait"
8281 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8282 (match_operand:SI 1 "register_operand" "c")]
8285 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8286 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8287 ;; we only need to set up 32bit registers.
8289 [(set_attr "length" "3")])
;; 32-bit monitor: unspec_volatile over three SImode operands pinned by
;; constraints "a", "c" and "d".  Length is fixed at 3 bytes.
8291 (define_insn "sse3_monitor"
8292 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8293 (match_operand:SI 1 "register_operand" "c")
8294 (match_operand:SI 2 "register_operand" "d")]
8296 "TARGET_SSE3 && !TARGET_64BIT"
8297 "monitor\t%0, %1, %2"
8298 [(set_attr "length" "3")])
;; 64-bit monitor: operand 0 (constraint "a") is DImode, while operands
;; 1 and 2 (constraints "c" and "d") stay SImode, as explained in the
;; comment inside the pattern.  Length is fixed at 3 bytes.
8300 (define_insn "sse3_monitor64"
8301 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8302 (match_operand:SI 1 "register_operand" "c")
8303 (match_operand:SI 2 "register_operand" "d")]
8305 "TARGET_SSE3 && TARGET_64BIT"
8306 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8307 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8308 ;; zero extended to 64bit, we only need to set up 32bit registers.
8310 [(set_attr "length" "3")])
8312 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8314 ;; SSSE3 instructions
8316 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vphaddw on V8HI.  The pattern selects adjacent HImode element
;; pairs (0,1), (2,3), (4,5), (6,7), first from operand 1 and then from
;; operand 2.  Three-operand form; operand 2 may be a register or memory.
8318 (define_insn "*avx_phaddwv8hi3"
8319 [(set (match_operand:V8HI 0 "register_operand" "=x")
8325 (match_operand:V8HI 1 "register_operand" "x")
8326 (parallel [(const_int 0)]))
8327 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8329 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8330 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8333 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8334 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8336 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8337 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8342 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8343 (parallel [(const_int 0)]))
8344 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8346 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8347 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8350 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8351 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8353 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8354 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8356 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8357 [(set_attr "type" "sseiadd")
8358 (set_attr "prefix_extra" "1")
8359 (set_attr "prefix" "vex")
8360 (set_attr "mode" "TI")])
;; SSSE3 phaddw on V8HI.  The pattern selects adjacent HImode element
;; pairs (0,1), (2,3), (4,5), (6,7), first from operand 1 and then from
;; operand 2.  Two-operand form: operand 1 is tied to the destination,
;; and operand 2 may be a register or memory.
8362 (define_insn "ssse3_phaddwv8hi3"
8363 [(set (match_operand:V8HI 0 "register_operand" "=x")
8369 (match_operand:V8HI 1 "register_operand" "0")
8370 (parallel [(const_int 0)]))
8371 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8373 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8374 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8377 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8378 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8380 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8381 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8386 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8387 (parallel [(const_int 0)]))
8388 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8390 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8391 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8394 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8395 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8397 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8398 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8400 "phaddw\t{%2, %0|%0, %2}"
8401 [(set_attr "type" "sseiadd")
8402 (set_attr "atom_unit" "complex")
8403 (set_attr "prefix_data16" "1")
8404 (set_attr "prefix_extra" "1")
8405 (set_attr "mode" "TI")])
;; MMX-register phaddw on V4HI (constraint "y").  The pattern selects
;; adjacent HImode element pairs (0,1), (2,3), first from operand 1 and
;; then from operand 2.  Operand 1 is tied to the destination.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
8407 (define_insn "ssse3_phaddwv4hi3"
8408 [(set (match_operand:V4HI 0 "register_operand" "=y")
8413 (match_operand:V4HI 1 "register_operand" "0")
8414 (parallel [(const_int 0)]))
8415 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8417 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8418 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8422 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8423 (parallel [(const_int 0)]))
8424 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8426 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8427 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8429 "phaddw\t{%2, %0|%0, %2}"
8430 [(set_attr "type" "sseiadd")
8431 (set_attr "atom_unit" "complex")
8432 (set_attr "prefix_extra" "1")
8433 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8434 (set_attr "mode" "DI")])
;; AVX vphaddd on V4SI.  The pattern selects adjacent SImode element
;; pairs (0,1), (2,3), first from operand 1 and then from operand 2.
;; Three-operand form; operand 2 may be a register or memory.
8436 (define_insn "*avx_phadddv4si3"
8437 [(set (match_operand:V4SI 0 "register_operand" "=x")
8442 (match_operand:V4SI 1 "register_operand" "x")
8443 (parallel [(const_int 0)]))
8444 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8446 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8447 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8451 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8452 (parallel [(const_int 0)]))
8453 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8455 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8456 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8458 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8459 [(set_attr "type" "sseiadd")
8460 (set_attr "prefix_extra" "1")
8461 (set_attr "prefix" "vex")
8462 (set_attr "mode" "TI")])
;; SSSE3 phaddd on V4SI.  The pattern selects adjacent SImode element
;; pairs (0,1), (2,3), first from operand 1 and then from operand 2.
;; Two-operand form: operand 1 is tied to the destination, and operand 2
;; may be a register or memory.
8464 (define_insn "ssse3_phadddv4si3"
8465 [(set (match_operand:V4SI 0 "register_operand" "=x")
8470 (match_operand:V4SI 1 "register_operand" "0")
8471 (parallel [(const_int 0)]))
8472 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8474 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8475 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8479 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8480 (parallel [(const_int 0)]))
8481 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8483 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8484 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8486 "phaddd\t{%2, %0|%0, %2}"
8487 [(set_attr "type" "sseiadd")
8488 (set_attr "atom_unit" "complex")
8489 (set_attr "prefix_data16" "1")
8490 (set_attr "prefix_extra" "1")
8491 (set_attr "mode" "TI")])
;; PHADDD, 64-bit form: V2SI operands in MMX registers ("y").  Elements 0
;; and 1 of each input are selected.  Operand 1 is tied to the destination
;; (constraint "0"); operand 2 may be a register or memory.
;; prefix_rex is derived from x86_extended_reg_mentioned_p on the insn.
8493 (define_insn "ssse3_phadddv2si3"
8494 [(set (match_operand:V2SI 0 "register_operand" "=y")
8498 (match_operand:V2SI 1 "register_operand" "0")
8499 (parallel [(const_int 0)]))
8500 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8503 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8504 (parallel [(const_int 0)]))
8505 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8507 "phaddd\t{%2, %0|%0, %2}"
8508 [(set_attr "type" "sseiadd")
8509 (set_attr "atom_unit" "complex")
8510 (set_attr "prefix_extra" "1")
8511 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8512 (set_attr "mode" "DI")])
;; VPHADDSW, VEX-encoded three-operand form on V8HI.  Elements 0..7 of
;; each input are selected individually.  Operand 1 is a separate SSE
;; source register ("x"); operand 2 may be a register or memory.
;; The leading '*' in the name means no gen_* function is produced.
8514 (define_insn "*avx_phaddswv8hi3"
8515 [(set (match_operand:V8HI 0 "register_operand" "=x")
8521 (match_operand:V8HI 1 "register_operand" "x")
8522 (parallel [(const_int 0)]))
8523 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8525 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8526 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8529 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8530 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8532 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8533 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8538 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8539 (parallel [(const_int 0)]))
8540 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8542 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8543 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8546 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8547 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8549 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8550 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8552 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8553 [(set_attr "type" "sseiadd")
8554 (set_attr "prefix_extra" "1")
8555 (set_attr "prefix" "vex")
8556 (set_attr "mode" "TI")])
;; PHADDSW, 128-bit legacy-encoded form on V8HI in SSE registers ("x").
;; Elements 0..7 of each input are selected individually.  Operand 1 is
;; tied to the destination (constraint "0"); operand 2 may be a register
;; or memory.
8558 (define_insn "ssse3_phaddswv8hi3"
8559 [(set (match_operand:V8HI 0 "register_operand" "=x")
8565 (match_operand:V8HI 1 "register_operand" "0")
8566 (parallel [(const_int 0)]))
8567 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8569 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8570 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8573 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8574 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8576 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8577 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8582 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8583 (parallel [(const_int 0)]))
8584 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8586 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8587 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8590 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8591 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8593 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8594 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8596 "phaddsw\t{%2, %0|%0, %2}"
8597 [(set_attr "type" "sseiadd")
8598 (set_attr "atom_unit" "complex")
8599 (set_attr "prefix_data16" "1")
8600 (set_attr "prefix_extra" "1")
8601 (set_attr "mode" "TI")])
;; PHADDSW, 64-bit form: V4HI operands in MMX registers ("y").
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be
;; a register or memory.  Elements 0..3 of each input are selected.
;; prefix_rex is derived from x86_extended_reg_mentioned_p on the insn.
8603 (define_insn "ssse3_phaddswv4hi3"
8604 [(set (match_operand:V4HI 0 "register_operand" "=y")
8609 (match_operand:V4HI 1 "register_operand" "0")
8610 (parallel [(const_int 0)]))
8611 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8613 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8614 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8618 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8619 (parallel [(const_int 0)]))
8620 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8622 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8623 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8625 "phaddsw\t{%2, %0|%0, %2}"
8626 [(set_attr "type" "sseiadd")
8627 (set_attr "atom_unit" "complex")
8628 (set_attr "prefix_extra" "1")
8629 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8630 (set_attr "mode" "DI")])
;; VPHSUBW, VEX-encoded three-operand form on V8HI.  Elements 0..7 of
;; each input are selected individually.  Operand 1 is a separate SSE
;; source register ("x"); operand 2 may be a register or memory.
;; The leading '*' in the name means no gen_* function is produced.
8632 (define_insn "*avx_phsubwv8hi3"
8633 [(set (match_operand:V8HI 0 "register_operand" "=x")
8639 (match_operand:V8HI 1 "register_operand" "x")
8640 (parallel [(const_int 0)]))
8641 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8643 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8644 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8647 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8648 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8650 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8651 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8656 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8657 (parallel [(const_int 0)]))
8658 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8660 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8661 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8664 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8665 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8667 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8668 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8670 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8671 [(set_attr "type" "sseiadd")
8672 (set_attr "prefix_extra" "1")
8673 (set_attr "prefix" "vex")
8674 (set_attr "mode" "TI")])
;; PHSUBW, 128-bit legacy-encoded form on V8HI in SSE registers ("x").
;; Elements 0..7 of each input are selected individually.  Operand 1 is
;; tied to the destination (constraint "0"); operand 2 may be a register
;; or memory.
8676 (define_insn "ssse3_phsubwv8hi3"
8677 [(set (match_operand:V8HI 0 "register_operand" "=x")
8683 (match_operand:V8HI 1 "register_operand" "0")
8684 (parallel [(const_int 0)]))
8685 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8687 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8688 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8691 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8692 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8694 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8695 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8700 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8701 (parallel [(const_int 0)]))
8702 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8704 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8705 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8708 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8709 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8711 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8712 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8714 "phsubw\t{%2, %0|%0, %2}"
8715 [(set_attr "type" "sseiadd")
8716 (set_attr "atom_unit" "complex")
8717 (set_attr "prefix_data16" "1")
8718 (set_attr "prefix_extra" "1")
8719 (set_attr "mode" "TI")])
;; PHSUBW, 64-bit form: V4HI operands in MMX registers ("y").
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be
;; a register or memory.  Elements 0..3 of each input are selected.
;; prefix_rex is derived from x86_extended_reg_mentioned_p on the insn.
8721 (define_insn "ssse3_phsubwv4hi3"
8722 [(set (match_operand:V4HI 0 "register_operand" "=y")
8727 (match_operand:V4HI 1 "register_operand" "0")
8728 (parallel [(const_int 0)]))
8729 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8731 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8732 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8736 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8737 (parallel [(const_int 0)]))
8738 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8740 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8741 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8743 "phsubw\t{%2, %0|%0, %2}"
8744 [(set_attr "type" "sseiadd")
8745 (set_attr "atom_unit" "complex")
8746 (set_attr "prefix_extra" "1")
8747 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8748 (set_attr "mode" "DI")])
;; VPHSUBD, VEX-encoded three-operand form on V4SI.  Operand 1 is a
;; separate SSE source register ("x") instead of being tied to the
;; destination; operand 2 may be a register or memory.
;; The leading '*' in the name means no gen_* function is produced.
8750 (define_insn "*avx_phsubdv4si3"
8751 [(set (match_operand:V4SI 0 "register_operand" "=x")
8756 (match_operand:V4SI 1 "register_operand" "x")
8757 (parallel [(const_int 0)]))
8758 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8760 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8761 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8765 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8766 (parallel [(const_int 0)]))
8767 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8769 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8770 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8772 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8773 [(set_attr "type" "sseiadd")
8774 (set_attr "prefix_extra" "1")
8775 (set_attr "prefix" "vex")
8776 (set_attr "mode" "TI")])
;; PHSUBD, 128-bit legacy-encoded form on V4SI in SSE registers ("x").
;; Operand 1 must be the same register as operand 0 (constraint "0"), so
;; only operands 2 and 0 appear in the output template.  Operand 2 may be
;; a register or memory.
8778 (define_insn "ssse3_phsubdv4si3"
8779 [(set (match_operand:V4SI 0 "register_operand" "=x")
8784 (match_operand:V4SI 1 "register_operand" "0")
8785 (parallel [(const_int 0)]))
8786 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8788 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8789 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8793 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8794 (parallel [(const_int 0)]))
8795 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8797 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8798 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8800 "phsubd\t{%2, %0|%0, %2}"
8801 [(set_attr "type" "sseiadd")
8802 (set_attr "atom_unit" "complex")
8803 (set_attr "prefix_data16" "1")
8804 (set_attr "prefix_extra" "1")
8805 (set_attr "mode" "TI")])
;; PHSUBD, 64-bit form: V2SI operands in MMX registers ("y").  Elements 0
;; and 1 of each input are selected.  Operand 1 is tied to the destination
;; (constraint "0"); operand 2 may be a register or memory.
;; prefix_rex is derived from x86_extended_reg_mentioned_p on the insn.
8807 (define_insn "ssse3_phsubdv2si3"
8808 [(set (match_operand:V2SI 0 "register_operand" "=y")
8812 (match_operand:V2SI 1 "register_operand" "0")
8813 (parallel [(const_int 0)]))
8814 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8817 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8818 (parallel [(const_int 0)]))
8819 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8821 "phsubd\t{%2, %0|%0, %2}"
8822 [(set_attr "type" "sseiadd")
8823 (set_attr "atom_unit" "complex")
8824 (set_attr "prefix_extra" "1")
8825 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8826 (set_attr "mode" "DI")])
;; VPHSUBSW, VEX-encoded three-operand form on V8HI.  Elements 0..7 of
;; each input are selected individually.  Operand 1 is a separate SSE
;; source register ("x"); operand 2 may be a register or memory.
;; The leading '*' in the name means no gen_* function is produced.
8828 (define_insn "*avx_phsubswv8hi3"
8829 [(set (match_operand:V8HI 0 "register_operand" "=x")
8835 (match_operand:V8HI 1 "register_operand" "x")
8836 (parallel [(const_int 0)]))
8837 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8839 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8840 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8843 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8844 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8846 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8847 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8852 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8853 (parallel [(const_int 0)]))
8854 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8856 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8857 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8860 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8861 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8863 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8864 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8866 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8867 [(set_attr "type" "sseiadd")
8868 (set_attr "prefix_extra" "1")
8869 (set_attr "prefix" "vex")
8870 (set_attr "mode" "TI")])
;; PHSUBSW, 128-bit legacy-encoded form on V8HI in SSE registers ("x").
;; Elements 0..7 of each input are selected individually.  Operand 1 is
;; tied to the destination (constraint "0"); operand 2 may be a register
;; or memory.
8872 (define_insn "ssse3_phsubswv8hi3"
8873 [(set (match_operand:V8HI 0 "register_operand" "=x")
8879 (match_operand:V8HI 1 "register_operand" "0")
8880 (parallel [(const_int 0)]))
8881 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8883 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8884 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8887 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8888 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8890 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8891 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8896 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8897 (parallel [(const_int 0)]))
8898 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8900 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8901 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8904 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8905 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8907 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8908 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8910 "phsubsw\t{%2, %0|%0, %2}"
8911 [(set_attr "type" "sseiadd")
8912 (set_attr "atom_unit" "complex")
8913 (set_attr "prefix_data16" "1")
8914 (set_attr "prefix_extra" "1")
8915 (set_attr "mode" "TI")])
;; PHSUBSW, 64-bit form: V4HI operands in MMX registers ("y").
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be
;; a register or memory.  Elements 0..3 of each input are selected.
;; prefix_rex is derived from x86_extended_reg_mentioned_p on the insn.
8917 (define_insn "ssse3_phsubswv4hi3"
8918 [(set (match_operand:V4HI 0 "register_operand" "=y")
8923 (match_operand:V4HI 1 "register_operand" "0")
8924 (parallel [(const_int 0)]))
8925 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8927 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8928 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8932 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8933 (parallel [(const_int 0)]))
8934 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8936 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8937 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8939 "phsubsw\t{%2, %0|%0, %2}"
8940 [(set_attr "type" "sseiadd")
8941 (set_attr "atom_unit" "complex")
8942 (set_attr "prefix_extra" "1")
8943 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8944 (set_attr "mode" "DI")])
;; VPMADDUBSW, VEX-encoded three-operand form: two V16QI inputs produce a
;; V8HI result.  Each input is referenced through element selections, one
;; list beginning at index 0 and one beginning at index 1.  Operand 1 is a
;; separate SSE source register ("x"); operand 2 may be register or memory.
;; The leading '*' in the name means no gen_* function is produced.
8946 (define_insn "*avx_pmaddubsw128"
8947 [(set (match_operand:V8HI 0 "register_operand" "=x")
8952 (match_operand:V16QI 1 "register_operand" "x")
8953 (parallel [(const_int 0)
8963 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8964 (parallel [(const_int 0)
8974 (vec_select:V16QI (match_dup 1)
8975 (parallel [(const_int 1)
8984 (vec_select:V16QI (match_dup 2)
8985 (parallel [(const_int 1)
8992 (const_int 15)]))))))]
8994 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8995 [(set_attr "type" "sseiadd")
8996 (set_attr "prefix_extra" "1")
8997 (set_attr "prefix" "vex")
8998 (set_attr "mode" "TI")])
;; PMADDUBSW, 128-bit legacy-encoded form: two V16QI inputs produce a V8HI
;; result.  Each input is referenced through element selections, one list
;; beginning at index 0 and one beginning at index 1.  Operand 1 is tied
;; to the destination (constraint "0"); operand 2 may be register or memory.
9000 (define_insn "ssse3_pmaddubsw128"
9001 [(set (match_operand:V8HI 0 "register_operand" "=x")
9006 (match_operand:V16QI 1 "register_operand" "0")
9007 (parallel [(const_int 0)
9017 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9018 (parallel [(const_int 0)
9028 (vec_select:V16QI (match_dup 1)
9029 (parallel [(const_int 1)
9038 (vec_select:V16QI (match_dup 2)
9039 (parallel [(const_int 1)
9046 (const_int 15)]))))))]
9048 "pmaddubsw\t{%2, %0|%0, %2}"
9049 [(set_attr "type" "sseiadd")
9050 (set_attr "atom_unit" "simul")
9051 (set_attr "prefix_data16" "1")
9052 (set_attr "prefix_extra" "1")
9053 (set_attr "mode" "TI")])
;; PMADDUBSW, 64-bit form: two V8QI inputs in MMX registers ("y") produce
;; a V4HI result.  Each input is referenced through element selections,
;; one list beginning at index 0 and one beginning at index 1.  Operand 1
;; is tied to the destination (constraint "0").
;; prefix_rex is derived from x86_extended_reg_mentioned_p on the insn.
9055 (define_insn "ssse3_pmaddubsw"
9056 [(set (match_operand:V4HI 0 "register_operand" "=y")
9061 (match_operand:V8QI 1 "register_operand" "0")
9062 (parallel [(const_int 0)
9068 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9069 (parallel [(const_int 0)
9075 (vec_select:V8QI (match_dup 1)
9076 (parallel [(const_int 1)
9081 (vec_select:V8QI (match_dup 2)
9082 (parallel [(const_int 1)
9085 (const_int 7)]))))))]
9087 "pmaddubsw\t{%2, %0|%0, %2}"
9088 [(set_attr "type" "sseiadd")
9089 (set_attr "atom_unit" "simul")
9090 (set_attr "prefix_extra" "1")
9091 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9092 (set_attr "mode" "DI")])
;; Expander for the V8HI PMULHRSW pattern.  Both inputs are accepted as
;; nonimmediate operands with no constraints; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands)
;; before the RTL is emitted.  The all-ones V8HI const_vector is part of
;; the generated RTL and must agree with the matching insn patterns.
9094 (define_expand "ssse3_pmulhrswv8hi3"
9095 [(set (match_operand:V8HI 0 "register_operand" "")
9102 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9104 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9106 (const_vector:V8HI [(const_int 1) (const_int 1)
9107 (const_int 1) (const_int 1)
9108 (const_int 1) (const_int 1)
9109 (const_int 1) (const_int 1)]))
9112 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; VPMULHRSW, VEX-encoded three-operand form on V8HI.  Enabled only when
;; TARGET_AVX holds and ix86_binary_operator_ok accepts the operands.
;; The '%' on operand 1 marks operands 1 and 2 as commutative, so either
;; input may end up as the register-or-memory operand.
;; The leading '*' in the name means no gen_* function is produced.
9114 (define_insn "*avx_pmulhrswv8hi3"
9115 [(set (match_operand:V8HI 0 "register_operand" "=x")
9122 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9124 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9126 (const_vector:V8HI [(const_int 1) (const_int 1)
9127 (const_int 1) (const_int 1)
9128 (const_int 1) (const_int 1)
9129 (const_int 1) (const_int 1)]))
9131 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9132 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9133 [(set_attr "type" "sseimul")
9134 (set_attr "prefix_extra" "1")
9135 (set_attr "prefix" "vex")
9136 (set_attr "mode" "TI")])
;; PMULHRSW, 128-bit legacy-encoded form on V8HI.  Enabled when
;; TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the operands.
;; Operand 1 is tied to the destination ("0"); the '%' marks operands 1
;; and 2 as commutative so either input may be the tied one.
;; The leading '*' in the name means no gen_* function is produced.
9138 (define_insn "*ssse3_pmulhrswv8hi3"
9139 [(set (match_operand:V8HI 0 "register_operand" "=x")
9146 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9148 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9150 (const_vector:V8HI [(const_int 1) (const_int 1)
9151 (const_int 1) (const_int 1)
9152 (const_int 1) (const_int 1)
9153 (const_int 1) (const_int 1)]))
9155 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9156 "pmulhrsw\t{%2, %0|%0, %2}"
9157 [(set_attr "type" "sseimul")
9158 (set_attr "prefix_data16" "1")
9159 (set_attr "prefix_extra" "1")
9160 (set_attr "mode" "TI")])
;; Expander for the V4HI (64-bit) PMULHRSW pattern.  Both inputs are
;; accepted as nonimmediate operands with no constraints; the preparation
;; statement calls ix86_fixup_binary_operands_no_copy (MULT, V4HImode,
;; operands) before the RTL is emitted.  The all-ones V4HI const_vector is
;; part of the generated RTL and must agree with the matching insn pattern.
9162 (define_expand "ssse3_pmulhrswv4hi3"
9163 [(set (match_operand:V4HI 0 "register_operand" "")
9170 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9172 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9174 (const_vector:V4HI [(const_int 1) (const_int 1)
9175 (const_int 1) (const_int 1)]))
9178 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; PMULHRSW, 64-bit form on V4HI in MMX registers ("y").  Enabled when
;; TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the operands.
;; Operand 1 is tied to the destination ("0"); the '%' marks operands 1
;; and 2 as commutative.  prefix_rex is derived from
;; x86_extended_reg_mentioned_p on the insn.
9180 (define_insn "*ssse3_pmulhrswv4hi3"
9181 [(set (match_operand:V4HI 0 "register_operand" "=y")
9188 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9190 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9192 (const_vector:V4HI [(const_int 1) (const_int 1)
9193 (const_int 1) (const_int 1)]))
9195 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9196 "pmulhrsw\t{%2, %0|%0, %2}"
9197 [(set_attr "type" "sseimul")
9198 (set_attr "prefix_extra" "1")
9199 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9200 (set_attr "mode" "DI")])
;; VPSHUFB, VEX-encoded three-operand form on V16QI, modelled as an
;; unspec of operands 1 and 2.  Operand 1 is a separate SSE source
;; register ("x"); operand 2 may be a register or memory.
;; The ';' after the output template starts an md comment and is inert.
9202 (define_insn "*avx_pshufbv16qi3"
9203 [(set (match_operand:V16QI 0 "register_operand" "=x")
9204 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9205 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9208 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9209 [(set_attr "type" "sselog1")
9210 (set_attr "prefix_extra" "1")
9211 (set_attr "prefix" "vex")
9212 (set_attr "mode" "TI")])
;; PSHUFB, 128-bit legacy-encoded form on V16QI, modelled as an unspec of
;; operands 1 and 2.  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory.
;; The ';' after the output template starts an md comment and is inert.
9214 (define_insn "ssse3_pshufbv16qi3"
9215 [(set (match_operand:V16QI 0 "register_operand" "=x")
9216 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9217 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9220 "pshufb\t{%2, %0|%0, %2}";
9221 [(set_attr "type" "sselog1")
9222 (set_attr "prefix_data16" "1")
9223 (set_attr "prefix_extra" "1")
9224 (set_attr "mode" "TI")])
;; PSHUFB, 64-bit form on V8QI in MMX registers ("y"), modelled as an
;; unspec of operands 1 and 2.  Operand 1 is tied to the destination ("0").
;; prefix_rex is derived from x86_extended_reg_mentioned_p on the insn.
;; The ';' after the output template starts an md comment and is inert.
9226 (define_insn "ssse3_pshufbv8qi3"
9227 [(set (match_operand:V8QI 0 "register_operand" "=y")
9228 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9229 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9232 "pshufb\t{%2, %0|%0, %2}";
9233 [(set_attr "type" "sselog1")
9234 (set_attr "prefix_extra" "1")
9235 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9236 (set_attr "mode" "DI")])
;; VPSIGN{b,w,d}, VEX-encoded three-operand form, instantiated once per
;; mode of the SSEMODE124 iterator; <ssevecsize> supplies the mnemonic
;; suffix.  Operand 1 is a separate SSE source register ("x"); operand 2
;; may be a register or memory.
;; The ';' after the output template starts an md comment and is inert.
9238 (define_insn "*avx_psign<mode>3"
9239 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9241 [(match_operand:SSEMODE124 1 "register_operand" "x")
9242 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9245 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9246 [(set_attr "type" "sselog1")
9247 (set_attr "prefix_extra" "1")
9248 (set_attr "prefix" "vex")
9249 (set_attr "mode" "TI")])
;; PSIGN{b,w,d}, 128-bit legacy-encoded form, instantiated once per mode
;; of the SSEMODE124 iterator; <ssevecsize> supplies the mnemonic suffix.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.
;; The ';' after the output template starts an md comment and is inert.
9251 (define_insn "ssse3_psign<mode>3"
9252 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9254 [(match_operand:SSEMODE124 1 "register_operand" "0")
9255 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9258 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9259 [(set_attr "type" "sselog1")
9260 (set_attr "prefix_data16" "1")
9261 (set_attr "prefix_extra" "1")
9262 (set_attr "mode" "TI")])
;; PSIGN{b,w,d}, 64-bit form, instantiated once per mode of the MMXMODEI
;; iterator with operands in MMX registers ("y"); <mmxvecsize> supplies
;; the mnemonic suffix.  The template name matches the SSEMODE124 pattern
;; but expands to different <mode> names, so the two do not collide.
;; The ';' after the output template starts an md comment and is inert.
9264 (define_insn "ssse3_psign<mode>3"
9265 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9267 [(match_operand:MMXMODEI 1 "register_operand" "0")
9268 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9271 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9272 [(set_attr "type" "sselog1")
9273 (set_attr "prefix_extra" "1")
9274 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9275 (set_attr "mode" "DI")])
;; VPALIGNR, VEX-encoded form on TImode, modelled as an unspec.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing, so the RTL carries eight times the
;; value that appears as the instruction's immediate.
;; The leading '*' in the name means no gen_* function is produced.
9277 (define_insn "*avx_palignrti"
9278 [(set (match_operand:TI 0 "register_operand" "=x")
9279 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9280 (match_operand:TI 2 "nonimmediate_operand" "xm")
9281 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9285 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9286 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9288 [(set_attr "type" "sseishft")
9289 (set_attr "prefix_extra" "1")
9290 (set_attr "length_immediate" "1")
9291 (set_attr "prefix" "vex")
9292 (set_attr "mode" "TI")])
;; PALIGNR, 128-bit legacy-encoded form on TImode, modelled as an unspec.
;; Operand 1 is tied to the destination ("0").  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so the RTL carries eight times the emitted immediate.
9294 (define_insn "ssse3_palignrti"
9295 [(set (match_operand:TI 0 "register_operand" "=x")
9296 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9297 (match_operand:TI 2 "nonimmediate_operand" "xm")
9298 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9302 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9303 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9305 [(set_attr "type" "sseishft")
9306 (set_attr "atom_unit" "sishuf")
9307 (set_attr "prefix_data16" "1")
9308 (set_attr "prefix_extra" "1")
9309 (set_attr "length_immediate" "1")
9310 (set_attr "mode" "TI")])
;; PALIGNR, 64-bit form on DImode in MMX registers ("y"), modelled as an
;; unspec.  Operand 1 is tied to the destination ("0").  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing.  prefix_rex is derived from
;; x86_extended_reg_mentioned_p on the insn.
9312 (define_insn "ssse3_palignrdi"
9313 [(set (match_operand:DI 0 "register_operand" "=y")
9314 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9315 (match_operand:DI 2 "nonimmediate_operand" "ym")
9316 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9320 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9321 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9323 [(set_attr "type" "sseishft")
9324 (set_attr "atom_unit" "sishuf")
9325 (set_attr "prefix_extra" "1")
9326 (set_attr "length_immediate" "1")
9327 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9328 (set_attr "mode" "DI")])
;; Element-wise absolute value (RTL abs) for each mode of the SSEMODE124
;; iterator, emitted as PABS{b,w,d}.  The source may be a register or
;; memory and is not tied to the destination.  A single pattern covers
;; both encodings: the template starts with %v and prefix is "maybe_vex".
;; NOTE(review): %v is expected to print the "v" mnemonic prefix only for
;; AVX targets -- confirm against the operand-printing code.
9330 (define_insn "abs<mode>2"
9331 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9332 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9334 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9335 [(set_attr "type" "sselog1")
9336 (set_attr "prefix_data16" "1")
9337 (set_attr "prefix_extra" "1")
9338 (set_attr "prefix" "maybe_vex")
9339 (set_attr "mode" "TI")])
;; Element-wise absolute value (RTL abs) for each mode of the MMXMODEI
;; iterator, emitted as PABS{b,w,d} on MMX registers ("y").  The source
;; may be a register or memory.  prefix_rep is forced to 0 and prefix_rex
;; is derived from x86_extended_reg_mentioned_p on the insn.
;; The ';' after the output template starts an md comment and is inert.
9341 (define_insn "abs<mode>2"
9342 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9343 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9345 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9346 [(set_attr "type" "sselog1")
9347 (set_attr "prefix_rep" "0")
9348 (set_attr "prefix_extra" "1")
9349 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9350 (set_attr "mode" "DI")])
9352 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9354 ;; AMD SSE4A instructions
9356 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; MOVNTS{s,d}: store of a scalar float held in an SSE register ("x") to
;; a memory destination, one instance per mode of the MODEF iterator.
;; <ssemodefsuffix> supplies the final mnemonic letter and the insn's
;; "mode" attribute follows the iterator mode.
9358 (define_insn "sse4a_movnt<mode>"
9359 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9361 [(match_operand:MODEF 1 "register_operand" "x")]
9364 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9365 [(set_attr "type" "ssemov")
9366 (set_attr "mode" "<MODE>")])
;; Vector-register variant of the MOVNT scalar store: element 0 of an
;; SSEMODEF2P vector in an SSE register is selected and stored to a memory
;; destination of the vector's scalar mode (<ssescalarmode>).
;; <ssescalarmodesuffix> supplies the mnemonic suffix.
9368 (define_insn "sse4a_vmmovnt<mode>"
9369 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9370 (unspec:<ssescalarmode>
9371 [(vec_select:<ssescalarmode>
9372 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9373 (parallel [(const_int 0)]))]
9376 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9377 [(set_attr "type" "ssemov")
9378 (set_attr "mode" "<ssescalarmode>")])
;; EXTRQ, immediate form: operand 1 (V2DI) is tied to the destination
;; ("0"), and operands 2 and 3 are modeless integer constants that are
;; printed as two separate immediates -- hence length_immediate is 2.
9380 (define_insn "sse4a_extrqi"
9381 [(set (match_operand:V2DI 0 "register_operand" "=x")
9382 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9383 (match_operand 2 "const_int_operand" "")
9384 (match_operand 3 "const_int_operand" "")]
9387 "extrq\t{%3, %2, %0|%0, %2, %3}"
9388 [(set_attr "type" "sse")
9389 (set_attr "prefix_data16" "1")
9390 (set_attr "length_immediate" "2")
9391 (set_attr "mode" "TI")])
;; EXTRQ, register form: operand 1 (V2DI) is tied to the destination
;; ("0") and operand 2 is a V16QI value that must be in an SSE register
;; (no memory alternative).
9393 (define_insn "sse4a_extrq"
9394 [(set (match_operand:V2DI 0 "register_operand" "=x")
9395 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9396 (match_operand:V16QI 2 "register_operand" "x")]
9399 "extrq\t{%2, %0|%0, %2}"
9400 [(set_attr "type" "sse")
9401 (set_attr "prefix_data16" "1")
9402 (set_attr "mode" "TI")])
;; INSERTQ, immediate form: operand 1 (V2DI) is tied to the destination
;; ("0"), operand 2 is a V2DI register, and operands 3 and 4 are modeless
;; integer constants printed as two immediates (length_immediate 2).
;; The attributes explicitly clear prefix_data16 and set prefix_rep.
9404 (define_insn "sse4a_insertqi"
9405 [(set (match_operand:V2DI 0 "register_operand" "=x")
9406 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9407 (match_operand:V2DI 2 "register_operand" "x")
9408 (match_operand 3 "const_int_operand" "")
9409 (match_operand 4 "const_int_operand" "")]
9412 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9413 [(set_attr "type" "sseins")
9414 (set_attr "prefix_data16" "0")
9415 (set_attr "prefix_rep" "1")
9416 (set_attr "length_immediate" "2")
9417 (set_attr "mode" "TI")])
;; INSERTQ, register form: operand 1 (V2DI) is tied to the destination
;; ("0") and operand 2 is a V2DI value that must be in an SSE register.
;; The attributes explicitly clear prefix_data16 and set prefix_rep.
9419 (define_insn "sse4a_insertq"
9420 [(set (match_operand:V2DI 0 "register_operand" "=x")
9421 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9422 (match_operand:V2DI 2 "register_operand" "x")]
9425 "insertq\t{%2, %0|%0, %2}"
9426 [(set_attr "type" "sseins")
9427 (set_attr "prefix_data16" "0")
9428 (set_attr "prefix_rep" "1")
9429 (set_attr "mode" "TI")])
9431 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9433 ;; Intel SSE4.1 instructions
9435 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VBLENDP{s,d}, VEX-encoded, one instance per mode of the AVXMODEF2P
;; iterator.  Expressed as a vec_merge in which operand 2 is listed first
;; and operand 1 second, with immediate operand 3 as the selector; the
;; allowed immediate range comes from the <blendbits> mode attribute.
9437 (define_insn "avx_blend<ssemodesuffix><avxmodesuffix>"
9438 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9439 (vec_merge:AVXMODEF2P
9440 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9441 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9442 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9444 "vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9445 [(set_attr "type" "ssemov")
9446 (set_attr "prefix_extra" "1")
9447 (set_attr "length_immediate" "1")
9448 (set_attr "prefix" "vex")
9449 (set_attr "mode" "<avxvecmode>")])
;; VBLENDVP{s,d}, VEX-encoded four-operand form, one instance per mode of
;; the AVXMODEF2P iterator.  Operand 3 is a vector held in any SSE
;; register ("x"), not an immediate.
;; NOTE(review): length_immediate is still "1"; presumably this accounts
;; for the trailing byte that encodes operand 3's register -- confirm.
9451 (define_insn "avx_blendv<ssemodesuffix><avxmodesuffix>"
9452 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9454 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9455 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9456 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9459 "vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9460 [(set_attr "type" "ssemov")
9461 (set_attr "prefix_extra" "1")
9462 (set_attr "length_immediate" "1")
9463 (set_attr "prefix" "vex")
9464 (set_attr "mode" "<avxvecmode>")])
;; BLENDP{s,d}, legacy-encoded, one instance per mode of the SSEMODEF2P
;; iterator.  Expressed as a vec_merge in which operand 2 is listed first
;; and operand 1 second; operand 1 is tied to the destination ("0") and
;; immediate operand 3 is the selector, bounded by <blendbits>.
9466 (define_insn "sse4_1_blend<ssemodesuffix>"
9467 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9468 (vec_merge:SSEMODEF2P
9469 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9470 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9471 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9473 "blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9474 [(set_attr "type" "ssemov")
9475 (set_attr "prefix_data16" "1")
9476 (set_attr "prefix_extra" "1")
9477 (set_attr "length_immediate" "1")
9478 (set_attr "mode" "<MODE>")])
;; BLENDVP{s,d}, legacy-encoded, one instance per mode of the SSEMODEF2P
;; iterator.  Operand 3 is pinned to the first SSE register by the "Yz"
;; constraint, and operands 0..2 use the *_not_xmm0 predicates so they
;; cannot overlap with it.  Operand 1 is tied to the destination ("0").
9480 (define_insn "sse4_1_blendv<ssemodesuffix>"
9481 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9483 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9484 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9485 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9488 "blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9489 [(set_attr "type" "ssemov")
9490 (set_attr "prefix_data16" "1")
9491 (set_attr "prefix_extra" "1")
9492 (set_attr "mode" "<MODE>")])
;; VDPP{s,d}, VEX-encoded, one instance per mode of the AVXMODEF2P
;; iterator.  The '%' on operand 1 marks operands 1 and 2 as commutative;
;; operand 3 is an 8-bit immediate (const_0_to_255_operand).
9494 (define_insn "avx_dp<ssemodesuffix><avxmodesuffix>"
9495 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9497 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9498 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9499 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9502 "vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9503 [(set_attr "type" "ssemul")
9504 (set_attr "prefix" "vex")
9505 (set_attr "prefix_extra" "1")
9506 (set_attr "length_immediate" "1")
9507 (set_attr "mode" "<avxvecmode>")])
;; DPP{s,d}, legacy-encoded, one instance per mode of the SSEMODEF2P
;; iterator.  Operand 1 is tied to the destination ("0") and the '%'
;; marks operands 1 and 2 as commutative; operand 3 is an 8-bit
;; immediate (const_0_to_255_operand).
9509 (define_insn "sse4_1_dp<ssemodesuffix>"
9510 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9512 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9513 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9514 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9517 "dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9518 [(set_attr "type" "ssemul")
9519 (set_attr "prefix_data16" "1")
9520 (set_attr "prefix_extra" "1")
9521 (set_attr "length_immediate" "1")
9522 (set_attr "mode" "<MODE>")])
;; MOVNTDQA: load of a V2DI value from a memory operand ("m") into an SSE
;; register, modelled as an unspec so it is not treated as a plain move.
;; One pattern serves both encodings: the template starts with %v and
;; prefix is "maybe_vex".
;; NOTE(review): %v is expected to print the "v" mnemonic prefix only for
;; AVX targets -- confirm against the operand-printing code.
9524 (define_insn "sse4_1_movntdqa"
9525 [(set (match_operand:V2DI 0 "register_operand" "=x")
9526 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9529 "%vmovntdqa\t{%1, %0|%0, %1}"
9530 [(set_attr "type" "ssemov")
9531 (set_attr "prefix_extra" "1")
9532 (set_attr "prefix" "maybe_vex")
9533 (set_attr "mode" "TI")])
;; VMPSADBW, VEX-encoded form on V16QI, modelled as an unspec of two
;; vector inputs and an 8-bit immediate (const_0_to_255_operand).
;; Operand 1 is a separate SSE source register ("x").
;; The leading '*' in the name means no gen_* function is produced.
9535 (define_insn "*avx_mpsadbw"
9536 [(set (match_operand:V16QI 0 "register_operand" "=x")
9537 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9538 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9539 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9542 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9543 [(set_attr "type" "sselog1")
9544 (set_attr "prefix" "vex")
9545 (set_attr "prefix_extra" "1")
9546 (set_attr "length_immediate" "1")
9547 (set_attr "mode" "TI")])
;; MPSADBW, legacy-encoded form on V16QI, modelled as an unspec of two
;; vector inputs and an 8-bit immediate (const_0_to_255_operand).
;; Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.
9549 (define_insn "sse4_1_mpsadbw"
9550 [(set (match_operand:V16QI 0 "register_operand" "=x")
9551 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9552 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9553 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9556 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9557 [(set_attr "type" "sselog1")
9558 (set_attr "prefix_extra" "1")
9559 (set_attr "length_immediate" "1")
9560 (set_attr "mode" "TI")])
;; VPACKUSDW, VEX-encoded three-operand form: two V4SI inputs produce one
;; V8HI result.  Operand 1 is a separate SSE source register ("x");
;; operand 2 may be a register or memory.
;; The leading '*' in the name means no gen_* function is produced.
9562 (define_insn "*avx_packusdw"
9563 [(set (match_operand:V8HI 0 "register_operand" "=x")
9566 (match_operand:V4SI 1 "register_operand" "x"))
9568 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9570 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9571 [(set_attr "type" "sselog")
9572 (set_attr "prefix_extra" "1")
9573 (set_attr "prefix" "vex")
9574 (set_attr "mode" "TI")])
;; PACKUSDW, legacy-encoded form: two V4SI inputs produce one V8HI
;; result.  Operand 1 is tied to the destination ("0"); operand 2 may be
;; a register or memory.
9576 (define_insn "sse4_1_packusdw"
9577 [(set (match_operand:V8HI 0 "register_operand" "=x")
9580 (match_operand:V4SI 1 "register_operand" "0"))
9582 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9584 "packusdw\t{%2, %0|%0, %2}"
9585 [(set_attr "type" "sselog")
9586 (set_attr "prefix_extra" "1")
9587 (set_attr "mode" "TI")])
;; VPBLENDVB, VEX-encoded four-operand form on V16QI, modelled as an
;; unspec.  Operand 3 is a V16QI vector held in any SSE register ("x"),
;; not an immediate.
;; NOTE(review): length_immediate is still "1"; presumably this accounts
;; for the trailing byte that encodes operand 3's register -- confirm.
;; The leading '*' in the name means no gen_* function is produced.
9589 (define_insn "*avx_pblendvb"
9590 [(set (match_operand:V16QI 0 "register_operand" "=x")
9591 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9592 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9593 (match_operand:V16QI 3 "register_operand" "x")]
9596 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9597 [(set_attr "type" "ssemov")
9598 (set_attr "prefix_extra" "1")
9599 (set_attr "length_immediate" "1")
9600 (set_attr "prefix" "vex")
9601 (set_attr "mode" "TI")])
;; Legacy-encoded pblendvb.  Operand 1 is tied to the destination.
;; Operand 3 uses the "Yz" constraint while operands 0-2 use *_not_xmm0
;; predicates -- presumably Yz selects xmm0 so the others must avoid it;
;; confirm against the constraint definitions.
9603 (define_insn "sse4_1_pblendvb"
9604 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9605 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9606 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9607 (match_operand:V16QI 3 "register_operand" "Yz")]
9610 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9611 [(set_attr "type" "ssemov")
9612 (set_attr "prefix_extra" "1")
9613 (set_attr "mode" "TI")])
;; VEX-encoded pblendw.  Note that operand 2 appears before operand 1 in
;; the RTL, while the assembler template still prints them as %1 then %2
;; (Intel order).  Operand 3 is the immediate.
9615 (define_insn "*avx_pblendw"
9616 [(set (match_operand:V8HI 0 "register_operand" "=x")
9618 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9619 (match_operand:V8HI 1 "register_operand" "x")
9620 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9622 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9623 [(set_attr "type" "ssemov")
9624 (set_attr "prefix" "vex")
9625 (set_attr "prefix_extra" "1")
9626 (set_attr "length_immediate" "1")
9627 (set_attr "mode" "TI")])
;; Legacy-encoded pblendw.  Same operand layout as the AVX pattern, but
;; operand 1 is tied to the destination, so only %0, %2 and %3 are printed.
9629 (define_insn "sse4_1_pblendw"
9630 [(set (match_operand:V8HI 0 "register_operand" "=x")
9632 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9633 (match_operand:V8HI 1 "register_operand" "0")
9634 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9636 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9637 [(set_attr "type" "ssemov")
9638 (set_attr "prefix_extra" "1")
9639 (set_attr "length_immediate" "1")
9640 (set_attr "mode" "TI")])
;; phminposuw, modelled as an opaque UNSPEC_PHMINPOSUW on one V8HI source
;; (register or memory).  A single pattern serves both encodings: the
;; template starts with %v and the prefix attribute is maybe_vex.
9642 (define_insn "sse4_1_phminposuw"
9643 [(set (match_operand:V8HI 0 "register_operand" "=x")
9644 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9645 UNSPEC_PHMINPOSUW))]
9647 "%vphminposuw\t{%1, %0|%0, %1}"
9648 [(set_attr "type" "sselog1")
9649 (set_attr "prefix_extra" "1")
9650 (set_attr "prefix" "maybe_vex")
9651 (set_attr "mode" "TI")])
;; Byte -> word extension from a V16QI source into V8HI.  <extsuffix>
;; expands to "sx" or "zx" depending on the extension code, giving
;; pmovsxbw / pmovzxbw (with an optional v prefix via %v).
9653 (define_insn "sse4_1_<code>v8qiv8hi2"
9654 [(set (match_operand:V8HI 0 "register_operand" "=x")
9657 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9658 (parallel [(const_int 0)
9667 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9668 [(set_attr "type" "ssemov")
9669 (set_attr "prefix_extra" "1")
9670 (set_attr "prefix" "maybe_vex")
9671 (set_attr "mode" "TI")])
;; Byte -> doubleword extension from a V16QI source into V4SI
;; (pmovsxbd / pmovzxbd, selected by <extsuffix>).
9673 (define_insn "sse4_1_<code>v4qiv4si2"
9674 [(set (match_operand:V4SI 0 "register_operand" "=x")
9677 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9678 (parallel [(const_int 0)
9683 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9684 [(set_attr "type" "ssemov")
9685 (set_attr "prefix_extra" "1")
9686 (set_attr "prefix" "maybe_vex")
9687 (set_attr "mode" "TI")])
;; Word -> doubleword extension from a V8HI source into V4SI
;; (pmovsxwd / pmovzxwd, selected by <extsuffix>).
9689 (define_insn "sse4_1_<code>v4hiv4si2"
9690 [(set (match_operand:V4SI 0 "register_operand" "=x")
9693 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9694 (parallel [(const_int 0)
9699 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9700 [(set_attr "type" "ssemov")
9701 (set_attr "prefix_extra" "1")
9702 (set_attr "prefix" "maybe_vex")
9703 (set_attr "mode" "TI")])
;; Byte -> quadword extension from a V16QI source into V2DI
;; (pmovsxbq / pmovzxbq, selected by <extsuffix>).
9705 (define_insn "sse4_1_<code>v2qiv2di2"
9706 [(set (match_operand:V2DI 0 "register_operand" "=x")
9709 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9710 (parallel [(const_int 0)
9713 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9714 [(set_attr "type" "ssemov")
9715 (set_attr "prefix_extra" "1")
9716 (set_attr "prefix" "maybe_vex")
9717 (set_attr "mode" "TI")])
;; Word -> quadword extension from a V8HI source into V2DI
;; (pmovsxwq / pmovzxwq, selected by <extsuffix>).
9719 (define_insn "sse4_1_<code>v2hiv2di2"
9720 [(set (match_operand:V2DI 0 "register_operand" "=x")
9723 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9724 (parallel [(const_int 0)
9727 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9728 [(set_attr "type" "ssemov")
9729 (set_attr "prefix_extra" "1")
9730 (set_attr "prefix" "maybe_vex")
9731 (set_attr "mode" "TI")])
;; Doubleword -> quadword extension from a V4SI source into V2DI
;; (pmovsxdq / pmovzxdq, selected by <extsuffix>).
9733 (define_insn "sse4_1_<code>v2siv2di2"
9734 [(set (match_operand:V2DI 0 "register_operand" "=x")
9737 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9738 (parallel [(const_int 0)
9741 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9742 [(set_attr "type" "ssemov")
9743 (set_attr "prefix_extra" "1")
9744 (set_attr "prefix" "maybe_vex")
9745 (set_attr "mode" "TI")])
9747 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9748 ;; setting FLAGS_REG. But they are not really compare instructions.
;; The only result of this pattern is FLAGS_REG (mode CC); neither input
;; is modified.  Operand 0 must be a register, operand 1 may be memory.
9749 (define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
9750 [(set (reg:CC FLAGS_REG)
9751 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9752 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9755 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9756 [(set_attr "type" "ssecomi")
9757 (set_attr "prefix_extra" "1")
9758 (set_attr "prefix" "vex")
9759 (set_attr "mode" "<MODE>")])
9761 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9762 ;; But it is not really a compare instruction.
;; 256-bit vptest on V4DI operands (insn mode OI).  Sets only FLAGS_REG.
9763 (define_insn "avx_ptest256"
9764 [(set (reg:CC FLAGS_REG)
9765 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9766 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9769 "vptest\t{%1, %0|%0, %1}"
9770 [(set_attr "type" "ssecomi")
9771 (set_attr "prefix_extra" "1")
9772 (set_attr "prefix" "vex")
9773 (set_attr "mode" "OI")])
;; 128-bit ptest on V2DI operands (insn mode TI).  Sets only FLAGS_REG.
;; The %v in the template and the maybe_vex prefix let one pattern cover
;; both the legacy and the VEX encoding.
9775 (define_insn "sse4_1_ptest"
9776 [(set (reg:CC FLAGS_REG)
9777 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9778 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9781 "%vptest\t{%1, %0|%0, %1}"
9782 [(set_attr "type" "ssecomi")
9783 (set_attr "prefix_extra" "1")
9784 (set_attr "prefix" "maybe_vex")
9785 (set_attr "mode" "TI")])
;; 256-bit packed round.  Operand 1 is the vector source (register or
;; memory); operand 2 is an immediate accepted by const_0_to_15_operand.
9787 (define_insn "avx_round<ssemodesuffix>256"
9788 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9789 (unspec:AVX256MODEF2P
9790 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9791 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9794 "vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9795 [(set_attr "type" "ssecvt")
9796 (set_attr "prefix_extra" "1")
9797 (set_attr "length_immediate" "1")
9798 (set_attr "prefix" "vex")
9799 (set_attr "mode" "<MODE>")])
;; 128-bit packed round.  The destination is not tied to the source, so
;; one pattern (with %v / maybe_vex) covers both encodings.  Operand 2 is
;; the immediate.
9801 (define_insn "sse4_1_round<ssemodesuffix>"
9802 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9804 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9805 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9808 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9809 [(set_attr "type" "ssecvt")
9810 (set_attr "prefix_data16" "1")
9811 (set_attr "prefix_extra" "1")
9812 (set_attr "length_immediate" "1")
9813 (set_attr "prefix" "maybe_vex")
9814 (set_attr "mode" "<MODE>")])
;; VEX-encoded scalar round.  The rounded value comes from operand 2 and
;; is combined with operand 1 through a vec_merge; both sources are
;; registers and the destination is independent of them.
9816 (define_insn "*avx_round<ssescalarmodesuffix>"
9817 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9818 (vec_merge:SSEMODEF2P
9820 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9821 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9823 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9826 "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9827 [(set_attr "type" "ssecvt")
9828 (set_attr "prefix_extra" "1")
9829 (set_attr "length_immediate" "1")
9830 (set_attr "prefix" "vex")
9831 (set_attr "mode" "<MODE>")])
;; Legacy-encoded scalar round.  Same vec_merge shape as the AVX pattern,
;; but operand 1 (the merge source) is tied to the destination, so the
;; template prints only %0, %2 and the immediate %3.
9833 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9834 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9835 (vec_merge:SSEMODEF2P
9837 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9838 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9840 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9843 "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9844 [(set_attr "type" "ssecvt")
9845 (set_attr "prefix_data16" "1")
9846 (set_attr "prefix_extra" "1")
9847 (set_attr "length_immediate" "1")
9848 (set_attr "mode" "<MODE>")])
9850 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9852 ;; Intel SSE4.2 string/text processing instructions
9854 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI value in operand 0
;; ("c" constraint), a V16QI value in operand 1 ("Yz" constraint) and
;; FLAGS_REG.  It is split while pseudos can still be created.  The
;; splitter reads the REG_UNUSED notes on curr_insn to find out which
;; results are live and emits gen_sse4_2_pcmpestri, gen_sse4_2_pcmpestrm
;; and/or gen_sse4_2_pcmpestr_cconly; the flags-only insn is emitted when
;; the flags are live but neither register result is.
9856 (define_insn_and_split "sse4_2_pcmpestr"
9857 [(set (match_operand:SI 0 "register_operand" "=c,c")
9859 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9860 (match_operand:SI 3 "register_operand" "a,a")
9861 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9862 (match_operand:SI 5 "register_operand" "d,d")
9863 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9865 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9873 (set (reg:CC FLAGS_REG)
9882 && can_create_pseudo_p ()"
9887 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9888 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9889 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9892 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9893 operands[3], operands[4],
9894 operands[5], operands[6]));
9896 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9897 operands[3], operands[4],
9898 operands[5], operands[6]));
9899 if (flags && !(ecx || xmm0))
9900 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9901 operands[2], operands[3],
9902 operands[4], operands[5],
9906 [(set_attr "type" "sselog")
9907 (set_attr "prefix_data16" "1")
9908 (set_attr "prefix_extra" "1")
9909 (set_attr "length_immediate" "1")
9910 (set_attr "memory" "none,load")
9911 (set_attr "mode" "TI")])
;; pcmpestri: index result in operand 0 ("c" constraint) plus FLAGS_REG.
;; Operands 2 and 4 are pinned by the "a" and "d" constraints and are not
;; printed in the template; only the two vectors and the immediate are.
;; The two alternatives differ in whether operand 3 is a register or memory.
9913 (define_insn "sse4_2_pcmpestri"
9914 [(set (match_operand:SI 0 "register_operand" "=c,c")
9916 [(match_operand:V16QI 1 "register_operand" "x,x")
9917 (match_operand:SI 2 "register_operand" "a,a")
9918 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9919 (match_operand:SI 4 "register_operand" "d,d")
9920 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9922 (set (reg:CC FLAGS_REG)
9931 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9932 [(set_attr "type" "sselog")
9933 (set_attr "prefix_data16" "1")
9934 (set_attr "prefix_extra" "1")
9935 (set_attr "prefix" "maybe_vex")
9936 (set_attr "length_immediate" "1")
9937 (set_attr "memory" "none,load")
9938 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI result in operand 0 ("Yz" constraint) plus FLAGS_REG.
;; Operand layout and alternatives mirror sse4_2_pcmpestri.
9940 (define_insn "sse4_2_pcmpestrm"
9941 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9943 [(match_operand:V16QI 1 "register_operand" "x,x")
9944 (match_operand:SI 2 "register_operand" "a,a")
9945 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9946 (match_operand:SI 4 "register_operand" "d,d")
9947 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9949 (set (reg:CC FLAGS_REG)
9958 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9959 [(set_attr "type" "sselog")
9960 (set_attr "prefix_data16" "1")
9961 (set_attr "prefix_extra" "1")
9962 (set_attr "length_immediate" "1")
9963 (set_attr "prefix" "maybe_vex")
9964 (set_attr "memory" "none,load")
9965 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: the only set is FLAGS_REG; operands 0 and 1 are
;; scratch clobbers.  Four alternatives: the first two clobber the "Yz"
;; scratch and emit pcmpestrm, the last two clobber the "c" scratch and
;; emit pcmpestri; within each pair operand 4 is a register or memory.
9967 (define_insn "sse4_2_pcmpestr_cconly"
9968 [(set (reg:CC FLAGS_REG)
9970 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9971 (match_operand:SI 3 "register_operand" "a,a,a,a")
9972 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9973 (match_operand:SI 5 "register_operand" "d,d,d,d")
9974 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9976 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9977 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9980 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9981 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9982 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9983 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9984 [(set_attr "type" "sselog")
9985 (set_attr "prefix_data16" "1")
9986 (set_attr "prefix_extra" "1")
9987 (set_attr "length_immediate" "1")
9988 (set_attr "memory" "none,load,none,load")
9989 (set_attr "prefix" "maybe_vex")
9990 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern (no SI length operands).  It produces an SI
;; result (operand 0), a V16QI result (operand 1) and FLAGS_REG, and is
;; split while pseudos can still be created.  The splitter reads the
;; REG_UNUSED notes on curr_insn to find out which results are live and
;; emits gen_sse4_2_pcmpistri, gen_sse4_2_pcmpistrm and/or
;; gen_sse4_2_pcmpistr_cconly; the flags-only insn is emitted when the
;; flags are live but neither register result is.
9992 (define_insn_and_split "sse4_2_pcmpistr"
9993 [(set (match_operand:SI 0 "register_operand" "=c,c")
9995 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9996 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9997 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9999 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10005 (set (reg:CC FLAGS_REG)
10012 && can_create_pseudo_p ()"
10017 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10018 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10019 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10022 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10023 operands[3], operands[4]));
10025 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10026 operands[3], operands[4]));
10027 if (flags && !(ecx || xmm0))
10028 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10029 operands[2], operands[3],
10033 [(set_attr "type" "sselog")
10034 (set_attr "prefix_data16" "1")
10035 (set_attr "prefix_extra" "1")
10036 (set_attr "length_immediate" "1")
10037 (set_attr "memory" "none,load")
10038 (set_attr "mode" "TI")])
;; pcmpistri: index result in operand 0 ("c" constraint) plus FLAGS_REG.
;; The two alternatives differ in whether operand 2 is a register or memory.
10040 (define_insn "sse4_2_pcmpistri"
10041 [(set (match_operand:SI 0 "register_operand" "=c,c")
10043 [(match_operand:V16QI 1 "register_operand" "x,x")
10044 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10045 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10047 (set (reg:CC FLAGS_REG)
10054 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10055 [(set_attr "type" "sselog")
10056 (set_attr "prefix_data16" "1")
10057 (set_attr "prefix_extra" "1")
10058 (set_attr "length_immediate" "1")
10059 (set_attr "prefix" "maybe_vex")
10060 (set_attr "memory" "none,load")
10061 (set_attr "mode" "TI")])
;; pcmpistrm: V16QI result in operand 0 ("Yz" constraint) plus FLAGS_REG.
;; Operand layout and alternatives mirror sse4_2_pcmpistri.
10063 (define_insn "sse4_2_pcmpistrm"
10064 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10066 [(match_operand:V16QI 1 "register_operand" "x,x")
10067 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10068 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10070 (set (reg:CC FLAGS_REG)
10077 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10078 [(set_attr "type" "sselog")
10079 (set_attr "prefix_data16" "1")
10080 (set_attr "prefix_extra" "1")
10081 (set_attr "length_immediate" "1")
10082 (set_attr "prefix" "maybe_vex")
10083 (set_attr "memory" "none,load")
10084 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: the only set is FLAGS_REG; operands 0 and 1 are
;; scratch clobbers.  Alternatives 0-1 clobber the "Yz" scratch and emit
;; pcmpistrm; alternatives 2-3 clobber the "c" scratch and emit pcmpistri.
10086 (define_insn "sse4_2_pcmpistr_cconly"
10087 [(set (reg:CC FLAGS_REG)
10089 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10090 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10091 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10093 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10094 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10097 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10098 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10099 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10100 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10101 [(set_attr "type" "sselog")
10102 (set_attr "prefix_data16" "1")
10103 (set_attr "prefix_extra" "1")
10104 (set_attr "length_immediate" "1")
10105 (set_attr "memory" "none,load,none,load")
10106 (set_attr "prefix" "maybe_vex")
10107 (set_attr "mode" "TI")])
10109 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10111 ;; XOP instructions
10113 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10115 ;; XOP parallel integer multiply/add instructions.
10116 ;; Note the XOP multiply/add instructions
10117 ;; a[i] = b[i] * c[i] + d[i];
10118 ;; do not allow the value being added to be a memory operation.
;; V8HI multiply/add.  Operands 1 and 2 are the factors ("%" marks them
;; commutative; only operand 2 may be memory).  Operand 3 is the addend:
;; its predicate is nonimmediate_operand but the "x" constraint forces it
;; into a register.
10119 (define_insn "xop_pmacsww"
10120 [(set (match_operand:V8HI 0 "register_operand" "=x")
10123 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10124 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10125 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10127 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10128 [(set_attr "type" "ssemuladd")
10129 (set_attr "mode" "TI")])
;; vpmacssww: V8HI mult of operands 1 and 2 combined with operand 3.
;; Operand 3 is register-only by constraint; operand 2 may be memory.
10131 (define_insn "xop_pmacssww"
10132 [(set (match_operand:V8HI 0 "register_operand" "=x")
10134 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10135 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10136 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10138 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10139 [(set_attr "type" "ssemuladd")
10140 (set_attr "mode" "TI")])
;; V4SI multiply/add (vpmacsdd).  Operand 3, the addend, is register-only
;; by constraint; operand 2 may be memory.
10142 (define_insn "xop_pmacsdd"
10143 [(set (match_operand:V4SI 0 "register_operand" "=x")
10146 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10147 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10148 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10150 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10151 [(set_attr "type" "ssemuladd")
10152 (set_attr "mode" "TI")])
;; vpmacssdd: V4SI mult of operands 1 and 2 combined with operand 3.
;; Operand 3 is register-only by constraint; operand 2 may be memory.
10154 (define_insn "xop_pmacssdd"
10155 [(set (match_operand:V4SI 0 "register_operand" "=x")
10157 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10158 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10159 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10161 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10162 [(set_attr "type" "ssemuladd")
10163 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI factors (operands 1 and 2), V2DI addend (operand 3)
;; and V2DI result.  The visible element selections on both factors start
;; at index 1.
10165 (define_insn "xop_pmacssdql"
10166 [(set (match_operand:V2DI 0 "register_operand" "=x")
10171 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10172 (parallel [(const_int 1)
10175 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10176 (parallel [(const_int 1)
10178 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10180 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10181 [(set_attr "type" "ssemuladd")
10182 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI factors, V2DI addend and V2DI result.  The visible
;; element selections on both factors start at index 0.
10184 (define_insn "xop_pmacssdqh"
10185 [(set (match_operand:V2DI 0 "register_operand" "=x")
10190 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10191 (parallel [(const_int 0)
10195 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10196 (parallel [(const_int 0)
10198 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10200 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10201 [(set_attr "type" "ssemuladd")
10202 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI factors, V2DI addend and V2DI result.  The visible
;; element selections on both factors start at index 1.
10204 (define_insn "xop_pmacsdql"
10205 [(set (match_operand:V2DI 0 "register_operand" "=x")
10210 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10211 (parallel [(const_int 1)
10215 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10216 (parallel [(const_int 1)
10218 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10220 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10221 [(set_attr "type" "ssemuladd")
10222 (set_attr "mode" "TI")])
10224 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10225 ;; fake it with a multiply/add. In general, we expect the define_split to
10226 ;; occur before register allocation, so we have to handle the corner case where
10227 ;; the target is the same as operands 1/2
;; The destination is earlyclobber ("=&x"), so it cannot share a register
;; with operands 1 or 2.  The split sets operands[3] to a V2DI zero vector.
;; NOTE(review): the split condition is "&& reload_completed", which reads
;; as "after reload", while the comment above expects the split before
;; register allocation -- confirm which is intended.
10228 (define_insn_and_split "xop_mulv2div2di3_low"
10229 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10233 (match_operand:V4SI 1 "register_operand" "%x")
10234 (parallel [(const_int 1)
10238 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10239 (parallel [(const_int 1)
10240 (const_int 3)])))))]
10243 "&& reload_completed"
10244 [(set (match_dup 0)
10252 (parallel [(const_int 1)
10257 (parallel [(const_int 1)
10261 operands[3] = CONST0_RTX (V2DImode);
10263 [(set_attr "type" "ssemul")
10264 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI factors, V2DI addend and V2DI result.  The visible
;; element selections on both factors start at index 0.
10266 (define_insn "xop_pmacsdqh"
10267 [(set (match_operand:V2DI 0 "register_operand" "=x")
10272 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10273 (parallel [(const_int 0)
10277 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10278 (parallel [(const_int 0)
10280 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10282 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10283 [(set_attr "type" "ssemuladd")
10284 (set_attr "mode" "TI")])
10286 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10287 ;; fake it with a multiply/add. In general, we expect the define_split to
10288 ;; occur before register allocation, so we have to handle the corner case where
10289 ;; the target is the same as either operands[1] or operands[2]
;; Counterpart of xop_mulv2div2di3_low selecting elements 0 and 2.  The
;; destination is earlyclobber ("=&x"); the split sets operands[3] to a
;; V2DI zero vector.
;; NOTE(review): the split condition is "&& reload_completed", while the
;; comment above expects the split before register allocation -- confirm.
10290 (define_insn_and_split "xop_mulv2div2di3_high"
10291 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10295 (match_operand:V4SI 1 "register_operand" "%x")
10296 (parallel [(const_int 0)
10300 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10301 (parallel [(const_int 0)
10302 (const_int 2)])))))]
10305 "&& reload_completed"
10306 [(set (match_dup 0)
10314 (parallel [(const_int 0)
10319 (parallel [(const_int 0)
10323 operands[3] = CONST0_RTX (V2DImode);
10325 [(set_attr "type" "ssemul")
10326 (set_attr "mode" "TI")])
10328 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI factors (operands 1 and 2), V4SI addend (operand 3)
;; and V4SI result.  The addend is register-only by constraint.
10329 (define_insn "xop_pmacsswd"
10330 [(set (match_operand:V4SI 0 "register_operand" "=x")
10335 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10336 (parallel [(const_int 1)
10342 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10343 (parallel [(const_int 1)
10347 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10349 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10350 [(set_attr "type" "ssemuladd")
10351 (set_attr "mode" "TI")])
;; vpmacswd: V8HI factors (operands 1 and 2), V4SI addend (operand 3)
;; and V4SI result.  The addend is register-only by constraint.
10353 (define_insn "xop_pmacswd"
10354 [(set (match_operand:V4SI 0 "register_operand" "=x")
10359 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10360 (parallel [(const_int 1)
10366 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10367 (parallel [(const_int 1)
10371 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10373 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10374 [(set_attr "type" "ssemuladd")
10375 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI factors, V4SI addend (operand 3) and V4SI result.
;; The pattern selects elements of the factors twice, once starting at
;; index 0 and once starting at index 1.
10377 (define_insn "xop_pmadcsswd"
10378 [(set (match_operand:V4SI 0 "register_operand" "=x")
10384 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10385 (parallel [(const_int 0)
10391 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10392 (parallel [(const_int 0)
10400 (parallel [(const_int 1)
10407 (parallel [(const_int 1)
10410 (const_int 7)])))))
10411 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10413 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10414 [(set_attr "type" "ssemuladd")
10415 (set_attr "mode" "TI")])
;; vpmadcswd: V8HI factors, V4SI addend (operand 3) and V4SI result.
;; The pattern selects elements of the factors twice, once starting at
;; index 0 and once starting at index 1.
10417 (define_insn "xop_pmadcswd"
10418 [(set (match_operand:V4SI 0 "register_operand" "=x")
10424 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10425 (parallel [(const_int 0)
10431 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10432 (parallel [(const_int 0)
10440 (parallel [(const_int 1)
10447 (parallel [(const_int 1)
10450 (const_int 7)])))))
10451 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10453 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10454 [(set_attr "type" "ssemuladd")
10455 (set_attr "mode" "TI")])
10457 ;; XOP parallel XMM conditional moves
;; 128-bit vpcmov as an if_then_else: operand 3 is the condition and
;; operands 1 and 2 are the two arms.  The two alternatives let either
;; operand 2 or operand 3 be in memory, but never both.
10458 (define_insn "xop_pcmov_<mode>"
10459 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10460 (if_then_else:SSEMODE
10461 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10462 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10463 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10465 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10466 [(set_attr "type" "sse4arg")])
;; 256-bit variant of the conditional move over the AVX256MODE modes.
;; Same operand roles and the same one-memory-operand alternatives.
10468 (define_insn "xop_pcmov_<mode>256"
10469 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10470 (if_then_else:AVX256MODE
10471 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10472 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10473 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10475 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10476 [(set_attr "type" "sse4arg")])
10478 ;; XOP horizontal add/subtract instructions
;; vphaddbw: one V16QI source (register or memory), V8HI result.  The
;; visible element selections start at index 0 and at index 1.
10479 (define_insn "xop_phaddbw"
10480 [(set (match_operand:V8HI 0 "register_operand" "=x")
10484 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10485 (parallel [(const_int 0)
10496 (parallel [(const_int 1)
10503 (const_int 15)])))))]
10505 "vphaddbw\t{%1, %0|%0, %1}"
10506 [(set_attr "type" "sseiadd1")])
;; vphaddbd: one V16QI source (register or memory), V4SI result.  Four
;; element selections are visible, starting at indices 0, 1, 2 and 3.
10508 (define_insn "xop_phaddbd"
10509 [(set (match_operand:V4SI 0 "register_operand" "=x")
10514 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10515 (parallel [(const_int 0)
10522 (parallel [(const_int 1)
10525 (const_int 13)]))))
10530 (parallel [(const_int 2)
10537 (parallel [(const_int 3)
10540 (const_int 15)]))))))]
10542 "vphaddbd\t{%1, %0|%0, %1}"
10543 [(set_attr "type" "sseiadd1")])
;; vphaddbq: one V16QI source (register or memory), V2DI result.  Eight
;; element selections are visible (starting at 0-3 and 8-11).
10545 (define_insn "xop_phaddbq"
10546 [(set (match_operand:V2DI 0 "register_operand" "=x")
10552 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10553 (parallel [(const_int 0)
10558 (parallel [(const_int 1)
10564 (parallel [(const_int 2)
10569 (parallel [(const_int 3)
10570 (const_int 7)])))))
10576 (parallel [(const_int 8)
10581 (parallel [(const_int 9)
10582 (const_int 13)]))))
10587 (parallel [(const_int 10)
10592 (parallel [(const_int 11)
10593 (const_int 15)])))))))]
10595 "vphaddbq\t{%1, %0|%0, %1}"
10596 [(set_attr "type" "sseiadd1")])
;; vphaddwd: one V8HI source (register or memory), V4SI result.
10598 (define_insn "xop_phaddwd"
10599 [(set (match_operand:V4SI 0 "register_operand" "=x")
10603 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10604 (parallel [(const_int 0)
10611 (parallel [(const_int 1)
10614 (const_int 7)])))))]
10616 "vphaddwd\t{%1, %0|%0, %1}"
10617 [(set_attr "type" "sseiadd1")])
;; vphaddwq: one V8HI source (register or memory), V2DI result.  Four
;; element selections are visible, starting at indices 0, 1, 2 and 3.
10619 (define_insn "xop_phaddwq"
10620 [(set (match_operand:V2DI 0 "register_operand" "=x")
10625 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10626 (parallel [(const_int 0)
10631 (parallel [(const_int 1)
10637 (parallel [(const_int 2)
10642 (parallel [(const_int 3)
10643 (const_int 7)]))))))]
10645 "vphaddwq\t{%1, %0|%0, %1}"
10646 [(set_attr "type" "sseiadd1")])
;; vphadddq: one V4SI source (register or memory), V2DI result.
10648 (define_insn "xop_phadddq"
10649 [(set (match_operand:V2DI 0 "register_operand" "=x")
10653 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10654 (parallel [(const_int 0)
10659 (parallel [(const_int 1)
10660 (const_int 3)])))))]
10662 "vphadddq\t{%1, %0|%0, %1}"
10663 [(set_attr "type" "sseiadd1")])
;; vphaddubw: same operand shape as xop_phaddbw (V16QI source, V8HI
;; result) but emitting the "u" mnemonic.
10665 (define_insn "xop_phaddubw"
10666 [(set (match_operand:V8HI 0 "register_operand" "=x")
10670 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10671 (parallel [(const_int 0)
10682 (parallel [(const_int 1)
10689 (const_int 15)])))))]
10691 "vphaddubw\t{%1, %0|%0, %1}"
10692 [(set_attr "type" "sseiadd1")])
;; vphaddubd: same operand shape as xop_phaddbd (V16QI source, V4SI
;; result) but emitting the "u" mnemonic.
10694 (define_insn "xop_phaddubd"
10695 [(set (match_operand:V4SI 0 "register_operand" "=x")
10700 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10701 (parallel [(const_int 0)
10708 (parallel [(const_int 1)
10711 (const_int 13)]))))
10716 (parallel [(const_int 2)
10723 (parallel [(const_int 3)
10726 (const_int 15)]))))))]
10728 "vphaddubd\t{%1, %0|%0, %1}"
10729 [(set_attr "type" "sseiadd1")])
;; vphaddubq: same operand shape as xop_phaddbq (V16QI source, V2DI
;; result) but emitting the "u" mnemonic.
10731 (define_insn "xop_phaddubq"
10732 [(set (match_operand:V2DI 0 "register_operand" "=x")
10738 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10739 (parallel [(const_int 0)
10744 (parallel [(const_int 1)
10750 (parallel [(const_int 2)
10755 (parallel [(const_int 3)
10756 (const_int 7)])))))
10762 (parallel [(const_int 8)
10767 (parallel [(const_int 9)
10768 (const_int 13)]))))
10773 (parallel [(const_int 10)
10778 (parallel [(const_int 11)
10779 (const_int 15)])))))))]
10781 "vphaddubq\t{%1, %0|%0, %1}"
10782 [(set_attr "type" "sseiadd1")])
;; vphadduwd: same operand shape as xop_phaddwd (V8HI source, V4SI
;; result) but emitting the "u" mnemonic.
10784 (define_insn "xop_phadduwd"
10785 [(set (match_operand:V4SI 0 "register_operand" "=x")
10789 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10790 (parallel [(const_int 0)
10797 (parallel [(const_int 1)
10800 (const_int 7)])))))]
10802 "vphadduwd\t{%1, %0|%0, %1}"
10803 [(set_attr "type" "sseiadd1")])
;; vphadduwq: same operand shape as xop_phaddwq (V8HI source, V2DI
;; result) but emitting the "u" mnemonic.
10805 (define_insn "xop_phadduwq"
10806 [(set (match_operand:V2DI 0 "register_operand" "=x")
10811 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10812 (parallel [(const_int 0)
10817 (parallel [(const_int 1)
10823 (parallel [(const_int 2)
10828 (parallel [(const_int 3)
10829 (const_int 7)]))))))]
10831 "vphadduwq\t{%1, %0|%0, %1}"
10832 [(set_attr "type" "sseiadd1")])
;; vphaddudq: same operand shape as xop_phadddq (V4SI source, V2DI
;; result) but emitting the "u" mnemonic.
10834 (define_insn "xop_phaddudq"
10835 [(set (match_operand:V2DI 0 "register_operand" "=x")
10839 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10840 (parallel [(const_int 0)
10845 (parallel [(const_int 1)
10846 (const_int 3)])))))]
10848 "vphaddudq\t{%1, %0|%0, %1}"
10849 [(set_attr "type" "sseiadd1")])
;; vphsubbw: one V16QI source (register or memory), V8HI result.
10851 (define_insn "xop_phsubbw"
10852 [(set (match_operand:V8HI 0 "register_operand" "=x")
10856 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10857 (parallel [(const_int 0)
10868 (parallel [(const_int 1)
10875 (const_int 15)])))))]
10877 "vphsubbw\t{%1, %0|%0, %1}"
10878 [(set_attr "type" "sseiadd1")])
;; vphsubwd: one V8HI source (register or memory), V4SI result.
10880 (define_insn "xop_phsubwd"
10881 [(set (match_operand:V4SI 0 "register_operand" "=x")
10885 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10886 (parallel [(const_int 0)
10893 (parallel [(const_int 1)
10896 (const_int 7)])))))]
10898 "vphsubwd\t{%1, %0|%0, %1}"
10899 [(set_attr "type" "sseiadd1")])
;; vphsubdq: one V4SI source (register or memory), V2DI result.
10901 (define_insn "xop_phsubdq"
10902 [(set (match_operand:V2DI 0 "register_operand" "=x")
10906 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10907 (parallel [(const_int 0)
10912 (parallel [(const_int 1)
10913 (const_int 3)])))))]
10915 "vphsubdq\t{%1, %0|%0, %1}"
10916 [(set_attr "type" "sseiadd1")])
10918 ;; XOP permute instructions
;; vpperm as an opaque UNSPEC_XOP_PERMUTE over three V16QI inputs.
;; Operand 1 must be a register.  Either operand 2 or operand 3 may be in
;; memory; the insn condition rejects the case where both are MEMs, which
;; matches the two constraint alternatives.
10919 (define_insn "xop_pperm"
10920 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10922 [(match_operand:V16QI 1 "register_operand" "x,x")
10923 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10924 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10925 UNSPEC_XOP_PERMUTE))]
10926 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10927 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10928 [(set_attr "type" "sse4arg")
10929 (set_attr "mode" "TI")])
10931 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs into one V4SI result using vpperm.  The V16QI
;; operand 3 appears only in a (use ...) and is printed as the selector
;; argument of vpperm.  At most one of operands 2 and 3 may be a MEM.
10932 (define_insn "xop_pperm_pack_v2di_v4si"
10933 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10936 (match_operand:V2DI 1 "register_operand" "x,x"))
10938 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10939 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10940 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10941 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10942 [(set_attr "type" "sse4arg")
10943 (set_attr "mode" "TI")])
;; Pack two V4SI inputs into one V8HI result using vpperm.  The V16QI
;; operand 3 appears only in a (use ...).  At most one of operands 2 and 3
;; may be a MEM.
10945 (define_insn "xop_pperm_pack_v4si_v8hi"
10946 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10949 (match_operand:V4SI 1 "register_operand" "x,x"))
10951 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10952 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10953 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10954 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10955 [(set_attr "type" "sse4arg")
10956 (set_attr "mode" "TI")])
;; Pack two V8HI inputs into one V16QI result using vpperm.  The V16QI
;; operand 3 appears only in a (use ...).  At most one of operands 2 and 3
;; may be a MEM.
10958 (define_insn "xop_pperm_pack_v8hi_v16qi"
10959 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10962 (match_operand:V8HI 1 "register_operand" "x,x"))
10964 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10965 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10966 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10967 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10968 [(set_attr "type" "sse4arg")
10969 (set_attr "mode" "TI")])
10971 ;; XOP packed rotate instructions
;; Rotate-left expander taking an SI count.  When the count is not an
;; immediate accepted by const_0_to_<sserotatemax>_operand, it is
;; converted to the element mode if needed, replicated into all
;; <ssescalarnum> elements of a fresh vector register via vec_init<mode>,
;; and the per-element rotate xop_vrotl<mode>3 is emitted.
10972 (define_expand "rotl<mode>3"
10973 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10974 (rotate:SSEMODE1248
10975 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10976 (match_operand:SI 2 "general_operand")))]
10979 /* If we were given a scalar, convert it to parallel */
10980 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10982 rtvec vs = rtvec_alloc (<ssescalarnum>);
10983 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10984 rtx reg = gen_reg_rtx (<MODE>mode);
10985 rtx op2 = operands[2];
10988 if (GET_MODE (op2) != <ssescalarmode>mode)
10990 op2 = gen_reg_rtx (<ssescalarmode>mode);
10991 convert_move (op2, operands[2], false);
10994 for (i = 0; i < <ssescalarnum>; i++)
10995 RTVEC_ELT (vs, i) = op2;
10997 emit_insn (gen_vec_init<mode> (reg, par));
10998 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander taking an SI count.  For a non-immediate count
;; the scalar is broadcast into a vector as in rotl<mode>3, that vector is
;; negated with neg<mode>2, and the negated counts are passed to
;; xop_vrotl<mode>3, so the right rotate is expressed through the
;; variable rotate-left insn.
11003 (define_expand "rotr<mode>3"
11004 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11005 (rotatert:SSEMODE1248
11006 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11007 (match_operand:SI 2 "general_operand")))]
11010 /* If we were given a scalar, convert it to parallel */
11011 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11013 rtvec vs = rtvec_alloc (<ssescalarnum>);
11014 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11015 rtx neg = gen_reg_rtx (<MODE>mode);
11016 rtx reg = gen_reg_rtx (<MODE>mode);
11017 rtx op2 = operands[2];
11020 if (GET_MODE (op2) != <ssescalarmode>mode)
11022 op2 = gen_reg_rtx (<ssescalarmode>mode);
11023 convert_move (op2, operands[2], false);
11026 for (i = 0; i < <ssescalarnum>; i++)
11027 RTVEC_ELT (vs, i) = op2;
11029 emit_insn (gen_vec_init<mode> (reg, par));
11030 emit_insn (gen_neg<mode>2 (neg, reg));
11031 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left of an SSEMODE1248 vector by a constant count.  Operand 2
;; must satisfy const_0_to_<sserotatemax>_operand and is emitted as the
;; immediate of vprot<ssevecsize>.
11036 (define_insn "xop_rotl<mode>3"
11037 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11038 (rotate:SSEMODE1248
11039 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11040 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11042 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11043 [(set_attr "type" "sseishft")
11044 (set_attr "length_immediate" "1")
11045 (set_attr "mode" "TI")])
;; Rotate-right of an SSEMODE1248 vector by a constant count.  The output
;; code stores (element width in bits - count) in operands[3] and emits it
;; as the immediate of vprot<ssevecsize>, the same mnemonic xop_rotl<mode>3
;; uses for a rotate-left.
;; The element width is taken from the scalar mode.  <ssescalarnum> is the
;; number of elements, so <ssescalarnum> * 8 equals the element width only
;; for V4SI and must not be used here.
11047 (define_insn "xop_rotr<mode>3"
11048 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11049 (rotatert:SSEMODE1248
11050 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11051 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11054 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11055 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11057 [(set_attr "type" "sseishft")
11058 (set_attr "length_immediate" "1")
11059 (set_attr "mode" "TI")])
;; Expander for a rotate-right with a vector of counts (operand 2).
;; Negates operand 2 into a new register with neg<mode>2 and emits
;; xop_vrotl<mode>3 with the negated counts.
11061 (define_expand "vrotr<mode>3"
11062 [(match_operand:SSEMODE1248 0 "register_operand" "")
11063 (match_operand:SSEMODE1248 1 "register_operand" "")
11064 (match_operand:SSEMODE1248 2 "register_operand" "")]
11067 rtx reg = gen_reg_rtx (<MODE>mode);
11068 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11069 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a rotate-left with a vector of counts (operand 2).
;; Passes all three operands unchanged to xop_vrotl<mode>3.
11073 (define_expand "vrotl<mode>3"
11074 [(match_operand:SSEMODE1248 0 "register_operand" "")
11075 (match_operand:SSEMODE1248 1 "register_operand" "")
11076 (match_operand:SSEMODE1248 2 "register_operand" "")]
11079 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Rotate with a vector of counts, emitted as vprot<ssevecsize>.  The RTL
;; is an if_then_else that selects between a `rotate' of operand 1 and a
;; `rotatert' by the negation of operand 2.  Requires TARGET_XOP; operands
;; 1 and 2 may not both be in memory.
11083 (define_insn "xop_vrotl<mode>3"
11084 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11085 (if_then_else:SSEMODE1248
11087 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11089 (rotate:SSEMODE1248
11090 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11092 (rotatert:SSEMODE1248
11094 (neg:SSEMODE1248 (match_dup 2)))))]
11095 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11096 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11097 [(set_attr "type" "sseishft")
11098 (set_attr "prefix_data16" "0")
11099 (set_attr "prefix_extra" "2")
11100 (set_attr "mode" "TI")])
11102 ;; XOP packed shift instructions.
11103 ;; FIXME: add V2DI back in
;; Expander for a logical shift right with a vector of counts, for the
;; SSEMODE124 modes only.  Negates operand 2 with neg<mode>2 and emits
;; xop_lshl<mode>3 with the negated counts.
11104 (define_expand "vlshr<mode>3"
11105 [(match_operand:SSEMODE124 0 "register_operand" "")
11106 (match_operand:SSEMODE124 1 "register_operand" "")
11107 (match_operand:SSEMODE124 2 "register_operand" "")]
11110 rtx neg = gen_reg_rtx (<MODE>mode);
11111 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11112 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Expander for an arithmetic shift right with a vector of counts, for the
;; SSEMODE124 modes only.  Negates operand 2 with neg<mode>2 and emits
;; xop_ashl<mode>3 with the negated counts.
11116 (define_expand "vashr<mode>3"
11117 [(match_operand:SSEMODE124 0 "register_operand" "")
11118 (match_operand:SSEMODE124 1 "register_operand" "")
11119 (match_operand:SSEMODE124 2 "register_operand" "")]
11122 rtx neg = gen_reg_rtx (<MODE>mode);
11123 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11124 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Expander for a shift left with a vector of counts, for the SSEMODE124
;; modes only.  Passes all three operands unchanged to xop_ashl<mode>3.
11128 (define_expand "vashl<mode>3"
11129 [(match_operand:SSEMODE124 0 "register_operand" "")
11130 (match_operand:SSEMODE124 1 "register_operand" "")
11131 (match_operand:SSEMODE124 2 "register_operand" "")]
11134 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Arithmetic shift with a vector of counts, emitted as vpsha<ssevecsize>.
;; The RTL is an if_then_else that selects between an `ashift' of operand 1
;; and an `ashiftrt' by the negation of operand 2.  Requires TARGET_XOP;
;; operands 1 and 2 may not both be in memory.
11138 (define_insn "xop_ashl<mode>3"
11139 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11140 (if_then_else:SSEMODE1248
11142 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11144 (ashift:SSEMODE1248
11145 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11147 (ashiftrt:SSEMODE1248
11149 (neg:SSEMODE1248 (match_dup 2)))))]
11150 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11151 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11152 [(set_attr "type" "sseishft")
11153 (set_attr "prefix_data16" "0")
11154 (set_attr "prefix_extra" "2")
11155 (set_attr "mode" "TI")])
;; Logical shift with a vector of counts, emitted as vpshl<ssevecsize>.
;; The RTL is an if_then_else that selects between an `ashift' of operand 1
;; and an `lshiftrt' by the negation of operand 2.  Requires TARGET_XOP;
;; operands 1 and 2 may not both be in memory.
11157 (define_insn "xop_lshl<mode>3"
11158 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11159 (if_then_else:SSEMODE1248
11161 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11163 (ashift:SSEMODE1248
11164 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11166 (lshiftrt:SSEMODE1248
11168 (neg:SSEMODE1248 (match_dup 2)))))]
11169 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11170 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11171 [(set_attr "type" "sseishft")
11172 (set_attr "prefix_data16" "0")
11173 (set_attr "prefix_extra" "2")
11174 (set_attr "mode" "TI")])
11176 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; Expander for a V16QI shift left by a scalar SImode count.  Places
;; operand 2 in all 16 slots of a PARALLEL, initializes a V16QI register
;; from it with vec_initv16qi, and emits xop_ashlv16qi3 with that register
;; as the count vector.
11177 (define_expand "ashlv16qi3"
11178 [(match_operand:V16QI 0 "register_operand" "")
11179 (match_operand:V16QI 1 "register_operand" "")
11180 (match_operand:SI 2 "nonmemory_operand" "")]
11183 rtvec vs = rtvec_alloc (16);
11184 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11185 rtx reg = gen_reg_rtx (V16QImode);
11187 for (i = 0; i < 16; i++)
11188 RTVEC_ELT (vs, i) = operands[2];
11190 emit_insn (gen_vec_initv16qi (reg, par));
11191 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI logical shift by a scalar SImode count.  Places
;; operand 2 in all 16 slots of a PARALLEL, initializes a V16QI register
;; from it with vec_initv16qi, and emits xop_lshlv16qi3 with that register
;; as the count vector.
11195 (define_expand "lshlv16qi3"
11196 [(match_operand:V16QI 0 "register_operand" "")
11197 (match_operand:V16QI 1 "register_operand" "")
11198 (match_operand:SI 2 "nonmemory_operand" "")]
11201 rtvec vs = rtvec_alloc (16);
11202 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11203 rtx reg = gen_reg_rtx (V16QImode);
11205 for (i = 0; i < 16; i++)
11206 RTVEC_ELT (vs, i) = operands[2];
11208 emit_insn (gen_vec_initv16qi (reg, par));
11209 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI arithmetic shift right by a scalar SImode count,
;; implemented with xop_ashlv16qi3 and a negated count.  A constant count
;; is negated at expand time before being placed in all 16 elements.  For
;; a non-constant count the count vector built by vec_initv16qi is negated
;; with negv16qi2 instead.
11213 (define_expand "ashrv16qi3"
11214 [(match_operand:V16QI 0 "register_operand" "")
11215 (match_operand:V16QI 1 "register_operand" "")
11216 (match_operand:SI 2 "nonmemory_operand" "")]
11219 rtvec vs = rtvec_alloc (16);
11220 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11221 rtx reg = gen_reg_rtx (V16QImode);
11223 rtx ele = ((CONST_INT_P (operands[2]))
11224 ? GEN_INT (- INTVAL (operands[2]))
11227 for (i = 0; i < 16; i++)
11228 RTVEC_ELT (vs, i) = ele;
11230 emit_insn (gen_vec_initv16qi (reg, par));
11232 if (!CONST_INT_P (operands[2]))
11234 rtx neg = gen_reg_rtx (V16QImode);
11235 emit_insn (gen_negv16qi2 (neg, reg));
11236 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11239 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V2DI arithmetic shift right by a scalar DImode count,
;; implemented with xop_ashlv2di3 and a negated count.  A constant count
;; is negated at expand time; otherwise the count is converted to DImode
;; if needed and negated with negdi2.  The negated value is placed in both
;; elements of the count vector built by vec_initv2di.
11244 (define_expand "ashrv2di3"
11245 [(match_operand:V2DI 0 "register_operand" "")
11246 (match_operand:V2DI 1 "register_operand" "")
11247 (match_operand:DI 2 "nonmemory_operand" "")]
11250 rtvec vs = rtvec_alloc (2);
11251 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11252 rtx reg = gen_reg_rtx (V2DImode);
11255 if (CONST_INT_P (operands[2]))
11256 ele = GEN_INT (- INTVAL (operands[2]));
11257 else if (GET_MODE (operands[2]) != DImode)
11259 rtx move = gen_reg_rtx (DImode);
11260 ele = gen_reg_rtx (DImode);
11261 convert_move (move, operands[2], false);
11262 emit_insn (gen_negdi2 (ele, move));
11266 ele = gen_reg_rtx (DImode);
11267 emit_insn (gen_negdi2 (ele, operands[2]));
11270 RTVEC_ELT (vs, 0) = ele;
11271 RTVEC_ELT (vs, 1) = ele;
11272 emit_insn (gen_vec_initv2di (reg, par));
11273 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11277 ;; XOP FRCZ support
;; Emits vfrcz<ssemodesuffix> for the FMAMODE modes, taking operand 1 from
;; a register or memory and writing register operand 0.
11278 (define_insn "xop_frcz<mode>2"
11279 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11281 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11284 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11285 [(set_attr "type" "ssecvt1")
11286 (set_attr "mode" "<MODE>")])
;; Expander for the vec_merge form of FRCZ on the SSEMODEF2P modes.  The
;; preparation code sets operands[3] to the zero constant of <MODE>mode.
11289 (define_expand "xop_vmfrcz<mode>2"
11290 [(set (match_operand:SSEMODEF2P 0 "register_operand")
11291 (vec_merge:SSEMODEF2P
11293 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand")]
11299 operands[3] = CONST0_RTX (<MODE>mode);
;; Matches the vec_merge form of FRCZ in which operand 2 is a zero
;; constant (const0_operand), and emits vfrcz<ssescalarmodesuffix>.
11302 (define_insn "*xop_vmfrcz_<mode>"
11303 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11304 (vec_merge:SSEMODEF2P
11306 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11308 (match_operand:SSEMODEF2P 2 "const0_operand")
11311 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11312 [(set_attr "type" "ssecvt1")
11313 (set_attr "mode" "<MODE>")])
;; Vector integer comparison of operands 2 and 3 using any operator
;; accepted by ix86_comparison_int_operator.  Emits vpcom<ssevecsize>,
;; with the comparison printed through the %Y1 operand modifier.
11315 (define_insn "xop_maskcmp<mode>3"
11316 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11317 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11318 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11319 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11321 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11322 [(set_attr "type" "sse4arg")
11323 (set_attr "prefix_data16" "0")
11324 (set_attr "prefix_rep" "0")
11325 (set_attr "prefix_extra" "2")
11326 (set_attr "length_immediate" "1")
11327 (set_attr "mode" "TI")])
;; Vector integer comparison of operands 2 and 3 using any operator
;; accepted by ix86_comparison_uns_operator.  Emits the `u' form of
;; vpcom<ssevecsize>, with the comparison printed through %Y1.
11329 (define_insn "xop_maskcmp_uns<mode>3"
11330 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11331 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11332 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11333 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11335 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11336 [(set_attr "type" "ssecmp")
11337 (set_attr "prefix_data16" "0")
11338 (set_attr "prefix_rep" "0")
11339 (set_attr "prefix_extra" "2")
11340 (set_attr "length_immediate" "1")
11341 (set_attr "mode" "TI")])
11343 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11344 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11345 ;; the exact instruction generated for the intrinsic.
;; Same comparison and output template as xop_maskcmp_uns<mode>3, but the
;; comparison is wrapped in an UNSPEC_XOP_UNSIGNED_CMP unspec.
11346 (define_insn "xop_maskcmp_uns2<mode>3"
11347 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11348 (unspec:SSEMODE1248
11349 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11350 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11351 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11352 UNSPEC_XOP_UNSIGNED_CMP))]
11354 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11355 [(set_attr "type" "ssecmp")
11356 (set_attr "prefix_data16" "0")
11357 (set_attr "prefix_extra" "2")
11358 (set_attr "length_immediate" "1")
11359 (set_attr "mode" "TI")])
11361 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11362 ;; being added here to be complete.
;; UNSPEC_XOP_TRUEFALSE pattern.  The constant operand 3 selects the
;; mnemonic: nonzero emits vpcomtrue<ssevecsize>, zero emits
;; vpcomfalse<ssevecsize>.
11363 (define_insn "xop_pcom_tf<mode>3"
11364 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11365 (unspec:SSEMODE1248
11366 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11367 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11368 (match_operand:SI 3 "const_int_operand" "n")]
11369 UNSPEC_XOP_TRUEFALSE))]
11372 return ((INTVAL (operands[3]) != 0)
11373 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11374 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11376 [(set_attr "type" "ssecmp")
11377 (set_attr "prefix_data16" "0")
11378 (set_attr "prefix_extra" "2")
11379 (set_attr "length_immediate" "1")
11380 (set_attr "mode" "TI")])
;; Emits vpermil2<ssemodesuffix> for the AVXMODEF2P modes.  Operands 1 and
;; 2 are vectors of the pattern's mode, operand 3 has mode
;; <avxpermvecmode>, and operand 4 is an immediate that must satisfy
;; const_0_to_3_operand.
11382 (define_insn "xop_vpermil2<mode>3"
11383 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11385 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11386 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
11387 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
11388 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11391 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11392 [(set_attr "type" "sse4arg")
11393 (set_attr "length_immediate" "1")
11394 (set_attr "mode" "<MODE>")])
11396 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded three-operand form: emits vaesenc with separate destination
;; and first source registers.  Requires TARGET_AES && TARGET_AVX.
11397 (define_insn "*avx_aesenc"
11398 [(set (match_operand:V2DI 0 "register_operand" "=x")
11399 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11400 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11402 "TARGET_AES && TARGET_AVX"
11403 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11404 [(set_attr "type" "sselog1")
11405 (set_attr "prefix_extra" "1")
11406 (set_attr "prefix" "vex")
11407 (set_attr "mode" "TI")])
;; Two-operand form: emits aesenc.  Operand 1 is tied to the destination
;; by the "0" constraint.
11409 (define_insn "aesenc"
11410 [(set (match_operand:V2DI 0 "register_operand" "=x")
11411 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11412 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11415 "aesenc\t{%2, %0|%0, %2}"
11416 [(set_attr "type" "sselog1")
11417 (set_attr "prefix_extra" "1")
11418 (set_attr "mode" "TI")])
;; VEX-encoded three-operand form of UNSPEC_AESENCLAST: emits vaesenclast.
;; Requires TARGET_AES && TARGET_AVX.
11420 (define_insn "*avx_aesenclast"
11421 [(set (match_operand:V2DI 0 "register_operand" "=x")
11422 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11423 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11424 UNSPEC_AESENCLAST))]
11425 "TARGET_AES && TARGET_AVX"
11426 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11427 [(set_attr "type" "sselog1")
11428 (set_attr "prefix_extra" "1")
11429 (set_attr "prefix" "vex")
11430 (set_attr "mode" "TI")])
;; Two-operand form of UNSPEC_AESENCLAST: emits aesenclast.  Operand 1 is
;; tied to the destination by the "0" constraint.
11432 (define_insn "aesenclast"
11433 [(set (match_operand:V2DI 0 "register_operand" "=x")
11434 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11435 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11436 UNSPEC_AESENCLAST))]
11438 "aesenclast\t{%2, %0|%0, %2}"
11439 [(set_attr "type" "sselog1")
11440 (set_attr "prefix_extra" "1")
11441 (set_attr "mode" "TI")])
;; VEX-encoded three-operand form: emits vaesdec with separate destination
;; and first source registers.  Requires TARGET_AES && TARGET_AVX.
11443 (define_insn "*avx_aesdec"
11444 [(set (match_operand:V2DI 0 "register_operand" "=x")
11445 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11446 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11448 "TARGET_AES && TARGET_AVX"
11449 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11450 [(set_attr "type" "sselog1")
11451 (set_attr "prefix_extra" "1")
11452 (set_attr "prefix" "vex")
11453 (set_attr "mode" "TI")])
;; Two-operand form: emits aesdec.  Operand 1 is tied to the destination
;; by the "0" constraint.
11455 (define_insn "aesdec"
11456 [(set (match_operand:V2DI 0 "register_operand" "=x")
11457 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11458 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11461 "aesdec\t{%2, %0|%0, %2}"
11462 [(set_attr "type" "sselog1")
11463 (set_attr "prefix_extra" "1")
11464 (set_attr "mode" "TI")])
;; VEX-encoded three-operand form of UNSPEC_AESDECLAST: emits vaesdeclast.
;; Requires TARGET_AES && TARGET_AVX.
11466 (define_insn "*avx_aesdeclast"
11467 [(set (match_operand:V2DI 0 "register_operand" "=x")
11468 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11469 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11470 UNSPEC_AESDECLAST))]
11471 "TARGET_AES && TARGET_AVX"
11472 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11473 [(set_attr "type" "sselog1")
11474 (set_attr "prefix_extra" "1")
11475 (set_attr "prefix" "vex")
11476 (set_attr "mode" "TI")])
;; Two-operand form of UNSPEC_AESDECLAST: emits aesdeclast.  Operand 1 is
;; tied to the destination by the "0" constraint.
11478 (define_insn "aesdeclast"
11479 [(set (match_operand:V2DI 0 "register_operand" "=x")
11480 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11481 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11482 UNSPEC_AESDECLAST))]
11484 "aesdeclast\t{%2, %0|%0, %2}"
11485 [(set_attr "type" "sselog1")
11486 (set_attr "prefix_extra" "1")
11487 (set_attr "mode" "TI")])
;; Single-source AES pattern: emits aesimc from a register or memory
;; operand.  One pattern serves both encodings: the template uses the %v
;; mnemonic prefix and the "prefix" attribute is maybe_vex.
11489 (define_insn "aesimc"
11490 [(set (match_operand:V2DI 0 "register_operand" "=x")
11491 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11494 "%vaesimc\t{%1, %0|%0, %1}"
11495 [(set_attr "type" "sselog1")
11496 (set_attr "prefix_extra" "1")
11497 (set_attr "prefix" "maybe_vex")
11498 (set_attr "mode" "TI")])
;; UNSPEC_AESKEYGENASSIST pattern: emits aeskeygenassist with a source in
;; register or memory and an immediate operand 2 that must satisfy
;; const_0_to_255_operand.  Uses the %v mnemonic prefix with the "prefix"
;; attribute set to maybe_vex.
11500 (define_insn "aeskeygenassist"
11501 [(set (match_operand:V2DI 0 "register_operand" "=x")
11502 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11503 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11504 UNSPEC_AESKEYGENASSIST))]
11506 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11507 [(set_attr "type" "sselog1")
11508 (set_attr "prefix_extra" "1")
11509 (set_attr "length_immediate" "1")
11510 (set_attr "prefix" "maybe_vex")
11511 (set_attr "mode" "TI")])
;; VEX-encoded three-operand form: emits vpclmulqdq with an immediate
;; operand 3 that must satisfy const_0_to_255_operand.  Requires
;; TARGET_PCLMUL && TARGET_AVX.
11513 (define_insn "*vpclmulqdq"
11514 [(set (match_operand:V2DI 0 "register_operand" "=x")
11515 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11516 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11517 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11519 "TARGET_PCLMUL && TARGET_AVX"
11520 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11521 [(set_attr "type" "sselog1")
11522 (set_attr "prefix_extra" "1")
11523 (set_attr "length_immediate" "1")
11524 (set_attr "prefix" "vex")
11525 (set_attr "mode" "TI")])
;; Two-operand form: emits pclmulqdq with an immediate operand 3 that must
;; satisfy const_0_to_255_operand.  Operand 1 is tied to the destination
;; by the "0" constraint.
11527 (define_insn "pclmulqdq"
11528 [(set (match_operand:V2DI 0 "register_operand" "=x")
11529 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11530 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11531 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11534 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11535 [(set_attr "type" "sselog1")
11536 (set_attr "prefix_extra" "1")
11537 (set_attr "length_immediate" "1")
11538 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for vzeroall.  Element 0 is an
;; unspec_volatile.  It is followed by one SET per SSE register (16 when
;; TARGET_64BIT, otherwise 8) that assigns the V8SImode zero constant to
;; that register.
11540 (define_expand "avx_vzeroall"
11541 [(match_par_dup 0 [(const_int 0)])]
11544 int nregs = TARGET_64BIT ? 16 : 8;
11547 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11549 XVECEXP (operands[0], 0, 0)
11550 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11553 for (regno = 0; regno < nregs; regno++)
11554 XVECEXP (operands[0], 0, regno + 1)
11555 = gen_rtx_SET (VOIDmode,
11556 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11557 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  The attributes
;; mark it as VEX-encoded, with no modrm byte and no memory access.
11560 (define_insn "*avx_vzeroall"
11561 [(match_parallel 0 "vzeroall_operation"
11562 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11565 [(set_attr "type" "sse")
11566 (set_attr "modrm" "0")
11567 (set_attr "memory" "none")
11568 (set_attr "prefix" "vex")
11569 (set_attr "mode" "OI")])
11571 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11572 ;; if the upper 128bits are unused.
;; UNSPECV_VZEROUPPER unspec_volatile carrying a single const_int operand.
;; The attributes mark it as VEX-encoded, with no modrm byte and no memory
;; access.
11573 (define_insn "avx_vzeroupper"
11574 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11575 UNSPECV_VZEROUPPER)]
11578 [(set_attr "type" "sse")
11579 (set_attr "modrm" "0")
11580 (set_attr "memory" "none")
11581 (set_attr "prefix" "vex")
11582 (set_attr "mode" "OI")])
;; Duplicate a scalar into every element of an AVX256MODE24P vector.  The
;; memory alternative emits vbroadcast<ssescalarmodesuffix>.  When the
;; source is a register, the insn is split after reload into two sets: a
;; vec_duplicate into the <avxhalfvecmode> view of the destination
;; register, then a vec_concat of that half with itself.
11584 (define_insn_and_split "vec_dup<mode>"
11585 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
11586 (vec_duplicate:AVX256MODE24P
11587 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
11590 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11592 "&& reload_completed && REG_P (operands[1])"
11593 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
11594 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
11595 "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));"
11596 [(set_attr "type" "ssemov")
11597 (set_attr "prefix_extra" "1")
11598 (set_attr "prefix" "vex")
11599 (set_attr "mode" "V8SF")])
;; Builds a 256-bit vector by vec_concat from a half-width operand 1.
;; There are three alternatives, in order:
;;   memory source                 -> vbroadcastf128
;;   source tied to destination    -> vinsertf128 with immediate 1
;;   source in another register    -> vperm2f128 with immediate 0
11601 (define_insn "avx_vbroadcastf128_<mode>"
11602 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
11603 (vec_concat:AVX256MODE
11604 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11608 vbroadcastf128\t{%1, %0|%0, %1}
11609 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11610 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11611 [(set_attr "type" "ssemov,sselog1,sselog1")
11612 (set_attr "prefix_extra" "1")
11613 (set_attr "length_immediate" "0,1,1")
11614 (set_attr "prefix" "vex")
11615 (set_attr "mode" "V4SF,V8SF,V8SF")])
11617 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11618 ;; If it so happens that the input is in memory, use vbroadcast.
11619 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast written as a selection whose parallel is accepted by
;; avx_vbroadcast_operand.  Operand 3 gives the selected element index.
;; The output code dispatches on which_alternative.  One path rewrites the
;; memory operand to address the selected SFmode element (elt * 4 bytes)
;; and emits vbroadcastss.  The other emits vpermilps with the immediate
;; elt * 0x55.
11620 (define_insn "*avx_vperm_broadcast_v4sf"
11621 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11623 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11624 (match_parallel 2 "avx_vbroadcast_operand"
11625 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11628 int elt = INTVAL (operands[3]);
11629 switch (which_alternative)
11633 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11634 return "vbroadcastss\t{%1, %0|%0, %1}";
11636 operands[2] = GEN_INT (elt * 0x55);
11637 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11639 gcc_unreachable ();
11642 [(set_attr "type" "ssemov,ssemov,sselog1")
11643 (set_attr "prefix_extra" "1")
11644 (set_attr "length_immediate" "0,0,1")
11645 (set_attr "prefix" "vex")
11646 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit broadcast written as a vec_select whose parallel is accepted by
;; avx_vbroadcast_operand.  The insn is always split after reload.  The
;; split code has two paths.  One emits avx_vpermil<mode> to replicate the
;; element within its 128-bit lane, then avx_vperm2f128<mode>3 to copy
;; that lane into both halves.  The other rewrites operand 1 to address
;; the selected scalar element, so the replacement vec_duplicate pattern
;; is used.
11648 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11649 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
11650 (vec_select:AVX256MODEF2P
11651 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
11652 (match_parallel 2 "avx_vbroadcast_operand"
11653 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11656 "&& reload_completed"
11657 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
11659 rtx op0 = operands[0], op1 = operands[1];
11660 int elt = INTVAL (operands[3]);
11666 /* Shuffle element we care about into all elements of the 128-bit lane.
11667 The other lane gets shuffled too, but we don't care. */
11668 if (<MODE>mode == V4DFmode)
11669 mask = (elt & 1 ? 15 : 0);
11671 mask = (elt & 3) * 0x55;
11672 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11674 /* Shuffle the lane we care about into both lanes of the dest. */
11675 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11676 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11680 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
11681 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; Expander for vpermil with an immediate on the AVXMODEFDP modes.
;; Decodes the immediate into an explicit vec_select PARALLEL using one
;; mask bit per element.  For V4DF, elements 2 and 3 use mask bits 2 and 3
;; with 2 added to the selected index.
11684 (define_expand "avx_vpermil<mode>"
11685 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11686 (vec_select:AVXMODEFDP
11687 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11688 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11691 int mask = INTVAL (operands[2]);
11692 rtx perm[<ssescalarnum>];
11694 perm[0] = GEN_INT (mask & 1);
11695 perm[1] = GEN_INT ((mask >> 1) & 1);
11696 if (<MODE>mode == V4DFmode)
11698 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11699 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11703 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for vpermil with an immediate on the AVXMODEFSP modes.
;; Decodes the immediate into an explicit vec_select PARALLEL using two
;; mask bits per element.  For V8SF, elements 4-7 reuse the same four
;; 2-bit fields with 4 added to the selected index.
11706 (define_expand "avx_vpermil<mode>"
11707 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11708 (vec_select:AVXMODEFSP
11709 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11710 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11713 int mask = INTVAL (operands[2]);
11714 rtx perm[<ssescalarnum>];
11716 perm[0] = GEN_INT (mask & 3);
11717 perm[1] = GEN_INT ((mask >> 2) & 3);
11718 perm[2] = GEN_INT ((mask >> 4) & 3);
11719 perm[3] = GEN_INT ((mask >> 6) & 3);
11720 if (<MODE>mode == V8SFmode)
11722 perm[4] = GEN_INT ((mask & 3) + 4);
11723 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11724 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11725 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11729 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matches a vec_select whose parallel is accepted by
;; avx_vpermilp_<mode>_operand.  The output code computes the immediate as
;; avx_vpermilp_parallel (...) - 1, stores it in operands[2], and emits
;; vpermil<ssemodesuffix>.
11732 (define_insn "*avx_vpermilp<mode>"
11733 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11734 (vec_select:AVXMODEF2P
11735 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11736 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11737 [(match_operand 3 "const_int_operand" "")])))]
11740 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11741 operands[2] = GEN_INT (mask);
11742 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11744 [(set_attr "type" "sselog")
11745 (set_attr "prefix_extra" "1")
11746 (set_attr "length_immediate" "1")
11747 (set_attr "prefix" "vex")
11748 (set_attr "mode" "<MODE>")])
;; Form of vpermil<ssemodesuffix> with a variable control: operand 2 has
;; mode <avxpermvecmode> and may be a register or memory.
11750 (define_insn "avx_vpermilvar<mode>3"
11751 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11753 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11754 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11757 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11758 [(set_attr "type" "sselog")
11759 (set_attr "prefix_extra" "1")
11760 (set_attr "prefix" "vex")
11761 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128 with an immediate.  The default pattern is the
;; UNSPEC_VPERMIL2F128 form.  When mask bits 0x88 are both clear, the
;; preparation code builds a SET of operand 0 to an explicit vec_select
;; from the vec_concat of operands 1 and 2.  The low half of the result
;; starts at element (mask & 3) * nelt2 and the high half at
;; ((mask >> 4) & 3) * nelt2, where nelt2 is half the element count.
11763 (define_expand "avx_vperm2f128<mode>3"
11764 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11765 (unspec:AVX256MODE2P
11766 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11767 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11768 (match_operand:SI 3 "const_0_to_255_operand" "")]
11769 UNSPEC_VPERMIL2F128))]
11772 int mask = INTVAL (operands[3]);
11773 if ((mask & 0x88) == 0)
11775 rtx perm[<ssescalarnum>], t1, t2;
11776 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11778 base = (mask & 3) * nelt2;
11779 for (i = 0; i < nelt2; ++i)
11780 perm[i] = GEN_INT (base + i);
11782 base = ((mask >> 4) & 3) * nelt2;
11783 for (i = 0; i < nelt2; ++i)
11784 perm[i + nelt2] = GEN_INT (base + i);
11786 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
11787 operands[1], operands[2]);
11788 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11789 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11790 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11796 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11797 ;; means that in order to represent this properly in rtl we'd have to
11798 ;; nest *another* vec_concat with a zero operand and do the select from
11799 ;; a 4x wide vector. That doesn't seem very nice.
;; UNSPEC_VPERMIL2F128 form of vperm2f128.  Operand 3 is passed through as
;; the immediate and must satisfy const_0_to_255_operand.
11800 (define_insn "*avx_vperm2f128<mode>_full"
11801 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11802 (unspec:AVX256MODE2P
11803 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11804 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11805 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11806 UNSPEC_VPERMIL2F128))]
11808 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11809 [(set_attr "type" "sselog")
11810 (set_attr "prefix_extra" "1")
11811 (set_attr "length_immediate" "1")
11812 (set_attr "prefix" "vex")
11813 (set_attr "mode" "V8SF")])
;; vec_select form of vperm2f128: a selection from the vec_concat of
;; operands 1 and 2 whose parallel is accepted by
;; avx_vperm2f128_<mode>_operand.  The output code computes the immediate
;; as avx_vperm2f128_parallel (...) - 1 and stores it in operands[3].
11815 (define_insn "*avx_vperm2f128<mode>_nozero"
11816 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11817 (vec_select:AVX256MODE2P
11818 (vec_concat:<ssedoublesizemode>
11819 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11820 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11821 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
11822 [(match_operand 4 "const_int_operand" "")])))]
11825 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11826 operands[3] = GEN_INT (mask);
11827 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11829 [(set_attr "type" "sselog")
11830 (set_attr "prefix_extra" "1")
11831 (set_attr "length_immediate" "1")
11832 (set_attr "prefix" "vex")
11833 (set_attr "mode" "V8SF")])
;; Expander for inserting a half-width vector (operand 2) into a 256-bit
;; vector (operand 1).  Switches on the constant operand 3, which must
;; satisfy const_0_to_1_operand, and emits vec_set_lo_<mode> or
;; vec_set_hi_<mode>.
11835 (define_expand "avx_vinsertf128<mode>"
11836 [(match_operand:AVX256MODE 0 "register_operand" "")
11837 (match_operand:AVX256MODE 1 "register_operand" "")
11838 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11839 (match_operand:SI 3 "const_0_to_1_operand" "")]
11842 switch (INTVAL (operands[3]))
11845 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
11849 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
11853 gcc_unreachable ();
;; Replace the low half of a 4-element 256-bit vector.  The result is the
;; vec_concat of operand 2 with elements 2-3 of operand 1.  Emits
;; vinsertf128 with immediate 0.
11858 (define_insn "vec_set_lo_<mode>"
11859 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11860 (vec_concat:AVX256MODE4P
11861 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11862 (vec_select:<avxhalfvecmode>
11863 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11864 (parallel [(const_int 2) (const_int 3)]))))]
11866 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11867 [(set_attr "type" "sselog")
11868 (set_attr "prefix_extra" "1")
11869 (set_attr "length_immediate" "1")
11870 (set_attr "prefix" "vex")
11871 (set_attr "mode" "V8SF")])
;; Replace the high half of a 4-element 256-bit vector.  The result is the
;; vec_concat of elements 0-1 of operand 1 with operand 2.  Emits
;; vinsertf128 with immediate 1.
11873 (define_insn "vec_set_hi_<mode>"
11874 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11875 (vec_concat:AVX256MODE4P
11876 (vec_select:<avxhalfvecmode>
11877 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11878 (parallel [(const_int 0) (const_int 1)]))
11879 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11881 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11882 [(set_attr "type" "sselog")
11883 (set_attr "prefix_extra" "1")
11884 (set_attr "length_immediate" "1")
11885 (set_attr "prefix" "vex")
11886 (set_attr "mode" "V8SF")])
;; Replace the low half of an 8-element 256-bit vector.  The result is the
;; vec_concat of operand 2 with elements 4-7 of operand 1.  Emits
;; vinsertf128 with immediate 0.
11888 (define_insn "vec_set_lo_<mode>"
11889 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11890 (vec_concat:AVX256MODE8P
11891 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11892 (vec_select:<avxhalfvecmode>
11893 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11894 (parallel [(const_int 4) (const_int 5)
11895 (const_int 6) (const_int 7)]))))]
11897 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11898 [(set_attr "type" "sselog")
11899 (set_attr "prefix_extra" "1")
11900 (set_attr "length_immediate" "1")
11901 (set_attr "prefix" "vex")
11902 (set_attr "mode" "V8SF")])
;; Replace the high half of an 8-element 256-bit vector.  The result is
;; the vec_concat of elements 0-3 of operand 1 with operand 2.  Emits
;; vinsertf128 with immediate 1.
11904 (define_insn "vec_set_hi_<mode>"
11905 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11906 (vec_concat:AVX256MODE8P
11907 (vec_select:<avxhalfvecmode>
11908 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11909 (parallel [(const_int 0) (const_int 1)
11910 (const_int 2) (const_int 3)]))
11911 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11913 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11914 [(set_attr "type" "sselog")
11915 (set_attr "prefix_extra" "1")
11916 (set_attr "length_immediate" "1")
11917 (set_attr "prefix" "vex")
11918 (set_attr "mode" "V8SF")])
;; Replace the low half of a V16HI vector.  The V8HI operand 2 is combined
;; with elements 8-15 of operand 1.  Emits vinsertf128 with immediate 0.
11920 (define_insn "vec_set_lo_v16hi"
11921 [(set (match_operand:V16HI 0 "register_operand" "=x")
11923 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11925 (match_operand:V16HI 1 "register_operand" "x")
11926 (parallel [(const_int 8) (const_int 9)
11927 (const_int 10) (const_int 11)
11928 (const_int 12) (const_int 13)
11929 (const_int 14) (const_int 15)]))))]
11931 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11932 [(set_attr "type" "sselog")
11933 (set_attr "prefix_extra" "1")
11934 (set_attr "length_immediate" "1")
11935 (set_attr "prefix" "vex")
11936 (set_attr "mode" "V8SF")])
;; Replace the high half of a V16HI vector.  Elements 0-7 of operand 1 are
;; combined with the V8HI operand 2.  Emits vinsertf128 with immediate 1.
11938 (define_insn "vec_set_hi_v16hi"
11939 [(set (match_operand:V16HI 0 "register_operand" "=x")
11942 (match_operand:V16HI 1 "register_operand" "x")
11943 (parallel [(const_int 0) (const_int 1)
11944 (const_int 2) (const_int 3)
11945 (const_int 4) (const_int 5)
11946 (const_int 6) (const_int 7)]))
11947 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11949 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11950 [(set_attr "type" "sselog")
11951 (set_attr "prefix_extra" "1")
11952 (set_attr "length_immediate" "1")
11953 (set_attr "prefix" "vex")
11954 (set_attr "mode" "V8SF")])
;; Replace the low half of a V32QI vector.  The V16QI operand 2 is
;; combined with elements 16-31 of operand 1.  Emits vinsertf128 with
;; immediate 0.
11956 (define_insn "vec_set_lo_v32qi"
11957 [(set (match_operand:V32QI 0 "register_operand" "=x")
11959 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11961 (match_operand:V32QI 1 "register_operand" "x")
11962 (parallel [(const_int 16) (const_int 17)
11963 (const_int 18) (const_int 19)
11964 (const_int 20) (const_int 21)
11965 (const_int 22) (const_int 23)
11966 (const_int 24) (const_int 25)
11967 (const_int 26) (const_int 27)
11968 (const_int 28) (const_int 29)
11969 (const_int 30) (const_int 31)]))))]
11971 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11972 [(set_attr "type" "sselog")
11973 (set_attr "prefix_extra" "1")
11974 (set_attr "length_immediate" "1")
11975 (set_attr "prefix" "vex")
11976 (set_attr "mode" "V8SF")])
;; Replace the high half of a V32QI vector.  Elements 0-15 of operand 1
;; are combined with the V16QI operand 2.  Emits vinsertf128 with
;; immediate 1.
11978 (define_insn "vec_set_hi_v32qi"
11979 [(set (match_operand:V32QI 0 "register_operand" "=x")
11982 (match_operand:V32QI 1 "register_operand" "x")
11983 (parallel [(const_int 0) (const_int 1)
11984 (const_int 2) (const_int 3)
11985 (const_int 4) (const_int 5)
11986 (const_int 6) (const_int 7)
11987 (const_int 8) (const_int 9)
11988 (const_int 10) (const_int 11)
11989 (const_int 12) (const_int 13)
11990 (const_int 14) (const_int 15)]))
11991 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11993 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11994 [(set_attr "type" "sselog")
11995 (set_attr "prefix_extra" "1")
11996 (set_attr "length_immediate" "1")
11997 (set_attr "prefix" "vex")
11998 (set_attr "mode" "<MODE>")])
;; avx_maskload<ssemodesuffix><avxmodesuffix>: one insn per mode of the
;; AVXMODEF2P iterator, writing a register (operand 0).
;;   operand 1 - memory operand of the same mode.
;;   operand 2 - register operand of the same mode (presumably the mask;
;;               confirm against the builtin expander).
;; Emits vmaskmov<ssemodesuffix> with operand 1 and operand 2 as sources.
;; NOTE(review): the embedded numbering skips 12002 and 12005-12007, so the
;; wrapping RTL code, any further operands and the insn condition are not
;; visible in this listing.
12000 (define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
12001 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12003 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12004 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12008 "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
;; Attributes: sselog1 type, VEX prefix, mode taken from the iterator.
12009 [(set_attr "type" "sselog1")
12010 (set_attr "prefix_extra" "1")
12011 (set_attr "prefix" "vex")
12012 (set_attr "mode" "<MODE>")])
;; avx_maskstore<ssemodesuffix><avxmodesuffix>: one insn per mode of the
;; AVXMODEF2P iterator, writing a memory operand (operand 0).
;;   operand 1, operand 2 - register operands of the same mode, wrapped in
;;   an UNSPEC_MASKSTORE unspec (which one is the mask and which the data is
;;   not stated here; confirm against the builtin expander).
;; Emits vmaskmov<ssemodesuffix> with operand 2 and operand 1 as sources.
;; NOTE(review): the embedded numbering skips 12016, 12019 and 12021, so
;; part of the RTL and the insn condition are not visible in this listing.
12014 (define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
12015 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12017 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12018 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12020 UNSPEC_MASKSTORE))]
12022 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
;; Attributes: sselog1 type, VEX prefix, mode taken from the iterator.
12023 [(set_attr "type" "sselog1")
12024 (set_attr "prefix_extra" "1")
12025 (set_attr "prefix" "vex")
12026 (set_attr "mode" "<MODE>")])
;; avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>: insn-and-split for
;; each AVX256MODE2P mode.  Operand 0 (register or memory) is set from an
;; unspec whose only input, operand 1, has the half-width vector mode
;; <avxhalfvecmode>.  Two alternatives are offered: register destination
;; with register-or-memory source, or memory destination with register
;; source.
;; The split runs once reload has completed ("&& reload_completed"): the C
;; body re-expresses operand 1 in <MODE>mode, either by building a new REG
;; with the same register number or through gen_lowpart, and then emits a
;; plain move into operand 0.
;; NOTE(review): the embedded numbering skips 12032-12034, 12036-12037,
;; 12039, 12041 and 12044-12046, so the unspec name, insn condition, the
;; test choosing between the two conversions and the closing lines are not
;; visible in this listing.
12028 (define_insn_and_split "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12029 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12030 (unspec:AVX256MODE2P
12031 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12035 "&& reload_completed"
12038 rtx op1 = operands[1];
12040 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12042 op1 = gen_lowpart (<MODE>mode, op1);
12043 emit_move_insn (operands[0], op1);
;; vec_init<mode>: expander for every AVX256MODE mode.  Operand 0 is the
;; destination register; operand 1 has no mode or predicate restriction.
;; All work is delegated to ix86_expand_vector_init, called with `false'
;; as its first argument followed by the two operands.
;; NOTE(review): the embedded numbering skips 12050-12051 and 12053-12055,
;; so the expander condition and the end of the C body are not visible in
;; this listing.
12047 (define_expand "vec_init<mode>"
12048 [(match_operand:AVX256MODE 0 "register_operand" "")
12049 (match_operand 1 "" "")]
12052 ix86_expand_vector_init (false, operands[0], operands[1]);
;; *vec_concat<mode>_avx: builds an AVX256MODE register (operand 0) as the
;; vec_concat of two half-width vectors.
;;   operand 1 - <avxhalfvecmode> register.
;;   operand 2 - <avxhalfvecmode> vector_move_operand; alternative 0 allows
;;               a register or memory ("xm"), alternative 1 uses constraint
;;               "C" (presumably the all-zero constant; confirm in the
;;               i386 constraint definitions).
;; The C output code switches on which_alternative.  The visible results
;; are a vinsertf128 with immediate 0x1, and a vmovaps/vmovapd/vmovdqa of
;; operand 1 into %x0 chosen by get_attr_mode; any other case reaches
;; gcc_unreachable.  The attribute list pairs alternative 0 with type
;; sselog and a one-byte immediate, alternative 1 with type ssemov.
;; NOTE(review): the embedded numbering skips the `case' labels, braces and
;; the insn condition (12061-12062, 12064-12065, 12067, 12069-12070, ...),
;; so the exact case-to-template mapping is not visible in this listing.
12056 (define_insn "*vec_concat<mode>_avx"
12057 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12058 (vec_concat:AVX256MODE
12059 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12060 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12063 switch (which_alternative)
12066 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12068 switch (get_attr_mode (insn))
12071 return "vmovaps\t{%1, %x0|%x0, %1}";
12073 return "vmovapd\t{%1, %x0|%x0, %1}";
12075 return "vmovdqa\t{%1, %x0|%x0, %1}";
12078 gcc_unreachable ();
12081 [(set_attr "type" "sselog,ssemov")
12082 (set_attr "prefix_extra" "1,*")
12083 (set_attr "length_immediate" "1,*")
12084 (set_attr "prefix" "vex")
12085 (set_attr "mode" "<avxvecmode>")])
;; vcvtph2ps: register form of the half-precision to single-precision
;; conversion producing a V4SF register (operand 0).
;;   operand 1 - V8HI register holding the half-precision inputs.
;; The conversion is modelled as a V8SF unspec of operand 1 from which four
;; elements are selected.  The selector must name elements 0, 1, 2, 3 in
;; order: the previous list (0 1 1 2) duplicated element 1 and never
;; selected element 3, which does not describe a four-lane conversion.
;; NOTE(review): the embedded numbering skips 12089, 12091 and 12094, so the
;; selecting RTL code, the unspec name and the insn condition are not
;; visible in this listing.
12087 (define_insn "vcvtph2ps"
12088 [(set (match_operand:V4SF 0 "register_operand" "=x")
12090 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12092 (parallel [(const_int 0) (const_int 1)
12093 (const_int 2) (const_int 3)])))]
12095 "vcvtph2ps\t{%1, %0|%0, %1}"
;; Attributes: ssecvt type, VEX prefix, V4SF mode.
12096 [(set_attr "type" "ssecvt")
12097 (set_attr "prefix" "vex")
12098 (set_attr "mode" "V4SF")])
;; *vcvtph2ps_load: memory-source form of the half-to-single conversion.
;; A V4HI memory operand (operand 1) is wrapped in an UNSPEC_VCVTPH2PS
;; unspec and the V4SF result is written to a register (operand 0).
;; NOTE(review): the mode attribute below is V8SF although the destination
;; is V4SF and the register pattern "vcvtph2ps" declares V4SF -- confirm
;; this is intended.
;; NOTE(review): the embedded numbering skips 12104, so the insn condition
;; is not visible in this listing.
12100 (define_insn "*vcvtph2ps_load"
12101 [(set (match_operand:V4SF 0 "register_operand" "=x")
12102 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12103 UNSPEC_VCVTPH2PS))]
12105 "vcvtph2ps\t{%1, %0|%0, %1}"
12106 [(set_attr "type" "ssecvt")
12107 (set_attr "prefix" "vex")
12108 (set_attr "mode" "V8SF")])
;; vcvtph2ps256: 256-bit half-to-single conversion.  A V8HI register or
;; memory operand (operand 1) is wrapped in an UNSPEC_VCVTPH2PS unspec and
;; the V8SF result is written to a register (operand 0).
;; Attributes: ssecvt type, VEX prefix, V8SF mode.
;; NOTE(review): the embedded numbering skips 12114, so the insn condition
;; is not visible in this listing.
12110 (define_insn "vcvtph2ps256"
12111 [(set (match_operand:V8SF 0 "register_operand" "=x")
12112 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12113 UNSPEC_VCVTPH2PS))]
12115 "vcvtph2ps\t{%1, %0|%0, %1}"
12116 [(set_attr "type" "ssecvt")
12117 (set_attr "prefix" "vex")
12118 (set_attr "mode" "V8SF")])
;; vcvtps2ph: expander for the single-to-half conversion with a V8HI
;; register destination (operand 0).
;;   operand 1 - V4SF register source.
;;   operand 2 - SImode immediate.
;; Operands 1 and 2 feed a V4HI unspec.  The preparation statement sets
;; operands[3] to the V4HI zero constant (CONST0_RTX (V4HImode)); how it is
;; combined with the unspec is on lines missing from this listing.
;; NOTE(review): the embedded numbering skips 12122 and 12125-12127, so the
;; outer RTL code, the unspec name, the operand 3 reference and the
;; expander condition are not visible here.
12120 (define_expand "vcvtps2ph"
12121 [(set (match_operand:V8HI 0 "register_operand" "")
12123 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12124 (match_operand:SI 2 "immediate_operand" "")]
12128 "operands[3] = CONST0_RTX (V4HImode);")
;; *vcvtps2ph: register-destination form of the single-to-half conversion.
;;   operand 0 - V8HI register result.
;;   operand 1 - V4SF register source.
;;   operand 2 - SImode immediate restricted by constraint "N".
;;   operand 3 - V4HI operand that must satisfy const0_operand.
;; Operands 1 and 2 feed a V4HI unspec; operand 3 follows it in the RTL.
;; Emits vcvtps2ph with operand 2 as the immediate and operand 1 as source.
;; Attributes: ssecvt type, VEX prefix, V4SF mode.
;; NOTE(review): the embedded numbering skips 12132, 12135 and 12137, so the
;; outer RTL code, the unspec name and the insn condition are not visible
;; in this listing.
12130 (define_insn "*vcvtps2ph"
12131 [(set (match_operand:V8HI 0 "register_operand" "=x")
12133 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12134 (match_operand:SI 2 "immediate_operand" "N")]
12136 (match_operand:V4HI 3 "const0_operand" "")))]
12138 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12139 [(set_attr "type" "ssecvt")
12140 (set_attr "prefix" "vex")
12141 (set_attr "mode" "V4SF")])
;; *vcvtps2ph_store: memory-destination form of the single-to-half
;; conversion.  A V4SF register (operand 1) and an SImode immediate with
;; constraint "N" (operand 2) are wrapped in an UNSPEC_VCVTPS2PH unspec and
;; the V4HI result is stored to memory (operand 0).
;; Attributes: ssecvt type, VEX prefix, V4SF mode.
;; NOTE(review): the embedded numbering skips 12148, so the insn condition
;; is not visible in this listing.
12143 (define_insn "*vcvtps2ph_store"
12144 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12145 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12146 (match_operand:SI 2 "immediate_operand" "N")]
12147 UNSPEC_VCVTPS2PH))]
12149 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12150 [(set_attr "type" "ssecvt")
12151 (set_attr "prefix" "vex")
12152 (set_attr "mode" "V4SF")])
;; vcvtps2ph256: 256-bit single-to-half conversion.  A V8SF register
;; (operand 1) and an SImode immediate with constraint "N" (operand 2) are
;; wrapped in an UNSPEC_VCVTPS2PH unspec; the V8HI result goes to a
;; register or memory destination (operand 0).
;; Attributes: ssecvt type, VEX prefix, V8SF mode.
;; NOTE(review): the embedded numbering skips 12159, so the insn condition
;; is not visible in this listing.
12154 (define_insn "vcvtps2ph256"
12155 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12156 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12157 (match_operand:SI 2 "immediate_operand" "N")]
12158 UNSPEC_VCVTPS2PH))]
12160 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12161 [(set_attr "type" "ssecvt")
12162 (set_attr "prefix" "vex")
12163 (set_attr "mode" "V8SF")])